ZFS on Linux

[[Category:Linux|ZFS]]
[[Category:ZFS|Linux]]
[[Category:VirtualBox|ZFS]]


Grub

Create /etc/udev/rules.d/99-local-grub.rules with this content:

# Create by-id links in /dev as well for zfs vdev. Needed by grub
# Add links for zfs_member only
KERNEL=="sd*[0-9]", IMPORT{parent}=="ID_*", ENV{ID_FS_TYPE}=="zfs_member", SYMLINK+="$env{ID_BUS}-$env{ID_SERIAL}-part%n"


VirtualBox on ZVols

If you use ZVols as raw VMDK devices in VirtualBox as a normal user (vmuser in this example), create /etc/udev/rules.d/99-local-zvol.rules with this content:

KERNEL=="zd*", SUBSYSTEM=="block", ACTION=="add|change", PROGRAM="/lib/udev/zvol_id /dev/%k", RESULT=="rpool/VM/*", OWNER="vmuser"

Then create the raw VMDK that maps to the ZVol:

vmuser@virtualbox-server:~$ VBoxManage internalcommands createrawvmdk -filename /var/data/VMs/dev/Solaris10.vmdk -rawdisk /dev/zvol/rpool/VM/Solaris10
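
The raw VMDK only points at the ZVol, so the ZVol has to exist first. A minimal sketch (the 20G size is an assumption; the rpool/VM/Solaris10 name matches the example above):

# zfs create -V 20G rpool/VM/Solaris10
# ls -lL /dev/zvol/rpool/VM/Solaris10   # should now be owned by vmuser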

Setup Ubuntu 16.04 with ZFS root

Most of this is taken from the Ubuntu 16.04 Root on ZFS guide (see Links below).

Boot the Ubuntu Desktop live CD and choose the "Try Ubuntu" option.

Get the right ashift value

For example, to get the values for sda and sdb:

# lsblk -o NAME,PHY-SEC,LOG-SEC /dev/sd{a,b} | awk 'function exponent (value) {for(i=0;value>1;i++){value/=2;}; return i;}{if($2 ~ /[0-9]+/){print $0,exponent($2)}else{print $0,"ashift"}}'
NAME   PHY-SEC LOG-SEC ashift
sda        512     512 9
├─sda1     512     512 9
├─sda2     512     512 9
├─sda3     512     512 9
└─sda4     512     512 9
sdb       4096     512 12
├─sdb1    4096     512 12
├─sdb2    4096     512 12
├─sdb3    4096     512 12
└─sdb4    4096     512 12
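
The ashift column is simply log2 of the physical sector size (512 → 9, 4096 → 12). If in doubt, the sector sizes can also be read with blockdev:

# blockdev --getpbsz --getss /dev/sdb

For the sdb disk above this prints 4096 and 512, which again corresponds to ashift=12.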

Connect it to your network

sudo -i
ifconfig ens160 <IP> netmask 255.255.255.0
route add default gw <defaultrouter>

echo "nameserver <nameserver>" >> /etc/resolv.conf
echo 'Acquire::http::Proxy "http://<user>:<pass>@<proxyhost>:<proxyport>";' >> /etc/apt/apt.conf

apt-add-repository universe
apt update
apt --yes install openssh-server
passwd ubuntu

Reconnect via ssh as the ubuntu user, become root again with sudo -i, and continue:

apt install --yes debootstrap gdisk zfs-initramfs
sgdisk -g -a1 -n2:34:2047  -t2:EF02 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4
sgdisk        -n9:-8M:0    -t9:BF07 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4
sgdisk        -n1:0:0      -t1:BF01 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4

zpool create -f -o ashift=12 \
      -O atime=off \
      -O canmount=off \
      -O compression=lz4 \
      -O normalization=formD \
      -O mountpoint=/ \
      -R /mnt \
      rpool /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4-part1

zfs create -o canmount=off -o mountpoint=none rpool/ROOT
zfs create -o canmount=noauto -o mountpoint=/ rpool/ROOT/ubuntu
zfs mount rpool/ROOT/ubuntu
zfs create                 -o setuid=off              rpool/home
zfs create -o mountpoint=/root                        rpool/home/root
zfs create -o canmount=off -o setuid=off  -o exec=off rpool/var
zfs create -o com.sun:auto-snapshot=false             rpool/var/cache
zfs create                                            rpool/var/log
zfs create                                            rpool/var/spool
zfs create -o com.sun:auto-snapshot=false -o exec=on  rpool/var/tmp
zfs create -V 4G -b $(getconf PAGESIZE) -o compression=zle \
      -o logbias=throughput -o sync=always \
      -o primarycache=metadata -o secondarycache=none \
      -o com.sun:auto-snapshot=false rpool/swap

cp -p {,/mnt}/etc/apt/apt.conf
export http_proxy=$(awk '/Acquire::http::Proxy/{gsub(/\"/,"");gsub(/;$/,"");print $2}' /mnt/etc/apt/apt.conf)
echo -n xenial{,-security,-updates} | \
  xargs -n 1 -d ' ' -I{} echo "deb http://archive.ubuntu.com/ubuntu {} main universe" > /mnt/etc/apt/sources.list

chmod 1777 /mnt/var/tmp
debootstrap xenial /mnt
zfs set devices=off rpool

HOSTNAME=Template-VM
echo ${HOSTNAME} > /mnt/etc/hostname
printf "127.0.1.1\t%s\n" "${HOSTNAME}" >> /mnt/etc/hosts

INTERFACE=$(ip a s scope global | awk 'NR==1{gsub(/:$/,"",$2);print $2;}')
printf "auto %s\niface %s inet dhcp\n" "${INTERFACE}" "${INTERFACE}" > /mnt/etc/network/interfaces.d/${INTERFACE}

mount --rbind /dev  /mnt/dev
mount --rbind /proc /mnt/proc
mount --rbind /sys  /mnt/sys
cp -p {,/mnt}/etc/apt/apt.conf
echo -n xenial{,-security,-updates} | \
  xargs -n 1 -d ' ' -I{} echo "deb http://archive.ubuntu.com/ubuntu {} main universe" > /mnt/etc/apt/sources.list

chroot /mnt /bin/bash --login

locale-gen en_US.UTF-8
echo 'LANG="en_US.UTF-8"' > /etc/default/locale
LANG="en_US.UTF-8"
dpkg-reconfigure tzdata

ln -s /proc/self/mounts /etc/mtab
apt update
apt install --yes ubuntu-minimal
apt install --yes --no-install-recommends linux-image-generic
apt install --yes zfs-initramfs
apt install --yes openssh-server

apt install --yes grub-pc
addgroup --system lpadmin
addgroup --system sambashare
passwd

grub-probe /

update-initramfs -c -k all

vi /etc/default/grub
Comment out: GRUB_HIDDEN_TIMEOUT=0
Remove quiet and splash from: GRUB_CMDLINE_LINUX_DEFAULT
Uncomment: GRUB_TERMINAL=console

update-grub
grub-install /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4

zfs snapshot rpool/ROOT/ubuntu@install

exit
mount | grep -v zfs | tac | awk '/\/mnt/ {print $3}' | xargs -i{} umount -lf {}
zpool export rpool

reboot

apt install --yes cryptsetup
echo cryptswap1 /dev/zvol/rpool/swap /dev/urandom swap,cipher=aes-xts-plain64:sha256,size=256 >> /etc/crypttab
systemctl daemon-reload
systemctl start systemd-cryptsetup@cryptswap1.service
echo /dev/mapper/cryptswap1 none swap defaults 0 0 >> /etc/fstab
swapon -av
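
After the first boot from the new pool it is worth checking that everything came up as expected; a quick sanity check could look like this:

# zpool status rpool
# zfs list -r -o name,mountpoint,canmount rpool
# swapon --show
# grep cryptswap1 /etc/crypttab /etc/fstab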

Swap on ZFS with random key encryption

$ sudo systemctl edit --force --full zfs-cryptswap@.service
# /etc/systemd/system/zfs-cryptswap@.service
[Unit]
Description=ZFS Random Cryptography Setup for %I
Documentation=man:zfs(8)
DefaultDependencies=no
Conflicts=umount.target
IgnoreOnIsolate=true
After=systemd-random-seed.service zfs-volumes.target
BindsTo=dev-zvol-rpool-%i.device
Before=umount.target

[Service]
Type=oneshot
RemainAfterExit=yes
TimeoutSec=0
KeyringMode=shared
OOMScoreAdjust=500
UMask=0077
RuntimeDirectory=zfs-cryptswap.%i
RuntimeDirectoryMode=0700
ExecStartPre=-/sbin/swapoff '/dev/zvol/rpool/%i'
ExecStartPre=-/sbin/zfs destroy 'rpool/%i'
ExecStartPre=/bin/dd if=/dev/urandom of=/run/zfs-cryptswap.%i/%i.key bs=32 count=1
ExecStart=/sbin/zfs create -V 4G -b 8k -o compression=zle -o logbias=throughput -o sync=always -o primarycache=metadata -o secondarycache=none -o com.sun:auto-snapshot=false -o encryption=on -o keyformat=raw -o keylocation=file:///run/zfs-cryptswap.%i/%i.key rpool/%i
ExecStart=/bin/sleep 1
ExecStartPost=/sbin/mkswap '/dev/zvol/rpool/%i'
ExecStartPost=/sbin/swapon '/dev/zvol/rpool/%i'
ExecStop=/sbin/swapoff '/dev/zvol/rpool/%i'
ExecStop=/bin/sleep 2
ExecStopPost=/sbin/zfs destroy 'rpool/%i'

[Install]
WantedBy=swap.target

!!! BE CAREFUL with the name after the @ !!!

The name after the @ is the name of the ZFS dataset that will be DESTROYED and recreated on every start of the service!!!

To destroy and recreate an encrypted swap volume named cryptswap, use:

# systemctl start  zfs-cryptswap@cryptswap.service
# systemctl enable zfs-cryptswap@cryptswap.service
# update-initramfs -k $(uname -r) -u
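
Once the unit is running you can verify that the randomly keyed swap is active (cryptswap is the instance name used above):

# systemctl status zfs-cryptswap@cryptswap.service
# swapon --show
# zfs get encryption,keylocation rpool/cryptswap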

Kernel settings for ZFS

Set module parameters in /etc/modprobe.d/zfs.conf

options zfs zfs_arc_max=10737418240

# increase these so scrub/resilver finishes more quickly at the cost of other work
options zfs zfs_vdev_scrub_min_active=24
options zfs zfs_vdev_scrub_max_active=64
# sync write
options zfs zfs_vdev_sync_write_min_active=8
options zfs zfs_vdev_sync_write_max_active=32
# sync reads (normal)
options zfs zfs_vdev_sync_read_min_active=8
options zfs zfs_vdev_sync_read_max_active=32
# async reads : prefetcher
options zfs zfs_vdev_async_read_min_active=8
options zfs zfs_vdev_async_read_max_active=32
# async write : bulk writes
options zfs zfs_vdev_async_write_min_active=8
options zfs zfs_vdev_async_write_max_active=32

# max write speed to l2arc
# tradeoff between write/read and durability of ssd (?)
# default : 8 * 1024 * 1024
# setting here : 500 * 1024 * 1024
options zfs l2arc_write_max=524288000

options zfs zfs_top_maxinflight=512
options zfs zfs_resilver_min_time_ms=8000
options zfs zfs_resilver_delay=0

Remember to update your initramfs before rebooting. The initramfs is the filesystem that is read when the zfs module is loaded at boot, so the options from /etc/modprobe.d/zfs.conf have to be copied into it.

# update-initramfs -k all -u

Check settings

root@zfshost:~# modprobe -c | grep "options zfs"
options zfs zfs_arc_max=10737418240
options zfs zfs_vdev_scrub_min_active=24
options zfs zfs_vdev_scrub_max_active=64
options zfs zfs_vdev_sync_write_min_active=8
options zfs zfs_vdev_sync_write_max_active=32
options zfs zfs_vdev_sync_read_min_active=8
options zfs zfs_vdev_sync_read_max_active=32
options zfs zfs_vdev_async_read_min_active=8
options zfs zfs_vdev_async_read_max_active=32
options zfs zfs_vdev_async_write_min_active=8
options zfs zfs_vdev_async_write_max_active=32
options zfs l2arc_write_max=524288000
options zfs zfs_top_maxinflight=512
options zfs zfs_resilver_min_time_ms=8000
options zfs zfs_resilver_delay=0
root@zfshost:~# modprobe --show-depends zfs
insmod /lib/modules/4.15.0-58-generic/kernel/spl/spl.ko 
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/znvpair.ko 
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zcommon.ko 
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/icp.ko 
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zavl.ko 
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zunicode.ko 
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zfs.ko zfs_arc_max=10737418240 zfs_vdev_scrub_min_active=24 zfs_vdev_scrub_max_active=64 zfs_vdev_sync_write_min_active=8 zfs_vdev_sync_write_max_active=32 zfs_vdev_sync_read_min_active=8 zfs_vdev_sync_read_max_active=32 zfs_vdev_async_read_min_active=8 zfs_vdev_async_read_max_active=32 zfs_vdev_async_write_min_active=8 zfs_vdev_async_write_max_active=32 l2arc_write_max=524288000 zfs_top_maxinflight=512 zfs_resilver_min_time_ms=8000 zfs_resilver_delay=0

Check actual settings

Check the files in:

  • /proc/spl/kstat/zfs/
  • /sys/module/zfs/parameters/

ARC Cache

Get the current usage of cache

# cat /proc/spl/kstat/zfs/arcstats |grep c_
c_min                           4    521779200
c_max                           4    1073741824
arc_no_grow                     4    0
arc_tempreserve                 4    0
arc_loaned_bytes                4    0
arc_prune                       4    25360
arc_meta_used                   4    493285336
arc_meta_limit                  4    805306368
arc_dnode_limit                 4    80530636
arc_meta_max                    4    706551816
arc_meta_min                    4    16777216
sync_wait_for_async             4    357
arc_need_free                   4    0
arc_sys_free                    4    260889600
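
The current ARC size itself is reported in the size field of arcstats, so a quick way to compare it against c_max is:

# awk '$1 == "size" || $1 == "c_max" {printf "%-10s %6d MiB\n", $1, $3/1024/1024}' /proc/spl/kstat/zfs/arcstats

If the arc_summary tool that ships with the ZFS utilities is installed, it prints a more readable overview of the same counters.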

Limit the cache without reboot (not permanent)

For example, limit it to 512 MB (which is too small for production environments, just an example):

# echo "$[512*1024*1024]" > /sys/module/zfs/parameters/zfs_arc_max

Now you have to drop the caches:

# echo 3 > /proc/sys/vm/drop_caches

Make the cache limit permanent

For example, limit it to 512 MB (which is too small for production environments, just an example):

# echo "options zfs zfs_arc_max=$[512*1024*1024]" >> /etc/modprobe.d/zfs.conf

After a reboot this value takes effect.
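
Whether the value from /etc/modprobe.d/zfs.conf was actually picked up can be checked at runtime:

# cat /sys/module/zfs/parameters/zfs_arc_max
536870912

On a ZFS root, remember to run update-initramfs -u as described above, otherwise the module is still loaded with the old options from the initramfs.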

Check cache hits/misses

# (while : ; do cat /proc/spl/kstat/zfs/arcstats ; sleep 5 ; done ) | awk '
          BEGIN { 
          }     
          $1 ~ /(hits|misses)/ {
                  name=$1;
                  gsub(/[_]*(hits|misses)/,"",name);
                  if(name == ""){ 
                    name="global";
                  }
          }
          $1 ~ /hits/ {
                  hits[name] = $3 - hitslast[name]
                  hitslast[name] = $3
          }     
          $1 ~ /misses/ {
                  misses[name] = $3 - misslast[name]
                  misslast[name] = $3
                  rate = 0
                  total = hits[name] + misses[name]
                  if (total)
                          rate = (hits[name] * 100) / total
                  if (name=="global")
                    printf "%30s %12s %12s %9s\n", "NAME", "HITS", "MISSES", "HITRATE"

                  printf "%30s %12d %12d %8.2f%%\n", name, hits[name], misses[name], rate
          }     
  '
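
If the arcstat tool from the ZFS utilities is available (depending on the distribution it is installed as arcstat or arcstat.py), it reports similar hit/miss statistics without the awk wrapper, for example every 5 seconds:

# arcstat 5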

Higher scrub performance

#!/bin/bash

#
## scrub_fast.sh
#

case $1 in
start)
  echo    0 > /sys/module/zfs/parameters/zfs_scan_idle
  echo    0 > /sys/module/zfs/parameters/zfs_scrub_delay
  echo  512 > /sys/module/zfs/parameters/zfs_top_maxinflight
  echo 5000 > /sys/module/zfs/parameters/zfs_scan_min_time_ms
  echo    4 > /sys/module/zfs/parameters/zfs_vdev_scrub_min_active
  echo    8 > /sys/module/zfs/parameters/zfs_vdev_scrub_max_active
  ;;
stop)
  echo   50 > /sys/module/zfs/parameters/zfs_scan_idle
  echo    4 > /sys/module/zfs/parameters/zfs_scrub_delay
  echo   32 > /sys/module/zfs/parameters/zfs_top_maxinflight
  echo 1000 > /sys/module/zfs/parameters/zfs_scan_min_time_ms
  echo    1 > /sys/module/zfs/parameters/zfs_vdev_scrub_min_active
  echo    2 > /sys/module/zfs/parameters/zfs_vdev_scrub_max_active
  ;;
status)
  for i in zfs_scan_idle zfs_scrub_delay zfs_top_maxinflight zfs_scan_min_time_ms zfs_vdev_scrub_{min,max}_active
  do
    param="/sys/module/zfs/parameters/${i}"
    printf "%60s\t%d\n" "${param}" "$(cat ${param})"
  done
  ;;
*)
  echo "Usage: ${0} (start|stop|status)"
  ;;
esac
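
Typical usage, assuming the script was saved as scrub_fast.sh, made executable, and that your ZFS version still exposes these parameters (the pool name tank is only an example):

# ./scrub_fast.sh start
# zpool scrub tank
# zpool status tank
# ./scrub_fast.sh stop

The stop branch puts the tunables back to the values used during normal operation once the scrub has finished.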

More information on zpool status

#!/bin/bash

#
## print_zpool.sh
#

# Written by Lars Timmann <L@rs.Timmann.de> 2022

columns=5 # number of columns for zpool status
if [ ${#} -gt 0 ] && [ ${1} == "iostat" ]
then
  command="iostat -v"
  columns=7
  shift
fi

stdbuf --output=L zpool ${command:-status} -P ${*} | awk -v columns=${columns} '
BEGIN {
  command="lsscsi --scsi_id";
  while( command | getline lsscsi ) {
    count=split(lsscsi,fields);
    dev=fields[count-1];
    scsi_id[dev]=fields[1];
  }
  close(command);
  
  command="ls -Ul /dev/disk/by-id/*";
  while( command | getline ) {
    dev=$NF;
    gsub(/[\.\/]/,"",dev);
    dev_id=$(NF-2);
    device[dev_id]="/dev/"dev;
  }
  close(command);
}
$1 ~ /\/dev\// {
  line=$0;
  dev_by_id=$1;
  dev_no_part=dev_by_id;
  gsub(/(-part|)[0-9]+$/,"",dev_no_part);
  if( NF > 5) {
    count=split(line,a,FS,seps);
    line=seps[0];
    for(i=1;i<columns;i++){
      line=line a[i] seps[i];
    }
    line=line a[columns];
    for(i=columns+1;i<=count;i++){
      rest=rest a[i] seps[i];
    }
  }
  printf("%s %s %s",line,scsi_id[device[dev_no_part]],device[dev_by_id]);
  if(rest!=""){
    printf(" %s",rest);
    rest="";
  }
  printf("\n");
  next;
}
/^errors:/ {
  print;
  fflush();
  next;
}
{ 
  print;
}'
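
Example invocations, assuming the script is saved as print_zpool.sh; any further arguments are passed straight through to zpool, and the pool name tank is only an example:

# ./print_zpool.sh tank
# ./print_zpool.sh iostat tank 5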

Backup ZFS settings

A little script which may be used at your own risk.

#!/bin/bash

# Written by Lars Timmann <L@rs.Timmann.de> 2018
# Tested on Solaris 11.3 & Ubuntu Linux

# This script is a rotten bunch of code... rewrite it!

AWK_CMD=/usr/bin/gawk
ZPOOL_CMD=/sbin/zpool
ZFS_CMD=/sbin/zfs
ZDB_CMD=/sbin/zdb

function print_local_options () {
  DATASET=$1
  OPTION=$2
  EXCLUDE_REGEX=$3
  ${ZFS_CMD} get -s local -Ho property,value -p ${OPTION} ${DATASET} | while read -r property value
  do
    if [[ ! ${property} =~ ${EXCLUDE_REGEX} ]]
    then
      if [ "_${property}_" == "_share.*_" ]
      then
        print_local_options "${DATASET}" 'share.all' '^$'
      else
        printf '\t-o %s=%s \\\n' "${property}" "${value}"
      fi
    fi
  done
}

function print_filesystem () {
  ZFS=$1

  printf '%s create \\\n' "${ZFS_CMD}"
  print_local_options "${ZFS}" 'all' '^$'
  printf '\t%s\n' "${ZFS}"
}

function print_filesystems () {
  ZPOOL=$1
  for ZFS in $(${ZFS_CMD} list -Ho name -t filesystem -r ${ZPOOL})
  do
    if [ ${ZFS} == ${ZPOOL} ] ; then continue ; fi
    printf '#\n## Filesystem: %s\n#\n\n' "${ZFS}"
    print_filesystem ${ZFS}
    printf '\n'
  done
}

function print_volume () {
  ZVOL=$1
  volsize=$(${ZFS_CMD} get -Ho value volsize ${ZVOL})
  volblocksize=$(${ZFS_CMD} get -Ho value volblocksize ${ZVOL})
  
  printf '%s create \\\n\t-V %s \\\n\t-b %s \\\n' "${ZFS_CMD}" "${volsize}" "${volblocksize}"
  print_local_options "${ZVOL}" 'all' '(volsize|refreservation)'
  printf '\t%s\n' "${ZVOL}"
}

function print_volumes () {
  ZPOOL=$1
  for ZVOL in $(${ZFS_CMD} list -Ho name -t volume -r ${ZPOOL})
  do
    printf '#\n## Volume: %s\n#\n\n' "${ZVOL}"
    print_volume ${ZVOL}
    printf '\n'
  done
}

function print_vdevs () {
  ZPOOL=$1
  ${ZDB_CMD} -C ${ZPOOL} | ${AWK_CMD} -F':' '
    $1 ~ /^[[:space:]]*type$/ {
      gsub(/[ ]+/,"",$NF);
      type=substr($NF,2,length($NF)-2);
      if ( type == "mirror" ) {
        printf " \\\n\t%s",type;
      }
    }
    $1 ~ /^[[:space:]]*path$/ {
      gsub(/[ ]+/,"",$NF);
      vdev=substr($NF,2,length($NF)-2);
      printf " \\\n\t%s",vdev;
    }
    END {
      printf "\n";
    }
  '
}

function print_zpool () {
  ZPOOL=$1
  
  printf '#############################################################\n'
  printf '#\n## ZPool: %s\n#\n' "${ZPOOL}"
  printf '#############################################################\n\n'

  printf '%s create \\\n' "${ZPOOL_CMD}"
  print_local_options "${ZPOOL}" 'all' '/@/'
  printf '\t%s' "${ZPOOL}"
  print_vdevs "${ZPOOL}"
  printf '\n'

  printf '#############################################################\n\n'
  print_filesystems   "${ZPOOL}"
  print_volumes       "${ZPOOL}"
}

OS=$(uname -s)
eval $(uname -s)=1
HOSTNAME=$(hostname)

printf '#############################################################\n'
printf '# Hostname: %s\n' "${HOSTNAME}"
printf '#############################################################\n\n'
for ZPOOL in $(${ZPOOL_CMD} list -Ho name)
do
  print_zpool ${ZPOOL}
done
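
The output is a series of zpool create and zfs create commands that would recreate the current layout, so redirecting it to a file gives a simple text backup of the configuration (script and file names are just examples):

# ./backup_zfs_settings.sh > /root/zfs-settings-$(hostname)-$(date +%F).sh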

Links

  • HOWTO install Ubuntu 16.04 to a Whole Disk Native ZFS Root Filesystem using Ubiquity GUI installer: https://github.com/zfsonlinux/pkg-zfs/wiki/HOWTO-install-Ubuntu-16.04-to-a-Whole-Disk-Native-ZFS-Root-Filesystem-using-Ubiquity-GUI-installer
  • Ubuntu 16.04 Root on ZFS: https://github.com/zfsonlinux/zfs/wiki/Ubuntu-16.04-Root-on-ZFS