ZFS on Linux

From Lolly's Wiki
Revision as of 00:24, 26 November 2021 by Lollypop (talk | contribs) (Text replacement - "[[Kategorie:" to "[[Category:")
Jump to navigationJump to search


Grub

Create /etc/udev/rules.d/99-local-grub.rules with this content: <syntaxhighlight lang=bash>

# Create by-id links in /dev as well for zfs vdev. Needed by grub
# Add links for zfs_member only

KERNEL=="sd*[0-9]", IMPORT{parent}=="ID_*", ENV{ID_FS_TYPE}=="zfs_member", SYMLINK+="$env{ID_BUS}-$env{ID_SERIAL}-part%n" </source>


Virtualbox on ZVols

If you use ZVols as rawvmdk-device in VirtualBox as normal user (vmuser in this example) create /etc/udev/rules.d/99-local-zvol.rules with this content: <syntaxhighlight lang=bash> KERNEL=="zd*" SUBSYSTEM=="block" ACTION=="add|change" PROGRAM="/lib/udev/zvol_id /dev/%k" RESULT=="rpool/VM/*" OWNER="vmuser" </source>

<syntaxhighlight lang=bash> vmuser@virtualbox-server:~$ VBoxManage internalcommands createrawvmdk -filename /var/data/VMs/dev/Solaris10.vmdk -rawdisk /dev/zvol/rpool/VM/Solaris10 </source>

Setup Ubuntu 16.04 with ZFS root

Most of this is taken from Ubuntu-16.04-Root-on-ZFS.

Boot Ubuntu Desktop (alias Live CD) and choose "try out".

Get the right ashift value

For example to get sda and sdb: <syntaxhighlight lang=bash>

  1. lsblk -o NAME,PHY-SeC,LOG-SEC /dev/sd{a,b} | awk 'function exponent (value) {for(i=0;value>1;i++){value/=2;}; return i;}{if($2 ~ /[0-9]+/){print $0,exponent($2)}else{print$0,"ashift"}}'

NAME PHY-SEC LOG-SEC ashift sda 512 512 9 ├─sda1 512 512 9 ├─sda2 512 512 9 ├─sda3 512 512 9 └─sda4 512 512 9 sdb 4096 512 12 ├─sdb1 4096 512 12 ├─sdb2 4096 512 12 ├─sdb3 4096 512 12 └─sdb4 4096 512 12 </source>

Connect it to your network

<syntaxhighlight lang=bash> sudo -i ifconfig ens160 <IP> netmask 255.255.255.0 route add default gw <defaultrouter>

echo "nameserver <nameserver>" >> /etc/resolv.conf echo 'Acquire::http::Proxy "http://<user>:<pass>@<proxyhost>:<proxyport>";' >> /etc/apt/apt.conf

apt-add-repository universe apt update apt --yes install openssh-server passwd ubuntu

Reconnect via ssh

apt install --yes debootstrap gdisk zfs-initramfs sgdisk -g -a1 -n2:34:2047 -t2:EF02 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4 sgdisk -n9:-8M:0 -t9:BF07 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4 sgdisk -n1:0:0 -t1:BF01 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4

zpool create -f -o ashift=12 \

     -O atime=off \
     -O canmount=off \
     -O compression=lz4 \
     -O normalization=formD \
     -O mountpoint=/ \
     -R /mnt \
     rpool /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4-part1

zfs create -o canmount=off -o mountpoint=none rpool/ROOT zfs create -o canmount=noauto -o mountpoint=/ rpool/ROOT/ubuntu zfs mount rpool/ROOT/ubuntu zfs create -o setuid=off rpool/home zfs create -o mountpoint=/root rpool/home/root zfs create -o canmount=off -o setuid=off -o exec=off rpool/var zfs create -o com.sun:auto-snapshot=false rpool/var/cache zfs create rpool/var/log zfs create rpool/var/spool zfs create -o com.sun:auto-snapshot=false -o exec=on rpool/var/tmp zfs create -V 4G -b $(getconf PAGESIZE) -o compression=zle \

     -o logbias=throughput -o sync=always \
     -o primarycache=metadata -o secondarycache=none \
     -o com.sun:auto-snapshot=false rpool/swap

cp -p {,/mnt}/etc/apt/apt.conf export http_proxy=$(awk '/Acquire::http::Proxy/{gsub(/\"/,"");gsub(/;$/,"");print $2}' /mnt/etc/apt/apt.conf) echo -n xenial{,-security,-updates} | \

 xargs -n 1 -d ' ' -I{} echo "deb http://archive.ubuntu.com/ubuntu {} main universe" > /mnt/etc/apt/sources.list

chmod 1777 /mnt/var/tmp debootstrap xenial /mnt zfs set devices=off rpool

HOSTNAME=Template-VM echo ${HOSTNAME} > /mnt/etc/hostname printf "127.0.1.1\t%s\n" "${HOSTNAME}" >> /mnt/etc/hosts

INTERFACE=$(ip a s scope global | awk 'NR==1{gsub(/:$/,"",$2);print $2;}') printf "auto %s\niface %s inet dhcp\n" "${INTERFACE}" "${INTERFACE}" > /mnt/etc/network/interfaces.d/${INTERFACE}

mount --rbind /dev /mnt/dev mount --rbind /proc /mnt/proc mount --rbind /sys /mnt/sys cp -p {,/mnt}/etc/apt/apt.conf echo -n xenial{,-security,-updates} | \

 xargs -n 1 -d ' ' -I{} echo "deb http://archive.ubuntu.com/ubuntu {} main universe" > /mnt/etc/apt/sources.list

chroot /mnt /bin/bash --login

locale-gen en_US.UTF-8 echo 'LANG="en_US.UTF-8"' > /etc/default/locale LANG="en_US.UTF-8" dpkg-reconfigure tzdata

ln -s /proc/self/mounts /etc/mtab apt update apt install --yes ubuntu-minimal apt install --yes --no-install-recommends linux-image-generic apt install --yes zfs-initramfs apt install --yes openssh-server

apt install --yes grub-pc addgroup --system lpadmin addgroup --system sambashare passwd

grub-probe /

update-initramfs -c -k all

vi /etc/default/grub Comment out: GRUB_HIDDEN_TIMEOUT=0 Remove quiet and splash from: GRUB_CMDLINE_LINUX_DEFAULT Uncomment: GRUB_TERMINAL=console

update-grub grub-install /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4

zfs snapshot rpool/ROOT/ubuntu@install

exit mount | grep -v zfs | tac | awk '/\/mnt/ {print $3}' | xargs -i{} umount -lf {} zpool export rpool

reboot

apt install --yes cryptsetup echo cryptswap1 /dev/zvol/rpool/swap /dev/urandom swap,cipher=aes-xts-plain64:sha256,size=256 >> /etc/crypttab systemctl daemon-reload systemctl start systemd-cryptsetup@cryptswap1.service echo /dev/mapper/cryptswap1 none swap defaults 0 0 >> /etc/fstab swapon -av

</source>

Swap on ZFS with random key encryption

<syntaxhighlight lang=ini>

# /etc/systemd/system/zfs-cryptswap@.service

[Unit] Description=ZFS Random Cryptography Setup for %I Documentation=man:zfs(8) DefaultDependencies=no Conflicts=umount.target IgnoreOnIsolate=true After=systemd-random-seed.service BindsTo=dev-zvol-rpool-%i.device Before=umount.target

[Service] Type=oneshot RemainAfterExit=yes TimeoutSec=0 KeyringMode=shared OOMScoreAdjust=500 UMask=0077 RuntimeDirectory=zfs-cryptswap.%i RuntimeDirectoryMode=0700 ExecStartPre=-/sbin/swapoff '/dev/zvol/rpool/%i' ExecStartPre=-/sbin/zfs destroy 'rpool/%i' ExecStartPre=/bin/dd if=/dev/urandom of=/run/zfs-cryptswap.%i/%i.key bs=32 count=1 ExecStart=/sbin/zfs create -V 4G -b 4k -o compression=zle -o logbias=throughput -o sync=always -o primarycache=metadata -o secondarycache=none -o com.sun:auto-snapshot=false -o encryption=on -o keyformat=raw -o keylocation=file:///run/zfs-cryptswap.%i/%i.key rpool/%i ExecStartPost=/sbin/mkswap '/dev/zvol/rpool/%i' ExecStartPost=/sbin/swapon '/dev/zvol/rpool/%i' ExecStop=/sbin/swapoff '/dev/zvol/rpool/%i' ExecStopPost=/sbin/zfs destroy 'rpool/%i'

[Install] WantedBy=swap.target </source>

!!!BE CAREFUL with the name after @ !!!

The name after the @ is the name of the ZFS volume that will be DESTROYED and recreated!!!

To destroy and recreate an encrypted ZFS volume named cryptswap use: <syntaxhighlight lang=bash>

  1. systemctl start zfs-cryptswap@cryptswap.service
  2. systemctl enable zfs-cryptswap@cryptswap.service
  3. update-initramfs -k all -u

</source>

Kernel settings for ZFS

Set module parameter in /etc/modprobe.d/zfs.conf

<syntaxhighlight lang=bash> options zfs zfs_arc_max=10737418240

# increase these so scrub/resilver runs more quickly at the cost of other work

options zfs zfs_vdev_scrub_min_active=24 options zfs zfs_vdev_scrub_max_active=64

# sync write

options zfs zfs_vdev_sync_write_min_active=8 options zfs zfs_vdev_sync_write_max_active=32

# sync reads (normal)

options zfs zfs_vdev_sync_read_min_active=8 options zfs zfs_vdev_sync_read_max_active=32

# async reads : prefetcher

options zfs zfs_vdev_async_read_min_active=8 options zfs zfs_vdev_async_read_max_active=32

# async write : bulk writes

options zfs zfs_vdev_async_write_min_active=8 options zfs zfs_vdev_async_write_max_active=32

# max write speed to l2arc
# tradeoff between write/read and durability of ssd (?)
# default : 8 * 1024 * 1024
# setting here : 500 * 1024 * 1024

options zfs l2arc_write_max=524288000

options zfs zfs_top_maxinflight=512 options zfs zfs_resilver_min_time_ms=8000 options zfs zfs_resilver_delay=0 </source>

Remember to update your initramfs before rebooting: it is the filesystem from which the module options are read when the zfs module is loaded at boot. <syntaxhighlight lang=bash>

  1. update-initramfs -k all -u

</source>

Check settings

<syntaxhighlight lang=bash> root@zfshost:~# modprobe -c | grep "options zfs" options zfs zfs_arc_max=10737418240 options zfs zfs_vdev_scrub_min_active=24 options zfs zfs_vdev_scrub_max_active=64 options zfs zfs_vdev_sync_write_min_active=8 options zfs zfs_vdev_sync_write_max_active=32 options zfs zfs_vdev_sync_read_min_active=8 options zfs zfs_vdev_sync_read_max_active=32 options zfs zfs_vdev_async_read_min_active=8 options zfs zfs_vdev_async_read_max_active=32 options zfs zfs_vdev_async_write_min_active=8 options zfs zfs_vdev_async_write_max_active=32 options zfs l2arc_write_max=524288000 options zfs zfs_top_maxinflight=512 options zfs zfs_resilver_min_time_ms=8000 options zfs zfs_resilver_delay=0 </source>

<syntaxhighlight lang=bash> root@zfshost:~# modprobe --show-depends zfs insmod /lib/modules/4.15.0-58-generic/kernel/spl/spl.ko insmod /lib/modules/4.15.0-58-generic/kernel/zfs/znvpair.ko insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zcommon.ko insmod /lib/modules/4.15.0-58-generic/kernel/zfs/icp.ko insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zavl.ko insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zunicode.ko insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zfs.ko zfs_arc_max=10737418240 zfs_vdev_scrub_min_active=24 zfs_vdev_scrub_max_active=64 zfs_vdev_sync_write_min_active=8 zfs_vdev_sync_write_max_active=32 zfs_vdev_sync_read_min_active=8 zfs_vdev_sync_read_max_active=32 zfs_vdev_async_read_min_active=8 zfs_vdev_async_read_max_active=32 zfs_vdev_async_write_min_active=8 zfs_vdev_async_write_max_active=32 l2arc_write_max=524288000 zfs_top_maxinflight=512 zfs_resilver_min_time_ms=8000 zfs_resilver_delay=0 </source>

Check actual settings

Check files in

  • /proc/spl/kstat/zfs/
  • /sys/module/zfs/parameters/

ARC Cache

Get the current usage of cache

<syntaxhighlight lang=bash>

  1. cat /proc/spl/kstat/zfs/arcstats |grep c_

c_min 4 521779200 c_max 4 1073741824 arc_no_grow 4 0 arc_tempreserve 4 0 arc_loaned_bytes 4 0 arc_prune 4 25360 arc_meta_used 4 493285336 arc_meta_limit 4 805306368 arc_dnode_limit 4 80530636 arc_meta_max 4 706551816 arc_meta_min 4 16777216 sync_wait_for_async 4 357 arc_need_free 4 0 arc_sys_free 4 260889600 </source>

Limit the cache without reboot (non-permanent)

For example limit it to 512MB (which is too small for production environments, just an example...): <syntaxhighlight lang=bash>

  1. echo "$[512*1024*1024]" > /sys/module/zfs/parameters/zfs_arc_max

</source> Now you have to drop the caches: <syntaxhighlight lang=bash>

  1. echo 3 > /proc/sys/vm/drop_caches

</source>

Make the cache limit permanent

For example limit it to 512MB (which is too small for production environments, just an example...): <syntaxhighlight lang=bash>

  1. echo "options zfs zfs_arc_max=$[512*1024*1024]" >> /etc/modprobe.d/zfs.conf

</source> After a reboot this value takes effect.

Check cache hits/misses

<syntaxhighlight lang=bash>

  1. (while : ; do cat /proc/spl/kstat/zfs/arcstats ; sleep 5 ; done ) | awk '
         BEGIN { 
         }     
         $1 ~ /(hits|misses)/ {
                 name=$1;
                 gsub(/[_]*(hits|misses)/,"",name);
                 if(name == ""){ 
                   name="global";
                 }
         }
         $1 ~ /hits/ {
                 hits[name] = $3 - hitslast[name]
                 hitslast[name] = $3
         }     
         $1 ~ /misses/ {
                 misses[name] = $3 - misslast[name]
                 misslast[name] = $3
                 rate = 0
                 total = hits[name] + misses[name]
                 if (total)
                         rate = (hits[name] * 100) / total
                 if (name=="global")
                   printf "%30s %12s %12s %9s\n", "NAME", "HITS", "MISSES", "HITRATE"
                 printf "%30s %12d %12d %8.2f%%\n", name, hits[name], misses[name], rate
         }     
 '

</source>

Higher scrub performance

<syntaxhighlight lang=bash highlight=3-5>

#!/bin/bash
# scrub_fast.sh

# Dispatch on the first command line argument: start | stop | status.
# Every branch only touches files below /sys/module/zfs/parameters/.
case "$1" in
start)
  # Write the "fast scrub" values to the zfs module parameters.
  echo    0 > /sys/module/zfs/parameters/zfs_scan_idle
  echo    0 > /sys/module/zfs/parameters/zfs_scrub_delay
  echo  512 > /sys/module/zfs/parameters/zfs_top_maxinflight
  echo 5000 > /sys/module/zfs/parameters/zfs_scan_min_time_ms
  echo    4 > /sys/module/zfs/parameters/zfs_vdev_scrub_min_active
  echo    8 > /sys/module/zfs/parameters/zfs_vdev_scrub_max_active
  ;;
stop)
  # Write the "normal" values back.
  # NOTE(review): presumably these are the module defaults - confirm for
  # the ZFS version in use.
  echo   50 > /sys/module/zfs/parameters/zfs_scan_idle
  echo    4 > /sys/module/zfs/parameters/zfs_scrub_delay
  echo   32 > /sys/module/zfs/parameters/zfs_top_maxinflight
  echo 1000 > /sys/module/zfs/parameters/zfs_scan_min_time_ms
  echo    1 > /sys/module/zfs/parameters/zfs_vdev_scrub_min_active
  echo    2 > /sys/module/zfs/parameters/zfs_vdev_scrub_max_active
  ;;
status)
  # Print the path and the current numeric value of every parameter that
  # the start/stop branches modify.
  for i in zfs_scan_idle zfs_scrub_delay zfs_top_maxinflight zfs_scan_min_time_ms zfs_vdev_scrub_{min,max}_active
  do
    param="/sys/module/zfs/parameters/${i}"
    printf "%60s\t%d\n" "${param}" "$(cat "${param}")"
  done
  ;;
*)
  # Default arm: any other (or missing) argument prints the usage line.
  echo "Usage: ${0} (start|stop|status)"
  ;;
esac
</source>

Backup ZFS settings

A little script which may be used at your own risk.

<syntaxhighlight lang=bash>

#!/bin/bash
# Written by Lars Timmann <L@rs.Timmann.de> 2018
# Tested on solaris 11.3 & Ubuntu Linux
# This script is a rotten bunch of code... rewrite it!

AWK_CMD=/usr/bin/gawk ZPOOL_CMD=/sbin/zpool ZFS_CMD=/sbin/zfs ZDB_CMD=/sbin/zdb

#######################################
# Print the locally set properties of a dataset as "-o property=value"
# continuation lines for a generated create command.
# Globals:   ZFS_CMD (read) - command used to query properties
# Arguments: $1 - dataset (or pool) name, may contain spaces
#            $2 - property selector handed to "get" (e.g. all, share.all)
#            $3 - extended regex; properties whose name matches are skipped
# Outputs:   one "<TAB>-o property=value \" line per property on stdout.
#            Values containing characters outside a conservative safe set
#            are shell-quoted so the generated command stays valid.
#######################################
function print_local_options () {
  local DATASET=$1
  local OPTION=$2
  local EXCLUDE_REGEX=$3
  local property value
  # Values made only of these characters are emitted verbatim.
  local safe_value_regex='^[A-Za-z0-9_./:=@,+%-]*$'

  # ZFS_CMD is intentionally unquoted so it may carry a prefix such as sudo.
  # With -H the output is tab separated, so split on tabs only and keep
  # spaces inside values intact.
  ${ZFS_CMD} get -s local -Ho property,value -p "${OPTION}" "${DATASET}" \
    | while IFS=$'\t' read -r property value; do
      if [[ ${property} =~ ${EXCLUDE_REGEX} ]]; then
        continue
      fi
      if [[ "${property}" == 'share.*' ]]; then
        # The literal placeholder property "share.*" is expanded by asking
        # for share.all instead.
        print_local_options "${DATASET}" 'share.all' '^$'
      elif [[ ${value} =~ ${safe_value_regex} ]]; then
        printf '\t-o %s=%s \\\n' "${property}" "${value}"
      else
        printf '\t-o %s=%q \\\n' "${property}" "${value}"
      fi
    done
}

#######################################
# Print a "create" command that recreates one filesystem with its locally
# set properties.
# Globals:   ZFS_CMD (read) - printed as the command name
# Arguments: $1 - filesystem name
# Outputs:   the multi-line command on stdout; the filesystem name is
#            shell-quoted (%q), which leaves ordinary names unchanged and
#            escapes names containing spaces.
#######################################
function print_filesystem () {
  local ZFS=$1

  printf '%s create \\\n' "${ZFS_CMD}"
  print_local_options "${ZFS}" 'all' '^$'
  printf '\t%q\n' "${ZFS}"
}

#######################################
# Print a commented create command for every filesystem below a pool,
# skipping the entry whose name equals the pool name itself.
# Globals:   ZFS_CMD (read) - command used to list the filesystems
# Arguments: $1 - pool name, may contain spaces
# Outputs:   for each filesystem a "## Filesystem: <name>" header followed by
#            the output of print_filesystem and an empty line
#######################################
function print_filesystems () {
  local ZPOOL=$1
  local ZFS

  # Read the list line by line so names containing spaces stay intact.
  # The list is fed through fd 3 so commands inside the loop keep stdin.
  while IFS= read -r ZFS <&3; do
    if [[ "${ZFS}" == "${ZPOOL}" ]]; then
      continue
    fi
    printf '#\n## Filesystem: %s\n#\n\n' "${ZFS}"
    print_filesystem "${ZFS}"
    printf '\n'
  done 3< <(${ZFS_CMD} list -Ho name -t filesystem -r "${ZPOOL}")
}

#######################################
# Print a "create -V ... -b ..." command that recreates one volume with its
# locally set properties.
# Globals:   ZFS_CMD (read) - used to query volsize/volblocksize and printed
#            as the command name
# Arguments: $1 - volume name, may contain spaces
# Outputs:   the multi-line command on stdout; volsize and refreservation are
#            excluded from the -o list (volsize is already given via -V) and
#            the volume name is shell-quoted (%q).
#######################################
function print_volume () {
  local ZVOL=$1
  local volsize volblocksize

  volsize=$(${ZFS_CMD} get -Ho value volsize "${ZVOL}")
  volblocksize=$(${ZFS_CMD} get -Ho value volblocksize "${ZVOL}")

  printf '%s create \\\n\t-V %s \\\n\t-b %s \\\n' "${ZFS_CMD}" "${volsize}" "${volblocksize}"
  print_local_options "${ZVOL}" 'all' '(volsize|refreservation)'
  printf '\t%q\n' "${ZVOL}"
}

#######################################
# Print a commented create command for every volume below a pool.
# Globals:   ZFS_CMD (read) - command used to list the volumes
# Arguments: $1 - pool name, may contain spaces
# Outputs:   for each volume a "## Volume: <name>" header followed by the
#            output of print_volume and an empty line
#######################################
function print_volumes () {
  local ZPOOL=$1
  local ZVOL

  # Read the list line by line so names containing spaces stay intact.
  # The list is fed through fd 3 so commands inside the loop keep stdin.
  while IFS= read -r ZVOL <&3; do
    printf '#\n## Volume: %s\n#\n\n' "${ZVOL}"
    print_volume "${ZVOL}"
    printf '\n'
  done 3< <(${ZFS_CMD} list -Ho name -t volume -r "${ZPOOL}")
}

#######################################
# Print the vdev part of a pool create command, derived from the cached
# pool configuration.
# Globals:   ZDB_CMD (read) - called with -C <pool>
#            AWK_CMD (read) - awk interpreter used to filter the output
# Arguments: $1 - pool name
# Outputs:   for every "type" entry equal to mirror and for every "path"
#            entry a continuation " \<NL><TAB><value>", then a final newline.
#            Other vdev types (e.g. raidz) are not printed.
#######################################
function print_vdevs () {
  local ZPOOL=$1

  ${ZDB_CMD} -C "${ZPOOL}" | ${AWK_CMD} -F':' '
    # Return the value of a "key: value" line without blanks and without
    # the surrounding quote characters. Only the first colon separates key
    # and value, so device paths that contain colons are kept complete.
    function config_value(line,    text) {
      text = line
      sub(/^[^:]*:/, "", text)
      gsub(/[ ]+/, "", text)
      return substr(text, 2, length(text) - 2)
    }
    # Keys are indented in the configuration dump, so allow leading blanks.
    $1 ~ /^[ \t]*type$/ {
      type = config_value($0)
      if (type == "mirror") {
        printf " \\\n\t%s", type
      }
    }
    $1 ~ /^[ \t]*path$/ {
      printf " \\\n\t%s", config_value($0)
    }
    END {
      printf "\n"
    }
  '
}

#######################################
# Print everything needed to recreate one pool: a banner, the pool create
# command with its options and vdevs, then the create commands for all
# filesystems and volumes of the pool.
# Globals:   ZPOOL (written) - set to the pool name
#            ZPOOL_CMD (read) - printed as the command name
# Arguments: $1 - pool name
# Outputs:   the generated text on stdout
#######################################
function print_zpool () {
  ZPOOL=$1
  local banner='#############################################################'

  # Banner naming the pool.
  printf '%s\n#\n## ZPool: %s\n#\n%s\n\n' "${banner}" "${ZPOOL}" "${banner}"

  # Pool create command: options first, then the pool name followed by the
  # vdev list on the same logical line.
  printf '%s create \\\n' "${ZPOOL_CMD}"
  print_local_options "${ZPOOL}" 'all' '/@/'
  printf '\t%s' "${ZPOOL}"
  print_vdevs "${ZPOOL}"
  printf '\n%s\n\n' "${banner}"

  # Datasets living in the pool.
  print_filesystems "${ZPOOL}"
  print_volumes "${ZPOOL}"
}

OS=$(uname -s) eval $(uname -s)=1 HOSTNAME=$(hostname)

printf '#############################################################\n' printf '# Hostname: %s\n' "${HOSTNAME}" printf '#############################################################\n\n' for ZPOOL in $(${ZPOOL_CMD} list -Ho name) do

 print_zpool ${ZPOOL}

done </source>

Links