ZFS on Linux: Difference between revisions
m (Text replacement - "<source" to "<syntaxhighlight") |
m (Text replacement - "[[Kategorie:" to "[[Category:") |
||
Line 1: | Line 1: | ||
[[ | [[Category:Linux|ZFS]] | ||
[[ | [[Category:ZFS|Linux]] | ||
[[ | [[Category:VirtualBox|ZFS]] | ||
==Grub== | ==Grub== |
Revision as of 00:24, 26 November 2021
Grub
Create /etc/udev/rules.d/99-local-grub.rules with this content: <syntaxhighlight lang=bash>
# Create by-id links in /dev as well for zfs vdev. Needed by grub
# Add links for zfs_member only
KERNEL=="sd*[0-9]", IMPORT{parent}=="ID_*", ENV{ID_FS_TYPE}=="zfs_member", SYMLINK+="$env{ID_BUS}-$env{ID_SERIAL}-part%n"
</syntaxhighlight>
Virtualbox on ZVols
If you use ZVols as rawvmdk-device in VirtualBox as normal user (vmuser in this example) create /etc/udev/rules.d/99-local-zvol.rules with this content: <syntaxhighlight lang=bash> KERNEL=="zd*" SUBSYSTEM=="block" ACTION=="add|change" PROGRAM="/lib/udev/zvol_id /dev/%k" RESULT=="rpool/VM/*" OWNER="vmuser" </source>
<syntaxhighlight lang=bash> vmuser@virtualbox-server:~$ VBoxManage internalcommands createrawvmdk -filename /var/data/VMs/dev/Solaris10.vmdk -rawdisk /dev/zvol/rpool/VM/Solaris10 </source>
Setup Ubuntu 16.04 with ZFS root
Most of this is taken from the Ubuntu-16.04-Root-on-ZFS guide.
Boot Ubuntu Desktop (alias Live CD) and choose "try out".
Get the right ashift value
For example to get sda and sdb: <syntaxhighlight lang=bash>
- lsblk -o NAME,PHY-SeC,LOG-SEC /dev/sd{a,b} | awk 'function exponent (value) {for(i=0;value>1;i++){value/=2;}; return i;}{if($2 ~ /[0-9]+/){print $0,exponent($2)}else{print$0,"ashift"}}'
NAME PHY-SEC LOG-SEC ashift sda 512 512 9 ├─sda1 512 512 9 ├─sda2 512 512 9 ├─sda3 512 512 9 └─sda4 512 512 9 sdb 4096 512 12 ├─sdb1 4096 512 12 ├─sdb2 4096 512 12 ├─sdb3 4096 512 12 └─sdb4 4096 512 12 </source>
Connect it to your network
<syntaxhighlight lang=bash>
# Installation transcript for Ubuntu 16.04 on a ZFS root pool, typed into the
# live session. This is a sequence of interactive steps (it enters a chroot
# and reboots), not a script to run in one go. Replace every <placeholder>.

# --- Live session: become root and configure the network ---------------------
sudo -i
ifconfig ens160 <IP> netmask 255.255.255.0
route add default gw <defaultrouter>

echo "nameserver <nameserver>" >> /etc/resolv.conf
echo 'Acquire::http::Proxy "http://<user>:<pass>@<proxyhost>:<proxyport>";' >> /etc/apt/apt.conf

apt-add-repository universe
apt update
apt --yes install openssh-server
passwd ubuntu

# Reconnect via ssh

# --- Install the tools and partition the disk --------------------------------
apt install --yes debootstrap gdisk zfs-initramfs
sgdisk -g -a1 -n2:34:2047 -t2:EF02 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4
sgdisk -n9:-8M:0 -t9:BF07 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4
sgdisk -n1:0:0 -t1:BF01 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4

# --- Create the pool (temporarily rooted at /mnt) and the datasets -----------
zpool create -f -o ashift=12 \
  -O atime=off \
  -O canmount=off \
  -O compression=lz4 \
  -O normalization=formD \
  -O mountpoint=/ \
  -R /mnt \
  rpool /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4-part1

zfs create -o canmount=off -o mountpoint=none rpool/ROOT
zfs create -o canmount=noauto -o mountpoint=/ rpool/ROOT/ubuntu
zfs mount rpool/ROOT/ubuntu
zfs create -o setuid=off rpool/home
zfs create -o mountpoint=/root rpool/home/root
zfs create -o canmount=off -o setuid=off -o exec=off rpool/var
zfs create -o com.sun:auto-snapshot=false rpool/var/cache
zfs create rpool/var/log
zfs create rpool/var/spool
zfs create -o com.sun:auto-snapshot=false -o exec=on rpool/var/tmp
zfs create -V 4G -b $(getconf PAGESIZE) -o compression=zle \
  -o logbias=throughput -o sync=always \
  -o primarycache=metadata -o secondarycache=none \
  -o com.sun:auto-snapshot=false rpool/swap

# --- Proxy and package sources for debootstrap -------------------------------
# Nothing has populated /mnt/etc yet (debootstrap only runs further down), so
# create the directory first; otherwise the copy and the redirection fail.
mkdir -p /mnt/etc/apt
cp -p {,/mnt}/etc/apt/apt.conf
export http_proxy=$(awk '/Acquire::http::Proxy/{gsub(/\"/,"");gsub(/;$/,"");print $2}' /mnt/etc/apt/apt.conf)
echo -n xenial{,-security,-updates} | \
  xargs -n 1 -d ' ' -I{} echo "deb http://archive.ubuntu.com/ubuntu {} main universe" > /mnt/etc/apt/sources.list

# --- Bootstrap the base system -----------------------------------------------
chmod 1777 /mnt/var/tmp
debootstrap xenial /mnt
zfs set devices=off rpool

# --- Basic configuration of the new system -----------------------------------
HOSTNAME=Template-VM
echo "${HOSTNAME}" > /mnt/etc/hostname
printf "127.0.1.1\t%s\n" "${HOSTNAME}" >> /mnt/etc/hosts

# First interface that carries a global-scope address in the live session.
INTERFACE=$(ip a s scope global | awk 'NR==1{gsub(/:$/,"",$2);print $2;}')
printf "auto %s\niface %s inet dhcp\n" "${INTERFACE}" "${INTERFACE}" > "/mnt/etc/network/interfaces.d/${INTERFACE}"

# --- Enter the new system ----------------------------------------------------
mount --rbind /dev /mnt/dev
mount --rbind /proc /mnt/proc
mount --rbind /sys /mnt/sys
# Written again after debootstrap has populated /mnt/etc/apt.
cp -p {,/mnt}/etc/apt/apt.conf
echo -n xenial{,-security,-updates} | \
  xargs -n 1 -d ' ' -I{} echo "deb http://archive.ubuntu.com/ubuntu {} main universe" > /mnt/etc/apt/sources.list

chroot /mnt /bin/bash --login

# --- Inside the chroot -------------------------------------------------------
locale-gen en_US.UTF-8
echo 'LANG="en_US.UTF-8"' > /etc/default/locale
LANG="en_US.UTF-8" dpkg-reconfigure tzdata

ln -s /proc/self/mounts /etc/mtab
apt update
apt install --yes ubuntu-minimal
apt install --yes --no-install-recommends linux-image-generic
apt install --yes zfs-initramfs
apt install --yes openssh-server

apt install --yes grub-pc
addgroup --system lpadmin
addgroup --system sambashare
passwd

grub-probe /

update-initramfs -c -k all

vi /etc/default/grub
# Comment out: GRUB_HIDDEN_TIMEOUT=0
# Remove quiet and splash from: GRUB_CMDLINE_LINUX_DEFAULT
# Uncomment: GRUB_TERMINAL=console

update-grub
grub-install /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4

zfs snapshot rpool/ROOT/ubuntu@install

# --- Leave the chroot, unmount everything below /mnt and reboot --------------
exit
mount | grep -v zfs | tac | awk '/\/mnt/ {print $3}' | xargs -i{} umount -lf {}
zpool export rpool

reboot

# --- After the reboot: swap on the zvol through dm-crypt with a random key ---
apt install --yes cryptsetup
echo cryptswap1 /dev/zvol/rpool/swap /dev/urandom swap,cipher=aes-xts-plain64:sha256,size=256 >> /etc/crypttab
systemctl daemon-reload
systemctl start systemd-cryptsetup@cryptswap1.service
echo /dev/mapper/cryptswap1 none swap defaults 0 0 >> /etc/fstab
swapon -av
</syntaxhighlight>
Swap on ZFS with random key encryption
<syntaxhighlight lang=ini>
# /etc/systemd/system/zfs-cryptswap@.service
[Unit]
Description=ZFS Random Cryptography Setup for %I
Documentation=man:zfs(8)
DefaultDependencies=no
Conflicts=umount.target
IgnoreOnIsolate=true
After=systemd-random-seed.service
BindsTo=dev-zvol-rpool-%i.device
Before=umount.target

[Service]
Type=oneshot
RemainAfterExit=yes
TimeoutSec=0
KeyringMode=shared
OOMScoreAdjust=500
UMask=0077
RuntimeDirectory=zfs-cryptswap.%i
RuntimeDirectoryMode=0700
ExecStartPre=-/sbin/swapoff '/dev/zvol/rpool/%i'
ExecStartPre=-/sbin/zfs destroy 'rpool/%i'
ExecStartPre=/bin/dd if=/dev/urandom of=/run/zfs-cryptswap.%i/%i.key bs=32 count=1
ExecStart=/sbin/zfs create -V 4G -b 4k -o compression=zle -o logbias=throughput -o sync=always -o primarycache=metadata -o secondarycache=none -o com.sun:auto-snapshot=false -o encryption=on -o keyformat=raw -o keylocation=file:///run/zfs-cryptswap.%i/%i.key rpool/%i
ExecStartPost=/sbin/mkswap '/dev/zvol/rpool/%i'
ExecStartPost=/sbin/swapon '/dev/zvol/rpool/%i'
ExecStop=/sbin/swapoff '/dev/zvol/rpool/%i'
ExecStopPost=/sbin/zfs destroy 'rpool/%i'

[Install]
WantedBy=swap.target
</syntaxhighlight>
!!!BE CAREFUL with the name after @ !!!
The name after the @ is the name of the ZFS volume that will be DESTROYED and recreated!!!
To destroy and recreate an encrypted ZFS volume named cryptswap use: <syntaxhighlight lang=bash>
- systemctl start zfs-cryptswap@cryptswap.service
- systemctl enable zfs-cryptswap@cryptswap.service
- update-initramfs -k all -u
</syntaxhighlight>
Kernel settings for ZFS
Set module parameter in /etc/modprobe.d/zfs.conf
<syntaxhighlight lang=bash>
options zfs zfs_arc_max=10737418240

# increase these so scrub/resilver runs faster, at the cost of other work
options zfs zfs_vdev_scrub_min_active=24
options zfs zfs_vdev_scrub_max_active=64

# sync write
options zfs zfs_vdev_sync_write_min_active=8
options zfs zfs_vdev_sync_write_max_active=32

# sync reads (normal)
options zfs zfs_vdev_sync_read_min_active=8
options zfs zfs_vdev_sync_read_max_active=32

# async reads : prefetcher
options zfs zfs_vdev_async_read_min_active=8
options zfs zfs_vdev_async_read_max_active=32

# async write : bulk writes
options zfs zfs_vdev_async_write_min_active=8
options zfs zfs_vdev_async_write_max_active=32

# max write speed to l2arc
# tradeoff between write/read and durability of ssd (?)
# default : 8 * 1024 * 1024
# setting here : 500 * 1024 * 1024
options zfs l2arc_write_max=524288000

options zfs zfs_top_maxinflight=512
options zfs zfs_resilver_min_time_ms=8000
options zfs zfs_resilver_delay=0
</syntaxhighlight>
Remember to update your initramfs before boot. This is the filesystem which is read when your module is loaded. <syntaxhighlight lang=bash>
- update-initramfs -k all -u
</syntaxhighlight>
Check settings
<syntaxhighlight lang=bash> root@zfshost:~# modprobe -c | grep "options zfs" options zfs zfs_arc_max=10737418240 options zfs zfs_vdev_scrub_min_active=24 options zfs zfs_vdev_scrub_max_active=64 options zfs zfs_vdev_sync_write_min_active=8 options zfs zfs_vdev_sync_write_max_active=32 options zfs zfs_vdev_sync_read_min_active=8 options zfs zfs_vdev_sync_read_max_active=32 options zfs zfs_vdev_async_read_min_active=8 options zfs zfs_vdev_async_read_max_active=32 options zfs zfs_vdev_async_write_min_active=8 options zfs zfs_vdev_async_write_max_active=32 options zfs l2arc_write_max=524288000 options zfs zfs_top_maxinflight=512 options zfs zfs_resilver_min_time_ms=8000 options zfs zfs_resilver_delay=0 </source>
<syntaxhighlight lang=bash> root@zfshost:~# modprobe --show-depends zfs insmod /lib/modules/4.15.0-58-generic/kernel/spl/spl.ko insmod /lib/modules/4.15.0-58-generic/kernel/zfs/znvpair.ko insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zcommon.ko insmod /lib/modules/4.15.0-58-generic/kernel/zfs/icp.ko insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zavl.ko insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zunicode.ko insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zfs.ko zfs_arc_max=10737418240 zfs_vdev_scrub_min_active=24 zfs_vdev_scrub_max_active=64 zfs_vdev_sync_write_min_active=8 zfs_vdev_sync_write_max_active=32 zfs_vdev_sync_read_min_active=8 zfs_vdev_sync_read_max_active=32 zfs_vdev_async_read_min_active=8 zfs_vdev_async_read_max_active=32 zfs_vdev_async_write_min_active=8 zfs_vdev_async_write_max_active=32 l2arc_write_max=524288000 zfs_top_maxinflight=512 zfs_resilver_min_time_ms=8000 zfs_resilver_delay=0 </source>
Check actual settings
Check files in
- /proc/spl/kstat/zfs/
- /sys/module/zfs/parameters/
ARC Cache
Get the current usage of cache
<syntaxhighlight lang=bash>
- cat /proc/spl/kstat/zfs/arcstats |grep c_
c_min 4 521779200 c_max 4 1073741824 arc_no_grow 4 0 arc_tempreserve 4 0 arc_loaned_bytes 4 0 arc_prune 4 25360 arc_meta_used 4 493285336 arc_meta_limit 4 805306368 arc_dnode_limit 4 80530636 arc_meta_max 4 706551816 arc_meta_min 4 16777216 sync_wait_for_async 4 357 arc_need_free 4 0 arc_sys_free 4 260889600 </source>
Limit the cache without a reboot (non-permanent)
For example limit it to 512MB (which is too small for production environments, just an example...): <syntaxhighlight lang=bash>
- echo "$[512*1024*1024]" > /sys/module/zfs/parameters/zfs_arc_max
</syntaxhighlight> Now you have to drop the caches: <syntaxhighlight lang=bash>
- echo 3 > /proc/sys/vm/drop_caches
</syntaxhighlight>
Make the cache limit permanent
For example limit it to 512MB (which is too small for production environments, just an example...): <syntaxhighlight lang=bash>
- echo "options zfs zfs_arc_max=$[512*1024*1024]" >> /etc/modprobe.d/zfs.conf
</syntaxhighlight> After a reboot this value takes effect.
Check cache hits/misses
<syntaxhighlight lang=bash>
# Print ARC hit/miss deltas and the hit rate every 5 seconds (stop with Ctrl-C).
# The loop re-reads arcstats forever; awk keeps the previous counter values and
# reports the difference between two consecutive samples.
(while : ; do cat /proc/spl/kstat/zfs/arcstats ; sleep 5 ; done) | awk '
  # Derive the counter group from the statistic name by stripping the
  # "hits"/"misses" suffix; the bare hits/misses counters become "global".
  $1 ~ /(hits|misses)/ {
    name = $1
    gsub(/[_]*(hits|misses)/, "", name)
    if (name == "") {
      name = "global"
    }
  }

  # Remember the delta since the previous sample and the new absolute value.
  $1 ~ /hits/ {
    hits[name] = $3 - hitslast[name]
    hitslast[name] = $3
  }

  # The row for a group is printed when its misses counter is seen.
  $1 ~ /misses/ {
    misses[name] = $3 - misslast[name]
    misslast[name] = $3
    rate = 0
    total = hits[name] + misses[name]
    if (total)
      rate = (hits[name] * 100) / total
    # The global row starts each sample, so print the header before it.
    if (name == "global")
      printf "%30s %12s %12s %9s\n", "NAME", "HITS", "MISSES", "HITRATE"
    printf "%30s %12d %12d %8.2f%%\n", name, hits[name], misses[name], rate
  }
'
</syntaxhighlight>
Higher scrub performance
<syntaxhighlight lang=bash highlight=3-5>
- !/bin/bash
-
- scrub_fast.sh
case $1 in start)
echo 0 > /sys/module/zfs/parameters/zfs_scan_idle echo 0 > /sys/module/zfs/parameters/zfs_scrub_delay echo 512 > /sys/module/zfs/parameters/zfs_top_maxinflight echo 5000 > /sys/module/zfs/parameters/zfs_scan_min_time_ms echo 4 > /sys/module/zfs/parameters/zfs_vdev_scrub_min_active echo 8 > /sys/module/zfs/parameters/zfs_vdev_scrub_max_active ;;
stop)
echo 50 > /sys/module/zfs/parameters/zfs_scan_idle echo 4 > /sys/module/zfs/parameters/zfs_scrub_delay echo 32 > /sys/module/zfs/parameters/zfs_top_maxinflight echo 1000 > /sys/module/zfs/parameters/zfs_scan_min_time_ms echo 1 > /sys/module/zfs/parameters/zfs_vdev_scrub_min_active echo 2 > /sys/module/zfs/parameters/zfs_vdev_scrub_max_active ;;
status)
for i in zfs_scan_idle zfs_scrub_delay zfs_top_maxinflight zfs_scan_min_time_ms zfs_vdev_scrub_{min,max}_active do param="/sys/module/zfs/parameters/${i}" printf "%60s\t%d\n" "${param}" "$(cat ${param})" done ;;
- )
echo "Usage: ${0} (start|stop|status)" ;;
esac </source>
Backup ZFS settings
A little script which may be used at your own risk.
<syntaxhighlight lang=bash>
#!/bin/bash
# Written by Lars Timmann <L@rs.Timmann.de> 2018
# Tested on solaris 11.3 & Ubuntu Linux
# This script is a rotten bunch of code... rewrite it!
#
# Prints, for every imported pool, the zpool/zfs "create" commands with the
# locally set properties of the pool, its filesystems and its volumes.

# Absolute paths of the external tools used by the functions below.
AWK_CMD=/usr/bin/gawk
ZPOOL_CMD=/sbin/zpool
ZFS_CMD=/sbin/zfs
ZDB_CMD=/sbin/zdb
#######################################
# Print the locally set properties of a dataset as option lines for a
# "create" command, one "<TAB>-o property=value \" line per property.
# Globals:   ZFS_CMD (read)
# Arguments: $1 - dataset (or pool) name
#            $2 - property list handed to "zfs get" (e.g. all)
#            $3 - extended regex; properties whose name matches are skipped
#                 (defaults to '^$', i.e. skip nothing)
# Outputs:   option lines on stdout
#######################################
function print_local_options () {
  local dataset=$1
  local option=$2
  local exclude_regex=$3
  local property value

  # An empty regex matches every name and would silently drop all properties.
  if [[ -z "${exclude_regex}" ]]
  then
    exclude_regex='^$'
  fi

  "${ZFS_CMD}" get -s local -Ho property,value -p "${option}" "${dataset}" |
    while read -r property value
    do
      # Regex is deliberately unquoted so that it is used as a pattern.
      if [[ ${property} =~ ${exclude_regex} ]]
      then
        continue
      fi
      if [[ "${property}" == 'share.*' ]]
      then
        # A literal "share.*" entry is replaced by the individual share
        # properties, obtained by querying share.all for the same dataset.
        print_local_options "${dataset}" 'share.all' '^$'
      else
        printf '\t-o %s=%s \\\n' "${property}" "${value}"
      fi
    done
}
#######################################
# Print a "zfs create" command that recreates one filesystem together with
# its locally set properties.
# Globals:   ZFS_CMD (read)
# Arguments: $1 - filesystem name
# Outputs:   the multi-line command on stdout
#######################################
function print_filesystem () {
  local zfs=$1

  printf '%s create \\\n' "${ZFS_CMD}"
  print_local_options "${zfs}" 'all' '^$'
  printf '\t%s\n' "${zfs}"
}
#######################################
# Print the create commands for every filesystem below a pool. The pool's
# own root filesystem is skipped.
# Globals:   ZFS_CMD (read)
# Arguments: $1 - pool name
# Outputs:   one commented section per filesystem on stdout
#######################################
function print_filesystems () {
  local zpool=$1
  local zfs

  # Read line by line so that names containing spaces stay in one piece.
  "${ZFS_CMD}" list -Ho name -t filesystem -r "${zpool}" |
    while IFS= read -r zfs
    do
      if [[ -z "${zfs}" || "${zfs}" == "${zpool}" ]]
      then
        continue
      fi
      printf '#\n## Filesystem: %s\n#\n\n' "${zfs}"
      print_filesystem "${zfs}"
      printf '\n'
    done
}
#######################################
# Print a "zfs create -V" command that recreates one volume with its size,
# block size and locally set properties.
# Globals:   ZFS_CMD (read)
# Arguments: $1 - volume name
# Outputs:   the multi-line command on stdout
# Returns:   non-zero if volsize or volblocksize cannot be read
#######################################
function print_volume () {
  local zvol=$1
  local volsize volblocksize

  # Declared separately above so a failing "zfs get" is not masked by "local".
  volsize=$("${ZFS_CMD}" get -Ho value volsize "${zvol}") || return
  volblocksize=$("${ZFS_CMD}" get -Ho value volblocksize "${zvol}") || return

  printf '%s create \\\n\t-V %s \\\n\t-b %s \\\n' "${ZFS_CMD}" "${volsize}" "${volblocksize}"
  # Properties matching this regex are left out of the -o list; volsize is
  # already given by -V.
  print_local_options "${zvol}" 'all' '(volsize|refreservation)'
  printf '\t%s\n' "${zvol}"
}
#######################################
# Print the create commands for every volume below a pool.
# Globals:   ZFS_CMD (read)
# Arguments: $1 - pool name
# Outputs:   one commented section per volume on stdout
#######################################
function print_volumes () {
  local zpool=$1
  local zvol

  # Read line by line so that names containing spaces stay in one piece.
  "${ZFS_CMD}" list -Ho name -t volume -r "${zpool}" |
    while IFS= read -r zvol
    do
      if [[ -z "${zvol}" ]]
      then
        continue
      fi
      printf '#\n## Volume: %s\n#\n\n' "${zvol}"
      print_volume "${zvol}"
      printf '\n'
    done
}
#######################################
# Print the vdev part of a "zpool create" command, taken from the cached
# pool configuration shown by "zdb -C": the keyword "mirror" for every
# mirror vdev and the path of every device, each as a continuation line.
# NOTE(review): only the vdev type "mirror" is emitted; other types
# (raidz, log, cache, spare, ...) are not handled here.
# Globals:   ZDB_CMD (read), AWK_CMD (read)
# Arguments: $1 - pool name
# Outputs:   " \<NL><TAB>item" per item, then a final newline, on stdout
#######################################
function print_vdevs () {
  local zpool=$1

  "${ZDB_CMD}" -C "${zpool}" | "${AWK_CMD}" -F':' '
    # Return the text after the first colon of a line without surrounding
    # whitespace and without its first and last character (the quotes).
    # Working on the whole line keeps values that contain colons intact.
    function quoted_value(line,    v) {
      v = substr(line, index(line, ":") + 1)
      sub(/^[[:space:]]+/, "", v)
      sub(/[[:space:]]+$/, "", v)
      return substr(v, 2, length(v) - 2)
    }

    # Keys are indented, so allow leading whitespace in front of the key.
    $1 ~ /^[[:space:]]*type$/ {
      if (quoted_value($0) == "mirror") {
        printf " \\\n\t%s", "mirror"
      }
    }

    $1 ~ /^[[:space:]]*path$/ {
      printf " \\\n\t%s", quoted_value($0)
    }

    END {
      printf "\n"
    }
  '
}
#######################################
# Print everything needed to recreate one pool: a banner, the
# "zpool create" command with the root dataset's locally set properties and
# the vdev list, followed by the pool's filesystems and volumes.
# Globals:   ZPOOL_CMD (read)
# Arguments: $1 - pool name
# Outputs:   the report for the pool on stdout
#######################################
function print_zpool () {
  local zpool=$1
  local rule='#############################################################'

  printf '%s\n' "${rule}"
  printf '#\n## ZPool: %s\n#\n' "${zpool}"
  printf '%s\n\n' "${rule}"

  printf '%s create \\\n' "${ZPOOL_CMD}"
  # print_local_options reads the properties with "zfs get", so they are
  # filesystem properties of the root dataset. "zpool create" takes those
  # with -O (-o is for pool properties), hence the rewrite of the flag.
  # NOTE(review): the exclude regex /@/ only matches names containing that
  # literal text; its intent is not visible here -- confirm.
  print_local_options "${zpool}" 'all' '/@/' |
    sed 's/^\([[:space:]]*\)-o /\1-O /'
  printf '\t%s' "${zpool}"
  print_vdevs "${zpool}"
  printf '\n'

  printf '%s\n\n' "${rule}"
  print_filesystems "${zpool}"
  print_volumes "${zpool}"
}
OS=$(uname -s) eval $(uname -s)=1 HOSTNAME=$(hostname)
printf '#############################################################\n' printf '# Hostname: %s\n' "${HOSTNAME}" printf '#############################################################\n\n' for ZPOOL in $(${ZPOOL_CMD} list -Ho name) do
print_zpool ${ZPOOL}
done </source>