ZFS on Linux: Difference between revisions
(26 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
[[ | [[Category:Linux|ZFS]] | ||
[[ | [[Category:ZFS|Linux]] | ||
[[ | [[Category:VirtualBox|ZFS]] | ||
==Grub== | ==Grub== | ||
Create /etc/udev/rules.d/99-local-grub.rules with this content: | Create /etc/udev/rules.d/99-local-grub.rules with this content: | ||
< | <syntaxhighlight lang=bash> | ||
# Create by-id links in /dev as well for zfs vdev. Needed by grub | # Create by-id links in /dev as well for zfs vdev. Needed by grub | ||
# Add links for zfs_member only | # Add links for zfs_member only | ||
KERNEL=="sd*[0-9]", IMPORT{parent}=="ID_*", ENV{ID_FS_TYPE}=="zfs_member", SYMLINK+="$env{ID_BUS}-$env{ID_SERIAL}-part%n" | KERNEL=="sd*[0-9]", IMPORT{parent}=="ID_*", ENV{ID_FS_TYPE}=="zfs_member", SYMLINK+="$env{ID_BUS}-$env{ID_SERIAL}-part%n" | ||
</ | </syntaxhighlight> | ||
==Virtualbox on ZVols== | ==Virtualbox on ZVols== | ||
If you use ZVols as rawvmdk-device in VirtualBox as normal user (vmuser in this example) create /etc/udev/rules.d/99-local-zvol.rules with this content: | If you use ZVols as rawvmdk-device in VirtualBox as normal user (vmuser in this example) create /etc/udev/rules.d/99-local-zvol.rules with this content: | ||
< | <syntaxhighlight lang=bash> | ||
KERNEL=="zd*" SUBSYSTEM=="block" ACTION=="add|change" PROGRAM="/lib/udev/zvol_id /dev/%k" RESULT=="rpool/VM/*" OWNER="vmuser" | KERNEL=="zd*" SUBSYSTEM=="block" ACTION=="add|change" PROGRAM="/lib/udev/zvol_id /dev/%k" RESULT=="rpool/VM/*" OWNER="vmuser" | ||
</ | </syntaxhighlight> | ||
< | <syntaxhighlight lang=bash> | ||
vmuser@virtualbox-server:~$ VBoxManage internalcommands createrawvmdk -filename /var/data/VMs/dev/Solaris10.vmdk -rawdisk /dev/zvol/rpool/VM/Solaris10 | vmuser@virtualbox-server:~$ VBoxManage internalcommands createrawvmdk -filename /var/data/VMs/dev/Solaris10.vmdk -rawdisk /dev/zvol/rpool/VM/Solaris10 | ||
</ | </syntaxhighlight> | ||
==Setup Ubuntu 16.04 with ZFS root== | ==Setup Ubuntu 16.04 with ZFS root== | ||
Line 26: | Line 26: | ||
Boot Ubuntu Desktop (alias Live CD) and choose "try out". | Boot Ubuntu Desktop (alias Live CD) and choose "try out". | ||
===Get the right ashift value=== | |||
For example to get sda and sdb: | |||
<syntaxhighlight lang=bash> | |||
# lsblk -o NAME,PHY-SeC,LOG-SEC /dev/sd{a,b} | awk 'function exponent (value) {for(i=0;value>1;i++){value/=2;}; return i;}{if($2 ~ /[0-9]+/){print $0,exponent($2)}else{print$0,"ashift"}}' | |||
NAME PHY-SEC LOG-SEC ashift | |||
sda 512 512 9 | |||
├─sda1 512 512 9 | |||
├─sda2 512 512 9 | |||
├─sda3 512 512 9 | |||
└─sda4 512 512 9 | |||
sdb 4096 512 12 | |||
├─sdb1 4096 512 12 | |||
├─sdb2 4096 512 12 | |||
├─sdb3 4096 512 12 | |||
└─sdb4 4096 512 12 | |||
</syntaxhighlight> | |||
===Connect it to your network=== | ===Connect it to your network=== | ||
< | <syntaxhighlight lang=bash> | ||
sudo -i | sudo -i | ||
ifconfig ens160 <IP> netmask 255.255.255.0 | ifconfig ens160 <IP> netmask 255.255.255.0 | ||
Line 141: | Line 159: | ||
swapon -av | swapon -av | ||
</ | </syntaxhighlight> | ||
==Swap on ZFS with random key encryption== | |||
<syntaxhighlight lang=bash> | |||
$ sudo systemctl edit --force --full zfs-cryptswap@.service | |||
</syntaxhighlight> | |||
<syntaxhighlight lang=ini> | |||
# /etc/systemd/system/zfs-cryptswap@.service | |||
[Unit] | |||
Description=ZFS Random Cryptography Setup for %I | |||
Documentation=man:zfs(8) | |||
DefaultDependencies=no | |||
Conflicts=umount.target | |||
IgnoreOnIsolate=true | |||
After=systemd-random-seed.service zfs-volumes.target | |||
BindsTo=dev-zvol-rpool-%i.device | |||
Before=umount.target | |||
[Service] | |||
Type=oneshot | |||
RemainAfterExit=yes | |||
TimeoutSec=0 | |||
KeyringMode=shared | |||
OOMScoreAdjust=500 | |||
UMask=0077 | |||
RuntimeDirectory=zfs-cryptswap.%i | |||
RuntimeDirectoryMode=0700 | |||
ExecStartPre=-/sbin/swapoff '/dev/zvol/rpool/%i' | |||
ExecStartPre=-/sbin/zfs destroy 'rpool/%i' | |||
ExecStartPre=/bin/dd if=/dev/urandom of=/run/zfs-cryptswap.%i/%i.key bs=32 count=1 | |||
ExecStart=/sbin/zfs create -V 4G -b 8k -o compression=zle -o logbias=throughput -o sync=always -o primarycache=metadata -o secondarycache=none -o com.sun:auto-snapshot=false -o encryption=on -o keyformat=raw -o keylocation=file:///run/zfs-cryptswap.%i/%i.key rpool/%i | |||
ExecStart=/bin/sleep 1 | |||
ExecStartPost=/sbin/mkswap '/dev/zvol/rpool/%i' | |||
ExecStartPost=/sbin/swapon '/dev/zvol/rpool/%i' | |||
ExecStop=/sbin/swapoff '/dev/zvol/rpool/%i' | |||
ExecStop=/bin/sleep 2 | |||
ExecStopPost=/sbin/zfs destroy 'rpool/%i' | |||
[Install] | |||
WantedBy=swap.target | |||
</syntaxhighlight> | |||
!!!BE CAREFUL with the name after @ !!! | |||
The name after the @ is the name of the ZFS that will be DESTROYED and recreated!!! | |||
To destroy and recreate an encrypted ZFS volume named cryptswap use: | |||
<syntaxhighlight lang=bash> | |||
# systemctl start zfs-cryptswap@cryptswap.service | |||
# systemctl enable zfs-cryptswap@cryptswap.service | |||
# update-initramfs -k "$(uname -r)" -u | |||
</syntaxhighlight> | |||
==Kernel settings for ZFS== | |||
=== Set module parameter in /etc/modprobe.d/zfs.conf=== | |||
<syntaxhighlight lang=bash> | |||
options zfs zfs_arc_max=10737418240 | |||
# increase them so scrub/resilver is more quickly at the cost of other work | |||
options zfs zfs_vdev_scrub_min_active=24 | |||
options zfs zfs_vdev_scrub_max_active=64 | |||
# sync write | |||
options zfs zfs_vdev_sync_write_min_active=8 | |||
options zfs zfs_vdev_sync_write_max_active=32 | |||
# sync reads (normal) | |||
options zfs zfs_vdev_sync_read_min_active=8 | |||
options zfs zfs_vdev_sync_read_max_active=32 | |||
# async reads : prefetcher | |||
options zfs zfs_vdev_async_read_min_active=8 | |||
options zfs zfs_vdev_async_read_max_active=32 | |||
# async write : bulk writes | |||
options zfs zfs_vdev_async_write_min_active=8 | |||
options zfs zfs_vdev_async_write_max_active=32 | |||
# max write speed to l2arc | |||
# tradeoff between write/read and durability of ssd (?) | |||
# default : 8 * 1024 * 1024 | |||
# setting here : 500 * 1024 * 1024 | |||
options zfs l2arc_write_max=524288000 | |||
options zfs zfs_top_maxinflight=512 | |||
options zfs zfs_resilver_min_time_ms=8000 | |||
options zfs zfs_resilver_delay=0 | |||
</syntaxhighlight> | |||
Remember to update your initramfs before rebooting: the module options are read from the initramfs when the zfs module is loaded at boot. | |||
<syntaxhighlight lang=bash> | |||
# update-initramfs -k all -u | |||
</syntaxhighlight> | |||
=== Check settings === | |||
<syntaxhighlight lang=bash> | |||
root@zfshost:~# modprobe -c | grep "options zfs" | |||
options zfs zfs_arc_max=10737418240 | |||
options zfs zfs_vdev_scrub_min_active=24 | |||
options zfs zfs_vdev_scrub_max_active=64 | |||
options zfs zfs_vdev_sync_write_min_active=8 | |||
options zfs zfs_vdev_sync_write_max_active=32 | |||
options zfs zfs_vdev_sync_read_min_active=8 | |||
options zfs zfs_vdev_sync_read_max_active=32 | |||
options zfs zfs_vdev_async_read_min_active=8 | |||
options zfs zfs_vdev_async_read_max_active=32 | |||
options zfs zfs_vdev_async_write_min_active=8 | |||
options zfs zfs_vdev_async_write_max_active=32 | |||
options zfs l2arc_write_max=524288000 | |||
options zfs zfs_top_maxinflight=512 | |||
options zfs zfs_resilver_min_time_ms=8000 | |||
options zfs zfs_resilver_delay=0 | |||
</syntaxhighlight> | |||
<syntaxhighlight lang=bash> | |||
root@zfshost:~# modprobe --show-depends zfs | |||
insmod /lib/modules/4.15.0-58-generic/kernel/spl/spl.ko | |||
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/znvpair.ko | |||
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zcommon.ko | |||
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/icp.ko | |||
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zavl.ko | |||
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zunicode.ko | |||
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zfs.ko zfs_arc_max=10737418240 zfs_vdev_scrub_min_active=24 zfs_vdev_scrub_max_active=64 zfs_vdev_sync_write_min_active=8 zfs_vdev_sync_write_max_active=32 zfs_vdev_sync_read_min_active=8 zfs_vdev_sync_read_max_active=32 zfs_vdev_async_read_min_active=8 zfs_vdev_async_read_max_active=32 zfs_vdev_async_write_min_active=8 zfs_vdev_async_write_max_active=32 l2arc_write_max=524288000 zfs_top_maxinflight=512 zfs_resilver_min_time_ms=8000 zfs_resilver_delay=0 | |||
</syntaxhighlight> | |||
=== Check actual settings === | |||
Check files in | |||
* /proc/spl/kstat/zfs/ | |||
* /sys/module/zfs/parameters/ | |||
==ARC Cache== | |||
===Get the current usage of cache=== | |||
<syntaxhighlight lang=bash> | |||
# cat /proc/spl/kstat/zfs/arcstats |grep c_ | |||
c_min 4 521779200 | |||
c_max 4 1073741824 | |||
arc_no_grow 4 0 | |||
arc_tempreserve 4 0 | |||
arc_loaned_bytes 4 0 | |||
arc_prune 4 25360 | |||
arc_meta_used 4 493285336 | |||
arc_meta_limit 4 805306368 | |||
arc_dnode_limit 4 80530636 | |||
arc_meta_max 4 706551816 | |||
arc_meta_min 4 16777216 | |||
sync_wait_for_async 4 357 | |||
arc_need_free 4 0 | |||
arc_sys_free 4 260889600 | |||
</syntaxhighlight> | |||
===Limit the cache without reboot (non-permanent)=== | |||
For example limit it to 512MB (which is too small for production environments, just an example...): | |||
<syntaxhighlight lang=bash> | |||
# echo "$[512*1024*1024]" > /sys/module/zfs/parameters/zfs_arc_max | |||
</syntaxhighlight> | |||
Now you have to drop the caches: | |||
<syntaxhighlight lang=bash> | |||
# echo 3 > /proc/sys/vm/drop_caches | |||
</syntaxhighlight> | |||
===Make the cache limit permanent=== | |||
For example limit it to 512MB (which is too small for production environments, just an example...): | |||
<syntaxhighlight lang=bash> | |||
# echo "options zfs zfs_arc_max=$[512*1024*1024]" >> /etc/modprobe.d/zfs.conf | |||
</syntaxhighlight> | |||
After a reboot this value takes effect. | |||
===Check cache hits/misses=== | |||
<syntaxhighlight lang=bash> | |||
# (while : ; do cat /proc/spl/kstat/zfs/arcstats ; sleep 5 ; done ) | awk ' | |||
BEGIN { | |||
} | |||
$1 ~ /(hits|misses)/ { | |||
name=$1; | |||
gsub(/[_]*(hits|misses)/,"",name); | |||
if(name == ""){ | |||
name="global"; | |||
} | |||
} | |||
$1 ~ /hits/ { | |||
hits[name] = $3 - hitslast[name] | |||
hitslast[name] = $3 | |||
} | |||
$1 ~ /misses/ { | |||
misses[name] = $3 - misslast[name] | |||
misslast[name] = $3 | |||
rate = 0 | |||
total = hits[name] + misses[name] | |||
if (total) | |||
rate = (hits[name] * 100) / total | |||
if (name=="global") | |||
printf "%30s %12s %12s %9s\n", "NAME", "HITS", "MISSES", "HITRATE" | |||
printf "%30s %12d %12d %8.2f%%\n", name, hits[name], misses[name], rate | |||
} | |||
' | |||
</syntaxhighlight> | |||
==Higher scrub performance== | |||
<syntaxhighlight lang=bash highlight=3-5> | |||
#!/bin/bash | |||
# | |||
## scrub_fast.sh | |||
# | |||
case $1 in | |||
start) | |||
echo 0 > /sys/module/zfs/parameters/zfs_scan_idle | |||
echo 0 > /sys/module/zfs/parameters/zfs_scrub_delay | |||
echo 512 > /sys/module/zfs/parameters/zfs_top_maxinflight | |||
echo 5000 > /sys/module/zfs/parameters/zfs_scan_min_time_ms | |||
echo 4 > /sys/module/zfs/parameters/zfs_vdev_scrub_min_active | |||
echo 8 > /sys/module/zfs/parameters/zfs_vdev_scrub_max_active | |||
;; | |||
stop) | |||
echo 50 > /sys/module/zfs/parameters/zfs_scan_idle | |||
echo 4 > /sys/module/zfs/parameters/zfs_scrub_delay | |||
echo 32 > /sys/module/zfs/parameters/zfs_top_maxinflight | |||
echo 1000 > /sys/module/zfs/parameters/zfs_scan_min_time_ms | |||
echo 1 > /sys/module/zfs/parameters/zfs_vdev_scrub_min_active | |||
echo 2 > /sys/module/zfs/parameters/zfs_vdev_scrub_max_active | |||
;; | |||
status) | |||
for i in zfs_scan_idle zfs_scrub_delay zfs_top_maxinflight zfs_scan_min_time_ms zfs_vdev_scrub_{min,max}_active | |||
do | |||
param="/sys/module/zfs/parameters/${i}" | |||
printf "%60s\t%d\n" "${param}" "$(cat ${param})" | |||
done | |||
;; | |||
*) | |||
echo "Usage: ${0} (start|stop|status)" | |||
;; | |||
esac | |||
</syntaxhighlight> | |||
==More information on zpool status== | |||
<SyntaxHighlight lang=bash highlight=3-5> | |||
#!/bin/bash | |||
# | |||
## print_zpool.sh | |||
# | |||
# Written by Lars Timmann <L@rs.Timmann.de> 2022 | |||
columns=5 # number of columns for zpool status | |||
if [ ${#} -gt 0 ] && [ ${1} == "iostat" ] | |||
then | |||
command="iostat -v" | |||
columns=7 | |||
shift | |||
fi | |||
stdbuf --output=L zpool ${command:-status} -P ${*} | awk -v columns=${columns} ' | |||
BEGIN { | |||
command="lsscsi --scsi_id"; | |||
while( command | getline lsscsi ) { | |||
count=split(lsscsi,fields); | |||
dev=fields[count-1]; | |||
scsi_id[dev]=fields[1]; | |||
} | |||
close(command); | |||
command="ls -Ul /dev/disk/by-id/*"; | |||
while( command | getline ) { | |||
dev=$NF; | |||
gsub(/[\.\/]/,"",dev); | |||
dev_id=$(NF-2); | |||
device[dev_id]="/dev/"dev; | |||
} | |||
close(command); | |||
} | |||
$1 ~ /\/dev\// { | |||
line=$0; | |||
dev_by_id=$1; | |||
dev_no_part=dev_by_id; | |||
gsub(/(-part|)[0-9]+$/,"",dev_no_part); | |||
if( NF > 5) { | |||
count=split(line,a,FS,seps); | |||
line=seps[0]; | |||
for(i=1;i<columns;i++){ | |||
line=line a[i] seps[i]; | |||
} | |||
line=line a[columns]; | |||
for(i=columns+1;i<=count;i++){ | |||
rest=rest a[i] seps[i]; | |||
} | |||
} | |||
printf("%s %s %s",line,scsi_id[device[dev_no_part]],device[dev_by_id]); | |||
if(rest!=""){ | |||
printf(" %s",rest); | |||
rest=""; | |||
} | |||
printf("\n"); | |||
next; | |||
} | |||
/^errors:/ { | |||
print; | |||
fflush(); | |||
next; | |||
} | |||
{ | |||
print; | |||
}' | |||
</SyntaxHighlight> | |||
==Backup ZFS settings== | |||
A little script which may be used at your own risk. | |||
<syntaxhighlight lang=bash> | |||
#!/bin/bash | |||
# Written by Lars Timmann <L@rs.Timmann.de> 2018 | |||
# Tested on solaris 11.3 & Ubuntu Linux | |||
# This script is a rotten bunch of code... rewrite it! | |||
AWK_CMD=/usr/bin/gawk | |||
ZPOOL_CMD=/sbin/zpool | |||
ZFS_CMD=/sbin/zfs | |||
ZDB_CMD=/sbin/zdb | |||
function print_local_options () { | |||
DATASET=$1 | |||
OPTION=$2 | |||
EXCLUDE_REGEX=$3 | |||
${ZFS_CMD} get -s local -Ho property,value -p ${OPTION} ${DATASET} | while read -r property value | |||
do | |||
if [[ ! ${property} =~ ${EXCLUDE_REGEX} ]] | |||
then | |||
if [ "_${property}_" == "_share.*_" ] | |||
then | |||
print_local_options "${DATASET}" 'share.all' '^$' | |||
else | |||
printf '\t-o %s=%s \\\n' "${property}" "${value}" | |||
fi | |||
fi | |||
done | |||
} | |||
function print_filesystem () { | |||
ZFS=$1 | |||
printf '%s create \\\n' "${ZFS_CMD}" | |||
print_local_options "${ZFS}" 'all' '^$' | |||
printf '\t%s\n' "${ZFS}" | |||
} | |||
function print_filesystems () { | |||
ZPOOL=$1 | |||
for ZFS in $(${ZFS_CMD} list -Ho name -t filesystem -r ${ZPOOL}) | |||
do | |||
if [ ${ZFS} == ${ZPOOL} ] ; then continue ; fi | |||
printf '#\n## Filesystem: %s\n#\n\n' "${ZFS}" | |||
print_filesystem ${ZFS} | |||
printf '\n' | |||
done | |||
} | |||
function print_volume () { | |||
ZVOL=$1 | |||
volsize=$(${ZFS_CMD} get -Ho value volsize ${ZVOL}) | |||
volblocksize=$(${ZFS_CMD} get -Ho value volblocksize ${ZVOL}) | |||
printf '%s create \\\n\t-V %s \\\n\t-b %s \\\n' "${ZFS_CMD}" "${volsize}" "${volblocksize}" | |||
print_local_options "${ZVOL}" 'all' '(volsize|refreservation)' | |||
printf '\t%s\n' "${ZVOL}" | |||
} | |||
function print_volumes () { | |||
ZPOOL=$1 | |||
for ZVOL in $(${ZFS_CMD} list -Ho name -t volume -r ${ZPOOL}) | |||
do | |||
printf '#\n## Volume: %s\n#\n\n' "${ZVOL}" | |||
print_volume ${ZVOL} | |||
printf '\n' | |||
done | |||
} | |||
function print_vdevs () { | |||
ZPOOL=$1 | |||
${ZDB_CMD} -C ${ZPOOL} | ${AWK_CMD} -F':' ' | |||
$1 ~ /^[[:space:]]*type$/ { | |||
gsub(/[ ]+/,"",$NF); | |||
type=substr($NF,2,length($NF)-2); | |||
if ( type == "mirror" ) { | |||
printf " \\\n\t%s",type; | |||
} | |||
} | |||
$1 ~ /^[[:space:]]*path$/ { | |||
gsub(/[ ]+/,"",$NF); | |||
vdev=substr($NF,2,length($NF)-2); | |||
printf " \\\n\t%s",vdev; | |||
} | |||
END { | |||
printf "\n"; | |||
} | |||
' | |||
} | |||
function print_zpool () { | |||
ZPOOL=$1 | |||
printf '#############################################################\n' | |||
printf '#\n## ZPool: %s\n#\n' "${ZPOOL}" | |||
printf '#############################################################\n\n' | |||
printf '%s create \\\n' "${ZPOOL_CMD}" | |||
print_local_options "${ZPOOL}" 'all' '/@/' | |||
printf '\t%s' "${ZPOOL}" | |||
print_vdevs "${ZPOOL}" | |||
printf '\n' | |||
printf '#############################################################\n\n' | |||
print_filesystems "${ZPOOL}" | |||
print_volumes "${ZPOOL}" | |||
} | |||
OS=$(uname -s) | |||
eval $(uname -s)=1 | |||
HOSTNAME=$(hostname) | |||
printf '#############################################################\n' | |||
printf '# Hostname: %s\n' "${HOSTNAME}" | |||
printf '#############################################################\n\n' | |||
for ZPOOL in $(${ZPOOL_CMD} list -Ho name) | |||
do | |||
print_zpool ${ZPOOL} | |||
done | |||
</syntaxhighlight> | |||
==Links== | ==Links== | ||
* [[https://github.com/zfsonlinux/pkg-zfs/wiki/HOWTO-install-Ubuntu-16.04-to-a-Whole-Disk-Native-ZFS-Root-Filesystem-using-Ubiquity-GUI-installer HOWTO install Ubuntu 16.04 to a Whole Disk Native ZFS Root Filesystem using Ubiquity GUI installer]] | * [[https://github.com/zfsonlinux/pkg-zfs/wiki/HOWTO-install-Ubuntu-16.04-to-a-Whole-Disk-Native-ZFS-Root-Filesystem-using-Ubiquity-GUI-installer HOWTO install Ubuntu 16.04 to a Whole Disk Native ZFS Root Filesystem using Ubiquity GUI installer]] | ||
* [[https://github.com/zfsonlinux/zfs/wiki/Ubuntu-16.04-Root-on-ZFS Ubuntu 16.04 Root on ZFS]] | * [[https://github.com/zfsonlinux/zfs/wiki/Ubuntu-16.04-Root-on-ZFS Ubuntu 16.04 Root on ZFS]] |
Latest revision as of 15:54, 22 June 2023
Grub
Create /etc/udev/rules.d/99-local-grub.rules with this content:
# Create by-id links in /dev as well for zfs vdev. Needed by grub
# Add links for zfs_member only
KERNEL=="sd*[0-9]", IMPORT{parent}=="ID_*", ENV{ID_FS_TYPE}=="zfs_member", SYMLINK+="$env{ID_BUS}-$env{ID_SERIAL}-part%n"
Virtualbox on ZVols
If you use ZVols as rawvmdk-device in VirtualBox as normal user (vmuser in this example) create /etc/udev/rules.d/99-local-zvol.rules with this content:
KERNEL=="zd*" SUBSYSTEM=="block" ACTION=="add|change" PROGRAM="/lib/udev/zvol_id /dev/%k" RESULT=="rpool/VM/*" OWNER="vmuser"
vmuser@virtualbox-server:~$ VBoxManage internalcommands createrawvmdk -filename /var/data/VMs/dev/Solaris10.vmdk -rawdisk /dev/zvol/rpool/VM/Solaris10
Setup Ubuntu 16.04 with ZFS root
Most of these steps are taken from the Ubuntu-16.04-Root-on-ZFS guide.
Boot Ubuntu Desktop (alias Live CD) and choose "try out".
Get the right ashift value
For example to get sda and sdb:
# lsblk -o NAME,PHY-SeC,LOG-SEC /dev/sd{a,b} | awk 'function exponent (value) {for(i=0;value>1;i++){value/=2;}; return i;}{if($2 ~ /[0-9]+/){print $0,exponent($2)}else{print$0,"ashift"}}'
NAME PHY-SEC LOG-SEC ashift
sda 512 512 9
├─sda1 512 512 9
├─sda2 512 512 9
├─sda3 512 512 9
└─sda4 512 512 9
sdb 4096 512 12
├─sdb1 4096 512 12
├─sdb2 4096 512 12
├─sdb3 4096 512 12
└─sdb4 4096 512 12
Connect it to your network
sudo -i
ifconfig ens160 <IP> netmask 255.255.255.0
route add default gw <defaultrouter>
echo "nameserver <nameserver>" >> /etc/resolv.conf
echo 'Acquire::http::Proxy "http://<user>:<pass>@<proxyhost>:<proxyport>";' >> /etc/apt/apt.conf
apt-add-repository universe
apt update
apt --yes install openssh-server
passwd ubuntu
Reconnect via ssh
apt install --yes debootstrap gdisk zfs-initramfs
sgdisk -g -a1 -n2:34:2047 -t2:EF02 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4
sgdisk -n9:-8M:0 -t9:BF07 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4
sgdisk -n1:0:0 -t1:BF01 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4
zpool create -f -o ashift=12 \
-O atime=off \
-O canmount=off \
-O compression=lz4 \
-O normalization=formD \
-O mountpoint=/ \
-R /mnt \
rpool /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4-part1
zfs create -o canmount=off -o mountpoint=none rpool/ROOT
zfs create -o canmount=noauto -o mountpoint=/ rpool/ROOT/ubuntu
zfs mount rpool/ROOT/ubuntu
zfs create -o setuid=off rpool/home
zfs create -o mountpoint=/root rpool/home/root
zfs create -o canmount=off -o setuid=off -o exec=off rpool/var
zfs create -o com.sun:auto-snapshot=false rpool/var/cache
zfs create rpool/var/log
zfs create rpool/var/spool
zfs create -o com.sun:auto-snapshot=false -o exec=on rpool/var/tmp
zfs create -V 4G -b $(getconf PAGESIZE) -o compression=zle \
-o logbias=throughput -o sync=always \
-o primarycache=metadata -o secondarycache=none \
-o com.sun:auto-snapshot=false rpool/swap
cp -p {,/mnt}/etc/apt/apt.conf
export http_proxy=$(awk '/Acquire::http::Proxy/{gsub(/\"/,"");gsub(/;$/,"");print $2}' /mnt/etc/apt/apt.conf)
echo -n xenial{,-security,-updates} | \
xargs -n 1 -d ' ' -I{} echo "deb http://archive.ubuntu.com/ubuntu {} main universe" > /mnt/etc/apt/sources.list
chmod 1777 /mnt/var/tmp
debootstrap xenial /mnt
zfs set devices=off rpool
HOSTNAME=Template-VM
echo ${HOSTNAME} > /mnt/etc/hostname
printf "127.0.1.1\t%s\n" "${HOSTNAME}" >> /mnt/etc/hosts
INTERFACE=$(ip a s scope global | awk 'NR==1{gsub(/:$/,"",$2);print $2;}')
printf "auto %s\niface %s inet dhcp\n" "${INTERFACE}" "${INTERFACE}" > /mnt/etc/network/interfaces.d/${INTERFACE}
mount --rbind /dev /mnt/dev
mount --rbind /proc /mnt/proc
mount --rbind /sys /mnt/sys
cp -p {,/mnt}/etc/apt/apt.conf
echo -n xenial{,-security,-updates} | \
xargs -n 1 -d ' ' -I{} echo "deb http://archive.ubuntu.com/ubuntu {} main universe" > /mnt/etc/apt/sources.list
chroot /mnt /bin/bash --login
locale-gen en_US.UTF-8
echo 'LANG="en_US.UTF-8"' > /etc/default/locale
LANG="en_US.UTF-8"
dpkg-reconfigure tzdata
ln -s /proc/self/mounts /etc/mtab
apt update
apt install --yes ubuntu-minimal
apt install --yes --no-install-recommends linux-image-generic
apt install --yes zfs-initramfs
apt install --yes openssh-server
apt install --yes grub-pc
addgroup --system lpadmin
addgroup --system sambashare
passwd
grub-probe /
update-initramfs -c -k all
vi /etc/default/grub
Comment out: GRUB_HIDDEN_TIMEOUT=0
Remove quiet and splash from: GRUB_CMDLINE_LINUX_DEFAULT
Uncomment: GRUB_TERMINAL=console
update-grub
grub-install /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4
zfs snapshot rpool/ROOT/ubuntu@install
exit
mount | grep -v zfs | tac | awk '/\/mnt/ {print $3}' | xargs -i{} umount -lf {}
zpool export rpool
reboot
apt install --yes cryptsetup
echo cryptswap1 /dev/zvol/rpool/swap /dev/urandom swap,cipher=aes-xts-plain64:sha256,size=256 >> /etc/crypttab
systemctl daemon-reload
systemctl start systemd-cryptsetup@cryptswap1.service
echo /dev/mapper/cryptswap1 none swap defaults 0 0 >> /etc/fstab
swapon -av
Swap on ZFS with random key encryption
$ sudo systemctl edit --force --full zfs-cryptswap@.service
# /etc/systemd/system/zfs-cryptswap@.service
[Unit]
Description=ZFS Random Cryptography Setup for %I
Documentation=man:zfs(8)
DefaultDependencies=no
Conflicts=umount.target
IgnoreOnIsolate=true
After=systemd-random-seed.service zfs-volumes.target
BindsTo=dev-zvol-rpool-%i.device
Before=umount.target
[Service]
Type=oneshot
RemainAfterExit=yes
TimeoutSec=0
KeyringMode=shared
OOMScoreAdjust=500
UMask=0077
RuntimeDirectory=zfs-cryptswap.%i
RuntimeDirectoryMode=0700
ExecStartPre=-/sbin/swapoff '/dev/zvol/rpool/%i'
ExecStartPre=-/sbin/zfs destroy 'rpool/%i'
ExecStartPre=/bin/dd if=/dev/urandom of=/run/zfs-cryptswap.%i/%i.key bs=32 count=1
ExecStart=/sbin/zfs create -V 4G -b 8k -o compression=zle -o logbias=throughput -o sync=always -o primarycache=metadata -o secondarycache=none -o com.sun:auto-snapshot=false -o encryption=on -o keyformat=raw -o keylocation=file:///run/zfs-cryptswap.%i/%i.key rpool/%i
ExecStart=/bin/sleep 1
ExecStartPost=/sbin/mkswap '/dev/zvol/rpool/%i'
ExecStartPost=/sbin/swapon '/dev/zvol/rpool/%i'
ExecStop=/sbin/swapoff '/dev/zvol/rpool/%i'
ExecStop=/bin/sleep 2
ExecStopPost=/sbin/zfs destroy 'rpool/%i'
[Install]
WantedBy=swap.target
!!!BE CAREFUL with the name after @ !!!
The name after the @ is the name of the ZFS volume below rpool that will be DESTROYED and recreated!!!
To destroy and recreate an encrypted ZFS volume named cryptswap use:
# systemctl start zfs-cryptswap@cryptswap.service
# systemctl enable zfs-cryptswap@cryptswap.service
# update-initramfs -k "$(uname -r)" -u
Kernel settings for ZFS
Set module parameter in /etc/modprobe.d/zfs.conf
# Module options for zfs, one "options zfs <parameter>=<value>" line each.
# NOTE(review): not every tunable listed here exists in every ZFS release -
# confirm against /sys/module/zfs/parameters/ on the target system.
# Upper limit of the ARC in bytes (10737418240 = 10 * 1024 * 1024 * 1024)
options zfs zfs_arc_max=10737418240
# Raise these so that scrub/resilver finishes more quickly, at the cost of other work
options zfs zfs_vdev_scrub_min_active=24
options zfs zfs_vdev_scrub_max_active=64
# sync writes
options zfs zfs_vdev_sync_write_min_active=8
options zfs zfs_vdev_sync_write_max_active=32
# sync reads (normal)
options zfs zfs_vdev_sync_read_min_active=8
options zfs zfs_vdev_sync_read_max_active=32
# async reads: prefetcher
options zfs zfs_vdev_async_read_min_active=8
options zfs zfs_vdev_async_read_max_active=32
# async writes: bulk writes
options zfs zfs_vdev_async_write_min_active=8
options zfs zfs_vdev_async_write_max_active=32
# Maximum write speed to the L2ARC
# Trade-off between write/read throughput and durability of the SSD (?)
# default      :   8 * 1024 * 1024
# setting here : 500 * 1024 * 1024
options zfs l2arc_write_max=524288000
options zfs zfs_top_maxinflight=512
options zfs zfs_resilver_min_time_ms=8000
options zfs zfs_resilver_delay=0
Remember to update your initramfs before rebooting: the module options are read from the initramfs when the zfs module is loaded at boot.
# update-initramfs -k all -u
Check settings
root@zfshost:~# modprobe -c | grep "options zfs"
options zfs zfs_arc_max=10737418240
options zfs zfs_vdev_scrub_min_active=24
options zfs zfs_vdev_scrub_max_active=64
options zfs zfs_vdev_sync_write_min_active=8
options zfs zfs_vdev_sync_write_max_active=32
options zfs zfs_vdev_sync_read_min_active=8
options zfs zfs_vdev_sync_read_max_active=32
options zfs zfs_vdev_async_read_min_active=8
options zfs zfs_vdev_async_read_max_active=32
options zfs zfs_vdev_async_write_min_active=8
options zfs zfs_vdev_async_write_max_active=32
options zfs l2arc_write_max=524288000
options zfs zfs_top_maxinflight=512
options zfs zfs_resilver_min_time_ms=8000
options zfs zfs_resilver_delay=0
root@zfshost:~# modprobe --show-depends zfs
insmod /lib/modules/4.15.0-58-generic/kernel/spl/spl.ko
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/znvpair.ko
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zcommon.ko
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/icp.ko
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zavl.ko
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zunicode.ko
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zfs.ko zfs_arc_max=10737418240 zfs_vdev_scrub_min_active=24 zfs_vdev_scrub_max_active=64 zfs_vdev_sync_write_min_active=8 zfs_vdev_sync_write_max_active=32 zfs_vdev_sync_read_min_active=8 zfs_vdev_sync_read_max_active=32 zfs_vdev_async_read_min_active=8 zfs_vdev_async_read_max_active=32 zfs_vdev_async_write_min_active=8 zfs_vdev_async_write_max_active=32 l2arc_write_max=524288000 zfs_top_maxinflight=512 zfs_resilver_min_time_ms=8000 zfs_resilver_delay=0
Check actual settings
Check files in
- /proc/spl/kstat/zfs/
- /sys/module/zfs/parameters/
ARC Cache
Get the current usage of cache
# cat /proc/spl/kstat/zfs/arcstats |grep c_
c_min 4 521779200
c_max 4 1073741824
arc_no_grow 4 0
arc_tempreserve 4 0
arc_loaned_bytes 4 0
arc_prune 4 25360
arc_meta_used 4 493285336
arc_meta_limit 4 805306368
arc_dnode_limit 4 80530636
arc_meta_max 4 706551816
arc_meta_min 4 16777216
sync_wait_for_async 4 357
arc_need_free 4 0
arc_sys_free 4 260889600
Limit the cache without reboot (non-permanent)
For example limit it to 512MB (which is too small for production environments, just an example...):
# echo "$[512*1024*1024]" > /sys/module/zfs/parameters/zfs_arc_max
Now you have to drop the caches:
# echo 3 > /proc/sys/vm/drop_caches
Make the cache limit permanent
For example limit it to 512MB (which is too small for production environments, just an example...):
# echo "options zfs zfs_arc_max=$[512*1024*1024]" >> /etc/modprobe.d/zfs.conf
After a reboot this value takes effect.
Check cache hits/misses
# (while : ; do cat /proc/spl/kstat/zfs/arcstats ; sleep 5 ; done ) | awk '
# The shell loop feeds a fresh copy of arcstats into awk every 5 seconds.
# Each arcstats line is read as: counter name, type, value (value = $3).
BEGIN {
}
# Any counter whose name contains "hits" or "misses": derive the group name
# by stripping that suffix (and the underscores before it). The plain
# "hits"/"misses" counters end up with an empty name and are shown as "global".
$1 ~ /(hits|misses)/ {
name=$1;
gsub(/[_]*(hits|misses)/,"",name);
if(name == ""){
name="global";
}
}
# Hit counter: store the difference to the previous sample. The first sample
# shows the absolute counter value because hitslast[] starts out empty.
$1 ~ /hits/ {
hits[name] = $3 - hitslast[name]
hitslast[name] = $3
}
# Miss counter: same delta calculation, then print one row for the group.
# NOTE(review): this relies on the "hits" line of a group being read before
# its "misses" line - confirm against the arcstats layout.
$1 ~ /misses/ {
misses[name] = $3 - misslast[name]
misslast[name] = $3
rate = 0
total = hits[name] + misses[name]
# Avoid a division by zero when nothing happened in this interval.
if (total)
rate = (hits[name] * 100) / total
# The "global" row is preceded by the column header.
if (name=="global")
printf "%30s %12s %12s %9s\n", "NAME", "HITS", "MISSES", "HITRATE"
printf "%30s %12d %12d %8.2f%%\n", name, hits[name], misses[name], rate
}
'
Higher scrub performance
#!/bin/bash
#
## scrub_fast.sh
#
# Switch the ZFS scan/scrub module parameters between an aggressive profile
# ("start"), the conservative profile ("stop"), or display the current
# values ("status").
#
# Usage: scrub_fast.sh (start|stop|status)
#
# Environment:
#   ZFS_PARAM_DIR  directory that holds the module parameter files
#                  (default: /sys/module/zfs/parameters)
#
# Exit status: 0 on success, 1 if at least one parameter could not be
#              written, 2 on a usage error.

ZFS_PARAM_DIR="${ZFS_PARAM_DIR:-/sys/module/zfs/parameters}"

# Parameters handled by this script; the value tables below use the same order.
SCRUB_PARAMS=(
  zfs_scan_idle
  zfs_scrub_delay
  zfs_top_maxinflight
  zfs_scan_min_time_ms
  zfs_vdev_scrub_min_active
  zfs_vdev_scrub_max_active
)
SCRUB_FAST_VALUES=(0 0 512 5000 4 8)
SCRUB_NORMAL_VALUES=(50 4 32 1000 1 2)

#######################################
# Write one value per entry of SCRUB_PARAMS.
# Globals:   ZFS_PARAM_DIR, SCRUB_PARAMS (read)
# Arguments: the values, in SCRUB_PARAMS order
# Outputs:   a diagnostic on stderr for every parameter that is skipped
# Returns:   0 if every parameter was written, 1 otherwise
#######################################
apply_scrub_values() {
  local -a values=("$@")
  local idx file rc=0

  for idx in "${!SCRUB_PARAMS[@]}"; do
    file="${ZFS_PARAM_DIR}/${SCRUB_PARAMS[idx]}"
    # A tunable may be absent from the loaded module; skip it instead of
    # failing on the redirection and keep going with the remaining ones.
    if [[ ! -w "${file}" ]]; then
      printf 'skipping %s: not present or not writable\n' "${file}" >&2
      rc=1
      continue
    fi
    printf '%s\n' "${values[idx]}" > "${file}" || rc=1
  done
  return "${rc}"
}

#######################################
# Print the current value of every parameter in SCRUB_PARAMS.
# Globals:   ZFS_PARAM_DIR, SCRUB_PARAMS (read)
# Outputs:   one "<path><TAB><value>" line per parameter on stdout;
#            "n/a" is shown for parameters that cannot be read
#######################################
show_scrub_values() {
  local name file

  for name in "${SCRUB_PARAMS[@]}"; do
    file="${ZFS_PARAM_DIR}/${name}"
    if [[ -r "${file}" ]]; then
      printf '%60s\t%d\n' "${file}" "$(< "${file}")"
    else
      printf '%60s\t%s\n' "${file}" 'n/a'
    fi
  done
}

#######################################
# Dispatch on the sub-command given as first argument.
# Returns: status of the selected action, 2 for an unknown sub-command
#######################################
main() {
  case "${1:-}" in
    start)
      apply_scrub_values "${SCRUB_FAST_VALUES[@]}"
      ;;
    stop)
      apply_scrub_values "${SCRUB_NORMAL_VALUES[@]}"
      ;;
    status)
      show_scrub_values
      ;;
    *)
      echo "Usage: ${0} (start|stop|status)" >&2
      return 2
      ;;
  esac
}

main "$@"
More information on zpool status
#!/bin/bash
#
## print_zpool.sh
#
# Written by Lars Timmann <L@rs.Timmann.de> 2022
#
# Runs "zpool status -P" (default) or "zpool iostat -v -P" (first argument
# "iostat") and appends two extra fields to every vdev line: the first field
# of the matching "lsscsi --scsi_id" line and the kernel device node the
# /dev/disk/by-id link points to.
#
# Usage: print_zpool.sh [iostat] [further zpool arguments...]
#
# Requires gawk: the awk program uses the four-argument form of split().

columns=5 # number of columns for zpool status
zpool_args=(status)
if [[ "${1:-}" == "iostat" ]]; then
  zpool_args=(iostat -v)
  columns=7 # number of columns for zpool iostat -v
  shift
fi

stdbuf --output=L zpool "${zpool_args[@]}" -P "$@" | gawk -v columns="${columns}" '
BEGIN {
  # Index the lsscsi output: key is the second-to-last field of a line,
  # value is its first field.
  # NOTE(review): presumably key = /dev node and value = [H:C:T:L] - confirm.
  lsscsi_cmd="lsscsi --scsi_id";
  while( (lsscsi_cmd | getline lsscsi) > 0 ) {
    count=split(lsscsi,fields);
    dev=fields[count-1];
    scsi_id[dev]=fields[1];
  }
  close(lsscsi_cmd);

  # Index the by-id links: key is the link name ($(NF-2) of "ls -l"),
  # value is "/dev/" plus the link target with all dots and slashes removed.
  ls_cmd="ls -Ul /dev/disk/by-id/*";
  while( (ls_cmd | getline) > 0 ) {
    dev=$NF;
    gsub(/[\.\/]/,"",dev);
    dev_id=$(NF-2);
    device[dev_id]="/dev/"dev;
  }
  close(ls_cmd);
}
# vdev lines: the first field is a path below /dev/ (because of -P).
$1 ~ /\/dev\// {
  line=$0;
  dev_by_id=$1;
  # Name of the whole disk: strip a trailing "-partN" (or bare digits).
  dev_no_part=dev_by_id;
  gsub(/(-part|)[0-9]+$/,"",dev_no_part);
  if( NF > columns ) {
    # More fields than the regular table has columns: keep the first
    # "columns" fields (with their original spacing) in place and move the
    # remainder behind the fields added below.
    count=split(line,a,FS,seps);
    line=seps[0];
    for(i=1;i<columns;i++){
      line=line a[i] seps[i];
    }
    line=line a[columns];
    for(i=columns+1;i<=count;i++){
      rest=rest a[i] seps[i];
    }
  }
  printf("%s %s %s",line,scsi_id[device[dev_no_part]],device[dev_by_id]);
  if(rest!=""){
    printf(" %s",rest);
    rest="";
  }
  printf("\n");
  next;
}
# The "errors:" line is flushed right away.
/^errors:/ {
  print;
  fflush();
  next;
}
# Everything else is passed through unchanged.
{
  print;
}'
Backup ZFS settings
A little script which may be used at your own risk.
#!/bin/bash
# Written by Lars Timmann <L@rs.Timmann.de> 2018
# Tested on solaris 11.3 & Ubuntu Linux
# This script is a rotten bunch of code... rewrite it!
# External tools used by this script. Each path may be overridden from the
# environment (e.g. ZFS_CMD=/usr/sbin/zfs); the defaults are unchanged.
AWK_CMD="${AWK_CMD:-/usr/bin/gawk}"
ZPOOL_CMD="${ZPOOL_CMD:-/sbin/zpool}"
ZFS_CMD="${ZFS_CMD:-/sbin/zfs}"
ZDB_CMD="${ZDB_CMD:-/sbin/zdb}"
#######################################
# Print the locally set properties of a dataset as option lines for a
# create command: one tab-indented "-o <property>=<value> \" line each.
# Values are shell-escaped so that the generated command stays valid when a
# value contains blanks or other special characters.
# A property named "share.*" is not printed itself; instead the function
# calls itself with the selector "share.all" for the same dataset.
# Globals:   ZFS_CMD (read)
# Arguments: $1 - dataset name
#            $2 - property selector handed to "zfs get" (e.g. all)
#            $3 - extended regex; properties whose name matches are skipped
# Outputs:   the option lines on stdout
#######################################
print_local_options() {
  local dataset=$1
  local option=$2
  local exclude_regex=$3
  local property value

  # ZFS_CMD is left unquoted on purpose, as in the rest of the script.
  # "zfs get -H" separates the columns with a tab, so split on tabs only.
  ${ZFS_CMD} get -s local -Ho property,value -p "${option}" "${dataset}" \
    | while IFS=$'\t' read -r property value; do
        if [[ ${property} =~ ${exclude_regex} ]]; then
          continue
        fi
        if [[ ${property} == 'share.*' ]]; then
          print_local_options "${dataset}" 'share.all' '^$'
        else
          printf '\t-o %s=%q \\\n' "${property}" "${value}"
        fi
      done
}
# Emit the complete create command for a single filesystem:
# the "<ZFS_CMD> create \" line, one option line per locally set property
# (via print_local_options, nothing excluded) and finally the dataset name.
# Globals:   ZFS_CMD (read), ZFS (written)
# Arguments: $1 - name of the filesystem
print_filesystem() {
  ZFS="${1}"
  {
    printf '%s create \\\n' "${ZFS_CMD}"
    print_local_options "${ZFS}" 'all' '^$'
    printf '\t%s\n' "${ZFS}"
  }
}
#######################################
# Print a commented create command for every filesystem below a pool.
# The pool's own root dataset is skipped.
# Globals:   ZFS_CMD (read)
# Arguments: $1 - pool name
# Outputs:   for each filesystem a "## Filesystem: <name>" banner followed by
#            the output of print_filesystem and an empty line
#######################################
print_filesystems() {
  local pool=$1
  local fs

  # Read the list line by line so that dataset names containing blanks stay
  # intact. File descriptor 3 keeps stdin untouched for the commands called
  # inside the loop.
  while IFS= read -r fs <&3; do
    if [[ ${fs} == "${pool}" ]]; then
      continue
    fi
    printf '#\n## Filesystem: %s\n#\n\n' "${fs}"
    print_filesystem "${fs}"
    printf '\n'
  done 3< <(${ZFS_CMD} list -Ho name -t filesystem -r "${pool}")
}
#######################################
# Emit the complete create command for a single volume: size (-V) and block
# size (-b) first, then one option line per locally set property (names
# matching volsize or refreservation are left out), then the volume name.
# Globals:   ZFS_CMD (read)
# Arguments: $1 - name of the volume
# Outputs:   the create command on stdout
# Returns:   non-zero without printing anything if volsize or volblocksize
#            cannot be queried
#######################################
print_volume() {
  local zvol=$1
  local volsize volblocksize

  # Declaration and assignment are separate so that a failing "zfs get" is
  # noticed instead of producing a command with empty -V/-b arguments.
  volsize=$(${ZFS_CMD} get -Ho value volsize "${zvol}") || return
  volblocksize=$(${ZFS_CMD} get -Ho value volblocksize "${zvol}") || return

  printf '%s create \\\n\t-V %s \\\n\t-b %s \\\n' "${ZFS_CMD}" "${volsize}" "${volblocksize}"
  print_local_options "${zvol}" 'all' '(volsize|refreservation)'
  printf '\t%s\n' "${zvol}"
}
#######################################
# Print a commented create command for every volume below a pool.
# Globals:   ZFS_CMD (read)
# Arguments: $1 - pool name
# Outputs:   for each volume a "## Volume: <name>" banner followed by the
#            output of print_volume and an empty line
#######################################
print_volumes() {
  local pool=$1
  local zvol

  # Read the list line by line so that volume names containing blanks stay
  # intact. File descriptor 3 keeps stdin untouched for the commands called
  # inside the loop.
  while IFS= read -r zvol <&3; do
    printf '#\n## Volume: %s\n#\n\n' "${zvol}"
    print_volume "${zvol}"
    printf '\n'
  done 3< <(${ZFS_CMD} list -Ho name -t volume -r "${pool}")
}
#######################################
# Print the vdev part of a "zpool create" command from the "zdb -C" output
# of a pool: every "path" entry becomes one continuation line, and a "type"
# entry with the value mirror emits the keyword "mirror" in front of the
# paths that follow it.
# NOTE(review): other vdev types (raidz, log, cache, spare ...) are not
# translated into keywords; only their member paths are listed.
# Globals:   ZDB_CMD, AWK_CMD (read)
# Arguments: $1 - pool name
# Outputs:   " \<newline><tab><item>" per item, terminated by a newline
#######################################
print_vdevs() {
  local pool=$1

  ${ZDB_CMD} -C "${pool}" | ${AWK_CMD} -F':' '
    # Return the value of a "key: value" line without its enclosing quote
    # characters. Everything after the FIRST colon is the value, so paths
    # that contain colons themselves (e.g. /dev/disk/by-path/pci-0000:...)
    # are kept complete.
    function value_of(line,    val) {
      val = substr(line, index(line, ":") + 1);
      gsub(/^[ \t]+|[ \t]+$/, "", val);
      return substr(val, 2, length(val) - 2);
    }
    $1 ~ /^[ \t]*type$/ {
      type = value_of($0);
      if ( type == "mirror" ) {
        printf " \\\n\t%s", type;
      }
    }
    $1 ~ /^[ \t]*path$/ {
      printf " \\\n\t%s", value_of($0);
    }
    END {
      printf "\n";
    }
  '
}
#######################################
# Print everything needed to recreate one pool: a banner, the
# "zpool create" command (root dataset properties plus vdev list) and then
# the create commands for all filesystems and volumes of the pool.
# Globals:   ZPOOL_CMD (read)
# Arguments: $1 - pool name
# Outputs:   the generated commands on stdout
#######################################
print_zpool() {
  local pool=$1

  printf '#############################################################\n'
  printf '#\n## ZPool: %s\n#\n' "${pool}"
  printf '#############################################################\n\n'
  printf '%s create \\\n' "${ZPOOL_CMD}"
  # print_local_options lists properties of the pool root dataset. For
  # "zpool create" such filesystem properties have to be passed with -O
  # (-o is reserved for pool properties), so rewrite the option flag.
  # NOTE(review): in a bash regex the slashes of "/@/" are literal, so this
  # exclude pattern probably never matches - confirm the intent.
  print_local_options "${pool}" 'all' '/@/' | sed $'s/^\t-o /\t-O /'
  printf '\t%s' "${pool}"
  print_vdevs "${pool}"
  printf '\n'
  printf '#############################################################\n\n'
  print_filesystems "${pool}"
  print_volumes "${pool}"
}
# Main section: print a host banner, then the commands for every pool.

OS=$(uname -s)
# Define a flag variable named after the operating system (e.g. Linux=1).
# printf -v performs the assignment without handing the name to eval.
printf -v "${OS}" '%s' 1
HOSTNAME=$(hostname)

printf '#############################################################\n'
printf '# Hostname: %s\n' "${HOSTNAME}"
printf '#############################################################\n\n'

# Read the pool list line by line so that names are never word-split.
# File descriptor 3 keeps stdin untouched for the commands called in the loop.
while IFS= read -r ZPOOL <&3; do
  print_zpool "${ZPOOL}"
done 3< <(${ZPOOL_CMD} list -Ho name)