ZFS on Linux: Difference between revisions
No edit summary |
|||
(One intermediate revision by the same user not shown) | |||
Line 189: | Line 189: | ||
ExecStartPre=-/sbin/zfs destroy 'rpool/%i' | ExecStartPre=-/sbin/zfs destroy 'rpool/%i' | ||
ExecStartPre=/bin/dd if=/dev/urandom of=/run/zfs-cryptswap.%i/%i.key bs=32 count=1 | ExecStartPre=/bin/dd if=/dev/urandom of=/run/zfs-cryptswap.%i/%i.key bs=32 count=1 | ||
ExecStart=/sbin/zfs create -V 4G -b | ExecStart=/sbin/zfs create -V 4G -b 8k -o compression=zle -o logbias=throughput -o sync=always -o primarycache=metadata -o secondarycache=none -o com.sun:auto-snapshot=false -o encryption=on -o keyformat=raw -o keylocation=file:///run/zfs-cryptswap.%i/%i.key rpool/%i | ||
ExecStart=/bin/sleep 1 | ExecStart=/bin/sleep 1 | ||
ExecStartPost=/sbin/mkswap '/dev/zvol/rpool/%i' | ExecStartPost=/sbin/mkswap '/dev/zvol/rpool/%i' | ||
Line 403: | Line 403: | ||
columns=5 # number of columns for zpool status | columns=5 # number of columns for zpool status | ||
if [ ${1} == "iostat" ] | if [ ${#} -gt 0 ] && [ ${1} == "iostat" ] | ||
then | then | ||
command="iostat -v" | command="iostat -v" | ||
Line 462: | Line 462: | ||
}' | }' | ||
</SyntaxHighlight> | </SyntaxHighlight> | ||
==Backup ZFS settings== | ==Backup ZFS settings== | ||
A little script which may be used on your own risk. | A little script which may be used on your own risk. |
Latest revision as of 15:54, 22 June 2023
Grub
Create /etc/udev/rules.d/99-local-grub.rules with this content:
# Create by-id links in /dev as well for zfs vdev. Needed by grub
# Add links for zfs_member only
KERNEL=="sd*[0-9]", IMPORT{parent}=="ID_*", ENV{ID_FS_TYPE}=="zfs_member", SYMLINK+="$env{ID_BUS}-$env{ID_SERIAL}-part%n"
Virtualbox on ZVols
If you use ZVols as raw VMDK devices in VirtualBox as a normal user (vmuser in this example), create /etc/udev/rules.d/99-local-zvol.rules with this content:
KERNEL=="zd*" SUBSYSTEM=="block" ACTION=="add|change" PROGRAM="/lib/udev/zvol_id /dev/%k" RESULT=="rpool/VM/*" OWNER="vmuser"
vmuser@virtualbox-server:~$ VBoxManage internalcommands createrawvmdk -filename /var/data/VMs/dev/Solaris10.vmdk -rawdisk /dev/zvol/rpool/VM/Solaris10
Setup Ubuntu 16.04 with ZFS root
Most of this is taken from the Ubuntu-16.04-Root-on-ZFS guide.
Boot Ubuntu Desktop (alias Live CD) and choose "try out".
Get the right ashift value
For example to get sda and sdb:
# lsblk -o NAME,PHY-SeC,LOG-SEC /dev/sd{a,b} | awk 'function exponent (value) {for(i=0;value>1;i++){value/=2;}; return i;}{if($2 ~ /[0-9]+/){print $0,exponent($2)}else{print$0,"ashift"}}'
NAME PHY-SEC LOG-SEC ashift
sda 512 512 9
├─sda1 512 512 9
├─sda2 512 512 9
├─sda3 512 512 9
└─sda4 512 512 9
sdb 4096 512 12
├─sdb1 4096 512 12
├─sdb2 4096 512 12
├─sdb3 4096 512 12
└─sdb4 4096 512 12
Connect it to your network
sudo -i
ifconfig ens160 <IP> netmask 255.255.255.0
route add default gw <defaultrouter>
echo "nameserver <nameserver>" >> /etc/resolv.conf
echo 'Acquire::http::Proxy "http://<user>:<pass>@<proxyhost>:<proxyport>";' >> /etc/apt/apt.conf
apt-add-repository universe
apt update
apt --yes install openssh-server
passwd ubuntu
Reconnect via ssh
apt install --yes debootstrap gdisk zfs-initramfs
sgdisk -g -a1 -n2:34:2047 -t2:EF02 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4
sgdisk -n9:-8M:0 -t9:BF07 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4
sgdisk -n1:0:0 -t1:BF01 /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4
zpool create -f -o ashift=12 \
-O atime=off \
-O canmount=off \
-O compression=lz4 \
-O normalization=formD \
-O mountpoint=/ \
-R /mnt \
rpool /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4-part1
zfs create -o canmount=off -o mountpoint=none rpool/ROOT
zfs create -o canmount=noauto -o mountpoint=/ rpool/ROOT/ubuntu
zfs mount rpool/ROOT/ubuntu
zfs create -o setuid=off rpool/home
zfs create -o mountpoint=/root rpool/home/root
zfs create -o canmount=off -o setuid=off -o exec=off rpool/var
zfs create -o com.sun:auto-snapshot=false rpool/var/cache
zfs create rpool/var/log
zfs create rpool/var/spool
zfs create -o com.sun:auto-snapshot=false -o exec=on rpool/var/tmp
zfs create -V 4G -b $(getconf PAGESIZE) -o compression=zle \
-o logbias=throughput -o sync=always \
-o primarycache=metadata -o secondarycache=none \
-o com.sun:auto-snapshot=false rpool/swap
cp -p {,/mnt}/etc/apt/apt.conf
export http_proxy=$(awk '/Acquire::http::Proxy/{gsub(/\"/,"");gsub(/;$/,"");print $2}' /mnt/etc/apt/apt.conf)
echo -n xenial{,-security,-updates} | \
xargs -n 1 -d ' ' -I{} echo "deb http://archive.ubuntu.com/ubuntu {} main universe" > /mnt/etc/apt/sources.list
chmod 1777 /mnt/var/tmp
debootstrap xenial /mnt
zfs set devices=off rpool
HOSTNAME=Template-VM
echo ${HOSTNAME} > /mnt/etc/hostname
printf "127.0.1.1\t%s\n" "${HOSTNAME}" >> /mnt/etc/hosts
INTERFACE=$(ip a s scope global | awk 'NR==1{gsub(/:$/,"",$2);print $2;}')
printf "auto %s\niface %s inet dhcp\n" "${INTERFACE}" "${INTERFACE}" > /mnt/etc/network/interfaces.d/${INTERFACE}
mount --rbind /dev /mnt/dev
mount --rbind /proc /mnt/proc
mount --rbind /sys /mnt/sys
cp -p {,/mnt}/etc/apt/apt.conf
echo -n xenial{,-security,-updates} | \
xargs -n 1 -d ' ' -I{} echo "deb http://archive.ubuntu.com/ubuntu {} main universe" > /mnt/etc/apt/sources.list
chroot /mnt /bin/bash --login
locale-gen en_US.UTF-8
echo 'LANG="en_US.UTF-8"' > /etc/default/locale
LANG="en_US.UTF-8"
dpkg-reconfigure tzdata
ln -s /proc/self/mounts /etc/mtab
apt update
apt install --yes ubuntu-minimal
apt install --yes --no-install-recommends linux-image-generic
apt install --yes zfs-initramfs
apt install --yes openssh-server
apt install --yes grub-pc
addgroup --system lpadmin
addgroup --system sambashare
passwd
grub-probe /
update-initramfs -c -k all
vi /etc/default/grub
Comment out: GRUB_HIDDEN_TIMEOUT=0
Remove quiet and splash from: GRUB_CMDLINE_LINUX_DEFAULT
Uncomment: GRUB_TERMINAL=console
update-grub
grub-install /dev/disk/by-id/scsi-36000c2932cdb62febff0b5ac93786dd4
zfs snapshot rpool/ROOT/ubuntu@install
exit
mount | grep -v zfs | tac | awk '/\/mnt/ {print $3}' | xargs -i{} umount -lf {}
zpool export rpool
reboot
apt install --yes cryptsetup
echo cryptswap1 /dev/zvol/rpool/swap /dev/urandom swap,cipher=aes-xts-plain64:sha256,size=256 >> /etc/crypttab
systemctl daemon-reload
systemctl start systemd-cryptsetup@cryptswap1.service
echo /dev/mapper/cryptswap1 none swap defaults 0 0 >> /etc/fstab
swapon -av
Swap on ZFS with random key encryption
$ sudo systemctl edit --force --full zfs-cryptswap@.service
# /etc/systemd/system/zfs-cryptswap@.service
[Unit]
Description=ZFS Random Cryptography Setup for %I
Documentation=man:zfs(8)
DefaultDependencies=no
Conflicts=umount.target
IgnoreOnIsolate=true
After=systemd-random-seed.service zfs-volumes.target
BindsTo=dev-zvol-rpool-%i.device
Before=umount.target
[Service]
Type=oneshot
RemainAfterExit=yes
TimeoutSec=0
KeyringMode=shared
OOMScoreAdjust=500
UMask=0077
RuntimeDirectory=zfs-cryptswap.%i
RuntimeDirectoryMode=0700
ExecStartPre=-/sbin/swapoff '/dev/zvol/rpool/%i'
ExecStartPre=-/sbin/zfs destroy 'rpool/%i'
ExecStartPre=/bin/dd if=/dev/urandom of=/run/zfs-cryptswap.%i/%i.key bs=32 count=1
ExecStart=/sbin/zfs create -V 4G -b 8k -o compression=zle -o logbias=throughput -o sync=always -o primarycache=metadata -o secondarycache=none -o com.sun:auto-snapshot=false -o encryption=on -o keyformat=raw -o keylocation=file:///run/zfs-cryptswap.%i/%i.key rpool/%i
ExecStart=/bin/sleep 1
ExecStartPost=/sbin/mkswap '/dev/zvol/rpool/%i'
ExecStartPost=/sbin/swapon '/dev/zvol/rpool/%i'
ExecStop=/sbin/swapoff '/dev/zvol/rpool/%i'
ExecStop=/bin/sleep 2
ExecStopPost=/sbin/zfs destroy 'rpool/%i'
[Install]
WantedBy=swap.target
!!!BE CAREFUL with the name after @ !!!
The name after the @ is the name of the ZFS volume that will be DESTROYED and recreated!!!
To destroy and recreate an encrypted ZFS volume named cryptswap use:
# systemctl start zfs-cryptswap@cryptswap.service
# systemctl enable zfs-cryptswap@cryptswap.service
# update-initramfs -k $(uname -r) -u
Kernel settings for ZFS
Set module parameter in /etc/modprobe.d/zfs.conf
options zfs zfs_arc_max=10737418240
# increase these so scrub/resilver runs more quickly, at the cost of other work
options zfs zfs_vdev_scrub_min_active=24
options zfs zfs_vdev_scrub_max_active=64
# sync write
options zfs zfs_vdev_sync_write_min_active=8
options zfs zfs_vdev_sync_write_max_active=32
# sync reads (normal)
options zfs zfs_vdev_sync_read_min_active=8
options zfs zfs_vdev_sync_read_max_active=32
# async reads : prefetcher
options zfs zfs_vdev_async_read_min_active=8
options zfs zfs_vdev_async_read_max_active=32
# async write : bulk writes
options zfs zfs_vdev_async_write_min_active=8
options zfs zfs_vdev_async_write_max_active=32
# max write speed to l2arc
# tradeoff between write/read and durability of ssd (?)
# default : 8 * 1024 * 1024
# setting here : 500 * 1024 * 1024
options zfs l2arc_write_max=524288000
options zfs zfs_top_maxinflight=512
options zfs zfs_resilver_min_time_ms=8000
options zfs zfs_resilver_delay=0
Remember to update your initramfs before rebooting. The initramfs is the filesystem that is read when the module is loaded at boot.
# update-initramfs -k all -u
Check settings
root@zfshost:~# modprobe -c | grep "options zfs"
options zfs zfs_arc_max=10737418240
options zfs zfs_vdev_scrub_min_active=24
options zfs zfs_vdev_scrub_max_active=64
options zfs zfs_vdev_sync_write_min_active=8
options zfs zfs_vdev_sync_write_max_active=32
options zfs zfs_vdev_sync_read_min_active=8
options zfs zfs_vdev_sync_read_max_active=32
options zfs zfs_vdev_async_read_min_active=8
options zfs zfs_vdev_async_read_max_active=32
options zfs zfs_vdev_async_write_min_active=8
options zfs zfs_vdev_async_write_max_active=32
options zfs l2arc_write_max=524288000
options zfs zfs_top_maxinflight=512
options zfs zfs_resilver_min_time_ms=8000
options zfs zfs_resilver_delay=0
root@zfshost:~# modprobe --show-depends zfs
insmod /lib/modules/4.15.0-58-generic/kernel/spl/spl.ko
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/znvpair.ko
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zcommon.ko
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/icp.ko
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zavl.ko
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zunicode.ko
insmod /lib/modules/4.15.0-58-generic/kernel/zfs/zfs.ko zfs_arc_max=10737418240 zfs_vdev_scrub_min_active=24 zfs_vdev_scrub_max_active=64 zfs_vdev_sync_write_min_active=8 zfs_vdev_sync_write_max_active=32 zfs_vdev_sync_read_min_active=8 zfs_vdev_sync_read_max_active=32 zfs_vdev_async_read_min_active=8 zfs_vdev_async_read_max_active=32 zfs_vdev_async_write_min_active=8 zfs_vdev_async_write_max_active=32 l2arc_write_max=524288000 zfs_top_maxinflight=512 zfs_resilver_min_time_ms=8000 zfs_resilver_delay=0
Check actual settings
Check files in
- /proc/spl/kstat/zfs/
- /sys/module/zfs/parameters/
ARC Cache
Get the current usage of cache
# cat /proc/spl/kstat/zfs/arcstats |grep c_
c_min 4 521779200
c_max 4 1073741824
arc_no_grow 4 0
arc_tempreserve 4 0
arc_loaned_bytes 4 0
arc_prune 4 25360
arc_meta_used 4 493285336
arc_meta_limit 4 805306368
arc_dnode_limit 4 80530636
arc_meta_max 4 706551816
arc_meta_min 4 16777216
sync_wait_for_async 4 357
arc_need_free 4 0
arc_sys_free 4 260889600
Limit the cache without a reboot (non-permanent)
For example limit it to 512MB (which is too small for production environments, just an example...):
# echo "$[512*1024*1024]" > /sys/module/zfs/parameters/zfs_arc_max
Now you have to drop the caches:
# echo 3 > /proc/sys/vm/drop_caches
Make the cache limit permanent
For example limit it to 512MB (which is too small for production environments, just an example...):
# echo "options zfs zfs_arc_max=$[512*1024*1024]" >> /etc/modprobe.d/zfs.conf
This value takes effect after a reboot.
Check cache hits/misses
# (while : ; do cat /proc/spl/kstat/zfs/arcstats ; sleep 5 ; done ) | awk '
BEGIN {
}
$1 ~ /(hits|misses)/ {
name=$1;
gsub(/[_]*(hits|misses)/,"",name);
if(name == ""){
name="global";
}
}
$1 ~ /hits/ {
hits[name] = $3 - hitslast[name]
hitslast[name] = $3
}
$1 ~ /misses/ {
misses[name] = $3 - misslast[name]
misslast[name] = $3
rate = 0
total = hits[name] + misses[name]
if (total)
rate = (hits[name] * 100) / total
if (name=="global")
printf "%30s %12s %12s %9s\n", "NAME", "HITS", "MISSES", "HITRATE"
printf "%30s %12d %12d %8.2f%%\n", name, hits[name], misses[name], rate
}
'
Higher scrub performance
#!/bin/bash
#
## scrub_fast.sh
#
# Switches a fixed set of ZFS module parameters between two value sets
# ("start" and "stop") or prints their current values ("status").
# Any other argument prints a usage line.

# Parameters handled by this script. The value lists passed to
# write_scrub_tunables follow exactly this order.
scrub_tunables=(
  zfs_scan_idle
  zfs_scrub_delay
  zfs_top_maxinflight
  zfs_scan_min_time_ms
  zfs_vdev_scrub_min_active
  zfs_vdev_scrub_max_active
)

# Write one value per parameter, in scrub_tunables order.
# Arguments: six values, one for each entry of scrub_tunables.
write_scrub_tunables() {
  local -a values=("$@")
  local idx
  for ((idx = 0; idx < ${#scrub_tunables[@]}; idx++)); do
    echo "${values[idx]}" > "/sys/module/zfs/parameters/${scrub_tunables[idx]}"
  done
}

case "${1}" in
  start)
    write_scrub_tunables 0 0 512 5000 4 8
    ;;
  stop)
    write_scrub_tunables 50 4 32 1000 1 2
    ;;
  status)
    for tunable in "${scrub_tunables[@]}"; do
      tunable_path="/sys/module/zfs/parameters/${tunable}"
      printf "%60s\t%d\n" "${tunable_path}" "$(cat ${tunable_path})"
    done
    ;;
  *)
    echo "Usage: ${0} (start|stop|status)"
    ;;
esac
More information on zpool status
#!/bin/bash
#
## print_zpool.sh
#
# Written by Lars Timmann <L@rs.Timmann.de> 2022
#
# Usage: print_zpool.sh [iostat] [zpool arguments...]
#
# Runs "zpool status -P" (or "zpool iostat -v -P" when the first argument
# is "iostat") and appends two extra values to every line whose first
# field contains "/dev/": the first column of "lsscsi --scsi_id" for the
# disk and the device node the /dev/disk/by-id link points to.
# All remaining arguments are passed to zpool unchanged.
#
# NOTE: the awk program uses the four-argument form of split(), which is
# a gawk extension, so "awk" has to be gawk.
columns=5 # number of columns for zpool status
zpool_subcommand=(status)
if [[ ${#} -gt 0 && "${1}" == "iostat" ]]
then
  zpool_subcommand=(iostat -v)
  columns=7
  shift
fi
# "$@" keeps every remaining argument as one word, so arguments that
# contain spaces or glob characters reach zpool intact.
stdbuf --output=L zpool "${zpool_subcommand[@]}" -P "$@" | awk -v columns="${columns}" '
BEGIN {
  # scsi_id[device node] = first column of "lsscsi --scsi_id".
  # The device node is the second to last field of each lsscsi line.
  command="lsscsi --scsi_id";
  while( (command | getline lsscsi) > 0 ) {
    count=split(lsscsi,fields);
    dev=fields[count-1];
    scsi_id[dev]=fields[1];
  }
  close(command);
  # device[by-id link] = "/dev/" followed by the link target with all
  # dots and slashes removed.
  command="ls -Ul /dev/disk/by-id/*";
  while( (command | getline) > 0 ) {
    dev=$NF;
    gsub(/[\.\/]/,"",dev);
    dev_id=$(NF-2);
    device[dev_id]="/dev/"dev;
  }
  close(command);
}
$1 ~ /\/dev\// {
  line=$0;
  dev_by_id=$1;
  # Strip a trailing "-part<N>" (or bare trailing digits) to get the key
  # used for the whole-disk lookup.
  dev_no_part=dev_by_id;
  gsub(/(-part|)[0-9]+$/,"",dev_no_part);
  if( NF > 5) {
    # Keep the first "columns" fields with their original separators in
    # "line"; everything after that is collected in "rest" and printed
    # behind the two appended values.
    count=split(line,a,FS,seps);
    line=seps[0];
    for(i=1;i<columns;i++){
      line=line a[i] seps[i];
    }
    line=line a[columns];
    for(i=columns+1;i<=count;i++){
      rest=rest a[i] seps[i];
    }
  }
  printf("%s %s %s",line,scsi_id[device[dev_no_part]],device[dev_by_id]);
  if(rest!=""){
    printf(" %s",rest);
    rest="";
  }
  printf("\n");
  next;
}
/^errors:/ {
  print;
  fflush();
  next;
}
{
  print;
}'
Backup ZFS settings
A little script which may be used at your own risk.
#!/bin/bash
# Written by Lars Timmann <L@rs.Timmann.de> 2018
# Tested on solaris 11.3 & Ubuntu Linux
# This script is a rotten bunch of code... rewrite it!
#
# Prints "zpool create" / "zfs create" command lines, including the
# locally set properties, for every pool, filesystem and volume that the
# tools below report. Nothing is executed; the commands only go to stdout.

# Paths of the external tools used by the functions below.
AWK_CMD=/usr/bin/gawk
ZPOOL_CMD=/sbin/zpool
ZFS_CMD=/sbin/zfs
ZDB_CMD=/sbin/zdb
#######################################
# Print the locally set properties of a dataset as continuation lines of
# the form "<TAB>-o <property>=<value> \".
# Globals:
#   ZFS_CMD (read) - zfs command to run
# Arguments:
#   $1 - dataset name (may contain spaces)
#   $2 - property selector handed to "zfs get" (e.g. 'all', 'share.all')
#   $3 - bash regex; properties whose name matches it are skipped.
#        Defaults to '^$' (skip nothing) when missing or empty.
# Outputs:
#   One line per property on stdout.
#######################################
function print_local_options () {
  local dataset=$1
  local option=$2
  local exclude_regex=$3
  local property value

  # An empty regex matches every string and would silently drop all
  # properties, so fall back to a pattern that matches nothing.
  if [[ -z "${exclude_regex}" ]]; then
    exclude_regex='^$'
  fi

  # ZFS_CMD stays unquoted so that a value holding a command plus a
  # prefix still works; the dataset and selector are quoted so names
  # with spaces reach zfs as a single argument.
  # "zfs get -H" separates fields with a tab, so split on tabs only and
  # keep spaces inside the value.
  ${ZFS_CMD} get -s local -Ho property,value -p "${option}" "${dataset}" |
    while IFS=$'\t' read -r property value; do
      if [[ "${property}" =~ ${exclude_regex} ]]; then
        continue
      fi
      if [[ "${property}" == 'share.*' ]]; then
        # A literal "share.*" entry is expanded by querying the
        # individual share properties of the same dataset.
        print_local_options "${dataset}" 'share.all' '^$'
      else
        printf '\t-o %s=%s \\\n' "${property}" "${value}"
      fi
    done
}
#######################################
# Print a "zfs create" command line for one filesystem: the command
# itself, one continuation line per locally set property, and finally
# the dataset name.
# Globals:
#   ZFS_CMD (read), ZFS (written)
# Arguments:
#   $1 - dataset name
# Outputs:
#   The command line on stdout.
#######################################
print_filesystem() {
  ZFS="${1}"
  {
    printf '%s create \\\n' "${ZFS_CMD}"
    # '^$' only matches an empty property name, so nothing is filtered.
    print_local_options "${ZFS}" 'all' '^$'
    printf '\t%s\n' "${ZFS}"
  }
}
#######################################
# Print a commented "zfs create" command for every filesystem below a
# pool. The dataset named exactly like the pool is skipped.
# Globals:
#   ZFS_CMD (read) - zfs command to run
# Arguments:
#   $1 - pool name
# Outputs:
#   A comment header plus the output of print_filesystem per dataset.
#######################################
function print_filesystems () {
  local pool=$1
  local dataset

  # Read line by line instead of word-splitting the command output, so
  # dataset names containing spaces stay intact. The list arrives on
  # file descriptor 3 to leave stdin untouched for the loop body.
  while IFS= read -r -u 3 dataset; do
    if [[ "${dataset}" == "${pool}" ]]; then
      continue
    fi
    printf '#\n## Filesystem: %s\n#\n\n' "${dataset}"
    print_filesystem "${dataset}"
    printf '\n'
  done 3< <(${ZFS_CMD} list -Ho name -t filesystem -r "${pool}")
}
#######################################
# Print a "zfs create -V" command line for one volume: size, block size,
# one continuation line per locally set property, and the volume name.
# Globals:
#   ZFS_CMD (read) - zfs command to run
# Arguments:
#   $1 - volume name (may contain spaces)
# Outputs:
#   The command line on stdout.
# Returns:
#   The status of the failing "zfs get" if size or block size cannot be
#   read; nothing is printed in that case.
#######################################
function print_volume () {
  local zvol=$1
  local volsize volblocksize

  # Declared separately from the assignment so a failing zfs call is
  # not masked by the exit status of "local".
  volsize=$(${ZFS_CMD} get -Ho value volsize "${zvol}") || return
  volblocksize=$(${ZFS_CMD} get -Ho value volblocksize "${zvol}") || return

  printf '%s create \\\n\t-V %s \\\n\t-b %s \\\n' "${ZFS_CMD}" "${volsize}" "${volblocksize}"
  # volsize is already given via -V; refreservation is skipped as well.
  print_local_options "${zvol}" 'all' '(volsize|refreservation)'
  printf '\t%s\n' "${zvol}"
}
#######################################
# Print a commented "zfs create -V" command for every volume of a pool.
# Globals:
#   ZFS_CMD (read) - zfs command to run
# Arguments:
#   $1 - pool name
# Outputs:
#   A comment header plus the output of print_volume per volume.
#######################################
function print_volumes () {
  local pool=$1
  local zvol

  # Read line by line instead of word-splitting the command output, so
  # volume names containing spaces stay intact. The list arrives on
  # file descriptor 3 to leave stdin untouched for the loop body.
  while IFS= read -r -u 3 zvol; do
    printf '#\n## Volume: %s\n#\n\n' "${zvol}"
    print_volume "${zvol}"
    printf '\n'
  done 3< <(${ZFS_CMD} list -Ho name -t volume -r "${pool}")
}
#######################################
# Print the vdev part of a "zpool create" command line from the pool
# configuration shown by "zdb -C": every "path" entry and, ahead of its
# members, the keyword "mirror" for every vdev of type mirror. Each item
# is emitted as " \<NEWLINE><TAB>item", so the output continues the
# line the caller has started. Vdev types other than "mirror" are not
# printed; only their member paths are.
# Globals:
#   ZDB_CMD (read), AWK_CMD (read)
# Arguments:
#   $1 - pool name
# Outputs:
#   The continuation lines followed by a final newline on stdout.
#######################################
function print_vdevs () {
  local pool=$1

  ${ZDB_CMD} -C "${pool}" | ${AWK_CMD} -F':' '
    # Return the value of a "key: QUOTEvalueQUOTE" line without the
    # surrounding quote characters. Everything after the FIRST colon
    # is used, so values that contain colons themselves (for example
    # /dev/disk/by-path names) are kept complete.
    function config_value(    text) {
      text = $0;
      sub(/^[^:]*:/, "", text);
      gsub(/[ ]+/, "", text);
      return substr(text, 2, length(text) - 2);
    }
    $1 ~ /^[ \t]*type$/ {
      type = config_value();
      if ( type == "mirror" ) {
        printf " \\\n\t%s", type;
      }
    }
    $1 ~ /^[ \t]*path$/ {
      vdev = config_value();
      printf " \\\n\t%s", vdev;
    }
    END {
      printf "\n";
    }
  '
}
#######################################
# Print everything needed to recreate one pool: a banner, the
# "zpool create" command with the locally set properties of the pool's
# root dataset and its vdevs, then the commands for all filesystems and
# volumes of the pool.
# Globals:
#   ZPOOL_CMD (read) - zpool command name printed in the output
# Arguments:
#   $1 - pool name
# Outputs:
#   The generated text on stdout.
#######################################
function print_zpool () {
  local pool=$1
  local tab=$'\t'
  local option_line

  printf '#############################################################\n'
  printf '#\n## ZPool: %s\n#\n' "${pool}"
  printf '#############################################################\n\n'
  printf '%s create \\\n' "${ZPOOL_CMD}"
  # print_local_options reports properties of the pool's root dataset
  # and writes them as "-o name=value". "zpool create" only accepts pool
  # properties after -o; file system properties of the root dataset have
  # to be passed with -O, so the flag is rewritten here.
  # Properties with an "@" in their name are skipped. The pattern used
  # to be '/@/', whose slashes bash matches literally.
  print_local_options "${pool}" 'all' '@' |
    while IFS= read -r option_line; do
      if [[ "${option_line}" == "${tab}-o "* ]]; then
        printf '\t-O %s\n' "${option_line#"${tab}-o "}"
      else
        printf '%s\n' "${option_line}"
      fi
    done
  printf '\t%s' "${pool}"
  print_vdevs "${pool}"
  printf '\n'
  printf '#############################################################\n\n'
  print_filesystems "${pool}"
  print_volumes "${pool}"
}
# Driver: print a host banner, then the recreate commands for every
# pool reported by "zpool list".
OS=$(uname -s)
# Set a flag variable named after the operating system (e.g. Linux=1).
# printf -v assigns without running the uname output through eval;
# characters that are not valid in a variable name become "_".
printf -v "${OS//[^A-Za-z0-9_]/_}" '%s' 1
HOSTNAME=$(hostname)
printf '#############################################################\n'
printf '# Hostname: %s\n' "${HOSTNAME}"
printf '#############################################################\n\n'
# Read pool names line by line so names containing spaces stay intact.
# The list arrives on file descriptor 3 to leave stdin untouched.
while IFS= read -r -u 3 ZPOOL
do
  print_zpool "${ZPOOL}"
done 3< <(${ZPOOL_CMD} list -Ho name)