ZFS sync script: Difference between revisions
From Lolly's Wiki
Jump to navigation | Jump to search
No edit summary |
|||
Line 10: | Line 10: | ||
* The variable ''SECURE'' defines if you want to use ssh to encrypt your stream. Set it to ''yes'' or ''no''. | * The variable ''SECURE'' defines if you want to use ssh to encrypt your stream. Set it to ''yes'' or ''no''. | ||
* To mark the datasets to copy from the backup host use this on the source: | * To mark the datasets to copy from the backup host use this on the source: | ||
<source lang=bash> | |||
# /usr/sbin/zfs set de.timmann:auto-backup=<backup host> <dataset> | |||
</source> | |||
* Run the script on the destination/backup host. | * Run the script on the destination/backup host. | ||
* Make an ssh-key exchange to login without password. | * Make an ssh-key exchange to login without password. |
Revision as of 14:28, 2 June 2015
Like all of my scripts, this script comes without any guarantees! Use it at your own risk!
About the script
- It uses mbuffer. It is easy to compile.
- It uses gawk.
- The variable SECURE defines if you want to use ssh to encrypt your stream. Set it to yes or no.
- To mark the datasets that the backup host should copy, set this property on the source host:
# /usr/sbin/zfs set de.timmann:auto-backup=<backup host> <dataset>
- Run the script on the destination/backup host.
- Make an ssh-key exchange to login without password.
- If you don't want to use root as the backup user on the source host, create a zfssync user like this:
# useradd -m zfssync
# passwd -N zfssync
# usermod -K type=normal zfssync
Good luck!
zfs_sync.sh
#!/usr/bin/bash
# Written by Lars Timmann <L@rs.Timmann.de> 2013
# This script is a rotten bunch of code... rewrite it!
#
# Configuration and start-up section.
# The script runs on the backup (destination) host. It lists the datasets of
# SRC_POOL on SRC_HOST via ssh and receives "zfs send" streams through mbuffer
# into DST_POOL.

# Login used for ssh to the source host (if empty, ssh is called without "user@")
SRC_USER=zfssync
# Source host and the pool on it that is listed recursively
SRC_HOST=my_source_server
SRC_POOL=my_source_zpool
# Local pool that receives the streams
DST_POOL=my_local_destination_zpool
# Number of most recent source snapshots considered when a dataset has no copy
# on the destination yet
INITIAL_COPIES=3
# "yes": the mbuffer stream is tunnelled through an ssh remote port forward
# (encrypted). Every other value: the sender connects with mbuffer directly to
# DST_HOST:MBUFFER_PORT (unencrypted).
SECURE="no"
# TCP port the local mbuffer receiver listens on
MBUFFER_PORT=10001
# Options passed to mbuffer on both the sending and the receiving side
MBUFFER_OPTS="-v 0 --md5 -s 128k -m 256M"
# Absolute paths of the tools used below. ${ZFS} and ${MBUFFER} are also
# interpolated into the commands executed on the source host.
ZFS=/usr/sbin/zfs
# NOTE(review): arcfour ciphers are no longer offered by recent OpenSSH
# releases -- confirm this cipher is still accepted by both hosts.
SSH="/usr/bin/ssh -xc arcfour128"
#AWK=/usr/bin/gawk
# gawk is required: the script calls systime(), strftime() and mktime()
AWK=/opt/sfw/bin/gawk
# GREP and MD5 are defined here but not referenced in the rest of the script
GREP=/usr/bin/grep
DATE=/usr/bin/date
MD5="/usr/bin/digest -a md5"
ROUTE=/usr/sbin/route
MBUFFER="/opt/mbuffer/bin/mbuffer"
# Guess the right IP for communication with source host:
# take the second field of the last line printed by "route -vn get <source host>"
DST_HOST=$(${ROUTE} -vn get ${SRC_HOST} | ${AWK} '{ip=$2}END{print ip}')
# Script name without ".sh"; used as prefix for lock, list and temp files
MYNAME=$(/usr/bin/basename $0 .sh)
# Local host name; compared with the value of BACKUP_PROPERTY on the source
MYSELF=$(/usr/bin/hostname)
# Files holding the "zfs list" output of the source and the destination pool
SRC_DATASETS=/tmp/${MYNAME}_src_ds.out
DST_DATASETS=/tmp/${MYNAME}_dst_ds.out
# Lock file containing the PID of the running instance
LOCK_FILE=/var/run/${MYNAME}.lck
# TMP_FILE1: log of the local mbuffer receiver (-l); TMP_FILE2: output of the remote sender
TMP_FILE1=/tmp/${MYNAME}.tmp1
TMP_FILE2=/tmp/${MYNAME}.tmp2
# User property that marks a dataset for backup; its value names the backup host
BACKUP_PROPERTY="de.timmann:auto-backup"
# Remember the start time (epoch seconds) and print it
START_TIME=$(${AWK} 'BEGIN{printf systime();}')
${AWK} -v time=${START_TIME} 'BEGIN{print "START:",strftime("%d.%m.%Y %H:%M.%S",time)}'
# What to do on interruption
# --------------------------
# Handler for the trapped signals: announce the signal, print the current
# time, remove the lock file, wait three seconds, force-kill the most recently
# started background job (if any) and leave with exit status 1.
function on_interrupt () {
  echo "\n--- Signal empfangen: Exiting ...\n"
  date
  rm -f ${LOCK_FILE}
  sleep 3
  kill -9 ${!} 2>/dev/null
  exit 1
}
trap on_interrupt 1 2 3 13 14 15 18
###########################
# Take the lock atomically. With noclobber the redirection fails when the
# file already exists, so two instances started at the same moment cannot
# both get past a separate "test, then create" sequence.
# $$ inside the subshell is still the PID of this script.
if ( set -o noclobber; echo $$ > "${LOCK_FILE}" ) 2>/dev/null ; then
  : # lock acquired, our PID is stored in the lock file
elif [ -f "${LOCK_FILE}" ] ; then
  echo "$0 is allready running as PID $(/usr/bin/cat "${LOCK_FILE}") look in ${LOCK_FILE}"
  exit 1
else
  # The file does not exist and could not be created (directory missing,
  # no permission, ...): running without a lock would be unsafe.
  echo "$0: cannot create lock file ${LOCK_FILE}" >&2
  exit 1
fi
# Take the inventory of both pools at the same time, ordered by creation time:
#   source      -> ${SRC_DATASETS}  (columns: name, type, backup property)
#   destination -> ${DST_DATASETS}  (columns: name, type)
${SSH} ${SRC_USER:+"${SRC_USER}@"}${SRC_HOST} \
  "${ZFS} list -rH -t filesystem,snapshot,volume -o name,type,${BACKUP_PROPERTY} -s creation ${SRC_POOL}" \
  > ${SRC_DATASETS} &
${ZFS} list -rH -t filesystem,snapshot,volume -o name,type -s creation ${DST_POOL} \
  > ${DST_DATASETS} &
# Both listings run in the background; continue only when both are complete
wait
#######################################
# Replace a leading pool name in a dataset or snapshot name.
# The prefix is compared literally (no regular expression), so characters
# such as "." in a pool name only match themselves.
# Arguments: $1 - dataset or snapshot name
#            $2 - prefix to look for at the start of $1
#            $3 - text that replaces the prefix
# Outputs:   the converted name on stdout; $1 unchanged if it does not start
#            with $2
#######################################
function convert_to_poolname () {
  local from_zfs=$1
  local search=$2
  local replace=$3
  if [[ "${from_zfs}" == "${search}"* ]] ; then
    printf '%s\n' "${replace}${from_zfs#"${search}"}"
  else
    printf '%s\n' "${from_zfs}"
  fi
}
#######################################
# Check whether a dataset/snapshot name is listed in a "zfs list" output file.
# Globals:   AWK (read)
# Arguments: $1 - exact name to look for (compared with the first column)
#            $2 - list file
# Outputs:   the name on stdout for every matching line
# Returns:   0 if the name was found, non-zero otherwise
#######################################
function is_available () {
  local snapshot=$1
  local list=$2
  # Callers pass unquoted variables; an empty one shifts the arguments and
  # leaves the list file empty. awk would then read stdin and could block.
  if [[ -z "${snapshot}" || -z "${list}" ]] ; then
    return 1
  fi
  ${AWK} -v snapshot="${snapshot}" '
    BEGIN { rc = 1 }
    $1 == snapshot { print $1; rc = 0 }
    END { exit rc }
  ' "${list}"
}
#######################################
# Destroy old snapshots in the destination pool.
# Snapshots are listed newest first. A snapshot is destroyed when it is older
# than <days_to_keep> days AND more than <min_to_keep> newer-or-equal
# snapshots of the same dataset have already been seen.
# Only names that start with "<DST_POOL>/" or "<DST_POOL>@" are considered,
# so a pool whose name merely begins with the same characters is never touched.
# Globals:   ZFS, AWK, DATE, DST_POOL (read)
# Arguments: $1 - days_to_keep
#            $2 - min_to_keep (snapshots kept per dataset regardless of age)
# Outputs:   one log line per destroyed snapshot
#######################################
function expire_dst_pool_snapshots () {
  local days_to_keep=$1
  local min_to_keep=$2
  local expired_zfs
  for expired_zfs in $(
    ${ZFS} list -r -o creation,name -S creation -t snapshot "${DST_POOL}" | \
      ${AWK} \
        -v days_to_keep="${days_to_keep}" \
        -v min_to_keep="${min_to_keep}" \
        -v pool="${DST_POOL}" \
        '
        BEGIN{
          # map month abbreviations to their number
          split("Jan:Feb:Mar:Apr:May:Jun:Jul:Aug:Sep:Oct:Nov:Dec",mon,":");
          for(m in mon){
            month[mon[m]]=m
          };
          expire_date=systime()-days_to_keep*60*60*24
        }
        # creation column looks like: Tue Jun  2 14:28 2015
        index($NF, pool "/")==1 || index($NF, pool "@")==1 {
          filesystem=$NF;
          gsub(/@.*$/,"",filesystem);
          split($4,time,":");
          filesystem_date=mktime(sprintf("%d %02d %02d %02d %02d 00", $5, month[$2], $3, time[1], time[2]));
          count[filesystem]++;
          if(filesystem_date < expire_date && count[filesystem] > min_to_keep )
          {
            print $NF;
          }
        }')
  do
    # names and dates are data, never part of the printf format
    printf '%s Destroying snapshot %s\n' "$(${DATE})" "${expired_zfs}"
    ${ZFS} destroy "${expired_zfs}"
  done
}
#######################################
# List the source datasets this host has to back up.
# A filesystem or volume qualifies when the third column of the source
# listing (the backup property) equals this host's name. A dataset is dropped
# again as soon as one of its descendants ("<name>/...") qualifies as well.
# Globals:   AWK, MYSELF, SRC_DATASETS (read)
# Outputs:   one dataset name per line, in no particular order
#######################################
function get_src_list () {
  ${AWK} -v backup_server="${MYSELF}" '
    ( $2=="filesystem" || $2=="volume" ) && $3==backup_server {
      path[$1]=1;
      for(name in path){
        # Drop name only if it is a real ancestor of $1. Requiring the "/"
        # keeps siblings that merely share a prefix (tank/a vs. tank/ab).
        if( path[name]!=0 && index($1, name "/")==1 ){
          path[name]=0;
        }
      }
    }
    END{
      for(name in path){
        if(path[name]==1) print name
      }
    }
  ' "${SRC_DATASETS}"
}
#######################################
# Print the first snapshot of a dataset found in a listing file.
# The dataset name is matched literally as "<dataset>@" at the start of the
# first column, so neither regex metacharacters in the name nor a longer path
# that merely contains the name can match.
# Globals:   AWK (read)
# Arguments: $1 - dataset name (without "@...")
#            $2 - listing file (columns: name, type, ...)
# Outputs:   the snapshot name, or an empty line if there is none
#######################################
function first_snapshot () {
  ${AWK} -v zfs="${1}@" '
    $2=="snapshot" && index($1, zfs)==1 {
      first=$1;
      # stop reading, the END rule still runs
      exit;
    }
    END{
      print first;
    }
  ' "$2"
}
#######################################
# Print the last snapshot of a dataset found in a listing file.
# The dataset name is matched literally as "<dataset>@" at the start of the
# first column, so neither regex metacharacters in the name nor a longer path
# that merely contains the name can match.
# Globals:   AWK (read)
# Arguments: $1 - dataset name (without "@...")
#            $2 - listing file (columns: name, type, ...)
# Outputs:   the snapshot name, or an empty line if there is none
#######################################
function last_snapshot () {
  ${AWK} -v zfs="${1}@" '
    $2=="snapshot" && index($1, zfs)==1 {
      last=$1;
    }
    END{
      print last;
    }
  ' "$2"
}
#######################################
# Pull an incremental snapshot range from the source host.
# Runs "zfs send -I @<first-snap> <last>" on the source via ssh, pipes it
# through mbuffer and feeds it into a local "zfs recv -vFd <dst_pool>".
# With SECURE=yes the mbuffer connection goes through an ssh remote port
# forward, otherwise the sender connects directly to DST_HOST.
# Globals:   SECURE, MBUFFER, MBUFFER_OPTS, MBUFFER_PORT, ZFS, SSH, SRC_USER,
#            SRC_HOST, DST_HOST, TMP_FILE1, TMP_FILE2 (read)
# Arguments: $1 - source host        (not used, SRC_HOST is used)
#            $2 - source listing file (not used)
#            $3 - first snapshot (full name, start of the range)
#            $4 - last snapshot  (full name, end of the range)
#            $5 - destination pool
#            $6 - destination listing file (not used)
# Outputs:   md5 and summary lines of both mbuffer sides
# Returns:   0 if sender and receiver both succeeded, 1 otherwise.
#            With fewer than 6 arguments the script is terminated via "end 1".
#######################################
function get_recursive () {
  if [ $# -lt 6 ] ; then
    echo "Called from line ${BASH_LINENO[0]} with $# Arguments"
    end 1
  fi
  # Everything is local: the caller uses variables named first/last/src_zfs too
  local first=$3
  local last=$4
  local dst_pool=$5
  # "@snapname" part of the first snapshot, as expected by "zfs send -I"
  local first_snap="@${first#*@}"
  local listen_addr send_addr recv_pid send_pid rc=0
  local local_md5 remote_md5 local_summary remote_summary
  local -a tunnel=()

  if [ "_${SECURE}_" == "_yes_" ] ; then
    # receiver listens on loopback only, sender reaches it through ssh -R
    listen_addr="127.0.0.1:${MBUFFER_PORT}"
    send_addr="127.0.0.1:${MBUFFER_PORT}"
    tunnel=(-R "${MBUFFER_PORT}:127.0.0.1:${MBUFFER_PORT}")
  else
    listen_addr="${MBUFFER_PORT}"
    send_addr="${DST_HOST}:${MBUFFER_PORT}"
  fi

  echo "Getting snapshots from ${first} to ${last}..."
  # setup receiver ($! is the PID of zfs recv, the last pipeline member)
  ${MBUFFER} ${MBUFFER_OPTS} -l "${TMP_FILE1}" -I "${listen_addr}" | \
    ${ZFS} recv -vFd "${dst_pool}" 2>&1 &
  recv_pid=$!
  # start sender
  ${SSH} ${SRC_USER:+"${SRC_USER}@"}${SRC_HOST} "${tunnel[@]}" \
    "${ZFS} send -I ${first_snap} ${last} | ${MBUFFER} ${MBUFFER_OPTS} -O ${send_addr} 2>&1" > "${TMP_FILE2}" &
  send_pid=$!

  # Wait for each side separately so a failure is not lost; callers chain
  # the snapshot expiry with && and must not run it after a failed transfer.
  wait "${send_pid}" || rc=1
  wait "${recv_pid}" || rc=1

  local_md5=$(grep md5 "${TMP_FILE1}")
  remote_md5=$(grep md5 "${TMP_FILE2}")
  local_summary=$(grep summary "${TMP_FILE1}")
  remote_summary=$(grep summary "${TMP_FILE2}")
  printf "remote %s\nlocal %s\n" "${remote_md5}" "${local_md5}"
  printf "remote %s\nlocal %s\n" "${remote_summary}" "${local_summary}"
  rm -f "${TMP_FILE1}" "${TMP_FILE2}"

  if [ "${rc}" -ne 0 ] ; then
    echo "Transfer from ${first} to ${last} failed" >&2
  fi
  return "${rc}"
}
#######################################
# Pull one snapshot as a replication stream from the source host.
# Runs "zfs send -R <snapshot>" on the source via ssh, pipes it through
# mbuffer and feeds it into a local "zfs recv -vFd <dst_pool>". Nothing is
# transferred if is_available finds the snapshot name in the destination list.
# With SECURE=yes the mbuffer connection goes through an ssh remote port
# forward, otherwise the sender connects directly to DST_HOST.
# Globals:   SECURE, MBUFFER, MBUFFER_OPTS, MBUFFER_PORT, ZFS, SSH, SRC_USER,
#            SRC_HOST, DST_HOST, TMP_FILE1, TMP_FILE2 (read)
# Arguments: $1 - source host         (not used, SRC_HOST is used)
#            $2 - source listing file (not used)
#            $3 - snapshot to fetch (full source name)
#            $4 - destination pool
#            $5 - destination listing file
# Outputs:   md5 and summary lines of both mbuffer sides
# Returns:   0 if nothing had to be done or the transfer succeeded, 1 otherwise
#######################################
function get_snapshot () {
  local zfs=$3
  local dst_pool=$4
  local dst_datasets=$5
  local listen_addr send_addr recv_pid send_pid rc=0
  local local_md5 remote_md5 local_summary remote_summary
  local -a tunnel=()

  # NOTE(review): this looks up the *source* name in the destination listing,
  # which can only match when both pools have the same name -- confirm whether
  # the name should be converted to the destination pool first.
  if [ -n "$(is_available ${zfs} ${dst_datasets})" ] ; then
    return 0
  fi

  if [ "_${SECURE}_" == "_yes_" ] ; then
    # receiver listens on loopback only, sender reaches it through ssh -R
    listen_addr="127.0.0.1:${MBUFFER_PORT}"
    send_addr="127.0.0.1:${MBUFFER_PORT}"
    tunnel=(-R "${MBUFFER_PORT}:127.0.0.1:${MBUFFER_PORT}")
  else
    listen_addr="${MBUFFER_PORT}"
    send_addr="${DST_HOST}:${MBUFFER_PORT}"
  fi

  echo "Getting snapshot ${zfs}..."
  # setup receiver ($! is the PID of zfs recv, the last pipeline member)
  ${MBUFFER} ${MBUFFER_OPTS} -l "${TMP_FILE1}" -I "${listen_addr}" | \
    ${ZFS} recv -vFd "${dst_pool}" 2>&1 &
  recv_pid=$!
  # start sender
  ${SSH} ${SRC_USER:+"${SRC_USER}@"}${SRC_HOST} "${tunnel[@]}" \
    "${ZFS} send -R ${zfs} | ${MBUFFER} ${MBUFFER_OPTS} -O ${send_addr} 2>&1" > "${TMP_FILE2}" &
  send_pid=$!

  # Wait for each side separately so a failed transfer is reported
  wait "${send_pid}" || rc=1
  wait "${recv_pid}" || rc=1

  local_md5=$(grep md5 "${TMP_FILE1}")
  remote_md5=$(grep md5 "${TMP_FILE2}")
  local_summary=$(grep summary "${TMP_FILE1}")
  remote_summary=$(grep summary "${TMP_FILE2}")
  printf "remote %s\nlocal %s\n" "${remote_md5}" "${local_md5}"
  printf "remote %s\nlocal %s\n" "${remote_summary}" "${local_summary}"
  rm -f "${TMP_FILE1}" "${TMP_FILE2}"

  if [ "${rc}" -ne 0 ] ; then
    echo "Transfer of ${zfs} failed" >&2
  fi
  return "${rc}"
}
#######################################
# Print the current time in the form used for backup snapshot names.
# Globals:   DATE (read)
# Outputs:   YYYYmmdd-HH:MM on stdout
#######################################
function timestamp () {
  local now
  now=$(${DATE} '+%Y%m%d-%H:%M')
  printf '%s\n' "${now}"
}
#######################################
# Remove superseded "@backup..." snapshots of one dataset on both sides.
# Every snapshot "<dataset>@backup*" that is listed before the snapshot to
# keep is destroyed, first on the source (via ssh), then on the destination.
# Nothing is deleted unless the destination copy of the snapshot to keep
# exists. Names are compared literally as prefixes, so a dataset whose name
# only matches as a regular expression is never selected.
# Globals:   AWK, ZFS, SSH, SRC_USER, SRC_HOST, SRC_POOL (read)
# Arguments: $1 - source host               (not used, SRC_HOST is used)
#            $2 - source listing file
#            $3 - destination listing file
#            $4 - newest source backup snapshot, which must be kept
#            $5 - destination pool
# Returns:   1 if the destination copy of $4 is missing, 0 otherwise
#######################################
function expire_backup_snapshots () {
  local src_datasets=$2
  local dst_datasets=$3
  local src_last_to_keep=$4
  local dst_pool=$5
  local src_zfs dst_zfs dst_last_to_keep
  local src_backup_snapshot dst_backup_snapshot

  src_zfs=${src_last_to_keep%%@*}
  dst_zfs=$(convert_to_poolname "${src_zfs}" "${SRC_POOL}" "${dst_pool}")
  dst_last_to_keep=$(convert_to_poolname "${src_last_to_keep}" "${SRC_POOL}" "${dst_pool}")

  echo "Deleting old backup snapshots before ${dst_last_to_keep}"
  if ! ( ${ZFS} list -o name "${dst_last_to_keep}" >/dev/null 2>&1 ) ; then
    echo "Strange we do not have the copy of ${dst_last_to_keep} => STOP!"
    return 1
  fi

  for src_backup_snapshot in $(${AWK} -v src_backup="${src_zfs}@backup" -v src_last_to_keep="${src_last_to_keep}" '
    # stop at the snapshot that has to survive
    $1 == src_last_to_keep {
      exit 0;
    }
    index($1, src_backup) == 1 {
      print $1;
    }
    ' "${src_datasets}")
  do
    printf '\tDeleting on src %s ...' "${src_backup_snapshot}"
    if ( ${SSH} ${SRC_USER:+"${SRC_USER}@"}${SRC_HOST} "${ZFS} destroy ${src_backup_snapshot}" ) ; then
      echo "done"
    else
      echo "failed"
    fi
  done

  for dst_backup_snapshot in $(${AWK} -v dst_backup="${dst_zfs}@backup" -v dst_last_to_keep="${dst_last_to_keep}" '
    # stop at the snapshot that has to survive
    $1 == dst_last_to_keep {
      exit 0;
    }
    index($1, dst_backup) == 1 {
      print $1;
    }
    ' "${dst_datasets}")
  do
    printf '\tDeleting on destination %s ...' "${dst_backup_snapshot}"
    if ( ${ZFS} destroy "${dst_backup_snapshot}" ) ; then
      echo "done"
    else
      echo "failed"
    fi
  done
}
#######################################
# Leave the script: remove the lock file, then exit.
# Globals:   LOCK_FILE (read)
# Arguments: $1 - exit status handed to "exit"
#######################################
function end () {
  local status=$1
  /usr/bin/rm -f ${LOCK_FILE}
  exit ${status}
}
# Main loop: one pass per source dataset that is marked for this backup host.
# For every dataset a new "@backup_<timestamp>" snapshot is created on the
# source; what is transferred depends on what the destination already has.
#
# NOTE: the positional arguments of get_recursive are intentionally left
# unquoted. An empty value then reduces the argument count, which triggers
# the argument-count guard inside get_recursive instead of a send with an
# empty snapshot name.
for src_zfs in $(get_src_list) ; do
  echo "Evaluating ${src_zfs}"
  dst_zfs=$(convert_to_poolname ${src_zfs} ${SRC_POOL} ${DST_POOL})
  last_src=$(last_snapshot ${src_zfs} ${SRC_DATASETS})
  last_dst=$(last_snapshot ${dst_zfs} ${DST_DATASETS})
  # Newest "@backup..." snapshot of exactly this dataset on either side
  # (literal prefix match, not a regular expression)
  last_backup_src=$(${AWK} -v zfs="${src_zfs}@backup" 'index($1, zfs)==1{last=$1}END{print last}' ${SRC_DATASETS})
  last_backup_dst=$(${AWK} -v zfs="${dst_zfs}@backup" 'index($1, zfs)==1{last=$1}END{print last}' ${DST_DATASETS})
  # Quoted, so an empty last_dst yields an empty result instead of shifting
  # the pool names into the wrong positions
  last_dst_on_src=$(convert_to_poolname "${last_dst}" ${DST_POOL} ${SRC_POOL})
  this_backup_src=${src_zfs}@backup_$(timestamp)
  # Create snapshot for incremental backups
  ${SSH} ${SRC_USER:+"${SRC_USER}@"}${SRC_HOST} "${ZFS} snapshot ${this_backup_src}"
  if [ -n "$(is_available ${dst_zfs} ${DST_DATASETS})" ] && [ -z "${last_dst}" ] ; then
    echo "zfs is on dst, but no snapshots. Getting ${last_src}..."
    get_snapshot ${SRC_HOST} ${SRC_DATASETS} ${last_src} ${DST_POOL} ${DST_DATASETS}
  # Look for last backup snapshot on destination
  elif [ -n "${last_backup_dst}" ] ; then
    # Name of last backup snapshot on src
    last_dst_backup_on_src=$(convert_to_poolname ${last_backup_dst} ${DST_POOL} ${SRC_POOL})
    # If converted name is not empty and snapshot is in the list of src snapshots
    # then get all snapshots from last backup until now
    if [ -n "${last_dst_backup_on_src}" ] ; then
      if [ -n "$(is_available ${last_dst_backup_on_src} ${SRC_DATASETS})" ] ; then
        # Get the snapshot of this backup
        printf "%s\tsnapshot\n" ${this_backup_src} >> ${SRC_DATASETS}
        get_recursive ${SRC_HOST} ${SRC_DATASETS} ${last_dst_backup_on_src} ${this_backup_src} ${DST_POOL} ${DST_DATASETS} && \
          expire_backup_snapshots ${SRC_HOST} ${SRC_DATASETS} ${DST_DATASETS} ${this_backup_src} ${DST_POOL}
      fi
    fi
  elif [ -n "$(is_available ${dst_zfs} ${DST_DATASETS})" ] ; then
    # No last backup snapshot on dst but we have snapshots
    if [ -n "$(is_available ${last_dst_on_src} ${SRC_DATASETS})" ] ; then
      echo "Try to backup from ${last_dst_on_src} to ${this_backup_src}"
      first=${last_dst_on_src}
      last=${last_src}
      get_recursive ${SRC_HOST} ${SRC_DATASETS} ${first} ${last} ${DST_POOL} ${DST_DATASETS} && \
        expire_backup_snapshots ${SRC_HOST} ${SRC_DATASETS} ${DST_DATASETS} ${this_backup_src} ${DST_POOL}
      # Get the snapshot of this backup
      printf "%s\tsnapshot\n" ${this_backup_src} >> ${SRC_DATASETS}
      get_recursive ${SRC_HOST} ${SRC_DATASETS} ${last} ${this_backup_src} ${DST_POOL} ${DST_DATASETS} && \
        expire_backup_snapshots ${SRC_HOST} ${SRC_DATASETS} ${DST_DATASETS} ${this_backup_src} ${DST_POOL}
    else
      echo "OK I tried hard... now it is your job..."
    fi
  else
    # No existing copies for this zfs. Get the last <INITIAL_COPIES> copies.
    # Only snapshots of exactly this dataset ("<dataset>@...") are counted;
    # datasets that merely share the name as a prefix are ignored.
    first=$(${AWK} -v zfs="${src_zfs}@" -v initial_copies=$((${INITIAL_COPIES}-1)) '
      $2=="snapshot" && index($1, zfs)==1 {
        snap[++count]=$1;
      }
      END {
        if(count>initial_copies){
          print snap[count-initial_copies]
        }else{
          print snap[1]
        }
      }' ${SRC_DATASETS})
    last=$( ${AWK} -v zfs="${src_zfs}@" '$2=="snapshot" && index($1, zfs)==1{last=$1}END{print last}' ${SRC_DATASETS} )
    get_snapshot ${SRC_HOST} ${SRC_DATASETS} ${first} ${DST_POOL} ${DST_DATASETS}
    get_recursive ${SRC_HOST} ${SRC_DATASETS} ${first} ${last} ${DST_POOL} ${DST_DATASETS} && \
      expire_backup_snapshots ${SRC_HOST} ${SRC_DATASETS} ${DST_DATASETS} ${this_backup_src} ${DST_POOL}
    # Get the snapshot of this backup
    printf "%s\tsnapshot\n" ${this_backup_src} >> ${SRC_DATASETS}
    get_recursive ${SRC_HOST} ${SRC_DATASETS} ${last} ${this_backup_src} ${DST_POOL} ${DST_DATASETS} && \
      expire_backup_snapshots ${SRC_HOST} ${SRC_DATASETS} ${DST_DATASETS} ${this_backup_src} ${DST_POOL}
  fi
  echo
  echo --------------------------------------------------------------------------------
  date
  echo
done
#######################################
# Print the time between two epoch timestamps as "DURATION: HH:MM.SS".
# Plain integer arithmetic: independent of the local time zone, and runs of
# 24 hours or more show the full hour count instead of wrapping around.
# Arguments: $1 - start (epoch seconds), $2 - end (epoch seconds)
#######################################
function print_duration () {
  local elapsed=$(( ${2:-0} - ${1:-0} ))
  printf 'DURATION: %02d:%02d.%02d\n' \
    $(( elapsed / 3600 )) $(( elapsed % 3600 / 60 )) $(( elapsed % 60 ))
}

# expire_dst_pool_snapshots days_to_keep min_to_keep
expire_dst_pool_snapshots 34 70
# Report end time and total run time, then release the lock and exit
END_TIME=$(${AWK} 'BEGIN{printf systime();}')
${AWK} -v time=${END_TIME} 'BEGIN{print "END :",strftime("%d.%m.%Y %H:%M.%S",time)}'
print_duration "${START_TIME}" "${END_TIME}"
end 0