Linux: How to sync large binary files to another host

rsync is not well suited to files larger than about 10 MB ("rsync is hugely inefficient with huge files. Even with --inplace it will first read the whole file on the target host and THEN begin reading the file on the local host and transfer the differences").

I prefer bdsync (see this post)

bdsync

Installation

# download the bdsync source (master branch archive from GitHub), unpack it
# and enter the extracted source directory
wget https://github.com/TargetHolding/bdsync/archive/master.zip
unzip master.zip
cd bdsync-master/
# install requirements for the build
# NOTE(review): presumably needs root privileges -- prefix with sudo if you are not root
apt install libssl-dev pandoc
# compile (produces the ./bdsync binary in the current directory)
make
# test: run the freshly built binary without arguments to check that it starts
./bdsync 

Example script to sync all files to another host

cat /opt/bin/sync-vd
#!/bin/bash
# Settings used by the rest of this script.

# Fixed command search path, assembled group by group. The final value is
# the same single colon-separated list as before; note that the
# /opt/bin.glb:/opt/bin pair appears twice in it.
PATH="/home/ndemou/bin:/home/ndemou/.local/bin"
PATH+=":/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
PATH+=":/usr/games:/usr/local/games:/snap/bin"
PATH+=":/opt/bin.glb:/opt/bin:/opt/bin.glb:/opt/bin"

# Remote side: ssh/rsync destination and the directory files are synced into.
REMOTE_HOST="ndemou@10.1.11.61"
BASE_DIR_REMOTE="/home/ndemou/vmhdd"
BIN_PATH_REMOTE="/opt/bin"

# Local side: directory holding the files to sync and the bdsync location.
BASE_DIR_LOCAL="/mnt/vmhdd"
BIN_PATH_LOCAL="/opt/bin"

# Compressed bdsync patch file; the same path is used on both hosts.
PATCH="/tmp/patch.bdsync.gz"

#######################################
# Sync one file to the remote host.
#
# Files smaller than 10000000 bytes are copied with plain rsync. Larger
# files are first compared by time/size (rsync --dry-run -i); if they
# differ, a bdsync patch is generated locally, compressed with pigz, sent
# to the remote host, applied there, and finally the remote mtime is set
# to the local mtime observed before the sync started.
#
# Globals (read): BASE_DIR_LOCAL, BASE_DIR_REMOTE, BIN_PATH_LOCAL,
#                 BIN_PATH_REMOTE, REMOTE_HOST, PATCH
# Arguments: $1 - file path relative to BASE_DIR_LOCAL / BASE_DIR_REMOTE
# Outputs:   progress, WARNING and ERROR messages on stdout
# Returns:   0 when the file was synced or needed no sync,
#            non-zero when any step failed
#######################################
sync_1_file()
{
  local FILE="$1"
  local src="$BASE_DIR_LOCAL/$FILE"
  local dst="$BASE_DIR_REMOTE/$FILE"
  local mtime_before mtime_after size patch_size changes
  local remote_patch remote_dst remote_script
  local -a pipe_status
  local status=0

  # Without this check an unreadable file yields an empty size, which
  # compares as 0 and would wrongly take the small-file path.
  if ! mtime_before=$(stat -c %Y -- "$src") || ! size=$(stat -c %s -- "$src"); then
    echo "ERROR: cannot stat local file, skipping, $FILE"
    return 1
  fi

  if [[ $size -lt 10000000 ]] ; then
    echo "$(date +%H:%M:%S) rsyncing, $FILE, $size bytes"
    if ! rsync "$src" "$REMOTE_HOST:$dst"; then
      echo "ERROR while rsyncing"
      status=1
    fi
    echo "$(date +%H:%M:%S) done rsyncing"
    return "$status"
  fi

  # Itemized dry run: rsync prints a line for the file only when it would
  # transfer it, i.e. when time/size differ. Captured in a variable so no
  # predictable temp file is needed.
  if ! changes=$(rsync --dry-run -i "$src" "$REMOTE_HOST:$dst"); then
    echo "ERROR checking if files have the same time/size, skipping, $FILE"
    return 1
  fi
  if [[ $changes != *f* ]]; then
    echo "Files have same time/size -- skipping, $FILE"
    return 0
  fi

  echo "$(date +%H:%M:%S) bdsyncing file $FILE, $size bytes"
  "$BIN_PATH_LOCAL/bdsync" "ssh $REMOTE_HOST $BIN_PATH_REMOTE/bdsync --server" \
    "$src" "$dst" --diffsize=resize | pigz > "$PATCH"
  pipe_status=("${PIPESTATUS[@]}")  # must be read right after the pipeline
  if [[ ${pipe_status[0]} -ne 0 || ${pipe_status[1]} -ne 0 ]] ; then
    echo "ERROR while generating patch with bdsync, $FILE"
    return 1
  fi

  patch_size=$(stat -c %s -- "$PATCH")
  echo "$(date +%H:%M:%S) patch file was generated ($patch_size bytes)"
  if [[ $patch_size -lt 25 ]] ; then
    echo "ERROR: patch is too small ($patch_size bytes)"
    return 1
  fi

  # Send the patch; the local copy is removed whether or not this works.
  if ! rsync "$PATCH" "$REMOTE_HOST:$PATCH"; then
    echo "ERROR while sending patch to remote host, $FILE"
    rm -f -- "$PATCH"
    return 1
  fi
  rm -f -- "$PATCH"

  # Quote the paths for the remote shell, which re-parses what ssh sends.
  printf -v remote_patch '%q' "$PATCH"
  printf -v remote_dst '%q' "$dst"

  # ALL-DONE is printed only if the remote bdsync --patch succeeds, so a
  # failed patch is reported instead of being mistaken for success. The
  # remote patch file is removed in either case.
  remote_script="pigz -d < $remote_patch | $BIN_PATH_REMOTE/bdsync --patch=$remote_dst --diffsize=resize && echo ALL-DONE
rm -f -- $remote_patch"
  if ! ssh -T "$REMOTE_HOST" <<<"$remote_script" | grep -q "ALL-DONE"; then
    # Leave the remote mtime alone: stamping it with the local mtime after
    # a failed patch could make the next run skip this file as unchanged.
    echo "ERROR during patching at remote server"
    return 1
  fi

  mtime_after=$(stat -c %Y -- "$src")
  # The remote copy gets the mtime seen BEFORE the sync, so a file that was
  # modified while syncing still differs on the next run.
  if ! ssh "$REMOTE_HOST" "touch -c -d @$mtime_before -- $remote_dst" </dev/null; then
    echo "ERROR setting mtime at remote server, $FILE"
    status=1
  fi
  if [[ $mtime_before != "$mtime_after" ]] ; then
    echo "WARNING: local mtime changed - disk probably in use"
  fi

  echo "$(date +%H:%M:%S) Patch applied"
  return "$status"
}

#sync_1_file foswiki/Foswiki-s002.vmdk

# Everything below uses paths relative to the local base directory, so
# refuse to continue if it cannot be entered.
cd "$BASE_DIR_LOCAL" || { echo "ERROR: cannot cd to $BASE_DIR_LOCAL"; exit 1; }

# --file <relative path>: sync just that one file and exit with its status.
if [[ "$1" = "--file" ]] ; then
  if [[ -z "$2" ]] ; then
    echo "ERROR: --file requires a file name relative to $BASE_DIR_LOCAL"
    exit 2
  fi
  sync_1_file "$2"
  exit
fi

# Default: sync every *.vdi / *.vmdk file below the base directory.
# Paths containing a space are skipped (grep -v ' '), and the leading "./"
# that find prints is stripped so names are relative to the base directory.
find . \( -name '*.vdi' -or -name '*.vmdk' \) | grep -v ' ' | sed -e 's/^\.\///' | while IFS= read -r file; do
  sync_1_file "$file"
  sleep 1 || exit 3 # eases stopping execution with ctrl-C
done

.

This topic: KnowledgeBase > LinuxFastCopyOfLargeBinaries
Topic revision: 05 Nov 2018, NickDemou
Copyright © enLogic