#!/bin/sh
#
# Use rsync to back up a directory tree or filesystem.
#
# Copyright (c) 2009 Douglas G. Henke -- Released under GPL v2
# See http://www.gnu.org/licenses/gpl-2.0.txt for full license terms.
#
# This script uses the rsync(1) command to efficiently make successive
# (local) copies of a filesystem or part thereof.
#
# The first time it is run, it simply makes a recursive copy in the
# manner of "cp -a". On subsequent runs, files which have not changed
# are hard-linked from the previous copy to the new copy. This effectively
# combines the completeness of a full backup with the time and storage
# efficiency of an incremental backup. It also makes browsing and recovery
# very simple -- it's just a regular filesystem.
#
# Today's copy is placed in a directory: $ARCHIVE/$PREFIX-YYYYMMDD
# (see configuration options below). If you make more than one backup
# during a single calendar day, the second will have "-01" appended
# to the name, the third "-02" and so on (up to a maximum).
#
# Caveat: Files in the backup retain original permissions and ownership.
# While this is nice in the sense that users can retrieve their own
# files, it also means users can destroy their own backups. (Removing
# a file impacts only one generation, but altering the contents hits
# every instance back to the last change.) Read-only bind mounts are
# a good solution to this problem.
#
# Caveat: rsync decides if a file has changed based on the stat info
# (mtime and size) not the contents. If you deliberately set out to
# fool it, you can.
#

#### Configuration options: ############################################

# TOP is the root of the directory hierarchy to back up. The copy is
# recursive but never crosses into another filesystem. The value should
# end with a slash.
TOP="/"

# ARCHIVE is the directory that holds every backup generation; by
# default it is named after this machine's hostname. Keep nothing in it
# except backups of $TOP, and do not place it "underneath" $TOP unless
# it lives on a different filesystem.
ARCHIVE="/private-mnt/backup/$(hostname)"

# PREFIX is the leading part of each generation's directory name.
PREFIX="backup"

# MAXGEN is the highest numeric suffix ("-01", "-02", ...) that will be
# tried when today's generation name is already in use; once all of
# them exist the script gives up.
MAXGEN="20"

#### End of configuration options ######################################

# Short name of this script; it prefixes every message printed below.
PROG="$(basename "$0")"

# All settings come from the configuration variables in this file, so
# any command-line argument is a usage error.
if [ "$#" -gt 0 ] ; then
   echo "usage: $PROG" 1>&2
   exit 10
fi

# Refuse to run unless the archive directory is already present; the
# script never creates it, so its absence most likely means the backup
# device is not mounted.
[ -d "$ARCHIVE" ] || {
   echo "$PROG error: no directory $ARCHIVE -- is backup device mounted?" 1>&2
   exit 10
}

# Today's generation is named after the calendar date.
CUR="$ARCHIVE/$PREFIX-$(date +%Y%m%d)"

# When that name is already in use, probe "$CUR-01", "$CUR-02", ... and
# settle on the first name that does not exist yet. Abort once the
# suffix would exceed MAXGEN.
if [ -e "$CUR" ] ; then
   GEN=1
   CANDIDATE="$(printf '%s-%02d' "$CUR" "$GEN")"
   while [ -e "$CANDIDATE" ] ; do
      GEN=$((GEN + 1))
      if [ "$GEN" -gt "$MAXGEN" ] ; then
         # CUR still holds the un-suffixed name here.
         echo 1>&2 "$PROG error: already $MAXGEN gens of $CUR"
         exit 10
      fi
      CANDIDATE="$(printf '%s-%02d' "$CUR" "$GEN")"
   done
   CUR="$CANDIDATE"
fi

# Let PREV be the full path of the most recently modified directory
# directly inside $ARCHIVE whose name starts with $PREFIX.
#
# -mindepth 1 keeps $ARCHIVE itself out of the candidates; without it,
# an archive directory whose own name happens to start with $PREFIX
# would match -name and could be picked as the "previous" generation.
#
# When nothing matches, the result depends on the xargs implementation:
# some (e.g. GNU xargs without -r) still run "ls -1td" once with no
# operands, which prints "." (dot); others do not run ls at all, which
# leaves PREV empty. Both outcomes mean "no previous generation".
PREV="$(find "$ARCHIVE" -mindepth 1 -maxdepth 1 -type d -name "$PREFIX*" -print0 |
 xargs -0 ls -1td 2>/dev/null | head -n 1)"

# If there's no previous directory, specify an artificial one.
# Note: "=" is the only string-equality operator POSIX guarantees for
# "[" -- "==" fails under shells such as dash, and this script runs
# under /bin/sh.
if [ -z "$PREV" ] || [ "$PREV" = "." ] ; then
   PREV="$ARCHIVE/$PREFIX-zero"
fi

# Report which generation is being written and which one unchanged
# files will be hard-linked against.
echo "$PROG: target is \"$CUR\""
echo "$PROG: previous is \"$PREV\""

# Copy $TOP into $CUR:
#   -a  archive mode (recursive, preserving ownership, modes, times)
#   -x  stay on one filesystem
#   -v  list files as they are transferred
#   --delete     remove destination entries that are absent from $TOP
#   --filter     honour per-directory ".backup-filter" rule files
#   --link-dest  hard-link files that are unchanged relative to $PREV
rsync -axv --delete --filter='dir-merge /.backup-filter' \
 --link-dest="$PREV" "$TOP" "$CUR"
RC=$?

# Stamp the new generation with the current time -- presumably so the
# newest-by-mtime search on the next run selects it as "previous".
# Only do this when rsync actually created the directory: touching a
# missing path would create an empty regular file that occupies the
# generation name.
if [ -d "$CUR" ] ; then
   if ! touch "$CUR" && [ "$RC" -eq 0 ] ; then
      RC=1
   fi
fi

# Do not let a failed backup pass as success: report rsync's status
# and hand it back to the caller.
if [ "$RC" -ne 0 ] ; then
   echo 1>&2 "$PROG error: backup of $TOP to $CUR failed (status $RC)"
   exit "$RC"
fi
