#!/bin/sh

#
# babarchive_prep_directories
# Copyright (C) 1998-2016 by John Heidemann
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License along
#    with this program; if not, write to the Free Software Foundation, Inc.,
#    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

# Turn on pipefail only where the running shell supports it.  The probe runs
# in a subshell because an unsupported "set -o" option may abort a
# non-interactive sh outright; without pipefail the script still works, it
# just cannot notice failures in the early stages of its pipelines.
(set -o pipefail) 2>/dev/null && set -o pipefail

# Print the command-line help on stderr and exit with status 1.
# Takes no arguments and never returns to the caller.
usage () {
    cat 1>&2 <<END
usage: $0 [-a ALG] [-j N] [-n] [-x] directory...

Babarchive_prep_directories checksums each given directory (and its
subdirectories) as the root and leaves of a new babarchive,
creating a checksum file (.shasum, .sha1sum, or .md5sum, depending on
the algorithm) in each directory that lists all of its files.

Options:
    -a ALG   specify algorithm (sha256, sha1, or md5; defaults to sha256)
    -j N     run N parallel sums concurrently (defaults to 4)
    -n       non-recursive
    -x       exclude root (don't checksum top level dir)
END
    exit 1
}


# Print an error message on stderr and exit with status 1.
# Arguments: the words of the message; they are joined with single spaces.
# printf is used instead of echo so that a message such as "-n" or one
# containing backslashes is printed verbatim rather than being treated as an
# echo option or escape sequence.
die () {
	printf '%s\n' "$*" 1>&2
	exit 1
}

# Option defaults.  The "auto" placeholders are replaced with concrete values
# by the algorithm case statement that follows option parsing.
alg=auto
algprog=""
algargs=""
algprefix=auto
algsuffix=auto
# Extra find(1) arguments; each is either empty or an option plus its value.
recursive_args=''
exclude_root=''
while getopts "a:nj:x" ch
do
        case $ch in
        a) alg=$OPTARG;;
        j)
                # N is later used as an xargs -P value and as a divisor, so
                # reject anything that is not a positive decimal integer here
                # instead of failing obscurely in the middle of a run.
                case $OPTARG in
                ''|*[!0-9]*) die "invalid -j value '$OPTARG': expected a positive integer";;
                esac
                [ "$OPTARG" -gt 0 ] || die "invalid -j value '$OPTARG': expected a positive integer"
                parallel_sums=$OPTARG;;
        n) recursive_args='-maxdepth 0';;
        x) exclude_root='-mindepth 1' ;;
        *) usage;;
        esac
done
# Drop the parsed options so that "$@" holds only the directory arguments.
shift $((OPTIND - 1))

# Map the requested algorithm name onto the checksum settings:
#   algprog / algargs  - the program to run and its arguments
#   algprefix          - names the per-directory output file, .${algprefix}sum
#   algsuffix          - set here; not read in the visible part of this script
# An unrecognized name aborts via die.
case "$alg" in
	md5)
		alg=md5
		algprog=md5sum
		algargs=""
		algprefix=md5
		algsuffix=jdb
		;;
	sha1 | 1)
		alg=1
		algprog=sha1sum
		algargs=""
		algprefix=sha1
		algsuffix=jdb
		;;
	sha256 | 256 | auto)
		# alg must be assigned before algargs, which embeds its value.
		alg=256
		algprog=shasum
		algargs="--binary -a $alg"
		algprefix=sha
		algsuffix=fsdb
		;;
	*)
		die "unknown algorithm $alg"
		;;
esac


# Scratch files live in a private directory created by mktemp(1) rather than
# under a predictable /tmp/checksum_directories-PID prefix.  The template is
# kept under /tmp so that the path contains no characters needing quoting
# when it is embedded in the "bash -c" command string below.
tmpdir=$(mktemp -d /tmp/checksum_directories-XXXXXX) || die "ERROR cannot create temporary directory"
files=$tmpdir/files
# Subshells do not inherit these traps, so cleanup runs exactly once, when
# the main shell exits (normally, via die/exit, or after a signal).
trap 'rm -rf "$tmpdir"' EXIT
trap 'exit 1' HUP INT TERM

PARALLEL_SUMS=4
# Number of concurrent checksum processes: the -j value if one was given,
# otherwise the default.
par=${parallel_sums:-$PARALLEL_SUMS}

# For every root directory given on the command line, visit the directories
# selected by find and write a .${algprefix}sum file into each one, listing
# the checksums of the regular files directly inside it.
for root in "$@"
do
(
	# Abort rather than continue in the wrong directory if the cd fails.
	cd -- "$root" || die "ERROR cannot cd to $root"
	# $recursive_args and $exclude_root are deliberately unquoted: each is
	# either empty or an option plus its value that find must see as words.
	find . $recursive_args $exclude_root -type d -print |
	while IFS= read -r d
	do
		# Progress report on stdout.
		printf '%s\n' "$root/$d"
		(
			cd "$d" || die "ERROR cannot cd to $root/$d"
			# NUL-separated list of files to sum, skipping the checksum
			# files this script itself produces.
			find . -maxdepth 1 ! \( -name .md5sum -o -name .sha1sum -o -name .shasum \) -type f -print0 > "$files.tosum" || die "ERROR checksumming $d"
			if [ -s "$files.tosum" ]; then
				# Count the NUL terminators to get the number of files;
				# the arithmetic pass strips any padding wc may emit.
				tot_cnt=$(tr -d -c '\000' < "$files.tosum" | wc -c)
				tot_cnt=$((tot_cnt + 0))
				# Files per xargs batch so the work is spread over $par jobs.
				opt_cnt=$(( (tot_cnt + par - 1) / par ))
				[ "$opt_cnt" = "0" ] && opt_cnt=1
				rm -f "$files".sums-*
				# xargs appends the file names after the -c string, so the
				# first one lands in $0: there MUST be $0 in the command.
				# Each batch writes to its own file, keyed by the PID of
				# the bash process running it.
				xargs -0 -n "$opt_cnt" -P "$par" -- bash -c "${algprog} ${algargs} \"\$0\" \"\$@\" >$files.sums-\$\$" < "$files.tosum" || die "ERROR checksumming $d"
				# Merge the per-batch outputs, ordered by file name.
				cat "$files".sums-* | LC_COLLATE=C sort -k 2,2 > "$files.sums" || die "ERROR sorting checksums for $d"
				mv "$files.sums" ".${algprefix}sum" || die "ERROR writing checksums for $d"
				rm -f "$files.tosum" "$files".sums-*
				# Sanity check: one output line per input file.
				out_cnt=$(wc -l < ".${algprefix}sum")
				out_cnt=$((out_cnt + 0))
				[ "$out_cnt" -eq "$tot_cnt" ] || die "ERROR mismatched counts: for $root/$d: $tot_cnt != $out_cnt"
			else
				# No files here: leave an empty checksum file.
				: > ".${algprefix}sum"
			fi
		) || exit 1
	done
	# The subshell's status is that of the pipeline above; propagate it so a
	# failure in any directory makes the whole script fail.
) || exit 1
done

exit 0

