#! /bin/sh

#
# fshash                                                        (jh,28.06.2011)
#

#
#   fshash: checks a directory of hashsums against a directory of files
#   Copyright (C) 2010, 2011  Jochen Hepp <jochen.hepp@gmx.de>
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#


# Name this script was invoked as (directory part removed); used in the
# usage text and in error messages.
script=${0##*/}
# Version string printed by print_version.
version=0.0.3
# Checksum program; a non-empty HASH from the environment takes precedence.
HASH="${HASH:-md5sum}"


#
# --- usage ---
#

usage () {
	# Print the help text on standard output.
	# The hash root shown as default is resolved first: a non-empty HASHROOT
	# is kept, otherwise DATAROOT followed by HASHNAME is used; one trailing
	# slash is dropped and a slash appended.  The global HASHROOT is
	# updated as a side effect.
	: "${HASHROOT:=$DATAROOT$HASHNAME}"
	HASHROOT="${HASHROOT%/}/"
	cat <<EOF
Usage: $script OPTIONS COMMAND FILES ...

Options:
       --hash=HASH       use HASH, default is $HASH
       --hashroot=DIR    search hashes of files in DIR, default is $HASHROOT
       --dataroot=DIR    search files in DIR, default is $DATAROOT
       -n                no changes, only print what would be done normally
       -v                verbose output
       -V  --version     display version number
       -h  --help        display this help and exit

Command:
       generate          compute hash file from data files
       add               add new files to the corresponding hash files
       check             read hash files and check them with data files
       lost              a hash exists but the data file is lost
       lostquick         a hash exists but the data file or directory is lost
       found             no hash exists but a data file is found
       foundquick        no hash exists but a data file or directory is found
       lostfound
       foundlost         print lost and found files
       lostfoundquick
       foundlostquick    print lost and found files or directories
       hashsum           print stored hashsum(s) in hashroot of data file(s)
       hardlink          replace FILES who are duplicates of files in dataroot
                         with hard links
       mkdir             create corresponding directories in data- and hashroot
       rmdir             remove corresponding directories in data- and hashroot
       mv                move corresponding entries in data- and hashroot
       rm                remove corresponding entries in data- and hashroot
EOF
}


#
# --- version ---
#

print_version () {
	# Print the program name and version followed by the copyright and
	# warranty notice on standard output.
	cat <<EOF
$script $version

Copyright (C) 2011 Jochen Hepp
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Written by Jochen Hepp <jochen.hepp@gmx.de>.
EOF
}


#
# --- check root DATAROOT ---
#

check_dataroot () {
	# Terminate the script with status 1 unless DATAROOT is an existing
	# directory.  The message shows DATAROOT without one trailing slash.
	if [ ! -d "$DATAROOT" ]; then
		echo "$script: ${DATAROOT%/}: dataroot directory not found" >&2
		exit 1
	fi
}


#
# --- check root HASHROOT ---
#

check_hashroot () {
	# Terminate the script with status 1 when
	#   - DATAROOT lies below HASHROOT (HASHROOT is a proper string prefix
	#     of DATAROOT), or
	#   - HASHROOT is not an existing directory.
	# Identical roots are accepted.  The prefix test is done with quoted
	# case patterns so that glob characters in HASHROOT are compared
	# literally.
	case "$DATAROOT" in
		"$HASHROOT")
			;;
		"$HASHROOT"*)
			echo "$script: dataroot is a subdirectory of hashroot" >&2
			exit 1
			;;
	esac
	if [ ! -d "$HASHROOT" ]; then
		echo "$script: ${HASHROOT%/}: hashroot directory not found" >&2
		exit 1
	fi
}


#
# --- check HASHROOT and DATAROOT ---
#

check_roots () {
	# Run both root checks, data root first; the status of the hash root
	# check is what this function returns.
	check_dataroot; check_hashroot
}


#
# --- check filedir ---
#

check_filedir () { # dir
	# Return quietly when the path given as $1 exists; otherwise report it
	# on stderr (an empty path is shown as ".") and exit with status 1.
	[ -e "$1" ] && return
	echo "$script: ${1:-.}: no file or directory found" >&2
	exit 1
}


#
# --- generate a hash for a file or subdirectory ---
#

generate () { # file/directory
	# Write the checksum list of one data file or directory tree into a
	# single hash file at the mirrored path below HASHROOT.
	#
	# $1: data path, given with or without the DATAROOT prefix.
	# Reads the globals DATAROOT, HASHROOT, HASHNAME, HASH, nochanges and
	# verbose.  With verbose set, the hash file name and the checksum lines
	# are copied to file descriptor 5; with nochanges set no directory or
	# hash file is written.
	# Exits with status 1 when the data path does not exist (check_filedir)
	# or when a directory already exists under the hash file name.
	local filedir="${1%/}/"
	local hashdir
	local hashbasedir
	local datadir
	# The prefix is quoted so that glob characters in DATAROOT are matched
	# literally rather than as a pattern.
	filedir="${filedir#"$DATAROOT"}"
	filedir="${filedir#/}"
	filedir="${filedir%/}"
	hashdir="$HASHROOT$filedir"
	hashbasedir="${hashdir%/*}"
	datadir="$DATAROOT$filedir"

	check_filedir "$datadir"

	if [ ! -d "$hashbasedir" ]; then
		if [ -z "$nochanges" ]; then
			mkdir -p "$hashbasedir"
		fi
	elif [ -d "$hashdir" ]; then
		echo "$script: ${filedir:-.}: hash in subdirectory exists" >&2
		exit 1
	fi

	if [ "$verbose" ]; then
		echo "$hashdir:" >&5
	fi

	# directory recursive: names are recorded relative to the directory
	# ("./..."), and ./$HASHNAME is skipped
	if [ -d "$datadir" ]; then
		cd "$datadir" && \
		find . -path "./$HASHNAME" -prune -o -type f -print0 | \
		xargs -0 --no-run-if-empty $HASH
	# single file: hashed from standard input, so the recorded name is "-"
	elif [ -f "$datadir" ]; then
		$HASH <"$datadir"
	fi | \
	if [ -z "$nochanges" ]; then
		if [ "$verbose" ]; then
			tee "$hashdir" >&5
			echo >&5
		else
			cat > "$hashdir"
		fi
	else
		if [ "$verbose" ]; then
			cat >&5
			echo >&5
		else
			cat >/dev/null
		fi
	fi
}


#
# --- add a hash for a file or subdirectory and print added files ---
#

add () { # file/directory
	# Create the missing hash entry for one data path and print the added
	# name(s), relative to DATAROOT, on standard output.
	#
	# $1: data path, given with or without the DATAROOT prefix.
	# Reads the globals DATAROOT, HASHROOT, HASH, nochanges and verbose.
	# Exits with status 1 (via check_filedir) when the data path is missing.
	local filedir="${1%/}/"
	local hashdir
	local hashbasedir
	local datadir
	local subdir
	local sedpat
	# Prefixes are quoted so that glob characters in the roots are matched
	# literally rather than as patterns.
	filedir="${filedir#"$DATAROOT"}"
	filedir="${filedir#/}"
	filedir="${filedir%/}"
	hashdir="$HASHROOT$filedir"
	datadir="$DATAROOT$filedir"

	check_filedir "$datadir"

	# Walk upwards until an existing hash entry is found: hashbasedir is the
	# deepest existing ancestor, hashdir the missing component right below it.
	hashbasedir="$hashdir"
	while [ ! -e "$hashbasedir" ]; do
		hashdir="$hashbasedir"
		hashbasedir="${hashbasedir%/*}"
	done

	# directory recursive: the ancestor is a hash directory, so a new hash
	# file is generated for the missing component and its entries are listed
	if [ -d "$hashbasedir" ]; then
		subdir="${hashdir#"$HASHROOT"}"
		generate "$subdir"
		sedpat="$(echo "$subdir" | sed 's%/%\\/%g')"
		sed "s%^[^ ]*  %%; s%./%%; s/^/$sedpat\//" "$hashdir"
	# single file with name: the ancestor is a hash file, so the checksum of
	# the data file is appended to it
	elif [ -f "$hashbasedir" ]; then
		if [ "$verbose" ]; then
			echo "$hashbasedir: (append)" >&5
		fi
		subdir="${hashbasedir#"$HASHROOT"}"
		cd "$DATAROOT$subdir" && \
		$HASH "./${filedir#"$subdir"/}" | \
		if [ -z "$nochanges" ]; then
			if [ "$verbose" ]; then
				tee -a "$hashbasedir" >&5
				echo >&5
			else
				cat >> "$hashbasedir"
			fi
		else
			if [ "$verbose" ]; then
				cat >&5
				echo >&5
			else
				cat >/dev/null
			fi
		fi
		echo "$filedir"
	fi
}


#
# --- search a hash for a file or subdirectory ---
#

search () { # file/directory
	# Print the stored checksum lines that belong to one data path, with
	# every file name rewritten to be relative to the roots.
	#
	# $1: data path, given with or without the DATAROOT prefix.
	# Reads the globals DATAROOT and HASHROOT and changes the working
	# directory to HASHROOT in the directory case.
	# Exits with status 1 when the data path is missing (check_filedir) or
	# no hash file covers it.
	local filedir="${1%/}/"
	local hashdir
	local hashbasedir
	local datadir
	local sedpat1
	local sedpat2
	# Prefixes are quoted so that glob characters are matched literally.
	filedir="${filedir#"$DATAROOT"}"
	filedir="${filedir#/}"
	filedir="${filedir%/}"
	hashdir="$HASHROOT$filedir"
	datadir="$DATAROOT$filedir"

	check_filedir "$datadir"

	# deepest existing ancestor of the mirrored hash path
	hashbasedir="$hashdir"
	while [ ! -e "$hashbasedir" ]; do
		hashbasedir="${hashbasedir%/*}"
	done

	# directory recursive: the mirrored path itself exists.  xargs replaces
	# {} inside the sed scripts too, so each line gets the path of the hash
	# file it came from as prefix ("-" entries become that path itself).
	# The dots are escaped so that only a literal "./" is removed and a
	# real one-character path component is kept.
	if [ "$hashbasedir" = "$hashdir" ]; then
		cd "$HASHROOT" && \
		find "${filedir:-.}" -type f -print0 | \
		xargs -0 --no-run-if-empty -I {} \
		sed -e '/  -$/ { s%  -$%  {}%; s%  \./%  %; b }' \
		    -e 's%  \./%  %; s%  %  {}/%; s%  \./%  %' "{}"
	# single file: the path is covered by a hash file of an ancestor
	# directory; print only the matching entries of that file
	else
		if [ ! -f "$hashbasedir" ]; then
			echo "$script: $filedir: no hash found" >&2
			exit 1
		fi

		filedir="${hashdir#"$hashbasedir"/}"
		# NOTE(review): only "/" is escaped here; other regex characters
		# in the names (such as ".") still act as patterns in the sed
		# address below.
		sedpat1="$(echo "$filedir" | sed 's%/%\\/%g')"
		sedpat2="$(echo "${hashbasedir#"$HASHROOT"}" | sed 's%/%\\/%g')"
		sed -n -e 's%  \./%  %' \
		       -e "/  $sedpat1\(\/\|$\)/{ s/  /  $sedpat2\//; p }" \
		    "$hashbasedir"
	fi
}


#
# --- diff hash and filesystem files ---
#

diffhash () { # mode, filedir, tmpfile
	# Compare the file names recorded in the hashes with the files present
	# below one data path and print the differences.
	#
	# $1: 'lost' prints names that only have a hash, 'found' prints names
	#     that only exist as data; any other value prints both, prefixed
	#     with "lost: " and "found: ".
	# $2: data path, given with or without the DATAROOT prefix.
	# $3: scratch file that receives the sorted list of hashed names.
	# Reads the globals DATAROOT and HASHNAME and changes the working
	# directory to DATAROOT.
	local mode="$1"
	local filedir="${2%/}/"
	local tmpfile="$3"
	local sedpat
	local datadir
	# The prefix is quoted so that glob characters are matched literally.
	filedir="${filedir#"$DATAROOT"}"
	filedir="${filedir#/}"
	filedir="${filedir%/}"
	datadir="$DATAROOT$filedir"

	check_filedir "$datadir"

	if [ "$mode" = 'lost' ]; then
		sedpat='s/^-//; t print; d'
	elif [ "$mode" = 'found' ]; then
		sedpat='s/^+//; t print; d'
	else
		sedpat='s/^+/found: /; s/^-/lost: /'
	fi

	# names known to the hashes (checksum column removed), written to the
	# scratch file passed by the caller
	search "$datadir" | \
	sed 's/^[^ ]*  //' | \
	sort >"$tmpfile"
	# names present as data; only a literal leading "./" is removed so that
	# one-character directory names stay intact
	cd "$DATAROOT" && \
	find "${filedir:-.}" -path "./$HASHNAME" -prune -o -type f -print | \
	sed 's%^\./%%' | \
	sort | \
	diff -U0 "$tmpfile" - | \
	sed -n -e '/^\(@@\|--- \|+++ \)/d' \
	       -e "$sedpat" -e ':print' -e 'p'
}


#
# --- quick ---
#

quick () { # mode file/directory
	# Compare hashes and data by existence only, without reading the hash
	# files.
	#
	# $1: 'lost' lists hash files without a data entry of the same name,
	#     'found' lists data directories holding regular files that are not
	#     below any hash file; any other value lists both, prefixed with
	#     "lost: " and "found: ".
	# $2: data path, given with or without the DATAROOT prefix.
	# Reads the globals DATAROOT, HASHROOT and HASHNAME and changes the
	# working directory.
	local mode="$1"
	local filedir="${2%/}/"
	local hashdir
	local datadir
	local subdir
	local msg
	local sedpat
	# Prefixes are quoted so that glob characters are matched literally.
	filedir="${filedir#"$DATAROOT"}"
	filedir="${filedir#/}"
	filedir="${filedir%/}"
	hashdir="$HASHROOT$filedir"
	datadir="$DATAROOT$filedir"

	check_filedir "$datadir"

	if [ "$mode" != 'found' ]; then
		if [ "$mode" != 'lost' ]; then
			msg='lost: '
		fi
		# IFS= and -r keep names with surrounding blanks or backslashes
		# intact
		cd "$hashdir" && \
		find . -type f | \
		while IFS= read -r file; do
			if [ ! -e "$datadir/$file" ]; then
				echo "$msg$filedir${filedir:+/}${file#./}"
			fi
		done
	fi

	if [ "$mode" != 'lost' ]; then
		if [ "$mode" != 'found' ]; then
			msg='found: '
		fi
		subdir="${datadir#"$DATAROOT"}"
		sedpat="$(echo "${subdir:-.}/" | sed 's%/%\\/%g')"
		# An entry that has a hash file of the same name is pruned; for
		# every remaining regular file its directory is printed once.
		cd "$datadir" && \
		find . -mindepth 1 -path "./$HASHNAME" -prune -o \
		     \( -exec test -f "$hashdir/"{} \; -prune -o \
		     \( -type f -printf "%h\n" \) \) | \
		sort | \
		uniq | \
		sed -e "s/$/\//; s%^\./%%; s/^/$sedpat/; s%^\./%%; s%/$%%; s%^$%.%" \
		    -e "s/^/$msg/"
	fi
}


#
# --- hardlink ---
#

hardlink () { # input: hashsum file
	# Replace duplicates among the files listed on standard input by hard
	# links.
	#
	# Input: checksum lines ("HASH  NAME") of the candidate files.
	# Reads the globals HASH, nochanges and verbose; with verbose set the
	# name of every replaced file is printed, with nochanges set no link is
	# made.  Three temporary files are used and removed again.
	local hashsum
	local hashsumlast=
	local part
	local file
	local filelast=
	local inode=
	local inodelast=
	# length of the checksum field produced by $HASH
	local hashlen="$(($(echo | $HASH | cut -d' ' -f1 | wc -c)-1))"
	# field separator for sort/join
	# NOTE(review): assumes the shell's echo turns "\b" into one backspace
	# character; a shell whose echo prints it literally yields a
	# two-character separator -- confirm for the shells in use.
	local t="$(echo "\b")"
	local partfile="$(tmpfile)"
	local part1file="$(tmpfile)"
	local morefile="$(tmpfile)"

	# candidates, sorted by checksum
	sed "s/  /$t/" | \
	sort -t"$t" -k 1b,1 > "$partfile"
	if [ ! -s "$partfile" ]; then
		# nothing to do; do not leave the temporary files behind
		rm "$partfile" "$part1file" "$morefile"
		return 0
	fi
	# candidates tagged with part number 1
	sed "s/$t/${t}1$t/" <"$partfile" >"$part1file"

	# hashed files that are not among the candidates, one per checksum
	# NOTE(review): datadir is not a local of this function; in the visible
	# caller it is unset, which makes search cover the whole hash tree --
	# confirm that this is intended.
	search "$datadir" | \
	sed "s/  /$t/" | \
	sort -t"$t" -k 1b,1 | \
	diff -U0 "$partfile" - | \
	grep '^+[^+]' | \
	cut -c2- | \
	uniq -w $hashlen >"$morefile"

	# For checksums shared with such an outside file, put that file (tagged
	# 0) in front of the candidates with the same checksum.
	if [ -s "$morefile" ]; then
		uniq -w $hashlen "$partfile" | \
		join -t"$t" -1 1 -o 2.1,2.2 - "$morefile" | \
		sed "s/$t/${t}0$t/" | \
		LC_ALL=C sort - "$part1file"
	else
		cat "$part1file"
	fi | \
	sed "s/$t/ /; s/$t/ /" | \
	# NOTE(review): $dataroot (lower case) is not assigned anywhere in the
	# visible file, so the names below are resolved relative to the current
	# directory, which the visible caller sets to DATAROOT -- confirm
	# whether $DATAROOT was meant.
	while read -r hashsum part file; do
		if [ "$hashsum" != "$hashsumlast" ]; then
			# first file of a checksum group becomes the link source;
			# its inode is looked up lazily below, so the inode
			# remembered from the previous group must be dropped
			hashsumlast="$hashsum"
			filelast="$file"
			inodelast=
			inode=
		elif [ "$part" = '1' ]; then
			if [ -z "$inodelast" ]; then
				inodelast="$(ls -i "$dataroot$filelast" | cut -d' ' -f1)"
			fi
			inode="$(ls -i "$dataroot$file" | cut -d' ' -f1)"
			if [ "$inode" != "$inodelast" ]; then
				# link only when the contents really are identical
				if cmp -s "$dataroot$file" "$dataroot$filelast"; then
					if [ "$verbose" ]; then
						echo "$file"
					fi
					if [ -z "$nochanges" ]; then
						ln -f "$dataroot$filelast" "$dataroot$file"
					fi
					inode="$inodelast"
				else
					# same checksum, different content: continue
					# with this file as the link source
					inodelast="$inode"
					filelast="$file"
				fi
			fi
		fi
	done

	rm "$partfile" "$part1file" "$morefile"
}


#
# --- file_name (absolute file name) ---
#

file_name () { # filename
	# Print the absolute, textually normalised form of a file name.
	#
	# $1: file name; a name that does not start with "/" is taken relative
	#     to the directory stored in the global pwd.
	# "." components, repeated slashes and "component/.." pairs are removed
	# by text substitution only; the file system is not consulted.
	local file="$pwd/$1"

	# absolute filename?
	case "$1" in
		/*) file="$1" ;;
	esac

	# printf instead of echo: the name is passed on unchanged even if it
	# contains backslash sequences
	printf '%s\n' "$file" | \
	sed 's%^\./%%; s%/\./%/%g; s%/\.$%/%; s%///*%/%g;
	     :up /\/[^/]\+\/\.\.\(\/\|$\)/ { s%/[^/]\+/\.\.\(/\|$\)%\1%g; b up; };
	     s%^\(\.\./\)\+%/%'
}


#
# --- create temporary file ---
#

tmpfile () {
	# Create an empty temporary file named after the script in the
	# temporary directory and print its path; on failure print an error on
	# stderr and exit with status 1.
	local path="$(mktemp --tmpdir "$script.XXXXXXXXXX")"
	if [ -f "$path" ]; then
		echo "$path"
	else
		echo "$script: unable to create temporary file" >&2
		exit 1
	fi
}


#
# --- main ---
#

# run-time state: option flags, last argument processed, overall result
nochanges= verbose= cmd=
res=0
# directory the script was started in; file_name resolves relative names
# against it
pwd=$PWD
# data root: defaults to "/", always stored with a trailing slash
: "${DATAROOT:=/}"
DATAROOT="${DATAROOT%/}/"
# upper-case name of the hash program
HASHNAME=$(echo $HASH | tr 'a-z' 'A-Z')
# file descriptors: 5=output, 6=warnings
exec 5>&1
exec 6>&2

# Process the command line word by word.  Options update the global
# settings; the first command word consumes all remaining words as its
# FILES.  A failing command stores its exit status in res.
while [ $# -gt 0 ]; do
	cmd="$1"
	shift

	case "$cmd" in
		-n)
			nochanges=yes
			;;
		-v)
			verbose=yes
			;;
		-h|--help)
			usage
			exit 0
			;;
		-V|--version)
			print_version
			exit 0
			;;
		--hash=*)
			HASH="${cmd#--hash=}"
			HASH="${HASH:-md5sum}"
			HASHNAME="$(echo $HASH | tr 'a-z' 'A-Z')"
			;;
		--dataroot=*)
			DATAROOT="${cmd#--dataroot=}"
			DATAROOT="${DATAROOT:-/}"
			DATAROOT="${DATAROOT%/}/"
			;;
		--hashroot=*)
			HASHROOT="${cmd#--hashroot=}"
			HASHROOT="${HASHROOT%/}/"
			;;
		generate)
			HASHROOT="${HASHROOT:-$DATAROOT$HASHNAME}"
			HASHROOT="${HASHROOT%/}/"
			check_dataroot
			while [ $# -gt 0 ]; do
				# "|| res=$?" records the status of generate itself;
				# testing $? in a separate [ ] would overwrite it
				generate "$(file_name "$1")" || res="$?"
				shift
			done
			;;
		add)
			HASHROOT="${HASHROOT:-$DATAROOT$HASHNAME}"
			HASHROOT="${HASHROOT%/}/"
			check_roots
			hashfile="$(tmpfile)"
			newfile="$(tmpfile)"
			while [ $# -gt 0 ]; do
				# The loop runs in a subshell of the pipeline, so a
				# failure is handed back through the subshell's exit
				# status instead of a variable.
				diffhash 'found' "$(file_name "$1")" "$hashfile" | \
				(
					rc=0
					while IFS= read -r file; do
						# skip names already covered by an
						# earlier add (literal whole-line match)
						if ! grep -F -x -q -e "$file" "$newfile"; then
							add "$file" >> "$newfile" || rc="$?"
						fi
					done
					exit "$rc"
				) || res="$?"
				shift
			done
			rm "$hashfile" "$newfile"
			;;
		hashsum|check|hardlink)
			HASHROOT="${HASHROOT:-$DATAROOT$HASHNAME}"
			HASHROOT="${HASHROOT%/}/"
			check_roots
			# the stored checksum lines of all FILES are piped into the
			# command-specific consumer, which runs in DATAROOT
			cd "$DATAROOT" && \
			while [ $# -gt 0 ]; do
				search "$(file_name "$1")"
				shift
			done | \
			case "$cmd" in
				hashsum)
					sed 's%  %  ./%'
					;;
				check)
					if [ "$verbose" ]; then
						$HASH -c -
					else
						$HASH -c --quiet -
					fi
					;;
				hardlink)
					hardlink
					;;
			esac
			res="$?"
			shift $#
			;;
		lost|found|lost+found|lostfound|foundlost)
			HASHROOT="${HASHROOT:-$DATAROOT$HASHNAME}"
			HASHROOT="${HASHROOT%/}/"
			check_roots
			hashfile="$(tmpfile)"
			while [ $# -gt 0 ]; do
				diffhash "$cmd" "$(file_name "$1")" "$hashfile" || res="$?"
				shift
			done
			rm "$hashfile"
			;;
		lostquick|foundquick|lostquick+foundquick|lostfoundquick|foundlostquick)
			HASHROOT="${HASHROOT:-$DATAROOT$HASHNAME}"
			HASHROOT="${HASHROOT%/}/"
			check_roots
			cmd="${cmd%quick}"
			while [ $# -gt 0 ]; do
				quick "$cmd" "$(file_name "$1")" || res="$?"
				shift
			done
			;;
		mkdir|rmdir|mv|rm)
			HASHROOT="${HASHROOT:-$DATAROOT$HASHNAME}"
			HASHROOT="${HASHROOT%/}/"
			check_roots
			# directory below HASHROOT that mirrors the start directory;
			# the prefix is quoted so that it is matched literally
			hashdir="$pwd/"
			hashdir="${hashdir#"$DATAROOT"}"
			hashdir="$HASHROOT${hashdir#/}"
			if [ "$verbose" ]; then
				verbose='-v'
			fi
			if [ -z "$nochanges" ]; then
				# run the command in the current directory first and,
				# only if that worked, again in the mirrored hash
				# directory; $verbose is left unquoted on purpose so
				# that an empty value adds no argument
				"$cmd" $verbose "$@" && \
				cd "$hashdir" && \
				"$cmd" $verbose "$@" || res="$?"
			fi
			shift $#
			;;
		--*)
			echo "$script: unrecognized option \`$cmd'" >&2
			echo "$script: Try \`$script --help' for more information." >&2
			exit 1
			;;
		*)
			echo "$script: unrecognized command \`$cmd'" >&2
			echo "$script: Try \`$script --help' for more information." >&2
			exit 1
			;;
	esac
done

# A command consumes every word that follows it, so if the last word
# processed was an option -- or there was no word at all -- no command has
# been run: print the usage text on stderr and fail.
if [ $# -eq 0 ]; then
	case "$cmd" in
		''|-n|-v|--hash=*|--dataroot=*|--hashroot=*)
			usage >&2
			exit 1
			;;
	esac
fi

# Close the duplicated descriptors; the status of the final test (0 when
# res is 0, otherwise 1) is the last command status of this section.
exec 5>&- 6>&-

[ "$res" = 0 ]

# --- end ---

