#! /bin/sh

#
# hardlink                                                      (jh,01.06.2005)
#

#
#   hardlink: replaces identical files with hardlinks or symlinks
#   Copyright (C) 2005  Jochen Hepp <jochen.hepp@gmx.de>
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#


script="${0##*/}"
version="0.0.10"



# --- usage ---

print_usage () {
	# Write the usage text to stdout, one output line per printf argument.
	# Only the first line is variable: it carries the program name ($script).
	printf '%s\n' \
		"Usage: $script [OPTIONS] SRCDIR [DESTDIR]" \
		'' \
		'       -s  --symbolic    make symbolic links instead of hard links' \
		'           --script      print script instead of doing the work' \
		'       -V  --version     display version number' \
		'       -h  --help        display this help and exit'
	# Deliberately not part of the printed text:
	#      --count           count the bytes which can be saved by using links
}



# --- version ---

print_version () {
	# Write the program name, its version and the copyright notice to
	# stdout.  $script and $version are the only variable parts.
	printf '%s\n' \
		"$script $version" \
		'' \
		'Copyright (C) 2005 Jochen Hepp' \
		'This program is distributed in the hope that it will be useful,' \
		'but WITHOUT ANY WARRANTY; without even the implied warranty of' \
		'MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the' \
		'GNU General Public License for more details.' \
		'' \
		'Written by Jochen Hepp <jochen.hepp@gmx.de>.'
}



# --- copy ---

# Write to stdout a shell script which, when run,
#   - with DESTDIR: copies the files of SRCDIR that are not duplicates into
#     DESTDIR (tar pipe, existing files are kept) and then creates the links
#     and directories collected by link and empty_dirs inside DESTDIR;
#   - without DESTDIR (empty string): replaces duplicates inside SRCDIR
#     itself with links.
# MODE is handed to link unchanged.  Nothing is modified by this function
# itself; it only prints the script.
copy () { # mode srcdir destdir
	local mode="$1"
	local srcdir="$2"
	local destdir="$3"
	local workdir
	local cmdfile
	local copyfile
	local qscript
	local qsrcdir
	local qdestdir
	local qworkdir
	# sed script turning ' into '\'' so that a value can be embedded between
	# single quotes in the generated script
	local squote="s/'/'\\\\''/g"
	# End marker of the file list embedded in the generated script.  It
	# contains "//", which a path relative to SRCDIR as printed by find never
	# does, so no file name can end the list prematurely.
	local listend='//EOF//'

	cmdfile="$(tmpfile)"
	copyfile="$(tmpfile)"

	link "$mode" "$srcdir" "$destdir" "$copyfile" "$cmdfile"
	if [ -n "$destdir" ]; then
		empty_dirs "$srcdir" "$destdir" "$cmdfile"
		workdir="$destdir"
	else
		workdir="$srcdir"
	fi

	qscript="$(printf '%s\n' "$script" | sed "$squote")"
	qsrcdir="$(printf '%s\n' "$srcdir" | sed "$squote")"
	qdestdir="$(printf '%s\n' "$destdir" | sed "$squote")"
	qworkdir="$(printf '%s\n' "$workdir" | sed "$squote")"

	if [ -n "$destdir" ]; then
		cat <<-EOF
			#! /bin/sh

			#
			# automatically generated by $script to copy
			# from $srcdir
			# to   $destdir
			# and replace identical files with links
			#

			tarfile="\$(mktemp)"
			if [ ! -f "\$tarfile" ]; then
			   echo '$qscript: unable to create temporary file' >&2
			   exit 1
			fi

			cat >"\$tarfile" <<'$listend'
EOF

		cat "$copyfile"
		printf '%s\n' "$listend"

		# -C has to precede -T: tar applies a directory change only to the
		# names that follow it on the command line.
		cat <<-EOF

			tar -cf - -C '$qsrcdir' -T "\$tarfile" | \\
			tar -xkf - -C '$qdestdir'

			rm -f "\$tarfile"
EOF

	else
		cat <<-EOF
			#! /bin/sh

			#
			# automatically generated by $script to replace
			# in $srcdir
			# identical files with links
			#
EOF
	fi

	# The link commands use paths relative to the working directory, so the
	# generated script must not go on if the cd fails.
	cat <<-EOF

		pwd="\$PWD"
		cd '$qworkdir' || exit 1
EOF

	# Sorting moves the "if ... mkdir" lines in front of the "ln" lines.
	sort -s <"$cmdfile" | \
	uniq

	cat <<-EOF
		cd "\$pwd"

		# --- end ---

EOF

	rm -f "$copyfile"
	rm -f "$cmdfile"
}



# --- link ---

# Print STRING escaped for use between single quotes in generated shell
# code: every ' is replaced by '\''.
link_quote () { # string
	local quote="'"
	local rest="$1"
	local out=''
	local tail

	while :; do
		tail=${rest#*"$quote"}
		[ "$tail" != "$rest" ] || break
		out=$out${rest%%"$quote"*}$quote\\$quote$quote
		rest=$tail
	done
	printf '%s\n' "$out$rest"
}

# Print the target a symbolic link named FILE needs in order to point to
# FOUND.  Both arguments are paths relative to the same directory; a
# symlink target is resolved relative to the directory holding the link.
link_symlink_target () { # found file
	local to="$1"
	local from="$2"
	local up=''
	local tail

	# drop the leading directories both paths have in common
	while [ "${to%%/*}" != "$to" ] && [ "${from%%/*}" != "$from" ] && \
	      [ "${to%%/*}" = "${from%%/*}" ]; do
		to=${to#*/}
		from=${from#*/}
	done

	# one "../" for every directory left in the path of the link itself
	while :; do
		tail=${from#*/}
		[ "$tail" != "$from" ] || break
		up="../$up"
		from=$tail
	done
	printf '%s\n' "$up$to"
}

# Find files with identical content and record what has to be done:
#   - to CMDFILE are appended the shell commands (mkdir and ln, using paths
#     relative to the target directory) that create a link for every file of
#     SRCDIR which duplicates an earlier file;
#   - to COPYFILE are appended the names, relative to SRCDIR, of all other
#     files of SRCDIR, i.e. those that have to be copied.
# MODE is "ln" for hard links or "ln-s" for symbolic links; any other value
# makes the commands start with "echo".  With an empty DESTDIR the links
# replace the duplicates inside SRCDIR ("-f").  Files of DESTDIR are only
# used as link targets, never replaced.  Only non-empty regular files are
# compared.
# Candidate names are looked up in DESTDIR first and in SRCDIR second,
# because the per-size lists do not record where a name came from.
link () { # mode srcdir destdir copyfile cmdfile
	local mode="$1"
	local srcdir="$2"
	local destdir="$3"
	local copyfile="$4"
	local cmdfile="$5"
	local ln=echo
	local endopts=''
	local sizedir
	local files
	local tmp
	local line
	local cand
	local size
	local file
	local location
	local fileprefix
	local fdir
	local fdirold=''
	local found
	local target
	local qdir

	# "--" keeps file names that start with a dash from being read as options
	case "$mode" in
		ln)   ln='ln';    endopts=' --' ;;
		ln-s) ln='ln -s'; endopts=' --' ;;
	esac

	sizedir="$(tmpdir)"
	files="$(tmpfile)"
	tmp="$(tmpfile)"

	# One line per non-directory: "SIZE LOCATION NAME", LOCATION being 0 for
	# DESTDIR and 1 for SRCDIR; everything but regular files gets size 0.
	# The sort is stable, so within one size DESTDIR entries come first.
	{	if [ -n "$destdir" ]; then
			find "$destdir" \
			     -type f -printf '%s 0 %P\n' -or ! -type d -printf '0 0 %P\n';
		fi;
		find "$srcdir" \
		     -type f -printf '%s 1 %P\n' -or ! -type d -printf '0 1 %P\n'; } | \
	sort -t' ' -k1,2 -n -s >"$files"

	if [ -z "$destdir" ]; then
		ln="$ln -f"
		destdir="$srcdir"
	fi

	# keep only the lines whose size occurs more than once
	awk '
		NR > 1 && ($1 "") == (prev "") {
			if (pending) { print held; pending = 0 }
			print
			next
		}
		{ prev = $1; held = $0; pending = 1 }
	' <"$files" >"$tmp"

	while IFS= read -r line; do
		size="${line%% *}"
		file="${line#* [01] }"
		location="${line#* }"
		location="${location%% *}"
		if [ "$location" = "1" ]; then
			fileprefix="$srcdir"
		else
			fileprefix="$destdir"
		fi
		found=''

		# $sizedir/SIZE lists the files of that size seen so far which are
		# not duplicates of an earlier file
		if [ "$size" -ne 0 ] && [ -f "$sizedir/$size" ]; then
			while IFS= read -r cand; do
				if [ -f "$destdir/$cand" ] && \
				   cmp -s "$fileprefix/$file" "$destdir/$cand"; then
					found="$cand"
					break
				elif cmp -s "$fileprefix/$file" "$srcdir/$cand"; then
					found="$cand"
					break
				fi
			done <"$sizedir/$size"
		fi

		if [ -n "$found" ]; then
			if [ "$location" = "1" ]; then
				fdir="${file%/*}"
				if [ "$fdir" != "$file" ] && [ "$fdir" != "$fdirold" ]; then
					qdir="$(link_quote "$fdir")"
					printf '%s\n' \
					       "if [ ! -d '$qdir' ]; then mkdir -p -- '$qdir'; fi" \
					       >>"$cmdfile"
					fdirold="$fdir"
				fi
				target="$found"
				if [ "$mode" = 'ln-s' ]; then
					target="$(link_symlink_target "$found" "$file")"
				fi
				printf '%s\n' \
				       "$ln$endopts '$(link_quote "$target")' '$(link_quote "$file")'" \
				       >>"$cmdfile"
			fi
		else
			printf '%s\n' "$file" >>"$sizedir/$size"
			if [ "$location" = "1" ]; then
				printf '%s\n' "$file" >>"$copyfile"
			fi
		fi
	done <"$tmp"

	# files of SRCDIR whose size occurs only once cannot have a duplicate
	awk '
		NR > 1 && ($1 "") == (prev "") { pending = 0; next }
		{ if (pending) print held; prev = $1; held = $0; pending = 1 }
		END { if (pending) print held }
	' <"$files" | \
	sed -n 's/^[0-9]* 1 //p' >>"$copyfile"

	rm -f "$files"
	rm -rf "$sizedir"
	rm -f "$tmp"
}



# --- empty_dirs ---

# Append to CMDFILE, for every empty directory below SRCDIR, a shell command
# that creates a directory of the same relative name unless it exists.
# DESTDIR is accepted for the sake of the call signature but not used.
empty_dirs () { # srcdir destdir cmdfile
	local srcdir="$1"
	local destdir="$2"
	local cmdfile="$3"
	local tmp

	tmp="$(tmpfile)"

	# Every directory is listed once by itself and once more for each of its
	# entries, so exactly the empty directories end up listed only once.
	# -mindepth comes first because find treats it as a global option.
	find "$srcdir" -mindepth 1 -type d -printf './%P\n' >"$tmp"
	find "$srcdir" -mindepth 2 -printf './%P\n' | \
	sed 's%/[^/]*$%%' >>"$tmp"

	# Drop the "./" prefix, escape single quotes as '\'' and wrap each name
	# into the command.  LC_ALL=C makes ".*" match whole lines even when a
	# name is not valid in the current character encoding.
	sort "$tmp" | \
	uniq -u | \
	LC_ALL=C sed -e 's%^\./%%' \
	             -e "s/'/'\\\\''/g" \
	             -e "s%.*%if [ ! -d '&' ]; then mkdir -p -- '&'; fi%" \
	             >>"$cmdfile"

	rm -f "$tmp"
}



# --- tmpfile ---

# Create a temporary file with mktemp and print its name on stdout.
# On failure a message is written to stderr and the shell running the
# function exits with status 1 (inside "$(tmpfile)" that is the subshell of
# the command substitution).
tmpfile () {
	local tmp

	# "|| tmp=''" keeps a failing mktemp from ending a "set -e" shell before
	# the message below has been printed.
	tmp="$(mktemp)" || tmp=''
	if [ ! -f "$tmp" ]; then
		echo "$script: unable to create temporary file" >&2
		exit 1
	fi
	printf '%s\n' "$tmp"
}



# --- tmpdir ---

# Create a temporary directory with mktemp -d and print its name on stdout.
# On failure a message is written to stderr and the shell running the
# function exits with status 1 (inside "$(tmpdir)" that is the subshell of
# the command substitution).
tmpdir () {
	local tmp

	# "|| tmp=''" keeps a failing mktemp from ending a "set -e" shell before
	# the message below has been printed.
	tmp="$(mktemp -d)" || tmp=''
	if [ ! -d "$tmp" ]; then
		echo "$script: unable to create temporary directory" >&2
		exit 1
	fi
	printf '%s\n' "$tmp"
}



# --- main ---

# main {
	# Parse the command line, check the directories, let copy generate the
	# script and then either print it (--script) or run it with sh.
	set -e

	if [ $# -eq 0 ]; then
		print_usage >&2
		exit 1
	fi

	mode='ln'
	print=
	srcdir=
	destdir=

	# options are the leading arguments that start with a dash
	while [ $# -gt 0 ]; do
		arg="$1"
		case "$arg" in
			--symbolic|-s)
				mode='ln-s'
				;;
			--script)
				print=yes
				;;
			--version|-V)
				print_version
				exit 0
				;;
			--help|-h)
				print_usage
				exit 0
				;;
			-*)
				echo "$script: unrecognized option \`$arg'" >&2
				echo "$script: Try \`$script --help' for more information." >&2
				exit 1
				;;
			*)
				break
				;;
		esac
		shift
	done

	if [ $# -eq 1 ]; then
		srcdir="$1"
		destdir=''
	elif [ $# -eq 2 ]; then
		srcdir="$1"
		destdir="$2"
	else
		print_usage >&2
		exit 1
	fi

	# strip one trailing slash, but leave the root directory alone
	case "$srcdir" in
		/) ;;
		*/) srcdir="${srcdir%/}" ;;
	esac
	case "$destdir" in
		/) ;;
		*/) destdir="${destdir%/}" ;;
	esac

	if [ ! -d "$srcdir" ]; then
		echo "$script: $srcdir: no such directory" >&2
		exit 1
	fi
	if [ $# -eq 2 ] && [ ! -d "$destdir" ]; then
		echo "$script: $destdir: no such directory" >&2
		exit 1
	fi

	# Generate the whole script into a file before printing or running it.
	# When it is piped straight into sh, a failure half-way through the
	# generation goes unnoticed and sh runs whatever was written so far.
	scriptfile="$(tmpfile)"
	trap 'rm -f "$scriptfile"' EXIT
	trap 'exit 1' HUP INT TERM

	# the subshell keeps the variables changed by copy away from this shell;
	# if it fails, "set -e" ends the program here
	( copy "$mode" "$srcdir" "$destdir" ) >"$scriptfile"

	if [ -n "$print" ]; then
		cat "$scriptfile"
	else
		sh "$scriptfile"
	fi

	exit 0
# }


# --- end ---

