#!/bin/bash
# Copyright 1999-2013 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
# Author: Karl Trygve Kalleberg <karltk@gentoo.org>
# Rewritten from the old, Perl-based emerge-webrsync script
# Author: Alon Bar-Lev <alon.barlev@gmail.com>
# Major rewrite from Karl's scripts.

# TODO:
#  - all output should prob be converted to e* funcs
#  - add support for ROOT

#
# gpg key import
# KEY_ID=0x96D8BF6D
# gpg --homedir /etc/portage/gnupg --keyserver subkeys.pgp.net --recv-keys $KEY_ID
# gpg --homedir /etc/portage/gnupg --edit-key $KEY_ID trust
#

# Only echo if in verbose mode
vvecho() { [[ ${do_verbose} -eq 1 ]] && echo "$@" ; }
# Only echo if not in verbose mode
nvecho() { [[ ${do_verbose} -eq 0 ]] && echo "$@" ; }
# warning echos
wecho() { echo "${argv0##*/}: warning: $*" 1>&2 ; }
# error echos
eecho() { echo "${argv0##*/}: error: $*" 1>&2 ; }

argv0=$0

# Use portageq from the same directory/prefix as the current script, so
# that we don't have to rely on PATH including the current EPREFIX.
scriptpath=${BASH_SOURCE[0]}
if [ -x "${scriptpath%/*}/portageq" ]; then
	portageq=${scriptpath%/*}/portageq
elif type -P portageq > /dev/null ; then
	portageq=portageq
else
	eecho "could not find 'portageq'; aborting"
	exit 1
fi
eval "$("${portageq}" envvar -v DISTDIR EPREFIX FEATURES \
	FETCHCOMMAND GENTOO_MIRRORS \
	PORTAGE_BIN_PATH PORTAGE_CONFIGROOT PORTAGE_GPG_DIR \
	PORTAGE_NICENESS PORTAGE_REPOSITORIES PORTAGE_RSYNC_EXTRA_OPTS \
	PORTAGE_RSYNC_OPTS PORTAGE_TMPDIR \
	USERLAND http_proxy ftp_proxy)"
export http_proxy ftp_proxy

source "${PORTAGE_BIN_PATH}"/isolated-functions.sh || exit 1

repo_name=gentoo
repo_location=$(__repo_key "${repo_name}" location)
if [[ -z ${repo_location} ]]; then
	eecho "Repository '${repo_name}' not found"
	exit 1
fi
repo_sync_type=$(__repo_key "${repo_name}" sync-type)

# If PORTAGE_NICENESS is overriden via the env then it will
# still pass through the portageq call and override properly.
if [ -n "${PORTAGE_NICENESS}" ]; then
	renice $PORTAGE_NICENESS $$ > /dev/null
fi

do_verbose=0
do_debug=0
keep=false

if has webrsync-gpg ${FEATURES} ; then
	WEBSYNC_VERIFY_SIGNATURE=1
else
	WEBSYNC_VERIFY_SIGNATURE=0
fi
if [ ${WEBSYNC_VERIFY_SIGNATURE} != 0 -a -z "${PORTAGE_GPG_DIR}" ]; then
	eecho "please set PORTAGE_GPG_DIR in make.conf"
	exit 1
fi

do_tar() {
	local file=$1; shift
	local decompressor
	case ${file} in
		*.xz)   decompressor="xzcat" ;;
		*.bz2)  decompressor="bzcat" ;;
		*.gz)   decompressor="zcat"  ;;
		*)      decompressor="cat"   ;;
	esac
	${decompressor} "${file}" | tar "$@"
	_pipestatus=${PIPESTATUS[*]}
	[[ ${_pipestatus// /} -eq 0 ]]
}

get_utc_date_in_seconds() {
	date -u +"%s"
}

get_date_part() {
	local utc_time_in_secs="$1"
	local part="$2"

	if	[[ ${USERLAND} == BSD ]] ; then
		date -r ${utc_time_in_secs} -u +"${part}"
	else
		date -d @${utc_time_in_secs} -u +"${part}"
	fi
}

get_utc_second_from_string() {
	local s="$1"
	if [[ ${USERLAND} == BSD ]] ; then
		# Specify zeros for the least significant digits, or else those
		# digits are inherited from the current system clock time.
		date -juf "%Y%m%d%H%M.%S" "${s}0000.00" +"%s"
	else
		date -d "${s:0:4}-${s:4:2}-${s:6:2}" -u +"%s"
	fi
}

get_portage_timestamp() {
	local portage_current_timestamp=0

	if [ -f "${repo_location}/metadata/timestamp.x" ]; then
		portage_current_timestamp=$(cut -f 1 -d " " "${repo_location}/metadata/timestamp.x" )
	fi

	echo "${portage_current_timestamp}"
}

fetch_file() {
	local URI="$1"
	local FILE="$2"
	local opts

	if [ "${FETCHCOMMAND/wget/}" != "${FETCHCOMMAND}" ]; then
		opts="--continue $(nvecho -q)"
	elif [ "${FETCHCOMMAND/curl/}" != "${FETCHCOMMAND}" ]; then
		opts="--continue-at - $(nvecho -s -f)"
	else
		rm -f "${DISTDIR}/${FILE}"
	fi

	__vecho "Fetching file ${FILE} ..."
	# already set DISTDIR=
	eval "${FETCHCOMMAND} ${opts}"
	if [[ $? -eq 0 && -s ${DISTDIR}/${FILE} ]] ; then
		return 0
	else
		rm -f "${DISTDIR}/${FILE}"
		return 1
	fi
}

check_file_digest() {
	local digest="$1"
	local file="$2"
	local r=1

	__vecho "Checking digest ..."

	if type -P md5sum > /dev/null; then
		local md5sum_output=$(md5sum "${file}")
		local digest_content=$(< "${digest}")
		[ "${md5sum_output%%[[:space:]]*}" = "${digest_content%%[[:space:]]*}" ] && r=0
	elif type -P md5 > /dev/null; then
		[ "$(md5 -q "${file}")" == "$(cut -d ' ' -f 1 "${digest}")" ] && r=0
	else
		eecho "cannot check digest: no suitable md5/md5sum binaries found"
	fi

	return "${r}"
}

check_file_signature() {
	local signature="$1"
	local file="$2"
	local r=1

	if [ ${WEBSYNC_VERIFY_SIGNATURE} != 0 ]; then

		__vecho "Checking signature ..."

		if type -P gpg > /dev/null; then
			gpg --homedir "${PORTAGE_GPG_DIR}" --verify "$signature" "$file" && r=0
		else
			eecho "cannot check signature: gpg binary not found"
			exit 1
		fi
	else
		r=0
	fi

	return "${r}"
}

get_snapshot_timestamp() {
	local file="$1"

	do_tar "${file}" --to-stdout -xf - portage/metadata/timestamp.x | cut -f 1 -d " "
}

sync_local() {
	local file="$1"

	__vecho "Syncing local tree ..."

	local ownership="portage:portage"
	if has usersync ${FEATURES} ; then
		case "${USERLAND}" in
			BSD)
				ownership=$(stat -f '%Su:%Sg' "${repo_location}")
				;;
			*)
				ownership=$(stat -c '%U:%G' "${repo_location}")
				;;
		esac
	fi

	if type -P tarsync > /dev/null ; then
		local chown_opts="-o ${ownership%:*} -g ${ownership#*:}"
		chown ${ownership} "${repo_location}" > /dev/null 2>&1 || chown_opts=""
		if ! tarsync $(vvecho -v) -s 1 ${chown_opts} \
			-e /distfiles -e /packages -e /local "${file}" "${repo_location}"; then
			eecho "tarsync failed; tarball is corrupt? (${file})"
			return 1
		fi
	else
		if ! do_tar "${file}" xf -; then
			eecho "tar failed to extract the image. tarball is corrupt? (${file})"
			rm -fr portage
			return 1
		fi

		# Free disk space
		${keep} || rm -f "${file}"

		local rsync_opts="${PORTAGE_RSYNC_OPTS} ${PORTAGE_RSYNC_EXTRA_OPTS}"
		if chown ${ownership} portage > /dev/null 2>&1; then
			chown -R ${ownership} portage
			rsync_opts+=" --owner --group"
		fi
		cd portage
		rsync ${rsync_opts} . "${repo_location%%/}"
		cd ..

		__vecho "Cleaning up ..."
		rm -fr portage
	fi

	if has metadata-transfer ${FEATURES} ; then
		__vecho "Updating cache ..."
		"${PORTAGE_BIN_PATH}/emerge" --metadata
	fi
	local post_sync=${PORTAGE_CONFIGROOT}etc/portage/bin/post_sync
	[ -x "${post_sync}" ] && "${post_sync}"
	# --quiet suppresses output if there are no relevant news items
	has news ${FEATURES} && "${PORTAGE_BIN_PATH}/emerge" --check-news --quiet
	return 0
}

do_snapshot() {
	local ignore_timestamp="$1"
	local date="$2"

	local r=1

	local base_file="portage-${date}.tar"

	local have_files=0
	local mirror

	local compressions=""
	# xz is not supported in app-arch/tarsync, so use
	# bz2 format if we have tarsync.
	if ! type -P tarsync > /dev/null ; then
		type -P xzcat > /dev/null && compressions="${compressions} xz"
	fi
	type -P bzcat > /dev/null && compressions="${compressions} bz2"
	type -P  zcat > /dev/null && compressions="${compressions} gz"
	if [[ -z ${compressions} ]] ; then
		eecho "unable to locate any decompressors (xzcat or bzcat or zcat)"
		exit 1
	fi

	for mirror in ${GENTOO_MIRRORS} ; do

		mirror=${mirror%/}
		__vecho "Trying to retrieve ${date} snapshot from ${mirror} ..."

		for compression in ${compressions} ; do
			local file="portage-${date}.tar.${compression}"
			local digest="${file}.md5sum"
			local signature="${file}.gpgsig"

			if [ -s "${DISTDIR}/${file}" -a -s "${DISTDIR}/${digest}" -a -s "${DISTDIR}/${signature}" ] ; then
				check_file_digest "${DISTDIR}/${digest}" "${DISTDIR}/${file}" && \
				check_file_signature "${DISTDIR}/${signature}" "${DISTDIR}/${file}" && \
				have_files=1
			fi

			if [ ${have_files} -eq 0 ] ; then
				fetch_file "${mirror}/snapshots/${digest}" "${digest}" && \
				fetch_file "${mirror}/snapshots/${signature}" "${signature}" && \
				fetch_file "${mirror}/snapshots/${file}" "${file}" && \
				check_file_digest "${DISTDIR}/${digest}" "${DISTDIR}/${file}" && \
				check_file_signature "${DISTDIR}/${signature}" "${DISTDIR}/${file}" && \
				have_files=1
			fi

			#
			# If timestamp is invalid
			# we want to try and retrieve
			# from a different mirror
			#
			if [ ${have_files} -eq 1 ]; then

				__vecho "Getting snapshot timestamp ..."
				local snapshot_timestamp=$(get_snapshot_timestamp "${DISTDIR}/${file}")

				if [ ${ignore_timestamp} == 0 ]; then
					if [ ${snapshot_timestamp} -lt $(get_portage_timestamp) ]; then
						wecho "portage is newer than snapshot"
						have_files=0
					fi
				else
					local utc_seconds=$(get_utc_second_from_string "${date}")

					#
					# Check that this snapshot
					# is what it claims to be ...
					#
					if [ ${snapshot_timestamp} -lt ${utc_seconds} ] || \
						[ ${snapshot_timestamp} -gt $((${utc_seconds}+ 2*86400)) ]; then

						wecho "snapshot timestamp is not in acceptable period"
						have_files=0
					fi
				fi
			fi

			if [ ${have_files} -eq 1 ]; then
				break
			else
				#
				# Remove files and use a different mirror
				#
				rm -f "${DISTDIR}/${file}" "${DISTDIR}/${digest}" "${DISTDIR}/${signature}"
			fi
		done

		[ ${have_files} -eq 1 ] && break
	done

	if [ ${have_files} -eq 1 ]; then
		sync_local "${DISTDIR}/${file}" && r=0
	else
		__vecho "${date} snapshot was not found"
	fi

	${keep} || rm -f "${DISTDIR}/${file}" "${DISTDIR}/${digest}" "${DISTDIR}/${signature}"
	return "${r}"
}

do_latest_snapshot() {
	local attempts=0
	local r=1

	__vecho "Fetching most recent snapshot ..."

	# The snapshot for a given day is generated at 00:45 UTC on the following
	# day, so the current day's snapshot (going by UTC time) hasn't been
	# generated yet.  Therefore, always start by looking for the previous day's
	# snapshot (for attempts=1, subtract 1 day from the current UTC time).

	# Timestamps that differ by less than 2 hours
	# are considered to be approximately equal.
	local min_time_diff=$(( 2 * 60 * 60 ))

	local existing_timestamp=$(get_portage_timestamp)
	local timestamp_difference
	local timestamp_problem
	local approx_snapshot_time
	local start_time=$(get_utc_date_in_seconds)
	local start_hour=$(get_date_part ${start_time} "%H")

	# Daily snapshots are created at 00:45 and are not
	# available until after 01:00. Don't waste time trying
	# to fetch a snapshot before it's been created.
	if [ ${start_hour} -lt 1 ] ; then
		(( start_time -= 86400 ))
	fi
	local snapshot_date=$(get_date_part ${start_time} "%Y%m%d")
	local snapshot_date_seconds=$(get_utc_second_from_string ${snapshot_date})

	while (( ${attempts} <  40 )) ; do
		(( attempts++ ))
		(( snapshot_date_seconds -= 86400 ))
		# snapshots are created at 00:45
		(( approx_snapshot_time = snapshot_date_seconds + 86400 + 2700 ))
		(( timestamp_difference = existing_timestamp - approx_snapshot_time ))
		[ ${timestamp_difference} -lt 0 ] && (( timestamp_difference = -1 * timestamp_difference ))
		snapshot_date=$(get_date_part ${snapshot_date_seconds} "%Y%m%d")

		timestamp_problem=""
		if [ ${timestamp_difference} -eq 0 ]; then
			timestamp_problem="is identical to"
		elif [ ${timestamp_difference} -lt ${min_time_diff} ]; then
			timestamp_problem="is possibly identical to"
		elif [ ${approx_snapshot_time} -lt ${existing_timestamp} ] ; then
			timestamp_problem="is newer than"
		fi

		if [ -n "${timestamp_problem}" ]; then
			ewarn "Latest snapshot date: ${snapshot_date}"
			ewarn
			ewarn "Approximate snapshot timestamp: ${approx_snapshot_time}"
			ewarn "       Current local timestamp: ${existing_timestamp}"
			ewarn
			echo -e "The current local timestamp" \
				"${timestamp_problem} the" \
				"timestamp of the latest" \
				"snapshot. In order to force sync," \
				"use the --revert option or remove" \
				"the timestamp file located at" \
				"'${repo_location}/metadata/timestamp.x'." | fmt -w 70 | \
				while read -r line ; do
					ewarn "${line}"
				done
			r=0
			break
		fi

		if do_snapshot 0 "${snapshot_date}"; then
			r=0
			break;
		fi
	done

	return "${r}"
}

usage() {
	cat <<-EOF
	Usage: $0 [options]

	Options:
	  --revert=yyyymmdd   Revert to snapshot
	  -k, --keep          Keep snapshots in DISTDIR (don't delete)
	  -q, --quiet         Only output errors
	  -v, --verbose       Enable verbose output
	  -x, --debug         Enable debug output
	  -h, --help          This help screen (duh!)
	EOF
	if [[ -n $* ]] ; then
		printf "\nError: %s\n" "$*" 1>&2
		exit 1
	else
		exit 0
	fi
}

main() {
	local arg
	local revert_date

	for arg in "$@" ; do
		local v=${arg#*=}
		case ${arg} in
			-h|--help)    usage ;;
			-k|--keep)    keep=true ;;
			-q|--quiet)   PORTAGE_QUIET=1 ;;
			-v|--verbose) do_verbose=1 ;;
			-x|--debug)   do_debug=1 ;;
			--revert=*)   revert_date=${v} ;;
			*)            usage "Invalid option '${arg}'" ;;
		esac
	done

	[[ -d ${repo_location} ]] || mkdir -p "${repo_location}"
	if [[ ! -w ${repo_location} ]] ; then
		eecho "Repository '${repo_name}' is not writable: ${repo_location}"
		exit 1
	fi

	[[ -d ${PORTAGE_TMPDIR}/portage ]] || mkdir -p "${PORTAGE_TMPDIR}/portage"
	TMPDIR=$(mktemp -d "${PORTAGE_TMPDIR}/portage/webrsync-XXXXXX")
	if [[ ! -w ${TMPDIR} ]] ; then
		eecho "TMPDIR is not writable: ${TMPDIR}"
		exit 1
	fi
	trap 'cd / ; rm -rf "${TMPDIR}"' EXIT
	cd "${TMPDIR}" || exit 1

	${keep} || DISTDIR=${TMPDIR}
	[ ! -d "${DISTDIR}" ] && mkdir -p "${DISTDIR}"

	if ${keep} && [[ ! -w ${DISTDIR} ]] ; then
		eecho "DISTDIR is not writable: ${DISTDIR}"
		exit 1
	fi

	# This is a sanity check to help prevent people like funtoo users
	# from accidentally wiping out their git tree.
	if [[ -n ${repo_sync_type} && ${repo_sync_type} != rsync ]] ; then
		echo "The current sync-type attribute of repository 'gentoo' is not set to 'rsync':" >&2
		echo >&2
		echo "  sync-type=${repo_sync_type}" >&2
		echo >&2
		echo "If you intend to use emerge-webrsync then please" >&2
		echo "adjust sync-type and sync-uri attributes to refer to rsync." >&2
		echo "emerge-webrsync exiting due to abnormal sync-type setting." >&2
		exit 1
	fi

	[[ ${do_debug} -eq 1 ]] && set -x

	if [[ -n ${revert_date} ]] ; then
		do_snapshot 1 "${revert_date}"
	else
		do_latest_snapshot
	fi
}

main "$@"
