#!/bin/bash

# Copyright 2012 Red Hat, Inc.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
# of the GNU General Public License v2 or (at your option) any later version.

# cluster.conf
#
# <clusternode name="n1" nodeid="1">
#   <fence>
#   <method name="1">
#   <device name="wd" host_id="1"/>
#   </method>
#   </fence>
#   <unfence>
#   <device name="wd" host_id="1" action="on"/>
#   </unfence>
# </clusternode>
#
# <clusternode name="n2" nodeid="2">
#   <fence>
#   <method name="1">
#   <device name="wd" host_id="2"/>
#   </method>
#   </fence>
#   <unfence>
#   <device name="wd" host_id="2" action="on"/>
#   </unfence>
# </clusternode>
#
# <fencedevice name="wd" agent="fence_sanlock" path="/dev/fence_sanlock/leases"/>

# Script-wide settings and option values.
#   prog       name used in usage text, version output and as the logger tag
#   max_hosts  highest accepted host_id and number of leases sanlock_init creates
#   opts       normalized command line produced by getopt
#   action, path, host_id
#              filled in from the command line or from stdin
#   offset     lease offset, derived later from host_id
prog="fence_sanlock"
max_hosts="128"
opts=""
action=""
path=""
host_id=""
offset=""

# Print the usage summary to stdout: command line options first, then the
# equivalent key=value names accepted on stdin.
help() {
	cat <<EOF
Usage:

$prog [options]

Options:
  -o <action>   Action: off (default), on, status or metadata
                (sanlock specific actions: sanlock_init)
  -p <path>     sanlock shared storage for leases
  -i <num>      sanlock host_id of node to operate on
  -h            Print this help, then exit
  -V            Print program version information, then exit

stdin options:
  action=<action>
  path=<path>
  host_id=<num>
EOF
}

# Parse an already-normalized option list (as produced by getopt) and store
# the values in the global variables action, path and host_id.
#
# Arguments: the option words, normally terminated by "--".
# -h prints the usage text and exits 0; -V prints the version and exits 0.
# Words that match no known option are skipped.
cli_options() {
	# Stop at the "--" terminator, but also when the arguments run out:
	# without the $# check a missing "--" would loop forever, because
	# shifting an empty argument list never produces "--".
	while [ $# -gt 0 ] && [ "$1" != "--" ]; do
		case "$1" in
		-o)
			action=$2
			shift
		;;
		-p)
			path=$2
			shift
		;;
		-i)
			host_id=$2
			shift
		;;
		-h)
			help
			exit 0
		;;
		-V)
			echo "$prog version 3.8.5"
			exit 0
		;;
		esac
		shift
	done

	return 0
}

# Read "key=value" lines from stdin and store the values of the keys
# action, path and host_id in the global variables of the same name.
# Lines with any other key are ignored.
stdin_options() {
	local key val

	# IFS is set for the read command only, so the caller's IFS is never
	# modified or exported.  -r keeps backslashes in values literal.  The
	# "|| [ -n ... ]" part processes a final line that lacks a newline.
	while IFS="=" read -r key val || [ -n "$key" ]; do
		case "$key" in
			action)
				action=$val
			;;
			path)
				path=$val
			;;
			host_id)
				host_id=$val
			;;
		esac
	done

	return 0
}

# Choose the option source: with no command line arguments the options are
# read from stdin, otherwise the command line is normalized by getopt and
# handed to cli_options.  A getopt failure prints the usage text and exits 1.
# $@ and $opts are expanded unquoted: cli_options relies on $opts being
# split into separate words.
case $# in
0)
	stdin_options
	;;
*)
	opts=$(getopt n:o:p:i:hV $@) || {
		help
		exit 1
	}
	cli_options $opts
	;;
esac

# Print the resource-agent XML description of this agent (parameters and
# supported actions) to stdout.  Always returns 0.
#
# The getopt hints of "path" and "host_id" name their own arguments
# (<path>, <num>), matching the usage text printed by help.
metadata() {
# Quoted delimiter: the document is emitted literally, nothing is expanded.
cat << 'EOF'
<?xml version="1.0" ?>
<resource-agent name="fence_sanlock" shortdesc="Fence agent for watchdog and shared storage">
<longdesc>
fence_sanlock is an i/o fencing agent that uses the watchdog device to
reset nodes.  Shared storage (block or file) is used by sanlock to ensure
that fenced nodes are reset, and to notify partitioned nodes that they
need to be reset.
</longdesc>
<vendor-url>http://www.redhat.com/</vendor-url>
<parameters>
	<parameter name="action" unique="0" required="1">
		<getopt mixed="-o &lt;action&gt;" />
		<content type="string" default="off" />
		<shortdesc lang="en">Fencing Action</shortdesc>
	</parameter>
	<parameter name="path" unique="0" required="1">
		<getopt mixed="-p &lt;path&gt;" />
		<content type="string" />
		<shortdesc lang="en">Path to sanlock shared storage</shortdesc>
	</parameter>
	<parameter name="host_id" unique="0" required="1">
		<getopt mixed="-i &lt;num&gt;" />
		<content type="string" />
		<shortdesc lang="en">Host id for sanlock (1-128)</shortdesc>
	</parameter>
</parameters>
<actions>
	<action name="on" />
	<action name="off" />
	<action name="status" />
	<action name="metadata" />
	<action name="sanlock_init" />
</actions>
</resource-agent>
EOF
	return 0
}

# Verify that the storage has been initialized by reading the lease leader
# record for host_id.
#
# Globals read:    prog, action, host_id, path, offset
# Globals written: leader (combined stdout/stderr of "sanlock direct
#                  read_leader"; callers parse further fields from it)
# Returns: 0 when the record could be read and carries the expected sanlock
#          magic number, 1 otherwise (the reason is sent to syslog).
read_leader() {
	local magic

	# The resource string is quoted so that a path containing whitespace
	# or glob characters reaches sanlock as a single argument.
	if ! leader=$(sanlock direct read_leader -r "fence:h$host_id:$path:$offset" 2>&1); then
		logger -t "$prog" "$action: storage error: unable to read $path"
		return 1
	fi

	# last field of the line(s) mentioning "magic"
	magic=$(echo "$leader" | awk '/magic/ {print $NF}')

	if [ -z "$magic" ]; then
		logger -t "$prog" "$action: storage error: no sanlock magic number at $path:$offset"
		return 1
	fi

	if [ "$magic" != "0x6152010" ]; then
		logger -t "$prog" "$action: storage error: invalid sanlock magic number $magic at $path:$offset"
		return 1
	fi

	return 0
}

# "on" (unfence) action: hand path and host_id to the running fence_sanlockd
# and wait until the local lease shows up in "sanlock client status".
#
# Globals read: prog, path, host_id, offset
# Returns 1 on any error (logged to syslog); on success the script exits 0
# directly from this function.
action_on() {
	local errmsg clientstatus rv initdone

	read_leader || return 1

	if [ -z "$(pidof fence_sanlockd)" ]; then
		logger -t "$prog" "on fence_sanlockd is not running"
		return 1
	fi

	# send p,i options to fence_sanlockd which is waiting for them

	if ! errmsg=$(fence_sanlockd -s -p "$path" -i "$host_id" 2>&1); then
		logger -t "$prog" "on fence_sanlockd -s error: $errmsg"
		return 1
	fi

	# wait for fence_sanlockd to acquire the local lease;
	# it can take minutes, and we can't allow fence_tool join
	# until this is complete
	initdone=""
	while [ -z "$initdone" ]; do

		# make sure sanlockd is alive; the exit status is saved right
		# away so that the log message reports the status of sanlock
		# and not that of the test that follows
		clientstatus=$(sanlock client status 2>&1)
		rv=$?
		if [ "$rv" -ne 0 ]; then
			logger -t "$prog" "on sanlock client status error $rv"
			return 1
		fi

		# make sure fence_sanlockd is alive
		if [ -z "$(pidof fence_sanlockd)" ]; then
			logger -t "$prog" "on fence_sanlockd stopped running"
			return 1
		fi

		# FIXME: check that r is really done being acquired?
		# just appearing in output may not be enough

		# -F: the resource string is matched literally, not as a regex
		echo "$clientstatus" | grep -qF -- "fence:h$host_id:$path:$offset" && initdone="yes"

		sleep 1
	done

	exit 0
}

# "off" (fence) action: fence the host owning lease host_id.
#
# The victim's lease is read; if it is already released the host counts as
# off.  Otherwise the function keeps trying to acquire the lease on behalf
# of the local fence_sanlockd, and on the first failed attempt asks the
# victim (via a sanlock request) to reset itself.
#
# Globals read: prog, host_id, path, offset, leader (set by read_leader)
# Returns: 0 when the lease was released, acquired, or cleanly reacquired by
#          the victim with a newer generation; 1 on errors or when another
#          host took the lease.  Details are logged to syslog.
action_off() {
	local owner_id owner_gen ver timestamp
	local pid tmp_pid pidfile loop rv tmp_id tmp_gen
	# quoted wherever used so a path with whitespace stays one argument
	local res="fence:h$host_id:$path:$offset"

	read_leader || return 1

	owner_id="$(echo "$leader" | grep owner_id | awk '{print $NF}')"
	owner_gen="$(echo "$leader" | grep owner_gen | awk '{print $NF}')"
	ver="$(echo "$leader" | grep lver | awk '{print $NF}')"
	timestamp="$(echo "$leader" | grep ^timestamp | awk '{print $NF}')"

	# lease is released, so host is off
	[ "$timestamp" = 0 ] && return 0

	# owner_id should equal host_id
	if [ "$owner_id" != "$host_id" ]; then
		logger -t "$prog" "victim lease $host_id owned by $owner_id:$owner_gen"
		return 1
	fi

	pid="$(pidof fence_sanlockd)"
	if [ -z "$pid" ]; then
		logger -t "$prog" "Unable to determine fence_sanlockd pid"
		return 1
	fi

	# pid file should be unique for each instance so multiple
	# fence_sanlock's can run in parallel.  fence_sanlockd may read
	# this file to see which host_id we are fencing.

	pidfile=/run/$prog/$prog.pid.$$

	echo "$$ host_id $host_id gen $owner_gen ver $ver timestamp $timestamp" > "$pidfile"

	logger -t "$prog" "$$ host_id $host_id gen $owner_gen ver $ver timestamp $timestamp"

	loop=0

	# FIXME: should this loop have a retry limit?
	# The loop is only ever left through one of the return statements.
	while :
	do
		loop=$((loop + 1))

		tmp_pid="$(pidof fence_sanlockd)"
		if [ -z "$tmp_pid" ]; then
			logger -t "$prog" "fence_sanlockd not running"
			unlink "$pidfile"
			return 1
		fi

		# $pid is expanded unquoted here and below, so its words are
		# passed on exactly as pidof printed them
		if sanlock client acquire -r "$res" -p $pid > /dev/null 2>&1; then
			# fence success
			sanlock client release -r "$res" -p $pid > /dev/null 2>&1
			# save the status before testing it, otherwise the
			# message would report the status of the test
			rv=$?
			[ "$rv" -ne 0 ] && logger -t "$prog" "release $host_id error $rv"
			unlink "$pidfile"
			return 0
		fi

		if [ "$loop" = 1 ]; then

			# acquire probably failed because the victim is
			# still alive and renewing its lease, (we could
			# verify that by checking the error code, but the
			# error codes are currently messed up due to
			# negation).  use a request on the victim's lease
			# to tell it that it's being fenced and needs to
			# reset.  the -f 2 causes SIGUSR1 to be sent to
			# fence_sanlockd on the victim.

			# We send SIGUSR2 to our own fence_sanlockd to
			# tell it that we are fencing someone else.  If
			# fence_sanlockd gets both SIGUSR1 indicating that
			# someone is fencing it, and it gets SIGUSR2
			# indicating that it is fencing someone, it knows
			# that it's the special situation of two nodes
			# fencing each other in a two node cluster.  In
			# this case, the low host_id can choose to survive.

			kill -s SIGUSR2 $pid

			sanlock client request -r "$res:$((ver + 1))" -f 2 > /dev/null 2>&1
			rv=$?
			[ "$rv" -ne 0 ] && logger -t "$prog" "request $host_id error $rv"
		fi

		sleep 10

		# Reread the leader; if the victim's lease has been
		# reacquired cleanly by the victim host (same host_id, new
		# generation), we can quit with success

		if ! read_leader; then
			unlink "$pidfile"
			return 1
		fi

		tmp_id="$(echo "$leader" | grep owner_id | awk '{print $NF}')"
		tmp_gen="$(echo "$leader" | grep owner_gen | awk '{print $NF}')"

		if [ "$owner_id" -eq "$tmp_id" ] && [ "$owner_gen" -lt "$tmp_gen" ]; then
			logger -t "$prog" "victim $owner_id:$owner_gen reacquired lease gen $tmp_gen"
			unlink "$pidfile"
			return 0
		fi

		if [ "$owner_id" -ne "$tmp_id" ]; then
			logger -t "$prog" "victim $owner_id:$owner_gen acquired by $tmp_id:$tmp_gen"
			unlink "$pidfile"
			return 1
		fi
	done
}

# "status" action: report whether the lease of host_id is held.
# Prints "Status: ON" and exits 0 while the lease timestamp is non-zero,
# prints "Status: OFF" and exits 2 once it is 0.  Returns 1 when the
# leader record cannot be read.
action_status() {
	read_leader || return 1

	timestamp=$(echo "$leader" | awk '/^timestamp/ {print $NF}')

	if [ "$timestamp" != 0 ]; then
		# lease is held, so host is "on"
		echo "Status: ON"
		exit 0
	fi

	# lease is released, so host is "off"
	echo "Status: OFF"
	exit 2
}

# "sanlock_init" action: initialize the fence lockspace on $path and then
# one host lease per host id from 1 to max_hosts, each at offset id * align.
#
# Globals read: path, max_hosts, align
# Progress and errors are printed to stdout.
# Returns: 0 on success, 1 as soon as one sanlock call fails.
sanlock_init() {
	# Loop variables are local so the caller's host_id and offset are
	# left untouched.
	local id lease_offset

	# initialize lease path; the resource strings are quoted so a path
	# with whitespace is passed to sanlock as one argument
	echo -n "Initializing fence sanlock lockspace on $path: "
	sanlock direct init -s "fence:0:$path:0" > /dev/null 2>&1 || {
		echo "error $?"
		return 1
	}
	echo "ok"

	echo -n "Initializing $max_hosts sanlock host leases on $path: "
	for ((id = 1; id <= max_hosts; id++)); do
		lease_offset=$((id * align))
		sanlock direct init -r "fence:h$id:$path:$lease_offset" > /dev/null 2>&1 || {
			echo "error $? for host $id"
			return 1
		}
	done
	echo "ok"

	return 0
}

[ -z "$action" ] && action=off

# Lease alignment in bytes.  It is set for every action so that the offset
# computation below never sees an unset value (for example when the
# metadata action is given a host_id).
# FIXME: add direct align command to sanlock
# align="$(sanlock direct align $path)"
align=1048576

# check actions and options compatibility
# all actions besides metadata need storage

if [ "$action" != "metadata" ]; then
	if [ -z "$path" ]; then
		echo "storage path argument required"
		exit 1
	fi

	# all actions besides sanlock_init need host_id
	if [ "$action" != "sanlock_init" ] && [ -z "$host_id" ]; then
		echo "host_id argument required"
		exit 1
	fi
fi

# verify host_id parameter
if [ -n "$host_id" ]; then
	# reject anything that is not a plain decimal number; the numeric
	# tests below would otherwise fail with an error and let it through
	case "$host_id" in
	*[!0-9]*)
		echo "host_id must be between 1 and $max_hosts"
		exit 1
	;;
	esac

	# force base 10 so a leading zero is not read as octal in the
	# arithmetic below
	host_id=$((10#$host_id))

	if [ "$host_id" -lt 1 ] || [ "$host_id" -gt "$max_hosts" ]; then
		echo "host_id must be between 1 and $max_hosts"
		exit 1
	fi
	offset=$((host_id * align))
fi

# Run the requested action and exit with its status.  metadata and
# sanlock_init are functions named exactly like the action; on, off and
# status are implemented by action_on, action_off and action_status.
case "$action" in
	metadata | sanlock_init)
		"$action"
	;;
	on | off | status)
		"action_$action"
	;;
	*)
		echo $"Unknown action: $action"
		exit 1
	;;
esac
exit $?
