#!/bin/sh
#
# Restore the state of a previously saved PoC cluster.
#
# Usage: <this script> <CLUSTER-NAME>
#
# The saved state is read from
# /var/lib/openstack-cluster-installer-poc/saved/<CLUSTER-NAME>.
# The script kills the running VMs / IPMI simulators, copies the saved
# .qcow2 and .conf files back in place, starts everything again, waits for
# SSH, restarts galera, the puppet server and all services, and finally
# reboots the OpenStack servers.

# Abort as soon as a command exits with a non-zero status.
set -e
# Uncomment to trace every command while debugging.
#set -x

# Print the help text on stdout and terminate the script with status 1.
# Used both for invalid invocations and for explicit help requests.
usage () {
        printf '%s\n' \
                "$0 <CLUSTER-NAME>" \
                "This command restores the state of a backuped PoC cluster." \
                "See oci-poc-save to save the state of a cluster."
        exit 1
}

# Exactly one non-empty argument is required: the name of the saved cluster.
# An empty name is rejected explicitly, because the "saved/" directory itself
# exists and would otherwise satisfy the directory check below.
if [ "$#" -ne 1 ] || [ -z "${1}" ] ; then
        usage
fi

# Explicit help requests print the same usage text.
case "${1}" in
	-h|--help|-help)
		usage
		;;
esac

CLUSTER_NAME=${1}

# The path is quoted so that a name containing whitespace is tested as a
# single path instead of breaking the test expression.
if ! [ -d "/var/lib/openstack-cluster-installer-poc/saved/${CLUSTER_NAME}" ] ; then
	echo "${CLUSTER_NAME} doesn't exist"
	exit 1
fi

# Directory holding the pidfiles of the processes started by this script.
PID_DIR='/var/run/oci-poc'

# Escape sequences used to colour messages (kept in their backslash form;
# they are interpreted when printed).
GREEN='\033[1;32m'
RED='\033[1;31m'
NO_COL='\033[0m'
# Print a message in green.
#
# Usage: green_echo [-n] MESSAGE
#   -n       do not append a trailing newline (lets a caller continue
#            writing on the same line)
#   MESSAGE  text to print; printed verbatim (no word splitting, no
#            globbing, no backslash interpretation)
#
# Reads the globals GREEN and NO_COL. Their backslash escapes are expanded
# by printf's %b, so the result does not depend on how the shell's echo
# treats "\033" or "-n".
green_echo () {
        if [ "${1:-}" = "-n" ] ; then
                shift
                printf '%b%s%b' "${GREEN}" "${1:-}" "${NO_COL}"
        else
                printf '%b%s%b\n' "${GREEN}" "${1:-}" "${NO_COL}"
        fi
}
# Print a message in red.
#
# Usage: red_echo [-n] MESSAGE
#   -n       do not append a trailing newline
#   MESSAGE  text to print; printed verbatim (no word splitting, no
#            globbing, no backslash interpretation)
#
# Reads the globals RED and NO_COL. Their backslash escapes are expanded by
# printf's %b rather than by echo, whose escape handling differs between
# shells.
red_echo () {
        if [ "${1:-}" = "-n" ] ; then
                shift
                printf '%b%s%b' "${RED}" "${1:-}" "${NO_COL}"
        else
                printf '%b%s%b\n' "${RED}" "${1:-}" "${NO_COL}"
        fi
}

# Wait until an SSH connection as root to a host succeeds.
#
# Arguments: $1 - host name or IP address to probe
# Globals:   GREEN, NO_COL (read)
# Outputs:   a progress line on stdout: prompt, one dot per failed probe,
#            then "ok." once a probe succeeds
# Returns:   the exit status of a final SSH probe; when the host is still
#            unreachable after all attempts this is non-zero, which aborts
#            the script under "set -e".
wait_for_ssh () {
	local COUNT OTCI_CAN_SSH SSH_HOST SYSUSERNAME
	SYSUSERNAME=root
	# Up to 900 probes with a 1 second pause between them: at least 15 minutes.
	COUNT=900
	OTCI_CAN_SSH=no
	SSH_HOST=${1}

	# Printed without a trailing newline so that the progress dots and the
	# final "ok." end up on the same line as the prompt.
	printf '%b%s%b' "${GREEN}" "---> Attempting to connect to ${SSH_HOST}: " "${NO_COL}"
	# Drop any known host key for this host before probing it; failures
	# (e.g. no known_hosts file) are deliberately ignored.
	ssh-keygen -f ~/.ssh/known_hosts -R "${SSH_HOST}" 1>/dev/null 2>/dev/null || true
	while [ "${OTCI_CAN_SSH}" != "yes" ] && [ "${COUNT}" -gt 0 ] ; do
		if ssh -o "StrictHostKeyChecking no" -o "ConnectTimeout 2" "${SYSUSERNAME}@${SSH_HOST}" 'echo -n ""' 2>/dev/null ; then
			OTCI_CAN_SSH=yes
			echo ok.
		else
			COUNT=$(( COUNT - 1 ))
			sleep 1
			printf '.'
		fi
	done
	# Final probe: being the last command, its status is the function's
	# status, so an unreachable host makes the caller fail.
	ssh -o "StrictHostKeyChecking no" -o "ConnectTimeout 2" "${SYSUSERNAME}@${SSH_HOST}" 'echo -n ""' 1>/dev/null 2>/dev/null
}

# Kill every process that has a pidfile under the pidfile directory, then
# remove the pidfile.
#
# Globals: PID_DIR (read) - pidfile directory; falls back to
#          /var/run/oci-poc when unset or empty.
# Failures to kill a process (already gone, empty pidfile) are ignored on
# purpose: the goal is only to end up with nothing running.
kill_all_running_vm () {
	local PID_FILE PIDS
	green_echo "===> Killing all VMs and IPMI SIM..."
	# Reading find's output line by line keeps paths containing blanks intact.
	find "${PID_DIR:-/var/run/oci-poc}/" -iname '*.pid' | while IFS= read -r PID_FILE ; do
		PIDS=$(cat "${PID_FILE}" 2>/dev/null) || PIDS=""
		if [ -n "${PIDS}" ] ; then
			# Left unquoted so that a pidfile holding several
			# whitespace-separated PIDs still works.
			# shellcheck disable=SC2086
			kill ${PIDS} || true
		fi
		rm -f "${PID_FILE}"
	done
}

# Copy the saved files of the selected cluster back into the live tree:
# *.qcow2 files go to runtime/, *.conf files go to ipmi_sim/.
#
# Globals: CLUSTER_NAME (read) - name of the saved cluster directory.
# The source directory is quoted (only the glob part is left unquoted) so a
# cluster name containing whitespace still designates a single directory.
restore_all_vm_from_hdd () {
	local POC_DIR SAVED_DIR
	POC_DIR=/var/lib/openstack-cluster-installer-poc
	SAVED_DIR="${POC_DIR}/saved/${CLUSTER_NAME}"

	green_echo "===> Restoring backups of .qcow2 files..."
	cp -v "${SAVED_DIR}"/*.qcow2 "${POC_DIR}/runtime/"
	cp -v "${SAVED_DIR}"/*.conf "${POC_DIR}/ipmi_sim/"
}

# Start one /usr/bin/ipmi_sim instance in the background via
# start-stop-daemon, using <node>.conf from the ipmi_sim directory and
# recording its PID in ${PID_DIR}/<node>.pid.ipmisim.pid.
#
# Arguments: $1 - node name (e.g. pxe-server-node, slave-node-3)
# Globals:   PID_DIR (read)
_start_ipmi_sim () {
	local NODE_NAME
	NODE_NAME=${1}
	start-stop-daemon \
		--start \
		--quiet \
		--background \
		--pidfile "${PID_DIR}/${NODE_NAME}.pid.ipmisim.pid" \
		--make-pidfile \
		--startas /usr/bin/ipmi_sim \
		--      -n \
			-c "/var/lib/openstack-cluster-installer-poc/ipmi_sim/${NODE_NAME}.conf" \
			-f /etc/oci-poc/ipmisim1.emu
}

# Start the PXE server first, wait 20 seconds, then start every slave node
# that has a slave-node-<N>.conf file, in version-sorted order of <N>, with
# a 1 second pause after each.
#
# Globals: PID_DIR (read, through _start_ipmi_sim)
start_all_vms () {
	local SIM_DIR CONF NUM VM_NUM_LIST i
	SIM_DIR=/var/lib/openstack-cluster-installer-poc/ipmi_sim

	green_echo "===> Starting VMs..."
	green_echo "---> Starting PXE server"
	_start_ipmi_sim pxe-server-node

	green_echo "---> Waiting 20 seconds for the PXE server to be up..."
	sleep 20

	# Derive the node numbers from the file names with a glob instead of
	# parsing the output of ls. An unmatched glob is skipped silently.
	VM_NUM_LIST=$(
		for CONF in "${SIM_DIR}"/slave-node-*.conf ; do
			[ -e "${CONF}" ] || continue
			NUM=${CONF##*/slave-node-}
			printf '%s\n' "${NUM%%[-.]*}"
		done | sort -V
	)
	for i in ${VM_NUM_LIST} ; do
		green_echo "---> Starting-up VM $i"
		_start_ipmi_sim "slave-node-${i}"
		sleep 1
	done
}

# Block until SSH answers on the host "oci" and then, one after the other,
# on every machine whose role is controller, sql or sqlmsg (addresses taken
# from the Cur_ip column of "ocicli -csv machine-list -s").
wait_for_ssh_of_pxe_and_controllers_or_sqls () {
	local ROLE_QUERY MACHINE_IP
	ROLE_QUERY="SELECT Cur_ip FROM - WHERE role='controller' OR role='sql' OR role='sqlmsg'"

	wait_for_ssh oci
	for MACHINE_IP in $(ocicli -csv machine-list -s | q -H -d, "${ROLE_QUERY}") ; do
		wait_for_ssh "${MACHINE_IP}"
	done
}

# Bring up one galera cluster over SSH.
#
# Arguments: $1 - space-separated list of hosts, in start order
#
# The first host of the list bootstraps the cluster: safe_to_bootstrap is
# switched from 0 to 1 in its grastate.dat, then galera_new_cluster is run
# up to 3 times until /var/run/mysqld/mysqld.pid exists on that host. If it
# still does not exist after the 3rd run, the whole script exits with
# status 1. Every other host simply gets "systemctl start mysql".
# There is a 2 second pause after each host.
start_one_galera_cluster () {
	local ORDERED_NODE_LIST FIRST_NODE NODE ATTEMPT
	ORDERED_NODE_LIST="${1}"

	green_echo "===> Restarting galera on nodes: ${ORDERED_NODE_LIST}"

	FIRST_NODE=yes
	# Unquoted on purpose: the list is split on whitespace into host names.
	for NODE in ${ORDERED_NODE_LIST} ; do
		if [ "${FIRST_NODE}" = "yes" ] ; then
			green_echo "---> Starting galera_new_cluster on host ${NODE}"
			ssh "${NODE}" "sed -i 's/safe_to_bootstrap: 0/safe_to_bootstrap: 1/' /var/lib/mysql/grastate.dat"
			ATTEMPT=1
			while : ; do
				# The command's own status is ignored; success is
				# judged by the presence of the pidfile only.
				ssh "${NODE}" galera_new_cluster || true
				if ssh "${NODE}" "[ -e /var/run/mysqld/mysqld.pid ]" ; then
					break
				fi
				case "${ATTEMPT}" in
					1)
						red_echo "---> Failed: trying a 2nd time on host ${NODE}"
						;;
					2)
						red_echo "---> Failed: trying a 3rd time on host ${NODE}"
						;;
					*)
						red_echo "Giving up..."
						exit 1
						;;
				esac
				ATTEMPT=$(( ATTEMPT + 1 ))
			done
			FIRST_NODE=no
		else
			green_echo "---> Starting MySQL on host ${NODE}"
			ssh "${NODE}" "systemctl start mysql"
		fi
		green_echo "---> Waiting 2 seconds"
		sleep 2
	done
}

# Print, on one line and separated by single spaces, the hostnames of the
# machines having a given role in a given cluster, as reported by
# "ocicli -csv machine-list --filter ...". Prints nothing when there is none.
#
# Arguments: $1 - role, $2 - cluster name
_oci_hostnames_by_role () {
	ocicli -csv machine-list --filter "role=${1},cluster_name=${2}" \
		| q -H -d, "SELECT hostname FROM -" 2>/dev/null \
		| tr '\n' ' ' \
		| sed 's/ $//'
}

# Restart the galera cluster(s) of every cluster listed by
# "ocicli -csv cluster-list".
#
# For each cluster:
#   - the main galera runs on the "sql" nodes, or on the "controller" nodes
#     when the cluster has no "sql" node;
#   - a second galera runs on the "sqlmsg" nodes, or on the "messaging"
#     nodes when the cluster has no "sqlmsg" node; it is skipped when
#     neither role has any machine.
start_galera_service () {
	local CLUSTER_LIST SQLS CTRLS SQLMSGS MSGS CLUSTER
	CLUSTER_LIST=$(ocicli -csv cluster-list | q -H -d, "SELECT name FROM -")

	for CLUSTER in ${CLUSTER_LIST} ; do
		green_echo "===> Restarting galera cluster(s) on cluster ${CLUSTER}"
		SQLS=$(_oci_hostnames_by_role sql "${CLUSTER}")
		if [ -z "${SQLS}" ] ; then
			CTRLS=$(_oci_hostnames_by_role controller "${CLUSTER}")
			start_one_galera_cluster "${CTRLS}"
		else
			start_one_galera_cluster "${SQLS}"
		fi
		SQLMSGS=$(_oci_hostnames_by_role sqlmsg "${CLUSTER}")
		if [ -z "${SQLMSGS}" ] ; then
			MSGS=$(_oci_hostnames_by_role messaging "${CLUSTER}")
			# Test MSGS here: SQLMSGS is known to be empty in this
			# branch. The list is passed quoted because the callee
			# reads the whole host list from its first argument.
			if [ -n "${MSGS}" ] ; then
				start_one_galera_cluster "${MSGS}"
			fi
		else
			start_one_galera_cluster "${SQLMSGS}"
		fi
	done
}

# Run "oci-restart-all-services" over SSH on the machines of one cluster,
# role by role: controllers first, then network nodes, then compute nodes.
#
# Arguments: $1 - cluster name, matched against the "cluster" column of
#                 "ocicli -csv machine-list -a"
restart_all_services_on_cluster () {
	local CLUSTER ROLE_SPEC ROLE HEADER
	CLUSTER=${1}

	# Each entry is "<role>:<header line printed before handling that role>".
	for ROLE_SPEC in \
		"controller:===> Restarting all services on controllers" \
		"network:===> Restarting all services on network nodes" \
		"compute:===> Restarting all services on compute nodes"
	do
		ROLE=${ROLE_SPEC%%:*}
		HEADER=${ROLE_SPEC#*:}
		green_echo "${HEADER}"
		# Every word produced by the query is one "hostname,Cur_ip" pair.
		for i in $(ocicli -csv machine-list -a | q -H -d, "SELECT hostname,Cur_ip FROM - WHERE role='${ROLE}' AND cluster='${CLUSTER}'") ; do
			HOST_NAME=$(echo $i | cut -d, -f1)
			HOST_IP=$(echo $i | cut -d, -f2)
			green_echo "---> Restarting services on ${HOST_NAME}"
			ssh ${HOST_IP} "oci-restart-all-services"
		done
	done
}

# Restart all services on the clusters cl2, cl1 and cl3, in that order.
restart_all_services () {
	local CLUSTER_ID
	for CLUSTER_ID in cl2 cl1 cl3 ; do
		restart_all_services_on_cluster "${CLUSTER_ID}"
	done
}

# Reboot every OpenStack server (all projects), issuing the openstack
# commands over SSH on the first controller reported by
# "ocicli -csv machine-list -s".
#
# When no controller address is found, a message is printed and nothing is
# rebooted. Without this guard the remote command string would be handed to
# ssh as the host name.
restart_all_vms () {
	local CTLIP SERVER_ID
	green_echo "===> Rebooting all VMs of the cluster"
	CTLIP=$(ocicli -csv machine-list -s | q -H -d, "SELECT Cur_ip FROM - WHERE role='controller'" | head -n 1)
	if [ -z "${CTLIP}" ] ; then
		red_echo "---> No controller found: not rebooting any VM"
		return 0
	fi
	for SERVER_ID in $(ssh "${CTLIP}" ". oci-openrc ; openstack server list --all-projects --format value -c ID") ; do
		green_echo "---> Issuing: openstack server reboot ${SERVER_ID}"
		ssh "${CTLIP}" ". oci-openrc ; openstack server reboot ${SERVER_ID}"
	done
}

# Restart the puppet master on the host "oci" over SSH: puppet-master.service
# when its unit file exists there, puppetserver.service otherwise.
# A failure of the SSH command is ignored.
restart_puppet_server () {
	local REMOTE_CMD
	REMOTE_CMD="if [ -e /lib/systemd/system/puppet-master.service ] ; then systemctl restart puppet-master.service ; else systemctl restart puppetserver.service ; fi"

	green_echo "===> Restarting puppetserver"
	if ! ssh root@oci "${REMOTE_CMD}" ; then
		# Best effort only: keep going even if the restart failed.
		true
	fi
}

# Full restore sequence. The order matters: the running instances are
# stopped before their disk images are overwritten, and the machines must
# answer on SSH before anything is restarted on them.
main () {
	# Stop what is currently running, then put the saved files back.
	kill_all_running_vm
	restore_all_vm_from_hdd

	# Boot everything and wait until the machines are reachable.
	start_all_vms
	wait_for_ssh_of_pxe_and_controllers_or_sqls

	# Bring the services back, databases first.
	start_galera_service
	restart_puppet_server
	restart_all_services

	# Finally reboot the OpenStack servers.
	restart_all_vms
}

main
