#!/bin/ksh
# vim:tabstop=3:syntax=sh:foldmethod=marker
#
# ---------
## Programme : if_error
## Version   : 1.6
## Purpose   : runs a script in parallel, once per argument (in the background
##             of the current shell or in dtterm windows), and analyses its
##             return code.
## Author    : Cedrick GAILLARD
## Email     : mobidyc @ gmail.com
##----------------------------------------------------------------------------
# 0.1 00-mar-09 - Creation
# 0.9 17-avr-09 - ajout de la limite de process lances en parallelisation.
#               - added the ability to parallelize a program given on the
#                 command line.
#               - possibilite de variabiliser un composant du script donne en
#                 ligne de commande.
#               - possibilite de ne pas exporter la variable ARGUMENT_TO_WORK.
#               - ajout de l'usage et d'exemples.
#               - ajout de controle d'erreurs.
#               - garde maintenant un log de tout ce qui se passe concernant
#                 l'argument concerne en cas d'erreur.
#               - ajout d'un resume des erreurs en fin de programme.
# 1.0 24-avr-09 - suppression du downtime nagios, mis dans le script d'arret.
# 1.1 11-jun-09 - Modification d'un message d'erreur
# 1.2 18-jun-09 - if no limit is specified, it defaults to 10.
#               - added a parser which looks for the RVAL definitions in the
#                 script to run and displays them before the script is started.
#               - changed the directory where the .RVAL files are stored.
# 1.3 09-sep-09 - ajout du chemin complet vers le binaire ps.
# 1.5 13-sep-10 - utilise maintenant le shell courant en tache de fond pour
#                 travailler, plus besoin d'exporter un DISPLAY.
#                 ajout de l'argument --dtterm pour utiliser dtterm.
# 1.6 09-mar-11 - changed the DISPLAY check when dtterm is not used.
#                 
#-----------------------------------------------------------------------------

# Usage : print the detailed help (options, purpose and examples) on stdout.
# The here-document is expanded, so ${0} shows the name the script was
# called with and \$ keeps a literal dollar sign in the examples.
Usage () { #{{{
cat <<EOF
Usage:
   ${0} (-h | --help) (--dtterm) -e PROGRAM (-l LIMIT) (-n) (-x VAR_NAME) [-f FILE ] [Argument1 ( ... ArgumentX ... )]

   -h             Display usage
   --help         Display usage
   -e PROGRAM     Paralellize the program PROGRAM
   -l LIMIT       limit the paralellization to this value
   -n             Does not export the \$ARGUMENT_TO_WORK variable to PROGRAM
   --dtterm       use dtterm windows instead of the terminal
   -x VAR_NAME    Substitue each VAR_NAME occurences by arguments
   -f FILE        FILE contains an argument list
   
PROGRAM must be only one program, eventually followed by args.
   in that case, you must quote it.

Utility:
   ${0} will run multiple forks of PROGRAM. there will be as many fork as arguments, each fork can use
        the \$ARGUMENT_TO_WORK variable, which is the current argument.
        
   The first utility for this script was to paralellize a script, which takes a server name in argument,
   establish a connection to this server an run some commands, need a complete log and a resume in case
   of errors.
   but this script can deserve you a lot of other needs.

Example:
   6 forks and a maximum of 4 simultaneous run of xterm, each xterm will have the current argument in title

   # ${0} -l 4 -e "xterm -title FOOBAR -e sleep 2" -x FOOBAR A B C D E F

Example: 
    # SERVER_LIST="machineA machineB machineB.dev.par.emea.cib"
    # ${0} -l 2 -e "ssh -o StrictHostKeyChecking=no FOOBAR uname -n" -x FOOBAR \$SERVER_LIST
    machineB : En erreur
    machineA : finished
    machineB.dev.par.emea.cib : Fait!

    Resume des erreurs :
    machineB : Erreur, Rval = 255 - voir le fichier /var/tmp/machineB.255.RVAL

    # cat /var/tmp/machineB.255.RVAL
    ssh: machineB: node name or service name not known
    255

Example:
   using the --dtterm permits you to interact with each telnet session
   SERVER_LIST="machineA machineB machineB.dev.par.emea.cib"
   # ${0} --dtterm -e "telnet TOTOTO-lc.adm" -x TOTOTO -l 4 \$SERVER_LIST
EOF
} #}}}

#SIZE="-geometry 100x10"
# SIZE is exported empty by default; the line above shows an example value.
export SIZE=

# Called without a first argument: print the short reminder and stop here.
# Going on would only create a temporary folder and then fail because no
# PROGRAM was given, so we leave with the same status (1) right away.
[ -z "$1" ] && { #{{{
	echo "${0} (-h | --help) (--dtterm) -e PROGRAM (-l LIMIT) (-n) [-f FILE ] [Argument1 ( ... ArgumentX ... )]"
	echo 

	echo "to mass run a script on multiple servers :"
	echo "$0 -l 10 -e '${0%/*}/stop_server.sh' -f /tmp/server_liste.txt"
	echo "$0 -l 10 -e '${0%/*}/stop_server.sh' Server1 [ Server2 ... ServerX ]"
	echo "$0 -l 10 -e '${0%/*}/stop_server.sh' Server1 [ Server2 ... ServerX ]"
	echo "$0 -l 10 -e '${0%/*}/restart_server.sh' Serveur1 [ Serveur2 ... ServeurX ]"
	echo "$0       -e '${0%/*}/restart_server.sh' Serveur1 [ Serveur2 ... ServeurX ]"
	echo "$0       -e '${0%/*}/mass_downtime.sh' Serveur1 [ Serveur2 ... ServeurX ]"
	echo
	echo "To display detailed help :"
	echo "$0 -h"
	exit 1
} #}}}

# stop_prg : final handler, also installed as the signal trap.
#   - prints one summary line per entry of SERV_ERR; entries look like
#     "<argument>:<rval>", where <rval> is a return code, "BAD", or any other
#     non-numeric marker;
#   - removes the temporary folder, or only prints its name when DEBUG=true;
#   - leaves the program with status 1 when errors were recorded, 0 otherwise.
# Reads: SERV_ERR, TMPRVAL, TMPDIR, DEBUG.
stop_prg () { #{{{
	[ -n "$SERV_ERR" ] && {
		echo
		echo "Errors detected :"

		for SERV_IN_ERR in $SERV_ERR
		do
			# Split on the LAST ':' so that an argument which itself holds a
			# ':' (host:port, IPv6 address ...) is kept in one piece.
			Serv="${SERV_IN_ERR%:*}"
			Rval="${SERV_IN_ERR##*:}"

			case "$Rval" in
				BAD)
					echo "$Serv : Unknown error"
					;;
				*[!0-9]*)
					# not a numeric return code
					echo "$Serv : look at the ${TMPRVAL}/${Serv}.RVAL file"
					;;
				*)
					echo "$Serv : ERROR: Rval=$Rval - look at the ${TMPRVAL}/${Serv}.${Rval}.RVAL file"
					;;
			esac
		done
	}

	if [ "$DEBUG" = "true" ]
		then
			echo "Logs: $TMPDIR"
		else
			# A signal can bring us here before the script has set its own
			# TMPDIR; the variable may then still hold a value inherited from
			# the environment. Only remove a folder that looks like ours:
			# /var/tmp/<name>.<random>.<our pid>
			case "$TMPDIR" in
				/var/tmp/?*.*."$$")
					rm -rf "$TMPDIR"
					;;
			esac
	fi

	[ -n "$SERV_ERR" ] && exit 1
	exit 0
} #}}}

# Run stop_prg (error summary + cleanup) when one of these signals arrives
# (conventionally HUP, INT, QUIT, SEGV, PIPE and TERM).
trap stop_prg 1 2 3 11 13 15
# DEBUG is reset here; stop_prg keeps the temporary folder only when it is "true".
DEBUG=

# missing_value OPTION : report an option given without its value and abort.
missing_value () {
	echo "ERROR: the option $1 needs a value"
	exit 1
}

echo -- " --> Analysing arguments"
while [ "$#" -ne "0" ] #{{{
do
	case "$1"
	in
		-e)	[ "$#" -ge "2" ] || missing_value "$1"
				COMMANDS=$2
				shift 2
				;;
		-h|--help)
				Usage
				exit
				;;
		-l)	[ "$#" -ge "2" ] || missing_value "$1"
				LIMIT=$2
				shift 2
				;;
		-n)	VAR_EXPORT=false
				shift
				;;
		--dtterm)	V_WINTERM=true
					shift
				;;
		-x)	[ "$#" -ge "2" ] || missing_value "$1"
				VAR_TO_USE=$2
				shift 2
				;;
		-f)	[ "$#" -ge "2" ] || missing_value "$1"
				[ -r "$2" ] || {
					echo "ERROR: can't read the arguments file : $2"
					exit 1
				}
				# the whole file content is appended to the argument list
				ARGUMENTS="$ARGUMENTS $(< "$2")"
				shift 2
				;;
		*)		ARGUMENTS="$ARGUMENTS $1"
				shift
				;;
	esac
done #}}}
echo -- " --> End of analysing arguments"

# Nothing to run: stop before anything is created on disk.
[ -z "$COMMANDS" ] && {
	echo "no command found to execute!"
	exit 1
}

# Directory holding this script; "." when $0 carries no path component.
case "$0" in
	*/*)	RACINE="${0%/*}" ;;
	*)		RACINE="." ;;
esac
[ -n "$RACINE" ] || RACINE="/"
RACINE="$(cd "$RACINE" && pwd)" || {
	echo "ERROR: can't get my racine directory"
	exit 1
}

# dtterm windows need an X display
[ "$V_WINTERM" = "true" ] && [ -z "$DISPLAY" ] && {
	echo "ERREUR : your DISPLAY is not set"
	exit 1
}

DTTERM="/usr/dt/bin/dtterm"
V_DATE="$(date +%Y%m%d-%Hh%Ms%S)"

cd "$RACINE" || exit 1

# default simultaneous limit: used when LIMIT is missing, not a plain
# number, or lower than 1
case "$LIMIT" in
	''|*[!0-9]*)	LIMIT="10" ;;
esac
[ "$LIMIT" -lt "1" ] && LIMIT="10"

# Private working folder for the generated per-argument scripts.
TMPDIR="/var/tmp/${0##*/}.${RANDOM}.${$}"
[ -d "${TMPDIR}" ] && {
	echo "ERROR : The temporary folder alreacy exists : ${TMPDIR}"
	exit 1
}
[ -d "${TMPDIR%/*}" ] || mkdir -p "${TMPDIR%/*}"
# plain mkdir (no -p) fails if the folder appeared in the meantime
mkdir -m 700 "${TMPDIR}" || {
	echo "ERROR : can't create the temporary folder : ${TMPDIR}"
	exit 1
}

# Folder receiving the .RVAL log files.
TMPRVAL="/var/tmp"
[ -d "${TMPRVAL}" ] || mkdir -p "${TMPRVAL}"

# if PROGRAM has defined RVAL in header, we display them
# (the first word of COMMANDS is taken as the program file; errors are hidden
# because it does not have to be a readable file)
egrep "^# RVAL - [0-9]*[0-9]  - " "$(echo "${COMMANDS}" |awk 'NF {print $1; exit}')" 2>/dev/null
echo

# run_thread ARGUMENT
# Starts PROGRAM (COMMANDS) for one argument in the background.
#   - ARGUMENT_TO_WORK is set to the argument and exported unless
#     VAR_EXPORT=false;
#   - when VAR_TO_USE is set, each of its occurrences in COMMANDS is replaced
#     by the argument;
#   - the command is written to ${TMPDIR}/<argument>.sh, followed by
#     "echo $?", so the last line of the log is the command's return code;
#   - the output goes to ${TMPRVAL}/<argument>.RVAL (written by dtterm's own
#     logging options when V_WINTERM=true);
#   - "<argument>:<pid>" is appended to PIDS and CURRENT is incremented.
run_thread () { #{{{
	ARGUMENT_TO_WORK=$1
	[ "$VAR_EXPORT" != "false" ] && export ARGUMENT_TO_WORK

	if [ -n "$VAR_TO_USE" ]
		then
			# '@' is the sed delimiter, and '&' / '\' are special in a sed
			# replacement: escape them so that arguments such as user@host
			# are substituted literally.
			SED_PATTERN="$(echo "$VAR_TO_USE" |sed 's/@/\\@/g')"
			SED_VALUE="$(echo "$ARGUMENT_TO_WORK" |sed 's/[\\&@]/\\&/g')"
			COMMAND="$(echo "$COMMANDS" |sed "s@${SED_PATTERN}@${SED_VALUE}@g")"
		else
			COMMAND="$COMMANDS"
	fi

	THREAD_SCRIPT="${TMPDIR}/${ARGUMENT_TO_WORK}.sh"
	THREAD_LOG="${TMPRVAL}/${ARGUMENT_TO_WORK}.RVAL"

	{
		echo "#! /bin/ksh"
		echo "$COMMAND"
		echo "echo \$?"
		echo "exit"
	} > "$THREAD_SCRIPT"
	chmod +x "$THREAD_SCRIPT"

	# start from an empty log file
	cp /dev/null "$THREAD_LOG"
	if [ "$V_WINTERM" = "true" ]
		then
			# $SIZE is left unquoted on purpose: it is empty or holds options
			"${DTTERM}" -kshMode $SIZE \
				-title "${ARGUMENT_TO_WORK}" \
				-l -lf "$THREAD_LOG" -geometry 100x10 \
				-e "$THREAD_SCRIPT" &
		else
			(exec "$THREAD_SCRIPT" "${ARGUMENT_TO_WORK}") > "$THREAD_LOG" 2>&1 &
	fi

	PIDS="$PIDS $ARGUMENT_TO_WORK:$! "
	CURRENT="$(( $CURRENT + 1 ))"
} #}}}

# check_thread [ARGUMENT ...]
# Looks at every "<argument>:<pid>" entry of PIDS. For each process that is
# no longer running:
#   - one of the waiting ARGUMENTs (if any) is started with run_thread, and
#     SHIFT_TO_DO tells the caller how many arguments were consumed;
#   - INDEX is incremented and CURRENT decremented;
#   - the last line of ${TMPRVAL}/<argument>.RVAL is read as the return code:
#       0            -> "finished", the log file is removed;
#       other number -> the log is renamed <argument>.<rval>.RVAL and
#                       "<argument>:<rval>" is added to SERV_ERR;
#       anything else-> the log is kept under its name and
#                       "<argument>:NORVAL" is added to SERV_ERR;
#   - the entry is removed from PIDS.
check_thread () { #{{{
	SHIFT_TO_DO=0
	for PID in ${PIDS}
	do
		# Split on the LAST ':' - the argument itself may contain ':'.
		THREAD_NAME="${PID%:*}"
		THREAD_PID="${PID##*:}"

		# If the PID is still running, go next
		kill -0 "$THREAD_PID" 2>/dev/null && continue

		# from here, the process is finished, we can restart another
		[ -n "$1" ] && {
			run_thread "$1"
			shift 1
			SHIFT_TO_DO=$(( $SHIFT_TO_DO + 1 ))
		}

		INDEX="$(( $INDEX + 1 ))"
		CURRENT="$(( $CURRENT - 1 ))"

		# progress, as an integer percentage of the arguments already done
		if [ "$NBR_ARGS" -gt "0" ] 2>/dev/null
			then
				PERCENT="$(( $INDEX * 100 / $NBR_ARGS ))"
			else
				PERCENT="0"
		fi

		RVAL="$(tail -1 "${TMPRVAL}/${THREAD_NAME}.RVAL")"
		# dtterm logs carry a trailing carriage return (octal 015): keep the
		# first word only and strip it
		[ "$V_WINTERM" = "true" ] && RVAL="$(echo "$RVAL" |awk '{print $1}' |tr -d '\015')"

		case "$RVAL" in
			0)
				echo "${THREAD_NAME} (${INDEX}/${NBR_ARGS} - ${PERCENT}%) : finished"
				rm -f "${TMPRVAL}/${THREAD_NAME}.RVAL"
				;;
			''|*[!0-9]*)
				# The last line of the log is not a return code. It must not
				# be used in a file name nor in the space separated SERV_ERR
				# list: keep the log as it is and record a neutral marker.
				echo "${THREAD_NAME} (${INDEX}/${NBR_ARGS} - ${PERCENT}%) : ERROR: no return code found - take a look at the ${TMPRVAL}/${THREAD_NAME}.RVAL file"
				SERV_ERR="$SERV_ERR ${THREAD_NAME}:NORVAL"
				;;
			*)
				mv "${TMPRVAL}/${THREAD_NAME}.RVAL" "${TMPRVAL}/${THREAD_NAME}.${RVAL}.RVAL"
				echo "${THREAD_NAME} (${INDEX}/${NBR_ARGS} - ${PERCENT}%) : ERROR: Rval=$RVAL - take a look at the ${TMPRVAL}/${THREAD_NAME}.${RVAL}.RVAL file"
				SERV_ERR="$SERV_ERR ${THREAD_NAME}:${RVAL}"
				;;
		esac

		# Drop the finished entry by plain string comparison (PIDS may have
		# grown meanwhile if run_thread was called above).
		REMAINING=
		for ENTRY in ${PIDS}
		do
			[ "$ENTRY" = "$PID" ] || REMAINING="$REMAINING $ENTRY"
		done
		PIDS="$REMAINING"
	done
} #}}}

# The collected arguments become the positional parameters; they are
# consumed one by one as jobs are started.
set -- $ARGUMENTS

NBR_ARGS=${#}
# OPER is the bc operation which turns a number of finished jobs into a
# percentage of NBR_ARGS (left empty when there is nothing to compute).
if [ "$NBR_ARGS" -eq "0" ] || [ "$NBR_ARGS" -eq "100" ]
	then
		OPER=""
elif [ "$NBR_ARGS" -lt "100" ]
	then
		OPER="* $(echo "scale=2 ; 100 / ${NBR_ARGS}" |bc -l)"
	else
		OPER="/ $(echo "scale=2 ; ${NBR_ARGS} / 100" |bc -l)"
fi
INDEX=0
CURRENT=0

echo -- " --> We have ${NBR_ARGS} Args to run"
echo -- " --> LIMIT defined to ${LIMIT}"

while :
do
	# start one more job, but never more than LIMIT at the same time
	[ "${#}" -ne "0" ] && [ "$CURRENT" -lt "$LIMIT" ] && {
		run_thread "$1"
		shift 1
	}

	# nothing is running any more: we are done
	NBR_PIDS=$(echo "${PIDS}" |wc -w)
	[ "${NBR_PIDS}" -eq "0" ] && break

	# nothing can be started right now (no argument left or limit reached):
	# wait 1/10 second instead of polling in a tight loop
	if [ "${#}" -eq "0" ] || [ "$CURRENT" -ge "${LIMIT}" ]
		then
			/usr/bin/perl -e 'select(undef,undef,undef,.1)'
	fi

	# many arguments are waiting and less than half of the slots are busy:
	# fill all the free slots at once
	[ "${#}" -gt "${LIMIT}" ] && [ "${NBR_PIDS}" -lt "$(( $LIMIT / 2 ))" ] && {
		while [ "${#}" -ne "0" ] && [ "$CURRENT" -lt "$LIMIT" ]
		do
			run_thread "$1"
			shift 1
		done
	}

	# collect the finished jobs; check_thread may start waiting arguments
	# itself and reports how many it consumed in SHIFT_TO_DO
	check_thread "$@"

	[ "$SHIFT_TO_DO" -gt "0" ] && shift $SHIFT_TO_DO
done

stop_prg

