ServerMonitor.sh
Code: Select all
#!/bin/bash
###############################################################################
#
# This script monitors a process (called "server" from now on) that should run forever. It needs an external program to detect server's alive status and another command to (re)start the server.
# It is intended to run from a cron job.
#
# Parameters: no positional parameters; optional switches are parsed with getopts (run with -h for the list)
# Exit codes:
# 0 if server is responding or was (re)started successfully
# 1 on wrong variable settings, problems with needed files, failing server termination or starting
#
# History:
# 2017-02-26
# * first release
#
###############################################################################
################ monitoring related variables, prefix MONITOR_ ################
# Name of this program; used in messages and to derive the log and PID file names.
declare -r MONITOR_PROGNAME="ServerMonitor.sh"
# Base directory of the monitoring program.
declare -r MONITOR_BASEDIR="/home/ut99/ut-server"
# Log file of this program. The directory must already exist.
declare MONITOR_LOGFILE="${MONITOR_BASEDIR}/Logs/${MONITOR_PROGNAME}.log"
# Default log level; see the LOG_xxx constants below for the possible values.
declare -i MONITOR_LOGLEVEL=3 # 3 = log infos
# Monitoring control file. Unless it exists and contains a "1", the program
# terminates immediately.
declare MONITOR_CONTROLFILE="${MONITOR_BASEDIR}/monitoring"
# File holding the PID of the monitoring process. If it exists and a process
# with the stored PID is running, another instance is assumed to be active and
# this program terminates instantly.
declare MONITOR_PIDFILE="${MONITOR_BASEDIR}/run/${MONITOR_PROGNAME}.pid"
# Seconds to wait after issuing an external command.
declare -i MONITOR_EXEC_SLEEP=3

################ program for checking server alive status, prefix SERVERALIVE_ ################
# Command (and its parameters) that checks whether the server is alive.
declare SERVERALIVE_CMD="${MONITOR_BASEDIR}/utils/ServerAlive.pl"
declare SERVERALIVE_CMD_PARAMS="127.0.0.1 7778"
# Number of additional SERVERALIVE_CMD runs before the server counts as dead.
declare -i SERVERALIVE_TIMEOUT_TRIES=3
# Pause in seconds between two SERVERALIVE_CMD runs.
declare -i SERVERALIVE_TIMEOUT_SEC=3

################ server related variables, prefix SERVER_ ################
# Base directory of the server.
declare SERVER_BASEDIR="/home/ut99/ut-server"
# Absolute path of the server binary. The absolute path is needed to tell it
# apart from other servers that may be running.
declare SERVER_BIN="${SERVER_BASEDIR}/System/ucc-bin"
# Location of the server's PID file.
declare SERVER_PIDFILE="${SERVER_BASEDIR}/run/ucc.init.lock"
# Command (and its parameters) that starts the server.
declare SERVER_START="${SERVER_BASEDIR}/ucc.init"
declare SERVER_START_PARAMS="soft-start"

########## internal variables, no changes needed below here ########
# Version as a date in yyyymmdd form.
declare -ri MONITOR_VERSION=20170226
# Numeric log levels, ordered from silent to most verbose.
declare -ri LOG_None=0
declare -ri LOG_Error=1
declare -ri LOG_Warning=2
declare -ri LOG_Info=3
declare -ri LOG_Verbose=4
declare -ri LOG_Debug=5
declare -ri LOG_All=6
# Text form of the log levels, indexed by the numeric level.
declare -ra LOG_TEXT=(LOG_None LOG_Error LOG_Warning LOG_Info LOG_Verbose LOG_Debug LOG_All)
# Run-time state; the option flags hold the command names "true"/"false" so
# they can be used directly as conditions.
declare PIDCreated=false
declare monitor_force=false
declare monitor_restart=false
declare monitor_terminate=false
declare monitor_logtoconsole=false
declare -i i # generic integer variable
# Option letters understood on the command line.
declare -r OPT_CHAR_LOGTOCONSOLE="c"
declare -r OPT_CHAR_FORCE="f"
declare -r OPT_CHAR_HELP="h"
declare -r OPT_CHAR_LOGFILE="L"
declare -r OPT_CHAR_RESTART="r"
declare -r OPT_CHAR_TERMINATE="t"
declare -r OPT_CHAR_VERBOSE="v"
declare -r OPT_CHAR_VERSION="V"
function help {
  # Prints the usage text, the option list, the table of valid log levels and
  # the licence hint to stdout.
  # Params: none
  # Reads the globals MONITOR_PROGNAME, MONITOR_CONTROLFILE, MONITOR_LOGFILE,
  # OPT_CHAR_*, LOG_All and LOG_TEXT.
  # The loop counter is local so the global generic counter "i" is not touched.
  local -i level
  echo "$MONITOR_PROGNAME: checks if a running process is responding and restarting it otherwise
Usage: $MONITOR_PROGNAME [-$OPT_CHAR_LOGTOCONSOLE$OPT_CHAR_FORCE$OPT_CHAR_HELP$OPT_CHAR_RESTART$OPT_CHAR_TERMINATE$OPT_CHAR_VERSION] [-$OPT_CHAR_LOGFILE filename] [-$OPT_CHAR_VERBOSE level]
With default options the extenal program for alive check is run. If this program fails, all processes with the given PID and binary name are terminated by SIGKILL and/or SIGTERM and a new instance is started.
Options:
-$OPT_CHAR_LOGTOCONSOLE all logging is done on console, LOGFILE is ignored
-$OPT_CHAR_FORCE run program ignoring monitoring control file $MONITOR_CONTROLFILE
-$OPT_CHAR_HELP print this help end exit
-$OPT_CHAR_LOGFILE filename file name for log file (default is \"$MONITOR_LOGFILE\")
-$OPT_CHAR_RESTART terminate all running server and restart an instance
-$OPT_CHAR_TERMINATE terminate all running server and exit
-$OPT_CHAR_VERBOSE level set verbose level. Valid values are"
  # one line per log level: numeric value and its symbolic name
  for ((level = 0; level <= LOG_All; level++)); do
    printf '\t\t\t%d\t%s\n' "$level" "${LOG_TEXT[level]}"
  done
  echo "-$OPT_CHAR_VERSION print program version and hints"
  echo -e "\"$MONITOR_PROGNAME\" comes with NO WARRANTY, to the extent permitted by law. You may redistribute copies of \"$MONITOR_PROGNAME\" under the terms of the GNU General Public License.\nReport bugs to SeriousBarbie AT googlemail DOT com"
}
function ErrorExit {
  # Reports a fatal error and ends the script.
  # param(s): words of the message; they are written to stderr on one line,
  #           separated by blanks and prefixed with the program name
  # Never returns: the script exits with code 1.
  printf '%s\n' "$MONITOR_PROGNAME ERROR: $*" >&2
  exit 1
}
function LogExit {
  # Logs a final message, removes the monitor's PID file if this instance
  # created it, and terminates the script.
  # 1. param: exit error level (0=no error, everything else=error)
  # 2. param: log level of the message
  # 3. param (and following): message; several words are joined with blanks
  # Never returns: the script exits with the given error level.
  local -i errorlevel=$1
  local -i loglevel=$2
  local message
  shift 2
  message="$*"
  # Quoted, so the text reaches log() verbatim: no word splitting and no
  # pathname expansion of characters like "*" in the message.
  log "$loglevel" "$message"
  if $PIDCreated; then
    rm -- "$MONITOR_PIDFILE"
    PIDCreated=false
    log "$LOG_Debug" "PID file $MONITOR_PIDFILE removed"
  fi
  log "$LOG_Debug" "$MONITOR_PROGNAME exiting with errorlevel $errorlevel"
  exit "$errorlevel"
}
function log {
  # Writes a time-stamped message to the console or to the log file.
  # 1. param: loglevel of the message
  # 2. param (and following): message; several words are joined with blanks
  # The message is dropped when its level is above MONITOR_LOGLEVEL.
  # Output goes to stdout if monitor_logtoconsole is "true", otherwise it is
  # appended to MONITOR_LOGFILE.
  # Returns 0 for a suppressed message (being filtered is not an error);
  # exits via ErrorExit if no message text was given.
  local -i loglevel=$1
  local message
  (( loglevel <= MONITOR_LOGLEVEL )) || return 0
  shift
  message="$*"
  test -z "$message" && ErrorExit "No message given for log function"
  message="$(date --rfc-3339=seconds) $message"
  # printf with a quoted argument keeps the text verbatim (no globbing, no
  # collapsing of repeated blanks).
  if $monitor_logtoconsole; then
    printf '%s\n' "$message"
  else
    printf '%s\n' "$message" >> "$MONITOR_LOGFILE"
  fi
}
function ProcessExists {
  # Checks if a process with the given PID exists.
  # 1. param: PID
  # Returns 0 if process exists, 1 else. An empty, non-numeric or zero PID is
  # answered with 1 right away and never handed to ps.
  local pid=$1
  [[ "$pid" =~ ^[0-9]+$ ]] || return 1
  # 10# forces base 10 so a value with leading zeros is not read as octal
  (( 10#$pid > 0 )) || return 1
  # the exit status of ps is the function's return value
  ps --pid "$pid" > /dev/null
}
function IsValidLogLevel {
  # Tells whether a string names one of the log levels 0..LOG_All.
  # 1. param: loglevel as string
  # Returns 0 if the string is exactly the decimal form of a valid level,
  # 1 otherwise (the comparison is textual, so e.g. "03" is rejected).
  local candidate=$1
  local -i level
  for ((level = 0; level <= LOG_All; level++)); do
    if [[ "$candidate" == "$level" ]]; then
      return 0
    fi
  done
  return 1
}
function GetNextServerPID {
  # Scans the /proc filesystem for a process running the binary given as
  # argument (may not be portable).
  # 1. param: full name of binary
  # Prints the PID of the first matching process and returns 0; prints
  # nothing and returns 1 if none was found or no binary name was given.
  local wanted="$1"
  local procdir exe
  # Without this guard an empty name would "match" every process whose exe
  # link could not be resolved.
  test -n "$wanted" || return 1
  # only numeric entries are processes; this skips /proc/self and the like
  for procdir in /proc/[0-9]*; do
    test -d "$procdir" || continue
    test -r "$procdir/exe" || continue
    exe=$(readlink "$procdir/exe" 2>/dev/null) || continue
    test "$exe" = "$wanted" || continue
    # the directory name is the PID
    printf '%s\n' "${procdir##*/}"
    return 0
  done
  return 1
}
function ServerTerminate {
  # Stops the process with the given PID: SIGTERM first, SIGKILL if the
  # process is still there after MONITOR_EXEC_SLEEP seconds.
  # 1. param: PID
  # returns nothing but exits (via LogExit) with 1 if process could not be terminated
  local -ri victim=$1

  # nothing to do if no such process is running:
  if ! ProcessExists $victim; then
    log $LOG_Verbose "no process with PID $victim found"
    return
  fi

  # first attempt: ask politely
  log $LOG_Verbose "process with PID $victim found, trying to terminate it by signal SIGTERM..."
  kill -TERM $victim
  sleep $MONITOR_EXEC_SLEEP
  if ! ProcessExists $victim; then
    log $LOG_Info "process with PID $victim successfully terminated"
    return
  fi

  # second attempt: force it
  log $LOG_Verbose "process with PID $victim is still alive after sending a SIGTERM and waiting for $MONITOR_EXEC_SLEEP seconds, trying to kill it with SIGKILL..."
  kill -KILL $victim
  sleep $MONITOR_EXEC_SLEEP
  if ProcessExists $victim; then
    LogExit 1 $LOG_Error "process with PID $victim is still alive after sending a SIGKILL and waiting for $MONITOR_EXEC_SLEEP seconds, exiting!"
  else
    log $LOG_Info "process with PID $victim successfully killed"
  fi
}
####### main program #######
# Parse the command line. The leading ":" of the option string puts getopts
# into silent mode: a missing option argument is reported as OP=":" and an
# unknown option as OP="?", in both cases with the option letter in OPTARG.
while getopts ":$OPT_CHAR_LOGTOCONSOLE$OPT_CHAR_FORCE$OPT_CHAR_HELP$OPT_CHAR_LOGFILE:$OPT_CHAR_RESTART$OPT_CHAR_TERMINATE$OPT_CHAR_VERBOSE:$OPT_CHAR_VERSION" OP; do
  case "$OP" in
    "$OPT_CHAR_LOGTOCONSOLE")
      monitor_logtoconsole=true
      log "$LOG_Debug" "monitor_logtoconsole option set to $monitor_logtoconsole"
      ;;
    "$OPT_CHAR_FORCE")
      monitor_force=true
      log "$LOG_Debug" "monitor_force option set to $monitor_force"
      ;;
    "$OPT_CHAR_VERBOSE")
      # Call the check directly and pass the argument quoted, so that a
      # value such as "1 5" is rejected as a whole.
      if IsValidLogLevel "$OPTARG"; then
        MONITOR_LOGLEVEL=$OPTARG
        log "$LOG_Debug" "MONITOR_LOGLEVEL set to $MONITOR_LOGLEVEL"
      else
        ErrorExit "\"$OPTARG\" is not a valid number for logging level"
      fi
      ;;
    "$OPT_CHAR_RESTART")
      monitor_restart=true
      log "$LOG_Debug" "monitor_restart option set to $monitor_restart"
      ;;
    "$OPT_CHAR_LOGFILE")
      MONITOR_LOGFILE="$OPTARG"
      log "$LOG_Debug" "MONITOR_LOGFILE option set to $MONITOR_LOGFILE"
      ;;
    "$OPT_CHAR_TERMINATE")
      monitor_terminate=true
      log "$LOG_Debug" "monitor_terminate option set to $monitor_terminate"
      ;;
    "$OPT_CHAR_VERSION")
      echo "$MONITOR_VERSION"
      exit 0
      ;;
    "$OPT_CHAR_HELP")
      help
      exit 0
      ;;
    :)
      ErrorExit "option -$OPTARG requires an argument"
      ;;
    *)
      ErrorExit "illegal option: -$OPTARG"
      ;;
  esac
done
# check variables and programs:
# The log file location is validated first, so that the start message below
# is not written to a missing directory.
test -n "$MONITOR_LOGFILE" || ErrorExit "variable MONITOR_LOGFILE not set"
test -d "$(dirname "$MONITOR_LOGFILE")" || ErrorExit "directory for log file $MONITOR_LOGFILE does not exist"
log "$LOG_Verbose" "$MONITOR_PROGNAME with log level $MONITOR_LOGLEVEL (${LOG_TEXT[$MONITOR_LOGLEVEL]}) started"

# Build the start command; the parameters are optional. Without parameters
# the command is just SERVER_START.
test -x "$SERVER_START" || ErrorExit "program for server start (SERVER_START=\"$SERVER_START\") does not exist or is not executable"
if [ -z "$SERVER_START_PARAMS" ]; then
  server_start=$SERVER_START
else
  server_start="$SERVER_START $SERVER_START_PARAMS"
fi
log "$LOG_Debug" "command for server start is \"$server_start\""

# Build the alive-check command the same way.
test -x "$SERVERALIVE_CMD" || ErrorExit "program for detecting server's status (SERVERALIVE_CMD=\"$SERVERALIVE_CMD\") does not exist or is not executable"
if [ -z "$SERVERALIVE_CMD_PARAMS" ]; then
  cmdserver_alive=$SERVERALIVE_CMD
else
  cmdserver_alive="$SERVERALIVE_CMD $SERVERALIVE_CMD_PARAMS"
fi
log "$LOG_Debug" "command for detecting server status is \"$cmdserver_alive\""

# a negative delay makes no sense for sleep; fall back to one second
if [ "$MONITOR_EXEC_SLEEP" -lt 0 ]; then
  log "$LOG_Error" "MONITOR_EXEC_SLEEP may not be below 0, reset to 1"
  MONITOR_EXEC_SLEEP=1
fi
log "$LOG_Debug" "MONITOR_EXEC_SLEEP is $MONITOR_EXEC_SLEEP"
# exit, if pid file exists and a running process exists:
if [ -f "$MONITOR_PIDFILE" ]; then
  test -r "$MONITOR_PIDFILE" || LogExit 1 "$LOG_Error" "monitoring program PID file $MONITOR_PIDFILE is not readable, exiting!"
  proc_pid="$(cat "$MONITOR_PIDFILE")"
  log "$LOG_Debug" "PID file $MONITOR_PIDFILE found with content $proc_pid"
  if ProcessExists "$proc_pid"; then
    # PIDCreated is still false here, so LogExit leaves the other
    # instance's PID file in place.
    LogExit 0 "$LOG_Debug" "running process with PID $proc_pid found, exiting..."
  fi
  # stale file: the process that wrote it is gone
  rm "$MONITOR_PIDFILE" || LogExit 1 "$LOG_Error" "could not remove monitoring program PID file $MONITOR_PIDFILE, exiting!"
else
  log "$LOG_Debug" "monitoring program PID file $MONITOR_PIDFILE does not exist, proceeding"
fi

# record our own PID; from here on LogExit removes the file again
echo $$ > "$MONITOR_PIDFILE"
if [ -r "$MONITOR_PIDFILE" ]; then
  PIDCreated=true
  log "$LOG_Debug" "PID file \"$MONITOR_PIDFILE\" created with PID $(cat "$MONITOR_PIDFILE")"
else
  ErrorExit "Could not create PID file $MONITOR_PIDFILE"
fi

if $monitor_force; then
  log "$LOG_Warning" "forced to ignore monitoring control file $MONITOR_CONTROLFILE, proceeding"
else
  # terminate, if control file is not readable:
  test -r "$MONITOR_CONTROLFILE" || LogExit 0 "$LOG_Verbose" "monitoring control file $MONITOR_CONTROLFILE does not exist, exiting"
  log "$LOG_Debug" "monitoring control file $MONITOR_CONTROLFILE exists and is readable, proceeding"
  # terminate, if control file does not contain "active" content
  # (any line containing the character "1" counts as active):
  grep --quiet "1" "$MONITOR_CONTROLFILE" || LogExit 0 "$LOG_Verbose" "monitoring control file $MONITOR_CONTROLFILE does not contain the value '1', exiting"
  log "$LOG_Debug" "monitoring control file $MONITOR_CONTROLFILE contains '1', proceeding"
fi
# now for the work:
# With -r or -t the server is terminated unconditionally; otherwise it is
# probed first and only treated as dead after all tries have failed.
if $monitor_restart || $monitor_terminate; then
  log "$LOG_Verbose" "terminating all running server processes is forced..."
else
  # see if server responds by "cmdserver_alive" for some loops:
  i=0
  while true; do
    # cmdserver_alive is left unquoted on purpose: it holds the command
    # followed by its parameters and must be split into words.
    # The probe's output follows the logging target, so that with
    # monitor_logtoconsole set the log file is not written.
    if $monitor_logtoconsole; then
      $cmdserver_alive
      alive_status=$?
    else
      $cmdserver_alive >> "$MONITOR_LOGFILE"
      alive_status=$?
    fi
    test "$alive_status" -eq 0 && LogExit 0 "$LOG_Verbose" "server alive, exiting"
    i=$((i+1))
    # one initial probe plus SERVERALIVE_TIMEOUT_TRIES retries
    test "$i" -gt "$SERVERALIVE_TIMEOUT_TRIES" && break
    log "$LOG_Verbose" "server not responding, retrying ($i/$SERVERALIVE_TIMEOUT_TRIES) in $SERVERALIVE_TIMEOUT_SEC seconds"
    sleep "$SERVERALIVE_TIMEOUT_SEC"
  done
  log "$LOG_Warning" "server not responding after $((SERVERALIVE_TIMEOUT_TRIES+1)) tries"
  # a dead server is restarted after its remains have been cleaned up
  monitor_restart=true
fi
# server should be terminated, so try to kill if PID exists:
if [ -f "$SERVER_PIDFILE" ]; then
  test -r "$SERVER_PIDFILE" || LogExit 1 "$LOG_Error" "server's PID file $SERVER_PIDFILE exists but is not readable, exiting!"
  proc_pid=$(cat "$SERVER_PIDFILE" 2>/dev/null)
  log "$LOG_Verbose" "Server PID file exists and contains PID $proc_pid"
  # Only a plain positive number is handed to ServerTerminate. Anything else
  # is skipped here; the scan by binary name below still finds the server.
  if [[ "$proc_pid" =~ ^[1-9][0-9]*$ ]]; then
    ServerTerminate "$proc_pid"
  else
    log "$LOG_Warning" "server's PID file $SERVER_PIDFILE does not contain a valid PID, skipping termination by PID"
  fi
  log "$LOG_Verbose" "removing PID file $SERVER_PIDFILE..."
  rm "$SERVER_PIDFILE" || LogExit 1 "$LOG_Error" "could not remove server's PID file $SERVER_PIDFILE, exiting!"
  sleep "$MONITOR_EXEC_SLEEP"
else
  log "$LOG_Verbose" "server's PID file \"$SERVER_PIDFILE\" does not exist"
fi

# lets see if there are processes with given server's binary:
log "$LOG_Verbose" "looking for processes with binary file $SERVER_BIN..."
i=0
last_pid=""
while true; do
  proc_pid=$(GetNextServerPID "$SERVER_BIN")
  test -z "$proc_pid" && break
  # The same PID showing up twice in a row means ServerTerminate returned
  # without removing it; stop instead of looping forever.
  if [ "$proc_pid" = "$last_pid" ]; then
    LogExit 1 "$LOG_Error" "process $proc_pid with binary file $SERVER_BIN is still listed after termination attempt, exiting!"
  fi
  last_pid=$proc_pid
  log "$LOG_Verbose" "running process $proc_pid with binary file $SERVER_BIN found, terminating it..."
  ServerTerminate "$proc_pid"
  i=$((i+1))
  sleep "$MONITOR_EXEC_SLEEP"
done
log "$LOG_Verbose" "$i processes with binary file $SERVER_BIN found"
# Start a new server instance if a restart was requested (-r) or the alive
# check failed; with -t alone the script ends after the termination.
if $monitor_restart; then
  log "$LOG_Debug" "executing \"$server_start\"..."
  # server_start is left unquoted on purpose: it holds the command followed
  # by its parameters. Its output follows the logging target, so that with
  # monitor_logtoconsole set the log file is not written.
  if $monitor_logtoconsole; then
    $server_start
    start_status=$?
  else
    $server_start >> "$MONITOR_LOGFILE"
    start_status=$?
  fi
  if [ "$start_status" -eq 0 ]; then
    LogExit 0 "$LOG_Info" "\"$server_start\" executed successfully, $MONITOR_PROGNAME finishing"
  else
    LogExit 1 "$LOG_Error" "\"$server_start\" failed, $MONITOR_PROGNAME exiting"
  fi
else
  LogExit 0 "$LOG_Info" "restarting server is disabled"
fi
Code: Select all
/home/ut99/ut-server/utils/ServerMonitor.sh > /dev/null 2>&1
The detecting program is taken from Wiki; I adjusted that to my needs:
ServerAlive.pl
Code: Select all
#!/usr/bin/perl
###############################################################################
#
# ServerAlive <server> <port>
#
# Queries an Unreal Tournament game server and returns 0 if a response was
# received.
# Exit code is not zero if an error occurred (missing argument, socket
# failure, unresolvable server name, or no answer within TIMEOUT_SEC seconds).
#
###############################################################################
use strict;
use warnings;
use Socket;
use Sys::Hostname;

# seconds to wait for the server's answer
use constant TIMEOUT_SEC => 5.0;

my $ServerAdress = $ARGV[0]
    or die "ServerAdress must be given";
my $ServerPort = $ARGV[1]
    or die "ServerPort must be given";
#print "ServerAdress: ", $ServerAdress, "\n";
#print "ServerPort: ", $ServerPort, "\n";

my $protocol = getprotobyname 'udp';

# Local end of the connection: this host's own address, port 0.
# The lookup is checked on its own so a failure gets a clear message.
my $localHost = gethostbyname(hostname())
    or die "Unable to resolve local host name";
my $addressClient = sockaddr_in(0, $localHost);

socket(SERVER, PF_INET, SOCK_DGRAM, $protocol)
    or die "Unable to create socket";
bind(SERVER, $addressClient)
    or die "Unable to bind address";

# Resolve the server before packing the address; inet_aton returns undef for
# an unknown host, which is the case the "Server not found" message is for.
my $serverHost = inet_aton($ServerAdress)
    or die "Server not found";
my $addressServer = sockaddr_in($ServerPort, $serverHost);

# In single quotes each "\\" is one backslash, so the query sent is \info\
defined(send(SERVER, '\\info\\', 0, $addressServer))
    or die "Unable to send query";

# Wait until the socket becomes readable or the timeout expires.
my $handleRead = '';
vec($handleRead, fileno(SERVER), 1) = 1;
my $ready = select($handleRead, undef, undef, TIMEOUT_SEC);
# select yields -1 on error and 0 on timeout; in both cases there is nothing
# to read and calling recv would block.
die "Error waiting for answer: $!" if !defined($ready) || $ready < 0;
die "Query timed out" if $ready == 0;

my $serverInfo;
defined(recv(SERVER, $serverInfo, 0x1000, 0))
    or die "Error receiving information";
close SERVER;
exit 0;