monitoring/check_systemd_service.sh

342 lines
8.0 KiB
Bash
Executable File

#!/usr/bin/env bash

# - Name of this script as it was invoked (basename of $0). The log file
# - name below and the start-delay check further down are derived from it.
# -
script_name="$(basename -- "$0")"

# - Per-process working directory (contains the PID); removed by clean_up().
# -
LOCK_DIR="/tmp/${script_name}.$$.LOCK"

# - Scratch file that receives stderr of the systemctl calls.
# -
log_file="${LOCK_DIR}/${script_name%%.*}.log"

# - Whitespace separated list of scripts that must not run in parallel.
# - Each entry is '<script>', '<script>:CHECK_PROCESS_LIST' or
# - '<script>:<lock-directory>'.
# -
CONFLICTING_SCRIPTS="check_sympa_spool_msg_dir.sh"
#---------------------------------------
#-----------------------------
# Base Function(s)
#-----------------------------
#---------------------------------------
# - Print an optional error message followed by the usage text, then exit.
# -
# - Globals:   terminal (read) - "true" enables ANSI bold headings
# - Arguments: $1 - optional error message, reported through error()
# - Outputs:   usage text on stdout
# - Returns:   does not return; leaves the script via 'clean_up 1'
# -
usage() {
  local bold="" reset="" self

  [[ -n "$1" ]] && error "$1"

  # Only a real terminal gets escape sequences. A plain '[[ $terminal ]]'
  # is also true for the string "false", so compare the value explicitly.
  if [[ "${terminal:-false}" == true ]] ; then
    bold=$'\033[1m'
    reset=$'\033[m'
  fi

  self="$(basename -- "$0")"

  printf '%s\n' \
    "" \
    "${bold}Usage:${reset}" \
    "${self} <Systemd-Service>" \
    "${bold}Description${reset}" \
    "Script checks if given service is running and tries to restart service" \
    "if it is not running." \
    "${bold}Options${reset}" \
    "No Options available" \
    "${bold}Example:${reset}" \
    "Check if apache2 service is running. Restart service if needed." \
    "${self} apache2" \
    ""

  clean_up 1
}
# - Perform program exit housekeeping and terminate the script.
# -
# - Globals:   LOCK_DIR (read) - directory that is removed recursively
# - Arguments: $1 - exit status; defaults to 1 when omitted (which is how a
# -                 signal trap invokes this function), so an interrupted
# -                 run is never reported as success
# - Returns:   does not return
# -
clean_up() {
  # Nothing to remove if the lock directory name is empty.
  [[ -n "$LOCK_DIR" ]] && rm -rf -- "$LOCK_DIR"
  blank_line
  exit "${1:-1}"
}
# - Report a fatal problem and terminate the script with status 1.
# -
# - Globals:   terminal (read) - selects the coloured or the plain variant
# - Arguments: $* - message text
# - Outputs:   message and termination notice on stdout
# - Returns:   does not return; leaves the script via 'clean_up 1'
# -
fatal() {
  local headline=" [ Fatal ] $*"
  local farewell=" Script terminated.."

  if $terminal ; then
    headline=" [ \033[31m\033[1mFatal\033[m ] $*"
    farewell=" \033[1mScript terminated\033[m.."
  fi

  echo
  echo -e "$headline"
  echo
  echo -e "$farewell"
  clean_up 1
}
# - Print an error message framed by blank lines. Always prints,
# - independent of LOGGING.
# -
# - Globals:   terminal (read) - selects the coloured or the plain variant
# - Arguments: $* - message text
# - Outputs:   message on stdout
# -
error() {
  local msg="$*"

  echo
  if $terminal ; then
    echo -e " [ \033[31m\033[1mError\033[m ] ${msg}"
  else
    printf '%s\n' " [ Error ] ${msg}"
  fi
  echo
}
# - Print a warning framed by blank lines.
# -
# - Globals:   LOGGING, terminal (read) - nothing is printed if both are false
# - Arguments: $* - message text
# - Outputs:   message on stdout
# -
warn() {
  # Stay silent unless logging is on or we are attached to a terminal.
  $LOGGING || $terminal || return 0

  local msg="$*"

  echo
  if $terminal ; then
    echo -e " [ \033[33m\033[1mWarn\033[m ] ${msg}"
  else
    printf '%s\n' " [ Warn ] ${msg}"
  fi
  echo
}
# - Print an informational message framed by blank lines.
# -
# - Globals:   LOGGING, terminal (read) - nothing is printed if both are false
# - Arguments: $* - message text
# - Outputs:   message on stdout
# -
info() {
  # Stay silent unless logging is on or we are attached to a terminal.
  $LOGGING || $terminal || return 0

  local msg="$*"

  echo
  if $terminal ; then
    echo -e " [ \033[32m\033[1mInfo\033[m ] ${msg}"
  else
    printf '%s\n' " [ Info ] ${msg}"
  fi
  echo
}
# - Print a success message framed by blank lines.
# -
# - Globals:   LOGGING, terminal (read) - nothing is printed if both are false
# - Arguments: $* - message text
# - Outputs:   message on stdout
# -
ok() {
  # Stay silent unless logging is on or we are attached to a terminal.
  $LOGGING || $terminal || return 0

  local msg="$*"

  echo
  if $terminal ; then
    echo -e " [ \033[32m\033[1mOk\033[m ] ${msg}"
  else
    printf '%s\n' " [ Ok ] ${msg}"
  fi
  echo
}
# - Emit one empty line, but only when attached to a terminal.
# -
# - Globals: terminal (read)
# - Returns: 0 in either case
# -
blank_line() {
  $terminal || return 0
  echo
}
# - Strip leading and trailing whitespace from the given text.
# -
# - Arguments: $* - text; several arguments are joined as "$*" does
# - Outputs:   trimmed text on stdout, without a trailing newline
# -
trim() {
  local text="$*"

  # Remove the longest leading run of whitespace ...
  text="${text#"${text%%[![:space:]]*}"}"
  # ... and the longest trailing run.
  text="${text%"${text##*[![:space:]]}"}"

  # printf instead of 'echo -n': echo would treat a value such as "-n" or
  # "-e" as an option and print nothing.
  printf '%s' "$text"
}
# - Is stdout attached to a terminal?
# -
# - Both flags start out as "false" and are switched on together when file
# - descriptor 1 is a terminal.
# -
terminal=false
LOGGING=false
if [[ -t 1 ]] ; then
  terminal=true
  LOGGING=true
fi
# ----------
# - Jobhandling
# ----------
# - Run 'clean_up' for signals SIGHUP SIGINT SIGTERM
# -
# - The exit code is passed explicitly: a trap invokes its command without
# - arguments, and an interrupted run must not look like a successful one.
# -
trap 'clean_up 1' SIGHUP SIGINT SIGTERM

# - Create lock directory "$LOCK_DIR"
# -
# - The log file used for the systemctl calls lives inside this directory,
# - so give up right away if it can neither be created nor already exists.
# -
mkdir "$LOCK_DIR" \
  || [[ -d "$LOCK_DIR" ]] \
  || fatal "Cannot create lock directory '$LOCK_DIR'!"
# - Stop here if one of the given (conflicting) scripts is running
# -
if [[ -n "$CONFLICTING_SCRIPTS" ]] ; then

  # - Try using a random start delay to prevent (or at least reduce the
  # - chance) that conflicting scripts all abort because they were started
  # - at the same time.
  # -
  # - !! Notice !!
  # - This only makes sense if a fixed LOCK directory is used, otherwise the
  # - process list (and NOT the LOCK directory) is used to look for scripts
  # - running in parallel.
  # -
  # - Skip the delay if running in a terminal (from console).
  # -
  if ! $terminal && [[ "$LOCK_DIR" = "/tmp/${script_name%%.*}.LOCK" ]] ; then
    _shift="$(( RANDOM % 10 + 1 ))"
    sleep "$(( RANDOM % 25 + _shift ))"
  fi

  _stop_running=false

  # - Unquoted on purpose: the list is split on whitespace.
  # -
  for _val in $CONFLICTING_SCRIPTS ; do

    # - Entry format: <script>[:CHECK_PROCESS_LIST|:<lock-directory>]
    # - '-r' keeps backslashes in the entry intact.
    # -
    IFS=':' read -r -a _val_arr <<< "${_val}"
    _script_name="$(basename -- "${_val_arr[0]}")"

    if [[ -n "${_val_arr[1]}" ]] ; then

      if [[ "${_val_arr[1]}" = "CHECK_PROCESS_LIST" ]] ; then
        # - Look for the script in the process list; editor sessions (vim)
        # - and the grep itself do not count.
        # -
        check_string_ps="${_val_arr[0]}"
        if ps -e f \
          | grep -E "\s+${check_string_ps}" \
          | grep -v grep \
          | grep -v -E "\s+vim\s+" > /dev/null ; then
          _stop_running=true
        fi
      elif [[ -d "${_val_arr[1]}" ]] ; then
        # - An explicitly given lock directory exists.
        # -
        _stop_running=true
      fi

    elif [[ -d "/tmp/${_script_name%%.*}.LOCK" ]] ; then
      # - Default lock directory of the other script exists.
      # -
      _stop_running=true
    fi

    if $_stop_running ; then
      echo ""
      echo "[ Error ]: The \"${_script_name}\" script is currently running, but it conflicts with this script."
      echo ""
      echo " Exiting now.."
      echo ""
      clean_up 1
    fi

  done # for _val in $CONFLICTING_SCRIPTS ; do

fi # if [[ -n "$CONFLICTING_SCRIPTS" ]] ; then
#---------------------------------------
#-----------------------------
# Check some prerequisites
#-----------------------------
#---------------------------------------
# - The service to check is taken from the first positional parameter;
# - without it an error plus the usage text is shown.
# -
case "$1" in
  "")
    error "No Service given!"
    usage
    ;;
  *)
    service_name=$1
    ;;
esac
# - Systemd supported ?
# -
# - 'systemctl' has to be callable. The 'systemd' binary itself is not
# - guaranteed to be in PATH, so a system that was booted with systemd is
# - also recognised by the directory /run/systemd/system (see sd_booted(3)).
# -
systemd="$(command -v systemd)"
systemctl="$(command -v systemctl)"
systemd_supported=false
if [[ -n "$systemctl" ]] \
  && { [[ -n "$systemd" ]] || [[ -d /run/systemd/system ]] ; } ; then
  systemd_supported=true
else
  fatal "Systemd is not present!"
fi
# - Print the unit file name belonging to the service name given as $1.
# -
# - The (lower-cased) name is tried first with a trailing "d" and then as
# - given; both are matched as a prefix of the unit file name. A prefix
# - counts if at least one matching line contains "enabled" or "disabled";
# - the first matching unit file name is printed.
# -
# - Arguments: $1 - service name
# - Outputs:   unit file name on stdout (nothing if no unit was found)
# - Returns:   0 if a unit was found, 1 otherwise
# -
_find_systemd_service() {
  local wanted="${1,,}"
  local unit_files prefix

  # Query systemd once and reuse the listing for every lookup.
  unit_files="$(systemctl -t service list-unit-files)"

  for prefix in "${wanted}d" "${wanted}" ; do
    if grep -e "^${prefix}" <<< "$unit_files" \
      | grep -q -E "(enabled|disabled)" ; then
      grep -e "^${prefix}" <<< "$unit_files" | awk '{print$1}' | head -1
      return 0
    fi
  done

  return 1
}

# - Always assign, so a value inherited from the environment cannot
# - satisfy the check below.
# -
SYSTEMD_SERVICE="$(_find_systemd_service "$service_name")"

if [[ -z "$SYSTEMD_SERVICE" ]] ; then
  fatal "Systemd Service '${service_name}' not found!"
fi
#---------------------------------------
#-----------------------------
# Check if service is running
#-----------------------------
#---------------------------------------
# - Print the headline of the check; the row of '=' below it is extended
# - by one character for every character of the service name.
# -
if $LOGGING ; then
  declare -i _length="${#service_name}"
  echo ""
  echo -e " Check if $service_name service is running.."
  echo -en " ==============================="
  declare -i i=0
  for (( i = 0; i < _length; i++ )) ; do
    echo -n "="
  done
  echo ""
fi
# - Print the MainPID systemd reports for the unit given as $1.
# -
# - The "MainPID=<n>" line is parsed instead of being passed to 'eval';
# - anything that is not a plain number is rejected.
# -
# - Outputs: the PID on stdout (nothing if it cannot be determined)
# - Returns: 0 if a numeric PID was printed, 1 otherwise
# -
_service_main_pid() {
  local reply

  reply="$(systemctl show -p MainPID "$1")"
  reply="${reply#MainPID=}"
  [[ "$reply" =~ ^[0-9]+$ ]] || return 1
  printf '%s' "$reply"
}

# - The service counts as running if systemd reports it as active and,
# - where a main PID is known, a process with that PID exists.
# -
need_restart=false
if systemctl is-active "$SYSTEMD_SERVICE" > /dev/null 2>&1 ; then
  MainPID="$(_service_main_pid "$SYSTEMD_SERVICE")"
  if [[ -n "$MainPID" ]] && [[ "$MainPID" -gt 0 ]] ; then
    if ps -p "$MainPID" > /dev/null 2>&1 ; then
      ok "$service_name service is up and running."
    else
      error "$service_name service seems to be down! Trying to restart service now.."
      need_restart=true
    fi
  else
    # - Active, but systemd reports no usable main PID.
    # -
    ok "$service_name service is up and running."
    warn "Cannot determine MainPID for Service '$service_name', but the status is 'active'."
  fi
else
  error "$service_name service seems to be down! Trying to restart service now.."
  need_restart=true
fi
# - Restart the service if the check above asked for it, then verify the
# - result.
# -
# - NOTE(review): the script ends with 'clean_up 0' even if the restart
# - failed - confirm whether callers rely on that exit status.
# -
if [[ "${need_restart:-false}" == true ]] ; then

  # - stderr of every step is captured in $log_file and reported if the
  # - step fails; a failing step does not abort the sequence.
  # -
  if ! "$systemctl" daemon-reload > /dev/null 2> "$log_file" ; then
    error "$(cat "$log_file")"
  fi
  sleep 2

  if ! "$systemctl" stop "$SYSTEMD_SERVICE" > /dev/null 2> "$log_file" ; then
    error "$(cat "$log_file")"
  fi
  sleep 10

  if ! "$systemctl" start "$SYSTEMD_SERVICE" > /dev/null 2> "$log_file" ; then
    error "$(cat "$log_file")"
  fi
  sleep 5

  if systemctl is-active "$SYSTEMD_SERVICE" > /dev/null 2>&1 ; then

    # - Parse "MainPID=<n>" instead of passing the output to 'eval'.
    # -
    MainPID="$(systemctl show -p MainPID "$SYSTEMD_SERVICE")"
    MainPID="${MainPID#MainPID=}"

    if [[ "$MainPID" =~ ^[0-9]+$ ]] && [[ "$MainPID" -gt 0 ]] ; then
      if ps -p "$MainPID" > /dev/null 2>&1 ; then
        ok "$service_name service is now up and running. New PID is '$MainPID'"
        # - ok() can be silent without a terminal; print the result
        # - unconditionally in that case.
        # -
        if ! $terminal ; then
          echo " [ Ok ] service is now up and running. New PID is '$MainPID'"
        fi
      else
        error "Restarting $service_name service failed!"
      fi
    else
      warn "Cannot determine MainPID for Service '$service_name', but the status is 'active'."
      ok "$service_name service is up and running."
      if ! $terminal ; then
        # - No PID is known in this branch, so none is reported.
        # -
        echo " [ Ok ] service is now up and running."
        echo " [ Warn ] Cannot determine MainPID for Service '$service_name', but the status is 'active'."
      fi
    fi

  else
    error "Restarting $service_name service failed!"
  fi

fi

clean_up 0