monitoring/check_local_webservice.sh

485 lines
12 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Global locations, all derived from the resolved (symlink-free) path of
# this script:
#
#   script_name         file name of the script
#   working_dir         directory that contains the script
#   conf_file           <working_dir>/conf/<script name up to the first dot>.conf
#   LOCK_DIR            lock directory; it exists while an instance is running
#   RESTART_CHECK_FILE  marker file remembering that the previous run failed
#
# Every expansion is quoted so that a path containing whitespace or glob
# characters is handled as one word.
script_name="$(basename -- "$(realpath -- "$0")")"
working_dir="$(dirname -- "$(realpath -- "$0")")"

conf_file="${working_dir}/conf/${script_name%%.*}.conf"

LOCK_DIR="/tmp/${script_name%%.*}.LOCK"
RESTART_CHECK_FILE="/tmp/${script_name%%.*}.NEED-RESTART"

# Recipients of alert mails; filled after the configuration has been read.
declare -a alert_email_arr
# -------------
# --- Some functions
# -------------
clean_up() {
  # Exit housekeeping: drop the lock directory, terminate the log output
  # with an empty line (only when logging is enabled) and leave the script.
  #
  # Arguments: $1 - exit status of the script
  rm -rf "$LOCK_DIR"

  ! $LOGGING || echo ""

  exit $1
}
echononl() {
  # Print the arguments (joined by a blank) to stderr WITHOUT a trailing
  # newline, interpreting backslash escapes such as \033.
  # Nothing is printed unless the script runs on a terminal with logging
  # enabled.
  #
  # The former implementation probed the behaviour of `echo` through a file
  # with a predictable name in /tmp on every call. The script runs under
  # bash (see shebang), where printf's %b yields the same output without
  # touching the file system.
  #
  # Globals:   terminal, LOGGING (read)
  # Arguments: text to print
  if $terminal && $LOGGING; then
    printf '%b' "$*" >&2
  fi
}
fatal() {
  # Report an unrecoverable error and leave the script with exit status 1
  # (via clean_up). On a terminal the message is coloured and backslash
  # escapes in the message are interpreted; otherwise plain text is written.
  #
  # Globals:   terminal (read)
  # Arguments: error message
  local reason="$*"

  echo ""
  if ! $terminal; then
    printf ' [ Fatal ]: %s\n\n Script was terminated....\n' "$reason"
  else
    echo -e "[ \033[31m\033[1mError\033[m ]: ${reason}"
    printf '\n\t\033[31m\033[1mScript was interupted\033[m!\n'
  fi
  echo ""

  clean_up 1
}
error() {
  # Print an error message framed by empty lines; the script continues.
  # On a terminal the message is indented and coloured (backslash escapes
  # in the message are interpreted), otherwise it is written as plain text.
  #
  # Globals:   terminal (read)
  # Arguments: error message
  local details="$*"

  if ! $terminal; then
    printf '\n[ Error ]: %s\n\n' "$details"
    return
  fi

  echo ""
  echo -e "\t[ \033[31m\033[1mError\033[m ]: ${details}"
  echo ""
}
warn() {
  # Print a warning framed by empty lines; the script continues.
  # On a terminal the message is indented and coloured (backslash escapes
  # in the message are interpreted), otherwise it is written as plain text.
  #
  # Globals:   terminal (read)
  # Arguments: warning message
  local details="$*"

  if ! $terminal; then
    printf '\n[ Warning ]: %s\n\n' "$details"
    return
  fi

  echo ""
  echo -e "\t[ \033[33m\033[1mWarning\033[m ]: ${details}"
  echo ""
}
warn_only_terminal() {
  # Like warn(), but completely silent when the script does not run on a
  # terminal.
  #
  # Globals:   terminal (read)
  # Arguments: warning message (backslash escapes are interpreted)
  $terminal || return 0

  local details="$*"
  echo ""
  echo -e "\t[ \033[33m\033[1mWarning\033[m ]: ${details}"
  echo ""
}
echo_done() {
  # Print the status tag "done" - but only when logging is enabled.
  # On a terminal the tag is coloured and placed at column 75 of the
  # current line (ESC[75G); otherwise a plain tag is written.
  #
  # Globals: terminal, LOGGING (read)
  $LOGGING || return 0

  if $terminal; then
    printf '\033[75G[ \033[32mdone\033[m ]\n'
  else
    printf ' [ done ]\n'
  fi
}
echo_ok() {
  # Print the status tag "ok" - but only when logging is enabled.
  # On a terminal the tag is coloured and placed at column 75 of the
  # current line (ESC[75G); otherwise a plain tag is written.
  #
  # Globals: terminal, LOGGING (read)
  $LOGGING || return 0

  if $terminal; then
    printf '\033[75G[ \033[32mok\033[m ]\n'
  else
    printf ' [ ok ]\n'
  fi
}
echo_failed() {
  # Print the status tag "failed" - but only when logging is enabled.
  # On a terminal the tag is coloured and placed at column 75 of the
  # current line (ESC[75G); otherwise a plain tag is written.
  #
  # Globals: terminal, LOGGING (read)
  $LOGGING || return 0

  if $terminal; then
    printf '\033[75G[ \033[1;31mfailed\033[m ]\n'
  else
    printf ' [ failed ]\n'
  fi
}
echo_skipped() {
  # Print the status tag "skipped" - but only when logging is enabled.
  # On a terminal the tag is coloured and placed at column 75 of the
  # current line (ESC[75G); otherwise a plain tag is written.
  #
  # Globals: terminal, LOGGING (read)
  $LOGGING || return 0

  if $terminal; then
    printf '\033[75G[ \033[33m\033[1mskipped\033[m ]\n'
  else
    printf ' [ skipped ]\n'
  fi
}
is_number() {
  # Succeeds (status 0) if $1 is a non-empty string that consists of the
  # digits 0-9 only; fails (status 1) otherwise - also for signed or
  # fractional values.
  #
  # Arguments: $1 - value to test
  [[ -n "$1" && "$1" != *[!0-9]* ]]
}
trim() {
  # Write the arguments (joined by a blank) to stdout with leading and
  # trailing whitespace removed and without a trailing newline.
  #
  # printf is used for the output because `echo -n "$var"` treats a value
  # such as "-n" or "-e" as an option and prints nothing.
  #
  # Arguments: text to trim
  local var="$*"

  # The inner expansion yields the leading (resp. trailing) run of
  # whitespace, the outer one strips exactly that run.
  var="${var#"${var%%[![:space:]]*}"}"
  var="${var%"${var##*[![:space:]]}"}"

  printf '%s' "$var"
}
blank_line() {
  # Emit one empty line, but only when the script runs on a terminal.
  #
  # Globals: terminal (read)
  ! $terminal || echo ""
}
reboot_system() {
  # Wait ten seconds, then force a reboot of the machine. All output of the
  # reboot command is discarded.
  #
  # A notification mail sent from inside this function is disabled; the
  # snippet is kept for reference:
  #
  #   content_type='Content-Type: text/plain;\n charset="utf-8"'
  #   datum="$(date +"%d.%m.%Y")"
  #   from_address="root@$(hostname --long)"
  #   msg="S*"
  #
  #   for _email in ${alert_email_arr[@]} ; do
  #     echo -e "To:${_email}\n${content_type}\nSubject:[Fatal: Local Webservice NOT rsponding] - Reboot System\n${msg}" \
  #       | sendmail -F "Error `hostname -f`" -f $sender_address $_email
  #   done
  local -r grace_seconds=10

  sleep "$grace_seconds"
  /sbin/reboot -f &> /dev/null
}
# -------------
# --- Read Configurations from $conf_file
# -------------

# Built-in defaults; each of them applies when the configuration file
# leaves the corresponding setting empty.
#
DEFAULT_NUMBER_LINES=20
DEFAULT_TIME_OUT=20
DEFAULT_CONFLICTING_SCRIPTS="
/root/bin/mysql/optimize_mysql_tables.sh
/root/bin/mysql/optimize_mysql_tables-ND.sh
/root/bin/monitoring/check_webservice_load.sh
"

# The configuration file is mandatory.
if [[ ! -f "$conf_file" ]]; then
  echo ""
  echo -e " [ Fatal ] Configuration file '$(basename -- "${conf_file}")' not found!"
  echo ""
  echo -e "\tScript terminated.."
  echo ""
  exit 1
fi

source "$conf_file"

# 'alert_email_addresses' is a whitespace separated list; word splitting is
# intended here.
for _email in $alert_email_addresses ; do
  alert_email_arr+=("$_email")
done

[[ -n "$sender_address" ]] || sender_address="check_local_webservice@$(hostname -f)"
[[ -n "$content_type" ]] || content_type='Content-Type: text/plain;\n charset="utf-8"'

# TIME_OUT must be a whole number of seconds. Any other value would leave
# TIME_OUT_MAX empty, every curl call would then fail for a reason that has
# nothing to do with the web service - and the system would be rebooted.
if [[ ! "$TIME_OUT" =~ ^[0-9]+$ ]]; then
  if [[ -n "$TIME_OUT" ]]; then
    echo "[ Warning ]: TIME_OUT='${TIME_OUT}' is not a whole number - using default (${DEFAULT_TIME_OUT})." >&2
  fi
  TIME_OUT=$DEFAULT_TIME_OUT
fi

# Overall limit per request: connect timeout plus five seconds.
# '10#' forces base 10, so a value with a leading zero is not read as octal.
TIME_OUT_MAX="$(( 10#$TIME_OUT + 5 ))"

[[ -n "$CONFLICTING_SCRIPTS" ]] || CONFLICTING_SCRIPTS="$DEFAULT_CONFLICTING_SCRIPTS"
# -------------
# --- Check some prerequisites
# -------------

# - Is stdout connected to a terminal?
# -
# - Coloured output ('terminal') and progress messages ('LOGGING') are both
# - enabled on a terminal and both disabled otherwise (e.g. when run by cron).
# -
terminal=false
LOGGING=false
if [[ -t 1 ]] ; then
  terminal=true
  LOGGING=true
fi
# - Stop here if one of the given scripts is running.
# -
# - CONFLICTING_SCRIPTS is a whitespace separated list. Each entry is one of
# -
# -    <script>                      conflict if the directory
# -                                  /tmp/<script name up to the first dot>.LOCK exists
# -    <string>:CHECK_PROCESS_LIST   conflict if <string> shows up in the process list
# -    <script>:<directory>          conflict if <directory> exists
# -
if [[ ${#CONFLICTING_SCRIPTS} -gt 0 ]] ; then

  # - Try using a random start delay to prevent (or at least have a small chance) that
  # - conflicting scripts will both/all abort if they start at the same time.
  # -
  # - !! Notice !!
  # - This only makes sense if a fixed LOCK directory is used, otherwise the process list
  # - (and NOT the LOCK-directory) is used to look for scripts running in parallel.
  # -
  # - Skip the delay if running in a terminal (from console)
  # -
  if ! $terminal ; then
    if [[ "$LOCK_DIR" = "/tmp/${script_name%%.*}.LOCK" ]]; then
      _shift="$(( RANDOM % 10 + 1 ))"
      sleep "$(( RANDOM % 25 + _shift ))"
    fi
  fi

  _stop_running=false
  for _val in $CONFLICTING_SCRIPTS ; do

    # Split "<first>:<second>" into its two fields.
    IFS=':' read -r -a _val_arr <<< "${_val}"
    _script_name="$(basename -- "${_val_arr[0]}")"

    if [[ -n "${_val_arr[1]}" ]] ; then
      if [[ "${_val_arr[1]}" = "CHECK_PROCESS_LIST" ]] ; then
        check_string_ps="${_val_arr[0]}"
        # Ignore the grep itself and editors (vim) that have the script open.
        if ps -e f | grep -E "\s+${check_string_ps}" | grep -v grep | grep -v -E "\s+vim\s+" > /dev/null ; then
          _stop_running=true
        fi
      elif [[ -d "${_val_arr[1]}" ]] ; then
        _stop_running=true
      fi
    elif [[ -d "/tmp/${_script_name%%.*}.LOCK" ]]; then
      _stop_running=true
    fi

    if $_stop_running ; then
      warn_only_terminal "\033[1m${_script_name}\033[m is currently running, but it conflicts with this script.
Exiting now.."

      # The lock directory has NOT been created by this instance at this
      # point. Exit directly instead of calling clean_up(): clean_up()
      # removes "$LOCK_DIR" and would thereby delete the lock of another,
      # still running instance of this script.
      if $LOGGING ; then
        echo ""
      fi
      exit 1
    fi # if $_stop_running ; then

  done # for _val in $CONFLICTING_SCRIPTS ; do

fi # if [[ ${#CONFLICTING_SCRIPTS} -gt 0 ]] ; then
# -------------
# - Job is already running?
# -------------

# - mkdir either creates the lock directory or fails because it exists
# - already, i.e. because another instance holds the lock. In the latter
# - case report the problem (stdout and alert mail) and stop.
# -
if mkdir "$LOCK_DIR" 2> /dev/null ; then

  ## - Remove the lock directory if the script gets interrupted by a signal.
  ## - (Regular exit paths call clean_up themselves.)
  trap "clean_up 1" SIGHUP SIGINT SIGTERM

else

  datum="$(date +"%d.%m.%Y %H:%M")"
  _self_name="$(basename -- "$0")"

  msg="[ Error ]: A previos instance of \"${_self_name}\" seems already be running.\n\n Exiting now.."

  echo ""
  echo "[ Error ]: A previos instance of that script \"${_self_name}\" seems already be running."
  echo ""
  echo -e " Exiting now.."
  echo ""

  for _email in "${alert_email_arr[@]}" ; do
    # The empty line after the Subject header separates the mail headers
    # from the mail body.
    echo -e "To:${_email}\n${content_type}\nSubject:Error cronjob ${_self_name} -- $datum\n\n${msg}\n" \
      | sendmail -F "Error $(hostname -f)" -f "$sender_address" "$_email"
  done

  # Do NOT call clean_up here: the lock belongs to the other instance.
  exit 1

fi
# -------------
# --- Check some further prerequisites
# -------------

# - Systemd supported?
# -
# - 'systemd_supported' becomes true only if both binaries, 'systemd' and
# - 'systemctl', are found by 'which'.
# -
systemd_supported=false
systemd=$(which systemd)
systemctl=$(which systemctl)
if [[ -z "$systemd" || -z "$systemctl" ]] ; then
  : # at least one of the two is missing - keep 'false'
else
  systemd_supported=true
fi

# Start the log output with an empty line.
! $LOGGING || echo ""
# Probe the configured sites.
#
# Every site gets up to three attempts (two seconds apart). The web service
# is considered alive as soon as ONE site answers with an accepted status:
# the marker file of a previous failed run is removed and the script ends
# with exit status 0. Only if no site answers at all, execution continues
# below with the alert handling.
if [[ -n "$LOCAL_WEBSITES_TO_CHECK" ]] ; then

  # Whitespace separated list - word splitting is intended.
  for _site in $LOCAL_WEBSITES_TO_CHECK ; do

    echononl " Check site \033[1m$_site\033[m .."

    declare -i i=0
    while [[ $i -lt 3 ]] ; do

      # HEAD request (-I), follow redirects (-L), accept any certificate
      # (-k); only the final HTTP status code is captured. The URL is quoted
      # so that characters like '?' or '*' are not treated as glob patterns.
      response="$(curl --max-time "$TIME_OUT_MAX" --connect-timeout "$TIME_OUT" \
        -I -k -L --write-out '%{http_code}' --silent --output /dev/null \
        "$_site" 2> "${LOCK_DIR}/err.msg")"

      # 200 - OK
      # 401 - Unauthorized (response of roundcube webmailer (since version 1.4)
      if [[ "$response" == "200" || "$response" == "401" ]]; then
        echo_ok
        if [[ -f "$RESTART_CHECK_FILE" ]] ; then
          rm -f "$RESTART_CHECK_FILE"
        fi
        clean_up 0
      fi

      sleep 2
      ((i++))
    done

    echo_failed

  done

else
  warn "No Website to check given (empty var 'LOCAL_WEBSITES_TO_CHECK')"
  clean_up 10
fi
# Assemble the diagnostic part of the alert mail:
#
#   msg               excerpts of system logfiles, lastlog and dmesg
#   msg_user_defined  excerpts of the logfiles listed in LOG_FILES_TO_MONITOR
#                     (entries "<file>" or "<file>:<number of lines>")
#
# Both variables are printed with `echo -e` further down, which interprets
# backslash escapes. Logfile content is arbitrary text: without protection a
# "\c" inside a log line would cut off the rest of the mail and sequences
# like "\n" or "\033" would be rewritten. Therefore every backslash in
# captured output is doubled here, so `echo -e` reproduces the text literally.

# Filter: doubles every backslash read from stdin.
_escape_backslashes() {
  sed 's/\\/\\\\/g'
}

msg_head="\n==========\nSystem logfiles\n==========\n"

msg00="Last entries (20 lines) of \"/var/log/syslog\":"
msg01="==============================================="
msg02="$(tail -n 20 /var/log/syslog | _escape_backslashes)"

msg03="Last entries (20 lines) of \"/var/log/messages\":"
msg04="================================================="
msg05="$(tail -n 20 /var/log/messages | _escape_backslashes)"

msg06="Last entries (20 lines) of \"/var/log/auth.log\":"
msg07="================================================="
msg08="$(tail -n 20 /var/log/auth.log | _escape_backslashes)"

msg09="Last entries (20 lines) of \"/var/log/daemon.log\":"
msg10="================================================="
msg11="$(tail -n 20 /var/log/daemon.log | _escape_backslashes)"

msg12="Last entries (20 lines) of \"/var/log/kern.log\":"
msg13="================================================="
msg14="$(tail -n 20 /var/log/kern.log | _escape_backslashes)"

msg15="Lastlog:"
msg16="========"
msg17="$(lastlog | _escape_backslashes)"

msg18="dmesg:"
msg19="======"
msg20="$(dmesg -T | _escape_backslashes)"

msg="${msg_head}\n${msg00}\n${msg01}\n${msg02}\n\n${msg03}\n${msg04}\n${msg05}\n\n${msg06}\n${msg07}\n${msg08}\n\n${msg09}\n${msg10}\n${msg11}\n\n${msg12}\n${msg13}\n${msg14}\n\n${msg15}\n${msg16}\n${msg17}\n\n${msg18}\n${msg19}\n${msg20}"

if [[ ${#LOG_FILES_TO_MONITOR} -gt 0 ]] ; then

  msg_user_defined="\n==========\nUser defined logfiles\n==========\n"

  # Whitespace separated list - word splitting is intended.
  for _val in $LOG_FILES_TO_MONITOR ; do

    IFS=':' read -r -a _val_arr <<< "${_val}"
    _log_file="${_val_arr[0]}"

    # Use the given number of lines if it is a valid number, the default
    # otherwise.
    if [[ -n "${_val_arr[0]}" ]] && is_number "${_val_arr[1]}" ; then
      _number_lines=${_val_arr[1]}
    else
      _number_lines=$DEFAULT_NUMBER_LINES
    fi

    # '-s' is false for an empty file as well as for a missing one.
    if [[ -s "${_log_file}" ]] ; then
      msg_user_defined="${msg_user_defined}\n---\nLast entries (${_number_lines} lines) of \"${_log_file}\":\n---\n$(tail -n "${_number_lines}" "${_log_file}" | _escape_backslashes)\n"
    else
      msg_user_defined="${msg_user_defined}\n---\nLast entries (${_number_lines} lines) of \"${_log_file}\":\n---\n-- FILE IS EMPTY --\n"
    fi

  done

  msg_user_defined="${msg_user_defined}\n\n"

else
  msg_user_defined=""
fi
# No site answered. What happens now depends on the marker file
# "$RESTART_CHECK_FILE":
#
#   marker missing - first failed run: create the marker, send a warning
#                    mail and exit with status 20. Nothing else is done.
#   marker present - the previous run failed as well: send an error mail,
#                    remove the marker and reboot the system.
if [[ ! -f "$RESTART_CHECK_FILE" ]]; then

  touch "$RESTART_CHECK_FILE"

  error "The local webservice seems to be down. Some Websites do not respond as expected:"
  err_msg="\n[ Warning ]: The local webservice seems to be down.\n\n Some Websites do not respond as expected:\n"

  # List the checked sites on stdout and in the mail text.
  for _site in $LOCAL_WEBSITES_TO_CHECK ; do
    if $LOGGING ; then
      echo -e "\t \033[1m$_site\033[m"
    else
      echo " $_site"
    fi
    err_msg="${err_msg} ${_site}\n"
  done

  err_msg="${err_msg}\nFor now nothing is to. because its the first time..\n"

  for _email in "${alert_email_arr[@]}" ; do
    echo -e "To:${_email}\n${content_type}\nSubject:[Warn: Local Webservice NOT rsponding] - Do nothing for now\n$err_msg\nFilesystem usage:\n=================\n$(df -h)\n\n${msg_user_defined}\n${msg}" \
      | sendmail -F "Error $(hostname -f)" -f "$sender_address" "$_email"
  done

  clean_up 20

else

  error "The local webservice is still down. Some Websites still not responding:"
  err_msg="\n[ Error ]: The local webservice seems to be down.\n\n Some Websites still not responding:\n"

  # List the checked sites on stdout and in the mail text.
  for _site in $LOCAL_WEBSITES_TO_CHECK ; do
    if $LOGGING ; then
      echo -e "\t \033[1m$_site\033[m"
    else
      echo " $_site"
    fi
    err_msg="${err_msg} ${_site}\n"
  done

  if $LOGGING ; then
    echo -e "\n\033[1mGoing to restart the system NOW..\033[m"
  else
    echo ""
    echo "Going to restart the system NOW.."
  fi

  err_msg="${err_msg}\nGoing to restart the system..\n"

  for _email in "${alert_email_arr[@]}" ; do
    echo -e "To:${_email}\n${content_type}\nSubject:[ REBOOT System ]: Local Webservice IS STILL NOT rsponding.\n$err_msg\nFilesystem usage:\n=================\n$(df -h)\n\n${msg_user_defined}\n${msg}" \
      | sendmail -F "Error $(hostname -f)" -f "$sender_address" "$_email"
  done

  # Remove the marker before rebooting, so the first failed run after the
  # reboot only warns again.
  rm -f "$RESTART_CHECK_FILE"

  reboot_system

fi

# Only reached if reboot_system returned.
if $LOGGING ; then
  echo ""
fi

clean_up 0