#!/usr/bin/env bash
#
# check_local_mattermost_service.sh
#
# Checks whether the local Mattermost web site(s) answer with HTTP 200.
# If not, nginx and/or the Mattermost service are restarted via systemctl
# and a report (restart result, filesystem usage, process list, log
# excerpts) is mailed to the configured alert addresses.
#
# Configuration is sourced from
#   <script-dir>/conf/<script-name-without-suffix>.conf
# A commented sample configuration is reproduced at the end of this file.
#
# Exit codes:
#    0  all sites answered 200, or the restart procedure ran and the
#       report was sent
#    1  configuration file missing, conflicting script running, another
#       instance holds the lock, or the script was interrupted by a signal
#   10  nothing to check (LOCAL_MM_WEBSITES_TO_CHECK is empty)
#
# Provenance: reconstructed from the mail-formatted git patch
#   66bf37f24eda39b9106d42a40ea91fe28d65c8c8
#   From: Christoph, Sat, 11 Jun 2022 02:22:59 +0200
#   Subject: [PATCH] Add script check_local_mattermost_service.sh.
# which added this script and conf/check_local_mattermost_service.conf.sample.


script_path="$(realpath -- "$0")"
script_name="$(basename -- "$script_path")"
working_dir="$(dirname -- "$script_path")"
conf_file="${working_dir}/conf/${script_name%%.*}.conf"

# The lock directory doubles as scratch space for 'error.log'.
# It may be overridden in the configuration file.
LOCK_DIR="/tmp/${script_name%%.*}.LOCK"

# Becomes true only after THIS process created $LOCK_DIR, so that clean_up
# never removes the lock of another running instance.
lock_acquired=false

declare -a alert_email_arr=()

# Accumulates the restart report that is mailed at the end.
err_msg=""


# -------------
# --- Some functions
# -------------

# Perform program exit housekeeping and exit.
# Arguments: $1 - exit code
clean_up() {
  if $lock_acquired ; then
    rm -rf -- "${LOCK_DIR:?}"
  fi
  if $LOGGING ; then
    echo ""
  fi
  exit "$1"
}

# Print a progress message without trailing newline to stderr
# (only when running in a terminal). Backslash escapes are interpreted.
echononl() {
  if $terminal && $LOGGING ; then
    printf '%b' " $*" >&2
  fi
}

# Print an error message and terminate the script with exit code 1.
fatal() {
  echo ""
  if $terminal ; then
    echo -e "[ \033[31m\033[1mError\033[m ]: $*"
    echo ""
    echo -e "\t\033[31m\033[1mScript was interrupted\033[m!"
  else
    echo " [ Fatal ]: $*"
    echo ""
    echo " Script was terminated...."
  fi
  echo ""
  clean_up 1
}

error() {
  echo ""
  if $terminal ; then
    echo -e "\t[ \033[31m\033[1mError\033[m ]: $*"
  else
    echo "[ Error ]: $*"
  fi
  echo ""
}

warn() {
  echo ""
  if $terminal ; then
    echo -e "\t[ \033[33m\033[1mWarning\033[m ]: $*"
  else
    echo "[ Warning ]: $*"
  fi
  echo ""
}

info() {
  echo ""
  if $terminal ; then
    echo -e " [ \033[32m\033[1mInfo\033[m ] $*"
  else
    echo "[ Info ] $*"
  fi
  echo ""
}

ok() {
  echo ""
  if $terminal ; then
    echo -e "\t[ \033[32m\033[1mOk\033[m ]: $*"
  else
    echo "[ Ok ]: $*"
  fi
  echo ""
}

# Print a status tag that completes a preceding echononl line.
# Arguments: $1 - tag including colour escapes (terminal output)
#            $2 - plain tag (non-terminal output)
_echo_status() {
  if ! $LOGGING ; then
    return 0
  fi
  if $terminal ; then
    echo -e "\033[75G[ $1 ]"
  else
    echo " [ $2 ]"
  fi
}

echo_done()    { _echo_status "\033[32mdone\033[m" "done" ; }
echo_ok()      { _echo_status "\033[32mok\033[m" "ok" ; }
echo_failed()  { _echo_status "\033[1;31mfailed\033[m" "failed" ; }
echo_skipped() { _echo_status "\033[33m\033[1mskipped\033[m" "skipped" ; }

# Returns 0 if $1 is a non-empty string consisting of digits only.
is_number() {
  [[ "${1:-}" =~ ^[0-9]+$ ]]
}

# Force an immediate reboot after a 10 second grace period.
# (Not called anywhere in this script.)
reboot_system() {
  sleep 10
  /sbin/reboot -f > /dev/null 2>&1
}

# Returns 0 if a process whose full command line matches the extended
# regular expression $1 exists.
process_running() {
  pgrep -f -- "$1" > /dev/null 2>&1
}

# Send one mail per address in alert_email_arr.
# Arguments: $1 - subject, $2 - body (sent verbatim)
# Only $content_type is escape-expanded (it contains '\n' by convention);
# the body is NOT, so backslashes in log excerpts cannot mangle the mail.
send_mail() {
  local subject="$1" body="$2" rcpt
  for rcpt in "${alert_email_arr[@]}" ; do
    printf 'To:%s\n%b\nSubject:%s\n\n%s\n' \
      "$rcpt" "$content_type" "$subject" "$body" \
      | sendmail -F "Error ${host_fqdn}" -f "$sender_address" "$rcpt"
  done
}

# Run a command, announcing it with echononl and reporting done/failed.
# Output of the command is kept in ${LOCK_DIR}/error.log and shown on failure.
# Arguments: $1 - progress label, $2.. - command and its arguments
# Returns:   0 if the command succeeded, 1 otherwise
run_step() {
  local label="$1"
  shift
  echononl "$label"
  if "$@" > "${LOCK_DIR}/error.log" 2>&1 ; then
    echo_done
    return 0
  fi
  echo_failed
  error "$(cat "${LOCK_DIR}/error.log")"
  return 1
}

# Print the HTTP status code curl reports for URL $1 ('000' if no response).
http_status() {
  curl --max-time "$TIME_OUT_MAX" --connect-timeout "$TIME_OUT" \
    -I -k -L --write-out '%{http_code}' --silent --output /dev/null "$1" \
    2> "${LOCK_DIR}/error.log"
}

# Print a report section: blank line, title, '=' underline, content.
report_section() {
  local title="$1" content="$2"
  printf '\n%s\n%s\n%s\n' "$title" "${title//?/=}" "$content"
}

# Stop a service, kill leftover processes, start it again and wait until
# a matching process shows up. The outcome is appended to $err_msg.
# Arguments: $1 - systemd unit name, also used for killall (e.g. nginx)
#            $2 - human readable name used in messages (e.g. NGINX)
#            $3 - regular expression for process_running
#            $4 - optional pid file; if present and non-empty, the PID it
#                 contains is killed (instead of killall) and the file removed
# Returns:   0 if the service is running afterwards, 1 otherwise
restart_service() {
  local unit="$1" label="$2" pattern="$3" pid_file="${4:-}"
  local -i tries=0

  error "${label} Service seems to be down. Going to restart Service.."

  if [[ -n "$err_msg" ]] ; then
    err_msg+=$'\n\n'
  fi
  err_msg+=$'\n'"[ Error ]: ${label} Service seems to be down."$'\n\n'
  err_msg+="           Going to restart ${label} Service"$'\n'

  if [[ -z "$systemctl" ]] ; then
    error "Command 'systemctl' not found - cannot restart ${label} Service!"
    err_msg+=$'\n'"[ Error ]: Command 'systemctl' not found - cannot restart ${label} Service!"
    return 1
  fi

  run_step "Stop ${unit} Service.." "$systemctl" stop "$unit"

  while process_running "$pattern" ; do

    if (( tries > 3 )) ; then
      # Give up killing, but still try to start the service below.
      error "Killing remaining ${unit} Process(es) failed!"
      err_msg+=$'\n'"[ Error ]: Killing remaining ${unit} Process(es) failed!"
      break
    fi

    warn "There are still ${unit} processes running"
    tries=$(( tries + 1 ))

    if [[ -n "$pid_file" && -s "$pid_file" ]] ; then
      run_step "${tries}: Kill remaining ${unit} Process(es).." \
        kill "$(cat "$pid_file")"
      rm -f -- "$pid_file" > /dev/null 2>&1
    else
      run_step "${tries}: Kill remaining ${unit} Process(es).." \
        killall "$unit"
    fi
    sleep 1
  done

  run_step "Start ${unit} Service.." "$systemctl" start "$unit"

  # Give the service a few seconds to come up.
  tries=0
  sleep 1
  until process_running "$pattern" ; do
    if (( tries > 10 )) ; then
      break
    fi
    tries=$(( tries + 1 ))
    sleep 1
  done

  if process_running "$pattern" ; then
    ok "${label} Service is up and running"
    err_msg+=$'\n'"[ OK ]: ${label} Service is up and running"
    return 0
  fi

  error "Restarting ${label} Service failed!"
  err_msg+=$'\n'"[ Error ]: Restarting ${label} Service failed!"
  return 1
}


# -------------
# --- Read Configurations from $conf_file
# -------------

# Some default values
#
DEFAULT_NUMBER_LINES=20
DEFAULT_TIME_OUT=20

if [[ ! -f "$conf_file" ]] ; then
  echo ""
  echo -e " [ Fatal ] Configuration file '$(basename -- "${conf_file}")' not found!"
  echo ""
  echo -e "\tScript terminated.."
  echo ""
  exit 1
fi

# shellcheck disable=SC1090 -- path is computed at runtime
source "$conf_file"

# alert_email_addresses is a blank separated list; splitting is intended.
for _email in $alert_email_addresses ; do
  alert_email_arr+=("$_email")
done

host_fqdn="$(hostname -f)"

[[ -n "$sender_address" ]] || sender_address="check_local_webservice@${host_fqdn}"
[[ -n "$content_type" ]] || content_type='Content-Type: text/plain;\n charset="utf-8"'

if ! is_number "$TIME_OUT" ; then
  TIME_OUT=$DEFAULT_TIME_OUT
fi
# 10# forces base 10, so a value like "08" is not rejected as invalid octal.
TIME_OUT_MAX=$(( 10#$TIME_OUT + 5 ))

[[ -n "$CONFLICTING_SCRIPTS" ]] || CONFLICTING_SCRIPTS=""


# -------------
# --- Check some prerequisites
# -------------

# - Running in a terminal?
# -
if [[ -t 1 ]] ; then
  terminal=true
  LOGGING=true
else
  terminal=false
  LOGGING=false
fi


# - Stop here, if one of the given scripts is running
# -
if [[ -n "$CONFLICTING_SCRIPTS" ]] ; then

  # - Use a random start delay to reduce the chance that conflicting scripts
  # - started at the same time all abort.
  # -
  # - This only makes sense if a fixed LOCK directory is used, otherwise the
  # - process list (and NOT the LOCK directory) is used to look for scripts
  # - running in parallel.
  # -
  # - Skip the delay if running in a terminal (from console).
  # -
  if ! $terminal && [[ "$LOCK_DIR" == "/tmp/${script_name%%.*}.LOCK" ]] ; then
    sleep $(( RANDOM % 25 + RANDOM % 10 + 1 ))
  fi

  # Entries have the form <script>[:<lock-dir>|:CHECK_PROCESS_LIST];
  # splitting the blank separated list is intended.
  for _val in $CONFLICTING_SCRIPTS ; do

    IFS=':' read -r -a _val_arr <<< "$_val"
    _script_name="$(basename -- "${_val_arr[0]}")"
    _stop_running=false

    if [[ -z "${_val_arr[1]:-}" ]] ; then
      if [[ -d "/tmp/${_script_name%%.*}.LOCK" ]] ; then
        _stop_running=true
      fi
    elif [[ "${_val_arr[1]}" == "CHECK_PROCESS_LIST" ]] ; then
      # Ignore the grep itself and editors that merely have the script open.
      if ps -e f | grep -E "\s+${_val_arr[0]}" | grep -v grep \
        | grep -v -E "\s+vim\s+" > /dev/null ; then
        _stop_running=true
      fi
    elif [[ -d "${_val_arr[1]}" ]] ; then
      _stop_running=true
    fi

    if $_stop_running ; then
      echo ""
      echo "[ Error ]: The \"${_script_name}\" script is currently running, but it conflicts with this script."
      echo ""
      echo "           Exiting now.."
      echo ""
      clean_up 1
    fi

  done

fi


# -------------
# - Job is already running?
# -------------

# - If job already runs, stop execution..
# -
if mkdir "$LOCK_DIR" 2> /dev/null ; then

  lock_acquired=true

  ## - Remove lockdir when the script receives a signal
  trap 'clean_up 1' SIGHUP SIGINT SIGTERM

else

  datum="$(date +"%d.%m.%Y %H:%M")"

  echo ""
  echo "[ Error ]: A previous instance of that script \"${script_name}\" seems already be running."
  echo ""
  echo "           Exiting now.."
  echo ""

  send_mail "Error cronjob ${script_name} -- ${datum}" \
    "[ Error ]: A previous instance of \"${script_name}\" seems already be running."$'\n\n'"           Exiting now.."

  exit 1

fi


# -------------
# --- Check some further prerequisites
# -------------

# Empty if systemctl is not available; restart_service reports that case.
systemctl="$(command -v systemctl)"

if $LOGGING ; then
  echo ""
fi


# -------------
# --- Check the web site(s)
# -------------

declare -i response=-1
NGINX_SERVICE_DOWN=false
MM_SERVICE_DOWN=false

if [[ -z "$LOCAL_MM_WEBSITES_TO_CHECK" ]] ; then
  warn "No local Mattermost Service to check is given (empty var 'LOCAL_MM_WEBSITES_TO_CHECK')"
  clean_up 10
fi

# Each site gets up to three attempts. The first site that never answers
# 200 ends the check; its last status code decides what gets restarted.
all_sites_ok=true
for _site in $LOCAL_MM_WEBSITES_TO_CHECK ; do

  echononl "Check local mattermost service \033[1m${_site}\033[m .."

  _site_ok=false
  for (( i = 0; i < 3; i++ )) ; do
    response="$(http_status "$_site")"
    if (( response == 200 )) ; then
      _site_ok=true
      break
    fi
    sleep 2
  done

  if $_site_ok ; then
    echo_ok
  else
    echo_failed
    all_sites_ok=false
    break
  fi

done

if $all_sites_ok ; then
  clean_up 0
fi

if (( response > 499 )) ; then
  # nginx answered with a server error.
  MM_SERVICE_DOWN=true
elif (( response == 0 || response == -1 )) ; then
  # No HTTP response at all.
  NGINX_SERVICE_DOWN=true
  if ! process_running '^/opt/mattermost/bin/mattermost' ; then
    MM_SERVICE_DOWN=true
  fi
else
  NGINX_SERVICE_DOWN=true
  MM_SERVICE_DOWN=true
fi


# -------------
# --- Collect diagnostic information (before anything is restarted)
# -------------

msg_process_list="process list:"$'\n'"============="$'\n'"$(ps -e f)"

msg=$'\n==========\nSystem logfiles\n==========\n'
for _log_file in /var/log/syslog /var/log/messages /var/log/auth.log \
  /var/log/daemon.log /var/log/kern.log ; do
  msg+="$(report_section \
    "Last entries (${DEFAULT_NUMBER_LINES} lines) of \"${_log_file}\":" \
    "$(tail -n "$DEFAULT_NUMBER_LINES" -- "$_log_file")")"$'\n'
done
msg+="$(report_section "Lastlog:" "$(lastlog)")"$'\n'
msg+="$(report_section "dmesg:" "$(dmesg -T)")"

msg_user_defined=""
if [[ -n "$LOG_FILES_TO_MONITOR" ]] ; then

  msg_user_defined=$'\n==========\nUser defined logfiles\n==========\n'

  # Entries have the form <logfile>[:<number-of-lines>];
  # splitting the blank separated list is intended.
  for _val in $LOG_FILES_TO_MONITOR ; do

    IFS=':' read -r -a _val_arr <<< "$_val"
    _log_file="${_val_arr[0]}"

    if [[ -n "$_log_file" ]] && is_number "${_val_arr[1]:-}" ; then
      _number_lines="${_val_arr[1]}"
    else
      _number_lines="$DEFAULT_NUMBER_LINES"
    fi

    msg_user_defined+=$'\n---\n'"Last entries (${_number_lines} lines) of \"${_log_file}\":"$'\n---\n'
    if [[ -s "$_log_file" ]] ; then
      msg_user_defined+="$(tail -n "$_number_lines" -- "$_log_file")"$'\n'
    else
      msg_user_defined+=$'-- FILE IS EMPTY --\n'
    fi

  done

  msg_user_defined+=$'\n\n'

fi


# -------------
# --- Restart what seems to be down
# -------------

if $NGINX_SERVICE_DOWN ; then
  restart_service nginx "NGINX" '^nginx:' /run/nginx.pid
fi

if $MM_SERVICE_DOWN ; then
  restart_service mattermost "Mattermost" '^/opt/mattermost/bin/mattermost'
fi


# -------------
# --- Send report
# -------------

mail_body="${err_msg}

Filesystem usage:
=================
$(df -h)

${msg_process_list}

${msg_user_defined}
${msg}"

send_mail "[Error] Local Mattermost Service not available." "$mail_body"

if $LOGGING ; then
  echo ""
fi
clean_up 0


# =====================================================================
# Sample configuration: conf/check_local_mattermost_service.conf.sample
# (reference only - everything below is a comment)
# =====================================================================
#
#-----------------------------
# Settings for script check_local_mattermost_service.sh
#-----------------------------

# - LOCAL_MM_WEBSITES_TO_CHECK
# -
# - Mattermost website(s) which will be checked
# - (blank separated list)
# -
# - Example: LOCAL_MM_WEBSITES_TO_CHECK="mm-irights.oopen.de"
# -
#LOCAL_MM_WEBSITES_TO_CHECK=""

# - LOG_FILES_TO_MONITOR
# -
# - Blank separated list of entries <logfile>[:<number-of-lines>]
# -
# - Print out the last lines of the given logfiles.
# -
# - It is possible to give the number of lines which should be printed
# - out by appending it, separated by a colon ':'.
# -
# - The default number of printed out lines is 20.
# -
# - Example:
# -    LOG_FILES_TO_MONITOR="
# -       /var/log/nginx/access.log:40
# -       /var/log/nginx/error.log:25
# -    "
# -
#LOG_FILES_TO_MONITOR=""

# - TIME_OUT
# -
# - Connect timeout for the curl request of each website. The maximum
# - time of the whole request is TIME_OUT + 5.
# -
# - Defaults to: TIME_OUT=20
# -
#TIME_OUT=20


# - CONFLICTING_SCRIPTS
# -
# - The scripts listed here conflict with this script. If one of these scripts
# - is currently running, this script will be stopped.
# -
# - In addition to the script, a LOCK directory can also be specified which is
# - connected to it: <script>:<lock-directory>
# -
# - If no fixed LOCK directory is connected to the script, set
# - this value to the constant 'CHECK_PROCESS_LIST'.
# -
# - If no value for the LOCK directory is given, the LOCK directory
# - '/tmp/<script-name-without-suffix>.LOCK' is assumed.
# -
# -
# - Example:
# -    CONFLICTING_SCRIPTS="
# -       /root/bin/monitoring/check_webservice_load.sh:CHECK_PROCESS_LIST
# -       /root/bin/monitoring/check_remote_websites.sh
# -    "
# -
# - Defaults to:
# -    CONFLICTING_SCRIPTS=""   (no conflicting scripts)
# -
#CONFLICTING_SCRIPTS=""


# ---
# - E-Mail settings for sending script messages
# ---

# - company
# -
# - Example: company="O.OPEN"
# -
#company=""

# - sender_address
# -
# - Defaults to: sender_address="check_local_webservice@$(hostname -f)"
# -
#sender_address="check_mm_service@$(hostname -f)"

# - content_type
# -
# - Defaults to: content_type='Content-Type: text/plain;\n charset="utf-8"'
# -
#content_type='Content-Type: text/plain;\n charset="utf-8"'

# - alert_email_addresses
# -
# - blank separated list of e-mail addresses
# -
# - Example: alert_email_addresses="ckubu@oopen.de axel@warenform.net"
# -
#alert_email_addresses="ckubu@oopen.de"