check_local_mattermost_service.sh: support restarting the whole system if the service continues to be down.

This commit is contained in:
Christoph 2022-06-23 11:22:24 +02:00
parent ccfa76e125
commit 97625cbe0b

View File

@ -7,6 +7,8 @@ conf_file="${working_dir}/conf/${script_name%%.*}.conf"
LOCK_DIR="/tmp/${script_name%%.*}.LOCK"
RESTART_CHECK_FILE="/tmp/${script_name%%.*}.NEED-RESTART"
declare -a alert_email_arr
@ -335,6 +337,9 @@ if [[ -n "$LOCAL_MM_WEBSITES_TO_CHECK" ]] ; then
# 200 - OK
# The site answered with HTTP 200: report success and leave the script.
if [[ $response -eq 200 ]]; then
  echo_ok
  # The service is reachable again, so discard the marker file that a
  # previous failed run may have left behind.
  [[ -f "$RESTART_CHECK_FILE" ]] && rm -f "$RESTART_CHECK_FILE"
  clean_up 0
fi
sleep 2
@ -343,12 +348,15 @@ if [[ -n "$LOCAL_MM_WEBSITES_TO_CHECK" ]] ; then
echo_failed
if [[ $response -gt 499 ]] ; then
MM_SERVICE_DOWN=true
elif [[ $response -eq 0 ]] || [[ $response -eq -1 ]]; then
elif [[ $response -eq -1 ]]; then
NGINX_SERVICE_DOWN=true
PID="$(ps -e f | grep -E "[[:digit:]]\s+/opt/mattermost/bin/mattermost" | grep -v grep 2> /dev/null)"
[[ -z "$PID" ]] && MM_SERVICE_DOWN=true
#elif [[ $response -eq 0 ]] ; then
# NGINX_SERVICE_DOWN=true
# MM_SERVICE_DOWN=true
else
NGINX_SERVICE_DOWN=true
MM_SERVICE_DOWN=true
@ -412,6 +420,8 @@ if [[ ${#LOG_FILES_TO_MONITOR} -gt 0 ]] ; then
else
msg_user_defined="${msg_user_defined}\n---\nLast entries (${_number_lines} lines) of \"${_log_file}\":\n---\n-- FILE IS EMPTY --\n"
fi
done
fi
done
msg_user_defined="${msg_user_defined}\n\n"
else
@ -419,6 +429,62 @@ else
fi
# Two-stage escalation keyed on the marker file "$RESTART_CHECK_FILE":
#   - marker absent : first failed check. Create the marker, print and mail a
#                     warning, then fall through to the code after this block.
#   - marker present: an earlier run already failed. Print and mail an error,
#                     remove the marker and reboot the whole system.
if [[ ! -f "$RESTART_CHECK_FILE" ]]; then

  touch "$RESTART_CHECK_FILE"

  error "The local Mattermost Service seems to be down."

  # $LOGGING holds the command name 'true' or 'false' and is executed as such.
  if $LOGGING ; then
    echo -e "\n \033[1mFirst we try to restore the system. If this is not successful,\n the system will be restarted in about 5 minutes.\033[m"
  else
    echo ""
    echo "First we try to restore the system. If this is not successful,"
    echo "the system will be restarted in about 5 minutes."
    echo ""
  fi

  err_msg="\n[ Warning ]: The local Mattermost Service seems to be down.\n"
  err_msg="${err_msg}\nFirst we try to restore the system. If this is not successful,\nthe system will be restarted in about 5 minutes.\n"

  datum="$(date +"%d.%m.%Y %H:%M")"

  # One mail per recipient; quoting keeps every address a single word.
  for _email in "${alert_email_arr[@]}" ; do
    # Skip empty entries instead of calling sendmail without a recipient.
    [[ -n "$_email" ]] || continue
    echo -e "To:${_email}\n${content_type}\nSubject:[Warning] Local Mattermost Service is not available.\n${err_msg}\n\nFilesystem usage:\n=================\n$(df -h)\n\n${msg_process_list}\n\n${msg_user_defined}\n${msg}" \
      | sendmail -F "Error $(hostname -f)" -f "$sender_address" "$_email"
  done

else

  error "The local Mattermost Service seems to be down."

  if $LOGGING ; then
    echo -e "\n\033[1mGoing to restart the system NOW..\033[m"
  else
    echo ""
    echo "Going to restart the system NOW.."
    echo ""
  fi

  err_msg="\n[ Error ]: The local Mattermost Service seems to be down.\n"
  err_msg="${err_msg}\nGoing to restart the system..\n"

  datum="$(date +"%d.%m.%Y %H:%M")"

  for _email in "${alert_email_arr[@]}" ; do
    # Skip empty entries instead of calling sendmail without a recipient.
    [[ -n "$_email" ]] || continue
    echo -e "To:${_email}\n${content_type}\nSubject:[Error] Local Mattermost Service is not available.\n${err_msg}\n\nFilesystem usage:\n=================\n$(df -h)\n\n${msg_process_list}\n\n${msg_user_defined}\n${msg}" \
      | sendmail -F "Error $(hostname -f)" -f "$sender_address" "$_email"
  done

  # Remove the marker before rebooting so the first check after the reboot
  # starts again at the warning stage.
  rm -f "$RESTART_CHECK_FILE"
  reboot_system

fi
if $NGINX_SERVICE_DOWN ; then
error "NGINX Service seems to be down. Going to restart Service.."
@ -483,64 +549,6 @@ if $NGINX_SERVICE_DOWN ; then
declare -i counter=0
PID="$(ps aux | grep -E "[[:digit:]]\s+nginx:" | grep -v grep | tail -n 1 | awk '{print$2}' 2> /dev/null)"
sleep 1
while [[ "X${PID}" = "X" ]]; do
sleep 1
PID="$(ps aux | grep -E "[[:digit:]]\s+nginx:" | grep -v grep | tail -n 1 | awk '{print$2}' 2> /dev/null)"
if [[ $counter -gt 10 ]]; then
break
else
((counter++))
fi
done
if [[ "X${PID}" = "X" ]] ; then
error "Restarting NGINX Service failed!"
err_msg="${err_msg}\n[ Error ]: Restarting NGINX Service failed!"
else
ok "NGINX Service is up and running"
err_msg="${err_msg}\n[ OK ]: NGINX Service is up and running"
fi
fi
if $MM_SERVICE_DOWN; then
error "Mattermost Service seems to be down. Going to restart Service.."
if [[ -n "$err_msg" ]]; then
err_msg="${err_msg}\n\n\n[ Error ]: Mattermost Service seems to be down.
Going to restart Mattermost Service Service\n"
else
err_msg="\n[ Error ]: Mattermost Service seems to be down.
Going to restart Mattermost Service Service\n"
fi
echononl "Stop mattwermost Service.."
$systemctl stop mattermost > /dev/null 2> ${LOCK_DIR}/error.log
if [[ $? -ne 0 ]]; then
echo_failed
error "$(cat ${LOCK_DIR}/error.log)"
else
echo_done
fi
declare -i counter=0
PID="$(ps aux | grep -E "[[:digit:]]\s+/opt/mattermost/bin/mattermost" | grep -v grep | tail -n 1 | awk '{print$2}' 2> /dev/null)"
while [[ -n "$PID" ]] ; do
if [[ $counter -gt 3 ]] ; then
break
fatal "Killing remaining mattermost Process(es) failed!"
fi
warn "There are still mattermost processes running"
((counter++))
echononl "${counter}: Kill remaining mattermost Process(es).."
@ -590,14 +598,50 @@ if $MM_SERVICE_DOWN; then
fi
datum="$(date +"%d.%m.%Y %H:%M")"
for _email in ${alert_email_arr[@]} ; do
echo -e "To:${_email}\n${content_type}\nSubject:[Error] Local Mattermost Serviceinot available.\n$err_msg\n\nFilesystem usage:\n=================\n$(df -h)\n\n${msg_process_list}\n\n${msg_user_defined}\n${msg}" \
if [[ ! -f "$RESTART_CHECK_FILE" ]]; then
touch "$RESTART_CHECK_FILE"
error "The local Mattermost Service seems to be down."
err_msg="\n[ Warning ]: The local Mattermost Service seems to be down.\n"
err_msg="${err_msg}\nFor now nothing is to. because its the first time..\n"
datum="$(date +"%d.%m.%Y %H:%M")"
for _email in ${alert_email_arr[@]} ; do
echo -e "To:${_email}\n${content_type}\nSubject:[Warning] Local Mattermost Service is not available.\n${err_msg}\n\nFilesystem usage:\n=================\n$(df -h)\n\n${msg_process_list}\n\n${msg_user_defined}\n${msg}" \
| sendmail -F "Error `hostname -f`" -f $sender_address $_email
done
done
else
error "The local Mattermost Service seems to be down."
err_msg="\n[ Warning ]: The local Mattermost Service seems to be down.\n"
if $LOGGING ; then
echo -e "\n\033[1mGoing to restart the system NOW..\033[m"
else
echo ""
echo "Going to restart the system NOW.."
fi
err_msg="${err_msg}\nGoing to restart the system..\n"
datum="$(date +"%d.%m.%Y %H:%M")"
for _email in ${alert_email_arr[@]} ; do
echo -e "To:${_email}\n${content_type}\nSubject:[Error] Local Mattermost Service is not available.\n$err_msg\n\nFilesystem usage:\n=================\n$(df -h)\n\n${msg_process_list}\n\n${msg_user_defined}\n${msg}" \
| sendmail -F "Error `hostname -f`" -f $sender_address $_email
done
rm -f "$RESTART_CHECK_FILE"
reboot_system
fi
if $LOGGING ; then