check_local_mattermost_service.sh: support restarting the whole system if the service continues to be down.
This commit is contained in:
parent
ccfa76e125
commit
97625cbe0b
@ -7,6 +7,8 @@ conf_file="${working_dir}/conf/${script_name%%.*}.conf"
|
|||||||
|
|
||||||
LOCK_DIR="/tmp/${script_name%%.*}.LOCK"
|
LOCK_DIR="/tmp/${script_name%%.*}.LOCK"
|
||||||
|
|
||||||
|
RESTART_CHECK_FILE="/tmp/${script_name%%.*}.NEED-RESTART"
|
||||||
|
|
||||||
declare -a alert_email_arr
|
declare -a alert_email_arr
|
||||||
|
|
||||||
|
|
||||||
@ -335,6 +337,9 @@ if [[ -n "$LOCAL_MM_WEBSITES_TO_CHECK" ]] ; then
|
|||||||
# 200 - OK
|
# 200 - OK
|
||||||
if [[ $response -eq 200 ]] ; then
|
if [[ $response -eq 200 ]] ; then
|
||||||
echo_ok
|
echo_ok
|
||||||
|
if [[ -f "$RESTART_CHECK_FILE" ]] ; then
|
||||||
|
rm -f "$RESTART_CHECK_FILE"
|
||||||
|
fi
|
||||||
clean_up 0
|
clean_up 0
|
||||||
fi
|
fi
|
||||||
sleep 2
|
sleep 2
|
||||||
@ -343,12 +348,15 @@ if [[ -n "$LOCAL_MM_WEBSITES_TO_CHECK" ]] ; then
|
|||||||
echo_failed
|
echo_failed
|
||||||
if [[ $response -gt 499 ]] ; then
|
if [[ $response -gt 499 ]] ; then
|
||||||
MM_SERVICE_DOWN=true
|
MM_SERVICE_DOWN=true
|
||||||
elif [[ $response -eq 0 ]] || [[ $response -eq -1 ]]; then
|
elif [[ $response -eq -1 ]]; then
|
||||||
NGINX_SERVICE_DOWN=true
|
NGINX_SERVICE_DOWN=true
|
||||||
|
|
||||||
PID="$(ps -e f | grep -E "[[:digit:]]\s+/opt/mattermost/bin/mattermost" | grep -v grep 2> /dev/null)"
|
PID="$(ps -e f | grep -E "[[:digit:]]\s+/opt/mattermost/bin/mattermost" | grep -v grep 2> /dev/null)"
|
||||||
[[ -z "$PID" ]] && MM_SERVICE_DOWN=true
|
[[ -z "$PID" ]] && MM_SERVICE_DOWN=true
|
||||||
|
|
||||||
|
#elif [[ $response -eq 0 ]] ; then
|
||||||
|
# NGINX_SERVICE_DOWN=true
|
||||||
|
# MM_SERVICE_DOWN=true
|
||||||
else
|
else
|
||||||
NGINX_SERVICE_DOWN=true
|
NGINX_SERVICE_DOWN=true
|
||||||
MM_SERVICE_DOWN=true
|
MM_SERVICE_DOWN=true
|
||||||
@ -412,6 +420,8 @@ if [[ ${#LOG_FILES_TO_MONITOR} -gt 0 ]] ; then
|
|||||||
else
|
else
|
||||||
msg_user_defined="${msg_user_defined}\n---\nLast entries (${_number_lines} lines) of \"${_log_file}\":\n---\n-- FILE IS EMPTY --\n"
|
msg_user_defined="${msg_user_defined}\n---\nLast entries (${_number_lines} lines) of \"${_log_file}\":\n---\n-- FILE IS EMPTY --\n"
|
||||||
fi
|
fi
|
||||||
|
done
|
||||||
|
fi
|
||||||
done
|
done
|
||||||
msg_user_defined="${msg_user_defined}\n\n"
|
msg_user_defined="${msg_user_defined}\n\n"
|
||||||
else
|
else
|
||||||
@ -419,6 +429,62 @@ else
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if [[ ! -f "$RESTART_CHECK_FILE" ]]; then
|
||||||
|
|
||||||
|
touch "$RESTART_CHECK_FILE"
|
||||||
|
|
||||||
|
error "The local Mattermost Service seems to be down."
|
||||||
|
|
||||||
|
if $LOGGING ; then
|
||||||
|
echo -e "\n \033[1mFirst we try to restore the system. If this is not successful,\n the system will be restarted in about 5 minutes.\033[m"
|
||||||
|
else
|
||||||
|
echo ""
|
||||||
|
echo "First we try to restore the system. If this is not successful,"
|
||||||
|
echo ""the system will be restarted in about 5 minutes.
|
||||||
|
echo ""
|
||||||
|
fi
|
||||||
|
|
||||||
|
err_msg="\n[ Warning ]: The local Mattermost Service seems to be down.\n"
|
||||||
|
err_msg="${err_msg}\nFirst we try to restore the system. If this is not successful,\nthe system will be restarted in about 5 minutes.\n"
|
||||||
|
|
||||||
|
datum="$(date +"%d.%m.%Y %H:%M")"
|
||||||
|
|
||||||
|
for _email in ${alert_email_arr[@]} ; do
|
||||||
|
|
||||||
|
echo -e "To:${_email}\n${content_type}\nSubject:[Warning] Local Mattermost Service is not available.\n${err_msg}\n\nFilesystem usage:\n=================\n$(df -h)\n\n${msg_process_list}\n\n${msg_user_defined}\n${msg}" \
|
||||||
|
| sendmail -F "Error `hostname -f`" -f $sender_address $_email
|
||||||
|
done
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
error "The local Mattermost Service seems to be down."
|
||||||
|
|
||||||
|
if $LOGGING ; then
|
||||||
|
echo -e "\n\033[1mGoing to restart the system NOW..\033[m"
|
||||||
|
else
|
||||||
|
echo ""
|
||||||
|
echo "Going to restart the system NOW.."
|
||||||
|
echo ""
|
||||||
|
fi
|
||||||
|
|
||||||
|
err_msg="\n[ Error ]: The local Mattermost Service seems to be down.\n"
|
||||||
|
err_msg="${err_msg}\nGoing to restart the system..\n"
|
||||||
|
|
||||||
|
datum="$(date +"%d.%m.%Y %H:%M")"
|
||||||
|
|
||||||
|
for _email in ${alert_email_arr[@]} ; do
|
||||||
|
|
||||||
|
echo -e "To:${_email}\n${content_type}\nSubject:[Error] Local Mattermost Service is not available.\n${err_msg}\n\nFilesystem usage:\n=================\n$(df -h)\n\n${msg_process_list}\n\n${msg_user_defined}\n${msg}" \
|
||||||
|
| sendmail -F "Error `hostname -f`" -f $sender_address $_email
|
||||||
|
done
|
||||||
|
|
||||||
|
rm -f "$RESTART_CHECK_FILE"
|
||||||
|
reboot_system
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
if $NGINX_SERVICE_DOWN ; then
|
if $NGINX_SERVICE_DOWN ; then
|
||||||
error "NGINX Service seems to be down. Going to restart Service.."
|
error "NGINX Service seems to be down. Going to restart Service.."
|
||||||
|
|
||||||
@ -483,64 +549,6 @@ if $NGINX_SERVICE_DOWN ; then
|
|||||||
|
|
||||||
declare -i counter=0
|
declare -i counter=0
|
||||||
PID="$(ps aux | grep -E "[[:digit:]]\s+nginx:" | grep -v grep | tail -n 1 | awk '{print$2}' 2> /dev/null)"
|
PID="$(ps aux | grep -E "[[:digit:]]\s+nginx:" | grep -v grep | tail -n 1 | awk '{print$2}' 2> /dev/null)"
|
||||||
sleep 1
|
|
||||||
while [[ "X${PID}" = "X" ]]; do
|
|
||||||
sleep 1
|
|
||||||
PID="$(ps aux | grep -E "[[:digit:]]\s+nginx:" | grep -v grep | tail -n 1 | awk '{print$2}' 2> /dev/null)"
|
|
||||||
if [[ $counter -gt 10 ]]; then
|
|
||||||
break
|
|
||||||
else
|
|
||||||
((counter++))
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
if [[ "X${PID}" = "X" ]] ; then
|
|
||||||
error "Restarting NGINX Service failed!"
|
|
||||||
|
|
||||||
err_msg="${err_msg}\n[ Error ]: Restarting NGINX Service failed!"
|
|
||||||
|
|
||||||
else
|
|
||||||
ok "NGINX Service is up and running"
|
|
||||||
|
|
||||||
err_msg="${err_msg}\n[ OK ]: NGINX Service is up and running"
|
|
||||||
fi
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
if $MM_SERVICE_DOWN; then
|
|
||||||
error "Mattermost Service seems to be down. Going to restart Service.."
|
|
||||||
|
|
||||||
if [[ -n "$err_msg" ]]; then
|
|
||||||
err_msg="${err_msg}\n\n\n[ Error ]: Mattermost Service seems to be down.
|
|
||||||
|
|
||||||
Going to restart Mattermost Service Service\n"
|
|
||||||
else
|
|
||||||
err_msg="\n[ Error ]: Mattermost Service seems to be down.
|
|
||||||
|
|
||||||
Going to restart Mattermost Service Service\n"
|
|
||||||
fi
|
|
||||||
|
|
||||||
echononl "Stop mattwermost Service.."
|
|
||||||
$systemctl stop mattermost > /dev/null 2> ${LOCK_DIR}/error.log
|
|
||||||
if [[ $? -ne 0 ]]; then
|
|
||||||
echo_failed
|
|
||||||
error "$(cat ${LOCK_DIR}/error.log)"
|
|
||||||
else
|
|
||||||
echo_done
|
|
||||||
fi
|
|
||||||
|
|
||||||
declare -i counter=0
|
|
||||||
PID="$(ps aux | grep -E "[[:digit:]]\s+/opt/mattermost/bin/mattermost" | grep -v grep | tail -n 1 | awk '{print$2}' 2> /dev/null)"
|
|
||||||
|
|
||||||
while [[ -n "$PID" ]] ; do
|
|
||||||
|
|
||||||
if [[ $counter -gt 3 ]] ; then
|
|
||||||
break
|
|
||||||
fatal "Killing remaining mattermost Process(es) failed!"
|
|
||||||
fi
|
|
||||||
|
|
||||||
warn "There are still mattermost processes running"
|
|
||||||
|
|
||||||
((counter++))
|
((counter++))
|
||||||
|
|
||||||
echononl "${counter}: Kill remaining mattermost Process(es).."
|
echononl "${counter}: Kill remaining mattermost Process(es).."
|
||||||
@ -590,14 +598,50 @@ if $MM_SERVICE_DOWN; then
|
|||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
datum="$(date +"%d.%m.%Y %H:%M")"
|
|
||||||
|
|
||||||
for _email in ${alert_email_arr[@]} ; do
|
|
||||||
|
|
||||||
echo -e "To:${_email}\n${content_type}\nSubject:[Error] Local Mattermost Serviceinot available.\n$err_msg\n\nFilesystem usage:\n=================\n$(df -h)\n\n${msg_process_list}\n\n${msg_user_defined}\n${msg}" \
|
if [[ ! -f "$RESTART_CHECK_FILE" ]]; then
|
||||||
|
|
||||||
|
touch "$RESTART_CHECK_FILE"
|
||||||
|
|
||||||
|
error "The local Mattermost Service seems to be down."
|
||||||
|
err_msg="\n[ Warning ]: The local Mattermost Service seems to be down.\n"
|
||||||
|
err_msg="${err_msg}\nFor now nothing is to. because its the first time..\n"
|
||||||
|
|
||||||
|
datum="$(date +"%d.%m.%Y %H:%M")"
|
||||||
|
|
||||||
|
for _email in ${alert_email_arr[@]} ; do
|
||||||
|
|
||||||
|
echo -e "To:${_email}\n${content_type}\nSubject:[Warning] Local Mattermost Service is not available.\n${err_msg}\n\nFilesystem usage:\n=================\n$(df -h)\n\n${msg_process_list}\n\n${msg_user_defined}\n${msg}" \
|
||||||
|
| sendmail -F "Error `hostname -f`" -f $sender_address $_email
|
||||||
|
done
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
error "The local Mattermost Service seems to be down."
|
||||||
|
err_msg="\n[ Warning ]: The local Mattermost Service seems to be down.\n"
|
||||||
|
|
||||||
|
if $LOGGING ; then
|
||||||
|
echo -e "\n\033[1mGoing to restart the system NOW..\033[m"
|
||||||
|
else
|
||||||
|
echo ""
|
||||||
|
echo "Going to restart the system NOW.."
|
||||||
|
fi
|
||||||
|
|
||||||
|
err_msg="${err_msg}\nGoing to restart the system..\n"
|
||||||
|
|
||||||
|
datum="$(date +"%d.%m.%Y %H:%M")"
|
||||||
|
|
||||||
|
for _email in ${alert_email_arr[@]} ; do
|
||||||
|
|
||||||
|
echo -e "To:${_email}\n${content_type}\nSubject:[Error] Local Mattermost Service is not available.\n$err_msg\n\nFilesystem usage:\n=================\n$(df -h)\n\n${msg_process_list}\n\n${msg_user_defined}\n${msg}" \
|
||||||
| sendmail -F "Error `hostname -f`" -f $sender_address $_email
|
| sendmail -F "Error `hostname -f`" -f $sender_address $_email
|
||||||
done
|
done
|
||||||
|
|
||||||
|
rm -f "$RESTART_CHECK_FILE"
|
||||||
|
reboot_system
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
if $LOGGING ; then
|
if $LOGGING ; then
|
||||||
|
Loading…
Reference in New Issue
Block a user