All checks were successful
Deploy to Production / Deploy to Server (push) Successful in 1m44s
- BUG-053: Add terser JS minification to build process
- BUG-060: Add og:image, twitter:card, twitter:image to sub-pages
- BUG-067: Update skip-link to #main-content on all pages
93 lines
2.3 KiB
Bash
Executable file
93 lines
2.3 KiB
Bash
Executable file
#!/bin/bash
#
# DocFast health-check monitor.
# Scheduled from cron every 5 minutes.

# Endpoint probed on each run.
HEALTH_URL='https://docfast.dev/health'
# File that log entries are appended to.
LOG_FILE='/var/log/docfast-healthcheck.log'
# Marker file created once an outage has been detected.
DOWN_MARKER='/tmp/docfast-down'
# File holding the consecutive-failure counter between runs.
STATE_FILE='/tmp/docfast-healthcheck-state'
# Line count above which the log is trimmed.
MAX_LOG_LINES=1000

# Seed the counter with zero when no state file is present yet.
[ -f "$STATE_FILE" ] || echo "0" > "$STATE_FILE"
#######################################
# Trim the log to its newest MAX_LOG_LINES lines once it has grown past
# that limit.
# Globals:   LOG_FILE (read, replaced), MAX_LOG_LINES (read)
# Arguments: none
# Returns:   0 when nothing had to be done or the trim succeeded,
#            non-zero when the log could not be counted or rewritten
#######################################
rotate_log() {
  [[ -f "$LOG_FILE" ]] || return 0

  # Declared separately so a failing wc is not masked by `local`.
  local line_count
  line_count=$(wc -l < "$LOG_FILE") || return

  (( line_count > MAX_LOG_LINES )) || return 0

  local tmp_file="${LOG_FILE}.tmp"
  # Swap the trimmed copy in only after it was written successfully;
  # moving the result of a failed tail (e.g. full disk) over the log
  # would destroy the existing entries.
  if tail -n "$MAX_LOG_LINES" -- "$LOG_FILE" > "$tmp_file"; then
    mv -- "$tmp_file" "$LOG_FILE"
  else
    rm -f -- "$tmp_file"
    return 1
  fi
}
# Append one timestamped entry to LOG_FILE, then hand over to rotate_log
# so the file is trimmed if it has grown too long.
#   $1 - text of the entry
log_message() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$1" >> "$LOG_FILE"
  rotate_log
}
#######################################
# Probe HEALTH_URL once and decide whether the service is healthy.
# Healthy means: HTTP status 200 and a body containing "status":"ok"
# (whitespace around the colon is tolerated).
# Globals:   HEALTH_URL (read)
#            HEALTH_TIMEOUT (read, optional; request limit in seconds,
#            default 10)
# Arguments: none
# Returns:   0 when healthy, 1 otherwise
#######################################
check_health() {
  # Locals keep the probe's scratch data out of the global namespace.
  local response http_code body
  local -r nl=$'\n'

  # --max-time bounds the whole request so an unresponsive server cannot
  # stall the cron job. curl's own diagnostics are silenced on purpose:
  # whatever goes wrong simply fails the status test below.
  response=$(curl -s --max-time "${HEALTH_TIMEOUT:-10}" \
    -w "\n%{http_code}" "$HEALTH_URL" 2>/dev/null)

  # -w appends the status code as the final line; everything before the
  # last newline is the body. Parameter expansion replaces `head -n-1`,
  # which only GNU head understands.
  http_code=${response##*"$nl"}
  body=${response%"$nl"*}

  [[ "$http_code" == "200" ]] || return 1

  # Accept both compact and pretty-printed JSON.
  grep -Eq '"status"[[:space:]]*:[[:space:]]*"ok"' <<<"$body" || return 1
  return 0
}
#######################################
# Print the consecutive-failure counter stored in STATE_FILE.
# A missing, unreadable, empty or non-numeric state file yields 0, so
# callers always receive a plain decimal integer.
# Globals:   STATE_FILE (read)
# Arguments: none
# Outputs:   the counter on stdout
# Returns:   0
#######################################
get_failure_count() {
  local count=""

  # Only the first line is relevant; a failed read leaves count empty.
  if [[ -f "$STATE_FILE" && -r "$STATE_FILE" ]]; then
    IFS= read -r count < "$STATE_FILE" || true
  fi

  # Anything but pure digits is rejected: callers feed this value into
  # shell arithmetic and integer tests, where arbitrary text causes errors
  # or, in arithmetic context, gets evaluated as an expression.
  if [[ "$count" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so leading zeros are not read as octal.
    printf '%s\n' "$(( 10#$count ))"
  else
    printf '0\n'
  fi
}
#######################################
# Add one to the stored consecutive-failure counter.
# Globals:   STATE_FILE (written)
# Arguments: none
# Returns:   status of the write to STATE_FILE
#######################################
increment_failure() {
  # Declared separately so `local` does not mask the helper's exit status.
  local count
  count=$(get_failure_count)

  # Treat anything but plain digits as 0. Bash arithmetic evaluates variable
  # contents as expressions, so unvalidated text from the state file could
  # raise syntax errors or even run command substitutions hidden in an
  # array subscript.
  [[ "$count" =~ ^[0-9]+$ ]] || count=0

  # 10# forces base 10 so a value such as "08" is not rejected as octal.
  printf '%s\n' "$(( 10#$count + 1 ))" > "$STATE_FILE"
}
# Put the stored consecutive-failure counter back to zero.
reset_failure() {
  printf '%d\n' 0 > "$STATE_FILE"
}
# Main logic: probe once, then update the failure counter, the log and the
# downtime marker according to the outcome.
if ! check_health; then
  # Probe failed: bump the counter and record the attempt.
  increment_failure
  failure_count=$(get_failure_count)
  log_message "✗ Health check failed (attempt $failure_count)"

  # From the second consecutive failure on the service counts as down;
  # the marker is created (and announced) only if it is not there yet.
  if [ "$failure_count" -ge 2 ] && [ ! -f "$DOWN_MARKER" ]; then
    touch "$DOWN_MARKER"
    log_message "⚠ DOWNTIME DETECTED - Marker file created"
  fi
else
  # Probe succeeded: report a recovery when earlier failures were counted.
  failure_count=$(get_failure_count)
  if [ "$failure_count" -gt 0 ]; then
    log_message "✓ Service recovered after $failure_count failure(s)"
    if [ -f "$DOWN_MARKER" ]; then
      rm -f "$DOWN_MARKER"
      log_message "Removed downtime marker"
    fi
  fi
  reset_failure
fi

# The script exits with status 0 whatever the probe outcome was.
exit 0