Files
Alpha/crash_diagnose.sh
2025-08-13 10:17:06 +02:00

87 lines
3.4 KiB
Bash
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
#
# PHX crash diagnostic tool (Docker + Linux server).
#
# Prints a post-crash report to stdout: the most recent boots, then
# OOM / memory-pressure / CPU-lockup / high-priority messages taken from the
# journal of the PREVIOUS boot (journalctl -b -1), i.e. the boot that crashed.
# Reading the system journal usually requires root or membership in the
# systemd-journal group.
set -euo pipefail

# ANSI colour escapes, expanded by `echo -e` / `printf %b`.
YELLOW='\033[1;33m'
RED='\033[1;31m'
NC='\033[0m' # No color

# Prints the three most recent boots.
# `journalctl --list-boots` lists boots oldest-first (current boot, index 0,
# is the last line), so the newest entries are at the tail. `tail` also
# consumes its whole input, so journalctl cannot be killed by SIGPIPE, which
# would abort the script under `set -e -o pipefail`.
show_recent_boots() {
  journalctl --no-pager --list-boots | tail -n 3
}

# Greps the journal of the previous boot.
# Arguments: passed verbatim to grep (options and pattern).
# Returns:   non-zero when nothing matched or the journal could not be read.
grep_prev_boot() {
  journalctl --no-pager -b -1 | grep "$@"
}

echo -e "📦 ${YELLOW}PHX Crash Diagnostic Tool (Docker + Linux Server)${NC}"

echo "🔍 Boot Timeline:"
show_recent_boots || echo "Boot list unavailable."

echo -e "\n⚠ OOM Kills:"
grep_prev_boot -i 'killed process' || echo "None found."

echo -e "\n⚠ Out of Memory Events:"
grep_prev_boot -i 'out of memory' || echo "None found."

echo -e "\n⚠ systemd-oomd Events:"
grep_prev_boot systemd-oomd || echo "None found."

echo -e "\n🔥 CPU/Load Pressure (dmesg/syslog):"
grep_prev_boot -Ei 'cpu|load average|soft lockup|hung task' || echo "None found."

echo -e "\n🚨 System Errors (priority 03):"
# Priorities emerg(0) through err(3). --no-pager keeps the report
# non-interactive when the script is run from a terminal.
journalctl --no-pager -b -1 -p 3..0 || echo "None found."
# ---------------------------------------------------------------------------
# Docker diagnostics: OOM-killed containers, restarts, current resource usage,
# configured memory limits and the latest logs of the PHX containers.
# ---------------------------------------------------------------------------

# Reads "name<TAB>value" lines on stdin and prints the five rows with the
# largest value. LC_ALL=C makes `sort -h` treat "." as the decimal point
# regardless of the user's locale (docker always prints "12.5MiB").
top_five_by_value() {
  LC_ALL=C sort -t $'\t' -k2,2hr | awk 'NR <= 5'
}

# Reads "name oom_killed" lines on stdin; prints the names (without docker's
# leading "/") of the containers whose flag is "true".
select_oom_killed() {
  awk '$2 == "true" { sub(/^\//, "", $1); print $1 }'
}

# Reads "name restart_count status" lines on stdin; prints the containers that
# the restart policy has restarted at least once or that are restarting now.
select_restarted() {
  awk '$2 + 0 > 0 || $3 == "restarting" {
    sub(/^\//, "", $1)
    printf "%s\trestarts=%s\tstatus=%s\n", $1, $2, $3
  }'
}

# Copies stdin to stdout; prints the fallback text ($1) when stdin was empty.
print_or() {
  awk -v fallback="$1" '{ print; seen = 1 } END { if (!seen) print fallback }'
}

# Runs `docker inspect --format $1` over every container in container_ids.
# Errors are deliberately ignored: a container may disappear between
# `docker ps` and `docker inspect`, and the report should still cover the rest.
inspect_all() {
  docker inspect --format "$1" "${container_ids[@]}" 2> /dev/null || true
}

if command -v docker &> /dev/null && docker info > /dev/null 2>&1; then
  echo -e "\n🐳 Docker detected and running."

  container_ids=()
  mapfile -t container_ids < <(docker ps -aq)

  if ((${#container_ids[@]} == 0)); then
    echo -e "\n⚠ No containers found. Skipping container-specific diagnostics."
  else
    echo -e "\n🐳 Docker OOM-Killed Containers:"
    inspect_all '{{ .Name }} {{ .State.OOMKilled }}' \
      | select_oom_killed \
      | print_or "No containers were OOMKilled."

    echo -e "\n🔁 Recently Restarted Containers:"
    # The human-readable status of `docker ps` says "Restarting (...)", never
    # "restarted", so grepping for 'restarted' can never match. Use the
    # restart counter and the machine-readable state instead.
    inspect_all '{{ .Name }} {{ .RestartCount }} {{ .State.Status }}' \
      | select_restarted \
      | print_or "No recent restarts."

    # The `table` directive is avoided on purpose: its header line would be
    # sorted together with the data rows.
    echo -e "\n📉 Top 5 Containers by Memory Usage (now):"
    printf 'NAME\tMEM USAGE / LIMIT\n'
    docker stats --no-stream --format $'{{.Name}}\t{{.MemUsage}}' \
      | top_five_by_value \
      || echo "docker stats unavailable."

    echo -e "\n📈 Top 5 Containers by CPU Usage (now):"
    printf 'NAME\tCPU %%\n'
    docker stats --no-stream --format $'{{.Name}}\t{{.CPUPerc}}' \
      | top_five_by_value \
      || echo "docker stats unavailable."

    # HostConfig.Memory is the limit in bytes; 0 means "no limit".
    echo -e "\n📋 Docker Container Memory Limits:"
    inspect_all '{{ .Name }} {{ .HostConfig.Memory }}' \
      | awk '$2 + 0 > 0 { sub(/^\//, "", $1); print $1 ": " $2 " bytes" }' \
      | print_or "None set"

    echo -e "\n📋 Containers With No Memory Limit:"
    inspect_all '{{ .Name }} {{ .HostConfig.Memory }}' \
      | awk '$2 + 0 == 0 { sub(/^\//, "", $1); print $1 }'

    echo -e "\n📝 Last 100 Log Lines from PHX Containers:"
    while IFS= read -r name; do
      echo -e "\n--- Logs for $name ---"
      # `docker logs` replays the container's stderr on its own stderr;
      # merge it into stdout instead of discarding it, because crash output
      # (and all of PostgreSQL's default logging) is written to stderr.
      docker logs --tail=100 "$name" 2>&1 < /dev/null || echo "No logs for $name"
    done < <(docker ps -a --format '{{.Names}}' | grep -Ei 'phoenix|pgadmin|postgres')
  fi
else
  echo -e "\n🐳 ${RED}Docker is not installed or not running.${NC}"
fi
# Historical CPU/memory usage with 'sar' (sysstat).
# NOTE: `sar` without -s/-e reports all samples recorded for the current day,
# not only the last 60 minutes.

# Summarises `sar -r` output read from stdin as
# "Time<TAB>%memused<TAB>%commit<TAB>Status".
# Columns are located by NAME from sar's header line rather than by fixed
# position: the position differs between sysstat versions (newer ones add a
# kbavail column) and shifts by one in 12-hour locales (extra AM/PM field).
# Rows whose values are not numeric (e.g. "LINUX RESTART" markers) are skipped.
summarize_sar_memory() {
  awk '
    function col(name,   i) {
      for (i = 1; i <= NF; i++) if ($i == name) return i
      return 0
    }
    BEGIN {
      OFS = "\t"
      num = "^[0-9]+([.][0-9]+)?$"
      print "Time", "%memused", "%commit", "Status"
    }
    /%memused/ { mem_col = col("%memused"); commit_col = col("%commit"); next }
    /^[0-9]/ && mem_col && commit_col {
      memused = $(mem_col); commit = $(commit_col)
      # Some locales print a decimal comma; normalise before comparing.
      sub(/,/, ".", memused); sub(/,/, ".", commit)
      if (memused !~ num || commit !~ num) next
      ts = $1
      if ($2 == "AM" || $2 == "PM") ts = ts " " $2
      status = (memused + 0 > 90 || commit + 0 > 95) ? "⚠️ HIGH" : "OK"
      printf "%s\t%s%%\t%s%%\t%s\n", ts, memused, commit, status
    }'
}

# Summarises `sar -u` output read from stdin as
# "Time<TAB>%user<TAB>%system<TAB>%idle<TAB>Status", locating the columns by
# header name for the same reasons as summarize_sar_memory.
summarize_sar_cpu() {
  awk '
    function col(name,   i) {
      for (i = 1; i <= NF; i++) if ($i == name) return i
      return 0
    }
    BEGIN {
      OFS = "\t"
      num = "^[0-9]+([.][0-9]+)?$"
      print "Time", "%user", "%system", "%idle", "Status"
    }
    /%idle/ {
      user_col = col("%user"); sys_col = col("%system"); idle_col = col("%idle")
      next
    }
    /^[0-9]/ && user_col && sys_col && idle_col {
      user = $(user_col); sys = $(sys_col); idle = $(idle_col)
      sub(/,/, ".", user); sub(/,/, ".", sys); sub(/,/, ".", idle)
      if (user !~ num || sys !~ num || idle !~ num) next
      ts = $1
      if ($2 == "AM" || $2 == "PM") ts = ts " " $2
      status = (idle + 0 < 10 || sys + 0 > 90) ? "⚠️ HIGH" : "OK"
      printf "%s\t%s%%\t%s%%\t%s%%\t%s\n", ts, user, sys, idle, status
    }'
}

if command -v sar &> /dev/null; then
  echo -e "\n📊 Analyzing Memory and CPU Usage via sar (last 60 mins if possible)..."

  # sar exits non-zero when no data file exists yet for today; report that
  # instead of letting `set -e -o pipefail` abort the whole script.
  echo -e "\n🔍 Memory Usage (High Usage if >90%):"
  sar -r | summarize_sar_memory || echo "No sar memory data available."

  echo -e "\n🔍 CPU Usage (High if %idle < 10 or %system > 90):"
  sar -u | summarize_sar_cpu || echo "No sar CPU data available."
else
  echo -e "\n 'sar' (sysstat) is not installed. Skipping historical CPU/memory analysis."
fi
# Closing banner. %b expands the backslash escapes stored in the colour
# variables, exactly as `echo -e` would.
printf '\n✅ %bDone. Use this script after crashes or schedule it in cron for proactive monitoring.%b\n' \
  "${YELLOW}" "${NC}"