first commit

This commit is contained in:
2025-07-29 10:09:46 +02:00
commit 4a6249dfae
19 changed files with 1732 additions and 0 deletions

87
crash_diagnose.sh Normal file
View File

@@ -0,0 +1,87 @@
#!/bin/bash
# Crash diagnostics for a Linux host that runs Docker. This preamble turns on
# strict error handling, defines the colour codes used by later output and
# prints the opening banner.

# Abort on a failing command, on expansion of an unset variable, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ANSI colour sequences. They are stored as literal backslash text and only
# become escape characters when printed through `echo -e` or `printf %b`.
NC='\033[0m' # No color (reset)
RED='\033[1;31m'
YELLOW='\033[1;33m'

printf '%b\n' "📦 ${YELLOW}PHX Crash Diagnostic Tool (Docker + Linux Server)${NC}"
# --- Journal review ----------------------------------------------------------
# Boot offset -1 is the boot before the current one, i.e. the boot whose end
# is being investigated. All journalctl calls use --no-pager so the script
# never stalls in a pager when run from an interactive terminal.

# Print the three most recent boots known to the journal.
# `journalctl --list-boots` lists boots oldest first, so the recent ones are
# the last lines. `tail` also reads its input to EOF, so journalctl cannot be
# killed by SIGPIPE, which `set -o pipefail` would otherwise turn into a
# script abort on hosts with a long boot history.
show_recent_boots() {
  journalctl --no-pager --list-boots | tail -n 3 \
    || echo "Boot list unavailable."
}

# Print the previous boot's journal lines that match a grep expression.
# Arguments: passed verbatim to grep (options followed by the pattern).
# Outputs:   matching lines, or "None found." when nothing matched or the
#            previous boot's journal could not be read.
grep_previous_boot() {
  journalctl --no-pager -b -1 | grep "$@" || echo "None found."
}

echo "🔍 Boot Timeline:"
show_recent_boots

echo -e "\n⚠ OOM Kills:"
grep_previous_boot -i 'killed process'

echo -e "\n⚠ Out of Memory Events:"
grep_previous_boot -i 'out of memory'

echo -e "\n⚠ systemd-oomd Events:"
grep_previous_boot systemd-oomd

echo -e "\n🔥 CPU/Load Pressure (dmesg/syslog):"
# NOTE(review): the bare 'cpu' alternative also matches routine kernel
# messages; consider narrowing it if this section proves too noisy.
grep_previous_boot -Ei 'cpu|load average|soft lockup|hung task'

echo -e "\n🚨 System Errors (priority 0-3):"
# Priorities 0..3 are emerg, alert, crit and err.
journalctl --no-pager -b -1 -p 0..3 || echo "None found."
# --- Docker diagnostics ------------------------------------------------------
# Runs only when the docker CLI exists and `docker info` succeeds. Container
# facts are read with `docker inspect --format` templates instead of grepping
# pretty-printed JSON, so every reported row carries the container name.

# Run `docker inspect` with a Go template over the given containers.
#   $1   - template passed to --format
#   $2.. - container IDs or names
# A container removed since `docker ps` makes docker exit non-zero; docker
# reports that on stderr itself, and the failure is deliberately not allowed
# to abort the script.
inspect_containers() {
  local template=$1
  shift
  docker inspect --format "$template" "$@" || true
}

# Print the names of containers whose State.OOMKilled flag is true.
# Arguments: container IDs.
list_oom_killed() {
  inspect_containers '{{.Name}} {{.State.OOMKilled}}' "$@" \
    | awk 'NF >= 2 && $2 == "true" { sub(/^\//, "", $1); print $1; hit = 1 }
           END { if (!hit) print "No containers were OOMKilled." }'
}

# Print containers that have a non-zero RestartCount or are restarting now.
# `docker ps` never prints the word "restarted" (its status reads
# "Restarting (...)"), so the inspect fields are used instead.
# Arguments: container IDs.
list_restarted() {
  inspect_containers \
    '{{.Name}} {{.RestartCount}} {{.State.Restarting}} {{.State.StartedAt}}' "$@" \
    | awk 'NF >= 4 && ($2 > 0 || $3 == "true") {
             sub(/^\//, "", $1)
             printf "%s\trestarts=%s\trestarting=%s\tstarted=%s\n", $1, $2, $3, $4
             hit = 1
           }
           END { if (!hit) print "No recent restarts." }'
}

# Print "<name>: <bytes> bytes" for containers with a memory limit.
# Arguments: container IDs.
list_memory_limits() {
  inspect_containers '{{.Name}} {{.HostConfig.Memory}}' "$@" \
    | awk 'NF >= 2 && $2 != 0 { sub(/^\//, "", $1); print $1 ": " $2 " bytes"; hit = 1 }
           END { if (!hit) print "None set" }'
}

# Print the names of containers whose HostConfig.Memory is 0 (no limit).
# Arguments: container IDs.
list_unlimited() {
  inspect_containers '{{.Name}} {{.HostConfig.Memory}}' "$@" \
    | awk 'NF >= 2 && $2 == 0 { sub(/^\//, "", $1); print $1 }'
}

# Print a header plus the five rows of `docker stats` with the largest value
# in one column.
#   $1 - header label for the value column
#   $2 - template placeholder of the column, e.g. '{{.MemUsage}}'
# The "table" directive is not used: its header row would be sorted in with
# the data. A literal tab separates name and value so the sort key is exact.
docker_top5() {
  local label=$1 field=$2
  printf 'NAME\t%s\n' "$label"
  docker stats --no-stream --format "{{.Name}}"$'\t'"${field}" \
    | sort -t $'\t' -k2,2 -hr \
    | head -n 5 \
    || echo "docker stats unavailable." >&2
}

if command -v docker &>/dev/null && docker info &>/dev/null; then
  echo -e "\n🐳 Docker detected and running."
  CONTAINERS=$(docker ps -aq)
  if [[ -z "$CONTAINERS" ]]; then
    echo -e "\n⚠ No containers found. Skipping container-specific diagnostics."
  else
    # One ID per array element, so IDs are passed as separate quoted arguments.
    mapfile -t container_ids <<<"$CONTAINERS"

    echo -e "\n🐳 Docker OOM-Killed Containers:"
    list_oom_killed "${container_ids[@]}"

    echo -e "\n🔁 Recently Restarted Containers:"
    list_restarted "${container_ids[@]}"

    echo -e "\n📉 Top 5 Containers by Memory Usage (now):"
    docker_top5 'MEM USAGE / LIMIT' '{{.MemUsage}}'

    echo -e "\n📈 Top 5 Containers by CPU Usage (now):"
    docker_top5 'CPU %' '{{.CPUPerc}}'

    echo -e "\n📋 Docker Container Memory Limits:"
    list_memory_limits "${container_ids[@]}"

    echo -e "\n📋 Containers With No Memory Limit:"
    list_unlimited "${container_ids[@]}"

    echo -e "\n📝 Last 100 Log Lines from PHX Containers:"
    while IFS= read -r name; do
      echo -e "\n--- Logs for $name ---"
      # `docker logs` replays the container's stderr on its own stderr, so it
      # is merged into stdout rather than discarded. stdin is detached so the
      # command cannot consume the name list feeding this loop.
      docker logs --tail=100 "$name" 2>&1 </dev/null || echo "No logs for $name"
    done < <(docker ps -a --format '{{.Names}}' | grep -Ei 'phoenix|pgadmin|postgres')
  fi
else
  echo -e "\n🐳 ${RED}Docker is not installed or not running.${NC}"
fi
# Historical CPU/memory usage with 'sar'
#
# Column positions in sar output differ between sysstat versions (for example
# an extra kbavail column in `sar -r`) and shift by one when timestamps carry
# AM/PM. Both summaries therefore read the positions from sar's own header
# rows instead of hard-coding field numbers. Header rows, "LINUX RESTART"
# rows and the trailing "Average:" row are not reported as samples.

# Summarise `sar -r` output read from stdin.
# Outputs: a tab-separated table "Time, %memused, %commit, Status"; Status is
#          HIGH when %memused > 90 or %commit > 95, otherwise OK.
sar_memory_summary() {
  awk '
    BEGIN { OFS = "\t"; print "Time", "%memused", "%commit", "Status" }
    {
      # A header row names the columns; remember where they are and skip it.
      header = 0
      for (i = 1; i <= NF; i++) {
        if ($i == "%memused") { mem_col = i; header = 1 }
        else if ($i == "%commit") { commit_col = i; header = 1 }
      }
      if (header) next
    }
    /^[0-9]/ && mem_col && commit_col \
      && $mem_col ~ /^[0-9.]+$/ && $commit_col ~ /^[0-9.]+$/ {
      stamp = ($2 == "AM" || $2 == "PM") ? $1 " " $2 : $1
      memused = $mem_col; commit = $commit_col
      status = (memused + 0 > 90 || commit + 0 > 95) ? "⚠️ HIGH" : "OK"
      printf "%s\t%s%%\t%s%%\t%s\n", stamp, memused, commit, status
    }'
}

# Summarise `sar -u` output read from stdin.
# Outputs: a tab-separated table "Time, %user, %system, %idle, Status"; Status
#          is HIGH when %idle < 10 or %system > 90, otherwise OK.
sar_cpu_summary() {
  awk '
    BEGIN { OFS = "\t"; print "Time", "%user", "%system", "%idle", "Status" }
    {
      header = 0
      for (i = 1; i <= NF; i++) {
        if ($i == "%user") { user_col = i; header = 1 }
        else if ($i == "%system") { sys_col = i; header = 1 }
        else if ($i == "%idle") { idle_col = i; header = 1 }
      }
      if (header) next
    }
    /^[0-9]/ && user_col && sys_col && idle_col \
      && $sys_col ~ /^[0-9.]+$/ && $idle_col ~ /^[0-9.]+$/ {
      stamp = ($2 == "AM" || $2 == "PM") ? $1 " " $2 : $1
      user = $user_col; sys = $sys_col; idle = $idle_col
      status = (idle + 0 < 10 || sys + 0 > 90) ? "⚠️ HIGH" : "OK"
      printf "%s\t%s%%\t%s%%\t%s%%\t%s\n", stamp, user, sys, idle, status
    }'
}

if command -v sar &>/dev/null; then
  echo -e "\n📊 Analyzing Memory and CPU Usage via sar (last 60 mins if possible)..."

  echo -e "\n🔍 Memory Usage (High Usage if >90%):"
  # LC_ALL=C keeps the decimal separator a dot. The fallback stops a missing
  # sar data file from aborting the script under `set -e -o pipefail`.
  LC_ALL=C sar -r | sar_memory_summary || echo "No sar memory data available."

  echo -e "\n🔍 CPU Usage (High if %idle < 10 or %system > 90):"
  LC_ALL=C sar -u | sar_cpu_summary || echo "No sar CPU data available."
else
  echo -e "\n 'sar' (sysstat) is not installed. Skipping historical CPU/memory analysis."
fi
# Closing banner: a blank line, then the message wrapped in the YELLOW and NC
# colour codes (expanded by %b, which interprets the stored backslash escapes).
printf '\n✅ %b%s%b\n' \
  "${YELLOW}" \
  'Done. Use this script after crashes or schedule it in cron for proactive monitoring.' \
  "${NC}"