63ca005b6e
PHASE 1 — DEBUG mode:
- /etc/systemd/system/pgz-sport.service.d/debug.conf: DEBUG=1, LOG_LEVEL=DEBUG, PYTHONUNBUFFERED=1, UVICORN_LOG_LEVEL=debug
PHASE 2 — Error stream:
- /opt/pgz-sport/scripts/debug_tail.sh: tail journalctl + nginx → /var/log/pgz-sport-debug/{stream,errors}.jsonl
- pgz-debug-tail.service (always restart, multiplexes 4 sources)
PHASE 3 — Auto-triage bot:
- /opt/pgz-sport/scripts/auto_triage.py: classifies errors, dispatches CC agents
- Patterns: 5xx spike → CC4, 401/403 spike → CC2, 4xx API → CC3, ImportError/DB → CC4
- Rate limit: at most 6 Telegram messages per 5 minutes
- Records decisions in triage_decisions.jsonl
- pgz-auto-triage.service
PHASE 4 — Live dashboard:
- routers/debug_router.py mounted in pgz_sport_api
- GET /api/debug/health — services + DB + error count
- GET /api/debug/errors?limit=N — last N errors (JSON)
- GET /api/debug/decisions — auto-fix decisions
- GET /api/debug/stream — full log tail
- GET /api/debug/dashboard — live HTML page, auto-refreshes every 5 s
Damir admin tier dashboard: https://sport.rinet.one/sport/api/debug/dashboard
60 lines
2.5 KiB
Bash
Executable File
60 lines
2.5 KiB
Bash
Executable File
#!/bin/bash
# Tail journalctl + nginx logs into a structured JSONL stream.

# Directory that receives stream.jsonl, errors.jsonl and the tailer PID files.
LOGDIR=/var/log/pgz-sport-debug

# Every later append targets this directory; if it cannot be created the
# tailers would fail silently on each write, so stop with a clear message.
if ! mkdir -p -- "$LOGDIR"; then
  printf 'debug_tail: cannot create log directory %s\n' "$LOGDIR" >&2
  exit 1
fi

# Follow the pgz-sport unit's journal (new entries only, -n 0) and append one
# JSON object per log line to stream.jsonl.
# IFS= and -r keep the line verbatim: a plain `read` would drop backslashes.
journalctl -u pgz-sport -f -n 0 --output=cat 2>/dev/null | while IFS= read -r line; do
  ts=$(date -Iseconds)

  # Classify by keyword. Order matters: DEBUG wins over WARN, which wins over
  # the ERROR keywords; anything else is INFO.
  # NOTE(review): a line containing both "ERROR" and "DEBUG" is therefore
  # tagged DEBUG - confirm this precedence is what alerting should use.
  case "$line" in
    *DEBUG*) level="DEBUG" ;;
    *WARN*) level="WARN" ;;
    *ERROR* | *Exception* | *Traceback* | *CRITICAL* | *FATAL*) level="ERROR" ;;
    *) level="INFO" ;;
  esac

  # JSON-escape the message. printf (not echo) so a line such as "-n" is passed
  # through as data. If the escape fails, substitute a placeholder so the
  # record written below is still valid JSON.
  safe=$(printf '%s' "$line" \
    | python3 -c "import json,sys; print(json.dumps(sys.stdin.read().strip()))") \
    || safe='"<unencodable log line>"'

  printf '{"ts":"%s","src":"pgz-sport","level":"%s","msg":%s}\n' \
    "$ts" "$level" "$safe" >> "$LOGDIR/stream.jsonl"
done &

# $! is the PID of the last stage of the background pipeline (the while-loop
# subshell), not of journalctl itself.
JPID=$!
echo "$JPID" > "$LOGDIR/journalctl_tail.pid"

# Follow the nginx error log and append every line to stream.jsonl as an
# ERROR-level JSON record.
# -n 0 starts at the end of the file so a restart does not re-ingest the last
# lines under a fresh timestamp; -F keeps following across log rotation.
# IFS= and -r keep the line verbatim: a plain `read` would drop backslashes.
tail -n 0 -F /var/log/nginx/sport.error.log 2>/dev/null | while IFS= read -r line; do
  ts=$(date -Iseconds)

  # JSON-escape the message; fall back to a placeholder if the escape fails so
  # the record written below is still valid JSON.
  safe=$(printf '%s' "$line" \
    | python3 -c "import json,sys; print(json.dumps(sys.stdin.read().strip()))") \
    || safe='"<unencodable log line>"'

  printf '{"ts":"%s","src":"nginx","level":"ERROR","msg":%s}\n' \
    "$ts" "$safe" >> "$LOGDIR/stream.jsonl"
done &

# $! is the PID of the last stage of the background pipeline (the while-loop
# subshell), not of tail itself.
NPID=$!
echo "$NPID" > "$LOGDIR/nginx_tail.pid"

# JSON-encode each stdin line (whitespace-trimmed) as a string literal,
# printing one literal per input line.
json_encode_lines() {
  python3 -c '
import json, sys
for item in sys.stdin:
    print(json.dumps(item.strip()))
'
}

# Follow the nginx access log and record every 4xx/5xx response in
# stream.jsonl; 5xx, 401 and 403 responses are additionally written to
# errors.jsonl.
# -n 0 starts at the end of the file so a restart does not re-ingest old lines.
# IFS= and -r keep the line verbatim: a plain `read` would strip the backslash
# from escapes such as \x22 in the request field.
tail -n 0 -F /var/log/nginx/sport.access.log 2>/dev/null | while IFS= read -r line; do
  # Split on whitespace once. In the combined log format the method, path and
  # status code are the 6th, 7th and 9th fields.
  read -r -a fields <<< "$line"
  code=${fields[8]:-}
  [[ "$code" =~ ^[45][0-9][0-9]$ ]] || continue

  ts=$(date -Iseconds)
  method=${fields[5]:-}
  method=${method//\"/} # drop the quote that opens the request field
  path=${fields[6]:-}

  # Escape method, path and the raw line in a single python run. Method and
  # path come from the client request and must not be pasted into JSON as-is.
  mapfile -t enc < <(printf '%s\n' "$method" "$path" "$line" | json_encode_lines)
  if ((${#enc[@]} != 3)); then
    # Escaping failed: keep the record (and the alert below) with placeholders.
    enc=('""' '""' '"<unencodable log line>"')
  fi

  printf '{"ts":"%s","src":"nginx-access","level":"WARN","code":"%s","method":%s,"path":%s,"raw":%s}\n' \
    "$ts" "$code" "${enc[0]}" "${enc[1]}" "${enc[2]}" >> "$LOGDIR/stream.jsonl"

  # Active alerting: 5xx, 401 and 403 also go straight to the error feed.
  case "$code" in
    5?? | 401 | 403)
      printf '{"ts":"%s","src":"nginx-access","level":"ERROR","code":"%s","method":%s,"path":%s}\n' \
        "$ts" "$code" "${enc[0]}" "${enc[1]}" >> "$LOGDIR/errors.jsonl"
      ;;
  esac
done &

# $! is the PID of the last stage of the background pipeline (the while-loop
# subshell), not of tail itself.
APID=$!
echo "$APID" > "$LOGDIR/access_tail.pid"

# Copy ERROR-level records from stream.jsonl into a separate error file (the
# agents watch this one).
# -n 0: start at the end of the stream so a restart does not append the last
#       existing lines to errors.jsonl a second time.
# --line-buffered: grep block-buffers when writing to a file, so without it
#       matches would reach errors.jsonl only after the buffer fills.
tail -n 0 -F "$LOGDIR/stream.jsonl" 2>/dev/null \
  | grep --line-buffered -E "\"level\":\"(ERROR|CRITICAL|FATAL)\"" >> "$LOGDIR/errors.jsonl" &

# $! is the PID of the last stage of the background pipeline (grep).
EPID=$!
echo "$EPID" > "$LOGDIR/error_filter.pid"

# Print the PIDs recorded for the four background pipelines and where the
# output files live, then block until every background job has exited.
printf 'Debug tail running. PIDs: journalctl=%s nginx=%s access=%s error_filter=%s\n' \
  "$JPID" "$NPID" "$APID" "$EPID"
printf ' stream.jsonl + errors.jsonl in %s\n' "$LOGDIR"

wait