feat: /api/v2/analiza/* endpoints - sport analytics backend

This commit is contained in:
Damir Radulic
2026-05-16 00:28:12 +02:00
parent 7ca5d7d94e
commit aca5051418
1355 changed files with 321891 additions and 4128 deletions
+148
View File
@@ -0,0 +1,148 @@
#!/bin/bash
# sport.rinet.one Total Forensic Collection Script
# Run as root or sudo on the host (gpu.rinet.one)
# Output: /tmp/sport_forensics_<YYYYmmdd_HHMMSS>_<random>.txt (mode 0600)
#
# The report collects configuration, environment and log excerpts and the
# script is meant to run as root. A fixed, guessable name in /tmp could be
# pre-created or symlinked by another local user, and a default umask would
# leave the report world-readable, so the file is created with mktemp.
OUTFILE=$(mktemp "/tmp/sport_forensics_$(date +%Y%m%d_%H%M%S)_XXXXXX.txt") || {
  echo "Cannot create report file in /tmp" >&2
  exit 1
}
# From here on, everything written to stdout or stderr is shown on the
# terminal and appended to the report file.
exec > >(tee -a "$OUTFILE") 2>&1
echo "============================================================"
echo "Sport.RiNET.one Forensic Deep Dive"
echo "Started: $(date)"
echo "============================================================"
# 1. Nginx configuration for sport.rinet.one
echo -e "\n[1] NGINX CONFIG (sport.rinet.one)"
# Dump the effective configuration once and search the same text twice,
# instead of invoking nginx -T for every grep.
nginx_dump=$(nginx -T 2>/dev/null)
grep -A30 "server_name sport.rinet.one" <<<"$nginx_dump" \
  || echo "No dedicated server block? Checking proxy pass..."
# Context around any proxy_pass that targets the backend on port 8095.
grep -B5 -A10 "proxy_pass http://127.0.0.1:8095" <<<"$nginx_dump" | head -40
# Also check if there is a separate /sport/ location on main domains
echo -e "\n[1b] Additional /sport/ proxy in main vhosts"
# -R rather than -r: entries in sites-enabled are commonly symlinks, and
# GNU grep -r does not follow symlinks it meets while recursing, which would
# silently skip those vhost files.
grep -R "location /sport" /etc/nginx/sites-enabled/ | head -20
# 2. Systemd services for PGŽ Sport
echo -e "\n[2] SYSTEMD SERVICES (pgz-sport related)"
systemctl list-units --type=service | grep -E "pgz|sport" || echo "No matching services"
# The last entry is a unit-name pattern meant for systemctl; it is quoted so
# the shell cannot expand it against files in the current directory.
for svc in pgz-sport pgz-sport-enricher pgz-sport-ocr 'pgz-sport-loop?'; do
  rc=0
  systemctl status "$svc" &>/dev/null || rc=$?
  # systemctl status exits 0 for an active unit, 3 for a unit that is not
  # active (stopped or failed) and 4 when the unit does not exist. Stopped
  # and failed units are exactly what a forensic report needs to show, so
  # only unknown units (and a missing systemctl, rc > 4) are skipped.
  if [ "$rc" -le 3 ]; then
    echo -e "\n--- $svc ---"
    systemctl status "$svc" --no-pager -l | head -20
  fi
done
# 3. Backend API: what is bound to port 8095
printf '\n%s\n' "[3] BACKEND API (port 8095)"
# lsof is tried first; ss is consulted only when lsof exits non-zero.
if ! lsof -i :8095 -P -n 2>/dev/null; then
  ss -tulpn | grep :8095
fi
printf '\n%s\n' "Process tree of backend:"
# Forest-style process listing narrowed to lines mentioning the port or the
# API script; the grep processes themselves are filtered out.
ps auxf \
  | grep -E "8095|pgz_sport_api" \
  | grep -v grep \
  | head -20
# 4. Code location and Git state
echo -e "\n[4] CODE REPOSITORY (/opt/pgz-sport)"
if [ -d /opt/pgz-sport ]; then
  # Run in a subshell so the directory change does not leak into the rest of
  # the script, and stop this section if the cd fails rather than reporting
  # on whatever directory the script was started from.
  (
    cd /opt/pgz-sport || { echo "Cannot enter /opt/pgz-sport"; exit 1; }
    echo "Git remote:"
    git remote -v 2>/dev/null || echo "Not a git repo"
    echo -e "\nCurrent branch & latest commit:"
    git branch -a 2>/dev/null | head -5
    git log -1 --oneline 2>/dev/null
    echo -e "\nUncommitted changes (git status --short):"
    git status --short | head -50
    echo -e "\nTop-level files:"
    ls -la | head -20
  )
else
  echo "/opt/pgz-sport not found, checking alternative paths"
  # The two -name tests are grouped explicitly: without the parentheses,
  # -type d binds only to "*pgz*" and anything named "*sport*" (regular
  # files included) would be listed.
  find /opt -maxdepth 2 -type d \( -name "*pgz*" -o -name "*sport*" \) 2>/dev/null
fi
# 5. Environment variables and .env files

#######################################
# Mask secret values in KEY=VALUE text.
# Reads stdin, writes the masked text to stdout.
# Masked (case-insensitively): everything after "=" when the text before it
# ends in PASSWORD, PASSWD, SECRET, KEY or TOKEN; and the password part of
# scheme://user:password@host URLs.
# Arguments: $1 - replacement marker (default: ***REDACTED***). It is spliced
#                 into a sed script, so it must not contain "/", "#", "&"
#                 or "\".
# Note: relies on GNU sed (the "I" flag), as the original masking did.
#######################################
_mask_secrets() {
  local marker="${1:-***REDACTED***}"
  sed -E \
    -e "s/((PASSWORD|PASSWD|SECRET|KEY|TOKEN)=).*/\1${marker}/I" \
    -e "s#(://[^:/@]*:)[^@]*@#\1${marker}@#"
}

echo -e "\n[5] ENVIRONMENT CONFIGURATION"
if [ -f /opt/pgz-sport/.env ]; then
  echo "--- .env (masked) ---"
  _mask_secrets < /opt/pgz-sport/.env
else
  echo "No .env found in /opt/pgz-sport"
fi
echo -e "\nEnvironment variables of the backend process:"
# First PID whose command line mentions the API script, if any.
PID=$(pgrep -f "pgz_sport_api.py" | head -1)
if [ -n "$PID" ]; then
  # /proc/<pid>/environ is NUL-separated. The selection below includes
  # OPENAI* and *_URL style variables, so the same masking as for .env is
  # applied; masking only PASSWORD= would write API keys and URL credentials
  # into the report.
  tr '\0' '\n' < "/proc/$PID/environ" \
    | grep -E "^(DB_|PG_|POSTGRES|REDIS|QDRANT|OPENAI)" \
    | _mask_secrets '***'
else
  echo "No running pgz_sport_api process found"
fi
# 6. Database connectivity (PostgreSQL)
echo -e "\n[6] DATABASE (PostgreSQL)"
if command -v psql &>/dev/null; then
  # Run psql against the default rinet_v3 database via pgbouncer.
  #   -w  never prompt for a password; a prompt would stall an unattended run.
  # The password comes from DB_PASSWORD, else from an already exported
  # PGPASSWORD; it may be empty, in which case psql has to authenticate some
  # other way or fail.
  _psql_rinet() {
    PGPASSWORD="${DB_PASSWORD:-${PGPASSWORD:-}}" \
      psql -w -h 127.0.0.1 -p 6432 -U rinet -d rinet_v3 "$@"
  }

  # Try to find connection string from .env or process
  DB_URL=$(grep -E "DATABASE_URL|POSTGRES_URL" /opt/pgz-sport/.env 2>/dev/null | cut -d= -f2- | head -1)
  if [ -n "$DB_URL" ]; then
    echo "Connection string found (masked)"
    echo "$DB_URL" | sed 's/:[^:]*@/:***@/'
  else
    # Use default rinet DB via pgbouncer
    echo "Assuming default rinet_v3 database via pgbouncer localhost:6432"
    # A missing DB_PASSWORD is reported but must not abort the whole
    # collection run (a "${VAR:?}" expansion would exit the script here).
    if [ -z "${DB_PASSWORD:-}" ]; then
      echo "DB_PASSWORD not set; trying without an explicit password"
    fi
    if _psql_rinet -c "\l" &>/dev/null; then
      echo "Connection successful (rinet_v3)"
    else
      echo "Connection failed"
    fi
  fi
  # Check sport-specific tables
  echo -e "\nTable count for 'pgz' / 'sport' schemas:"
  # psql exits non-zero only when the query could not be run; an empty
  # result set still succeeds, so the fallback reports a failure rather
  # than claiming that no schemas exist.
  _psql_rinet -c "SELECT schemaname, COUNT(*) FROM pg_tables WHERE schemaname LIKE '%sport%' OR schemaname LIKE '%pgz%' GROUP BY schemaname" 2>/dev/null \
    || echo "Query failed (connection or authentication problem)"
else
  echo "psql not installed"
fi
# 7. Qdrant vector database (used by sport)
echo -e "\n[7] QDRANT VECTOR STORAGE"
# Qdrant runs in Docker, check container
# stderr is silenced on docker itself (it was attached to grep, where it had
# no effect), so a missing or inaccessible docker yields only the fallback
# line.
docker ps --format "table {{.Names}}\t{{.Image}}\t{{.Ports}}" 2>/dev/null \
  | grep -i qdrant \
  || echo "Qdrant container not found via docker"
# Check collections via API if accessible
if command -v curl &>/dev/null; then
  # The response is captured first so that curl's own exit status decides
  # whether the API is reachable. In a "curl | json.tool | head || echo"
  # pipeline the status is head's, so the fallback could never run.
  if qdrant_json=$(curl -s --max-time 10 http://127.0.0.1:6333/collections) \
    && [ -n "$qdrant_json" ]; then
    if qdrant_pretty=$(printf '%s\n' "$qdrant_json" | python3 -m json.tool 2>/dev/null); then
      printf '%s\n' "$qdrant_pretty" | head -30
    else
      # python3 missing or the body is not valid JSON: show it unformatted.
      printf '%s\n' "$qdrant_json" | head -30
    fi
  else
    echo "Qdrant API not reachable"
  fi
fi
# 8. Logs: journal excerpts per service, then log files on disk
printf '\n%s\n' "[8] LOGS (last 30 lines per service)"
log_units=(pgz-sport pgz-sport-enricher pgz-sport-ocr)
for unit in "${log_units[@]}"; do
  # Units that systemd does not report as active are skipped.
  systemctl is-active --quiet "$unit" || continue
  printf '\n--- %s (journalctl) ---\n' "$unit"
  journalctl -u "$unit" -n 30 --no-pager
done
printf '\n%s\n' "Application log files:"
# One ls per file, capped at 20 entries.
find /opt/pgz-sport -name "*.log" -type f -exec ls -lh {} \; 2>/dev/null \
  | head -20
printf '\n%s\n' "Tail of /opt/pgz-sport/_logs/*.log (latest):"
# Last 20 lines of each matching file; the combined output is capped at 50.
tail -n 20 /opt/pgz-sport/_logs/*.log 2>/dev/null \
  | head -50
# 9. Cron entries matching the sport/pgz keyword list
printf '\n%s\n' "[9] CRON JOBS (sport / pgz)"
# One case-insensitive pattern shared by the crontab and /etc/cron.d scans.
cron_re="sport|pgz|hbs|hks|hos|godisnjak"
# Crontab of the invoking user: matching lines only.
crontab -l 2>/dev/null | grep -iE "$cron_re"
# System cron fragments: print the whole file when any line matches.
for cron_file in /etc/cron.d/*; do
  grep -q -iE "$cron_re" "$cron_file" 2>/dev/null || continue
  printf '=== %s ===\n' "$cron_file"
  cat "$cron_file"
done
# 10. Frontend static assets: is the backend or nginx serving them?
printf '\n%s\n' "[10] FRONTEND STATIC ASSETS"
# Mentions of static-file handling in the top-level Python sources.
grep -r "static\|StaticFiles\|mount" /opt/pgz-sport/*.py 2>/dev/null \
  | head -10
# root/alias directives within five lines after the sport server_name in
# the enabled vhost files.
grep -A5 "server_name sport.rinet.one" /etc/nginx/sites-enabled/* \
  | grep "root\|alias"
# 11. Background workers: processes whose command line names an
# enrichment/OCR worker or a sport loop
printf '\n%s\n' "[11] BACKGROUND WORKERS (python scripts)"
ps aux \
  | grep -E "enrichment_worker|ocr_worker|sport.*loop" \
  | grep -v grep
# 12. Latest API endpoints (quick test)
echo -e "\n[12] ENDPOINT HEALTH CHECK"
# Both probes go to the public HTTPS endpoint. curl has no overall timeout
# by default, so each request is bounded explicitly; otherwise the script
# could stall for minutes when the site is down.
curl -s --connect-timeout 5 --max-time 15 -o /dev/null \
  -w "Health check: %{http_code}\n" https://sport.rinet.one/health 2>/dev/null \
  || echo "Health endpoint not available"
curl -s --connect-timeout 5 --max-time 15 -o /dev/null \
  -w "API root: %{http_code}\n" https://sport.rinet.one/api/ 2>/dev/null \
  || echo "API root not available"
# 13. Process summary for anything matching pgz/sport, then the closing banner
printf '\n%s\n' "[13] PROCESS SUMMARY (pgz/sport)"
# The grep processes and anything with "forensic" in its line are filtered
# out before the listing is aligned into columns.
ps -ef \
  | grep -E "pgz|sport" \
  | grep -v grep \
  | grep -v "forensic" \
  | column -t
banner_rule="============================================================"
printf '\n%s\n' "$banner_rule"
printf '%s\n' "Report saved to: $OUTFILE"
printf '%s\n' "Finished: $(date)"
printf '%s\n' "$banner_rule"