feat: /api/v2/analiza/* endpoints - sport analytics backend

This commit is contained in:
Damir Radulic
2026-05-16 00:28:12 +02:00
parent 7ca5d7d94e
commit aca5051418
1355 changed files with 321891 additions and 4128 deletions
+120
View File
@@ -0,0 +1,120 @@
#!/usr/bin/env python3
# sport_harvest_health.py — staleness check za pgz_sport klubove
# v1.0 — dradulic@outlook.com / damir@rinet.one — 2026-05-05
# Description: Provjerava kad je svaki aktivan klub zadnji put scrape-an
#   (klub_roster.scraped_at / clanovi.last_scraped_at). Klubovi >7 dana
#   flag-irani su za re-scrape; Telegram alert se šalje ako ima staleova.
#   Pokreće ga /etc/cron.d/sport-harvesters u 04:30 svaki 2. dan.
import os
import sys
import json
import subprocess
from datetime import datetime, timedelta, timezone
import psycopg2
from psycopg2.extras import RealDictCursor
# --- Runtime configuration; every value can be overridden via environment ---

# Database connection string. RINET_DSN wins when set (and non-empty); only
# otherwise is the default DSN built, which requires DB_PASSWORD and raises
# KeyError when it is missing. The `or` keeps the DB_PASSWORD lookup lazy, so
# supplying RINET_DSN alone is sufficient.
DSN = os.getenv("RINET_DSN") or (
    "host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet "
    f"password={os.environ['DB_PASSWORD']}"
)

# Telegram credentials. The bot token is a secret and must be supplied through
# the environment; no default is embedded in source.
# NOTE(review): a real token used to be committed on this line — rotate it.
TG_TOKEN = os.getenv("TG_TOKEN", "")
TG_CHAT = os.getenv("TG_CHAT", "7969491558")
if not TG_TOKEN:
    # Surface the misconfiguration on stderr so it is visible in cron output.
    print(
        "WARNING: TG_TOKEN is not set; Telegram alerts will not be delivered",
        file=sys.stderr,
    )

# Age threshold, in days, passed to the staleness query.
STALE_DAYS = int(os.getenv("SPORT_STALE_DAYS", "7"))

# One log file per run, named after the start minute.
LOG_DIR = "/var/log/pgz-sport-debug"
LOG_PATH = os.path.join(LOG_DIR, f"health_{datetime.now().strftime('%Y%m%d_%H%M')}.log")
os.makedirs(LOG_DIR, exist_ok=True)
# Kept open for the lifetime of the process; log() appends to it. Explicit
# UTF-8 so non-ASCII club names are written correctly regardless of locale.
_logfh = open(LOG_PATH, "a", encoding="utf-8")
def log(msg: str) -> None:
    """Write a timestamped message to stdout and append it to the run log file.

    Args:
        msg: Text to record; it is prefixed with the local time in ISO format
            (second precision) inside square brackets.
    """
    stamp = datetime.now().isoformat(timespec="seconds")
    entry = "[" + stamp + "] " + msg
    print(entry, flush=True)
    # Flush after every line so the file is current even if the run aborts.
    _logfh.write(f"{entry}\n")
    _logfh.flush()
# Staleness query: returns one row per club in pgz_sport.klubovi with
# aktivan = true. last_scrape is the newer of MAX(klub_roster.scraped_at) and
# MAX(clanovi.last_scraped_at) for that club; when a table has no rows for the
# club, that side falls back to 'epoch'. The boolean `stale` is true when
# last_scrape is at or before epoch, or older than now() minus the interval
# bound to the single %s placeholder (main() passes a string like "7 days").
SQL = """
WITH last_per_klub AS (
SELECT k.id AS klub_id, k.naziv, k.sport,
GREATEST(
COALESCE((SELECT MAX(scraped_at) FROM pgz_sport.klub_roster WHERE klub_id = k.id), 'epoch'::timestamptz),
COALESCE((SELECT MAX(last_scraped_at) FROM pgz_sport.clanovi WHERE klub_id = k.id), 'epoch'::timestamptz)
) AS last_scrape
FROM pgz_sport.klubovi k
WHERE k.aktivan = true
)
SELECT klub_id, naziv, sport, last_scrape,
(last_scrape <= 'epoch'::timestamptz OR last_scrape < now() - interval %s) AS stale
FROM last_per_klub;
"""
def telegram(text: str) -> None:
    """Send *text* to the configured Telegram chat via curl (best effort).

    Never raises: every failure is written to the log instead, so an alerting
    problem cannot abort the health check itself. "telegram sent" is logged
    only when curl exits with 0 and the API response reports ``"ok": true``.

    Args:
        text: Message body. Overlong messages are truncated before sending.
    """
    if not TG_TOKEN:
        log("telegram skipped: TG_TOKEN not set")
        return

    # Stay below the Bot API's 4096-character message limit, with margin.
    max_chars = 4000
    if len(text) > max_chars:
        text = text[: max_chars - 1] + "…"

    try:
        proc = subprocess.run(
            [
                "curl", "-sS", "-X", "POST",
                f"https://api.telegram.org/bot{TG_TOKEN}/sendMessage",
                "-d", f"chat_id={TG_CHAT}",
                "--data-urlencode", f"text={text}",
            ],
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            timeout=10,
            check=False,
        )
    except subprocess.TimeoutExpired:
        # Do not log the exception text: it embeds the full command line,
        # which contains the bot token.
        log("telegram fail: curl timed out after 10s")
        return
    except OSError as e:
        # Typically curl is not installed or not executable.
        log(f"telegram fail: {e}")
        return

    if proc.returncode != 0:
        log(f"telegram fail: curl exit {proc.returncode}: {proc.stderr.strip()}")
        return

    # curl exits 0 even on HTTP 4xx/5xx, so inspect the API's JSON reply.
    try:
        delivered = json.loads(proc.stdout).get("ok") is True
    except (ValueError, AttributeError):
        delivered = False

    if delivered:
        log(f"telegram sent ({len(text)} chars)")
    else:
        log(f"telegram fail: API response {proc.stdout.strip()[:200]}")
def main() -> int:
    """Run the staleness check, log the result and alert on stale clubs.

    Returns:
        0 when no club is stale, 1 when at least one club is stale (a Telegram
        summary is sent), 2 when the database connection or query fails (a
        Telegram alert is sent).
    """
    log(f"sport_harvest_health START stale_days={STALE_DAYS}")
    try:
        conn = psycopg2.connect(DSN)
    except Exception as e:
        log(f"DB connect FAIL: {e}")
        telegram(f"🚨 sport_harvest_health: DB connect FAIL — {e}")
        return 2

    # All rows are fetched into memory, so the connection can be released
    # right after the query — and must be released on failure as well.
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(SQL, (f"{STALE_DAYS} days",))
            rows = cur.fetchall()
    except psycopg2.Error as e:
        log(f"DB query FAIL: {e}")
        telegram(f"🚨 sport_harvest_health: DB query FAIL — {e}")
        return 2
    finally:
        conn.close()

    total = len(rows)
    stale_rows = [r for r in rows if r["stale"]]

    # Stale-club count per sport; a missing sport is grouped under "?".
    by_sport: dict = {}
    for r in stale_rows:
        s = (r["sport"] or "?").lower()
        by_sport[s] = by_sport.get(s, 0) + 1

    # The ten clubs with the oldest last scrape, oldest first.
    top_stale = sorted(
        stale_rows,
        key=lambda r: (r["last_scrape"] or datetime(1970, 1, 1, tzinfo=timezone.utc)),
    )[:10]

    log(f"klubova_total={total} stale={len(stale_rows)} by_sport={json.dumps(by_sport, ensure_ascii=False)}")
    for r in top_stale:
        log(f"  STALE klub_id={r['klub_id']} sport={r['sport']} last={r['last_scrape']} naziv={r['naziv']}")

    if stale_rows:
        sport_summary = ", ".join(f"{k.upper()}:{v}" for k, v in sorted(by_sport.items()))
        # Only the five oldest go into the alert to keep the message short.
        top_lines = "\n".join(
            f"  • {r['naziv']} ({(r['sport'] or '?')}) — {r['last_scrape']}"
            for r in top_stale[:5]
        )
        msg = (
            f"⚠️ Sport harvest stale: {len(stale_rows)}/{total} klubova "
            f">{STALE_DAYS} dana ({sport_summary})\nTop:\n{top_lines}"
        )
        telegram(msg)

    log("sport_harvest_health DONE")
    return 1 if stale_rows else 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)