HNS+UI: 4 nova endpointa + multi-sport schema (M2M kategorije + player_stats)

Endpoints:
- GET /api/v2/enrich-sources — sport→source mapping
- GET /api/v2/klubovi/priority-sort — financirani/godišnjak prvi
- GET /api/v2/clan/{id}/kategorije — many-to-many kategorije
- GET /api/v2/clan/{id}/full — kompletna slika (profil+kategorije+sezone+utakmice+stats)
- POST /api/v2/export/klubovi — XLSX export selektiranih

Schema:
- pgz_sport.clan_kategorije (M2M: igrač u juniorskoj+seniorskoj)
- pgz_sport.player_stats (multi-sport: nogomet/košarka/rukomet/odbojka/vaterpolo)
- pgz_sport.klub_roster (multi-source)
- pgz_sport.enrichment_sources (sport→izvor)
- View: v_pgz_priority_klubovi (financiran || u_godisnjaku)
- View: v_klubovi_priority_sort (priority sort)

Sport harvesters scaffold:
- scripts/sport_harvesters/__base.py (SportHarvester class)
- hks_basketball.py, hrs_handball.py, hos_volleyball.py, hvs_waterpolo.py
This commit is contained in:
2026-05-05 10:42:49 +02:00
parent c68fd4471e
commit 9fb512932a
10 changed files with 4765 additions and 0 deletions
+120
View File
@@ -0,0 +1,120 @@
#!/usr/bin/env python3
# sport_harvest_health.py — staleness check za pgz_sport klubove
# v1.0 — dradulic@outlook.com / damir@rinet.one — 2026-05-05
# Description: Provjerava kad je svaki aktivan klub zadnji put scrape-an
#   (uzima se novija od vrijednosti klub_roster.scraped_at i
#   clanovi.last_scraped_at). Klubovi >7 dana flag-irani su za re-scrape;
#   Telegram alert se šalje ako ima staleova.
#   Pokreće ga /etc/cron.d/sport-harvesters u 04:30 svaki 2. dan.
import os
import sys
import json
import subprocess
from datetime import datetime, timedelta, timezone
import psycopg2
from psycopg2.extras import RealDictCursor
# --- Runtime configuration (environment-driven) ------------------------------
# Credentials are read from the environment only (for example from an env file
# sourced by the cron job). Never put a real password or bot token here as a
# fallback default: anything committed to the repository must be treated as
# leaked and rotated. With the variables unset, the DB connection fails and the
# script reports that instead of silently using baked-in secrets.
DSN = os.getenv("RINET_DSN", "")
TG_TOKEN = os.getenv("TG_TOKEN", "")
# The chat id is a destination address, not a credential, so it keeps a default.
TG_CHAT = os.getenv("TG_CHAT", "7969491558")


def _env_non_negative_int(name: str, default: int) -> int:
    """Return environment variable *name* as an int >= 0.

    Falls back to *default* when the variable is unset, blank, not an integer,
    or negative, so a typo in the cron environment cannot crash the script at
    import time. A warning is written to stderr for rejected values.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        value = int(raw)
    except ValueError:
        value = -1  # funnel into the shared "invalid" branch below
    if value < 0:
        print(
            f"{name}={raw!r} is not a non-negative integer; using {default}",
            file=sys.stderr,
        )
        return default
    return value


# Age in days after which a club's last scrape counts as stale.
STALE_DAYS = _env_non_negative_int("SPORT_STALE_DAYS", 7)
# Per-run debug log: the file name carries the start time down to the minute,
# and the file is opened in append mode.
LOG_DIR = "/var/log/pgz-sport-debug"
LOG_PATH = os.path.join(LOG_DIR, f"health_{datetime.now().strftime('%Y%m%d_%H%M')}.log")
os.makedirs(LOG_DIR, exist_ok=True)
# Explicit UTF-8: log lines contain club names and error text with non-ASCII
# characters, and the default encoding would otherwise depend on the locale of
# whatever environment launches the script.
_logfh = open(LOG_PATH, "a", encoding="utf-8")
def log(msg: str) -> None:
    """Write *msg*, prefixed with a local ISO timestamp, to stdout and the log file.

    Both sinks are flushed on every call so output is not held in a buffer.
    """
    stamp = datetime.now().isoformat(timespec="seconds")
    entry = "[" + stamp + "] " + msg
    print(entry, flush=True)
    _logfh.write(f"{entry}\n")
    _logfh.flush()
# Staleness query: returns one row per active club (klubovi.aktivan = true).
#   last_scrape - the more recent of MAX(klub_roster.scraped_at) and
#                 MAX(clanovi.last_scraped_at) for that club; a source with no
#                 rows is coalesced to the Unix epoch, so a club with no scrape
#                 data at all ends up with last_scrape = epoch.
#   stale       - true when last_scrape is at or before the epoch, or older
#                 than now() minus the interval bound to the single %s
#                 placeholder (main() passes a string such as "7 days").
SQL = """
WITH last_per_klub AS (
SELECT k.id AS klub_id, k.naziv, k.sport,
GREATEST(
COALESCE((SELECT MAX(scraped_at) FROM pgz_sport.klub_roster WHERE klub_id = k.id), 'epoch'::timestamptz),
COALESCE((SELECT MAX(last_scraped_at) FROM pgz_sport.clanovi WHERE klub_id = k.id), 'epoch'::timestamptz)
) AS last_scrape
FROM pgz_sport.klubovi k
WHERE k.aktivan = true
)
SELECT klub_id, naziv, sport, last_scrape,
(last_scrape <= 'epoch'::timestamptz OR last_scrape < now() - interval %s) AS stale
FROM last_per_klub;
"""
def telegram(text: str) -> None:
    """Send *text* to the configured Telegram chat via curl (best effort).

    Never raises for delivery problems: every failure is logged and the
    function returns, so an alerting problem cannot abort the health check.

    Args:
        text: Message body. Overlong messages are truncated to stay under the
            Bot API length limit for a single message.
    """
    if not TG_TOKEN or not TG_CHAT:
        log("telegram skipped: TG_TOKEN/TG_CHAT not configured")
        return

    # The Bot API rejects sendMessage texts above 4096 characters; keep some
    # headroom because long database error messages are forwarded verbatim.
    max_len = 4000
    if len(text) > max_len:
        text = text[: max_len - 1] + "…"

    # The URL embeds the bot token. Passing it as a curl config on stdin keeps
    # the token out of the process list and out of the command line that
    # subprocess includes in its timeout error message (which gets logged).
    curl_config = f'url = "https://api.telegram.org/bot{TG_TOKEN}/sendMessage"\n'
    try:
        result = subprocess.run(
            [
                "curl", "-sS", "--fail", "-X", "POST",
                "--config", "-",
                "-d", f"chat_id={TG_CHAT}",
                "--data-urlencode", f"text={text}",
            ],
            input=curl_config,
            capture_output=True,
            encoding="utf-8",
            errors="replace",
            timeout=10,
            check=False,
        )
    except (OSError, subprocess.SubprocessError) as e:
        # curl missing from PATH, timeout, or another spawn failure.
        log(f"telegram fail: {e}")
        return

    # --fail makes curl exit non-zero on HTTP errors (bad token, bad chat id),
    # so the return code is the only reliable delivery signal here.
    if result.returncode != 0:
        log(f"telegram fail: curl exit {result.returncode}: {result.stderr.strip()}")
        return
    log(f"telegram sent ({len(text)} chars)")
def main() -> int:
    """Run the staleness check once and return the process exit code.

    Queries every active club, logs totals plus the ten stalest clubs, and
    sends a Telegram summary when at least one club is stale.

    Returns:
        0 when no club is stale, 1 when at least one club is stale, and
        2 when the database could not be reached or the query failed.
    """
    from collections import Counter  # local import keeps the block self-contained

    log(f"sport_harvest_health START stale_days={STALE_DAYS}")
    try:
        conn = psycopg2.connect(DSN)
    except psycopg2.Error as e:
        log(f"DB connect FAIL: {e}")
        telegram(f"🚨 sport_harvest_health: DB connect FAIL — {e}")
        return 2

    # Release the connection as soon as the rows are fetched, and on every
    # path, so it is not held open during the Telegram network call.
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(SQL, (f"{STALE_DAYS} days",))
            rows = cur.fetchall()
    except psycopg2.Error as e:
        # Same reporting as a connect failure; otherwise a broken query would
        # end in a traceback with no alert.
        log(f"DB query FAIL: {e}")
        telegram(f"🚨 sport_harvest_health: DB query FAIL — {e}")
        return 2
    finally:
        conn.close()

    total = len(rows)
    stale_rows = [r for r in rows if r["stale"]]
    by_sport = Counter((r["sport"] or "?").lower() for r in stale_rows)

    # Oldest scrape first; the epoch fallback only guards against a NULL value.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    top_stale = sorted(stale_rows, key=lambda r: r["last_scrape"] or epoch)[:10]

    log(f"klubova_total={total} stale={len(stale_rows)} by_sport={json.dumps(dict(by_sport), ensure_ascii=False)}")
    for r in top_stale:
        log(f" STALE klub_id={r['klub_id']} sport={r['sport']} last={r['last_scrape']} naziv={r['naziv']}")

    if stale_rows:
        sport_summary = ", ".join(f"{k.upper()}:{v}" for k, v in sorted(by_sport.items()))
        top_lines = "\n".join(
            f"{r['naziv']} ({(r['sport'] or '?')}) — {r['last_scrape']}"
            for r in top_stale[:5]
        )
        msg = (
            f"⚠️ Sport harvest stale: {len(stale_rows)}/{total} klubova "
            f">{STALE_DAYS} dana ({sport_summary})\nTop:\n{top_lines}"
        )
        telegram(msg)

    log("sport_harvest_health DONE")
    return 1 if stale_rows else 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())