#!/usr/bin/env python3
# sport_harvest_health.py — staleness check for pgz_sport clubs
# v1.0 — dradulic@outlook.com / damir@rinet.one — 2026-05-05
# Description: Checks when each active club was last scraped
#   (klub_roster.scraped_at ∪ clanovi.last_scraped_at). Clubs whose last
#   scrape is older than SPORT_STALE_DAYS (default 7) are flagged for
#   re-scrape; a Telegram alert is sent when any stale clubs exist.
#   Run by /etc/cron.d/sport-harvesters at 04:30 every 2nd day.
#
# Exit codes: 0 = no stale clubs, 1 = stale clubs found, 2 = database failure.

import os
import sys
import json
import subprocess
from collections import Counter
from contextlib import closing
from datetime import datetime, timedelta, timezone

import psycopg2
from psycopg2.extras import RealDictCursor

# NOTE(security): the fallback values below embed live credentials in source.
# They are kept only so existing deployments keep working; rotate them and
# supply RINET_DSN / TG_TOKEN / TG_CHAT through the environment instead.
DSN = os.getenv(
    "RINET_DSN",
    "host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet password=R1net2026!SecureDB#v7",
)
TG_TOKEN = os.getenv("TG_TOKEN", "8535797835:AAFItT-92jzZ9NWFafLxn0dLa1_n2s-JE5Y")
TG_CHAT = os.getenv("TG_CHAT", "7969491558")
STALE_DAYS = int(os.getenv("SPORT_STALE_DAYS", "7"))

# Telegram rejects sendMessage texts longer than this many characters.
TG_MAX_LEN = 4096

LOG_DIR = "/var/log/pgz-sport-debug"
LOG_PATH = os.path.join(LOG_DIR, f"health_{datetime.now().strftime('%Y%m%d_%H%M')}.log")
os.makedirs(LOG_DIR, exist_ok=True)
# Explicit UTF-8: club names contain non-ASCII characters and the cron
# environment's locale encoding cannot be relied upon.
_logfh = open(LOG_PATH, "a", encoding="utf-8")


def log(msg: str) -> None:
    """Write a timestamped line to stdout and to the per-run log file."""
    line = f"[{datetime.now().isoformat(timespec='seconds')}] {msg}"
    print(line, flush=True)
    _logfh.write(line + "\n")
    _logfh.flush()


# One row per active club with its most recent scrape timestamp (taken from
# either klub_roster or clanovi) and a boolean `stale` flag. Clubs that were
# never scraped collapse to the epoch and are always reported as stale.
# The single %s parameter is the staleness interval, e.g. '7 days'.
SQL = """
WITH last_per_klub AS (
    SELECT
        k.id AS klub_id,
        k.naziv,
        k.sport,
        GREATEST(
            COALESCE((SELECT MAX(scraped_at) FROM pgz_sport.klub_roster WHERE klub_id = k.id), 'epoch'::timestamptz),
            COALESCE((SELECT MAX(last_scraped_at) FROM pgz_sport.clanovi WHERE klub_id = k.id), 'epoch'::timestamptz)
        ) AS last_scrape
    FROM pgz_sport.klubovi k
    WHERE k.aktivan = true
)
SELECT
    klub_id,
    naziv,
    sport,
    last_scrape,
    (last_scrape <= 'epoch'::timestamptz OR last_scrape < now() - interval %s) AS stale
FROM last_per_klub;
"""


def _redact(text: str) -> str:
    """Return *text* with the Telegram bot token masked, for safe logging."""
    return text.replace(TG_TOKEN, "<TG_TOKEN>") if TG_TOKEN else text


def telegram(text: str) -> None:
    """Send *text* to the configured Telegram chat via curl (best effort).

    Never raises: every failure is logged and swallowed so that alerting
    problems cannot change the health check's exit status. Success is only
    logged when curl exits with status 0.
    """
    payload = text[:TG_MAX_LEN]
    cmd = [
        "curl",
        "-sS",
        "--fail",  # make HTTP 4xx/5xx responses produce a non-zero exit code
        "-X",
        "POST",
        f"https://api.telegram.org/bot{TG_TOKEN}/sendMessage",
        "-d",
        f"chat_id={TG_CHAT}",
        "--data-urlencode",
        f"text={payload}",
    ]
    try:
        proc = subprocess.run(cmd, capture_output=True, timeout=10, check=False)
    except Exception as e:
        # TimeoutExpired's message embeds the full argv (including the bot
        # token in the URL), so it must be masked before it reaches the log.
        log(f"telegram fail: {_redact(str(e))}")
        return

    if proc.returncode != 0:
        stderr = proc.stderr.decode("utf-8", errors="replace").strip()
        log(f"telegram fail: curl exit={proc.returncode} {_redact(stderr)}")
        return

    log(f"telegram sent ({len(payload)} chars)")


def main() -> int:
    """Run the staleness check and return the process exit code.

    Returns:
        0 when no active club is stale, 1 when at least one is stale,
        2 when the database could not be reached or queried.
    """
    log(f"sport_harvest_health START stale_days={STALE_DAYS}")

    try:
        conn = psycopg2.connect(DSN)
    except Exception as e:
        log(f"DB connect FAIL: {e}")
        telegram(f"🚨 sport_harvest_health: DB connect FAIL — {e}")
        return 2

    interval_str = f"{STALE_DAYS} days"
    try:
        # closing() guarantees the connection is released even if the query
        # raises; the cursor context manager closes the cursor first.
        with closing(conn), conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(SQL, (interval_str,))
            rows = cur.fetchall()
    except psycopg2.Error as e:
        log(f"DB query FAIL: {e}")
        telegram(f"🚨 sport_harvest_health: DB query FAIL — {e}")
        return 2

    total = len(rows)
    stale_rows = [r for r in rows if r["stale"]]
    by_sport = Counter((r["sport"] or "?").lower() for r in stale_rows)

    # Oldest scrape first; only the ten most overdue clubs are logged.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    top_stale = sorted(stale_rows, key=lambda r: r["last_scrape"] or epoch)[:10]

    log(f"klubova_total={total} stale={len(stale_rows)} by_sport={json.dumps(by_sport, ensure_ascii=False)}")
    for r in top_stale:
        log(f"  STALE klub_id={r['klub_id']} sport={r['sport']} last={r['last_scrape']} naziv={r['naziv']}")

    if stale_rows:
        sport_summary = ", ".join(f"{k.upper()}:{v}" for k, v in sorted(by_sport.items()))
        top_lines = "\n".join(
            f"  • {r['naziv']} ({(r['sport'] or '?')}) — {r['last_scrape']}"
            for r in top_stale[:5]
        )
        msg = (
            f"⚠️ Sport harvest stale: {len(stale_rows)}/{total} klubova "
            f">{STALE_DAYS} dana ({sport_summary})\nTop:\n{top_lines}"
        )
        telegram(msg)

    log("sport_harvest_health DONE")
    return 1 if stale_rows else 0


if __name__ == "__main__":
    sys.exit(main())