124 lines
4.0 KiB
Python
Executable File
124 lines
4.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
from dotenv import load_dotenv
|
||
load_dotenv('/opt/rinet-gpu/.env.master')
|
||
# auto-added by patch_scrapers_with_dotenv.sh
|
||
# sport_harvest_health.py — staleness check za pgz_sport klubove
|
||
# v1.0 — dradulic@outlook.com / damir@rinet.one — 2026-05-05
|
||
# Description: Provjerava kad je svaki aktivan klub zadnji put scrape-an
|
||
# (klub_roster.scraped_at ∪ clanovi.last_scraped_at). Klubovi >7 dana
|
||
# flag-irani su za re-scrape; Telegram alert se šalje ako ima staleova.
|
||
# Pokreće ga /etc/cron.d/sport-harvesters u 04:30 svaki 2. dan.
|
||
|
||
import os
|
||
import sys
|
||
import json
|
||
import subprocess
|
||
from datetime import datetime, timedelta, timezone
|
||
import psycopg2
|
||
from psycopg2.extras import RealDictCursor
|
||
|
||
# Database connection string. RINET_DSN wins whenever it is set; otherwise a
# DSN is built from DB_PASSWORD. The fallback is only evaluated when it is
# actually needed, so a missing DB_PASSWORD no longer raises KeyError at
# import time while RINET_DSN is provided. With neither variable set the
# KeyError on DB_PASSWORD is still raised, as before.
if "RINET_DSN" in os.environ:
    DSN = os.environ["RINET_DSN"]
else:
    DSN = (
        "host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet "
        f"password={os.environ['DB_PASSWORD']}"
    )

# SECURITY(review): the fallback below is a bot token committed in source.
# It is kept so alerting keeps working where TG_TOKEN is not exported, but
# the token should be rotated and supplied only through the environment.
TG_TOKEN = os.getenv("TG_TOKEN", "8535797835:AAFItT-92jzZ9NWFafLxn0dLa1_n2s-JE5Y")
# Telegram chat that receives the alerts.
TG_CHAT = os.getenv("TG_CHAT", "7969491558")
# A club counts as stale when its last scrape is older than this many days.
STALE_DAYS = int(os.getenv("SPORT_STALE_DAYS", "7"))
# Directory that receives one log file per run.
LOG_DIR = "/var/log/pgz-sport-debug"
|
||
|
||
# One log file per run, named after the minute the script started.
LOG_PATH = os.path.join(LOG_DIR, f"health_{datetime.now().strftime('%Y%m%d_%H%M')}.log")
os.makedirs(LOG_DIR, exist_ok=True)
# Explicit UTF-8: logged club names and exception text may contain non-ASCII
# characters, and the locale default encoding is not guaranteed to handle
# them. The handle stays open for the whole run and is used by log().
_logfh = open(LOG_PATH, "a", encoding="utf-8")
|
||
|
||
|
||
def log(msg: str) -> None:
    """Write *msg*, prefixed with a local timestamp, to stdout and the run log.

    Both destinations are flushed immediately so the output is visible
    even if the process is interrupted.
    """
    stamp = datetime.now().isoformat(timespec="seconds")
    entry = f"[{stamp}] {msg}"
    # stdout first, then the per-run log file opened at module level.
    for sink in (sys.stdout, _logfh):
        print(entry, file=sink, flush=True)
|
||
|
||
|
||
# Staleness query. For every active club (klubovi.aktivan = true) it takes the
# most recent of klub_roster.scraped_at and clanovi.last_scraped_at, falling
# back to the Unix epoch when a table has no row for the club. The single %s
# placeholder receives an interval string such as '7 days'; a club is flagged
# stale when it has never been scraped or its last scrape is older than that.
SQL = """
WITH last_per_klub AS (
    SELECT k.id AS klub_id, k.naziv, k.sport,
        GREATEST(
            COALESCE((SELECT MAX(scraped_at) FROM pgz_sport.klub_roster WHERE klub_id = k.id), 'epoch'::timestamptz),
            COALESCE((SELECT MAX(last_scraped_at) FROM pgz_sport.clanovi WHERE klub_id = k.id), 'epoch'::timestamptz)
        ) AS last_scrape
    FROM pgz_sport.klubovi k
    WHERE k.aktivan = true
)
SELECT klub_id, naziv, sport, last_scrape,
    (last_scrape <= 'epoch'::timestamptz OR last_scrape < now() - interval %s) AS stale
FROM last_per_klub;
"""
|
||
|
||
|
||
def telegram(text: str) -> None:
    """Send *text* to the configured Telegram chat via curl (best effort).

    The call never raises: every failure is written to the log instead.
    Success is only logged when curl exits with status 0 and the Bot API
    reply is a JSON object whose "ok" field is true.

    NOTE(review): the bot token is part of the URL passed on curl's command
    line, so it is visible in the process list while curl runs.
    """
    try:
        result = subprocess.run(
            [
                "curl", "-sS", "-X", "POST",
                f"https://api.telegram.org/bot{TG_TOKEN}/sendMessage",
                "-d", f"chat_id={TG_CHAT}",
                "--data-urlencode", f"text={text}",
            ],
            capture_output=True,
            timeout=10,
            check=False,
        )
    except Exception as e:
        # Deliberately broad: alerting must never take the health check down
        # (curl missing, timeout, invalid argument, ...).
        log(f"telegram fail: {e}")
        return

    if result.returncode != 0:
        detail = result.stderr.decode("utf-8", errors="replace").strip()
        log(f"telegram fail: curl exit {result.returncode}: {detail}")
        return

    # curl exits 0 on HTTP errors too, so the API reply decides success.
    try:
        reply = json.loads(result.stdout)
    except ValueError:
        reply = None
    if not (isinstance(reply, dict) and reply.get("ok")):
        reason = reply.get("description") if isinstance(reply, dict) else "unexpected response"
        log(f"telegram fail: {reason}")
        return

    log(f"telegram sent ({len(text)} chars)")
|
||
|
||
|
||
def main() -> int:
    """Run the staleness check and return the process exit code.

    Queries the last scrape time of every active club, logs a summary and
    the ten stalest clubs, and sends a Telegram alert when any club is stale.

    Returns:
        0 when no club is stale, 1 when at least one club is stale,
        2 when the database cannot be reached or the query fails.
    """
    log(f"sport_harvest_health START stale_days={STALE_DAYS}")
    try:
        conn = psycopg2.connect(DSN)
    except Exception as e:
        log(f"DB connect FAIL: {e}")
        telegram(f"🚨 sport_harvest_health: DB connect FAIL — {e}")
        return 2

    # Fetch everything up front and release the connection immediately, so it
    # is neither leaked on a query error nor held open during the alert call.
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(SQL, (f"{STALE_DAYS} days",))
            rows = cur.fetchall()
    except Exception as e:
        # A failed query is reported like a failed connect instead of
        # escaping as a traceback whose exit status would look like "stale".
        log(f"DB query FAIL: {e}")
        telegram(f"🚨 sport_harvest_health: DB query FAIL — {e}")
        return 2
    finally:
        conn.close()

    total = len(rows)
    stale_rows = [r for r in rows if r["stale"]]

    # Number of stale clubs per sport; a missing sport is counted under "?".
    by_sport: dict = {}
    for r in stale_rows:
        s = (r["sport"] or "?").lower()
        by_sport[s] = by_sport.get(s, 0) + 1

    # Oldest scrapes first; a missing timestamp sorts as the Unix epoch.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    top_stale = sorted(stale_rows, key=lambda r: r["last_scrape"] or epoch)[:10]

    log(f"klubova_total={total} stale={len(stale_rows)} by_sport={json.dumps(by_sport, ensure_ascii=False)}")
    for r in top_stale:
        log(f" STALE klub_id={r['klub_id']} sport={r['sport']} last={r['last_scrape']} naziv={r['naziv']}")

    if stale_rows:
        sport_summary = ", ".join(f"{k.upper()}:{v}" for k, v in sorted(by_sport.items()))
        # The alert lists only the five stalest clubs to keep it short.
        top_lines = "\n".join(
            f" • {r['naziv']} ({(r['sport'] or '?')}) — {r['last_scrape']}"
            for r in top_stale[:5]
        )
        msg = (
            f"⚠️ Sport harvest stale: {len(stale_rows)}/{total} klubova "
            f">{STALE_DAYS} dana ({sport_summary})\nTop:\n{top_lines}"
        )
        telegram(msg)

    log("sport_harvest_health DONE")
    return 1 if stale_rows else 0
|
||
|
||
|
||
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|