#!/usr/bin/env python3
"""results_multilevel.py — daily scrape of league match results across HR sport
federations (HNS / HKS / HRS).

This file is the **scaffold**: a real production-grade per-match scraper for
each federation is per-federation HTML/embed/API work. The structure here
makes adding each federation a single function — and lays down logging,
dedup-by-external-id, klub-name → klub_id resolution, and CLI flags.

Scope today (2026-05-09):
  - HNS  (nogomet): TODO — likely source https://hns-cff.hr/ + per-liga pages
  - HKS  (košarka): partial — could borrow Genius Sports embed used by
    /opt/pgz-sport/scrapers/hks_scraper.py for standings; that scraper does
    *standings*, not per-match results
  - HRS  (rukomet): TODO — likely source https://www.hrs.hr/

Exit 0 on partial coverage; per-federation failure logs to stderr.
"""
# NOTE: the docstring above and this __future__ import must stay ahead of every
# other statement — Python rejects `from __future__` imports that follow code.
from __future__ import annotations

import argparse
import json
import logging
import os
import sys
import time
from datetime import datetime, timezone, date
from typing import Optional

import psycopg2
import psycopg2.extras
from dotenv import load_dotenv

# auto-added by patch_scrapers_with_dotenv.sh
# Must run before the os.environ lookups below so that values from the env
# file are visible when DSN is built.
load_dotenv('/opt/rinet-gpu/.env.master')

logging.basicConfig(level=logging.INFO, format="%(asctime)s [results_ml] %(message)s")
log = logging.getLogger("results_ml")

# Connection string: RINET_DSN wins when it is set. DB_PASSWORD is only looked
# up (and only required) when the built-in default DSN is actually needed.
DSN = os.environ.get("RINET_DSN")
if DSN is None:
    DSN = f"host=127.0.0.1 port=6432 dbname=rinet_v3 user=rinet password={os.environ['DB_PASSWORD']}"
UA = "RiNET-Civic/1.0 (https://rinet.one) results scraper"
RATE_LIMIT_SEC = 1.2  # polite between requests within one federation


# ─── DB helpers ────────────────────────────────────────────────────────
def db_conn():
    """Open and return a new psycopg2 connection using the module-level ``DSN``."""
    connection = psycopg2.connect(DSN)
    return connection


# Savepoint used to isolate one upsert from the surrounding transaction.
_UPSERT_SAVEPOINT = "upsert_match_sp"

# Every named placeholder of the INSERT below except raw_payload (always set).
_UPSERT_FIELDS = (
    "sport", "federation", "liga", "sezona", "match_date", "kolo",
    "home_team", "away_team", "home_klub_id", "away_klub_id",
    "home_score", "away_score", "status", "source_url", "external_id",
)


def upsert_match(cur, row: dict) -> str:
    """Insert if (federation, external_id) is new; else update score+status.

    Args:
        cur: DB cursor whose ``fetchone()`` returns a mapping (``main`` creates
            it with ``RealDictCursor``).
        row: match fields keyed by the placeholder names of the INSERT below.
            Keys missing from ``row`` are bound as NULL; ``raw_payload`` is
            serialized to JSON (an empty object when absent or falsy).

    Returns:
        'inserted' | 'updated' | 'skipped' | 'error'. 'skipped' means
        ``federation`` or ``external_id`` is empty, so the row has no dedup key.

    A failing statement is confined to a savepoint: PostgreSQL otherwise marks
    the whole transaction as aborted, which would make every later statement
    fail and the final COMMIT discard all rows written so far.
    """
    if not row.get("federation") or not row.get("external_id"):
        return "skipped"

    # SAVEPOINT is only valid inside a transaction block; in autocommit mode
    # every statement is its own transaction, so no isolation is needed.
    use_savepoint = not getattr(getattr(cur, "connection", None), "autocommit", False)
    savepoint_open = False
    try:
        params = dict.fromkeys(_UPSERT_FIELDS)
        params.update(row)
        params["raw_payload"] = json.dumps(row.get("raw_payload") or {}, ensure_ascii=False)

        if use_savepoint:
            cur.execute(f"SAVEPOINT {_UPSERT_SAVEPOINT}")
            savepoint_open = True
        cur.execute(
            """
            INSERT INTO pgz_sport.results
              (sport, federation, liga, sezona, match_date, kolo,
               home_team, away_team, home_klub_id, away_klub_id,
               home_score, away_score, status, source_url, external_id, raw_payload)
            VALUES (%(sport)s, %(federation)s, %(liga)s, %(sezona)s,
                    %(match_date)s, %(kolo)s,
                    %(home_team)s, %(away_team)s, %(home_klub_id)s, %(away_klub_id)s,
                    %(home_score)s, %(away_score)s,
                    COALESCE(%(status)s,'final'),
                    %(source_url)s, %(external_id)s,
                    %(raw_payload)s::jsonb)
            ON CONFLICT (federation, external_id) DO UPDATE SET
              home_score = EXCLUDED.home_score,
              away_score = EXCLUDED.away_score,
              status     = EXCLUDED.status,
              raw_payload= EXCLUDED.raw_payload,
              scraped_at = now()
            RETURNING (xmax = 0) AS inserted
            """,
            params,
        )
        # xmax = 0 holds only for a freshly inserted tuple, not a conflict update.
        was_inserted = cur.fetchone()["inserted"]
        if savepoint_open:
            cur.execute(f"RELEASE SAVEPOINT {_UPSERT_SAVEPOINT}")
        return "inserted" if was_inserted else "updated"
    except Exception as e:
        log.warning("upsert_match err for %s/%s: %s",
                    row.get("federation"), row.get("external_id"), e)
        if savepoint_open:
            try:
                # Undo only this row and leave the transaction usable.
                cur.execute(f"ROLLBACK TO SAVEPOINT {_UPSERT_SAVEPOINT}")
                cur.execute(f"RELEASE SAVEPOINT {_UPSERT_SAVEPOINT}")
            except Exception as rollback_err:
                log.warning("upsert_match savepoint rollback failed: %s", rollback_err)
        return "error"


def resolve_klub(cur, name: str, sport: str) -> Optional[int]:
    """Best-effort name → klubovi.id. Falls back to None.

    Args:
        cur: DB cursor whose ``fetchone()`` returns a mapping with an ``id`` key.
        name: club name as scraped; surrounding whitespace is ignored and only
            the first 40 characters are used for matching.
        sport: sport label matched case-insensitively against ``klubovi.sport``.

    Returns:
        The id of the shortest-named club whose ``naziv`` contains ``name``
        (case-insensitive), or None when the name is empty/blank or nothing
        matches.
    """
    needle = (name or "").strip()[:40]
    if not needle:
        # A blank needle would become the pattern '%%', which matches every
        # club and would hand back an arbitrary id.
        return None
    # Escape LIKE metacharacters so scraped text is matched literally
    # (backslash is PostgreSQL's default LIKE escape character).
    escaped = needle.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    cur.execute(
        """SELECT id FROM pgz_sport.klubovi
           WHERE sport ILIKE %s AND naziv ILIKE %s
           ORDER BY length(naziv) ASC LIMIT 1""",
        (sport, f"%{escaped}%"),
    )
    r = cur.fetchone()
    return r["id"] if r else None


# ─── Per-federation scrapers ────────────────────────────────────────────
def scrape_hns(cur, sezona: str = "2025/26") -> dict:
    """Placeholder for the HNS (nogomet) results scraper — not implemented yet.

    Candidate sources noted for the eventual implementation:
      * https://hns-cff.hr/natjecanja/  (lige listing)
      * https://semafor.hns.family/    (per-match drill-down; the one
        hns_master_harvester.py relies on for player seasons)

    Each liga page has its own layout, which makes season-by-season per-match
    parsing non-trivial, so the real implementation is deferred. ``cur`` and
    ``sezona`` are accepted but not used yet.

    Returns:
        A zeroed counters dict with ``status`` set to ``"stub"``.
    """
    stub_result = dict(inserted=0, updated=0, errors=0, status="stub")
    log.warning("scrape_hns: NOT YET IMPLEMENTED — stub returns 0 rows")
    return stub_result


def scrape_hks(cur, sezona: str = "2025/26") -> dict:
    """Placeholder for the HKS (košarka) results scraper — not implemented yet.

    The Genius Sports embed exposes JSON for fixtures. The existing
    /opt/pgz-sport/scrapers/hks_scraper.py only reads the *standings* embed:
      https://hosted.dcd.shared.geniussports.com/embednf/HKS/en/competition/{ID}/standings
    The fixtures/results embed lives under a different path
    (.../competition/{ID}/fixtures or .../matches); per-match parsing is
    deferred. ``cur`` and ``sezona`` are accepted but not used yet.

    Returns:
        A zeroed counters dict with ``status`` set to ``"stub"``.
    """
    stub_result = dict(inserted=0, updated=0, errors=0, status="stub")
    log.warning("scrape_hks: NOT YET IMPLEMENTED — stub returns 0 rows")
    return stub_result


def scrape_hrs(cur, sezona: str = "2025/26") -> dict:
    """Placeholder for the HRS (rukomet) results scraper — source still TBD.

    No scraper exists for this federation yet. Likely candidates:
      * https://www.hrs.hr/  (federation site)
      * Per-liga pages once located

    ``cur`` and ``sezona`` are accepted but not used yet.

    Returns:
        A zeroed counters dict with ``status`` set to ``"stub"``.
    """
    stub_result = dict(inserted=0, updated=0, errors=0, status="stub")
    log.warning("scrape_hrs: NOT YET IMPLEMENTED — stub returns 0 rows")
    return stub_result


# ─── orchestration ─────────────────────────────────────────────────────
# Registry of runnable federations: CLI key → (sport label, scraper callable).
FEDS = dict(
    hns=("nogomet", scrape_hns),
    hks=("kosarka", scrape_hks),
    hrs=("rukomet", scrape_hrs),
)


def main():
    """Run the selected federation scrapers and print a JSON run summary.

    CLI flags:
      --feds     comma list of federation keys to run (default "hns,hks,hrs")
      --sezona   season label passed to every scraper (default "2025/26")
      --dry-run  run scrapers but roll back instead of committing

    The transaction is ended after each federation (COMMIT, or ROLLBACK under
    --dry-run). A database error inside one scraper leaves the transaction in
    an aborted state; ending it per federation stops that from failing the
    following federations or discarding rows written by earlier ones.

    Returns:
        0 always — a crashing federation is logged and recorded in the
        summary instead of failing the whole run.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--feds", default="hns,hks,hrs",
                    help="comma list of federation keys to run")
    ap.add_argument("--sezona", default="2025/26")
    ap.add_argument("--dry-run", action="store_true",
                    help="run scrapers but do not COMMIT")
    args = ap.parse_args()

    started = datetime.now(timezone.utc)
    summary = {"started": started.isoformat(), "sezona": args.sezona, "feds": {}}

    conn = db_conn()
    try:
        conn.autocommit = False
        cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)

        for fed in [f.strip().lower() for f in args.feds.split(",") if f.strip()]:
            if fed not in FEDS:
                log.warning(f"unknown federation: {fed}")
                continue
            sport, fn = FEDS[fed]
            log.info(f"scraping {fed} ({sport})")
            t0 = time.time()
            try:
                res = fn(cur, sezona=args.sezona)
            except Exception as e:
                # Surface the failure in the log; the summary keeps a short form.
                log.exception("scrape %s crashed", fed)
                res = {"inserted": 0, "updated": 0, "errors": 1,
                       "status": f"crash:{type(e).__name__}: {str(e)[:200]}"}
            res["elapsed_s"] = round(time.time() - t0, 1)
            summary["feds"][fed] = res

            # End the transaction here so this federation's outcome cannot
            # affect the next one. COMMIT on an aborted transaction is executed
            # by PostgreSQL as a ROLLBACK, so a crashed scraper is safe too.
            if args.dry_run:
                conn.rollback()
            else:
                conn.commit()

        if args.dry_run:
            log.info("dry-run: rolled back")
    finally:
        conn.close()

    summary["ended"] = datetime.now(timezone.utc).isoformat()
    print(json.dumps(summary, ensure_ascii=False, indent=2))
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())