#!/usr/bin/env python3 # ═══════════════════════════════════════════════════════════════════ # Fajl: sub1_hns_manual_overrides.py | v1.0.0 | 05.05.2026 # Lokacija: /opt/pgz-sport/scripts/sub1_hns_manual_overrides.py # Autor: dradulic@outlook.com / damir@rinet.one # Svrha: SUB1 — Manual high-confidence overrides za klubove koje # fuzzy match nije uhvatio (ali postoje u HNS-u). # ═══════════════════════════════════════════════════════════════════ """SUB1 manual overrides — verified mapping for special cases.""" import os, re, sys, time, urllib.request from datetime import datetime import psycopg2 DSN = os.getenv("RINET_DSN", f"host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet password={os.environ['DB_PASSWORD']}") UA = "PGZ-Sport-Bot/1.0 (+https://api.rinet.one/sport/; contact dradulic@outlook.com)" # Manual mappings — verified by visiting semafor.hns.family # Format: db_id -> (hns_id, slug, naziv-na-HNS, reason) OVERRIDES = { 9: (3440, "znk-rijeka", "ŽNK Rijeka", "Ženski NK Rijeka — same modern club"), 101: (3440, "znk-rijeka", "ŽNK Rijeka", "Ženski NK Rijeka 'Jack Pot' — sponsor naming, same club"), 574: (5239, "nk-medicinar", "NK Medicinar", "NK Medicinar Rijeka (osnovan 1996, SRC Belveder)"), } def http_check(url, timeout=10): try: req = urllib.request.Request(url, headers={"User-Agent": UA}) with urllib.request.urlopen(req, timeout=timeout) as r: html = r.read().decode("utf-8", errors="replace") m = re.search(r']*>([^<]+)', html) return r.status, (m.group(1).strip() if m else None) except Exception as e: return 0, str(e) def main(): conn = psycopg2.connect(DSN); conn.autocommit = True cur = conn.cursor() print(f"[{datetime.now().isoformat(timespec='seconds')}] Manual overrides start") ok = 0; fail = 0 for kid, (hns_id, slug, naziv, reason) in OVERRIDES.items(): url = f"https://semafor.hns.family/klubovi/{hns_id}/{slug}/" status, title = http_check(url) time.sleep(0.8) if status != 200: print(f" VERIFY FAIL [{kid}] {hns_id}: {status} {title}") fail += 1 continue try: cur.execute(""" UPDATE pgz_sport.klubovi SET hns_klub_id = %s, hns_slug = %s, source_url = %s, scrape_source = 'hns_semafor_manual', last_scraped_at = now() WHERE id = %s """, (hns_id, slug, url, kid)) print(f" OVERRIDE [{kid}] -> HNS {hns_id} '{title}' ({reason})") ok += 1 except Exception as e: print(f" UPDATE fail [{kid}]: {e}") fail += 1 print(f"Done: ok={ok}, fail={fail}") if __name__ == "__main__": main()