68 lines
3.1 KiB
Python
68 lines
3.1 KiB
Python
#!/usr/bin/env python3
|
|
# ═══════════════════════════════════════════════════════════════════
# File: sub1_hns_manual_overrides.py | v1.0.0 | 05.05.2026
# Location: /opt/pgz-sport/scripts/sub1_hns_manual_overrides.py
# Author: dradulic@outlook.com / damir@rinet.one
# Purpose: SUB1 — manual high-confidence overrides for clubs that the
#          fuzzy match did not catch (but which do exist in HNS).
# ═══════════════════════════════════════════════════════════════════
|
|
"""SUB1 manual overrides — verified mapping for special cases."""
|
|
import os
import re
import sys
import time
import urllib.error
import urllib.request
from datetime import datetime

import psycopg2
|
|
|
|
# Database connection string. RINET_DSN wins when it is set; otherwise the
# DSN is assembled from fixed host/port/db/user values plus DB_PASSWORD.
# The fallback is built only when RINET_DSN is absent: passing it as the
# default argument of os.getenv() evaluated os.environ['DB_PASSWORD'] eagerly
# and raised KeyError even when RINET_DSN was provided.
DSN = os.environ.get("RINET_DSN")
if DSN is None:
    DSN = f"host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet password={os.environ['DB_PASSWORD']}"

# User-Agent header value sent by http_check() with each request.
UA = "PGZ-Sport-Bot/1.0 (+https://api.rinet.one/sport/; contact dradulic@outlook.com)"
|
|
|
|
# Manual mappings — verified by visiting semafor.hns.family.
# Format: db_id -> (hns_id, slug, club name as shown on HNS, reason)
OVERRIDES = {
    9: (
        3440,
        "znk-rijeka",
        "ŽNK Rijeka",
        "Ženski NK Rijeka — same modern club",
    ),
    101: (
        3440,
        "znk-rijeka",
        "ŽNK Rijeka",
        "Ženski NK Rijeka 'Jack Pot' — sponsor naming, same club",
    ),
    574: (
        5239,
        "nk-medicinar",
        "NK Medicinar",
        "NK Medicinar Rijeka (osnovan 1996, SRC Belveder)",
    ),
}
|
|
|
|
def http_check(url, timeout=10):
    """Fetch *url* and report its HTTP status and first ``<h1>`` text.

    Args:
        url: Address to request; the module-level ``UA`` string is sent as
            the User-Agent header.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        A ``(status, detail)`` tuple:

        * response received: ``(r.status, h1_text)`` where ``h1_text`` is the
          stripped content of the first ``<h1>`` element, or ``None`` when
          the pattern finds none;
        * ``HTTPError`` raised by ``urlopen``: ``(e.code, str(e))`` so the
          real HTTP status is reported instead of 0;
        * any other exception: ``(0, str(e))``.

    Never raises; all failures are reported through the return value.
    """
    try:
        req = urllib.request.Request(url, headers={"User-Agent": UA})
        with urllib.request.urlopen(req, timeout=timeout) as r:
            html = r.read().decode("utf-8", errors="replace")
            m = re.search(r'<h1[^>]*>([^<]+)</h1>', html)
            return r.status, (m.group(1).strip() if m else None)
    except urllib.error.HTTPError as e:
        # The server did answer: keep its status code rather than folding it
        # into the generic 0 used for transport/other failures.
        return e.code, str(e)
    except Exception as e:
        # Best-effort boundary: callers only look at the returned status.
        return 0, str(e)
|
|
|
|
def main():
    """Apply every entry of OVERRIDES to ``pgz_sport.klubovi``.

    For each mapping the HNS club page URL is built and fetched with
    ``http_check``; the database row is updated only when that check returns
    status 200. An entry counts as failed when the check does not return 200,
    when the UPDATE raises, or when the UPDATE matches no row. A summary line
    with both counters is printed at the end, and the connection is closed
    even if the run aborts part-way.
    """
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    ok = 0
    fail = 0
    try:
        cur = conn.cursor()
        print(f"[{datetime.now().isoformat(timespec='seconds')}] Manual overrides start")
        # The third tuple field (club name) is carried in OVERRIDES for
        # reference only; it is not used by this loop.
        for kid, (hns_id, slug, _naziv, reason) in OVERRIDES.items():
            url = f"https://semafor.hns.family/klubovi/{hns_id}/{slug}/"
            status, title = http_check(url)
            # Pause after each request (presumably to rate-limit hits on the
            # remote site).
            time.sleep(0.8)
            if status != 200:
                print(f" VERIFY FAIL [{kid}] {hns_id}: {status} {title}")
                fail += 1
                continue
            try:
                cur.execute(
                    """
                    UPDATE pgz_sport.klubovi
                    SET hns_klub_id = %s,
                        hns_slug = %s,
                        source_url = %s,
                        scrape_source = 'hns_semafor_manual',
                        last_scraped_at = now()
                    WHERE id = %s
                    """,
                    (hns_id, slug, url, kid),
                )
            except Exception as e:
                print(f" UPDATE fail [{kid}]: {e}")
                fail += 1
                continue
            if cur.rowcount == 0:
                # The statement succeeded but touched nothing: the db_id is
                # not present, so the override was NOT applied.
                print(f" UPDATE fail [{kid}]: no row with id={kid} in pgz_sport.klubovi")
                fail += 1
                continue
            print(f" OVERRIDE [{kid}] -> HNS {hns_id} '{title}' ({reason})")
            ok += 1
        print(f"Done: ok={ok}, fail={fail}")
    finally:
        conn.close()


if __name__ == "__main__":
    main()
|