""" enrich_router.py β€” Round-2/3B enrichment + forensic-scan endpoints Author: dradulic@outlook.com Date: 2026-05-04 (R2), 2026-05-05 (R3B) Surfaces "Obogati podatke" buttons for klubovi, savezi, sportasi, plus the Forenzika "Pokreni novu analizu" scan endpoint that searches civic.*. Strategy: 1) Read what's already in DB and surface fields the frontend may not have shown. 2) Build curated research URLs (Google, Wikipedia HR, Sportilus, sport-pgz.hr, HNS Semafor) so the operator can verify or expand by hand. 3) If the entity has a `web` URL set, quickly fetch the page and extract + <meta description> to return as a "live snippet". 5s timeout, fail-soft. 4) /forensic/scan β€” match name across civic.persons, return entity links, forensic_findings hits, and a synthesised risk score. 5) /enrich/{kind}/{id}/apply β€” fetch best web source for entity and UPDATE the row's web/email/telefon fields when missing. """ import os, re, json, time, urllib.parse, urllib.request, html import psycopg2, psycopg2.extras from fastapi import APIRouter, HTTPException, Body router = APIRouter() _pgh = os.environ.get('PG_HOST','10.10.0.2') _pgp = int(os.environ.get('PG_PORT','6432')) # pgz-sport.service inherits PG_HOST=localhost:5432 from /opt/.env.rinet which is wrong # (local PG is disabled). Force the Server B DSN if env says localhost. if _pgh in ('localhost', '127.0.0.1'): _pgh = os.environ.get('DB_HOST','10.10.0.2') _pgp = int(os.environ.get('DB_PORT','6432')) DB = dict(host=_pgh, port=_pgp, dbname=os.environ.get('PG_DB','rinet_v3'), user=os.environ.get('PG_USER','rinet'), password=os.environ.get('PG_PASS','R1net2026!SecureDB#v7')) UA = 'pgz-sport-enrich/2.0' def _db(): c = psycopg2.connect(**DB); c.autocommit = True; return c def _fetch_one(sql, p): with _db() as c, c.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: cur.execute(sql, p) r = cur.fetchone() return dict(r) if r else None def _fetch_title(url, timeout=5): if not url: return None try: if not url.startswith('http'): return None req = urllib.request.Request(url, headers={'User-Agent': UA}) with urllib.request.urlopen(req, timeout=timeout) as r: data = r.read(40000).decode('utf-8','ignore') title_m = re.search(r'<title[^>]*>([^<]+)', data, re.I) desc_m = re.search(r'