"""
enrich_router.py - Round-2/3B enrichment + forensic-scan endpoints
Author: dradulic@outlook.com Date: 2026-05-04 (R2), 2026-05-05 (R3B)
Surfaces "Obogati podatke" buttons for klubovi, savezi, sportasi, plus
the Forenzika "Pokreni novu analizu" scan endpoint that searches civic.*.
Strategy:
1) Read what's already in DB and surface fields the frontend may not have shown.
2) Build curated research URLs (Google, Wikipedia HR, Sportilus, sport-pgz.hr,
HNS Semafor) so the operator can verify or expand by hand.
3) If the entity has a `web` URL set, quickly fetch the page and extract its
<title> + <meta description> to return as a "live snippet". 5s timeout, fail-soft.
4) /forensic/scan - match name across civic.persons, return entity links,
forensic_findings hits, and a synthesised risk score.
5) /enrich/{kind}/{id}/apply β fetch best web source for entity and UPDATE the
row's web/email/telefon fields when missing.
"""
import os, re, json, time, urllib.parse, urllib.request, html
import psycopg2, psycopg2.extras
from fastapi import APIRouter, HTTPException, Body
# FastAPI router instance for this module. The enrichment / forensic-scan
# endpoints described in the module docstring presumably register on it
# (the handlers are not visible in this part of the file - confirm).
router = APIRouter()
# Database target: taken from PG_HOST / PG_PORT, with built-in fallbacks.
_pgh = os.getenv('PG_HOST', '10.10.0.2')
_pgp = int(os.getenv('PG_PORT', '6432'))

# pgz-sport.service inherits PG_HOST=localhost:5432 from /opt/.env.rinet, which
# is wrong (the local PG is disabled). When the environment points at the
# loopback host, force the Server B DSN from DB_HOST / DB_PORT instead.
if _pgh in ('localhost', '127.0.0.1'):
    _pgh = os.getenv('DB_HOST', '10.10.0.2')
    _pgp = int(os.getenv('DB_PORT', '6432'))

# Keyword arguments handed verbatim to psycopg2.connect().
# NOTE(review): the fallback password is a hard-coded credential committed to
# source; consider supplying it only via PG_PASS and rotating this value.
DB = {
    'host': _pgh,
    'port': _pgp,
    'dbname': os.getenv('PG_DB', 'rinet_v3'),
    'user': os.getenv('PG_USER', 'rinet'),
    'password': os.getenv('PG_PASS', 'R1net2026!SecureDB#v7'),
}

# User-Agent header value sent with outbound HTTP requests.
UA = 'pgz-sport-enrich/2.0'
def _db():
    """Open a new psycopg2 connection from the module-level ``DB`` settings.

    Autocommit is switched on before the connection is handed back.

    Returns:
        The open connection. It is not closed here; the caller owns it.
    """
    conn = psycopg2.connect(**DB)
    conn.autocommit = True
    return conn
def _fetch_one(sql, p):
    """Run a query and return its first row as a plain dict.

    Args:
        sql: SQL text, passed to ``cursor.execute`` unchanged.
        p: Query parameters, passed to ``cursor.execute`` unchanged.

    Returns:
        A ``dict`` built from the first result row, or ``None`` when the
        query produced no row.
    """
    conn = _db()
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(sql, p)
            row = cur.fetchone()
    finally:
        # psycopg2's ``with connection:`` only ends the transaction; it does
        # not close the connection. Close explicitly so each call does not
        # leave a server connection open until garbage collection.
        conn.close()
    return dict(row) if row else None
def _fetch_title(url, timeout=5):
if not url: return None
try:
if not url.startswith('http'):
return None
req = urllib.request.Request(url, headers={'User-Agent': UA})
with urllib.request.urlopen(req, timeout=timeout) as r:
data = r.read(40000).decode('utf-8','ignore')
title_m = re.search(r']*>([^<]+)', data, re.I)
desc_m = re.search(r'