"""
enrich_router.py — Round-2 enrichment endpoint
Author: dradulic@outlook.com Date: 2026-05-04
Surfaces "Obogati podatke" buttons for klubovi, savezi, sportasi.
Strategy:
1) Read what's already in DB and surface fields the frontend may not have shown.
2) Build curated research URLs (Google, Wikipedia HR, Sportilus, sport-pgz.hr,
HNS Semafor) so the operator can verify or expand by hand.
3) If the entity has a `web` URL set, quickly fetch the page and extract its
title and description to return as a "live snippet". 5s timeout, fail-soft.
"""
import os, re, json, time, urllib.parse, urllib.request, html
import psycopg2, psycopg2.extras
from fastapi import APIRouter, HTTPException
# Module-level FastAPI router instance. Endpoints are presumably registered
# on it further down the file and the router mounted by the main app — not
# visible from here, confirm against the rest of the module.
router = APIRouter()
# PostgreSQL connection keyword arguments. Each value is read from the
# environment with a fallback default; the port is converted to int.
# _db() unpacks this mapping into psycopg2.connect().
DB = {
    'host': os.getenv('PG_HOST', '10.10.0.2'),
    'port': int(os.getenv('PG_PORT', '6432')),
    'dbname': os.getenv('PG_DB', 'rinet_v3'),
    'user': os.getenv('PG_USER', 'rinet'),
    'password': os.getenv('PG_PASS', ''),
}

# User-Agent header value sent with outbound page fetches.
UA = 'pgz-sport-enrich/2.0'
def _db():
    """Open and return a new PostgreSQL connection built from ``DB``.

    Autocommit is enabled on the connection before it is returned. The
    caller owns the connection and is responsible for closing it.
    """
    conn = psycopg2.connect(**DB)
    conn.autocommit = True
    return conn
def _fetch_one(sql, p):
    """Run ``sql`` with parameters ``p`` and return the first row as a dict.

    Args:
        sql: SQL statement passed to ``cursor.execute``.
        p: Query parameters passed alongside ``sql``.

    Returns:
        A plain ``dict`` built from the first result row, or ``None`` when
        the query yields no row.
    """
    conn = _db()
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(sql, p)
            row = cur.fetchone()
    finally:
        # psycopg2's ``with connection:`` only ends the transaction; it does
        # not close the connection. Close explicitly so each call releases
        # its connection instead of leaving it open until garbage collection.
        conn.close()
    return dict(row) if row else None
def _fetch_title(url, timeout=5):
if not url: return None
try:
if not url.startswith('http'):
return None
req = urllib.request.Request(url, headers={'User-Agent': UA})
with urllib.request.urlopen(req, timeout=timeout) as r:
data = r.read(40000).decode('utf-8','ignore')
title_m = re.search(r']*>([^<]+)', data, re.I)
desc_m = re.search(r'