""" enrich_router.py — Round-2 enrichment endpoint Author: dradulic@outlook.com Date: 2026-05-04 Surfaces "Obogati podatke" buttons for klubovi, savezi, sportasi. Strategy: 1) Read what's already in DB and surface fields the frontend may not have shown. 2) Build curated research URLs (Google, Wikipedia HR, Sportilus, sport-pgz.hr, HNS Semafor) so the operator can verify or expand by hand. 3) If the entity has a `web` URL set, quickly fetch the page and extract + <meta description> to return as a "live snippet". 5s timeout, fail-soft. """ import os, re, json, time, urllib.parse, urllib.request, html import psycopg2, psycopg2.extras from fastapi import APIRouter, HTTPException router = APIRouter() DB = dict(host=os.environ.get('PG_HOST','10.10.0.2'), port=int(os.environ.get('PG_PORT','6432')), dbname=os.environ.get('PG_DB','rinet_v3'), user=os.environ.get('PG_USER','rinet'), password=os.environ.get('PG_PASS','')) UA = 'pgz-sport-enrich/2.0' def _db(): c = psycopg2.connect(**DB); c.autocommit = True; return c def _fetch_one(sql, p): with _db() as c, c.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: cur.execute(sql, p) r = cur.fetchone() return dict(r) if r else None def _fetch_title(url, timeout=5): if not url: return None try: if not url.startswith('http'): return None req = urllib.request.Request(url, headers={'User-Agent': UA}) with urllib.request.urlopen(req, timeout=timeout) as r: data = r.read(40000).decode('utf-8','ignore') title_m = re.search(r'<title[^>]*>([^<]+)', data, re.I) desc_m = re.search(r'