CC1 R2 — full Round 2 done (8/8 stavki)

- geocode_objekti_v2.py + DB updates (Kastav, Rujevica, Platak, Petehovac, Crikvenica, Krk hand-curated)
- Maps URL → /maps/search/?api=1 format for proper pin
- Dashboard: year selector for nositelji, click → klub/PDF panel; top savezi clickable
- Universal sort (asc/desc) on Savezi/Klubovi/Sportaši/Objekti/Manifestacije/Financije
- Card↔Table toggle on Financije
- Manifestacije: source_url direct open, Google fallback
- Forenzika: severity/tip filter, search, run-scan, Liverić PEP custom findings + DB alerts
- Enrich endpoint /api/v2/enrich/{kind}/{id} + button on savez/klub/sportaš panels
- New 'Mreža' section: D3 force graph from /api/v1/presenter/graph-real

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 23:50:30 +02:00
parent a7ec0a86be
commit b7cb050843
3 changed files with 544 additions and 11 deletions
@@ -0,0 +1,134 @@
+"""
+enrich_router.py — Round-2 enrichment endpoint
+Author: dradulic@outlook.com  Date: 2026-05-04
+
+Surfaces "Obogati podatke" buttons for klubovi, savezi, sportasi.
+
+Strategy:
+  1) Read what's already in DB and surface fields the frontend may not have shown.
+  2) Build curated research URLs (Google, Wikipedia HR, Sportilus, sport-pgz.hr,
+     HNS Semafor) so the operator can verify or expand by hand.
+  3) If the entity has a `web` URL set, quickly fetch the page and extract
+     <title> + <meta description> to return as a "live snippet". 5s timeout, fail-soft.
+"""
+import os, re, json, time, urllib.parse, urllib.request, html
+import psycopg2, psycopg2.extras
+from fastapi import APIRouter, HTTPException
+
+# Router that the enrichment endpoint in this module is registered on.
+router = APIRouter()
+
+# Keyword arguments handed to psycopg2.connect(); each value can be
+# overridden through the matching PG_* environment variable.
+DB = {
+    'host': os.environ.get('PG_HOST', '10.10.0.2'),
+    'port': int(os.environ.get('PG_PORT', '6432')),
+    'dbname': os.environ.get('PG_DB', 'rinet_v3'),
+    'user': os.environ.get('PG_USER', 'rinet'),
+    'password': os.environ.get('PG_PASS', ''),
+}
+
+# User-Agent header sent when fetching an entity's web page.
+UA = 'pgz-sport-enrich/2.0'
+
+def _db():
+    """Open a new database connection from ``DB`` with autocommit enabled.
+
+    Returns:
+        A fresh psycopg2 connection; the caller is responsible for closing it.
+    """
+    conn = psycopg2.connect(**DB)
+    conn.autocommit = True
+    return conn
+
+def _fetch_one(sql, p):
+    """Run ``sql`` with parameters ``p`` and return the first row as a dict.
+
+    Args:
+        sql: SQL text with ``%s`` placeholders.
+        p: Sequence of parameter values bound to the placeholders.
+
+    Returns:
+        A plain ``dict`` mapping column name to value, or ``None`` when the
+        query produced no row.
+    """
+    conn = _db()
+    try:
+        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
+            cur.execute(sql, p)
+            r = cur.fetchone()
+            return dict(r) if r else None
+    finally:
+        # psycopg2's ``with connection:`` only ends the transaction; it never
+        # closes the connection, so close it explicitly to avoid leaking one
+        # connection per call.
+        conn.close()
+
+def _fetch_title(url, timeout=5):
+    """Fetch ``url`` and extract its <title> and description as a snippet.
+
+    Fail-soft: any error while fetching or parsing is reported in the result
+    instead of being raised.
+
+    Args:
+        url: Page address; surrounding whitespace is ignored. Only
+            ``http://`` and ``https://`` URLs are fetched.
+        timeout: Timeout in seconds passed to ``urlopen``.
+
+    Returns:
+        ``None`` when ``url`` is empty or not an http(s) URL; otherwise a dict
+        with ``url``, ``title``, ``description`` and ``fetched_at`` (Unix
+        time), or ``{'url': ..., 'error': ...}`` when the fetch failed.
+    """
+    if not url: return None
+    try:
+        url = url.strip()
+        # Require a real http(s) scheme; a bare 'http' prefix check would also
+        # let through strings such as 'httpx://...'.
+        if not url.lower().startswith(('http://', 'https://')):
+            return None
+        req = urllib.request.Request(url, headers={'User-Agent': UA})
+        with urllib.request.urlopen(req, timeout=timeout) as r:
+            # Only the first 40000 bytes are read.
+            raw = r.read(40000)
+            charset = r.headers.get_content_charset() or 'utf-8'
+        try:
+            # Honour the charset declared in Content-Type so pages that are
+            # not UTF-8 keep their non-ASCII characters.
+            data = raw.decode(charset, 'ignore')
+        except LookupError:
+            # Server declared an encoding Python does not know.
+            data = raw.decode('utf-8', 'ignore')
+        title_m = re.search(r'<title[^>]*>([^<]+)</title>', data, re.I)
+        desc_m = re.search(r'<meta\s+name=["\']description["\']\s+content=["\']([^"\']+)["\']', data, re.I)
+        og_desc_m = re.search(r'<meta\s+property=["\']og:description["\']\s+content=["\']([^"\']+)["\']', data, re.I)
+        # Prefer the plain meta description, fall back to og:description.
+        best_desc_m = desc_m or og_desc_m
+        return {
+            'url': url,
+            'title': html.unescape(title_m.group(1).strip())[:300] if title_m else None,
+            'description': html.unescape(best_desc_m.group(1).strip())[:500] if best_desc_m else None,
+            'fetched_at': int(time.time()),
+        }
+    except Exception as e:
+        # Deliberate best-effort: report the failure to the caller rather
+        # than failing the whole request.
+        return {'url': url, 'error': str(e)[:120]}
+
+def _research_links(naziv, kind, grad=None):
+    """Build the list of manual-research links for an entity.
+
+    Args:
+        naziv: Display name used as the search query (``None`` is treated as
+            an empty string).
+        kind: Entity kind; ``'klub'``, ``'sportas'`` and ``'savez'`` each add
+            extra links after the three common ones.
+        grad: Optional town appended to the query after a space.
+
+    Returns:
+        A list of ``{'label', 'icon', 'url'}`` dicts.
+    """
+    query = (naziv or '').strip()
+    if grad:
+        query = query + ' ' + grad
+    encoded = urllib.parse.quote(query)
+
+    def link(label, icon, url):
+        return {'label': label, 'icon': icon, 'url': url}
+
+    links = [
+        link('Google', '🔍', 'https://www.google.com/search?q=' + encoded),
+        link('Wikipedia HR', '📚', 'https://hr.wikipedia.org/w/index.php?search=' + encoded),
+        link('sport-pgz.hr', '🏅', 'https://sport-pgz.hr/?s=' + encoded),
+    ]
+
+    # Kind-specific sources, appended after the common ones.
+    extras_by_kind = (
+        ('klub', [
+            link('Sportilus', '⬡', 'https://www.sportilus.com/?s=' + encoded),
+            link('Sudski registar', '⚖', 'https://sudreg.pravosudje.hr/registar/oc/index.html'),
+        ]),
+        ('sportas', [
+            link('HNS Semafor', '⚽', 'https://semafor.hns.family/?s=' + encoded),
+            link('transfermarkt', '⚽', 'https://www.transfermarkt.com/schnellsuche/ergebnis/schnellsuche?query=' + encoded),
+        ]),
+        ('savez', [
+            link('sport-pgz.hr savezi', '🏅', 'https://sport-pgz.hr/savezi'),
+        ]),
+    )
+    for wanted_kind, extras in extras_by_kind:
+        if kind == wanted_kind:
+            links.extend(extras)
+    return links
+
+@router.post("/enrich/{kind}/{eid}")
+def enrich(kind: str, eid: int):
+    """Return an enrichment report for one klub, savez or sportas record.
+
+    Loads the row from the matching ``pgz_sport`` table, computes how many of
+    the kind's key fields are filled, tries to fetch a live snippet from the
+    first available URL column, and builds research links for manual lookup.
+
+    Args:
+        kind: One of ``'klub'``, ``'savez'`` or ``'sportas'``.
+        eid: Primary key of the row to look up.
+
+    Returns:
+        A dict with ``kind``, ``id``, ``naziv``, ``coverage`` (percentage),
+        ``filled_fields``, ``total_fields``, ``missing_fields``,
+        ``live_snippet``, ``research_links`` and ``enriched_at`` (Unix time).
+
+    Raises:
+        HTTPException: 400 for an unsupported ``kind``, 404 when no row with
+            ``eid`` exists.
+    """
+    if kind not in ('klub','savez','sportas'):
+        raise HTTPException(400, "kind must be klub|savez|sportas")
+
+    if kind == 'klub':
+        row = _fetch_one("""SELECT id, naziv, oib, sport, grad, predsjednik, tajnik,
+                                   web, web_stranica, email, telefon, ciljevi, opis_djelatnosti,
+                                   sjediste, godina_osnutka, savez_id, scrape_url, source_url
+                            FROM pgz_sport.klubovi WHERE id=%s""", (eid,))
+    elif kind == 'savez':
+        row = _fetch_one("""SELECT id, naziv, oib, sport, predsjednik, tajnik, email, telefon, web,
+                                   adresa, godina_osnutka, source_url
+                            FROM pgz_sport.savezi WHERE id=%s""", (eid,))
+    else:  # sportas
+        row = _fetch_one("""SELECT id, ime, prezime, sport, klub_id, profile_url, scrape_url,
+                                   slika_url, source_url, hns_igrac_id, biografija
+                            FROM pgz_sport.clanovi WHERE id=%s""", (eid,))
+    if not row:
+        raise HTTPException(404, kind+" not found")
+
+    # Build display name. Selected columns are always present in the row but
+    # may be NULL (None), so a .get() default never applies; coalesce with
+    # ``or`` / filtering instead of concatenating None.
+    if kind == 'sportas':
+        name_parts = (row.get('ime'), row.get('prezime'))
+        naziv = ' '.join(part for part in name_parts if part).strip()
+        grad = None
+    else:
+        naziv = row.get('naziv') or ''
+        grad = row.get('grad') if kind=='klub' else None
+
+    # Live web snippet from the first URL column that has a value.
+    primary = row.get('web') or row.get('web_stranica') or row.get('source_url') or row.get('scrape_url') or row.get('profile_url')
+    snippet = _fetch_title(primary) if primary else None
+
+    # Coverage score: how many key fields are filled?
+    if kind == 'klub':
+        keys = ['oib','sport','grad','predsjednik','tajnik','web','email','telefon','sjediste','godina_osnutka','ciljevi']
+    elif kind == 'savez':
+        keys = ['oib','sport','predsjednik','tajnik','email','telefon','web','adresa','godina_osnutka']
+    else:
+        keys = ['sport','profile_url','slika_url','hns_igrac_id','biografija']
+    missing = [k for k in keys if not row.get(k)]
+    filled = len(keys) - len(missing)
+    coverage = round(filled/len(keys)*100)
+
+    return {
+        'kind': kind,
+        'id': eid,
+        'naziv': naziv,
+        'coverage': coverage,
+        'filled_fields': filled,
+        'total_fields': len(keys),
+        'missing_fields': missing,
+        'live_snippet': snippet,
+        'research_links': _research_links(naziv, kind, grad),
+        'enriched_at': int(time.time()),
+    }