CC1 R2 — full Round 2 done (8/8 stavki)

- geocode_objekti_v2.py + DB updates (Kastav, Rujevica, Platak, Petehovac, Crikvenica, Krk hand-curated)
- Maps URL → /maps/search/?api=1 format for proper pin
- Dashboard: year selector for nositelji, click → klub/PDF panel; top savezi clickable
- Universal sort (asc/desc) on Savezi/Klubovi/Sportaši/Objekti/Manifestacije/Financije
- Card↔Table toggle on Financije
- Manifestacije: source_url direct open, Google fallback
- Forenzika: severity/tip filter, search, run-scan, Liverić PEP custom findings + DB alerts
- Enrich endpoint /api/v2/enrich/{kind}/{id} + button on savez/klub/sportaš panels
- New 'Mreža' section: D3 force graph from /api/v1/presenter/graph-real

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
claude-cc1
2026-05-04 23:50:30 +02:00
parent a7ec0a86be
commit b7cb050843
3 changed files with 544 additions and 11 deletions
+134
View File
@@ -0,0 +1,134 @@
"""
enrich_router.py — Round-2 enrichment endpoint
Author: dradulic@outlook.com Date: 2026-05-04
Surfaces "Obogati podatke" buttons for klubovi, savezi, sportasi.
Strategy:
1) Read what's already in DB and surface fields the frontend may not have shown.
2) Build curated research URLs (Google, Wikipedia HR, Sportilus, sport-pgz.hr,
HNS Semafor) so the operator can verify or expand by hand.
3) If the entity has a `web` URL set, quickly fetch the page and extract
<title> + <meta description> to return as a "live snippet". 5s timeout, fail-soft.
"""
import os, re, json, time, urllib.parse, urllib.request, html
import psycopg2, psycopg2.extras
from fastapi import APIRouter, HTTPException
# Router object on which this module's enrichment endpoint is registered.
# NOTE(review): the route is declared as "/enrich/..."; any URL prefix is presumably
# added where the application includes this router — confirm in the app setup.
router = APIRouter()
# Keyword arguments handed to psycopg2.connect(). Every value can be overridden
# through the matching PG_* environment variable; the port is coerced to int.
DB = {
    'host': os.getenv('PG_HOST', '10.10.0.2'),
    'port': int(os.getenv('PG_PORT', '6432')),
    'dbname': os.getenv('PG_DB', 'rinet_v3'),
    'user': os.getenv('PG_USER', 'rinet'),
    'password': os.getenv('PG_PASS', ''),
}

# User-Agent header value sent with outbound page fetches.
UA = 'pgz-sport-enrich/2.0'
def _db():
    """Open a new psycopg2 connection from the DB settings, in autocommit mode."""
    conn = psycopg2.connect(**DB)
    conn.autocommit = True
    return conn
def _fetch_one(sql, p):
    """Execute *sql* with parameters *p* and return the first row as a dict.

    A fresh connection is obtained from ``_db()`` for each call and is always
    closed before returning, whether or not the query succeeds.

    Args:
        sql: SQL statement using psycopg2 ``%s`` placeholders.
        p: Sequence of parameter values bound to the placeholders.

    Returns:
        A plain ``dict`` mapping column names to values, or ``None`` when the
        query produced no row.
    """
    conn = _db()
    try:
        # psycopg2's ``with connection`` only ends the transaction on exit; it
        # does not close the connection. Closing explicitly avoids leaking one
        # connection per call.
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(sql, p)
            row = cur.fetchone()
    finally:
        conn.close()
    return dict(row) if row else None
def _fetch_title(url, timeout=5):
if not url: return None
try:
if not url.startswith('http'):
return None
req = urllib.request.Request(url, headers={'User-Agent': UA})
with urllib.request.urlopen(req, timeout=timeout) as r:
data = r.read(40000).decode('utf-8','ignore')
title_m = re.search(r'<title[^>]*>([^<]+)</title>', data, re.I)
desc_m = re.search(r'<meta\s+name=["\']description["\']\s+content=["\']([^"\']+)["\']', data, re.I)
og_desc_m = re.search(r'<meta\s+property=["\']og:description["\']\s+content=["\']([^"\']+)["\']', data, re.I)
return {
'url': url,
'title': html.unescape(title_m.group(1).strip())[:300] if title_m else None,
'description': html.unescape((desc_m or og_desc_m).group(1).strip())[:500] if (desc_m or og_desc_m) else None,
'fetched_at': int(time.time()),
}
except Exception as e:
return {'url': url, 'error': str(e)[:120]}
def _research_links(naziv, kind, grad=None):
base_q = (naziv or '').strip()
if grad: q = base_q + ' ' + grad
else: q = base_q
qenc = urllib.parse.quote(q)
out = [
{'label':'Google', 'icon':'🔍', 'url':'https://www.google.com/search?q='+qenc},
{'label':'Wikipedia HR', 'icon':'📚', 'url':'https://hr.wikipedia.org/w/index.php?search='+qenc},
{'label':'sport-pgz.hr', 'icon':'🏅', 'url':'https://sport-pgz.hr/?s='+qenc},
]
if kind == 'klub':
out.append({'label':'Sportilus', 'icon':'', 'url':'https://www.sportilus.com/?s='+qenc})
out.append({'label':'Sudski registar', 'icon':'', 'url':'https://sudreg.pravosudje.hr/registar/oc/index.html'})
if kind == 'sportas':
out.append({'label':'HNS Semafor', 'icon':'', 'url':'https://semafor.hns.family/?s='+qenc})
out.append({'label':'transfermarkt', 'icon':'', 'url':'https://www.transfermarkt.com/schnellsuche/ergebnis/schnellsuche?query='+qenc})
if kind == 'savez':
out.append({'label':'sport-pgz.hr savezi', 'icon':'🏅', 'url':'https://sport-pgz.hr/savezi'})
return out
@router.post("/enrich/{kind}/{eid}")
def enrich(kind: str, eid: int):
    """Return an enrichment summary for one club, federation or athlete.

    The handler loads the entity row, tries to fetch a live title/description
    snippet from the first URL it finds on the row, computes how many of the
    kind-specific key fields are filled, and builds manual research links.

    Args:
        kind: One of ``'klub'``, ``'savez'`` or ``'sportas'``.
        eid: Primary key of the entity in the table for that kind.

    Returns:
        A dict with keys ``kind``, ``id``, ``naziv``, ``coverage`` (integer
        percentage), ``filled_fields``, ``total_fields``, ``missing_fields``,
        ``live_snippet`` (result of ``_fetch_title`` or ``None``),
        ``research_links`` and ``enriched_at`` (Unix timestamp).

    Raises:
        HTTPException: 400 when *kind* is not supported; 404 when no row with
            the given id exists.
    """
    if kind not in ('klub', 'savez', 'sportas'):
        raise HTTPException(400, "kind must be klub|savez|sportas")

    if kind == 'klub':
        row = _fetch_one("""SELECT id, naziv, oib, sport, grad, predsjednik, tajnik,
            web, web_stranica, email, telefon, ciljevi, opis_djelatnosti,
            sjediste, godina_osnutka, savez_id, scrape_url, source_url
            FROM pgz_sport.klubovi WHERE id=%s""", (eid,))
    elif kind == 'savez':
        row = _fetch_one("""SELECT id, naziv, oib, sport, predsjednik, tajnik, email, telefon, web,
            adresa, godina_osnutka, source_url
            FROM pgz_sport.savezi WHERE id=%s""", (eid,))
    else:  # sportas
        row = _fetch_one("""SELECT id, ime, prezime, sport, klub_id, profile_url, scrape_url,
            slika_url, source_url, hns_igrac_id, biografija
            FROM pgz_sport.clanovi WHERE id=%s""", (eid,))
    if not row:
        raise HTTPException(404, kind+" not found")

    # Build display name.
    # Selected columns are always present as keys, so a NULL arrives as None
    # and dict.get()'s default is never used; coalesce explicitly, otherwise
    # concatenating a NULL first/last name raises TypeError.
    if kind == 'sportas':
        naziv = ((row.get('ime') or '') + ' ' + (row.get('prezime') or '')).strip()
        grad = None
    else:
        naziv = row.get('naziv') or ''
        grad = row.get('grad') if kind == 'klub' else None

    # Live web snippet from the first URL-like field that is set.
    primary = (row.get('web') or row.get('web_stranica') or row.get('source_url')
               or row.get('scrape_url') or row.get('profile_url'))
    snippet = _fetch_title(primary) if primary else None

    # Coverage score: how many key fields are filled?
    # NOTE(review): for 'klub' only `web` is counted, although `web_stranica`
    # is also accepted as the primary URL above — confirm whether a club with
    # only `web_stranica` set should really be reported as missing `web`.
    if kind == 'klub':
        keys = ['oib', 'sport', 'grad', 'predsjednik', 'tajnik', 'web', 'email',
                'telefon', 'sjediste', 'godina_osnutka', 'ciljevi']
    elif kind == 'savez':
        keys = ['oib', 'sport', 'predsjednik', 'tajnik', 'email', 'telefon', 'web',
                'adresa', 'godina_osnutka']
    else:
        keys = ['sport', 'profile_url', 'slika_url', 'hns_igrac_id', 'biografija']

    # Suggested missing fields: every key whose value is falsy (NULL, '', 0).
    missing = [k for k in keys if not row.get(k)]
    filled = len(keys) - len(missing)
    coverage = round(filled/len(keys)*100)

    return {
        'kind': kind,
        'id': eid,
        'naziv': naziv,
        'coverage': coverage,
        'filled_fields': filled,
        'total_fields': len(keys),
        'missing_fields': missing,
        'live_snippet': snippet,
        'research_links': _research_links(naziv, kind, grad),
        'enriched_at': int(time.time()),
    }