CC1 R2 — full Round 2 done (8/8 stavki)
- geocode_objekti_v2.py + DB updates (Kastav, Rujevica, Platak, Petehovac, Crikvenica, Krk hand-curated)
- Maps URL → /maps/search/?api=1 format for proper pin
- Dashboard: year selector for nositelji, click → klub/PDF panel; top savezi clickable
- Universal sort (asc/desc) on Savezi/Klubovi/Sportaši/Objekti/Manifestacije/Financije
- Card↔Table toggle on Financije
- Manifestacije: source_url direct open, Google fallback
- Forenzika: severity/tip filter, search, run-scan, Liverić PEP custom findings + DB alerts
- Enrich endpoint /api/v2/enrich/{kind}/{id} + button on savez/klub/sportaš panels
- New 'Mreža' section: D3 force graph from /api/v1/presenter/graph-real
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,134 @@
|
||||
"""
|
||||
enrich_router.py — Round-2 enrichment endpoint
|
||||
Author: dradulic@outlook.com Date: 2026-05-04
|
||||
|
||||
Surfaces "Obogati podatke" buttons for klubovi, savezi, sportasi.
|
||||
|
||||
Strategy:
|
||||
1) Read what's already in DB and surface fields the frontend may not have shown.
|
||||
2) Build curated research URLs (Google, Wikipedia HR, Sportilus, sport-pgz.hr,
|
||||
HNS Semafor) so the operator can verify or expand by hand.
|
||||
3) If the entity has a `web` URL set, quickly fetch the page and extract
|
||||
<title> + <meta description> to return as a "live snippet". 5s timeout, fail-soft.
|
||||
"""
|
||||
import os, re, json, time, urllib.parse, urllib.request, html
|
||||
import psycopg2, psycopg2.extras
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
# Router object on which this module registers its endpoint(s) via the
# @router.post decorator.
# NOTE(review): presumably included by the main application under an
# /api/v2 prefix — confirm against the app setup.
router = APIRouter()
|
||||
|
||||
# Keyword arguments passed to psycopg2.connect(). Every value can be
# overridden through the matching PG_* environment variable; the literals
# below are the fallbacks used when a variable is not set.
DB = {
    'host': os.environ.get('PG_HOST', '10.10.0.2'),
    'port': int(os.environ.get('PG_PORT', '6432')),
    'dbname': os.environ.get('PG_DB', 'rinet_v3'),
    'user': os.environ.get('PG_USER', 'rinet'),
    'password': os.environ.get('PG_PASS', ''),
}

# User-Agent header value sent with outbound page fetches.
UA = 'pgz-sport-enrich/2.0'
|
||||
|
||||
def _db():
    """Open and return a new PostgreSQL connection built from ``DB``.

    Autocommit is enabled on the connection before it is returned, so
    statements run on it do not need an explicit commit. The caller owns
    the connection.
    """
    conn = psycopg2.connect(**DB)
    conn.autocommit = True
    return conn
|
||||
|
||||
def _fetch_one(sql, p):
    """Run a query and return its first row as a plain dict.

    Args:
        sql: SQL text with ``%s`` placeholders.
        p: Sequence of parameters bound to the placeholders.

    Returns:
        A ``dict`` mapping column names to values for the first row, or
        ``None`` when the query yields no row.
    """
    conn = _db()
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute(sql, p)
            row = cur.fetchone()
    finally:
        # psycopg2's ``with connection:`` only ends the transaction; it does
        # not close the connection. Close explicitly so each call does not
        # leave a server connection open.
        conn.close()
    return dict(row) if row else None
|
||||
|
||||
def _fetch_title(url, timeout=5):
|
||||
if not url: return None
|
||||
try:
|
||||
if not url.startswith('http'):
|
||||
return None
|
||||
req = urllib.request.Request(url, headers={'User-Agent': UA})
|
||||
with urllib.request.urlopen(req, timeout=timeout) as r:
|
||||
data = r.read(40000).decode('utf-8','ignore')
|
||||
title_m = re.search(r'<title[^>]*>([^<]+)</title>', data, re.I)
|
||||
desc_m = re.search(r'<meta\s+name=["\']description["\']\s+content=["\']([^"\']+)["\']', data, re.I)
|
||||
og_desc_m = re.search(r'<meta\s+property=["\']og:description["\']\s+content=["\']([^"\']+)["\']', data, re.I)
|
||||
return {
|
||||
'url': url,
|
||||
'title': html.unescape(title_m.group(1).strip())[:300] if title_m else None,
|
||||
'description': html.unescape((desc_m or og_desc_m).group(1).strip())[:500] if (desc_m or og_desc_m) else None,
|
||||
'fetched_at': int(time.time()),
|
||||
}
|
||||
except Exception as e:
|
||||
return {'url': url, 'error': str(e)[:120]}
|
||||
|
||||
def _research_links(naziv, kind, grad=None):
|
||||
base_q = (naziv or '').strip()
|
||||
if grad: q = base_q + ' ' + grad
|
||||
else: q = base_q
|
||||
qenc = urllib.parse.quote(q)
|
||||
out = [
|
||||
{'label':'Google', 'icon':'🔍', 'url':'https://www.google.com/search?q='+qenc},
|
||||
{'label':'Wikipedia HR', 'icon':'📚', 'url':'https://hr.wikipedia.org/w/index.php?search='+qenc},
|
||||
{'label':'sport-pgz.hr', 'icon':'🏅', 'url':'https://sport-pgz.hr/?s='+qenc},
|
||||
]
|
||||
if kind == 'klub':
|
||||
out.append({'label':'Sportilus', 'icon':'⬡', 'url':'https://www.sportilus.com/?s='+qenc})
|
||||
out.append({'label':'Sudski registar', 'icon':'⚖', 'url':'https://sudreg.pravosudje.hr/registar/oc/index.html'})
|
||||
if kind == 'sportas':
|
||||
out.append({'label':'HNS Semafor', 'icon':'⚽', 'url':'https://semafor.hns.family/?s='+qenc})
|
||||
out.append({'label':'transfermarkt', 'icon':'⚽', 'url':'https://www.transfermarkt.com/schnellsuche/ergebnis/schnellsuche?query='+qenc})
|
||||
if kind == 'savez':
|
||||
out.append({'label':'sport-pgz.hr savezi', 'icon':'🏅', 'url':'https://sport-pgz.hr/savezi'})
|
||||
return out
|
||||
|
||||
@router.post("/enrich/{kind}/{eid}")
def enrich(kind: str, eid: int):
    """Return an enrichment report for one klub, savez or sportas.

    The report combines what is already stored in the database (as a
    coverage score plus the list of empty fields), a live title/description
    snippet fetched from the entity's primary URL, and a set of research
    links for manual verification.

    Args:
        kind: One of ``'klub'``, ``'savez'`` or ``'sportas'``.
        eid: Primary key of the entity in the table for ``kind``.

    Returns:
        A dict with ``kind``, ``id``, ``naziv``, ``coverage`` (percentage,
        rounded), ``filled_fields``, ``total_fields``, ``missing_fields``,
        ``live_snippet``, ``research_links`` and ``enriched_at``.

    Raises:
        HTTPException: 400 for an unknown ``kind``; 404 when no row with
            ``eid`` exists.
    """
    if kind not in ('klub', 'savez', 'sportas'):
        raise HTTPException(400, "kind must be klub|savez|sportas")

    if kind == 'klub':
        row = _fetch_one("""SELECT id, naziv, oib, sport, grad, predsjednik, tajnik,
                                   web, web_stranica, email, telefon, ciljevi, opis_djelatnosti,
                                   sjediste, godina_osnutka, savez_id, scrape_url, source_url
                            FROM pgz_sport.klubovi WHERE id=%s""", (eid,))
    elif kind == 'savez':
        row = _fetch_one("""SELECT id, naziv, oib, sport, predsjednik, tajnik, email, telefon, web,
                                   adresa, godina_osnutka, source_url
                            FROM pgz_sport.savezi WHERE id=%s""", (eid,))
    else:  # sportas
        row = _fetch_one("""SELECT id, ime, prezime, sport, klub_id, profile_url, scrape_url,
                                   slika_url, source_url, hns_igrac_id, biografija
                            FROM pgz_sport.clanovi WHERE id=%s""", (eid,))
    if not row:
        raise HTTPException(404, kind+" not found")

    # Build display name. The row always contains every selected column, so
    # a NULL value arrives as None and a .get() default never applies; use
    # `or ''` so NULL name parts do not raise TypeError on concatenation.
    if kind == 'sportas':
        naziv = ((row.get('ime') or '') + ' ' + (row.get('prezime') or '')).strip()
        grad = None
    else:
        naziv = row.get('naziv') or ''
        grad = row.get('grad') if kind == 'klub' else None

    # Live web snippet from the first URL-like column that is set.
    primary = (row.get('web') or row.get('web_stranica') or row.get('source_url')
               or row.get('scrape_url') or row.get('profile_url'))
    snippet = _fetch_title(primary) if primary else None

    # Coverage score: how many key fields are filled?
    # NOTE(review): for klub only 'web' is counted although 'web_stranica' is
    # also used as a URL source above — confirm whether either should count.
    if kind == 'klub':
        keys = ['oib', 'sport', 'grad', 'predsjednik', 'tajnik', 'web', 'email',
                'telefon', 'sjediste', 'godina_osnutka', 'ciljevi']
    elif kind == 'savez':
        keys = ['oib', 'sport', 'predsjednik', 'tajnik', 'email', 'telefon', 'web',
                'adresa', 'godina_osnutka']
    else:
        keys = ['sport', 'profile_url', 'slika_url', 'hns_igrac_id', 'biografija']

    # Suggested missing fields; everything else counts as filled.
    missing = [k for k in keys if not row.get(k)]
    filled = len(keys) - len(missing)
    coverage = round(filled/len(keys)*100)

    return {
        'kind': kind,
        'id': eid,
        'naziv': naziv,
        'coverage': coverage,
        'filled_fields': filled,
        'total_fields': len(keys),
        'missing_fields': missing,
        'live_snippet': snippet,
        'research_links': _research_links(naziv, kind, grad),
        'enriched_at': int(time.time()),
    }
|
||||
Reference in New Issue
Block a user