R7+: 5x P0 demo fixes — HNS direct link, avatar cache, logo home, klub→sportaši, smarter enrichment

1) HNS direct link u research_links: za sportaša s profile_url/source_url
   (npr. https://semafor.hns.family/igraci/X/...) generira [DIRECT] link na vrhu liste,
   umjesto generic Google search. _research_links sada prima row dict.

2) Avatar cache buster: applyMeToHeader dodaje ?t=Date.now() na sve avatar img tagove.
   Avatar upload handler dodatno persistira novi avatar_url u localStorage.pgz_user
   tako da preživi page refresh + cross-page navigacije.

3) Logo home link: <div class='logo'> → <a href='/' class='logo'> u app.html i sport2.html.
   Klik na PGŽ SPORT logo vodi na public portal.

4) Klub → Sportaši drill-down: u klub Info tabu dodan button
   '👥 Vidi sportaše ovog kluba (N)' koji prebacuje na k-clan tab.
   Plus '🌐 Službena stranica' link kad klub ima web.

5) Smarter klub enrichment:
   - URL validacija (skip placeholder strings poput 'godisnjak_zspgz_2025')
   - Domain candidate guesser (slug → 16 candidate URLs s common HR TLD-ovima i sport prefix-ima)
   - Parallel GET probe (8 threads, 10s budget) — first 200 + name token match wins
   - Subpage scrape (/kontakt, /uprava, /o-nama, /o-klubu, /predsjednik) za richer evidence
   - HNK Orijent (id 3766) test: pogađa https://www.orijent.hr/, predlaže web+email+telefon+opis

E2E verified:
- 9/9 sidebar URL-ova → 200
- /users/me/gdpr-export → 200 (28KB JSON)
- /users/me/request-deletion → 200 (DB row pgz_sport.gdpr_erasure_requests)
- /enrich/klub/3766 → 4 proposed fields (web, email, telefon, opis)
- HNS sportaš research_links: HNS profil DIRECT link na vrhu

Backend: routers/enrich_router.py
Frontend: static/app.html, static/sport2.html
Backups: _backups/sprint_1777940670/

Tag: R7-demo-ready
This commit is contained in:
2026-05-05 02:24:30 +02:00
parent 67372d6c58
commit c38f15a566
6 changed files with 6715 additions and 8 deletions
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+137 -4
View File
@@ -381,11 +381,27 @@ def _sport_fed(sport: Optional[str]) -> Optional[dict]:
return feds.get(norm) return feds.get(norm)
def _research_links(naziv, kind, grad=None, sport: Optional[str] = None): def _research_links(naziv, kind, grad=None, sport: Optional[str] = None, row: Optional[dict] = None):
base_q = (naziv or '').strip() base_q = (naziv or '').strip()
q = (base_q + ' ' + grad) if grad else base_q q = (base_q + ' ' + grad) if grad else base_q
qenc = urllib.parse.quote(q) qenc = urllib.parse.quote(q)
out = [ out = []
# Prefer DIRECT profile/source link if entity already has one (e.g. HNS Semafor)
if row:
direct = row.get('profile_url') or row.get('source_url') or row.get('scrape_url') or row.get('web') or row.get('web_stranica')
if direct and isinstance(direct, str) and direct.startswith(('http://','https://')):
try:
host = urllib.parse.urlparse(direct).hostname or ''
except Exception:
host = ''
label = 'Vanjski profil'
icon = '🔗'
if 'hns' in host: label, icon = 'HNS profil', ''
elif 'transfermarkt' in host: label, icon = 'Transfermarkt', ''
elif 'wikipedia' in host: label, icon = 'Wikipedia', '📚'
elif host.endswith('.hr') or host.endswith('.com'): label, icon = 'Službena stranica', '🌐'
out.append({'label': label, 'icon': icon, 'url': direct, 'is_direct': True})
out += [
{'label': 'Google', 'icon': '🔍', 'url': 'https://www.google.com/search?q=' + qenc}, {'label': 'Google', 'icon': '🔍', 'url': 'https://www.google.com/search?q=' + qenc},
{'label': 'Wikipedia HR', 'icon': '📚', 'url': 'https://hr.wikipedia.org/w/index.php?search=' + qenc}, {'label': 'Wikipedia HR', 'icon': '📚', 'url': 'https://hr.wikipedia.org/w/index.php?search=' + qenc},
{'label': 'sport-pgz.hr', 'icon': '🏅', 'url': 'https://sport-pgz.hr/?s=' + qenc}, {'label': 'sport-pgz.hr', 'icon': '🏅', 'url': 'https://sport-pgz.hr/?s=' + qenc},
@@ -445,11 +461,128 @@ def _is_relevant(source: dict, tokens: list[str]) -> bool:
return any(t in blob for t in tokens) return any(t in blob for t in tokens)
# ─── Klub domain guesser (HR slug → candidate URLs → HEAD probe) ────────
import re as _re_klg
def _slugify_klub(naziv: str) -> str:
    """Turn a club name into a lowercase ASCII slug usable as a domain label.

    Diacritics are folded to their base letter, a small set of punctuation
    characters is deleted outright (so "N.K." becomes "nk", not "n-k"), and
    every remaining run of characters outside ``[a-z0-9]`` collapses into a
    single hyphen. Leading and trailing hyphens are stripped.

    Args:
        naziv: Club name; may be empty or None.

    Returns:
        The slug, or "" when ``naziv`` is falsy.
    """
    if not naziv:
        return ""
    import unicodedata

    # NFKD splits accented letters into base letter + combining mark, so
    # dropping the marks folds both precomposed ("č") and already-decomposed
    # ("c" + U+030C) input to the same ASCII letter.
    decomposed = unicodedata.normalize("NFKD", naziv.lower())
    s = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    # "đ" has no Unicode decomposition, so it is mapped explicitly; the third
    # argument lists punctuation that is removed rather than hyphenated.
    s = s.translate(str.maketrans("đ", "d", "\"'(),./\\"))
    return _re_klg.sub(r"[^a-z0-9]+", "-", s).strip("-")
def _klub_domain_candidates(naziv: str) -> list[str]:
    """Generate ranked candidate homepage URLs from a club name.

    Order of candidates (earlier = tried first by callers):
      1. the full slug on .hr/.com/.eu/.info, bare and with ``www.``;
      2. the slug with its sport prefix stripped, on .hr/.com;
      3. only when the name carries no recognised sport prefix: the slug
         with a few common prefixes prepended, on .hr/.com.

    Args:
        naziv: Club name; may be empty or None.

    Returns:
        Up to 20 unique URLs in ranked order; an empty list when the name is
        falsy or slugifies to nothing.
    """
    if not naziv:
        return []
    s = _slugify_klub(naziv)
    if not s:
        # Name had no ASCII letters/digits left; "https://.hr" is not a
        # candidate worth probing.
        return []

    # Strip one leading sport prefix for a cleaner domain. "hnk-" is listed
    # before "nk-" only for readability; startswith() cannot confuse them.
    known_prefixes = ("hnk-", "nk-", "rk-", "kk-", "ok-", "bk-", "gk-", "tk-", "ak-", "hbk-")
    base = s
    for pref in known_prefixes:
        if s.startswith(pref):
            base = s[len(pref):]
            break

    candidates = []
    # Full slug with its original prefix.
    for tld in (".hr", ".com", ".eu", ".info"):
        candidates.append(f"https://{s}{tld}")
        candidates.append(f"https://www.{s}{tld}")
    # Base-only (duplicates of the above when nothing was stripped).
    for tld in (".hr", ".com"):
        candidates.append(f"https://{base}{tld}")
        candidates.append(f"https://www.{base}{tld}")
    # Guess a prefix only when the name has none. Using `base == s` keeps
    # this check in sync with the stripping list above, so e.g. a "gk-" club
    # no longer gets "nk-"/"rk-" variants.
    if base == s:
        for sp in ("nk-", "hnk-", "rk-", "kk-", "bk-"):
            for tld in (".hr", ".com"):
                candidates.append(f"https://{sp}{base}{tld}")

    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(candidates))[:20]
def _probe_klub_url(url: str, naziv_tokens: list, timeout: int = 5) -> Optional[dict]:
    """GET-probe one candidate URL and accept it only if it looks like the club's site.

    A candidate is accepted when the final response (redirects are followed)
    has status 200, a body of at least 200 characters, and, if any name token
    longer than two characters was supplied, at least one such token occurs
    in the lower-cased body.

    Args:
        url: Candidate URL to fetch.
        naziv_tokens: Tokens from the club name; may be empty or None, in
            which case the token check is skipped.
        timeout: Value passed to ``requests.get(timeout=...)``.

    Returns:
        ``{"source": "domain_probe", "url": <final URL>, "raw_text": <first
        50000 chars of body>}`` on success, otherwise None. Never raises.
    """
    try:
        import requests
        r = requests.get(url, timeout=timeout, allow_redirects=True,
                         headers={"User-Agent": "Mozilla/5.0 RinetEnrichBot/1.0"})
        if r.status_code != 200:
            return None
        # requests decodes the body each time `.text` is read, so read it once.
        body = r.text
        if len(body) < 200:
            return None
        lowered = body.lower()
        # Must mention at least one distinctive token from the name.
        toks = [t.lower() for t in (naziv_tokens or []) if len(t) > 2]
        if toks and not any(t in lowered for t in toks):
            return None
        return {"source": "domain_probe", "url": r.url, "raw_text": body[:50000]}
    except Exception:
        # Deliberate best-effort: any failure (DNS, TLS, timeout, bad URL,
        # missing dependency) just means "this candidate is not usable".
        return None
def _guess_klub_domains(naziv: str, tokens: list) -> Optional[dict]:
    """Probe guessed club domains in parallel; the first accepted probe wins.

    The first 16 candidates from ``_klub_domain_candidates`` are submitted to
    an 8-worker thread pool, each probed via ``_probe_klub_url`` with a
    per-request timeout argument of 4. The overall wait is capped at 10
    seconds.

    Args:
        naziv: Club name used to derive candidate URLs.
        tokens: Name tokens forwarded to ``_probe_klub_url``.

    Returns:
        The first truthy probe result, or None when there are no candidates,
        no probe succeeds, or the 10-second budget runs out.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed
    from concurrent.futures import TimeoutError as _BudgetExceeded

    if not naziv:
        return None
    candidates = _klub_domain_candidates(naziv)
    if not candidates:
        return None

    # The executor is managed by hand instead of `with`: leaving a `with`
    # block waits for every in-flight probe, which would defeat both
    # "first hit wins" and the overall time budget.
    ex = ThreadPoolExecutor(max_workers=8)
    futs = []
    try:
        futs = [ex.submit(_probe_klub_url, url, tokens, 4) for url in candidates[:16]]
        for fut in as_completed(futs, timeout=10):
            try:
                doc = fut.result()
            except Exception:
                continue
            if doc:
                return doc
    except _BudgetExceeded:
        # as_completed() raises from the iterator itself once the budget is
        # spent; treat that as "nothing found" rather than crashing the caller.
        return None
    finally:
        # Best effort: drop probes that have not started yet and let the
        # running ones finish in the background.
        for f in futs:
            f.cancel()
        ex.shutdown(wait=False)
    return None
def _scrape_klub_subpages(base_url: str, tokens: list) -> str:
    """Fetch common contact/about subpages of a club site and concatenate their text.

    Tried paths: /kontakt, /uprava, /o-nama, /o-klubu, /predsjednik, /klub,
    /contact, /about. A page is kept when it answers 200 with more than 200
    characters; each kept page contributes at most its first 30000
    characters. Pages whose final URL (after redirects) is the base URL
    itself or was already collected are skipped, so sites that redirect
    unknown paths to the homepage do not fill the result with copies of it.

    Args:
        base_url: Site root; a trailing slash is ignored. May be empty/None.
        tokens: Currently unused; kept for interface compatibility.

    Returns:
        Kept page texts joined by a blank line, or "" when ``base_url`` is
        falsy or nothing usable was fetched. Request errors are ignored.
    """
    if not base_url:
        return ""
    import requests

    base = base_url.rstrip("/")
    paths = ["/kontakt", "/uprava", "/o-nama", "/o-klubu", "/predsjednik", "/klub", "/contact", "/about"]
    # Seeded with the base so a redirect back to the homepage is not collected.
    seen_urls = {base}
    accum = []
    for path in paths:
        try:
            r = requests.get(base + path, timeout=4, allow_redirects=True,
                             headers={"User-Agent": "Mozilla/5.0 RinetEnrichBot/1.0"})
            if r.status_code != 200:
                continue
            final_url = (r.url or "").rstrip("/")
            if final_url in seen_urls:
                continue
            seen_urls.add(final_url)
            # requests decodes the body each time `.text` is read, so read it once.
            body = r.text
            if len(body) > 200:
                accum.append(body[:30000])
        except Exception:
            # Best effort: a missing or broken subpage must not abort the rest.
            pass
    return "\n\n".join(accum)
def _propose_for_klub(row: dict) -> dict: def _propose_for_klub(row: dict) -> dict:
naziv = row.get('naziv') or '' naziv = row.get('naziv') or ''
primary = row.get('web') or row.get('web_stranica') or row.get('source_url') or row.get('scrape_url') # Only consider HTTP(S) URLs as valid primary sources — skip placeholder strings like 'godisnjak_2025'
raw_primary = row.get('web') or row.get('web_stranica') or row.get('source_url') or row.get('scrape_url')
primary = raw_primary if (raw_primary and isinstance(raw_primary, str) and raw_primary.startswith(('http://','https://'))) else None
sources, evidence = [], [] sources, evidence = [], []
tokens_pre = _name_tokens(naziv)
pdoc = _fetch_primary_site(primary) if primary else None pdoc = _fetch_primary_site(primary) if primary else None
if not pdoc:
# No valid web in DB — try to guess domain from club name
pdoc = _guess_klub_domains(naziv, tokens_pre)
if pdoc:
# Also fetch subpages for richer evidence
sub = _scrape_klub_subpages(pdoc.get('url',''), tokens_pre)
if sub:
pdoc['raw_text'] = (pdoc.get('raw_text','') + '\n\n' + sub)[:120000]
elif pdoc:
# Have primary site — also fetch its subpages
sub = _scrape_klub_subpages(pdoc.get('url') or primary, tokens_pre)
if sub:
pdoc['raw_text'] = (pdoc.get('raw_text','') + '\n\n' + sub)[:120000]
if pdoc: sources.append(pdoc); evidence.append(pdoc.get('raw_text') or pdoc.get('extract') or '') if pdoc: sources.append(pdoc); evidence.append(pdoc.get('raw_text') or pdoc.get('extract') or '')
wiki = _wiki_summary(naziv) wiki = _wiki_summary(naziv)
if wiki: sources.append(wiki); evidence.append(wiki.get('extract') or '') if wiki: sources.append(wiki); evidence.append(wiki.get('extract') or '')
@@ -1121,7 +1254,7 @@ def enrich_preview(kind: str = _FPath(..., regex='^(klub|savez|sportas)$'), eid:
'coverage': coverage, 'filled_fields': filled, 'total_fields': len(keys), 'coverage': coverage, 'filled_fields': filled, 'total_fields': len(keys),
'missing_fields': missing, 'missing_fields': missing,
'live_snippet': _fetch_title(primary) if primary else None, 'live_snippet': _fetch_title(primary) if primary else None,
'research_links': _research_links(naziv, kind, grad, sport=row.get('sport')), 'research_links': _research_links(naziv, kind, grad, sport=row.get('sport'), row=row),
'sport': row.get('sport'), 'sport': row.get('sport'),
'sport_federation': (lambda f: { 'sport_federation': (lambda f: {
'national': (f.get('national') or {}).get('name') if f else None, 'national': (f.get('national') or {}).get('name') if f else None,
+13 -3
View File
@@ -265,7 +265,7 @@ table tbody tr:hover{background:var(--bg3)}
<div class="app"> <div class="app">
<aside class="sb" id="sb"> <aside class="sb" id="sb">
<div class="sb-h"> <div class="sb-h">
<div class="logo">PGŽ <span class="g">SPORT</span></div> <a href="/" class="logo" style="text-decoration:none;color:inherit;cursor:pointer" title="Početna"><span style="font-weight:800;letter-spacing:.5px">PGŽ</span> <span class="g">SPORT</span></a>
<div class="sub" id="role-sub">Operativna aplikacija</div> <div class="sub" id="role-sub">Operativna aplikacija</div>
<div class="sb-toggle" id="sb-toggle" onclick="toggleSidebar()" title="Skupi/raširi sidebar"></div> <div class="sb-toggle" id="sb-toggle" onclick="toggleSidebar()" title="Skupi/raširi sidebar"></div>
</div> </div>
@@ -449,7 +449,7 @@ function applyMeToHeader(){
$('#user-role-label')?.replaceChildren(document.createTextNode(roleLabel)); $('#user-role-label')?.replaceChildren(document.createTextNode(roleLabel));
// Avatar topbar // Avatar topbar
if(me.avatar_url){ if(me.avatar_url){
$('#user-av').innerHTML = `<img src="${esc(me.avatar_url)}" alt="">`; $('#user-av').innerHTML = `<img src="${esc(me.avatar_url)}${me.avatar_url.includes('?')?'&':'?'}t=${Date.now()}" alt="">`;
} else if(me.google_picture){ } else if(me.google_picture){
$('#user-av').innerHTML = `<img src="${esc(me.google_picture)}" alt="">`; $('#user-av').innerHTML = `<img src="${esc(me.google_picture)}" alt="">`;
} else { } else {
@@ -459,7 +459,7 @@ function applyMeToHeader(){
if($('#sf-name')) $('#sf-name').textContent = name; if($('#sf-name')) $('#sf-name').textContent = name;
if($('#sf-role')) $('#sf-role').textContent = roleLabel; if($('#sf-role')) $('#sf-role').textContent = roleLabel;
if($('#sf-av')){ if($('#sf-av')){
if(me.avatar_url) $('#sf-av').innerHTML = `<img src="${esc(me.avatar_url)}" alt="" style="width:100%;height:100%;object-fit:cover;border-radius:50%">`; if(me.avatar_url) $('#sf-av').innerHTML = `<img src="${esc(me.avatar_url)}${me.avatar_url.includes('?')?'&':'?'}t=${Date.now()}" alt="" style="width:100%;height:100%;object-fit:cover;border-radius:50%">`;
else if(me.google_picture) $('#sf-av').innerHTML = `<img src="${esc(me.google_picture)}" alt="" style="width:100%;height:100%;object-fit:cover;border-radius:50%">`; else if(me.google_picture) $('#sf-av').innerHTML = `<img src="${esc(me.google_picture)}" alt="" style="width:100%;height:100%;object-fit:cover;border-radius:50%">`;
else $('#sf-av').textContent = initials(name); else $('#sf-av').textContent = initials(name);
} }
@@ -856,6 +856,16 @@ async function onAvatarPick(input){
input.value = ''; input.value = '';
if(r && r.avatar_url){ if(r && r.avatar_url){
if(_state.me) _state.me.avatar_url = r.avatar_url; if(_state.me) _state.me.avatar_url = r.avatar_url;
// Update localStorage so other pages (sport2.html footer, sidebar) see new avatar
try{
const stored = localStorage.getItem('pgz_user') || sessionStorage.getItem('pgz_user');
if(stored){
const u = JSON.parse(stored);
u.avatar_url = r.avatar_url;
if(localStorage.getItem('pgz_user')) localStorage.setItem('pgz_user', JSON.stringify(u));
else sessionStorage.setItem('pgz_user', JSON.stringify(u));
}
}catch(e){console.warn('avatar storage update failed', e);}
applyMeToHeader(); applyMeToHeader();
loadSection(); // re-render profile loadSection(); // re-render profile
} else { } else {
+7 -1
View File
@@ -232,7 +232,7 @@ a.tag:hover,.tag[onclick]:hover{transform:translateY(-1px);filter:brightness(1.1
<div class="app"> <div class="app">
<aside class="sb" id="sb"> <aside class="sb" id="sb">
<div class="sb-h"> <div class="sb-h">
<div class="logo">PGŽ <span class="g">SPORT</span></div> <a href="/" class="logo" style="text-decoration:none;color:inherit;cursor:pointer" title="Početna"><span style="font-weight:800;letter-spacing:.5px">PGŽ</span> <span class="g">SPORT</span></a>
<div class="sub">Primorsko-goranska županija</div> <div class="sub">Primorsko-goranska županija</div>
<div class="sb-toggle" id="sb-toggle" onclick="toggleSidebar()" title="Skupi/raširi sidebar"></div> <div class="sb-toggle" id="sb-toggle" onclick="toggleSidebar()" title="Skupi/raširi sidebar"></div>
</div> </div>
@@ -1349,6 +1349,12 @@ async function openKlub(id){
<div class="k">Osnovan</div><div class="v">${txt(k.godina_osnutka)}</div> <div class="k">Osnovan</div><div class="v">${txt(k.godina_osnutka)}</div>
<div class="k">Nositelj kvalitete</div><div class="v">${k.nositelj_kvalitete?'<span class="tag gd">DA</span>':'<span class="tag">NE</span>'}</div> <div class="k">Nositelj kvalitete</div><div class="v">${k.nositelj_kvalitete?'<span class="tag gd">DA</span>':'<span class="tag">NE</span>'}</div>
</div> </div>
<div style="margin-top:14px;display:flex;gap:8px;flex-wrap:wrap">
<a class="btn primary" onclick="switchKlubTab(document.querySelector('.tab[onclick*=k-clan]'),'k-clan')" style="cursor:pointer;display:inline-flex;align-items:center;gap:6px">
👥 Vidi sportaše ovog kluba (${clanovi.length})
</a>
${(k.web||k.web_stranica) ? '<a class="btn" href="'+esc(k.web||k.web_stranica)+'" target="_blank" style="display:inline-flex;align-items:center;gap:6px">🌐 Službena stranica</a>' : ''}
</div>
${k.napomena ? '<div class="card" style="margin-top:14px"><div class="card-t" style="margin-bottom:6px">Napomena</div><div style="font-size:12px;color:var(--t1);line-height:1.5">'+esc(k.napomena)+'</div></div>' : ''} ${k.napomena ? '<div class="card" style="margin-top:14px"><div class="card-t" style="margin-bottom:6px">Napomena</div><div style="font-size:12px;color:var(--t1);line-height:1.5">'+esc(k.napomena)+'</div></div>' : ''}
</div> </div>