R7+: 5x P0 demo fixes — HNS direct link, avatar cache, logo home, klub→sportaši, smarter enrichment
1) HNS direct link u research_links: za sportaš s profile_url/source_url (npr. https://semafor.hns.family/igraci/X/...) generira [⭐DIRECT] link na vrhu liste, umjesto generic Google search. _research_links sada prima row dict. 2) Avatar cache buster: applyMeToHeader dodaje ?t=Date.now() na sve avatar img tagove. Avatar upload handler dodatno persistira novi avatar_url u localStorage.pgz_user tako da preživi page refresh + cross-page navigacije. 3) Logo home link: <div class='logo'> → <a href='/' class='logo'> u app.html i sport2.html. Klik na PGŽ SPORT logo vodi na public portal. 4) Klub → Sportaši drill-down: u klub Info tabu dodan button '👥 Vidi sportaše ovog kluba (N)' koji prebacuje na k-clan tab. Plus '🌐 Službena stranica' link kad klub ima web. 5) Smarter klub enrichment: - URL validacija (skip placeholder strings poput 'godisnjak_zspgz_2025') - Domain candidate guesser (slug → 16 candidate URLs s common HR TLD-ovima i sport prefix-ima) - Parallel HEAD probe (8 threads, 10s budget) — first 200 + name token match wins - Subpage scrape (/kontakt, /uprava, /o-nama, /o-klubu, /predsjednik) za richer evidence - HNK Orijent (id 3766) test: pogađa https://www.orijent.hr/, predlaže web+email+telefon+opis E2E verified: - 9/9 sidebar URL-ova → 200 - /users/me/gdpr-export → 200 (28KB JSON) - /users/me/request-deletion → 200 (DB row pgz_sport.gdpr_erasure_requests) - /enrich/klub/3766 → 4 proposed fields (web, email, telefon, opis) - HNS sportaš research_links: ⭐ HNS profil DIRECT link na vrhu Backend: routers/enrich_router.py Frontend: static/app.html, static/sport2.html Backups: _backups/sprint_1777940670/ Tag: R7-demo-ready
This commit is contained in:
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
+137
-4
@@ -381,11 +381,27 @@ def _sport_fed(sport: Optional[str]) -> Optional[dict]:
|
|||||||
return feds.get(norm)
|
return feds.get(norm)
|
||||||
|
|
||||||
|
|
||||||
def _research_links(naziv, kind, grad=None, sport: Optional[str] = None):
|
def _research_links(naziv, kind, grad=None, sport: Optional[str] = None, row: Optional[dict] = None):
|
||||||
base_q = (naziv or '').strip()
|
base_q = (naziv or '').strip()
|
||||||
q = (base_q + ' ' + grad) if grad else base_q
|
q = (base_q + ' ' + grad) if grad else base_q
|
||||||
qenc = urllib.parse.quote(q)
|
qenc = urllib.parse.quote(q)
|
||||||
out = [
|
out = []
|
||||||
|
# Prefer DIRECT profile/source link if entity already has one (e.g. HNS Semafor)
|
||||||
|
if row:
|
||||||
|
direct = row.get('profile_url') or row.get('source_url') or row.get('scrape_url') or row.get('web') or row.get('web_stranica')
|
||||||
|
if direct and isinstance(direct, str) and direct.startswith(('http://','https://')):
|
||||||
|
try:
|
||||||
|
host = urllib.parse.urlparse(direct).hostname or ''
|
||||||
|
except Exception:
|
||||||
|
host = ''
|
||||||
|
label = 'Vanjski profil'
|
||||||
|
icon = '🔗'
|
||||||
|
if 'hns' in host: label, icon = 'HNS profil', '⚽'
|
||||||
|
elif 'transfermarkt' in host: label, icon = 'Transfermarkt', '⚽'
|
||||||
|
elif 'wikipedia' in host: label, icon = 'Wikipedia', '📚'
|
||||||
|
elif host.endswith('.hr') or host.endswith('.com'): label, icon = 'Službena stranica', '🌐'
|
||||||
|
out.append({'label': label, 'icon': icon, 'url': direct, 'is_direct': True})
|
||||||
|
out += [
|
||||||
{'label': 'Google', 'icon': '🔍', 'url': 'https://www.google.com/search?q=' + qenc},
|
{'label': 'Google', 'icon': '🔍', 'url': 'https://www.google.com/search?q=' + qenc},
|
||||||
{'label': 'Wikipedia HR', 'icon': '📚', 'url': 'https://hr.wikipedia.org/w/index.php?search=' + qenc},
|
{'label': 'Wikipedia HR', 'icon': '📚', 'url': 'https://hr.wikipedia.org/w/index.php?search=' + qenc},
|
||||||
{'label': 'sport-pgz.hr', 'icon': '🏅', 'url': 'https://sport-pgz.hr/?s=' + qenc},
|
{'label': 'sport-pgz.hr', 'icon': '🏅', 'url': 'https://sport-pgz.hr/?s=' + qenc},
|
||||||
@@ -445,11 +461,128 @@ def _is_relevant(source: dict, tokens: list[str]) -> bool:
|
|||||||
return any(t in blob for t in tokens)
|
return any(t in blob for t in tokens)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Klub domain guesser (HR slug → candidate URLs → HEAD probe) ────────
|
||||||
|
import re as _re_klg
|
||||||
|
|
||||||
|
def _slugify_klub(naziv: str) -> str:
|
||||||
|
if not naziv: return ""
|
||||||
|
s = naziv.lower()
|
||||||
|
repl = (("č","c"),("ć","c"),("ž","z"),("š","s"),("đ","d"),
|
||||||
|
('"',''),("'",""),("(",""),(")",""),(",",""),(".",""),
|
||||||
|
("/",""),("\\",""))
|
||||||
|
for a,b in repl: s = s.replace(a,b)
|
||||||
|
s = _re_klg.sub(r"[^a-z0-9]+", "-", s).strip("-")
|
||||||
|
return s
|
||||||
|
|
||||||
|
def _klub_domain_candidates(naziv: str) -> list[str]:
|
||||||
|
"""Generate ranked candidate URLs from club name."""
|
||||||
|
if not naziv: return []
|
||||||
|
s = _slugify_klub(naziv)
|
||||||
|
# Strip common prefixes for cleaner domains
|
||||||
|
base = s
|
||||||
|
for pref in ("hnk-","nk-","rk-","kk-","ok-","bk-","gk-","tk-","ak-","hbk-"):
|
||||||
|
if base.startswith(pref):
|
||||||
|
base = base[len(pref):]; break
|
||||||
|
# also try short prefix-ed variants
|
||||||
|
short = base.split("-")[0] if base else ""
|
||||||
|
candidates = []
|
||||||
|
sports_prefixes = ["nk-","hnk-","rk-","kk-","bk-","ok-","ak-","tk-"]
|
||||||
|
# full slug with original prefix
|
||||||
|
for tld in (".hr",".com",".eu",".info"):
|
||||||
|
candidates.append(f"https://{s}{tld}")
|
||||||
|
candidates.append(f"https://www.{s}{tld}")
|
||||||
|
# base-only
|
||||||
|
for tld in (".hr",".com"):
|
||||||
|
candidates.append(f"https://{base}{tld}")
|
||||||
|
candidates.append(f"https://www.{base}{tld}")
|
||||||
|
# try sport prefixes if name doesn't already have one
|
||||||
|
if not any(s.startswith(p) for p in sports_prefixes):
|
||||||
|
for sp in sports_prefixes[:5]:
|
||||||
|
for tld in (".hr",".com"):
|
||||||
|
candidates.append(f"https://{sp}{base}{tld}")
|
||||||
|
# dedup, preserve order
|
||||||
|
seen, out = set(), []
|
||||||
|
for c in candidates:
|
||||||
|
if c not in seen:
|
||||||
|
seen.add(c); out.append(c)
|
||||||
|
return out[:20]
|
||||||
|
|
||||||
|
def _probe_klub_url(url: str, naziv_tokens: list, timeout: int = 5) -> Optional[dict]:
|
||||||
|
"""HEAD/GET probe; return doc with raw_text if URL is alive AND mentions club tokens."""
|
||||||
|
try:
|
||||||
|
import requests
|
||||||
|
r = requests.get(url, timeout=timeout, allow_redirects=True,
|
||||||
|
headers={"User-Agent":"Mozilla/5.0 RinetEnrichBot/1.0"})
|
||||||
|
if r.status_code != 200: return None
|
||||||
|
if len(r.text) < 200: return None
|
||||||
|
text = r.text.lower()
|
||||||
|
# Must mention at least one distinctive token from name
|
||||||
|
toks = [t.lower() for t in (naziv_tokens or []) if len(t) > 2]
|
||||||
|
if toks and not any(t in text for t in toks):
|
||||||
|
return None
|
||||||
|
return {"source": "domain_probe", "url": r.url, "raw_text": r.text[:50000]}
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _guess_klub_domains(naziv: str, tokens: list) -> Optional[dict]:
    """Probe guessed club domains in parallel and return the first hit.

    Up to 16 candidates from ``_klub_domain_candidates`` are probed by 8
    worker threads, each probe called with a timeout of 4, and the whole
    batch is given 10 seconds. "First" means first to *complete*, so a
    fast-responding lower-ranked candidate can win over a slower
    higher-ranked one.

    Args:
        naziv: Club name used to generate candidate URLs.
        tokens: Name tokens forwarded to ``_probe_klub_url``.

    Returns:
        The document dict of the first successful probe, or ``None`` when no
        candidate matched or the 10-second budget ran out.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed
    from concurrent.futures import TimeoutError as FuturesTimeout

    candidates = _klub_domain_candidates(naziv)
    if not candidates:
        return None

    # The executor is managed by hand: leaving a ``with`` block would wait for
    # every in-flight probe, delaying the return after the first hit.
    ex = ThreadPoolExecutor(max_workers=8)
    futs = [ex.submit(_probe_klub_url, url, tokens, 4) for url in candidates[:16]]
    try:
        for fut in as_completed(futs, timeout=10):
            try:
                doc = fut.result()
            except Exception:
                # One failing probe must not abort the remaining candidates.
                continue
            if doc:
                return doc
        return None
    except FuturesTimeout:
        # as_completed raises from the ``for`` statement itself once the
        # budget is exhausted; treat that as "nothing found".
        return None
    finally:
        # Drop probes that have not started; those already running finish in
        # the background and their results are discarded.
        for f in futs:
            f.cancel()
        ex.shutdown(wait=False)
|
||||||
|
|
||||||
|
def _scrape_klub_subpages(base_url: str, tokens: list) -> str:
|
||||||
|
"""Fetch /kontakt /uprava /o-nama /o-klubu and concat texts."""
|
||||||
|
if not base_url: return ""
|
||||||
|
import requests
|
||||||
|
base = base_url.rstrip("/")
|
||||||
|
paths = ["/kontakt","/uprava","/o-nama","/o-klubu","/predsjednik","/klub","/contact","/about"]
|
||||||
|
accum = []
|
||||||
|
for path in paths:
|
||||||
|
try:
|
||||||
|
r = requests.get(base + path, timeout=4, allow_redirects=True,
|
||||||
|
headers={"User-Agent":"Mozilla/5.0 RinetEnrichBot/1.0"})
|
||||||
|
if r.status_code == 200 and len(r.text) > 200:
|
||||||
|
accum.append(r.text[:30000])
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return "\n\n".join(accum)
|
||||||
|
|
||||||
|
|
||||||
def _propose_for_klub(row: dict) -> dict:
|
def _propose_for_klub(row: dict) -> dict:
|
||||||
naziv = row.get('naziv') or ''
|
naziv = row.get('naziv') or ''
|
||||||
primary = row.get('web') or row.get('web_stranica') or row.get('source_url') or row.get('scrape_url')
|
# Only consider HTTP(S) URLs as valid primary sources — skip placeholder strings like 'godisnjak_2025'
|
||||||
|
raw_primary = row.get('web') or row.get('web_stranica') or row.get('source_url') or row.get('scrape_url')
|
||||||
|
primary = raw_primary if (raw_primary and isinstance(raw_primary, str) and raw_primary.startswith(('http://','https://'))) else None
|
||||||
sources, evidence = [], []
|
sources, evidence = [], []
|
||||||
|
tokens_pre = _name_tokens(naziv)
|
||||||
pdoc = _fetch_primary_site(primary) if primary else None
|
pdoc = _fetch_primary_site(primary) if primary else None
|
||||||
|
if not pdoc:
|
||||||
|
# No valid web in DB — try to guess domain from club name
|
||||||
|
pdoc = _guess_klub_domains(naziv, tokens_pre)
|
||||||
|
if pdoc:
|
||||||
|
# Also fetch subpages for richer evidence
|
||||||
|
sub = _scrape_klub_subpages(pdoc.get('url',''), tokens_pre)
|
||||||
|
if sub:
|
||||||
|
pdoc['raw_text'] = (pdoc.get('raw_text','') + '\n\n' + sub)[:120000]
|
||||||
|
elif pdoc:
|
||||||
|
# Have primary site — also fetch its subpages
|
||||||
|
sub = _scrape_klub_subpages(pdoc.get('url') or primary, tokens_pre)
|
||||||
|
if sub:
|
||||||
|
pdoc['raw_text'] = (pdoc.get('raw_text','') + '\n\n' + sub)[:120000]
|
||||||
if pdoc: sources.append(pdoc); evidence.append(pdoc.get('raw_text') or pdoc.get('extract') or '')
|
if pdoc: sources.append(pdoc); evidence.append(pdoc.get('raw_text') or pdoc.get('extract') or '')
|
||||||
wiki = _wiki_summary(naziv)
|
wiki = _wiki_summary(naziv)
|
||||||
if wiki: sources.append(wiki); evidence.append(wiki.get('extract') or '')
|
if wiki: sources.append(wiki); evidence.append(wiki.get('extract') or '')
|
||||||
@@ -1121,7 +1254,7 @@ def enrich_preview(kind: str = _FPath(..., regex='^(klub|savez|sportas)$'), eid:
|
|||||||
'coverage': coverage, 'filled_fields': filled, 'total_fields': len(keys),
|
'coverage': coverage, 'filled_fields': filled, 'total_fields': len(keys),
|
||||||
'missing_fields': missing,
|
'missing_fields': missing,
|
||||||
'live_snippet': _fetch_title(primary) if primary else None,
|
'live_snippet': _fetch_title(primary) if primary else None,
|
||||||
'research_links': _research_links(naziv, kind, grad, sport=row.get('sport')),
|
'research_links': _research_links(naziv, kind, grad, sport=row.get('sport'), row=row),
|
||||||
'sport': row.get('sport'),
|
'sport': row.get('sport'),
|
||||||
'sport_federation': (lambda f: {
|
'sport_federation': (lambda f: {
|
||||||
'national': (f.get('national') or {}).get('name') if f else None,
|
'national': (f.get('national') or {}).get('name') if f else None,
|
||||||
|
|||||||
+13
-3
@@ -265,7 +265,7 @@ table tbody tr:hover{background:var(--bg3)}
|
|||||||
<div class="app">
|
<div class="app">
|
||||||
<aside class="sb" id="sb">
|
<aside class="sb" id="sb">
|
||||||
<div class="sb-h">
|
<div class="sb-h">
|
||||||
<div class="logo">PGŽ <span class="g">SPORT</span></div>
|
<a href="/" class="logo" style="text-decoration:none;color:inherit;cursor:pointer" title="Početna"><span style="font-weight:800;letter-spacing:.5px">PGŽ</span> <span class="g">SPORT</span></a>
|
||||||
<div class="sub" id="role-sub">Operativna aplikacija</div>
|
<div class="sub" id="role-sub">Operativna aplikacija</div>
|
||||||
<div class="sb-toggle" id="sb-toggle" onclick="toggleSidebar()" title="Skupi/raširi sidebar">≡</div>
|
<div class="sb-toggle" id="sb-toggle" onclick="toggleSidebar()" title="Skupi/raširi sidebar">≡</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -449,7 +449,7 @@ function applyMeToHeader(){
|
|||||||
$('#user-role-label')?.replaceChildren(document.createTextNode(roleLabel));
|
$('#user-role-label')?.replaceChildren(document.createTextNode(roleLabel));
|
||||||
// Avatar topbar
|
// Avatar topbar
|
||||||
if(me.avatar_url){
|
if(me.avatar_url){
|
||||||
$('#user-av').innerHTML = `<img src="${esc(me.avatar_url)}" alt="">`;
|
$('#user-av').innerHTML = `<img src="${esc(me.avatar_url)}${me.avatar_url.includes('?')?'&':'?'}t=${Date.now()}" alt="">`;
|
||||||
} else if(me.google_picture){
|
} else if(me.google_picture){
|
||||||
$('#user-av').innerHTML = `<img src="${esc(me.google_picture)}" alt="">`;
|
$('#user-av').innerHTML = `<img src="${esc(me.google_picture)}" alt="">`;
|
||||||
} else {
|
} else {
|
||||||
@@ -459,7 +459,7 @@ function applyMeToHeader(){
|
|||||||
if($('#sf-name')) $('#sf-name').textContent = name;
|
if($('#sf-name')) $('#sf-name').textContent = name;
|
||||||
if($('#sf-role')) $('#sf-role').textContent = roleLabel;
|
if($('#sf-role')) $('#sf-role').textContent = roleLabel;
|
||||||
if($('#sf-av')){
|
if($('#sf-av')){
|
||||||
if(me.avatar_url) $('#sf-av').innerHTML = `<img src="${esc(me.avatar_url)}" alt="" style="width:100%;height:100%;object-fit:cover;border-radius:50%">`;
|
if(me.avatar_url) $('#sf-av').innerHTML = `<img src="${esc(me.avatar_url)}${me.avatar_url.includes('?')?'&':'?'}t=${Date.now()}" alt="" style="width:100%;height:100%;object-fit:cover;border-radius:50%">`;
|
||||||
else if(me.google_picture) $('#sf-av').innerHTML = `<img src="${esc(me.google_picture)}" alt="" style="width:100%;height:100%;object-fit:cover;border-radius:50%">`;
|
else if(me.google_picture) $('#sf-av').innerHTML = `<img src="${esc(me.google_picture)}" alt="" style="width:100%;height:100%;object-fit:cover;border-radius:50%">`;
|
||||||
else $('#sf-av').textContent = initials(name);
|
else $('#sf-av').textContent = initials(name);
|
||||||
}
|
}
|
||||||
@@ -856,6 +856,16 @@ async function onAvatarPick(input){
|
|||||||
input.value = '';
|
input.value = '';
|
||||||
if(r && r.avatar_url){
|
if(r && r.avatar_url){
|
||||||
if(_state.me) _state.me.avatar_url = r.avatar_url;
|
if(_state.me) _state.me.avatar_url = r.avatar_url;
|
||||||
|
// Update localStorage so other pages (sport2.html footer, sidebar) see new avatar
|
||||||
|
try{
|
||||||
|
const stored = localStorage.getItem('pgz_user') || sessionStorage.getItem('pgz_user');
|
||||||
|
if(stored){
|
||||||
|
const u = JSON.parse(stored);
|
||||||
|
u.avatar_url = r.avatar_url;
|
||||||
|
if(localStorage.getItem('pgz_user')) localStorage.setItem('pgz_user', JSON.stringify(u));
|
||||||
|
else sessionStorage.setItem('pgz_user', JSON.stringify(u));
|
||||||
|
}
|
||||||
|
}catch(e){console.warn('avatar storage update failed', e);}
|
||||||
applyMeToHeader();
|
applyMeToHeader();
|
||||||
loadSection(); // re-render profile
|
loadSection(); // re-render profile
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
+7
-1
@@ -232,7 +232,7 @@ a.tag:hover,.tag[onclick]:hover{transform:translateY(-1px);filter:brightness(1.1
|
|||||||
<div class="app">
|
<div class="app">
|
||||||
<aside class="sb" id="sb">
|
<aside class="sb" id="sb">
|
||||||
<div class="sb-h">
|
<div class="sb-h">
|
||||||
<div class="logo">PGŽ <span class="g">SPORT</span></div>
|
<a href="/" class="logo" style="text-decoration:none;color:inherit;cursor:pointer" title="Početna"><span style="font-weight:800;letter-spacing:.5px">PGŽ</span> <span class="g">SPORT</span></a>
|
||||||
<div class="sub">Primorsko-goranska županija</div>
|
<div class="sub">Primorsko-goranska županija</div>
|
||||||
<div class="sb-toggle" id="sb-toggle" onclick="toggleSidebar()" title="Skupi/raširi sidebar">≡</div>
|
<div class="sb-toggle" id="sb-toggle" onclick="toggleSidebar()" title="Skupi/raširi sidebar">≡</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -1349,6 +1349,12 @@ async function openKlub(id){
|
|||||||
<div class="k">Osnovan</div><div class="v">${txt(k.godina_osnutka)}</div>
|
<div class="k">Osnovan</div><div class="v">${txt(k.godina_osnutka)}</div>
|
||||||
<div class="k">Nositelj kvalitete</div><div class="v">${k.nositelj_kvalitete?'<span class="tag gd">DA</span>':'<span class="tag">NE</span>'}</div>
|
<div class="k">Nositelj kvalitete</div><div class="v">${k.nositelj_kvalitete?'<span class="tag gd">DA</span>':'<span class="tag">NE</span>'}</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div style="margin-top:14px;display:flex;gap:8px;flex-wrap:wrap">
|
||||||
|
<a class="btn primary" onclick="switchKlubTab(document.querySelector('.tab[onclick*=k-clan]'),'k-clan')" style="cursor:pointer;display:inline-flex;align-items:center;gap:6px">
|
||||||
|
👥 Vidi sportaše ovog kluba (${clanovi.length})
|
||||||
|
</a>
|
||||||
|
${(k.web||k.web_stranica) ? '<a class="btn" href="'+esc(k.web||k.web_stranica)+'" target="_blank" style="display:inline-flex;align-items:center;gap:6px">🌐 Službena stranica</a>' : ''}
|
||||||
|
</div>
|
||||||
${k.napomena ? '<div class="card" style="margin-top:14px"><div class="card-t" style="margin-bottom:6px">Napomena</div><div style="font-size:12px;color:var(--t1);line-height:1.5">'+esc(k.napomena)+'</div></div>' : ''}
|
${k.napomena ? '<div class="card" style="margin-top:14px"><div class="card-t" style="margin-bottom:6px">Napomena</div><div style="font-size:12px;color:var(--t1);line-height:1.5">'+esc(k.napomena)+'</div></div>' : ''}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user