R7+: 5x P0 demo fixes — HNS direct link, avatar cache, logo home, klub→sportaši, smarter enrichment
1) HNS direct link u research_links: za sportaša s profile_url/source_url (npr. https://semafor.hns.family/igraci/X/...) generira [⭐DIRECT] link na vrhu liste, umjesto generic Google search. _research_links sada prima row dict.
2) Avatar cache buster: applyMeToHeader dodaje ?t=Date.now() na sve avatar img tagove. Avatar upload handler dodatno persistira novi avatar_url u localStorage.pgz_user tako da preživi page refresh + cross-page navigacije.
3) Logo home link: <div class='logo'> → <a href='/' class='logo'> u app.html i sport2.html. Klik na PGŽ SPORT logo vodi na public portal.
4) Klub → Sportaši drill-down: u klub Info tabu dodan button '👥 Vidi sportaše ovog kluba (N)' koji prebacuje na k-clan tab. Plus '🌐 Službena stranica' link kad klub ima web.
5) Smarter klub enrichment:
- URL validacija (skip placeholder strings poput 'godisnjak_zspgz_2025')
- Domain candidate guesser (slug → 16 candidate URLs s common HR TLD-ovima i sport prefix-ima)
- Parallel GET probe (8 threads, 10s budget) — first 200 + name token match wins
- Subpage scrape (/kontakt, /uprava, /o-nama, /o-klubu, /predsjednik) za richer evidence
- HNK Orijent (id 3766) test: pogađa https://www.orijent.hr/, predlaže web+email+telefon+opis
E2E verified:
- 9/9 sidebar URL-ova → 200
- /users/me/gdpr-export → 200 (28KB JSON)
- /users/me/request-deletion → 200 (DB row pgz_sport.gdpr_erasure_requests)
- /enrich/klub/3766 → 4 proposed fields (web, email, telefon, opis)
- HNS sportaš research_links: ⭐ HNS profil DIRECT link na vrhu
Backend: routers/enrich_router.py
Frontend: static/app.html, static/sport2.html
Backups: _backups/sprint_1777940670/
Tag: R7-demo-ready
This commit is contained in:
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
+137
-4
@@ -381,11 +381,27 @@ def _sport_fed(sport: Optional[str]) -> Optional[dict]:
|
||||
return feds.get(norm)
|
||||
|
||||
|
||||
def _research_links(naziv, kind, grad=None, sport: Optional[str] = None):
|
||||
def _research_links(naziv, kind, grad=None, sport: Optional[str] = None, row: Optional[dict] = None):
|
||||
base_q = (naziv or '').strip()
|
||||
q = (base_q + ' ' + grad) if grad else base_q
|
||||
qenc = urllib.parse.quote(q)
|
||||
out = [
|
||||
out = []
|
||||
# Prefer DIRECT profile/source link if entity already has one (e.g. HNS Semafor)
|
||||
if row:
|
||||
direct = row.get('profile_url') or row.get('source_url') or row.get('scrape_url') or row.get('web') or row.get('web_stranica')
|
||||
if direct and isinstance(direct, str) and direct.startswith(('http://','https://')):
|
||||
try:
|
||||
host = urllib.parse.urlparse(direct).hostname or ''
|
||||
except Exception:
|
||||
host = ''
|
||||
label = 'Vanjski profil'
|
||||
icon = '🔗'
|
||||
if 'hns' in host: label, icon = 'HNS profil', '⚽'
|
||||
elif 'transfermarkt' in host: label, icon = 'Transfermarkt', '⚽'
|
||||
elif 'wikipedia' in host: label, icon = 'Wikipedia', '📚'
|
||||
elif host.endswith('.hr') or host.endswith('.com'): label, icon = 'Službena stranica', '🌐'
|
||||
out.append({'label': label, 'icon': icon, 'url': direct, 'is_direct': True})
|
||||
out += [
|
||||
{'label': 'Google', 'icon': '🔍', 'url': 'https://www.google.com/search?q=' + qenc},
|
||||
{'label': 'Wikipedia HR', 'icon': '📚', 'url': 'https://hr.wikipedia.org/w/index.php?search=' + qenc},
|
||||
{'label': 'sport-pgz.hr', 'icon': '🏅', 'url': 'https://sport-pgz.hr/?s=' + qenc},
|
||||
@@ -445,11 +461,128 @@ def _is_relevant(source: dict, tokens: list[str]) -> bool:
|
||||
return any(t in blob for t in tokens)
|
||||
|
||||
|
||||
|
||||
# ─── Klub domain guesser (HR slug → candidate URLs → HEAD probe) ────────
|
||||
import re as _re_klg
|
||||
|
||||
def _slugify_klub(naziv: str) -> str:
|
||||
if not naziv: return ""
|
||||
s = naziv.lower()
|
||||
repl = (("č","c"),("ć","c"),("ž","z"),("š","s"),("đ","d"),
|
||||
('"',''),("'",""),("(",""),(")",""),(",",""),(".",""),
|
||||
("/",""),("\\",""))
|
||||
for a,b in repl: s = s.replace(a,b)
|
||||
s = _re_klg.sub(r"[^a-z0-9]+", "-", s).strip("-")
|
||||
return s
|
||||
|
||||
def _klub_domain_candidates(naziv: str) -> list[str]:
    """Generate ranked candidate homepage URLs from a club name.

    Ranking (most specific first):
      1. the full slug on .hr/.com/.eu/.info, bare and with ``www.``;
      2. the slug with its leading sport prefix removed, on .hr/.com;
      3. only when the name carries no sport prefix at all: the base slug
         with a few common sport prefixes prepended, on .hr/.com.

    Args:
        naziv: Club name (may be empty or None).

    Returns:
        Up to 20 unique ``https://`` URLs in ranking order; an empty list
        when the name is empty or slugifies to nothing.
    """
    if not naziv:
        return []
    slug = _slugify_klub(naziv)
    if not slug:
        # A name made only of punctuation would otherwise yield "https://.hr".
        return []

    # Single source of truth for "this name already starts with a sport
    # prefix". Using the same tuple for stripping and for detection keeps
    # e.g. "gk-foo" from being probed as "nk-foo.hr".
    known_prefixes = ("hnk-", "nk-", "rk-", "kk-", "ok-", "bk-", "gk-", "tk-", "ak-", "hbk-")
    base = slug
    for pref in known_prefixes:
        if slug.startswith(pref):
            base = slug[len(pref):]
            break
    has_sport_prefix = base != slug

    candidates = []
    # Full slug, including whatever prefix the name had.
    for tld in (".hr", ".com", ".eu", ".info"):
        candidates.append(f"https://{slug}{tld}")
        candidates.append(f"https://www.{slug}{tld}")
    # Prefix-less base name.
    for tld in (".hr", ".com"):
        candidates.append(f"https://{base}{tld}")
        candidates.append(f"https://www.{base}{tld}")
    # Guess a sport prefix only when the name did not state one.
    if not has_sport_prefix:
        for sp in ("nk-", "hnk-", "rk-", "kk-", "bk-"):
            for tld in (".hr", ".com"):
                candidates.append(f"https://{sp}{base}{tld}")

    # dict.fromkeys de-duplicates while preserving insertion (rank) order.
    return list(dict.fromkeys(candidates))[:20]
|
||||
|
||||
def _probe_klub_url(url: str, naziv_tokens: list, timeout: int = 5) -> Optional[dict]:
    """GET *url* and accept it only if it is alive and mentions the club.

    A page is accepted when the response status is 200, the body has at
    least 200 characters and, if any name token longer than two characters
    was supplied, at least one of them occurs in the lowercased body.

    Args:
        url: Candidate URL to fetch (redirects are followed).
        naziv_tokens: Name tokens to look for; falsy means "no token check".
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        ``{"source", "url", "raw_text"}`` with the final URL and the first
        50000 characters of the body, or ``None`` on any failure.
    """
    try:
        import requests

        response = requests.get(
            url,
            headers={"User-Agent":"Mozilla/5.0 RinetEnrichBot/1.0"},
            allow_redirects=True,
            timeout=timeout,
        )
        if response.status_code != 200 or len(response.text) < 200:
            return None

        haystack = response.text.lower()
        # Very short tokens (<= 2 chars) are ignored as non-distinctive.
        wanted = [tok.lower() for tok in (naziv_tokens or []) if len(tok) > 2]
        if wanted:
            for tok in wanted:
                if tok in haystack:
                    break
            else:
                return None

        return {"source": "domain_probe", "url": response.url, "raw_text": response.text[:50000]}
    except Exception:
        # Best-effort probe: any network/parse problem just means "no hit".
        return None
|
||||
|
||||
def _guess_klub_domains(naziv: str, tokens: list) -> Optional[dict]:
    """Probe guessed club domains in parallel and return the first hit.

    Up to 16 candidates from ``_klub_domain_candidates`` are probed with
    ``_probe_klub_url`` on 8 worker threads (4 s timeout per request, 10 s
    overall budget). "First" means the first probe to *finish* successfully,
    which is not necessarily the highest-ranked candidate.

    Args:
        naziv: Club name used to derive candidate URLs.
        tokens: Name tokens forwarded to the probe for the relevance check.

    Returns:
        The probe document of the first successful candidate, or ``None``
        when there are no candidates, none matches, or the budget runs out.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    candidates = _klub_domain_candidates(naziv)
    if not candidates:
        return None

    # The pool is managed by hand instead of ``with``: the context manager
    # calls shutdown(wait=True), which would block on every still-running
    # probe even after a winner has been found.
    pool = ThreadPoolExecutor(max_workers=8)
    futs = []
    try:
        futs = [pool.submit(_probe_klub_url, url, tokens, 4) for url in candidates[:16]]
        try:
            for fut in as_completed(futs, timeout=10):
                try:
                    doc = fut.result()
                except Exception:
                    continue
                if doc:
                    return doc
        except FuturesTimeoutError:
            # as_completed raises from the iteration itself once the overall
            # budget is exhausted; treat that as "nothing found".
            return None
        return None
    finally:
        # Best effort: drop probes that have not started, and do not wait for
        # the ones in flight (they end on their own request timeout).
        for f in futs:
            f.cancel()
        pool.shutdown(wait=False)
|
||||
|
||||
def _scrape_klub_subpages(base_url: str, tokens: list) -> str:
    """Fetch well-known contact/about subpages of a club site and join them.

    Tries /kontakt, /uprava, /o-nama, /o-klubu, /predsjednik, /klub,
    /contact and /about under *base_url*. A page is kept when it answers
    200 with more than 200 characters; at most 30000 characters are taken
    from each page.

    Pages whose final URL (after redirects) equals the site root or an
    already collected page are skipped, so a site that redirects unknown
    paths to its homepage does not add the same text several times.

    Args:
        base_url: Site root, with or without a trailing slash.
        tokens: Currently unused; kept for interface compatibility.

    Returns:
        The collected page texts joined by blank lines, or ``""`` when
        *base_url* is empty or nothing usable was fetched.
    """
    if not base_url:
        return ""
    import requests

    base = base_url.rstrip("/")
    paths = ["/kontakt","/uprava","/o-nama","/o-klubu","/predsjednik","/klub","/contact","/about"]
    seen_urls = {base}  # the homepage itself is already held by the caller
    accum = []
    for path in paths:
        try:
            r = requests.get(base + path, timeout=4, allow_redirects=True,
                             headers={"User-Agent":"Mozilla/5.0 RinetEnrichBot/1.0"})
            if r.status_code != 200 or len(r.text) <= 200:
                continue
            final_url = (r.url or "").rstrip("/")
            if final_url in seen_urls:
                continue
            seen_urls.add(final_url)
            accum.append(r.text[:30000])
        except Exception:
            # Best-effort scrape: a failing subpage must not abort the rest.
            continue
    return "\n\n".join(accum)
|
||||
|
||||
|
||||
def _propose_for_klub(row: dict) -> dict:
|
||||
naziv = row.get('naziv') or ''
|
||||
primary = row.get('web') or row.get('web_stranica') or row.get('source_url') or row.get('scrape_url')
|
||||
# Only consider HTTP(S) URLs as valid primary sources — skip placeholder strings like 'godisnjak_2025'
|
||||
raw_primary = row.get('web') or row.get('web_stranica') or row.get('source_url') or row.get('scrape_url')
|
||||
primary = raw_primary if (raw_primary and isinstance(raw_primary, str) and raw_primary.startswith(('http://','https://'))) else None
|
||||
sources, evidence = [], []
|
||||
tokens_pre = _name_tokens(naziv)
|
||||
pdoc = _fetch_primary_site(primary) if primary else None
|
||||
if not pdoc:
|
||||
# No valid web in DB — try to guess domain from club name
|
||||
pdoc = _guess_klub_domains(naziv, tokens_pre)
|
||||
if pdoc:
|
||||
# Also fetch subpages for richer evidence
|
||||
sub = _scrape_klub_subpages(pdoc.get('url',''), tokens_pre)
|
||||
if sub:
|
||||
pdoc['raw_text'] = (pdoc.get('raw_text','') + '\n\n' + sub)[:120000]
|
||||
elif pdoc:
|
||||
# Have primary site — also fetch its subpages
|
||||
sub = _scrape_klub_subpages(pdoc.get('url') or primary, tokens_pre)
|
||||
if sub:
|
||||
pdoc['raw_text'] = (pdoc.get('raw_text','') + '\n\n' + sub)[:120000]
|
||||
if pdoc: sources.append(pdoc); evidence.append(pdoc.get('raw_text') or pdoc.get('extract') or '')
|
||||
wiki = _wiki_summary(naziv)
|
||||
if wiki: sources.append(wiki); evidence.append(wiki.get('extract') or '')
|
||||
@@ -1121,7 +1254,7 @@ def enrich_preview(kind: str = _FPath(..., regex='^(klub|savez|sportas)$'), eid:
|
||||
'coverage': coverage, 'filled_fields': filled, 'total_fields': len(keys),
|
||||
'missing_fields': missing,
|
||||
'live_snippet': _fetch_title(primary) if primary else None,
|
||||
'research_links': _research_links(naziv, kind, grad, sport=row.get('sport')),
|
||||
'research_links': _research_links(naziv, kind, grad, sport=row.get('sport'), row=row),
|
||||
'sport': row.get('sport'),
|
||||
'sport_federation': (lambda f: {
|
||||
'national': (f.get('national') or {}).get('name') if f else None,
|
||||
|
||||
+13
-3
@@ -265,7 +265,7 @@ table tbody tr:hover{background:var(--bg3)}
|
||||
<div class="app">
|
||||
<aside class="sb" id="sb">
|
||||
<div class="sb-h">
|
||||
<div class="logo">PGŽ <span class="g">SPORT</span></div>
|
||||
<a href="/" class="logo" style="text-decoration:none;color:inherit;cursor:pointer" title="Početna"><span style="font-weight:800;letter-spacing:.5px">PGŽ</span> <span class="g">SPORT</span></a>
|
||||
<div class="sub" id="role-sub">Operativna aplikacija</div>
|
||||
<div class="sb-toggle" id="sb-toggle" onclick="toggleSidebar()" title="Skupi/raširi sidebar">≡</div>
|
||||
</div>
|
||||
@@ -449,7 +449,7 @@ function applyMeToHeader(){
|
||||
$('#user-role-label')?.replaceChildren(document.createTextNode(roleLabel));
|
||||
// Avatar topbar
|
||||
if(me.avatar_url){
|
||||
$('#user-av').innerHTML = `<img src="${esc(me.avatar_url)}" alt="">`;
|
||||
$('#user-av').innerHTML = `<img src="${esc(me.avatar_url)}${me.avatar_url.includes('?')?'&':'?'}t=${Date.now()}" alt="">`;
|
||||
} else if(me.google_picture){
|
||||
$('#user-av').innerHTML = `<img src="${esc(me.google_picture)}" alt="">`;
|
||||
} else {
|
||||
@@ -459,7 +459,7 @@ function applyMeToHeader(){
|
||||
if($('#sf-name')) $('#sf-name').textContent = name;
|
||||
if($('#sf-role')) $('#sf-role').textContent = roleLabel;
|
||||
if($('#sf-av')){
|
||||
if(me.avatar_url) $('#sf-av').innerHTML = `<img src="${esc(me.avatar_url)}" alt="" style="width:100%;height:100%;object-fit:cover;border-radius:50%">`;
|
||||
if(me.avatar_url) $('#sf-av').innerHTML = `<img src="${esc(me.avatar_url)}${me.avatar_url.includes('?')?'&':'?'}t=${Date.now()}" alt="" style="width:100%;height:100%;object-fit:cover;border-radius:50%">`;
|
||||
else if(me.google_picture) $('#sf-av').innerHTML = `<img src="${esc(me.google_picture)}" alt="" style="width:100%;height:100%;object-fit:cover;border-radius:50%">`;
|
||||
else $('#sf-av').textContent = initials(name);
|
||||
}
|
||||
@@ -856,6 +856,16 @@ async function onAvatarPick(input){
|
||||
input.value = '';
|
||||
if(r && r.avatar_url){
|
||||
if(_state.me) _state.me.avatar_url = r.avatar_url;
|
||||
// Update localStorage so other pages (sport2.html footer, sidebar) see new avatar
|
||||
try{
|
||||
const stored = localStorage.getItem('pgz_user') || sessionStorage.getItem('pgz_user');
|
||||
if(stored){
|
||||
const u = JSON.parse(stored);
|
||||
u.avatar_url = r.avatar_url;
|
||||
if(localStorage.getItem('pgz_user')) localStorage.setItem('pgz_user', JSON.stringify(u));
|
||||
else sessionStorage.setItem('pgz_user', JSON.stringify(u));
|
||||
}
|
||||
}catch(e){console.warn('avatar storage update failed', e);}
|
||||
applyMeToHeader();
|
||||
loadSection(); // re-render profile
|
||||
} else {
|
||||
|
||||
+7
-1
@@ -232,7 +232,7 @@ a.tag:hover,.tag[onclick]:hover{transform:translateY(-1px);filter:brightness(1.1
|
||||
<div class="app">
|
||||
<aside class="sb" id="sb">
|
||||
<div class="sb-h">
|
||||
<div class="logo">PGŽ <span class="g">SPORT</span></div>
|
||||
<a href="/" class="logo" style="text-decoration:none;color:inherit;cursor:pointer" title="Početna"><span style="font-weight:800;letter-spacing:.5px">PGŽ</span> <span class="g">SPORT</span></a>
|
||||
<div class="sub">Primorsko-goranska županija</div>
|
||||
<div class="sb-toggle" id="sb-toggle" onclick="toggleSidebar()" title="Skupi/raširi sidebar">≡</div>
|
||||
</div>
|
||||
@@ -1349,6 +1349,12 @@ async function openKlub(id){
|
||||
<div class="k">Osnovan</div><div class="v">${txt(k.godina_osnutka)}</div>
|
||||
<div class="k">Nositelj kvalitete</div><div class="v">${k.nositelj_kvalitete?'<span class="tag gd">DA</span>':'<span class="tag">NE</span>'}</div>
|
||||
</div>
|
||||
<div style="margin-top:14px;display:flex;gap:8px;flex-wrap:wrap">
|
||||
<a class="btn primary" onclick="switchKlubTab(document.querySelector('.tab[onclick*=k-clan]'),'k-clan')" style="cursor:pointer;display:inline-flex;align-items:center;gap:6px">
|
||||
👥 Vidi sportaše ovog kluba (${clanovi.length})
|
||||
</a>
|
||||
${(k.web||k.web_stranica) ? '<a class="btn" href="'+esc(k.web||k.web_stranica)+'" target="_blank" style="display:inline-flex;align-items:center;gap:6px">🌐 Službena stranica</a>' : ''}
|
||||
</div>
|
||||
${k.napomena ? '<div class="card" style="margin-top:14px"><div class="card-t" style="margin-bottom:6px">Napomena</div><div style="font-size:12px;color:var(--t1);line-height:1.5">'+esc(k.napomena)+'</div></div>' : ''}
|
||||
</div>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user