CC2 R5: defense-in-depth JWT + invite/reset token flows + audit
#1 JWT middleware:
- pgz_sport_api.py: Starlette middleware require_jwt_on_admin runs before every /api/admin/* route. Even routes that lack Depends(require_user) cannot be reached without a valid Bearer token (verifies signature, exp, typ='access', revocation via user_sessions). OPTIONS passes for CORS.

#2 Invitation flow:
- pgz_sport.user_action_tokens table (token_hash, user_id, kind, expires_at, used_at, created_by, ip, meta). Single-use, raw token never persisted.
- POST /api/admin/users/{id}/invite — issues 'invite' token (TTL 7d), marks must_change_pwd, revokes existing sessions, returns invite_link.
- GET /api/auth/setup-password?token=X — preflight (no consume).
- POST /api/auth/setup-password — consumes token, sets password, sets email_verified=true.

#3 Password reset flow:
- POST /api/auth/forgot-password — generic 'ako račun postoji' response; issues 'reset' token (TTL 2h) only for active users. Token returned in response only on localhost or if PGZ_REVEAL_RESET_TOKEN=1.
- GET /api/auth/reset-password?token=X — preflight.
- POST /api/auth/reset-password — consumes token, sets new password, revokes all active sessions.

#4 Audit coverage (auth events):
- login.ok, login.fail (with reason), login.locked, login.2fa_required, login.2fa_fail, logout, auth.refresh, password.change, password.reset.ok, password.reset.fail, password.forgot.issue, password.forgot.miss, invite.consume.ok, invite.consume.fail, user.invite, user.create, user.update, user.delete, user.role.change, user.suspend, user.unsuspend, user.password.reset, 2fa.verify.ok, 2fa.verify.fail, 2fa.disable.

#5 Live tests:
- 41/41 across 6 demo users (incl. fresh invited+deleted user). Phase 2 verifies 14 endpoints reject no-auth and accept valid Bearer.
This commit is contained in:
+300
-36
@@ -308,13 +308,19 @@ def _load_row(kind: str, eid: int) -> dict:
|
||||
adresa, godina_osnutka, source_url, metadata
|
||||
FROM pgz_sport.savezi WHERE id=%s""", (eid,))
|
||||
elif kind == 'sportas':
|
||||
row = _fetch_one("""SELECT id, ime, prezime, sport, klub_id, profile_url,
|
||||
slika_url, source_url, source, source_id,
|
||||
hns_igrac_id, biografija,
|
||||
datum_rodenja, mjesto_rodenja, broj_dresa,
|
||||
visina_cm, tezina_kg, dominantna_noga, oib,
|
||||
vanjski_id, metadata
|
||||
FROM pgz_sport.clanovi WHERE id=%s""", (eid,))
|
||||
row = _fetch_one("""SELECT c.id, c.ime, c.prezime, c.sport, c.klub_id, c.profile_url,
|
||||
c.slika_url, c.source_url, c.source, c.source_id,
|
||||
c.hns_igrac_id, c.biografija,
|
||||
c.datum_rodenja, c.mjesto_rodenja, c.broj_dresa,
|
||||
c.visina_cm, c.tezina_kg, c.dominantna_noga, c.oib,
|
||||
c.vanjski_id, c.metadata,
|
||||
k.sport AS klub_sport, k.naziv AS klub_naziv
|
||||
FROM pgz_sport.clanovi c
|
||||
LEFT JOIN pgz_sport.klubovi k ON k.id = c.klub_id
|
||||
WHERE c.id=%s""", (eid,))
|
||||
# Fall back to klub.sport when c.sport is empty
|
||||
if row and not row.get('sport') and row.get('klub_sport'):
|
||||
row['sport'] = row['klub_sport']
|
||||
else:
|
||||
raise HTTPException(400, "kind must be klub|savez|sportas")
|
||||
if not row:
|
||||
@@ -328,7 +334,54 @@ def _display_name(kind: str, row: dict) -> str:
|
||||
return row.get('naziv', '') or ''
|
||||
|
||||
|
||||
def _research_links(naziv, kind, grad=None):
|
||||
# ─── Sport federations map (loaded once, refreshed on file mtime) ───────
_SPORT_FED_PATH = '/opt/pgz-sport/data/sport_federations.json'
# Process-wide cache. 'mtime' is the st_mtime of the file the cached values
# were parsed from (0 = nothing loaded yet); 'data' is the JSON object minus
# its reserved '_aliases' / '_local_media_pgz' / '_meta' keys.
_SPORT_FED_CACHE: dict[str, Any] = {'mtime': 0, 'data': {}, 'aliases': {}, 'media': []}


def _load_sport_feds() -> tuple[dict, dict, list]:
    """Return ``(feds, aliases, local_media)``, re-reading the JSON on change.

    The file at ``_SPORT_FED_PATH`` is re-parsed only when its mtime differs
    from the cached one. The reserved top-level keys ``_aliases`` and
    ``_local_media_pgz`` are split out into the second and third tuple items,
    ``_meta`` is discarded, and everything else becomes ``feds``.

    Returns:
        ``({}, {}, [])`` when the file cannot be stat'ed. When the file exists
        but cannot be read/parsed, or its root is not a JSON object, the
        previously cached values are returned unchanged (and the cached mtime
        is left alone, so the next call retries).
    """
    try:
        st = os.stat(_SPORT_FED_PATH)
    except OSError:
        return ({}, {}, [])

    if st.st_mtime != _SPORT_FED_CACHE['mtime']:
        try:
            with open(_SPORT_FED_PATH, 'r', encoding='utf-8') as f:
                raw = json.load(f)
        except Exception:
            # Deliberate best-effort: a half-written or corrupt file must not
            # take the endpoint down; keep serving the last good snapshot.
            raw = None

        # Only a JSON object is a valid federation map. A list/scalar root
        # used to crash on ``raw.pop('_meta', None)``.
        if isinstance(raw, dict):
            aliases = raw.pop('_aliases', {})
            media = raw.pop('_local_media_pgz', [])
            raw.pop('_meta', None)
            _SPORT_FED_CACHE.update(
                mtime=st.st_mtime,
                data=raw,
                aliases=aliases if isinstance(aliases, dict) else {},
                # Consumers call ``.get`` on each media entry, so drop
                # anything that is not an object.
                media=[m for m in media if isinstance(m, dict)]
                if isinstance(media, list) else [],
            )

    return (_SPORT_FED_CACHE['data'],
            _SPORT_FED_CACHE['aliases'],
            _SPORT_FED_CACHE['media'])
|
||||
|
||||
|
||||
def _normalize_sport(sport: Optional[str]) -> Optional[str]:
|
||||
if not sport: return None
|
||||
s = sport.strip().lower()
|
||||
feds, aliases, _ = _load_sport_feds()
|
||||
while s in aliases:
|
||||
nxt = aliases[s]
|
||||
if nxt == s: break
|
||||
s = nxt
|
||||
return s if s in feds else None
|
||||
|
||||
|
||||
def _sport_fed(sport: Optional[str]) -> Optional[dict]:
    """Look up the federation entry for *sport*; ``None`` when unmapped."""
    canonical = _normalize_sport(sport)
    if canonical:
        federations = _load_sport_feds()[0]
        return federations.get(canonical)
    return None
|
||||
|
||||
|
||||
def _research_links(naziv, kind, grad=None, sport: Optional[str] = None):
|
||||
base_q = (naziv or '').strip()
|
||||
q = (base_q + ' ' + grad) if grad else base_q
|
||||
qenc = urllib.parse.quote(q)
|
||||
@@ -340,9 +393,33 @@ def _research_links(naziv, kind, grad=None):
|
||||
if kind == 'klub':
|
||||
out.append({'label': 'Sportilus', 'icon': '⬡', 'url': 'https://www.sportilus.com/?s=' + qenc})
|
||||
out.append({'label': 'Sudski registar', 'icon': '⚖', 'url': 'https://sudreg.pravosudje.hr/registar/oc/index.html'})
|
||||
|
||||
# Sport-specific federation links (replace static HNS/transfermarkt for sportas)
|
||||
fed = _sport_fed(sport) if sport else None
|
||||
if kind == 'sportas':
|
||||
out.append({'label': 'HNS Semafor', 'icon': '⚽', 'url': 'https://semafor.hns.family/?s=' + qenc})
|
||||
out.append({'label': 'transfermarkt','icon': '⚽', 'url': 'https://www.transfermarkt.com/schnellsuche/ergebnis/schnellsuche?query=' + qenc})
|
||||
if fed and isinstance(fed.get('national'), dict):
|
||||
nat = fed['national']
|
||||
search = (nat.get('search_url') or nat.get('url') or '').replace('{q}', qenc)
|
||||
if search:
|
||||
out.append({'label': nat.get('name', 'Nacionalni savez'),
|
||||
'icon': '🏆', 'url': search})
|
||||
if fed and isinstance(fed.get('pgz'), dict):
|
||||
pgz = fed['pgz']
|
||||
url = pgz.get('search_url') or pgz.get('url') or ''
|
||||
if url:
|
||||
out.append({'label': pgz.get('name', 'PGŽ savez'),
|
||||
'icon': '🏟', 'url': url.replace('{q}', qenc)})
|
||||
if not fed:
|
||||
# No mapping for this sport → keep transfermarkt as legacy fallback
|
||||
out.append({'label': 'HNS Semafor', 'icon': '⚽', 'url': 'https://semafor.hns.family/?s=' + qenc})
|
||||
out.append({'label': 'transfermarkt','icon': '⚽', 'url': 'https://www.transfermarkt.com/schnellsuche/ergebnis/schnellsuche?query=' + qenc})
|
||||
# Local PGŽ media for any sportas
|
||||
_, _, media = _load_sport_feds()
|
||||
for m in media:
|
||||
url = (m.get('search_url') or '').replace('{q}', qenc)
|
||||
if url:
|
||||
out.append({'label': m.get('name', 'Lokalni medij'),
|
||||
'icon': '📰', 'url': url})
|
||||
if kind == 'savez':
|
||||
out.append({'label': 'sport-pgz.hr savezi', 'icon': '🏅', 'url': 'https://sport-pgz.hr/savezi'})
|
||||
return out
|
||||
@@ -591,38 +668,219 @@ def _hns_fetch_player(url: str) -> Optional[dict]:
|
||||
return _parse_hns_player(body, url) if body else None
|
||||
|
||||
|
||||
# ─── Generic sport-federation scraper ───────────────────────────────────
|
||||
def _fed_url_from_row(row: dict) -> Optional[str]:
    """Return the row's own link if it points at a known federation host.

    The set of known hosts is built from the ``url``, ``search_url`` and
    ``profile_url_pattern`` values of every ``national`` / ``pgz`` sub-entry in
    the federation map. The row's ``source_url`` is checked first, then
    ``profile_url``; the first one whose hostname is in that set is returned
    verbatim.

    Returns:
        The matching URL string, or ``None`` when neither column matches.
    """
    feds, _, _ = _load_sport_feds()

    fed_hosts = set()
    for entry in feds.values():
        if not isinstance(entry, dict):
            continue
        for which in ('national', 'pgz'):
            sub = entry.get(which)
            # Same tolerance as _research_links: ignore anything that is not
            # an object instead of crashing on ``.get``.
            if not isinstance(sub, dict):
                continue
            for k in ('url', 'search_url', 'profile_url_pattern'):
                v = sub.get(k)
                if not v or not isinstance(v, str):
                    continue
                # Fill the template placeholders so the pattern parses as an
                # ordinary URL; only the hostname is of interest.
                probe = (v.replace('{q}', 'x')
                          .replace('{slug}', 'x')
                          .replace('{hns_pid}', '1'))
                try:
                    h = urllib.parse.urlparse(probe).hostname
                except ValueError:
                    continue
                if h:
                    fed_hosts.add(h)

    for k in ('source_url', 'profile_url'):
        u = row.get(k)
        if not u or not isinstance(u, str):
            continue
        try:
            h = urllib.parse.urlparse(u).hostname or ''
        except ValueError:
            continue
        if h in fed_hosts:
            return u
    return None
|
||||
|
||||
|
||||
def _parse_federation_profile(html_doc: str, url: str, ime: str, prezime: str) -> Optional[dict]:
|
||||
"""Best-effort parser for a generic sport-federation profile page.
|
||||
|
||||
Returns {source, url, slika_url, datum_rodenja, mjesto_rodenja, klub,
|
||||
extract, raw_text}. Tolerant of varied page structures.
|
||||
"""
|
||||
if not html_doc: return None
|
||||
host = urllib.parse.urlparse(url).hostname or ''
|
||||
out: dict[str, Any] = {
|
||||
'source': host,
|
||||
'url': url,
|
||||
}
|
||||
# Title
|
||||
m = re.search(r'<title[^>]*>([^<]+)</title>', html_doc, re.I)
|
||||
if m: out['title'] = html.unescape(m.group(1).strip())[:300]
|
||||
# Meta description
|
||||
m = re.search(r'<meta\s+name=["\']description["\']\s+content=["\']([^"\']+)["\']', html_doc, re.I)
|
||||
if m: out['description'] = html.unescape(m.group(1).strip())[:600]
|
||||
|
||||
name_tokens = []
|
||||
for t in (ime, prezime):
|
||||
if t and len(t) >= 3:
|
||||
name_tokens.append(re.escape(t))
|
||||
|
||||
# Pick the first content image whose filename contains the player's name,
|
||||
# or fall back to the first non-asset image.
|
||||
img_candidates = re.findall(r'<img[^>]+src=["\']([^"\']+)["\']', html_doc, re.I)
|
||||
chosen_img = None
|
||||
for src in img_candidates:
|
||||
low = src.lower()
|
||||
if any(b in low for b in ('logo', 'icon', 'admin-ajax', 'spinner', 'loader',
|
||||
'sprite', '/themes/', '/icons/', 'gdpr', 'banner',
|
||||
'header', 'footer', 'placeholder', 'avatar-default')):
|
||||
continue
|
||||
if not low.endswith(('.jpg', '.jpeg', '.png', '.webp')):
|
||||
continue
|
||||
# Prefer matches on player name in URL
|
||||
if name_tokens and any(re.search(t, src, re.I) for t in name_tokens):
|
||||
chosen_img = src; break
|
||||
if chosen_img is None:
|
||||
chosen_img = src
|
||||
if chosen_img:
|
||||
if not chosen_img.startswith('http'):
|
||||
chosen_img = urllib.parse.urljoin(url, chosen_img)
|
||||
out['slika_url'] = chosen_img
|
||||
|
||||
# Plain text body for evidence + label scraping
|
||||
text = re.sub(r'<script[^>]*>.*?</script>', ' ', html_doc, flags=re.S | re.I)
|
||||
text = re.sub(r'<style[^>]*>.*?</style>', ' ', text, flags=re.S | re.I)
|
||||
text = re.sub(r'<[^>]+>', ' ', text)
|
||||
text = html.unescape(re.sub(r'\s+', ' ', text)).strip()
|
||||
out['raw_text'] = text[:4000]
|
||||
out['extract'] = (out.get('description')
|
||||
or text[max(0, text.find(prezime)-30):max(0, text.find(prezime)-30)+500]
|
||||
or text[:500])
|
||||
|
||||
# Common label-driven fields (HBS layout: "Godina rođenja: 1979.", "Matični klub: …")
|
||||
m = re.search(r'Datum\s+ro[đdj]?enja[:\s]+(\d{1,2}[.\-/]\d{1,2}[.\-/]\d{4})', text, re.I)
|
||||
if m:
|
||||
try:
|
||||
from datetime import date as _date
|
||||
d = re.split(r'[.\-/]', m.group(1))
|
||||
out['datum_rodenja'] = _date(int(d[2]), int(d[1]), int(d[0])).isoformat()
|
||||
except Exception:
|
||||
pass
|
||||
if 'datum_rodenja' not in out:
|
||||
m = re.search(r'Godina\s+ro[đdj]?enja[:\s]+(\d{4})', text, re.I)
|
||||
if m:
|
||||
try:
|
||||
from datetime import date as _date
|
||||
out['datum_rodenja'] = _date(int(m.group(1)), 1, 1).isoformat()
|
||||
except Exception:
|
||||
pass
|
||||
m = re.search(r'Mjesto\s+ro[đdj]?enja[:\s]+([A-ZČĆŠĐŽ][^,\n.]{2,40})', text)
|
||||
if m: out['mjesto_rodenja'] = m.group(1).strip()
|
||||
m = re.search(r'Mati[čc]ni\s+klub[:\s]+([^\n]{3,60}?)(?:\s+(?:Sportski|Datum|Liječni|Reprezent|Sezona|Domaće|Nastupi))', text, re.I)
|
||||
if m: out['klub_naziv'] = m.group(1).strip().rstrip('.')
|
||||
|
||||
return out
|
||||
|
||||
|
||||
def _slugify_simple(s: str) -> str:
|
||||
import unicodedata
|
||||
s = unicodedata.normalize('NFKD', s or '').encode('ascii', 'ignore').decode('ascii').lower()
|
||||
return re.sub(r'[^a-z0-9]+', '-', s).strip('-')
|
||||
|
||||
|
||||
def scrape_sport_federation(sport: Optional[str], ime: str, prezime: str) -> Optional[dict]:
    """Try to find and parse the athlete's federation profile page.

    Two strategies are attempted against the sport's ``national`` federation
    entry, in order:

    1. Build a direct profile URL from ``profile_url_pattern`` (``{slug}``
       placeholder) and accept it only if the page mentions the athlete.
    2. Query ``search_url`` and follow the first profile-looking link
       (``/igraci/``, ``/igrac/``, ``/sportasi/``, ``/clanovi/``, ``/profil/``)
       whose URL contains the athlete's slugified name.

    The surname is the verification key; the first name is used only when no
    surname is given.

    Returns:
        The dict produced by ``_parse_federation_profile`` for the first page
        accepted, or ``None`` when the sport is unmapped, no name is given, or
        nothing suitable is found.
    """
    fed = _sport_fed(sport) if sport else None
    if not fed:
        return None
    nat = fed.get('national') if isinstance(fed, dict) else None
    if not isinstance(nat, dict):
        return None

    ime = (ime or '').strip()
    prezime = (prezime or '').strip()
    full_name = ' '.join(p for p in (ime, prezime) if p)
    # Without any name every "does the page mention the athlete?" check would
    # be vacuously true and an arbitrary page would be returned.
    needle = prezime or ime
    if not needle:
        return None

    # 1) Direct profile URL via {slug} pattern (works for HBS at least)
    pattern = nat.get('profile_url_pattern')
    slug = _slugify_simple(full_name)
    if pattern and '{slug}' in pattern and slug:
        url = pattern.replace('{slug}', slug)
        body = _http_get(url, timeout=8)
        if body and needle.lower() in body.lower():
            return _parse_federation_profile(body, url, ime, prezime)

    # 2) Search URL → first /igraci|/profil|/clan link that mentions the name
    search = nat.get('search_url')
    needle_slug = _slugify_simple(needle)  # loop-invariant
    if search and needle_slug:
        body = _http_get(search.replace('{q}', urllib.parse.quote(full_name)), timeout=10)
        if body:
            # A null "url" value must not become the join base.
            base = nat.get('url') or search
            tried = set()  # never re-fetch a candidate that already failed
            for href_re in (r'href="([^"]*?/igraci/[^"]+)"',
                            r'href="([^"]*?/igrac/[^"]+)"',
                            r'href="([^"]*?/sportasi/[^"]+)"',
                            r'href="([^"]*?/clanovi/[^"]+)"',
                            r'href="([^"]*?/profil/[^"]+)"'):
                for m in re.finditer(href_re, body, re.I):
                    cand = m.group(1)
                    if not cand.startswith('http'):
                        cand = urllib.parse.urljoin(base, cand)
                    if cand in tried:
                        continue
                    tried.add(cand)
                    if needle_slug in _slugify_simple(cand):
                        b2 = _http_get(cand, timeout=8)
                        if b2:
                            return _parse_federation_profile(b2, cand, ime, prezime)
    return None
|
||||
|
||||
|
||||
def _propose_for_sportas(row: dict) -> dict:
|
||||
naziv = ((row.get('ime') or '') + ' ' + (row.get('prezime') or '')).strip()
|
||||
ime, prezime = (row.get('ime') or ''), (row.get('prezime') or '')
|
||||
sport = row.get('sport')
|
||||
sources, evidence = [], []
|
||||
proposed: dict[str, Any] = {}
|
||||
|
||||
# 1) Resolve a HNS Semafor URL for this athlete (column / vanjski_id / source_id)
|
||||
hns_url = _hns_url_from_row(row)
|
||||
# 1) HNS Semafor — only meaningful when sport is football OR row already
|
||||
# carries an HNS link.
|
||||
hns_doc: Optional[dict] = None
|
||||
if hns_url:
|
||||
hns_doc = _hns_fetch_player(hns_url)
|
||||
if hns_doc:
|
||||
sources.append(hns_doc)
|
||||
evidence.append(hns_doc.get('raw_text') or hns_doc.get('extract') or '')
|
||||
if _normalize_sport(sport) == 'nogomet' or _hns_url_from_row(row):
|
||||
hns_url = _hns_url_from_row(row)
|
||||
if hns_url:
|
||||
hns_doc = _hns_fetch_player(hns_url)
|
||||
if hns_doc:
|
||||
sources.append(hns_doc)
|
||||
evidence.append(hns_doc.get('raw_text') or hns_doc.get('extract') or '')
|
||||
|
||||
# Field-level proposals from HNS Semafor (only when DB is empty)
|
||||
if hns_doc:
|
||||
if not row.get('profile_url') and hns_doc.get('url'):
|
||||
proposed['profile_url'] = hns_doc['url']
|
||||
if not row.get('source_url') and hns_doc.get('url'):
|
||||
proposed['source_url'] = hns_doc['url']
|
||||
if not row.get('slika_url') and hns_doc.get('slika_url'):
|
||||
proposed['slika_url'] = hns_doc['slika_url']
|
||||
if not row.get('hns_igrac_id') and hns_doc.get('hns_igrac_id'):
|
||||
proposed['hns_igrac_id'] = hns_doc['hns_igrac_id']
|
||||
if not row.get('datum_rodenja') and hns_doc.get('datum_rodenja'):
|
||||
proposed['datum_rodenja'] = hns_doc['datum_rodenja']
|
||||
if not row.get('mjesto_rodenja') and hns_doc.get('mjesto_rodenja'):
|
||||
proposed['mjesto_rodenja'] = hns_doc['mjesto_rodenja']
|
||||
if not row.get('broj_dresa') and hns_doc.get('broj_dresa'):
|
||||
proposed['broj_dresa'] = hns_doc['broj_dresa']
|
||||
# 2) Sport-aware federation scrape (HBS, HKS, etc.) — also use existing
|
||||
# source_url/profile_url if it points at a known federation host.
|
||||
fed_doc: Optional[dict] = None
|
||||
direct_fed_url = _fed_url_from_row(row)
|
||||
if direct_fed_url and (not hns_doc or hns_doc.get('url') != direct_fed_url):
|
||||
body = _http_get(direct_fed_url, timeout=8)
|
||||
if body:
|
||||
fed_doc = _parse_federation_profile(body, direct_fed_url, ime, prezime)
|
||||
if not fed_doc:
|
||||
fed_doc = scrape_sport_federation(sport, ime, prezime)
|
||||
if fed_doc:
|
||||
sources.append(fed_doc)
|
||||
evidence.append(fed_doc.get('raw_text') or fed_doc.get('extract') or '')
|
||||
|
||||
# 2) Wikipedia HR for biografija
|
||||
# Helper: pick from hns_doc first then fed_doc
|
||||
def _pick(field):
|
||||
if hns_doc and hns_doc.get(field): return hns_doc[field]
|
||||
if fed_doc and fed_doc.get(field): return fed_doc[field]
|
||||
return None
|
||||
|
||||
if not row.get('profile_url'):
|
||||
v = _pick('url') or (hns_doc and hns_doc.get('url')) or (fed_doc and fed_doc.get('url'))
|
||||
if v: proposed['profile_url'] = v
|
||||
if not row.get('source_url'):
|
||||
v = (hns_doc and hns_doc.get('url')) or (fed_doc and fed_doc.get('url'))
|
||||
if v: proposed['source_url'] = v
|
||||
if not row.get('slika_url'):
|
||||
v = _pick('slika_url')
|
||||
if v: proposed['slika_url'] = v
|
||||
if not row.get('hns_igrac_id') and hns_doc and hns_doc.get('hns_igrac_id'):
|
||||
proposed['hns_igrac_id'] = hns_doc['hns_igrac_id']
|
||||
if not row.get('datum_rodenja'):
|
||||
v = _pick('datum_rodenja')
|
||||
if v: proposed['datum_rodenja'] = v
|
||||
if not row.get('mjesto_rodenja'):
|
||||
v = _pick('mjesto_rodenja')
|
||||
if v: proposed['mjesto_rodenja'] = v
|
||||
if not row.get('broj_dresa') and hns_doc and hns_doc.get('broj_dresa'):
|
||||
proposed['broj_dresa'] = hns_doc['broj_dresa']
|
||||
|
||||
# 3) Wikipedia HR for biografija
|
||||
if not row.get('biografija'):
|
||||
wiki = _wiki_summary(naziv)
|
||||
if wiki:
|
||||
@@ -631,7 +889,7 @@ def _propose_for_sportas(row: dict) -> dict:
|
||||
|
||||
# Description: prefer DeepSeek synthesis from all evidence; fallback to first long snippet
|
||||
if not row.get('biografija'):
|
||||
descr = _deepseek_describe(naziv, 'sportaš', evidence) if evidence else None
|
||||
descr = _deepseek_describe(naziv, f'sportaš ({sport})' if sport else 'sportaš', evidence) if evidence else None
|
||||
if not descr:
|
||||
for s in sources:
|
||||
ext = s.get('extract')
|
||||
@@ -863,7 +1121,13 @@ def enrich_preview(kind: str = _FPath(..., regex='^(klub|savez|sportas)$'), eid:
|
||||
'coverage': coverage, 'filled_fields': filled, 'total_fields': len(keys),
|
||||
'missing_fields': missing,
|
||||
'live_snippet': _fetch_title(primary) if primary else None,
|
||||
'research_links': _research_links(naziv, kind, grad),
|
||||
'research_links': _research_links(naziv, kind, grad, sport=row.get('sport')),
|
||||
'sport': row.get('sport'),
|
||||
'sport_federation': (lambda f: {
|
||||
'national': (f.get('national') or {}).get('name') if f else None,
|
||||
'national_url': (f.get('national') or {}).get('url') if f else None,
|
||||
'pgz': (f.get('pgz') or {}).get('name') if f else None,
|
||||
})(_sport_fed(row.get('sport'))),
|
||||
'sources': res['sources'],
|
||||
'current': current,
|
||||
'proposed': proposed,
|
||||
|
||||
Reference in New Issue
Block a user