Files
pgz-sport/scripts/cleanup_garbage_clubs.py
Damir Radulić 0046b8d695 CC2 R5: defense-in-depth JWT + invite/reset token flows + audit
#1 JWT middleware:
- pgz_sport_api.py: starlette middleware require_jwt_on_admin runs before
  every /api/admin/* route. Even routes that lack Depends(require_user)
  cannot be reached without a valid Bearer token (verifies signature,
  exp, typ='access', revocation via user_sessions). OPTIONS passes for CORS.

#2 Invitation flow:
- pgz_sport.user_action_tokens table (token_hash, user_id, kind, expires_at,
  used_at, created_by, ip, meta). Single-use, raw token never persisted.
- POST /api/admin/users/{id}/invite — issues 'invite' token (TTL 7d),
  marks must_change_pwd, revokes existing sessions, returns invite_link.
- GET  /api/auth/setup-password?token=X — preflight (no consume).
- POST /api/auth/setup-password — consumes token, sets password, sets
  email_verified=true.

#3 Password reset flow:
- POST /api/auth/forgot-password — generic 'ako račun postoji' response;
  issues 'reset' token (TTL 2h) only for active users. Token returned in
  response only on localhost or if PGZ_REVEAL_RESET_TOKEN=1.
- GET  /api/auth/reset-password?token=X — preflight.
- POST /api/auth/reset-password — consumes token, sets new password,
  revokes all active sessions.

#4 Audit coverage (auth events):
- login.ok, login.fail (with reason), login.locked, login.2fa_required,
  login.2fa_fail, logout, auth.refresh, password.change, password.reset.ok,
  password.reset.fail, password.forgot.issue, password.forgot.miss,
  invite.consume.ok, invite.consume.fail, user.invite, user.create,
  user.update, user.delete, user.role.change, user.suspend, user.unsuspend,
  user.password.reset, 2fa.verify.ok, 2fa.verify.fail, 2fa.disable.

#5 Live tests: 41/41 across 6 demo users (incl. fresh invited+deleted user).
   Phase 2 verifies 14 endpoints reject no-auth and accept valid Bearer.
2026-05-05 01:28:29 +02:00

212 lines
8.1 KiB
Python
Executable File

#!/usr/bin/env python3
"""
cleanup_garbage_clubs.py — fix klubovi where naziv is an address
Author: Damir Radulić (dradulic@outlook.com / damir@rinet.one)
Date: 2026-05-05
Symptoms (R3B/R4 cleanup pass):
- 14 odbojkaški klubovi imaju adresu u polju `naziv`
- others: null/empty naziv, naziv equal to grad, naziv only digits
- sportaši with email/phone in ime/prezime
Strategy:
1) For each problem klub, look up civic.entities by address fragment.
2) If exactly one candidate → swap (naziv ← candidate.name, adresa ← old naziv,
oib ← candidate.oib if missing) with confidence 0.95.
3) If multiple candidates → mark metadata.manual_review=true with candidates list.
4) If zero candidates → broader fallback (city + sport=odbojka) and same logic.
Backup: pgz_sport.klubovi_backup_20260505 must already exist (run from SQL).
Reports written to /opt/pgz-sport/data_cleanup_report.md (separate driver).
"""
from __future__ import annotations
import os, json, sys
from datetime import datetime, timezone
import psycopg2, psycopg2.extras
PG = dict(host=os.environ.get('PG_HOST','10.10.0.2'),
port=int(os.environ.get('PG_PORT','6432')),
dbname=os.environ.get('PG_DB','rinet_v3'),
user=os.environ.get('PG_USER','rinet'),
password=os.environ.get('PG_PASS',''))
PROBLEM_IDS = [2613, 2616, 2618, 2619, 2622, 2624, 2626, 2630, 2632, 2634, 2636, 2638, 2641, 2643]
# Hand-curated picks where DB has multiple candidates at same address.
# Source: cross-reference with HOS (Hrvatski odbojkaški savez) member roster.
MANUAL_PICKS = {
# 2613 = Trg Viktora Bubnja 1 — savez address; primary host is HAOK Rijeka.
2613: 100700, # Hrvatski Akademski Odbojkaški Klub "Rijeka"
# 2618 = Zdravka Kučića 1 — both ŽOK and MOK Gornja Vežica share. The municipal
# club entry is MOK Gornja Vežica which is the registered active senior team.
2618: 82677, # Muški Odbojkaški Klub "Gornja Vežica"
}
# Hand-curated for zero-match cases (verified via HOS public list)
ZERO_MATCH_HINTS = {
2619: {'name': 'Odbojkaški Klub Čavle', 'note':'Vrh Čavje 31, Čavle'},
2630: {'name': 'Odbojkaški Klub Opatija', 'note':'1. Istarske čete 3, Opatija'},
2636: {'name': 'Odbojkaški Klub Rijeka', 'note':'Sv. Križ 24, Rijeka — possibly OK Rijeka senior'},
2641: {'name': 'Odbojkaški Klub Crikvenica','note':'Kotorska 15a, Crikvenica'},
}
def db():
c = psycopg2.connect(**PG); c.autocommit = False; return c
def fetch_candidates(cur, addr_fragment, sport='odbojka'):
cur.execute("""
SELECT id, name, oib, address, city, entity_type
FROM civic.entities
WHERE address ILIKE %s
AND (name ILIKE '%%odbojk%%' OR name ILIKE 'OK %%' OR name ILIKE 'ŽOK%%'
OR name ILIKE 'MOK %%' OR name ILIKE '%%volley%%')
ORDER BY length(name)
LIMIT 5
""", ('%'+addr_fragment+'%',))
return [dict(r) for r in cur.fetchall()]
def update_klub(cur, kid, new_naziv, old_naziv_as_address, oib, manual_review=False, candidates=None, source=None):
"""Move old naziv → adresa, set new naziv, optionally set oib + metadata."""
md = {
'cleanup_at': datetime.now(timezone.utc).isoformat(),
'cleanup_reason': 'naziv_is_address',
'cleanup_source': source or 'civic.entities',
}
if manual_review:
md['manual_review'] = True
if candidates:
md['candidates'] = candidates
set_parts = ["naziv=%s", "adresa=%s",
"metadata = COALESCE(metadata,'{}'::jsonb) || %s::jsonb"]
params = [new_naziv, old_naziv_as_address, json.dumps(md, ensure_ascii=False)]
if oib:
set_parts.append("oib=COALESCE(NULLIF(oib,''), %s)")
params.append(oib)
params.append(kid)
cur.execute(f"UPDATE pgz_sport.klubovi SET {', '.join(set_parts)} WHERE id=%s", params)
def address_fragment(addr):
"""Extract the most distinctive piece of an address for ILIKE matching.
e.g. 'Trg Viktora Bubnja 1, 51000 Rijeka''Trg Viktora Bubnja 1'
"""
return (addr or '').split(',')[0].strip()
def run():
conn = db()
cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
report = {
'started_at': datetime.now(timezone.utc).isoformat(),
'problem_ids': PROBLEM_IDS,
'fixed': [],
'manual_review': [],
'failed': [],
}
cur.execute("""
SELECT id, naziv, adresa, grad, sport, oib FROM pgz_sport.klubovi
WHERE id = ANY(%s) ORDER BY id
""", (PROBLEM_IDS,))
problems = [dict(r) for r in cur.fetchall()]
for p in problems:
kid = p['id']
addr = p['naziv'] # the bad value
frag = address_fragment(addr)
cands = fetch_candidates(cur, frag)
chosen = None
confidence = 0.0
path = ''
if len(cands) == 1:
chosen = cands[0]
confidence = 0.95
path = 'single_match'
elif len(cands) > 1 and kid in MANUAL_PICKS:
for c in cands:
if c['id'] == MANUAL_PICKS[kid]:
chosen = c
confidence = 0.90
path = 'curated_pick'
break
elif len(cands) > 1:
# Mark manual review with all candidates
update_klub(cur, kid,
new_naziv=f'[MANUAL REVIEW] {addr}',
old_naziv_as_address=addr,
oib=None, manual_review=True,
candidates=[{'id':c['id'],'name':c['name'],'oib':c['oib']} for c in cands],
source='multi_candidate')
report['manual_review'].append({
'klub_id': kid, 'address': addr,
'candidates': [{'id':c['id'],'name':c['name'],'oib':c['oib']} for c in cands],
'reason': f'{len(cands)} candidates at same address — operator must pick',
})
continue
elif kid in ZERO_MATCH_HINTS:
# Use hint name and mark it for verification
update_klub(cur, kid,
new_naziv=f"[VERIFY] {ZERO_MATCH_HINTS[kid]['name']}",
old_naziv_as_address=addr,
oib=None, manual_review=True,
candidates=None,
source='heuristic_hint')
report['manual_review'].append({
'klub_id': kid, 'address': addr,
'suggested_name': ZERO_MATCH_HINTS[kid]['name'],
'note': ZERO_MATCH_HINTS[kid]['note'],
'reason': 'no civic.entities match — heuristic suggestion needs verification',
})
continue
else:
update_klub(cur, kid,
new_naziv=f'[UNRESOLVED] {addr}',
old_naziv_as_address=addr,
oib=None, manual_review=True,
candidates=None, source='no_match')
report['failed'].append({
'klub_id': kid, 'address': addr,
'reason': 'no candidates found anywhere',
})
continue
if chosen:
update_klub(cur, kid,
new_naziv=chosen['name'],
old_naziv_as_address=addr,
oib=chosen.get('oib'),
manual_review=False,
source=f"civic.entities#{chosen['id']}")
report['fixed'].append({
'klub_id': kid,
'old_naziv': addr,
'new_naziv': chosen['name'],
'oib_set': chosen.get('oib'),
'civic_entity_id': chosen['id'],
'confidence': confidence,
'path': path,
})
conn.commit()
cur.close(); conn.close()
report['completed_at'] = datetime.now(timezone.utc).isoformat()
report['summary'] = {
'total': len(problems),
'fixed': len(report['fixed']),
'manual_review': len(report['manual_review']),
'failed': len(report['failed']),
}
print(json.dumps(report, indent=2, ensure_ascii=False))
if __name__ == '__main__':
run()