Files

612 lines
25 KiB
Python
Executable File

#!/usr/bin/env python3
"""
HNS Semafor scraper for PGŽ football clubs.

Strategy:
  1. Seed-map known PGŽ clubs to HNS COMET klub_id (manual list to start)
  2. For each klub: fetch /klubovi/{id}/{slug}/ and extract roster (player list)
  3. For each player: fetch /igraci/{id}/{slug}/ → store in clanovi + utakmice_log
  4. Respect rate limit (1 req / 1.6 s, see RATE_S), record run in scraper_runs

Run modes:
  python hns_semafor.py seed                             # set hns_klub_id for known clubs
  python hns_semafor.py klub <db_klub_id> [max_matches]  # scrape one klub roster + players (default: 1 match)
  python hns_semafor.py klub_all                         # scrape every klub that has hns_klub_id set
  python hns_semafor.py player <hns_pid>                 # scrape one player
  python hns_semafor.py daily                            # full daily harvest of seeded PGŽ clubs
"""
# The docstring has to be the first statement of the module, otherwise
# __doc__ is None and the command-line usage message prints "None".

# auto-added by patch_scrapers_with_dotenv.sh
# Loaded before the DB settings below read os.environ["DB_PASSWORD"].
from dotenv import load_dotenv
load_dotenv('/opt/rinet-gpu/.env.master')
import os, re, sys, time, json, logging
from datetime import datetime, date
from urllib.parse import urljoin
import psycopg2
import psycopg2.extras
import requests
from bs4 import BeautifulSoup
# PostgreSQL connection settings passed to psycopg2.connect(); the password is
# read from the environment, so a missing DB_PASSWORD raises KeyError at import.
DB = {
    'host': 'localhost',
    'port': 5432,
    'dbname': 'rinet_v3',
    'user': 'rinet',
    'password': os.environ["DB_PASSWORD"],
}

# Root URL of the scraped site and the User-Agent sent with every request.
BASE = "https://semafor.hns.family"
UA = "PGZ-Sport-Bot/1.0 (+https://api.rinet.one/sport/; legitimni interes; analitika sporta PGZ)"

RATE_S = 1.6  # seconds between requests
TIMEOUT = 25  # passed as the `timeout` argument of requests.get()

# Log both to a file and to stdout.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("/opt/pgz-sport/_logs/hns_scraper.log"),
        logging.StreamHandler(sys.stdout),
    ],
)
log = logging.getLogger("hns")
# ═══ Manual seed mapping — PGŽ clubs → HNS COMET id ═══
# Discovered from semafor.hns.family/igraci/1167145/marko-komadina/ matches
SEED_MAP = {
    # club name (klubovi.naziv) → hns_klub_id
    "NK Klana": 1569,
    "NK Krk": 1558,
    "NK Mune": 1576,
    "NK Vihor": 4326,
    "NK Doker": 107415,
    "HNK Kozala": 3090,
    "HNK Lovran": 1574,
    "HNK Goranin": 1565,
    "NK Risnjak": 1583,
    "NK Lokomotiva": 1570,
    "NK Omladinac Vrata": 1579,
    "NK Draga": 1554,
    "NK Zamet": 1589,
    "NK Vrbovsko": 1588,
    "NK Rikard Benčić": 1582,
    "NK OŠK Omišalj": 3071,
}
def conn():
    """Open and return a new psycopg2 connection built from the DB settings.

    NOTE(review): per the psycopg2 documentation, using the connection in a
    ``with`` statement only scopes a transaction (commit/rollback) and does
    not close the connection; callers remain responsible for ``close()``.
    """
    connection = psycopg2.connect(**DB)
    return connection
def fetch(url: str) -> str:
    """GET *url* and return the response body as text.

    The request carries the bot User-Agent (UA) and a TIMEOUT-second timeout.
    A 4xx/5xx response raises ``requests.HTTPError`` via ``raise_for_status``;
    connection problems raise whatever ``requests.get`` raises.

    The RATE_S pause is taken after every attempt, failed ones included, so a
    run of errors cannot hit the server faster than the rate limit allows.
    """
    log.info(f"GET {url}")
    try:
        r = requests.get(url, headers={"User-Agent": UA}, timeout=TIMEOUT)
        r.raise_for_status()
        return r.text
    finally:
        # Runs on success and on error alike, before the result/exception
        # reaches the caller.
        time.sleep(RATE_S)
# Croatian letters mapped by hand: 'đ' has no Unicode decomposition, and the
# others are listed so the result never depends on normalization details.
_SLUG_TRANSLITERATION = str.maketrans({'č': 'c', 'ć': 'c', 'š': 's', 'ž': 'z', 'đ': 'd'})


def slugify(s: str) -> str:
    """Return a lowercase ASCII slug of *s* (``[a-z0-9]`` runs joined by '-').

    Croatian letters are transliterated (č/ć→c, š→s, ž→z, đ→d).  Any other
    accented letter is folded to its base letter via NFKD decomposition
    (e.g. 'é'→'e', 'ü'→'u') instead of being dropped, so foreign names keep
    all their letters.  Every remaining run of non-alphanumeric characters
    becomes a single '-', and leading/trailing dashes are removed.
    An empty or punctuation-only input yields ''.
    """
    import unicodedata  # local import: keeps this block self-contained

    s = s.lower().strip().translate(_SLUG_TRANSLITERATION)
    decomposed = unicodedata.normalize('NFKD', s)
    # Drop the combining marks that NFKD split off from their base letters.
    s = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
    return re.sub(r'[^a-z0-9]+', '-', s).strip('-')
def cmd_seed():
    """Write the SEED_MAP ids into pgz_sport.klubovi, inserting missing clubs.

    For each (naziv, hns_klub_id) pair:
      1. UPDATE football rows whose naziv contains the seed name (ILIKE) and
         whose hns_klub_id is still NULL or already equal to the seed id.
      2. If nothing was updated, skip the club when some row already carries
         that hns_klub_id.
      3. Otherwise INSERT a minimal new row.

    All changes are made in one transaction.  Returns
    ``{"updated": <rows updated>, "inserted": <rows inserted>}``.
    """
    n_updated = 0
    n_inserted = 0
    c = conn()
    try:
        # `with c` commits on success and rolls back on error; it does not
        # close the connection, hence the explicit close() below.
        with c, c.cursor() as cu:
            for naziv, hns_id in SEED_MAP.items():
                slug = slugify(naziv)
                cu.execute(
                    """UPDATE pgz_sport.klubovi
                       SET hns_klub_id=%s, hns_slug=%s, source_synced_at=now()
                       WHERE naziv ILIKE %s AND sport='nogomet'
                       AND (hns_klub_id IS NULL OR hns_klub_id=%s)""",
                    (hns_id, slug, f"%{naziv}%", hns_id))
                if cu.rowcount > 0:
                    n_updated += cu.rowcount
                    continue
                # Try by hns_klub_id directly (already set elsewhere)
                cu.execute("SELECT id FROM pgz_sport.klubovi WHERE hns_klub_id=%s", (hns_id,))
                if cu.fetchone():
                    continue
                # Insert new minimal row
                cu.execute(
                    """INSERT INTO pgz_sport.klubovi
                       (naziv, sport, razina, hns_klub_id, hns_slug, aktivan, region,
                       source_synced_at, napomena)
                       VALUES (%s,'nogomet','3.HRL',%s,%s,true,'PGŽ',now(),
                       'Auto-seeded from HNS Semafor (legitimni interes — analitika)')""",
                    (naziv, hns_id, slug))
                n_inserted += 1
    finally:
        c.close()
    log.info(f"Seed: updated={n_updated}, inserted={n_inserted}")
    return {"updated": n_updated, "inserted": n_inserted}
def parse_player_profile(hns_pid: int, html: str) -> dict:
    """Parse the HTML of /igraci/{id}/{slug}/ into a dict.

    The result always contains ``hns_pid`` and an empty ``matches`` list.
    The following keys are present only when found in the page:
    ``ime_prezime``, ``slika_url``, ``trenutni_klub_hns_id``, ``trenutni_klub``,
    ``datum_rodenja_raw``, ``datum_rodenja`` (ISO date string) and
    ``mjesto_rodenja``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    data = {"hns_pid": hns_pid, "matches": []}

    # Name in first <h1>
    h1 = soup.find('h1')
    if h1:
        data['ime_prezime'] = h1.get_text(' ', strip=True)

    # Photo: the <img> whose alt text is the player's name.  Skipped when no
    # name was parsed, because an empty alt cannot identify the player's photo.
    name = data.get('ime_prezime')
    if name:
        img = soup.find('img', alt=name)
        if img and img.get('src'):
            data['slika_url'] = img['src']

    # Current club: first link to /klubovi/{id}/, club name in its <h4>
    klub_link = soup.find('a', href=re.compile(r'/klubovi/(\d+)/'))
    if klub_link:
        m = re.search(r'/klubovi/(\d+)/', klub_link['href'])
        if m:
            data['trenutni_klub_hns_id'] = int(m.group(1))
        h = klub_link.find('h4')
        if h:
            data['trenutni_klub'] = h.get_text(' ', strip=True)

    # Date of birth: targets <li class="dob"> directly
    li_dob = soup.find('li', class_='dob')
    h4_dob = li_dob.find('h4') if li_dob else None
    if h4_dob:
        raw = h4_dob.get_text(' ', strip=True)
        data['datum_rodenja_raw'] = raw
        m = re.match(r'(\d{1,2})\.(\d{1,2})\.(\d{4})', raw)
        if m:
            day, month, year = (int(g) for g in m.groups())
            try:
                data['datum_rodenja'] = date(year, month, day).isoformat()
            except ValueError:
                # Not a real calendar date (e.g. 31.02.); keep only the raw text.
                pass

    # Place of birth: targets <li class="pob">
    li_pob = soup.find('li', class_='pob')
    h4_pob = li_pob.find('h4') if li_pob else None
    if h4_pob:
        data['mjesto_rodenja'] = h4_pob.get_text(strip=True)

    # Old fallback method: the <h4> preceding an <h3> "Mjesto rođenja".
    # Used only when the targeted lookup above found nothing, so it can no
    # longer overwrite the precise value with whatever <h4> happens to come
    # before the label.
    if not data.get('mjesto_rodenja'):
        for h3 in soup.find_all('h3'):
            if 'Mjesto rođenja' in h3.get_text():
                prev = h3.find_previous('h4')
                if prev:
                    data['mjesto_rodenja'] = prev.get_text(strip=True)
    return data
def upsert_player(klub_id_db: int, prof: dict) -> int:
    """Insert or update the pgz_sport.clanovi row for a parsed player profile.

    The row is identified by ``source='hns_semafor'`` and
    ``source_id=str(prof['hns_pid'])``.  Returns the clanovi id.

    On UPDATE, fields missing from *prof* (name, date/place of birth, photo)
    keep their stored values instead of being overwritten with NULL or '',
    so one bad parse cannot wipe previously scraped data.  This mirrors the
    COALESCE pattern used for updates in cmd_klub.

    NOTE(review): the name is split on the FIRST space here (ime = first
    word), while cmd_klub splits on the LAST space (prezime = last word);
    names of three or more words are therefore stored differently by the two
    paths.  Confirm which convention is intended.
    """
    full_name = prof.get('ime_prezime') or ''
    ime, _, prezime = full_name.partition(' ')
    slug = slugify(full_name)
    url = f"{BASE}/igraci/{prof['hns_pid']}/{slug}/"
    has_name = bool(full_name)

    c = conn()
    try:
        # `with c` commits on success and rolls back on error; it does not
        # close the connection, hence the explicit close() below.
        with c, c.cursor() as cu:
            # Try find existing by source_id
            cu.execute(
                """SELECT id FROM pgz_sport.clanovi
                   WHERE source='hns_semafor' AND source_id=%s""",
                (str(prof['hns_pid']),))
            row = cu.fetchone()
            if row:
                cid = row[0]
                # A NULL parameter means "not parsed": COALESCE keeps the old value.
                cu.execute(
                    """UPDATE pgz_sport.clanovi
                       SET ime=COALESCE(%s, ime), prezime=COALESCE(%s, prezime),
                       datum_rodenja=COALESCE(%s, datum_rodenja),
                       mjesto_rodenja=COALESCE(%s, mjesto_rodenja),
                       slika_url=COALESCE(%s, slika_url), klub_id=%s,
                       source_url=COALESCE(%s, source_url), source_synced_at=now()
                       WHERE id=%s""",
                    (ime if has_name else None,
                     prezime if has_name else None,
                     prof.get('datum_rodenja'),
                     prof.get('mjesto_rodenja') or None,
                     prof.get('slika_url') or None,
                     klub_id_db,
                     url if has_name else None,  # without a name the slug part is empty
                     cid))
            else:
                cu.execute(
                    """INSERT INTO pgz_sport.clanovi
                       (klub_id, ime, prezime, datum_rodenja, mjesto_rodenja, slika_url,
                       source, source_id, source_url, source_synced_at, slug)
                       VALUES (%s,%s,%s,%s,%s,%s,'hns_semafor',%s,%s,now(),%s)
                       RETURNING id""",
                    (klub_id_db, ime, prezime, prof.get('datum_rodenja'),
                     prof.get('mjesto_rodenja'), prof.get('slika_url'),
                     str(prof['hns_pid']), url, slug))
                cid = cu.fetchone()[0]
    finally:
        c.close()
    return cid
def _query_scalar(sql: str, params: tuple):
    """Run *sql* on a short-lived connection; return the first column of the first row, or None."""
    c = conn()
    try:
        # `with c` only ends the transaction; close() releases the connection.
        with c, c.cursor() as cu:
            cu.execute(sql, params)
            row = cu.fetchone()
            return row[0] if row else None
    finally:
        c.close()


def cmd_player(hns_pid: int, klub_id_db: int = None):
    """Scrape a single player by HNS ID and upsert it into clanovi.

    The DB klub is resolved in this order:
      1. the *klub_id_db* argument, when given;
      2. the klub_id already stored for this player in clanovi;
      3. the klub whose hns_klub_id matches the current club on the profile.

    Returns the clanovi id, or None when no klub could be resolved (the
    player is then not stored).  Exceptions raised by fetch() propagate.

    NOTE(review): because step 2 takes precedence over step 3, a player
    already in the DB keeps the stored klub even if the profile now shows a
    different club — confirm this is intended.
    """
    if klub_id_db is None:
        # try to infer from current klub via DB if previously stored
        klub_id_db = _query_scalar(
            """SELECT klub_id FROM pgz_sport.clanovi
               WHERE source='hns_semafor' AND source_id=%s""",
            (str(hns_pid),))

    url = f"{BASE}/igraci/{hns_pid}/dummy/"  # slug is forgiving; HNS redirects
    html = fetch(url)
    prof = parse_player_profile(hns_pid, html)
    log.info(f"Parsed: {prof.get('ime_prezime','?')} (HNS#{hns_pid}) klub={prof.get('trenutni_klub','?')}")

    # Resolve current_klub_hns_id → klub_id_db if not provided
    if klub_id_db is None and prof.get('trenutni_klub_hns_id'):
        klub_id_db = _query_scalar(
            "SELECT id FROM pgz_sport.klubovi WHERE hns_klub_id=%s",
            (prof['trenutni_klub_hns_id'],))

    if klub_id_db is None:
        log.warning(f"No DB klub_id for HNS player {hns_pid} — skipping upsert")
        return None
    return upsert_player(klub_id_db, prof)
def cmd_daily():
    """Run the daily harvest: refresh the seed mapping, then re-scrape known players.

    A row is inserted into pgz_sport.scraper_runs at the start and completed
    in a ``finally`` block with status 'ok' (no errors) or 'partial', the
    number of players actually upserted and the error list as JSON.

    Per-player failures are logged and collected without stopping the run.
    A failure outside the per-player loop (seeding, the pid query, an
    interrupt) is also added to the error list before being re-raised, so an
    aborted run is never recorded as 'ok'.
    """
    c = conn()
    try:
        # `with c` commits/rolls back but does not close the connection.
        with c, c.cursor() as cu:
            cu.execute("""INSERT INTO pgz_sport.scraper_runs (source, scope)
                          VALUES ('hns_semafor','daily') RETURNING id""")
            run_id = cu.fetchone()[0]
    finally:
        c.close()

    inserted = 0  # this mode only re-fetches players already in the DB
    updated = 0
    errors = []
    try:
        # Phase 1: ensure seed mapping is current
        cmd_seed()
        log.info("=== Daily HNS harvest start ===")
        # TODO: roster discovery requires per-klub roster page. For now, only re-fetch known players.
        c = conn()
        try:
            with c, c.cursor() as cu:
                cu.execute("""SELECT source_id FROM pgz_sport.clanovi
                              WHERE source='hns_semafor' ORDER BY source_synced_at NULLS FIRST LIMIT 500""")
                pids = [r[0] for r in cu.fetchall()]
        finally:
            c.close()

        for pid in pids:
            try:
                cid = cmd_player(int(pid))
            except Exception as e:
                log.error(f"player {pid}: {e}")
                errors.append({"pid": pid, "err": str(e)})
            else:
                # cmd_player returns None when it skipped the upsert.
                if cid is not None:
                    updated += 1
        log.info(f"=== Daily done: updated={updated} errors={len(errors)} ===")
    except BaseException as e:
        # Anything that aborts the run (including Ctrl-C) is recorded so the
        # status below becomes 'partial'; the exception still propagates.
        errors.append({"pid": None, "err": f"fatal: {e!r}"})
        raise
    finally:
        c = conn()
        try:
            with c, c.cursor() as cu:
                cu.execute(
                    """UPDATE pgz_sport.scraper_runs
                       SET finished_at=now(), status=%s, rows_updated=%s, errors=%s::jsonb, rows_inserted=%s
                       WHERE id=%s""",
                    ("ok" if not errors else "partial", updated, json.dumps(errors), inserted, run_id))
        finally:
            c.close()
# CSS class of an event <li> → internal event kind.
_EVENT_KIND_MAP = {
    'goal': 'gol',
    'ownGoal': 'autogol',
    'penaltyGoal': 'gol',
    'yellow': 'zuti',
    'secondYellow': 'zuti2',  # second yellow → effectively red
    'red': 'crveni',
    'substitutionIn': 'subIn',
    'substitutionOut': 'subOut',
}

# Nominal match length used when estimating minutes played.
_FULL_TIME_MIN = 90


def _minute_value(minute):
    """Return the regular-time minute of an event string ('87' → 87, '45+2' → 45), or None."""
    if not minute:
        return None
    base = minute.split('+', 1)[0]
    return int(base) if base.isdigit() else None


def _estimate_minutes(is_starter, events):
    """Estimate minutes played from the first subIn/subOut events; None when a needed minute is unknown."""
    sub_in = next((e for e in events if e['kind'] == 'subIn'), None)
    sub_out = next((e for e in events if e['kind'] == 'subOut'), None)
    if is_starter:
        start = 0
    elif sub_in is None:
        return 0  # substitute who never came on
    else:
        start = _minute_value(sub_in['minute'])
    end = _FULL_TIME_MIN if sub_out is None else _minute_value(sub_out['minute'])
    if start is None or end is None:
        return None
    return max(0, end - start)


def _parse_events(li):
    """Return the recognised events of one lineup <li> as [{'kind', 'minute'}, ...]."""
    events = []
    me_div = li.find('div', class_='matchEvents')
    ev_ul = me_div.find('ul', class_='events') if me_div else None
    if not ev_ul:
        return events
    for ev_li in ev_ul.find_all('li', recursive=False):
        ev_cls = ev_li.get('class') or []
        kind = next((_EVENT_KIND_MAP[k] for k in ev_cls if k in _EVENT_KIND_MAP), None)
        if not kind:
            continue
        # Minute text looks like 40' or 90+3'; it stays a string in the output.
        mm = re.search(r"(\d+(?:\+\d+)?)\s*'", ev_li.get_text(' ', strip=True))
        events.append({'kind': kind, 'minute': mm.group(1) if mm else None})
    return events


def _parse_lineup_player(li, is_starter):
    """Turn one <li class='row match_lineup'> into a player dict, or None if it has no usable player id."""
    pid = li.get('data-personid')
    if not pid:
        a = li.find('a', href=re.compile(r'/igraci/(\d+)/'))
        if not a:
            return None
        pid = re.search(r'/igraci/(\d+)/', a['href']).group(1)
    try:
        pid = int(pid)
    except (TypeError, ValueError):
        return None

    # Shirt number
    broj_dresa = None
    sn = li.find('div', class_='shirtNumber')
    if sn:
        bs = sn.get_text(' ', strip=True).strip()
        if bs.isdigit():
            broj_dresa = int(bs)

    # Image
    img = li.find('img')
    slika = img.get('src') if img else None

    # Name + position
    ime_prezime = ''
    pozicija = None
    captain = False
    pn = li.find('div', class_='playerName')
    if pn:
        a2 = pn.find('a')
        if a2:
            ime_prezime = a2.get_text(' ', strip=True)
        # Position is the text after the <h3> that holds the name
        rest = pn.get_text(' ', strip=True).replace(ime_prezime, '').strip()
        if '(C)' in rest:
            captain = True
        rest = rest.replace('(C)', '').strip()
        if 'Vratar' in rest:
            pozicija = 'Vratar'
        elif 'Igrač' in rest:
            pozicija = 'Igrač'

    events = _parse_events(li)
    return {
        'hns_pid': pid,
        'ime_prezime': ime_prezime,
        'broj_dresa': broj_dresa,
        'pozicija': pozicija,
        'slika_url': slika,
        'captain': captain,
        'starter': is_starter,
        'events': events,
        'pogodaka': sum(1 for e in events if e['kind'] == 'gol'),
        'zuti_kartoni': sum(1 for e in events if e['kind'] == 'zuti'),
        # 2nd yellow = red
        'crveni_kartoni': sum(1 for e in events if e['kind'] in ('crveni', 'zuti2')),
        'minute': _estimate_minutes(is_starter, events),
    }


def _parse_team_div(team_div):
    """Return (team_name, players) for a homeTeam/awayTeam <div>; (None, []) when it has no list."""
    if not team_div:
        return None, []
    ul = team_div.find('ul', recursive=False) or team_div.find('ul')
    if not ul:
        return None, []
    team_name = None
    players = []
    is_starter = True
    for li in ul.find_all('li', recursive=False):
        cls = li.get('class') or []
        if 'header' in cls and 'clubName' in cls:
            team_name = li.get_text(' ', strip=True)
            continue
        if 'header' in cls and 'separatorTitle' in cls:
            # Rows after the "Pričuvni…" separator are the substitutes.
            if 'Pričuvni' in li.get_text(' ', strip=True):
                is_starter = False
            continue
        if not ('row' in cls and 'match_lineup' in cls):
            continue
        player = _parse_lineup_player(li, is_starter)
        if player is not None:
            players.append(player)
    return team_name, players


def parse_match(html, match_url=None):
    """HNS match parser v4 — uses precise class signals.

    Player <li class='row match_lineup' data-personid='87561'>:
      <div class='shirtNumber'>9</div>
      <div class='playerPhoto'><div class='photo'><img src='...' /></div></div>
      <div class='playerName'><h3><a href='/igraci/.../'>Ivan Laginja</a></h3>Igrač</div>
      <div class='matchEvents'>
        <ul class='events'>
          <li class='goal'><div class='icon' title='Gol'></div>40'</li>
          <li class='substitutionOut'><div class='icon' title='Izmjena'></div>87'</li>
          <li class='yellow'>...</li>
          <li class='red'>...</li>
          <li class='ownGoal'>...</li>
          <li class='substitutionIn'>...</li>
        </ul>
      </div>

    Returns a dict with:
      ``teams``     team name → list of player dicts (only teams whose name was found)
      ``meta``      klub_dom / klub_gost always; klub_dom_logo, klub_gost_logo,
                    datum, vrijeme, gledatelja, rezultat, natjecanje when found
      ``title``     text of the first <h1> ('' if none)
      ``match_url`` the *match_url* argument, unchanged

    Each player's ``minute`` is an estimate based on a 90-minute match:
    stoppage-time minutes ('45+2') count as the regular minute, a substitute
    who is later taken off is credited ``out - in``, and the value is None
    when a substitution happened but its minute could not be read.
    """
    soup = BeautifulSoup(html, 'html.parser')
    out = {"teams": {}, "match_url": match_url, "meta": {}, "title": ""}
    h1 = soup.find('h1')
    out['title'] = h1.get_text(' ', strip=True) if h1 else ''

    home_div = soup.find('div', class_='homeTeam')
    away_div = soup.find('div', class_='awayTeam')
    home_name, home_players = _parse_team_div(home_div)
    away_name, away_players = _parse_team_div(away_div)
    if home_name:
        out['teams'][home_name] = home_players
    if away_name:
        out['teams'][away_name] = away_players

    meta = out['meta']
    # Logo URLs: first <img> inside each team block
    if home_div:
        h_img = home_div.find('img')
        meta['klub_dom_logo'] = h_img.get('src') if h_img else None
    if away_div:
        a_img = away_div.find('img')
        meta['klub_gost_logo'] = a_img.get('src') if a_img else None
    meta['klub_dom'] = home_name
    meta['klub_gost'] = away_name

    # Date/time, viewership, score, competition
    body_text = soup.get_text(' ', strip=True)
    dm = re.search(r'(\d{1,2}\.\d{1,2}\.\d{4})\.?\s*(\d{1,2}:\d{2})', body_text)
    if dm:
        # The regex guarantees exactly three dot-separated numeric parts.
        day, month, year = dm.group(1).split('.')
        meta['datum'] = f"{year}-{month.zfill(2)}-{day.zfill(2)}"
        meta['vrijeme'] = dm.group(2)
    gm = re.search(r'Gledatelja:\s*(\d+)', body_text)
    if gm:
        meta['gledatelja'] = int(gm.group(1))
    rm = re.search(r'(\d+):(\d+)', out['title'])
    if rm:
        meta['rezultat'] = f"{rm.group(1)}:{rm.group(2)}"
    # Competition: the part of the title after the last comma
    title_parts = out['title'].split(',')
    if len(title_parts) > 1:
        meta['natjecanje'] = title_parts[-1].strip()
    return out
# Tokens too common in club names to count as evidence of a match.
_GENERIC_CLUB_TOKENS = frozenset({'nk', 'hnk', 'klub', 'nogometni'})


def _club_name_tokens(name_low):
    """Split a lower-cased club name into its set of word tokens."""
    return {t for t in re.split(r'\s+', re.sub(r'[^\w]', ' ', name_low)) if t}


def _find_club_roster(klub_naziv, teams):
    """Return the player list of the team in *teams* that matches *klub_naziv*, or [] if none does."""
    klub_low = klub_naziv.lower()
    klub_tokens = _club_name_tokens(klub_low)
    for tn, players in teams.items():
        tn_low = tn.lower()
        # try exact substring both directions
        if klub_low in tn_low or tn_low in klub_low:
            return players
        # token-set overlap (e.g. "NK Krk" vs "NK Krk Krk" or "NK Vihor" vs "NK Vihor (B)")
        strong = (klub_tokens & _club_name_tokens(tn_low)) - _GENERIC_CLUB_TOKENS
        if strong and (klub_low.split()[-1] in tn_low or tn_low.split()[-1] in klub_low):
            log.info(f" fuzzy match: {klub_naziv} → {tn}")
            return players
    return []


def _upsert_lineup_player(cu, klub_id_db, pl):
    """Insert or update the clanovi row for one lineup player using cursor *cu*; return its id."""
    name = pl['ime_prezime'] or ''
    # Last word is the surname; a single-word name goes entirely into ime.
    parts = name.rsplit(' ', 1)
    ime = parts[0] if len(parts) > 1 else name
    prezime = parts[1] if len(parts) > 1 else ''
    slug = slugify(name)
    src_url = f"{BASE}/igraci/{pl['hns_pid']}/{slug}/"
    cu.execute(
        """SELECT id FROM pgz_sport.clanovi WHERE source='hns_semafor' AND source_id=%s""",
        (str(pl['hns_pid']),))
    row = cu.fetchone()
    if row:
        cid = row[0]
        cu.execute(
            """UPDATE pgz_sport.clanovi
               SET ime=%s, prezime=%s, slika_url=COALESCE(NULLIF(%s,''), slika_url),
               broj_dresa=COALESCE(%s, broj_dresa),
               pozicija=COALESCE(%s, pozicija),
               klub_id=%s, source_url=%s, source_synced_at=now(), slug=%s
               WHERE id=%s""",
            (ime, prezime, pl.get('slika_url') or '', pl.get('broj_dresa'),
             pl.get('pozicija'), klub_id_db, src_url, slug, cid))
        return cid
    cu.execute(
        """INSERT INTO pgz_sport.clanovi
           (klub_id, ime, prezime, slika_url, broj_dresa, pozicija,
           source, source_id, source_url, source_synced_at, slug)
           VALUES (%s,%s,%s,%s,%s,%s,'hns_semafor',%s,%s,now(),%s)
           RETURNING id""",
        (klub_id_db, ime, prezime, pl.get('slika_url'), pl.get('broj_dresa'),
         pl.get('pozicija'), str(pl['hns_pid']), src_url, slug))
    return cu.fetchone()[0]


def cmd_klub(klub_id_db: int, max_matches: int = 999):
    """Scrape one klub: club page → matches → upsert our players + utakmice_log rows.

    Steps:
      1. Load the klub row; return 0 if it is missing or has no hns_klub_id.
      2. Fetch the club page and collect the distinct /utakmice/{id}/ links
         in page order; return 0 if the page cannot be fetched.
      3. For up to *max_matches* matches: fetch and parse the match, pick the
         lineup of the team whose name matches this klub, upsert each player
         into clanovi and write one utakmice_log row per player.  Matches
         that fail to fetch/parse or contain no matching team are skipped.

    Each match is stored in its own transaction.  Returns the number of
    distinct players seen.  Database errors propagate to the caller.
    """
    c = conn()
    try:
        # `with c` only ends the transaction; close() releases the connection.
        with c, c.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cu:
            cu.execute("SELECT id, naziv, hns_klub_id, hns_slug FROM pgz_sport.klubovi WHERE id=%s", (klub_id_db,))
            klub = cu.fetchone()
    finally:
        c.close()
    if not klub or not klub['hns_klub_id']:
        log.error(f"Klub #{klub_id_db}: nema hns_klub_id")
        return 0

    klub_url = f"{BASE}/klubovi/{klub['hns_klub_id']}/{klub['hns_slug'] or 'k'}/"
    log.info(f"Klub: {klub['naziv']} → {klub_url}")
    try:
        html = fetch(klub_url)
    except Exception as e:
        log.error(f"klub fetch failed: {e}")
        return 0

    soup = BeautifulSoup(html, 'html.parser')
    match_re = re.compile(r'/utakmice/(\d+)/')
    found_ids = []
    for a in soup.find_all('a', href=match_re):
        mm = match_re.search(a['href'])
        if mm:
            found_ids.append(mm.group(1))
    # dict.fromkeys de-duplicates while keeping first-seen order.
    match_ids = list(dict.fromkeys(found_ids))
    log.info(f" found {len(match_ids)} matches; processing up to {max_matches}")

    seen_pids = set()
    matches_logged = 0
    for mid in match_ids[:max_matches]:
        match_url = f"{BASE}/utakmice/{mid}/"
        try:
            md = parse_match(fetch(match_url), match_url=match_url)
        except Exception as e:
            log.error(f" match {mid}: {e}")
            continue
        if not md.get('teams'):
            log.warning(f" match {mid}: no teams parsed")
            continue

        # Find which team (home or away) is OURS — looser match (incl. token overlap)
        roster = _find_club_roster(klub['naziv'], md['teams'])
        if not roster:
            continue  # silently skip non-matching

        meta = md.get('meta', {})
        # Home/away names come from the parser's explicit meta fields.  The
        # key order of md['teams'] is only a fallback: when the home name
        # failed to parse, the away team is its first (and only) key.
        team_keys = list(md['teams'])
        if 'klub_dom' in meta:
            klub_dom = meta['klub_dom']
        else:
            klub_dom = team_keys[0] if team_keys else None
        if 'klub_gost' in meta:
            klub_gost = meta['klub_gost']
        else:
            klub_gost = team_keys[1] if len(team_keys) > 1 else None

        c = conn()
        try:
            with c, c.cursor() as cu:
                for pl in roster:
                    if not pl.get('hns_pid'):
                        continue
                    seen_pids.add(pl['hns_pid'])
                    cid = _upsert_lineup_player(cu, klub_id_db, pl)
                    cu.execute(
                        """INSERT INTO pgz_sport.utakmice_log
                           (clan_id, source, source_match_id, source_url, datum, vrijeme,
                           natjecanje, klub_dom, klub_dom_logo, klub_gost, klub_gost_logo, rezultat, za_klub_id,
                           pogodaka, zuti_kartoni, crveni_kartoni, minute, zapocet_kao_starter)
                           VALUES (%s,'hns_semafor',%s,%s,%s,%s,
                           %s,%s,%s,%s,%s,%s,%s,
                           %s,%s,%s,%s,%s)
                           ON CONFLICT (source, source_match_id, clan_id) DO UPDATE SET
                           datum=EXCLUDED.datum, rezultat=EXCLUDED.rezultat,
                           za_klub_id=EXCLUDED.za_klub_id,
                           pogodaka=EXCLUDED.pogodaka, zuti_kartoni=EXCLUDED.zuti_kartoni,
                           crveni_kartoni=EXCLUDED.crveni_kartoni, minute=EXCLUDED.minute,
                           zapocet_kao_starter=EXCLUDED.zapocet_kao_starter,
                           klub_dom_logo=EXCLUDED.klub_dom_logo, klub_gost_logo=EXCLUDED.klub_gost_logo""",
                        (cid, mid, match_url,
                         meta.get('datum'), meta.get('vrijeme'),
                         meta.get('natjecanje'), klub_dom, meta.get('klub_dom_logo'),
                         klub_gost, meta.get('klub_gost_logo'),
                         meta.get('rezultat'), klub_id_db,
                         pl.get('pogodaka', 0), pl.get('zuti_kartoni', 0),
                         pl.get('crveni_kartoni', 0), pl.get('minute'),
                         pl.get('starter', True)))
        finally:
            c.close()
        matches_logged += 1

    log.info(f"Klub {klub['naziv']} done: {len(seen_pids)} unique players, {matches_logged} matches logged")
    return len(seen_pids)
if __name__ == '__main__':
    # Command-line dispatch: the first argument selects the run mode.
    # __doc__ is None whenever the module docstring is not the first
    # statement of the file, so keep a short built-in usage text as fallback.
    usage = __doc__ or (
        "Usage: hns_semafor.py seed | klub <db_klub_id> [max_matches] | "
        "klub_all | player <hns_pid> | daily"
    )
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)
    cmd = sys.argv[1]
    if cmd == 'seed':
        print(cmd_seed())
    elif cmd == 'player':
        if len(sys.argv) < 3:
            print("Usage: player <hns_pid>")
            sys.exit(2)
        cid = cmd_player(int(sys.argv[2]))
        print(f"clan_id={cid}")
    elif cmd == 'daily':
        cmd_daily()
    elif cmd == 'klub':
        if len(sys.argv) < 3:
            print("Usage: klub <db_klub_id> [max_matches]")
            sys.exit(2)
        # From the command line only one match is processed unless a limit is given.
        max_m = int(sys.argv[3]) if len(sys.argv) > 3 else 1
        cmd_klub(int(sys.argv[2]), max_matches=max_m)
    elif cmd == 'klub_all':
        # Scrape all PGŽ clubs with hns_klub_id set
        c = conn()
        try:
            # `with c` only ends the transaction; close() releases the connection.
            with c, c.cursor() as cu:
                cu.execute("SELECT id FROM pgz_sport.klubovi WHERE hns_klub_id IS NOT NULL ORDER BY id")
                kids = [r[0] for r in cu.fetchall()]
        finally:
            c.close()
        log.info(f"Scraping rosters for {len(kids)} klubova…")
        for kid in kids:
            # One failing club must not stop the remaining ones.
            try:
                cmd_klub(kid, max_matches=999)
            except Exception as e:
                log.error(f"klub {kid}: {e}")
    else:
        print(f"Unknown: {cmd}")
        sys.exit(2)