9fb512932a
Endpoints:
- GET /api/v2/enrich-sources — sport→source mapping
- GET /api/v2/klubovi/priority-sort — financirani/godišnjak prvi
- GET /api/v2/clan/{id}/kategorije — many-to-many kategorije
- GET /api/v2/clan/{id}/full — kompletna slika (profil+kategorije+sezone+utakmice+stats)
- POST /api/v2/export/klubovi — XLSX export selektiranih
Schema:
- pgz_sport.clan_kategorije (M2M: igrač u juniorskoj+seniorskoj)
- pgz_sport.player_stats (multi-sport: nogomet/košarka/rukomet/odbojka/vaterpolo)
- pgz_sport.klub_roster (multi-source)
- pgz_sport.enrichment_sources (sport→izvor)
- View: v_pgz_priority_klubovi (financiran || u_godisnjaku)
- View: v_klubovi_priority_sort (priority sort)
Sport harvesters scaffold:
- scripts/sport_harvesters/__base.py (SportHarvester class)
- hks_basketball.py, hrs_handball.py, hos_volleyball.py, hvs_waterpolo.py
33 lines
1.4 KiB
Python
Executable File
33 lines
1.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""HKS-CBF + FIBA LiveStats basketball harvester."""
|
|
import sys, re
|
|
sys.path.insert(0, '/opt/pgz-sport/scripts/sport_harvesters')
|
|
from __base import SportHarvester
|
|
|
|
class HKSHarvester(SportHarvester):
    """Basketball harvester that discovers club pages on hks-cbf.hr.

    For each club it runs the site's search by club name and stores the
    first club-page link it finds as the club's ``source_url``.
    """

    # Identifiers for this harvester; presumably consumed by SportHarvester
    # to select clubs and tag results — verify against __base.py.
    SPORT = 'košarka'
    SOURCE = 'hks_cbf'

    def scrape_klub(self, page, klub) -> None:
        """Search hks-cbf.hr for ``klub`` and save the first club link found.

        Args:
            page: Browser page exposing ``goto`` and ``locator`` (looks like
                a Playwright page — confirm in the base class).
            klub: Mapping that provides at least the ``'id'`` and ``'naziv'``
                keys.

        The ``source_url`` column is only written when it is currently NULL
        or empty, so an existing URL is never overwritten. Any exception is
        logged and swallowed, which keeps this a best-effort step.
        """
        from urllib.parse import quote_plus, urljoin

        # Discovery: try search hks-cbf.hr by club name. quote_plus encodes
        # spaces as '+' and percent-encodes reserved characters (&, #, +, ?)
        # that would otherwise corrupt the query string.
        url = f"https://www.hks-cbf.hr/?s={quote_plus(klub['naziv'])}"
        self.log(f" 🏀 Klub {klub['id']} {klub['naziv']} → {url}")
        try:
            page.goto(url, wait_until="domcontentloaded", timeout=20000)
            # Look for /klubovi/ or /klub/ link
            klub_links = page.locator('a[href*="/klubovi/"], a[href*="/klub/"]').all()
            # Only the first three candidates are inspected.
            for a in klub_links[:3]:
                href = a.get_attribute('href')
                if href and 'klub' in href:
                    # Resolve relative / protocol-relative links so the value
                    # stored in the database is a usable absolute URL;
                    # absolute hrefs pass through unchanged.
                    href = urljoin(url, href)
                    self.log(f" Found: {href}")
                    # Save URL to klub, keeping any non-empty value already there.
                    # NOTE(review): no commit is issued here — presumably the
                    # base class commits or the connection autocommits; confirm.
                    with self.conn.cursor() as cur:
                        cur.execute(
                            "UPDATE pgz_sport.klubovi SET source_url = COALESCE(NULLIF(source_url,''), %s) WHERE id = %s",
                            (href, klub['id']))
                    break
        except Exception as e:
            # Best-effort: one failing club is logged and does not raise.
            self.log(f" ❌ {e}")
|
|
|
|
if __name__ == '__main__':
    # Script entry point: an optional first CLI argument is parsed as an
    # integer and passed to run() as ``limit``; without it, 50 is used.
    harvester = HKSHarvester()
    cli_args = sys.argv[1:]
    if cli_args:
        run_limit = int(cli_args[0])
    else:
        run_limit = 50
    harvester.run(limit=run_limit)
|