HNS+UI: 4 nova endpointa + multi-sport schema (M2M kategorije + player_stats)
Endpoints:
- GET /api/v2/enrich-sources — sport→source mapping
- GET /api/v2/klubovi/priority-sort — financirani/godišnjak prvi
- GET /api/v2/clan/{id}/kategorije — many-to-many kategorije
- GET /api/v2/clan/{id}/full — kompletna slika (profil+kategorije+sezone+utakmice+stats)
- POST /api/v2/export/klubovi — XLSX export selektiranih
Schema:
- pgz_sport.clan_kategorije (M2M: igrač u juniorskoj+seniorskoj)
- pgz_sport.player_stats (multi-sport: nogomet/košarka/rukomet/odbojka/vaterpolo)
- pgz_sport.klub_roster (multi-source)
- pgz_sport.enrichment_sources (sport→izvor)
- View: v_pgz_priority_klubovi (financiran || u_godisnjaku)
- View: v_klubovi_priority_sort (priority sort)
Sport harvesters scaffold:
- scripts/sport_harvesters/__base.py (SportHarvester class)
- hks_basketball.py, hrs_handball.py, hos_volleyball.py, hvs_waterpolo.py
This commit is contained in:
Executable
+54
@@ -0,0 +1,54 @@
|
||||
#!/usr/bin/env python3
|
||||
"""HVS waterpolo harvester."""
|
||||
import sys, re
|
||||
sys.path.insert(0, '/opt/pgz-sport/scripts/sport_harvesters')
|
||||
from __base import SportHarvester
|
||||
|
||||
class HVSHarvester(SportHarvester):
    """Harvest water polo clubs and their player rosters from hvs.hr.

    For every club the HVS club listing is searched for a link whose text
    contains a distinctive word of the club name.  On a hit the club's
    ``source_url`` is filled in (only when it is currently NULL/empty) and
    the player links found on the club page are upserted.
    """

    SPORT = 'vaterpolo'
    SOURCE = 'hvs'

    # Upper bounds on how many listing links / roster links are inspected.
    _MAX_KLUB_LINKS = 30
    _MAX_IGRAC_LINKS = 30

    # Words that occur in (nearly) every water polo club name.  Matching on
    # them would pair a club with whatever link happens to come first in the
    # listing, so they are never used as match keywords.
    _GENERIC_TOKENS = frozenset({'klub', 'vaterpolo', 'vaterpolski'})

    # Compiled once instead of on every loop iteration.
    _KLUB_ID_RE = re.compile(r'/klub/(\d+)')
    _IGRAC_ID_RE = re.compile(r'/igrac/(\d+)')

    def scrape_klub(self, page, klub):
        """Find *klub* in the HVS listing and harvest its roster.

        Args:
            page: Browser page object; only ``goto`` and ``locator(...).all()``
                are used (presumably a Playwright ``Page`` - confirm against
                the base class).
            klub: Mapping that provides at least ``'id'`` and ``'naziv'``.

        Returns:
            None.  Every exception raised while scraping is logged through
            ``self.log`` and not re-raised.
        """
        self.log(f" 🤽 Klub {klub['id']} {klub['naziv']}")
        try:
            keywords = self._match_keywords(klub['naziv'])
            if not keywords:
                # Nothing distinctive to search for: skipping is safer than
                # attaching the first listed club to this record.
                self.log(" No distinctive keyword in klub name, skipping")
                return

            # Get the list of all clubs from HVS.
            page.goto("https://hvs.hr/klubovi/", wait_until="domcontentloaded", timeout=20000)
            klub_links = page.locator('a[href*="/klub/"]').all()

            for link in klub_links[:self._MAX_KLUB_LINKS]:
                text = link.inner_text().lower()
                href = link.get_attribute('href') or ''

                # Naive match: does any distinctive word of the club name
                # appear in the link text?
                if not any(kw in text for kw in keywords):
                    continue
                self.log(f" Match: {text[:50]} → {href}")

                m = self._KLUB_ID_RE.search(href)
                if not m:
                    # The link carries no numeric club id; try the next one.
                    continue

                new_url = f"https://hvs.hr/klub/{m.group(1)}/"
                with self.conn.cursor() as cur:
                    # COALESCE/NULLIF keeps an already populated source_url.
                    cur.execute("UPDATE pgz_sport.klubovi SET source_url = COALESCE(NULLIF(source_url,''), %s) WHERE id = %s", (new_url, klub['id']))

                self._harvest_roster(page, klub, new_url)
                # The page has navigated away from the listing, so the
                # remaining listing links must not be read any more.
                break
        except Exception as e:
            self.log(f" ❌ {e}")

    def _match_keywords(self, naziv):
        """Return the lower-cased words of *naziv* usable to identify a club."""
        return [
            kw for kw in naziv.lower().split()
            if len(kw) > 3 and kw not in self._GENERIC_TOKENS
        ]

    def _harvest_roster(self, page, klub, klub_url):
        """Open *klub_url* and upsert each player linked from that page."""
        # Now visit the club page for the roster.
        page.goto(klub_url, wait_until="domcontentloaded", timeout=15000)
        igrac_links = page.locator('a[href*="/igrac/"]').all()
        self.log(f" {len(igrac_links)} igrača found")

        for link in igrac_links[:self._MAX_IGRAC_LINKS]:
            ihref = link.get_attribute('href') or ''
            naziv = link.inner_text().strip()
            mi = self._IGRAC_ID_RE.search(ihref)
            if not (mi and naziv):
                continue

            # First word is the given name, the rest (if any) the surname.
            parts = naziv.split(maxsplit=1)
            ime = parts[0]
            prezime = parts[1] if len(parts) > 1 else ''
            full_url = ihref if ihref.startswith('http') else f"https://hvs.hr{ihref}"

            self.upsert_clan(
                klub_id=klub['id'], source_id=mi.group(1),
                ime=ime, prezime=prezime,
                source_url=full_url,
            )
            self.stats['players'] += 1
|
||||
|
||||
if __name__ == '__main__':
    # The optional first command-line argument is passed to run() as
    # ``limit``; when it is absent the value 50 is used.
    harvester = HVSHarvester()
    cli_args = sys.argv[1:]
    run_limit = int(cli_args[0]) if cli_args else 50
    harvester.run(limit=run_limit)
|
||||
Reference in New Issue
Block a user