HNS+UI: 4 nova endpointa + multi-sport schema (M2M kategorije + player_stats)
Endpoints:
- GET /api/v2/enrich-sources — sport→source mapping
- GET /api/v2/klubovi/priority-sort — financirani/godišnjak prvi
- GET /api/v2/clan/{id}/kategorije — many-to-many kategorije
- GET /api/v2/clan/{id}/full — kompletna slika (profil+kategorije+sezone+utakmice+stats)
- POST /api/v2/export/klubovi — XLSX export selektiranih
Schema:
- pgz_sport.clan_kategorije (M2M: igrač u juniorskoj+seniorskoj)
- pgz_sport.player_stats (multi-sport: nogomet/košarka/rukomet/odbojka/vaterpolo)
- pgz_sport.klub_roster (multi-source)
- pgz_sport.enrichment_sources (sport→izvor)
- View: v_pgz_priority_klubovi (financiran || u_godisnjaku)
- View: v_klubovi_priority_sort (priority sort)
Sport harvesters scaffold:
- scripts/sport_harvesters/__base.py (SportHarvester class)
- hks_basketball.py, hrs_handball.py, hos_volleyball.py, hvs_waterpolo.py
This commit is contained in:
Executable
+112
@@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env python3
|
||||
"""HNS sezone retry — pojednostavljen extract."""
|
||||
import os, time, re, json, sys
|
||||
from datetime import datetime
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# libpq-style connection string passed to psycopg2.connect() in main().
# SECURITY(review): the fallback below embeds a database password in source
# control. Prefer supplying the DSN through the HNS_SEASONS_DSN environment
# variable, then rotate this password and drop the literal. The fallback is
# kept only so existing invocations keep working unchanged.
DSN = os.environ.get(
    "HNS_SEASONS_DSN",
    "host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet password=R1net2026!SecureDB#v7",
)
|
||||
|
||||
def find_seasons_in_obj(obj, found=None):
    """Collect every dict inside a nested structure that has a season key.

    Walks ``obj`` depth-first. A dict is recorded when it contains the key
    ``'season'`` or ``'sezona'``; the walk then continues into that dict's
    values, so nested matches are recorded too. Lists are walked item by
    item; any other type is ignored.

    Args:
        obj: A dict, list, or scalar (e.g. the result of ``json.loads``).
        found: Optional list to append matches to. A new list is created
            when this is ``None``.

    Returns:
        The accumulator list (the same object as ``found`` when one was
        passed), holding the matching dicts in pre-order.
    """
    hits = [] if found is None else found

    if isinstance(obj, dict):
        if any(key in obj for key in ('season', 'sezona')):
            hits.append(obj)
        children = obj.values()
    elif isinstance(obj, list):
        children = obj
    else:
        # Scalars (str, int, None, ...) have nothing to descend into.
        children = ()

    for child in children:
        find_seasons_in_obj(child, hits)
    return hits
|
||||
|
||||
# Captures the JSON payload of the page's __NEXT_DATA__ <script> tag.
_NEXT_DATA_RE = re.compile(r'__NEXT_DATA__"\s*type="application/json">([^<]+)</script>')

# Matches a plain-text line of the form "<20YY/YY> <free text> <int> [<int> ...]".
_SEASON_LINE_RE = re.compile(r'^(20\d{2}/\d{2})\s+(.+?)\s+(\d+(?:\s+\d+)*)\s*$')


def _rows_from_next_data(html):
    """Build season rows from the __NEXT_DATA__ JSON embedded in ``html``.

    Returns an empty list when the tag is missing or its payload cannot be
    parsed. Rows built this way carry only the season label; club,
    competition and the counters are left empty/zero.
    """
    m = _NEXT_DATA_RE.search(html)
    if not m:
        return []
    try:
        data = json.loads(m.group(1))
        sezone = find_seasons_in_obj(data)
    except (ValueError, RecursionError):
        # Malformed or pathologically nested JSON: fall through to the
        # body-text fallback instead of aborting this player.
        return []

    rows = []
    for s in sezone:
        sezona = s.get('season') or s.get('sezona')
        if sezona:
            rows.append({'sezona': str(sezona), 'klub': '', 'natjecanje': '', 'nastupi': 0, 'golovi': 0})
    return rows


def _rows_from_body_text(body):
    """Build season rows by pattern-matching each line of the visible page text.

    The first trailing integer is stored as ``nastupi`` and the second (if
    any) as ``golovi``; the text between the season and the numbers is
    stored as ``klub`` (truncated to 200 characters).
    """
    rows = []
    for line in body.split('\n'):
        match = _SEASON_LINE_RE.match(line.strip())
        if not match:
            continue
        nums = [int(x) for x in match.group(3).split()]
        rows.append({
            'sezona': match.group(1), 'klub': match.group(2)[:200], 'natjecanje': '',
            'nastupi': nums[0] if nums else 0,
            'golovi': nums[1] if len(nums) > 1 else 0,
        })
    return rows


def _store_rows(conn, target, rows):
    """Insert ``rows`` for ``target`` and return how many were actually written.

    Uses ``cur.rowcount`` so rows skipped by ``ON CONFLICT DO NOTHING`` are
    not counted. A failing INSERT is reported and skipped; the connection
    runs in autocommit mode, so one failure does not affect later rows.
    """
    inserted = 0
    with conn.cursor() as cur:
        for r in rows:
            try:
                cur.execute("""
                    INSERT INTO pgz_sport.hns_player_seasons
                    (hns_igrac_id, clan_id, sezona, klub_naziv, natjecanje, nastupi, golovi)
                    VALUES (%s, %s, %s, %s, %s, %s, %s)
                    ON CONFLICT DO NOTHING
                """, (target['hns_igrac_id'], target['clan_id'], r['sezona'], r['klub'],
                      r['natjecanje'], r['nastupi'], r['golovi']))
            except psycopg2.Error as exc:
                print(f" ⚠ insert failed for igrac {target['hns_igrac_id']} sezona {r['sezona']}: {exc}", flush=True)
                continue
            # rowcount is 0 when the row already existed (conflict skipped).
            inserted += max(cur.rowcount, 0)
    return inserted


def main():
    """Fetch season data for players that have none stored yet and insert it.

    Selects up to 200 rows from ``pgz_sport.clanovi`` that have an
    ``hns_igrac_id`` but no row in ``pgz_sport.hns_player_seasons``, opens
    each player's ``source_url`` in headless Chromium, extracts season rows
    (``__NEXT_DATA__`` JSON first, visible body text as fallback) and inserts
    them. Per-player failures are printed and do not stop the run.

    NOTE(review): players whose page yields no rows stay in the selection, so
    repeated runs may keep retrying the same first 200 ids; confirm whether
    that is intended.
    """
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    seasons_added = 0

    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute("""
                SELECT c.id AS clan_id, c.hns_igrac_id, c.ime, c.prezime, c.source_url
                FROM pgz_sport.clanovi c
                WHERE c.hns_igrac_id IS NOT NULL
                AND NOT EXISTS (SELECT 1 FROM pgz_sport.hns_player_seasons s WHERE s.hns_igrac_id = c.hns_igrac_id)
                ORDER BY c.id LIMIT 200
            """)
            targets = cur.fetchall()

        print(f"Targets: {len(targets)}", flush=True)

        with sync_playwright() as pw:
            browser = pw.chromium.launch(headless=True, args=["--no-sandbox", "--ignore-certificate-errors"])
            try:
                ctx = browser.new_context(ignore_https_errors=True,
                                          user_agent="Mozilla/5.0 (X11; Linux x86_64) Chrome/120.0.0.0")
                page = ctx.new_page()

                for i, t in enumerate(targets):
                    url = t['source_url']
                    # Only player pages on the expected host are scraped.
                    if not url or 'semafor.hns.family/igraci/' not in url:
                        continue
                    try:
                        page.goto(url, wait_until="networkidle", timeout=20000)
                        try:
                            page.wait_for_selector('table, .karijera, [class*="season"]', timeout=6000)
                        except Exception:
                            # Best effort: the selector may never appear; carry on
                            # with whatever rendered. Not a bare except, so
                            # KeyboardInterrupt/SystemExit still propagate.
                            pass
                        time.sleep(0.5)

                        rows = _rows_from_next_data(page.content())
                        if not rows:
                            rows = _rows_from_body_text(page.locator('body').inner_text())

                        if rows:
                            seasons_added += _store_rows(conn, t, rows)
                            print(f" ✓ [{i}/{len(targets)}] {t['ime']} {t['prezime']}: {len(rows)} sezone (total added: {seasons_added})", flush=True)

                        if i % 20 == 0:
                            print(f" [{i}/{len(targets)}] processed, total added: {seasons_added}", flush=True)
                    except Exception as e:
                        # Top-level per-player boundary: report and move on.
                        print(f" ❌ {t['ime']}: {e}", flush=True)
            finally:
                browser.close()
    finally:
        conn.close()

    print(f"\nDone. Total sezone added: {seasons_added}")
|
||||
|
||||
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user