feat: /api/v2/analiza/* endpoints - sport analytics backend
This commit is contained in:
Executable
+141
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env python3
|
||||
"""HNS sezone v3 — koristi __NEXT_DATA__ JSON parser primarily."""
|
||||
import os, time, re, json, sys
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# Keyword/value connection string handed to psycopg2.connect() in main().
# The password is read from the DB_PASSWORD environment variable when this
# module is loaded; if the variable is unset, os.environ[...] raises KeyError
# before any scraping starts.
# NOTE(review): the password is interpolated unquoted — a value containing
# spaces, single quotes or backslashes would presumably need libpq-style
# quoting; confirm against the deployment's secrets.
DSN = f"host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet password={os.environ['DB_PASSWORD']}"
|
||||
|
||||
def find_seasons(obj, found=None, depth=0):
    """Collect every season-like dict reachable from a decoded JSON value.

    A dict is considered season-like when either:

    * it has a ``'season'`` key whose value is a ``str`` or ``dict``, or a
      ``'sezona'`` key of any value; or
    * it is an element of a list stored under one of the keys ``careers``,
      ``career``, ``seasons``, ``sezone``, ``statistics`` or ``stats``
      (case-insensitive) and has at least one of the keys ``season``,
      ``sezona``, ``year`` or ``godina``.

    Args:
        obj: Any JSON-decoded value (dict, list or scalar).
        found: Optional list to append matches to; a new list is created
            when omitted. The same list object is returned.
        depth: Nesting level of ``obj``; traversal stops below level 25.

    Returns:
        The list of matching dicts in discovery order. Each dict object
        appears at most once, even when it satisfies both rules above.
    """
    container_keys = ('careers', 'career', 'seasons', 'sezone', 'statistics', 'stats')
    marker_keys = ('season', 'sezona', 'year', 'godina')

    if found is None:
        found = []

    # De-duplicate on object identity, not equality: the same dict used to be
    # appended twice (once via its parent list, once when visited directly),
    # while two distinct dicts with equal content are still both kept.
    seen = {id(entry) for entry in found}

    def remember(candidate):
        if id(candidate) not in seen:
            seen.add(id(candidate))
            found.append(candidate)

    def walk(node, level):
        if level > 25:
            return
        if isinstance(node, dict):
            if isinstance(node.get('season'), (str, dict)) or 'sezona' in node:
                remember(node)
            for key, value in node.items():
                if (
                    isinstance(key, str)
                    and key.lower() in container_keys
                    and isinstance(value, list)
                ):
                    for item in value:
                        if isinstance(item, dict) and any(mk in item for mk in marker_keys):
                            remember(item)
                # Always descend, whether or not the key looked like a container.
                walk(value, level + 1)
        elif isinstance(node, list):
            for item in node:
                walk(item, level + 1)

    walk(obj, depth)
    return found
|
||||
|
||||
def normalize_season(s):
    """Flatten one raw season dict into the row shape used for insertion.

    Args:
        s: A season-like dict. Season, club and competition are looked up
            under several English/Croatian key spellings; each may be a
            plain value or a nested dict carrying a name.

    Returns:
        A dict with the keys ``sezona``, ``klub`` (at most 200 chars),
        ``natjecanje`` (at most 100 chars) and the integer counters
        ``nastupi``, ``startna``, ``zamjena``, ``golovi``, ``asistencije``,
        ``zuti``, ``crveni`` and ``minute``. Counters default to 0 when no
        usable value is found.
    """
    sezona = s.get('season') or s.get('sezona') or s.get('year') or s.get('godina') or ''
    if isinstance(sezona, dict):
        sezona = sezona.get('name') or sezona.get('label') or str(sezona.get('year', ''))
    sezona = str(sezona)

    klub = s.get('club') or s.get('klub') or s.get('team') or ''
    if isinstance(klub, dict):
        klub = klub.get('name') or klub.get('naziv') or ''

    natj = s.get('competition') or s.get('natjecanje') or s.get('league') or ''
    if isinstance(natj, dict):
        natj = natj.get('name') or natj.get('naziv') or ''

    def num(*keys):
        """Return the first integer found under a key containing one of *keys*.

        Matching is a case-insensitive substring test, tried in the order the
        fragments are given. Values that are not plain integers have their
        digits extracted (e.g. ``"12'"`` -> 12); values with no digits give 0.
        """
        for fragment in keys:
            fragment = fragment.lower()
            for field, value in s.items():
                if fragment not in field.lower():
                    continue
                # A nested dict/list is not a number; extracting the digits of
                # its repr would fabricate one, so keep looking instead.
                if isinstance(value, (dict, list)):
                    continue
                try:
                    return int(value)
                except (TypeError, ValueError, OverflowError):
                    pass
                try:
                    return int(re.sub(r'\D', '', str(value)) or 0)
                except ValueError:
                    # Only reachable for absurdly long digit strings.
                    return 0
        return 0

    return {
        'sezona': sezona,
        'klub': str(klub)[:200],
        'natjecanje': str(natj)[:100],
        'nastupi': num('matches', 'nastup', 'appearance'),
        'startna': num('start'),
        'zamjena': num('sub', 'zamjen'),
        'golovi': num('goal', 'gol'),
        'asistencije': num('assist', 'asist'),
        'zuti': num('yellow', 'žut', 'zut'),
        'crveni': num('red', 'crv'),
        'minute': num('minute', 'minut', 'min'),
    }
|
||||
|
||||
# Pulls the JSON payload out of the Next.js bootstrap <script> tag.
_NEXT_DATA_RE = re.compile(r'__NEXT_DATA__"\s*type="application/json">([^<]+)</script>')

_INSERT_SEASON_SQL = """
    INSERT INTO pgz_sport.hns_player_seasons
        (hns_igrac_id, clan_id, sezona, klub_naziv, natjecanje,
         nastupi, startna, zamjena, golovi, asistencije, zuti, crveni, minute)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    ON CONFLICT DO NOTHING
"""


def _extract_rows(html):
    """Return normalized season rows parsed from the page's __NEXT_DATA__ JSON.

    Rows without a season label are dropped. A parse failure is reported on
    stderr and whatever rows were collected up to that point are returned.
    """
    rows = []
    match = _NEXT_DATA_RE.search(html)
    if match is None:
        return rows
    try:
        payload = json.loads(match.group(1))
        for raw in find_seasons(payload):
            row = normalize_season(raw)
            if row['sezona']:
                rows.append(row)
    except Exception as exc:  # best-effort scrape: report and keep going
        print(f"  ! __NEXT_DATA__ parse failed: {exc!r}", file=sys.stderr, flush=True)
    return rows


def _insert_rows(conn, target, rows):
    """Insert *rows* for one player and return how many were actually stored.

    Rows skipped by ``ON CONFLICT DO NOTHING`` are not counted. A failing row
    is reported on stderr and does not stop the remaining rows.
    """
    stored = 0
    with conn.cursor() as cur:
        for r in rows:
            try:
                cur.execute(_INSERT_SEASON_SQL, (
                    target['hns_igrac_id'], target['clan_id'],
                    r['sezona'], r['klub'], r['natjecanje'],
                    r['nastupi'], r['startna'], r['zamjena'], r['golovi'],
                    r['asistencije'], r['zuti'], r['crveni'], r['minute'],
                ))
            except psycopg2.Error as exc:
                print(f"  ! insert failed for hns_igrac_id={target['hns_igrac_id']} "
                      f"sezona={r['sezona']!r}: {exc}", file=sys.stderr, flush=True)
                continue
            if cur.rowcount > 0:
                stored += 1
    return stored


def main():
    """Scrape season statistics for players that have none stored yet.

    Selects up to 200 members with an HNS player id and no rows in
    ``pgz_sport.hns_player_seasons``, opens each member's ``source_url`` in
    headless Chromium, parses the embedded ``__NEXT_DATA__`` JSON and inserts
    the resulting season rows. Progress goes to stdout; per-player and
    per-row failures go to stderr and never abort the run.
    """
    conn = psycopg2.connect(DSN)
    # Autocommit: every INSERT is its own transaction, so one failing row
    # cannot poison the rows that follow it.
    conn.autocommit = True
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute("""
                SELECT c.id AS clan_id, c.hns_igrac_id, c.ime, c.prezime, c.source_url
                FROM pgz_sport.clanovi c
                WHERE c.hns_igrac_id IS NOT NULL
                  AND NOT EXISTS (SELECT 1 FROM pgz_sport.hns_player_seasons s WHERE s.hns_igrac_id = c.hns_igrac_id)
                ORDER BY c.id LIMIT 200
            """)
            targets = cur.fetchall()
        # NOTE(review): players whose page yields no rows stay in this
        # selection, so repeated runs may revisit the same 200 — confirm
        # whether that is intended.

        print(f"Targets: {len(targets)}", flush=True)

        seasons_added = 0
        with sync_playwright() as pw:
            browser = pw.chromium.launch(headless=True, args=["--no-sandbox", "--ignore-certificate-errors"])
            try:
                page = browser.new_context(
                    ignore_https_errors=True,
                    user_agent="Mozilla/5.0 (X11; Linux x86_64) Chrome/120.0.0.0",
                ).new_page()

                for i, t in enumerate(targets):
                    url = t['source_url']
                    if not url or 'semafor.hns.family/igraci/' not in url:
                        continue
                    try:
                        page.goto(url, wait_until="networkidle", timeout=20000)
                        # Short grace period after network idle, before the
                        # page content is read.
                        time.sleep(0.8)

                        rows = _extract_rows(page.content())
                        if rows:
                            stored = _insert_rows(conn, t, rows)
                            seasons_added += stored
                            print(f"  ✓ [{i}/{len(targets)}] {t['ime']} {t['prezime']}: +{stored} sezone (total: {seasons_added})", flush=True)

                        if i % 30 == 0 and i > 0:
                            print(f"  [{i}/{len(targets)}] processed, total: {seasons_added}", flush=True)
                    except Exception as exc:  # one bad page must not stop the batch
                        print(f"  ✗ [{i}/{len(targets)}] {url}: {exc!r}", file=sys.stderr, flush=True)
            finally:
                browser.close()

        print(f"\n✅ Done. Total: {seasons_added}", flush=True)
    finally:
        conn.close()
|
||||
|
||||
# Script entry point: run the scraper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user