feat: /api/v2/analiza/* endpoints - sport analytics backend

This commit is contained in:
Damir Radulic
2026-05-16 00:28:12 +02:00
parent 7ca5d7d94e
commit aca5051418
1355 changed files with 321891 additions and 4128 deletions
+24 -1
View File
@@ -1,4 +1,7 @@
#!/usr/bin/env python3
from dotenv import load_dotenv
# Load the shared env file first so the os.getenv / os.environ lookups further
# down (RINET_DSN, DB_PASSWORD, TG_*) can see values defined there.
load_dotenv('/opt/rinet-gpu/.env.master')
# auto-added by patch_scrapers_with_dotenv.sh
# NOTE(review): these statements sit above the triple-quoted string that
# follows, so Python treats that string as a plain expression and the module's
# __doc__ is None. Move the bootstrap below the docstring to restore it.
"""
HNS Master Harvester — Playwright-based scrape semafor.hns.family
─────────────────────────────────────────────────────────────────
@@ -18,7 +21,7 @@ from psycopg2.extras import RealDictCursor, execute_values
from playwright.sync_api import sync_playwright
# Database connection string. RINET_DSN wins when it is set (and non-empty);
# otherwise the default is assembled from DB_PASSWORD. Using `or` keeps the
# os.environ lookup lazy, so a missing DB_PASSWORD raises KeyError only when
# the default is actually needed. No password literal is kept in source.
DSN = os.getenv("RINET_DSN") or (
    "host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet "
    f"password={os.environ['DB_PASSWORD']}"
)
# NOTE(review): the fallback below is a bot token committed to source control.
# It is kept so existing deployments keep working, but it should be rotated
# and supplied only through TG_BOT_TOKEN in the environment.
TG = os.getenv("TG_BOT_TOKEN", "8535797835:AAFItT-92jzZ9NWFafLxn0dLa1_n2s-JE5Y")
TG_CHAT = os.getenv("TG_CHAT", "7969491558")
# Log file handle opened in append mode at import time; the file name is
# stamped to the minute, so each run started in a new minute gets its own file.
LOG = open(f"/var/log/pgz-sport-debug/hns_harvester_{datetime.now().strftime('%Y%m%d_%H%M')}.log", "a")
@@ -210,12 +213,30 @@ def upsert_clan(conn, klub_id, player_data):
def upsert_seasons(conn, hns_id, clan_id, seasons):
if not seasons: return 0
rows = []
skipped = 0
# Reject rows where klub_naziv is obviously a misparsed HTML stat-block
# (the parser at scrape_player_full() can produce these when a <table> row
# has fewer cells than the header — dict(zip(...)) silently drops, leaving
# whole-block dumps or bare numbers in the value).
_BAD_PREFIXES = ('STATISTIKA', 'NASTUPI', 'ZAPOČEO', 'ZAMJENA',
'POGOTCI', 'ŽUTI', 'CRVENI', 'UKUPNO', 'SUPERSPORT')
def _looks_like_garbage(klub_text):
if not klub_text: return True
t = klub_text.strip()
if not t: return True # whitespace only
if re.match(r'^\d+$', t): return True # bare number (year, jersey #)
if t.count('\n') >= 2: return True # multi-line label dump
u = t.upper()
return any(u.startswith(p) for p in _BAD_PREFIXES)
for s in seasons:
sezona = s.get('sezona', '')
if not sezona: continue
# Try extract klub iz row
klub = next((v for k,v in s.items() if 'lub' in k.lower()), '')
natjecanje = next((v for k,v in s.items() if 'atjec' in k.lower() or 'liga' in k.lower()), '')
if _looks_like_garbage(klub):
skipped += 1
continue
def num(key):
for k in s.keys():
if key in k.lower():
@@ -227,6 +248,8 @@ def upsert_seasons(conn, hns_id, clan_id, seasons):
num('nastup'), num('start'), num('zamj'),
num('gol'), num('asist'), num('žut'), num('crv'), num('minut')
))
if skipped:
print(f'[hns_master_harvester] upsert_seasons: skipped {skipped} garbage rows for hns_id={hns_id}')
with conn.cursor() as cur:
execute_values(cur, """
INSERT INTO pgz_sport.hns_player_seasons