feat: /api/v2/analiza/* endpoints - sport analytics backend
This commit is contained in:
@@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env python3
|
||||
# sport_to_knowledge.py v2.0 — match dabi.knowledge schema (source_refs jsonb)
|
||||
import os, sys, hashlib, logging, json
|
||||
import psycopg2
|
||||
from psycopg2.extras import execute_batch
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [sport2k] %(message)s')
log = logging.getLogger("sport2k")


def _dsn_quote(value):
    """Quote *value* for use in a libpq keyword/value connection string.

    libpq requires values containing spaces (or empty values) to be wrapped
    in single quotes, with embedded single quotes and backslashes escaped by
    a backslash. Quoting unconditionally is valid for every value.
    """
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


# Connection string for the source/target database. DB_PASSWORD must be set
# in the environment; a missing variable raises KeyError at import time.
# The password is quoted so that spaces, quotes or backslashes in it cannot
# corrupt the keyword/value string.
DSN = (
    "host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet "
    f"password={_dsn_quote(os.environ['DB_PASSWORD'])}"
)
|
||||
|
||||
def insert_batch(cur, facts, page_size=200):
    """Insert knowledge rows into ``dabi.knowledge``, skipping known hashes.

    Args:
        cur: Open psycopg2 cursor used to run the batched INSERT.
        facts: Sequence of 6-tuples
            ``(fact, category, source, source_refs, confidence, data_hash)``.
            ``source_refs`` is passed as JSON text and cast to ``jsonb`` by
            the statement.
        page_size: Number of statements ``execute_batch`` groups per round
            trip. Defaults to 200, the value previously hard-coded.

    Returns:
        The number of rows *submitted* (``len(facts)``), or 0 when ``facts``
        is empty or None. Rows skipped by ``ON CONFLICT (data_hash) DO
        NOTHING`` are still counted, so this is an upper bound on the number
        of rows actually inserted.
    """
    if not facts:
        return 0
    execute_batch(
        cur,
        """
        INSERT INTO dabi.knowledge
            (fact, category, source, source_refs, confidence, data_hash, created_at)
        VALUES (%s, %s, %s, %s::jsonb, %s, %s, now())
        ON CONFLICT (data_hash) DO NOTHING
        """,
        facts,
        page_size=page_size,
    )
    return len(facts)
|
||||
|
||||
def _fact_hash(key):
    """Return the first 32 hex characters of the SHA-256 of *key*."""
    return hashlib.sha256(key.encode()).hexdigest()[:32]


def _klub_rows(cur):
    """Build one knowledge row per active club that has a name."""
    cur.execute("""
        SELECT id, naziv, oib, sport, grad, predsjednik, tajnik, trener_glavni,
               broj_clanova, broj_aktivnih_sportasa, godina_osnutka, web, telefon, email, adresa
        FROM pgz_sport.klubovi WHERE aktivan = true
    """)
    rows = []
    for (kid, naziv, oib, sport, grad, preds, tajn, tren,
         n_cl, n_akt, god, web, tel, email, adr) in cur.fetchall():
        if not naziv:
            continue
        # (value, rendered text) pairs in output order; falsy values are skipped.
        # NOTE: "osnovan {god}." keeps its trailing period (which yields ".."
        # after joining) on purpose: the hash below covers the fact text, so
        # changing the text would re-insert every such club under a new hash.
        optional = [
            (sport, f"sport: {sport}"),
            (grad, f"grad: {grad}"),
            (oib, f"OIB: {oib}"),
            (god, f"osnovan {god}."),
            (preds, f"predsjednik: {preds}"),
            (tajn, f"tajnik: {tajn}"),
            (tren, f"glavni trener: {tren}"),
            (n_cl, f"broj članova: {n_cl}"),
            (n_akt, f"broj aktivnih sportaša: {n_akt}"),
            (adr, f"adresa: {adr}"),
            (tel, f"tel: {tel}"),
            (email, f"email: {email}"),
            (web, f"web: {web}"),
        ]
        parts = [f"Klub {naziv}"] + [text for value, text in optional if value]
        fact = ". ".join(parts) + "."
        if len(fact) < 30:
            continue
        fact_hash = _fact_hash(f"klub:{kid}:{fact[:200]}")
        refs = json.dumps([{
            "type": "pgz_sport_klub",
            "id": kid,
            "url": f"https://sport.rinet.one/admin#klub/{kid}",
        }])
        rows.append((fact[:2000], 'pgz_sport_klub', 'pgz_sport_db_extract', refs, 0.92, fact_hash))
    return rows


def _savez_rows(cur):
    """Build one knowledge row per federation (savez) that has a name."""
    # broj_klubova / broj_clanova are selected as NULL, so the two
    # corresponding fragments below are currently never emitted.
    cur.execute("""
        SELECT id, naziv, oib, sport, predsjednik, tajnik, web, NULL AS broj_klubova, NULL AS broj_clanova
        FROM pgz_sport.savezi
    """)
    rows = []
    for sid, naziv, oib, sport, preds, tajn, web, n_kl, n_cl in cur.fetchall():
        if not naziv:
            continue
        optional = [
            (sport, f"sport: {sport}"),
            (oib, f"OIB: {oib}"),
            (preds, f"predsjednik: {preds}"),
            (tajn, f"tajnik: {tajn}"),
            (n_kl, f"broj klubova: {n_kl}"),
            (n_cl, f"broj članova: {n_cl}"),
            (web, f"web: {web}"),
        ]
        parts = [f"Savez {naziv}"] + [text for value, text in optional if value]
        fact = ". ".join(parts) + "."
        if len(fact) < 30:
            continue
        fact_hash = _fact_hash(f"savez:{sid}:{fact[:200]}")
        refs = json.dumps([{"type": "pgz_sport_savez", "id": sid}])
        rows.append((fact[:2000], 'pgz_sport_savez', 'pgz_sport_db_extract', refs, 0.92, fact_hash))
    return rows


def _overview_rows(cur):
    """Build one summary row per federation that has at least one active club."""
    cur.execute("""
        SELECT s.naziv, s.sport, count(k.id) AS n_kl,
               string_agg(k.grad, ', ' ORDER BY k.grad) FILTER (WHERE k.grad IS NOT NULL) AS gradovi
        FROM pgz_sport.savezi s
        LEFT JOIN pgz_sport.klubovi k ON k.savez_id = s.id AND k.aktivan = true
        GROUP BY s.id, s.naziv, s.sport HAVING count(k.id) > 0
    """)
    rows = []
    for savez, sport, n_klubova, gradovi in cur.fetchall():
        # The city list is capped at 200 characters. (The earlier
        # 300-character cut plus "..." was always re-cut to 200, so a single
        # slice produces the same text.)
        gradovi_head = (gradovi or '')[:200]
        fact = f"{savez} ima {n_klubova} aktivnih klubova"
        if sport:
            fact += f" u sportu {sport}"
        if gradovi_head:
            fact += f". Gradovi: {gradovi_head}"
        fact += "."
        # Hash depends on the federation name only: with ON CONFLICT DO
        # NOTHING an already-stored overview is not refreshed when counts change.
        fact_hash = _fact_hash(f"overview:{savez}")
        rows.append((fact[:2000], 'pgz_sport_overview', 'pgz_sport_db_extract',
                     json.dumps([{}]), 0.95, fact_hash))
    return rows


def _multichair_rows(cur):
    """Build rows for people named as president/secretary in two or more organisations."""
    # People are matched on lower(trim(name)); names of 5 characters or fewer
    # are ignored, and only the 200 people with the most organisations are kept.
    cur.execute("""
        WITH all_links AS (
            SELECT lower(trim(predsjednik)) AS pk, predsjednik AS pname,
                   'klub:'||k.id AS oid, k.naziv AS oname, 'predsjednik' AS role
            FROM pgz_sport.klubovi k WHERE predsjednik IS NOT NULL AND length(trim(predsjednik)) > 5
            UNION ALL
            SELECT lower(trim(tajnik)), tajnik, 'klub:'||k.id, k.naziv, 'tajnik'
            FROM pgz_sport.klubovi k WHERE tajnik IS NOT NULL AND length(trim(tajnik)) > 5
            UNION ALL
            SELECT lower(trim(predsjednik)), predsjednik, 'savez:'||s.id, s.naziv, 'predsjednik'
            FROM pgz_sport.savezi s WHERE predsjednik IS NOT NULL AND length(trim(predsjednik)) > 5
        )
        SELECT pk, max(pname) AS pname, count(DISTINCT oid) AS n_orgs,
               string_agg(DISTINCT oname || ' (' || role || ')', '; ') AS orgs
        FROM all_links GROUP BY pk HAVING count(DISTINCT oid) >= 2
        ORDER BY count(DISTINCT oid) DESC LIMIT 200
    """)
    rows = []
    for pk, pname, n_orgs, orgs in cur.fetchall():
        fact = f"{pname} sjedi na {n_orgs} stolica u PGŽ Sport ekosustavu: {orgs[:500]}"
        if n_orgs >= 3:
            fact += " — VIŠESTRUKE FUNKCIJE: forenzički flag za moguće sukobe interesa."
        fact_hash = _fact_hash(f"multichair:{pk}")
        rows.append((fact[:2000], 'pgz_sport_multichair', 'pgz_sport_db_extract',
                     json.dumps([{}]), 0.90, fact_hash))
    return rows


def _event_rows(cur):
    """Build rows for sport events (manifestacije) followed by competitions (natjecanja)."""
    rows = []

    cur.execute("SELECT id, naziv, mjesto, godina_od, organizator, razina, broj_ucesnika FROM pgz_sport.manifestacije WHERE naziv IS NOT NULL")
    for mid, naziv, mjesto, god, org, razina, n_uces in cur.fetchall():
        optional = [
            (mjesto, f", mjesto: {mjesto}"),
            (god, f", godina {god}"),
            (org, f", organizator: {org}"),
            (razina, f", razina: {razina}"),
            (n_uces, f", broj učesnika: {n_uces}"),
        ]
        pieces = [f"Sportska manifestacija: {naziv}"]
        pieces.extend(text for value, text in optional if value)
        fact = "".join(pieces) + "."
        rows.append((fact[:2000], 'pgz_sport_manifestacija', 'pgz_sport_db_extract',
                     json.dumps([{}]), 0.85, _fact_hash(f"man:{mid}")))

    # Only the first 500 competitions returned by the query are exported.
    cur.execute("SELECT id, naziv, sport, datum_pocetka::text AS godina, sezona, razina, tip, kategorija FROM pgz_sport.natjecanja WHERE naziv IS NOT NULL LIMIT 500")
    for nid, naziv, sport, god, sez, raz, tip, kat in cur.fetchall():
        fact = f"Natjecanje: {naziv}"
        if sport:
            fact += f" — sport: {sport}"
        if sez:
            fact += f", sezona {sez}"
        if raz:
            fact += f", razina: {raz}"
        if tip:
            fact += f", tip: {tip}"
        if kat:
            fact += f", kategorija: {kat}"
        # `god` is the start date cast to text; keep only the YYYY-MM-DD part.
        if god and god != 'None':
            fact += f", datum početka: {god[:10]}"
        fact += "."
        rows.append((fact[:2000], 'pgz_sport_natjecanje', 'pgz_sport_db_extract',
                     json.dumps([{}]), 0.85, _fact_hash(f"nat:{nid}")))

    return rows


def main():
    """Export pgz_sport data as text facts into dabi.knowledge.

    Runs five sections in order (clubs, federations, per-federation overview,
    multi-chair people, events + competitions). Each section queries
    ``pgz_sport``, renders one fact string per row and hands the batch to
    ``insert_batch``. The connection runs in autocommit mode, so every
    section's inserts are committed independently.

    The logged counts are the numbers returned by ``insert_batch``.

    The cursor and connection are always closed, including when a query or
    insert raises; the exception is then propagated to the caller.
    """
    sections = (
        ("Klubovi facts", _klub_rows),
        ("Savezi facts", _savez_rows),
        ("Overview facts", _overview_rows),
        ("Multi-chair facts", _multichair_rows),
        ("Manifest+nat", _event_rows),
    )
    conn = psycopg2.connect(DSN)
    try:
        conn.autocommit = True
        with conn.cursor() as cur:
            total = 0
            for label, collect in sections:
                n = insert_batch(cur, collect(cur))
                total += n
                log.info("%s: %s", label, n)
            log.info("═══ TOTAL: %s ═══", total)
    finally:
        conn.close()
|
||||
|
||||
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user