PGŽ Sport Platform — Round 1+2 baseline (sport2.html + API)

This commit is contained in:
Damir Radulić
2026-05-04 23:39:08 +02:00
commit a7ec0a86be
1820 changed files with 694455 additions and 0 deletions
+163
View File
@@ -0,0 +1,163 @@
#!/usr/bin/env python3
# sport_to_knowledge.py v2.0 — match dabi.knowledge schema (source_refs jsonb)
import os, sys, hashlib, logging, json
import psycopg2
from psycopg2.extras import execute_batch
# Log to the root handler with a "[sport2k]" tag so lines from this job are
# easy to pick out of shared output.
logging.basicConfig(level=logging.INFO, format='%(asctime)s [sport2k] %(message)s')
log = logging.getLogger("sport2k")

# libpq connection string used by main().
# A complete DSN can be supplied through the SPORT2K_DSN environment variable.
# The default deliberately carries NO password: credentials must not live in
# version control. libpq resolves the password on its own from PGPASSWORD or
# the ~/.pgpass password file.
# NOTE(review): the password that used to be embedded here was committed and
# should be treated as leaked — rotate it.
DSN = os.environ.get(
    "SPORT2K_DSN",
    "host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet",
)
# Parameterised upsert-or-skip statement for dabi.knowledge; rows whose
# data_hash already exists are left untouched.
_INSERT_KNOWLEDGE_SQL = (
    "\n"
    "INSERT INTO dabi.knowledge\n"
    "(fact, category, source, source_refs, confidence, data_hash, created_at)\n"
    "VALUES (%s, %s, %s, %s::jsonb, %s, %s, now())\n"
    "ON CONFLICT (data_hash) DO NOTHING\n"
)


def insert_batch(cur, facts):
    """Submit *facts* to dabi.knowledge in pages of 200 rows.

    Args:
        cur: open database cursor passed through to ``execute_batch``.
        facts: sequence of 6-tuples in the order
            (fact, category, source, source_refs, confidence, data_hash).

    Returns:
        The number of rows submitted (``len(facts)``), or 0 when *facts* is
        empty or None. This is not the number of rows actually inserted:
        rows skipped by ``ON CONFLICT ... DO NOTHING`` are still counted.
    """
    if facts:
        execute_batch(cur, _INSERT_KNOWLEDGE_SQL, facts, page_size=200)
        return len(facts)
    return 0
def _digest(key):
    """Return the first 32 hex characters of SHA-256 over *key* (used as data_hash)."""
    return hashlib.sha256(key.encode()).hexdigest()[:32]


def _row(fact, category, refs, confidence, data_hash):
    """Build one insert_batch parameter tuple; fact text is capped at 2000 characters."""
    return (fact[:2000], category, 'pgz_sport_db_extract', refs, confidence, data_hash)


def _compose(head, fields, sep=""):
    """Join *head* with every formatted (template, value) pair whose value is truthy.

    Pieces are joined with *sep* and a final "." is appended. Falsy values
    (None, empty string, 0) are skipped.
    """
    pieces = [head]
    pieces.extend(template.format(value) for template, value in fields if value)
    return sep.join(pieces) + "."


def _klub_rows(cur):
    """Build one fact row per active club that has a name."""
    cur.execute("""
        SELECT id, naziv, oib, sport, grad, predsjednik, tajnik, trener_glavni,
        broj_clanova, broj_aktivnih_sportasa, godina_osnutka, web, telefon, email, adresa
        FROM pgz_sport.klubovi WHERE aktivan = true
    """)
    rows = []
    for (kid, naziv, oib, sport, grad, preds, tajn, tren,
         n_cl, n_akt, god, web, tel, email, adr) in cur.fetchall():
        if not naziv:
            continue
        # The fact text feeds the data_hash below, so its exact wording and
        # punctuation must stay stable between runs.
        fact = _compose(f"Klub {naziv}", [
            ("sport: {}", sport),
            ("grad: {}", grad),
            ("OIB: {}", oib),
            ("osnovan {}.", god),
            ("predsjednik: {}", preds),
            ("tajnik: {}", tajn),
            ("glavni trener: {}", tren),
            ("broj članova: {}", n_cl),
            ("broj aktivnih sportaša: {}", n_akt),
            ("adresa: {}", adr),
            ("tel: {}", tel),
            ("email: {}", email),
            ("web: {}", web),
        ], sep=". ")
        if len(fact) < 30:
            continue
        refs = json.dumps([{"type": "pgz_sport_klub", "id": kid,
                            "url": f"https://sport.rinet.one/admin#klub/{kid}"}])
        # Hash covers id + leading text, so changed club data yields a new row.
        rows.append(_row(fact, 'pgz_sport_klub', refs, 0.92,
                         _digest(f"klub:{kid}:{fact[:200]}")))
    return rows


def _savez_rows(cur):
    """Build one fact row per federation (savez) that has a name."""
    # broj_klubova / broj_clanova are selected as NULL placeholders, so the
    # two matching fact fields are currently never emitted.
    cur.execute("""
        SELECT id, naziv, oib, sport, predsjednik, tajnik, web, NULL AS broj_klubova, NULL AS broj_clanova
        FROM pgz_sport.savezi
    """)
    rows = []
    for sid, naziv, oib, sport, preds, tajn, web, n_kl, n_cl in cur.fetchall():
        if not naziv:
            continue
        fact = _compose(f"Savez {naziv}", [
            ("sport: {}", sport),
            ("OIB: {}", oib),
            ("predsjednik: {}", preds),
            ("tajnik: {}", tajn),
            ("broj klubova: {}", n_kl),
            ("broj članova: {}", n_cl),
            ("web: {}", web),
        ], sep=". ")
        if len(fact) < 30:
            continue
        refs = json.dumps([{"type": "pgz_sport_savez", "id": sid}])
        rows.append(_row(fact, 'pgz_sport_savez', refs, 0.92,
                         _digest(f"savez:{sid}:{fact[:200]}")))
    return rows


def _overview_rows(cur):
    """Build one summary row per federation that has at least one active club."""
    cur.execute("""
        SELECT s.naziv, s.sport, count(k.id) AS n_kl,
        string_agg(k.grad, ', ' ORDER BY k.grad) FILTER (WHERE k.grad IS NOT NULL) AS gradovi
        FROM pgz_sport.savezi s
        LEFT JOIN pgz_sport.klubovi k ON k.savez_id = s.id AND k.aktivan = true
        GROUP BY s.id, s.naziv, s.sport HAVING count(k.id) > 0
    """)
    rows = []
    for savez, sport, n_klubova, gradovi in cur.fetchall():
        fact = f"{savez} ima {n_klubova} aktivnih klubova"
        if sport:
            fact += f" u sportu {sport}"
        # The city list is cut at 200 characters. When it is cut, the fact
        # ends with "..." instead of "." so readers can tell it is incomplete
        # (previously the ellipsis was added and then sliced off again).
        truncated = bool(gradovi) and len(gradovi) > 200
        if gradovi:
            fact += f". Gradovi: {gradovi[:200]}"
        fact += "..." if truncated else "."
        rows.append(_row(fact, 'pgz_sport_overview', json.dumps([{}]), 0.95,
                         _digest(f"overview:{savez}")))
    return rows


def _multichair_rows(cur):
    """Build one row per person holding a role in two or more organisations.

    People are matched on the lower-cased, trimmed name; at most 200 people
    are returned, those with the most organisations first.
    """
    cur.execute("""
        WITH all_links AS (
        SELECT lower(trim(predsjednik)) AS pk, predsjednik AS pname,
        'klub:'||k.id AS oid, k.naziv AS oname, 'predsjednik' AS role
        FROM pgz_sport.klubovi k WHERE predsjednik IS NOT NULL AND length(trim(predsjednik)) > 5
        UNION ALL
        SELECT lower(trim(tajnik)), tajnik, 'klub:'||k.id, k.naziv, 'tajnik'
        FROM pgz_sport.klubovi k WHERE tajnik IS NOT NULL AND length(trim(tajnik)) > 5
        UNION ALL
        SELECT lower(trim(predsjednik)), predsjednik, 'savez:'||s.id, s.naziv, 'predsjednik'
        FROM pgz_sport.savezi s WHERE predsjednik IS NOT NULL AND length(trim(predsjednik)) > 5
        )
        SELECT pk, max(pname) AS pname, count(DISTINCT oid) AS n_orgs,
        string_agg(DISTINCT oname || ' (' || role || ')', '; ') AS orgs
        FROM all_links GROUP BY pk HAVING count(DISTINCT oid) >= 2
        ORDER BY count(DISTINCT oid) DESC LIMIT 200
    """)
    rows = []
    for pk, pname, n_orgs, orgs in cur.fetchall():
        # string_agg yields NULL when every organisation name is NULL;
        # fall back to an empty list instead of failing on None[:500].
        fact = f"{pname} sjedi na {n_orgs} stolica u PGŽ Sport ekosustavu: {(orgs or '')[:500]}"
        if n_orgs >= 3:
            fact += " — VIŠESTRUKE FUNKCIJE: forenzički flag za moguće sukobe interesa."
        rows.append(_row(fact, 'pgz_sport_multichair', json.dumps([{}]), 0.90,
                         _digest(f"multichair:{pk}")))
    return rows


def _event_rows(cur):
    """Build rows for sporting events (manifestacije) followed by competitions (natjecanja)."""
    rows = []
    cur.execute("SELECT id, naziv, mjesto, godina_od, organizator, razina, broj_ucesnika FROM pgz_sport.manifestacije WHERE naziv IS NOT NULL")
    for mid, naziv, mjesto, god, org, razina, n_uces in cur.fetchall():
        fact = _compose(f"Sportska manifestacija: {naziv}", [
            (", mjesto: {}", mjesto),
            (", godina {}", god),
            (", organizator: {}", org),
            (", razina: {}", razina),
            (", broj učesnika: {}", n_uces),
        ])
        rows.append(_row(fact, 'pgz_sport_manifestacija', json.dumps([{}]), 0.85,
                         _digest(f"man:{mid}")))
    # NOTE(review): LIMIT 500 without ORDER BY — which competitions are
    # picked is up to the database when the table holds more than 500 rows.
    cur.execute("SELECT id, naziv, sport, datum_pocetka::text AS godina, sezona, razina, tip, kategorija FROM pgz_sport.natjecanja WHERE naziv IS NOT NULL LIMIT 500")
    for nid, naziv, sport, god, sez, raz, tip, kat in cur.fetchall():
        # Keep only the YYYY-MM-DD part of the text-cast start date.
        start = god[:10] if god and god != 'None' else None
        fact = _compose(f"Natjecanje: {naziv}", [
            (" — sport: {}", sport),
            (", sezona {}", sez),
            (", razina: {}", raz),
            (", tip: {}", tip),
            (", kategorija: {}", kat),
            (", datum početka: {}", start),
        ])
        rows.append(_row(fact, 'pgz_sport_natjecanje', json.dumps([{}]), 0.85,
                         _digest(f"nat:{nid}")))
    return rows


# Extraction stages in execution order: (log label, row builder).
_STAGES = (
    ("Klubovi facts", _klub_rows),
    ("Savezi facts", _savez_rows),
    ("Overview facts", _overview_rows),
    ("Multi-chair facts", _multichair_rows),
    ("Manifest+nat", _event_rows),
)


def main():
    """Extract facts from the pgz_sport schema and load them into dabi.knowledge.

    Runs every stage in ``_STAGES`` in order: query, build fact rows, submit
    them through ``insert_batch``, and log the submitted count. The connection
    runs in autocommit mode, so each stage's rows are committed as they are
    written; a failure in a later stage does not undo earlier stages.

    Raises:
        Any database error from connecting, querying or inserting propagates
        to the caller; the cursor and connection are closed first.
    """
    conn = psycopg2.connect(DSN)
    try:
        conn.autocommit = True
        with conn.cursor() as cur:
            total = 0
            for label, build_rows in _STAGES:
                n = insert_batch(cur, build_rows(cur))
                total += n
                log.info("%s: %s", label, n)
            log.info("═══ TOTAL: %s ═══", total)
    finally:
        # Always release the connection, also when a stage raises.
        conn.close()
# Script entry point: run the extraction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()