#!/usr/bin/env python3
"""sport_to_knowledge.py v2.0 — match dabi.knowledge schema (source_refs jsonb).

Reads clubs, federations, events and competitions from the ``pgz_sport``
schema, renders each record as a one-sentence (Croatian) fact and inserts the
facts into ``dabi.knowledge``. Rows are de-duplicated by ``data_hash``
(``ON CONFLICT (data_hash) DO NOTHING``), so re-running the script is safe.
"""
import hashlib
import json
import logging
import os
import sys

import psycopg2
from psycopg2.extras import execute_batch

logging.basicConfig(level=logging.INFO, format='%(asctime)s [sport2k] %(message)s')
log = logging.getLogger("sport2k")

# SECURITY NOTE(review): a credential is embedded in source control. The
# literal is kept only as a backward-compatible fallback; prefer supplying the
# DSN through the SPORT2K_DSN environment variable, then rotate this password
# and remove the fallback.
DSN = os.environ.get(
    "SPORT2K_DSN",
    "host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet password=R1net2026!SecureDB#v7",
)

# Value written to dabi.knowledge.source for every fact produced here.
_SOURCE = 'pgz_sport_db_extract'

# Placeholder source_refs for facts that have no single originating record.
_NO_REFS = json.dumps([{}])

# Overview facts list at most this many characters of city names.
_MAX_CITIES_CHARS = 200

_INSERT_SQL = """
    INSERT INTO dabi.knowledge
        (fact, category, source, source_refs, confidence, data_hash, created_at)
    VALUES (%s, %s, %s, %s::jsonb, %s, %s, now())
    ON CONFLICT (data_hash) DO NOTHING
"""

_CLUBS_SQL = """
    SELECT id, naziv, oib, sport, grad, predsjednik, tajnik, trener_glavni,
           broj_clanova, broj_aktivnih_sportasa, godina_osnutka,
           web, telefon, email, adresa
    FROM pgz_sport.klubovi
    WHERE aktivan = true
"""

# broj_klubova / broj_clanova are selected as NULL, so the matching fact parts
# are currently never emitted; the columns keep the row shape stable.
_FEDERATIONS_SQL = """
    SELECT id, naziv, oib, sport, predsjednik, tajnik, web,
           NULL AS broj_klubova, NULL AS broj_clanova
    FROM pgz_sport.savezi
"""

_OVERVIEW_SQL = """
    SELECT s.naziv, s.sport, count(k.id) AS n_kl,
           string_agg(k.grad, ', ' ORDER BY k.grad)
               FILTER (WHERE k.grad IS NOT NULL) AS gradovi
    FROM pgz_sport.savezi s
    LEFT JOIN pgz_sport.klubovi k ON k.savez_id = s.id AND k.aktivan = true
    GROUP BY s.id, s.naziv, s.sport
    HAVING count(k.id) > 0
"""

_MULTICHAIR_SQL = """
    WITH all_links AS (
        SELECT lower(trim(predsjednik)) AS pk, predsjednik AS pname,
               'klub:'||k.id AS oid, k.naziv AS oname, 'predsjednik' AS role
        FROM pgz_sport.klubovi k
        WHERE predsjednik IS NOT NULL AND length(trim(predsjednik)) > 5
        UNION ALL
        SELECT lower(trim(tajnik)), tajnik, 'klub:'||k.id, k.naziv, 'tajnik'
        FROM pgz_sport.klubovi k
        WHERE tajnik IS NOT NULL AND length(trim(tajnik)) > 5
        UNION ALL
        SELECT lower(trim(predsjednik)), predsjednik, 'savez:'||s.id, s.naziv, 'predsjednik'
        FROM pgz_sport.savezi s
        WHERE predsjednik IS NOT NULL AND length(trim(predsjednik)) > 5
    )
    SELECT pk, max(pname) AS pname, count(DISTINCT oid) AS n_orgs,
           string_agg(DISTINCT oname || ' (' || role || ')', '; ') AS orgs
    FROM all_links
    GROUP BY pk
    HAVING count(DISTINCT oid) >= 2
    ORDER BY count(DISTINCT oid) DESC
    LIMIT 200
"""

_EVENTS_SQL = (
    "SELECT id, naziv, mjesto, godina_od, organizator, razina, broj_ucesnika "
    "FROM pgz_sport.manifestacije WHERE naziv IS NOT NULL"
)

_COMPETITIONS_SQL = (
    "SELECT id, naziv, sport, datum_pocetka::text AS godina, sezona, razina, tip, kategorija "
    "FROM pgz_sport.natjecanja WHERE naziv IS NOT NULL LIMIT 500"
)


def insert_batch(cur, facts):
    """Insert fact rows into ``dabi.knowledge`` and return how many were submitted.

    Args:
        cur: open psycopg2 cursor.
        facts: sequence of 6-tuples
            ``(fact, category, source, source_refs_json, confidence, data_hash)``.

    Returns:
        ``len(facts)`` — the number of rows *submitted*. Rows whose
        ``data_hash`` already exists are skipped by ``ON CONFLICT DO NOTHING``,
        so the number actually inserted may be lower.
    """
    if not facts:
        return 0
    execute_batch(cur, _INSERT_SQL, facts, page_size=200)
    return len(facts)


def _fact_hash(key):
    """Return the 32-hex-character SHA-256 prefix used as ``data_hash``."""
    return hashlib.sha256(key.encode()).hexdigest()[:32]


def _row(fact, category, refs, confidence, data_hash):
    """Assemble one parameter tuple for ``insert_batch`` (fact capped at 2000 chars)."""
    return (fact[:2000], category, _SOURCE, refs, confidence, data_hash)


def _labelled(pairs):
    """Render ``(template, value)`` pairs, skipping falsy (NULL/empty/0) values."""
    return [template.format(value) for template, value in pairs if value]


def _club_rows(records):
    """Build knowledge rows from ``pgz_sport.klubovi`` records."""
    rows = []
    for (kid, naziv, oib, sport, grad, preds, tajn, tren, n_cl, n_akt,
         god, web, tel, email, adr) in records:
        if not naziv:
            continue
        # NOTE: "osnovan {}." followed by the ". " separator yields a double
        # period. It is kept on purpose: the hash below covers fact[:200], so
        # changing the text would re-insert every affected club as a new row.
        parts = [f"Klub {naziv}"] + _labelled([
            ("sport: {}", sport),
            ("grad: {}", grad),
            ("OIB: {}", oib),
            ("osnovan {}.", god),
            ("predsjednik: {}", preds),
            ("tajnik: {}", tajn),
            ("glavni trener: {}", tren),
            ("broj članova: {}", n_cl),
            ("broj aktivnih sportaša: {}", n_akt),
            ("adresa: {}", adr),
            ("tel: {}", tel),
            ("email: {}", email),
            ("web: {}", web),
        ])
        fact = ". ".join(parts) + "."
        if len(fact) < 30:
            # Too little information to be a useful fact.
            continue
        refs = json.dumps([{"type": "pgz_sport_klub", "id": kid,
                            "url": f"https://sport.rinet.one/admin#klub/{kid}"}])
        rows.append(_row(fact, 'pgz_sport_klub', refs, 0.92,
                         _fact_hash(f"klub:{kid}:{fact[:200]}")))
    return rows


def _federation_rows(records):
    """Build knowledge rows from ``pgz_sport.savezi`` records."""
    rows = []
    for sid, naziv, oib, sport, preds, tajn, web, n_kl, n_cl in records:
        if not naziv:
            continue
        parts = [f"Savez {naziv}"] + _labelled([
            ("sport: {}", sport),
            ("OIB: {}", oib),
            ("predsjednik: {}", preds),
            ("tajnik: {}", tajn),
            ("broj klubova: {}", n_kl),
            ("broj članova: {}", n_cl),
            ("web: {}", web),
        ])
        fact = ". ".join(parts) + "."
        if len(fact) < 30:
            continue
        refs = json.dumps([{"type": "pgz_sport_savez", "id": sid}])
        rows.append(_row(fact, 'pgz_sport_savez', refs, 0.92,
                         _fact_hash(f"savez:{sid}:{fact[:200]}")))
    return rows


def _overview_rows(records):
    """Build one "federation has N active clubs" row per federation."""
    rows = []
    for savez, sport, n_clubs, gradovi in records:
        cities = gradovi or ''
        if len(cities) > _MAX_CITIES_CHARS:
            # Mark the cut. Previously the list was cut to 300 chars + '...'
            # and then re-sliced to 200, which always dropped the ellipsis.
            cities = cities[:_MAX_CITIES_CHARS] + '...'
        fact = f"{savez} ima {n_clubs} aktivnih klubova"
        if sport:
            fact += f" u sportu {sport}"
        if cities:
            fact += f". Gradovi: {cities}"
        fact += "."
        rows.append(_row(fact, 'pgz_sport_overview', _NO_REFS, 0.95,
                         _fact_hash(f"overview:{savez}")))
    return rows


def _multichair_rows(records):
    """Build rows for people holding a role in two or more organisations."""
    rows = []
    for pk, pname, n_orgs, orgs in records:
        # string_agg yields NULL when every organisation name is NULL.
        org_list = (orgs or '')[:500]
        fact = f"{pname} sjedi na {n_orgs} stolica u PGŽ Sport ekosustavu: {org_list}"
        if n_orgs >= 3:
            fact += " — VIŠESTRUKE FUNKCIJE: forenzički flag za moguće sukobe interesa."
        rows.append(_row(fact, 'pgz_sport_multichair', _NO_REFS, 0.90,
                         _fact_hash(f"multichair:{pk}")))
    return rows


def _event_rows(records):
    """Build knowledge rows from ``pgz_sport.manifestacije`` records."""
    rows = []
    for mid, naziv, mjesto, god, org, razina, n_uces in records:
        details = _labelled([
            (", mjesto: {}", mjesto),
            (", godina {}", god),
            (", organizator: {}", org),
            (", razina: {}", razina),
            (", broj učesnika: {}", n_uces),
        ])
        fact = f"Sportska manifestacija: {naziv}" + "".join(details) + "."
        rows.append(_row(fact, 'pgz_sport_manifestacija', _NO_REFS, 0.85,
                         _fact_hash(f"man:{mid}")))
    return rows


def _competition_rows(records):
    """Build knowledge rows from ``pgz_sport.natjecanja`` records."""
    rows = []
    for nid, naziv, sport, start, sez, raz, tip, kat in records:
        details = _labelled([
            (" — sport: {}", sport),
            (", sezona {}", sez),
            (", razina: {}", raz),
            (", tip: {}", tip),
            (", kategorija: {}", kat),
        ])
        # `start` is datum_pocetka cast to text; keep only the YYYY-MM-DD part.
        if start and start != 'None':
            details.append(f", datum početka: {start[:10]}")
        fact = f"Natjecanje: {naziv}" + "".join(details) + "."
        rows.append(_row(fact, 'pgz_sport_natjecanje', _NO_REFS, 0.85,
                         _fact_hash(f"nat:{nid}")))
    return rows


def _fetch(cur, sql):
    """Execute *sql* on *cur* and return all result rows."""
    cur.execute(sql)
    return cur.fetchall()


def main():
    """Extract facts from ``pgz_sport`` and load them into ``dabi.knowledge``.

    The connection runs in autocommit mode, so each batch is persisted as soon
    as it is executed. The connection and cursor are closed even if a query or
    insert fails.
    """
    conn = psycopg2.connect(DSN)
    try:
        conn.autocommit = True
        with conn.cursor() as cur:
            total = 0

            # 1) Clubs
            n = insert_batch(cur, _club_rows(_fetch(cur, _CLUBS_SQL)))
            total += n
            log.info("Klubovi facts: %s", n)

            # 2) Federations
            n = insert_batch(cur, _federation_rows(_fetch(cur, _FEDERATIONS_SQL)))
            total += n
            log.info("Savezi facts: %s", n)

            # 3) Per-federation overview
            n = insert_batch(cur, _overview_rows(_fetch(cur, _OVERVIEW_SQL)))
            total += n
            log.info("Overview facts: %s", n)

            # 4) People holding several chairs
            n = insert_batch(cur, _multichair_rows(_fetch(cur, _MULTICHAIR_SQL)))
            total += n
            log.info("Multi-chair facts: %s", n)

            # 5) Events + competitions, inserted as a single batch
            rows = _event_rows(_fetch(cur, _EVENTS_SQL))
            rows.extend(_competition_rows(_fetch(cur, _COMPETITIONS_SQL)))
            n = insert_batch(cur, rows)
            total += n
            log.info("Manifest+nat: %s", n)

            log.info("═══ TOTAL: %s ═══", total)
    finally:
        conn.close()


if __name__ == "__main__":
    main()