PGŽ Sport Platform — Round 1+2 baseline (sport2.html + API)

This commit is contained in:
Damir Radulić
2026-05-04 23:39:08 +02:00
commit a7ec0a86be
1820 changed files with 694455 additions and 0 deletions
+91
View File
@@ -0,0 +1,91 @@
#!/usr/bin/env python3
import sys, hashlib, logging
import psycopg2
from psycopg2.extras import execute_batch
sys.path.insert(0, '/opt/rinet-gpu')
from db_config import DB_DSN
# Log at INFO and above; every line carries a timestamp, the fixed
# '[sport-ksync]' tag and the level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s [sport-ksync] %(levelname)s: %(message)s')
log = logging.getLogger('sport-ksync')
# Written to the `source` column of dabi.knowledge and prefixed to each fact
# before hashing, so changing this value changes every data_hash.
SOURCE = 'pgz_sport_db'
# URL associated with this source. NOTE(review): run() does not write it to
# dabi.knowledge -- confirm whether the table is meant to store it.
SOURCE_URL = 'https://api.rinet.one/sport/'
def mk_hash(text):
    """Return the hexadecimal MD5 digest of *text* encoded as UTF-8.

    The digest is a 32-character lowercase hex string.
    """
    digest = hashlib.md5()
    digest.update(text.encode("utf-8"))
    return digest.hexdigest()
def _compose_fact(head, optional_parts):
    """Return ``head`` followed by the text of every truthy optional part.

    Args:
        head: Opening text of the fact.
        optional_parts: Iterable of ``(value, template)`` pairs. When
            ``value`` is truthy, ``template.format(value)`` is appended;
            falsy values (``None``, ``''``, ``0``, ``False``) are skipped.

    Returns:
        The assembled fact string.
    """
    return head + "".join(
        template.format(value) for value, template in optional_parts if value
    )


# NOTE: the fact texts below are reproduced byte-for-byte, including their
# irregular punctuation and spelling. data_hash is derived from the text, so
# any change would make already-stored facts look new and be inserted again.

def _savez_fact(row):
    """Build the fact sentence for one pgz_sport.savezi row."""
    naziv, sport, grad, predsjednik = row
    return _compose_fact(
        f"Sportski savez u PGZ: {naziv}",
        [
            (sport, ", sport: {}"),
            (grad, ", grad: {}"),
            (predsjednik, ", predsjednik: {}"),
        ],
    )


def _klub_fact(row):
    """Build the fact sentence for one pgz_sport.klubovi row (joined with its savez)."""
    (naziv, sport, grad, predsjednik, broj_clanova, godina_osnutka,
     nositelj_kvalitete, pgz_sufinanciran, savez) = row
    return _compose_fact(
        f"Sportski klub u PGZ: {naziv}",
        [
            (sport, ", sport: {}"),
            (grad, ", grad: {}"),
            (savez, ", savez: {}"),
            (predsjednik, ", predsjednik: {}"),
            (broj_clanova, ", clanovi: {}"),
            (godina_osnutka, ", osnovan: {}."),
            # The two flags contribute fixed text; the value itself is not shown.
            (nositelj_kvalitete, " Nositelj kvalitete HOO."),
            (pgz_sufinanciran, " PGZ sufinanciran."),
        ],
    )


def _sportas_fact(row):
    """Build the fact sentence for one categorised pgz_sport.clanovi row."""
    ime, prezime, sport, kategorija, hoo_kategorija, reprezentativac, klub = row
    return _compose_fact(
        f"Kategorizirani sportash PGZ: {ime} {prezime}",
        [
            (sport, ", sport: {}"),
            (kategorija, ", kategorija: {}"),
            (hoo_kategorija, ", HOO: {}"),
            (klub, ", klub: {}"),
            (reprezentativac, ". Reprezentativac."),
        ],
    )


def _objekt_fact(row):
    """Build the fact sentence for one pgz_sport.sportski_objekti row."""
    naziv, tip, grad, adresa, kapacitet, izgradeno, sportovi = row
    return _compose_fact(
        f"Sportski objekt u PGZ: {naziv}",
        [
            (tip, ", tip: {}"),
            (grad, ", grad: {}"),
            (adresa, ", adresa: {}"),
            (kapacitet, ", kapacitet: {} mjesta"),
            (izgradeno, ", izgradeno: {}."),
            (sportovi, " Sportovi: {}."),
        ],
    )


# One entry per source table: (log label, category, confidence, SQL, row -> fact).
# Only the columns the builders actually use are selected.
# NOTE(review): the LIMIT 800 / LIMIT 1000 clauses silently drop any rows
# beyond the limit -- confirm these caps are intentional.
_FACT_SECTIONS = (
    (
        "Savezi",
        "pgz_sport_savezi",
        0.90,
        "SELECT naziv, sport, grad, predsjednik FROM pgz_sport.savezi "
        "WHERE aktivan=true ORDER BY naziv",
        _savez_fact,
    ),
    (
        "Klubovi",
        "pgz_sport_klubovi",
        0.88,
        """SELECT k.naziv, k.sport, k.grad, k.predsjednik, k.broj_clanova, k.godina_osnutka, k.nositelj_kvalitete, k.pgz_sufinanciran, s.naziv
        FROM pgz_sport.klubovi k LEFT JOIN pgz_sport.savezi s ON k.savez_id=s.id
        WHERE k.aktivan=true AND k.naziv IS NOT NULL AND k.naziv != '' ORDER BY k.naziv LIMIT 800""",
        _klub_fact,
    ),
    (
        "Sportasi",
        "pgz_sport_sportasi",
        0.85,
        """SELECT DISTINCT c.ime, c.prezime, c.sport, c.kategorija, c.hoo_kategorija, c.reprezentativac, k.naziv
        FROM pgz_sport.clanovi c LEFT JOIN pgz_sport.klubovi k ON c.klub_id=k.id
        WHERE c.aktivan=true AND c.kategoriziran=true AND c.ime IS NOT NULL AND c.prezime IS NOT NULL
        ORDER BY c.prezime, c.ime LIMIT 1000""",
        _sportas_fact,
    ),
    (
        "Objekti",
        "pgz_sport_objekti",
        0.88,
        "SELECT naziv, tip, grad, adresa, kapacitet, izgradeno, "
        "array_to_string(sportovi, ', ') FROM pgz_sport.sportski_objekti "
        "WHERE aktivan=true ORDER BY naziv",
        _objekt_fact,
    ),
)


def _collect_facts(cur):
    """Run every section query on ``cur`` and return ``(fact, category, confidence)`` tuples."""
    facts = []
    for label, category, confidence, sql, build_fact in _FACT_SECTIONS:
        cur.execute(sql)
        section = [(build_fact(row), category, confidence) for row in cur.fetchall()]
        log.info('%s: %s', label, len(section))
        facts.extend(section)
    return facts


def run():
    """Sync PGZ sport data into dabi.knowledge as plain-text facts.

    Reads active federations, clubs, categorised athletes and sport
    facilities from the ``pgz_sport`` schema, renders each row as one fact
    sentence, and inserts the facts into ``dabi.knowledge``. Each fact is
    keyed by ``mk_hash(SOURCE + fact)``; rows whose ``data_hash`` already
    exists are skipped (``ON CONFLICT ... DO NOTHING``), so re-running the
    script does not duplicate facts.

    The connection runs in autocommit mode, so there is no surrounding
    transaction to roll back if the insert fails part-way.

    Raises:
        Any exception raised by psycopg2 while connecting or querying
        propagates to the caller; the cursor and connection are closed first.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from contextlib import closing

    # closing() guarantees close() on both objects even when a query raises;
    # the cursor is closed before the connection.
    with closing(psycopg2.connect(DB_DSN)) as conn:
        conn.autocommit = True
        with closing(conn.cursor()) as cur:
            facts = _collect_facts(cur)
            log.info('Total: %s facts to insert', len(facts))

            # NOTE(review): SOURCE_URL is not part of the INSERT column list,
            # so it is not stored -- confirm whether dabi.knowledge should keep it.
            rows = [
                (fact, category, SOURCE, confidence, mk_hash(SOURCE + fact))
                for fact, category, confidence in facts
            ]
            execute_batch(cur, """INSERT INTO dabi.knowledge (fact,category,source,confidence,data_hash)
            VALUES (%s,%s,%s,%s,%s) ON CONFLICT (data_hash) DO NOTHING""", rows, page_size=200)

            cur.execute("SELECT count(*) FROM dabi.knowledge WHERE source=%s", (SOURCE,))
            log.info('Total in dabi.knowledge: %s', cur.fetchone()[0])
# Script entry point: perform the sync only when executed directly, not on import.
if __name__ == "__main__":
    run()