#!/usr/bin/env python3
"""Sync PGZ sport data into ``dabi.knowledge`` as one-line text facts.

Reads active rows from four ``pgz_sport`` tables (savezi, klubovi, clanovi,
sportski_objekti), renders each row as a single sentence, and inserts the
sentences into ``dabi.knowledge``.  Rows are de-duplicated by ``data_hash``
(MD5 of ``SOURCE + fact``), so re-running the script only inserts facts that
are not present yet.

NOTE: the exact wording of every fact is part of the hash key.  Changing any
fact text (even punctuation) would make previously synced rows look new and
insert duplicates, so the fact strings below must stay byte-stable.
"""
import hashlib
import logging
import sys

import psycopg2
from psycopg2.extras import execute_batch

# Must run before the db_config import below (presumably where db_config lives).
sys.path.insert(0, '/opt/rinet-gpu')
from db_config import DB_DSN  # noqa: E402

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [sport-ksync] %(levelname)s: %(message)s',
)
log = logging.getLogger('sport-ksync')

SOURCE = 'pgz_sport_db'
SOURCE_URL = 'https://api.rinet.one/sport/'

SAVEZI_SQL = (
    "SELECT naziv, sport, grad, predsjednik, email "
    "FROM pgz_sport.savezi WHERE aktivan=true ORDER BY naziv"
)

KLUBOVI_SQL = """
    SELECT k.naziv, k.sport, k.grad, k.predsjednik, k.broj_clanova,
           k.godina_osnutka, k.nositelj_kvalitete, k.pgz_sufinanciran, s.naziv
    FROM pgz_sport.klubovi k
    LEFT JOIN pgz_sport.savezi s ON k.savez_id=s.id
    WHERE k.aktivan=true AND k.naziv IS NOT NULL AND k.naziv != ''
    ORDER BY k.naziv
    LIMIT 800"""

SPORTASI_SQL = """
    SELECT DISTINCT c.ime, c.prezime, c.sport, c.kategorija, c.hoo_kategorija,
           c.reprezentativac, k.naziv
    FROM pgz_sport.clanovi c
    LEFT JOIN pgz_sport.klubovi k ON c.klub_id=k.id
    WHERE c.aktivan=true AND c.kategoriziran=true
      AND c.ime IS NOT NULL AND c.prezime IS NOT NULL
    ORDER BY c.prezime, c.ime
    LIMIT 1000"""

OBJEKTI_SQL = (
    "SELECT naziv, tip, grad, adresa, kapacitet, izgradeno, natkrita, "
    "array_to_string(sportovi, ', ') "
    "FROM pgz_sport.sportski_objekti WHERE aktivan=true ORDER BY naziv"
)

INSERT_SQL = """
    INSERT INTO dabi.knowledge (fact,category,source,confidence,data_hash)
    VALUES (%s,%s,%s,%s,%s)
    ON CONFLICT (data_hash) DO NOTHING"""


def mk_hash(text):
    """Return the hex MD5 digest of *text*; used as the de-duplication key."""
    return hashlib.md5(text.encode()).hexdigest()


def _savez_fact(row):
    """Render one ``savezi`` row as a fact sentence."""
    # email is selected by the query but is not part of the fact text.
    naziv, sport, grad, predsjednik, _email = row
    fact = f"Sportski savez u PGZ: {naziv}"
    if sport:
        fact += f", sport: {sport}"
    if grad:
        fact += f", grad: {grad}"
    if predsjednik:
        fact += f", predsjednik: {predsjednik}"
    return fact


def _klub_fact(row):
    """Render one ``klubovi`` row (joined with its savez name) as a fact."""
    (naziv, sport, grad, predsjednik, broj_clanova,
     godina, nositelj, pgz_fin, savez) = row
    fact = f"Sportski klub u PGZ: {naziv}"
    if sport:
        fact += f", sport: {sport}"
    if grad:
        fact += f", grad: {grad}"
    if savez:
        fact += f", savez: {savez}"
    if predsjednik:
        fact += f", predsjednik: {predsjednik}"
    if broj_clanova:
        fact += f", clanovi: {broj_clanova}"
    if godina:
        fact += f", osnovan: {godina}."
    if nositelj:
        fact += " Nositelj kvalitete HOO."
    if pgz_fin:
        fact += " PGZ sufinanciran."
    return fact


def _sportas_fact(row):
    """Render one categorised ``clanovi`` row (joined with club name) as a fact."""
    ime, prezime, sport, kategorija, hoo_kategorija, reprezentativac, klub = row
    fact = f"Kategorizirani sportash PGZ: {ime} {prezime}"
    if sport:
        fact += f", sport: {sport}"
    if kategorija:
        fact += f", kategorija: {kategorija}"
    if hoo_kategorija:
        fact += f", HOO: {hoo_kategorija}"
    if klub:
        fact += f", klub: {klub}"
    if reprezentativac:
        fact += ". Reprezentativac."
    return fact


def _objekt_fact(row):
    """Render one ``sportski_objekti`` row as a fact sentence."""
    # natkrita is selected by the query but is not part of the fact text.
    naziv, tip, grad, adresa, kapacitet, izgradeno, _natkrita, sportovi = row
    fact = f"Sportski objekt u PGZ: {naziv}"
    if tip:
        fact += f", tip: {tip}"
    if grad:
        fact += f", grad: {grad}"
    if adresa:
        fact += f", adresa: {adresa}"
    if kapacitet:
        fact += f", kapacitet: {kapacitet} mjesta"
    if izgradeno:
        fact += f", izgradeno: {izgradeno}."
    if sportovi:
        fact += f" Sportovi: {sportovi}."
    return fact


# (log label, query, row -> fact text, knowledge category, confidence),
# processed in this order.
_SECTIONS = (
    ('Savezi', SAVEZI_SQL, _savez_fact, 'pgz_sport_savezi', 0.90),
    ('Klubovi', KLUBOVI_SQL, _klub_fact, 'pgz_sport_klubovi', 0.88),
    ('Sportasi', SPORTASI_SQL, _sportas_fact, 'pgz_sport_sportasi', 0.85),
    ('Objekti', OBJEKTI_SQL, _objekt_fact, 'pgz_sport_objekti', 0.88),
)


def _collect_facts(cur):
    """Run every section query on *cur* and return the list of fact tuples.

    Each tuple is ``(fact, category, source, source_url, confidence)``.
    """
    facts = []
    for label, sql, build, category, confidence in _SECTIONS:
        cur.execute(sql)
        section = [
            (build(row), category, SOURCE, SOURCE_URL, confidence)
            for row in cur.fetchall()
        ]
        log.info('%s: %s', label, len(section))
        facts.extend(section)
    return facts


def run():
    """Collect all sport facts and insert the new ones into ``dabi.knowledge``.

    The connection runs in autocommit mode.  The cursor and the connection
    are always released, even when a query or the insert raises; the
    exception itself is propagated to the caller.
    """
    conn = psycopg2.connect(DB_DSN)
    try:
        conn.autocommit = True
        with conn.cursor() as cur:
            facts = _collect_facts(cur)
            log.info('Total: %s facts to insert', len(facts))

            # The source URL is carried in the fact tuple but is not one of
            # the inserted columns.
            rows = [
                (fact, category, source, confidence, mk_hash(SOURCE + fact))
                for fact, category, source, _url, confidence in facts
            ]
            execute_batch(cur, INSERT_SQL, rows, page_size=200)

            cur.execute(
                "SELECT count(*) FROM dabi.knowledge WHERE source=%s",
                (SOURCE,),
            )
            log.info('Total in dabi.knowledge: %s', cur.fetchone()[0])
    finally:
        conn.close()


if __name__ == '__main__':
    run()