92 lines
4.1 KiB
Python
92 lines
4.1 KiB
Python
#!/usr/bin/env python3
|
|
import sys, hashlib, logging
|
|
import psycopg2
|
|
from psycopg2.extras import execute_batch
|
|
|
|
sys.path.insert(0, '/opt/rinet-gpu')
|
|
from db_config import DB_DSN
|
|
|
|
# Root logging setup: INFO level, every line tagged with "[sport-ksync]".
logging.basicConfig(
    format='%(asctime)s [sport-ksync] %(levelname)s: %(message)s',
    level=logging.INFO,
)
log = logging.getLogger('sport-ksync')

# Source label and URL attached to every fact tuple built by run().
SOURCE = 'pgz_sport_db'
SOURCE_URL = 'https://api.rinet.one/sport/'
|
|
|
|
def mk_hash(text):
    """Return the hexadecimal MD5 digest of ``text``.

    ``text`` is encoded with ``str.encode()``'s default encoding before
    hashing. The digest is used as a deduplication key, not for security.
    """
    digest = hashlib.md5()
    digest.update(text.encode())
    return digest.hexdigest()
|
|
|
|
def _compose(head, fragments):
    """Return ``head`` plus the text of every fragment whose value is truthy.

    ``fragments`` is an iterable of ``(value, text)`` pairs. ``text`` is
    appended, in order, only when ``value`` is truthy, so ``None``, ``''``,
    ``0`` and ``False`` columns are skipped.
    """
    return head + "".join(text for value, text in fragments if value)


def _savez_fact(row):
    """Build the fact sentence for one ``pgz_sport.savezi`` row."""
    # email is selected by the query but is not part of the fact text.
    naziv, sport, grad, predsj, _email = row
    return _compose(f"Sportski savez u PGZ: {naziv}", [
        (sport, f", sport: {sport}"),
        (grad, f", grad: {grad}"),
        (predsj, f", predsjednik: {predsj}"),
    ])


def _klub_fact(row):
    """Build the fact sentence for one ``pgz_sport.klubovi`` row."""
    naziv, sport, grad, predsj, br_cl, godina, nositelj, pgz_fin, savez = row
    return _compose(f"Sportski klub u PGZ: {naziv}", [
        (sport, f", sport: {sport}"),
        (grad, f", grad: {grad}"),
        (savez, f", savez: {savez}"),
        (predsj, f", predsjednik: {predsj}"),
        (br_cl, f", clanovi: {br_cl}"),
        (godina, f", osnovan: {godina}."),
        (nositelj, " Nositelj kvalitete HOO."),
        (pgz_fin, " PGZ sufinanciran."),
    ])


def _sportas_fact(row):
    """Build the fact sentence for one categorised ``pgz_sport.clanovi`` row."""
    ime, prezime, sport, kat, hoo_kat, repr_, klub = row
    return _compose(f"Kategorizirani sportash PGZ: {ime} {prezime}", [
        (sport, f", sport: {sport}"),
        (kat, f", kategorija: {kat}"),
        (hoo_kat, f", HOO: {hoo_kat}"),
        (klub, f", klub: {klub}"),
        (repr_, ". Reprezentativac."),
    ])


def _objekt_fact(row):
    """Build the fact sentence for one ``pgz_sport.sportski_objekti`` row."""
    # natkrita is selected by the query but is not part of the fact text.
    naziv, tip, grad, adresa, kap, izg, _natkrita, sportovi = row
    return _compose(f"Sportski objekt u PGZ: {naziv}", [
        (tip, f", tip: {tip}"),
        (grad, f", grad: {grad}"),
        (adresa, f", adresa: {adresa}"),
        (kap, f", kapacitet: {kap} mjesta"),
        (izg, f", izgradeno: {izg}."),
        (sportovi, f" Sportovi: {sportovi}."),
    ])


def _collect(cur, query, build_text, category, confidence):
    """Run ``query`` on ``cur`` and return one fact tuple per result row.

    Each tuple is ``(text, category, SOURCE, SOURCE_URL, confidence)``.
    """
    cur.execute(query)
    return [
        (build_text(row), category, SOURCE, SOURCE_URL, confidence)
        for row in cur.fetchall()
    ]


def run():
    """Render active ``pgz_sport`` rows as text facts and store them.

    Reads the ``savezi``, ``klubovi``, ``clanovi`` and ``sportski_objekti``
    tables of the ``pgz_sport`` schema, turns every row into one sentence and
    inserts the sentences into ``dabi.knowledge``. Each row is keyed by
    ``mk_hash(SOURCE + text)`` and inserted with
    ``ON CONFLICT (data_hash) DO NOTHING``, so facts that are already stored
    are skipped.

    The connection runs in autocommit mode and is always closed, including
    when a query or the insert raises.

    NOTE: the fact wording (including its irregular punctuation, e.g. the
    period after "osnovan" that only appears when the year is present) feeds
    the deduplication hash. Changing any fragment changes the hash and makes
    previously stored facts look new.
    """
    conn = psycopg2.connect(DB_DSN)
    try:
        conn.autocommit = True
        with conn.cursor() as cur:
            facts = []

            savezi = _collect(
                cur,
                "SELECT naziv, sport, grad, predsjednik, email "
                "FROM pgz_sport.savezi WHERE aktivan=true ORDER BY naziv",
                _savez_fact, 'pgz_sport_savezi', 0.90,
            )
            facts.extend(savezi)
            log.info('Savezi: %d', len(savezi))

            # NOTE(review): LIMIT 800 silently drops any further clubs.
            klubovi = _collect(
                cur,
                """SELECT k.naziv, k.sport, k.grad, k.predsjednik, k.broj_clanova, k.godina_osnutka, k.nositelj_kvalitete, k.pgz_sufinanciran, s.naziv
                FROM pgz_sport.klubovi k LEFT JOIN pgz_sport.savezi s ON k.savez_id=s.id
                WHERE k.aktivan=true AND k.naziv IS NOT NULL AND k.naziv != '' ORDER BY k.naziv LIMIT 800""",
                _klub_fact, 'pgz_sport_klubovi', 0.88,
            )
            facts.extend(klubovi)
            log.info('Klubovi: %d', len(klubovi))

            # NOTE(review): LIMIT 1000 silently drops any further athletes.
            sportasi = _collect(
                cur,
                """SELECT DISTINCT c.ime, c.prezime, c.sport, c.kategorija, c.hoo_kategorija, c.reprezentativac, k.naziv
                FROM pgz_sport.clanovi c LEFT JOIN pgz_sport.klubovi k ON c.klub_id=k.id
                WHERE c.aktivan=true AND c.kategoriziran=true AND c.ime IS NOT NULL AND c.prezime IS NOT NULL
                ORDER BY c.prezime, c.ime LIMIT 1000""",
                _sportas_fact, 'pgz_sport_sportasi', 0.85,
            )
            facts.extend(sportasi)
            log.info('Sportasi: %d', len(sportasi))

            objekti = _collect(
                cur,
                "SELECT naziv, tip, grad, adresa, kapacitet, izgradeno, natkrita, "
                "array_to_string(sportovi, ', ') "
                "FROM pgz_sport.sportski_objekti WHERE aktivan=true ORDER BY naziv",
                _objekt_fact, 'pgz_sport_objekti', 0.88,
            )
            facts.extend(objekti)
            log.info('Objekti: %d', len(objekti))

            log.info('Total: %d facts to insert', len(facts))

            # NOTE(review): SOURCE_URL is collected per fact but the INSERT
            # below has no column for it — confirm whether it should be stored.
            rows = [
                (text, category, source, confidence, mk_hash(SOURCE + text))
                for text, category, source, _url, confidence in facts
            ]
            execute_batch(
                cur,
                """INSERT INTO dabi.knowledge (fact,category,source,confidence,data_hash)
                VALUES (%s,%s,%s,%s,%s) ON CONFLICT (data_hash) DO NOTHING""",
                rows,
                page_size=200,
            )

            cur.execute("SELECT count(*) FROM dabi.knowledge WHERE source=%s", (SOURCE,))
            log.info('Total in dabi.knowledge: %s', cur.fetchone()[0])
    finally:
        # Release the connection on failure as well as on success.
        conn.close()
|
|
|
|
# Script entry point: perform the sync only when executed directly, not on import.
if __name__ == '__main__':
    run()
|