Files
pgz-sport/scrapers/import_godisnjak.py

122 lines
5.1 KiB
Python
Executable File

#!/usr/bin/env python3
from dotenv import load_dotenv
load_dotenv('/opt/rinet-gpu/.env.master')
# auto-added by patch_scrapers_with_dotenv.sh
import os
import psycopg2, json, re
DB = dict(host='localhost', port=5432, dbname='rinet_v3', user='rinet', password=os.environ["DB_PASSWORD"])
conn = psycopg2.connect(**DB); conn.autocommit = True
cu = conn.cursor()
totals = json.load(open('/opt/pgz-sport/data/sport_totals_2025.json'))
print(f"Sport totals 2025: {len(totals)}")
SPORT_TO_SAVEZ_KEYS = {
'atletika':'atletski','biciklizam':'biciklist','boćanje':'boćar','boks':'boksač',
'gimnastika':'gimnast','jedriličarstvo':'jedrili','judo':'judo','karate':'karate',
'kendo':'kendo','kickboxing':'kickbox','košarka':'košark','kuglanje':'kuglač',
'nogomet':'nogomet','odbojka':'odbojkaš','parasport':'parasport','ples':'ples',
'plivanje':'plivačk','ronilaštvo':'ronila','rukomet':'rukometn',
'sinkronizirano plivanje':'plivačk','skijanje':'skijaš','sport gluhih':'gluh',
'sportski ribolov':'ribolov','sportsko penjanje':'penjač','stolni tenis':'stolnotenis',
'streličarstvo':'streličar','streljaštvo':'streljač','tenis':'teniski',
'triatlon':'triatlon','vaterpolo':'vaterpol','veslanje':'veslač',
}
cu.execute("SELECT id, naziv FROM pgz_sport.savezi WHERE naziv ILIKE '%PGŽ%' OR naziv ILIKE '%Primorsko%'")
all_savezi = cu.fetchall()
print(f"PGŽ savezi: {len(all_savezi)}")
savez_map = {}
for sport_lc, key in SPORT_TO_SAVEZ_KEYS.items():
for sid, naziv in all_savezi:
if key.lower() in naziv.lower():
savez_map[sport_lc] = sid; break
# Update statistika_saveza for 2025
n_upd = 0
for sport_lc, data in totals.items():
sid = savez_map.get(sport_lc)
if not sid: continue
cu.execute("SELECT id FROM pgz_sport.statistika_saveza WHERE savez_id=%s AND godina=%s", (sid, 2025))
e = cu.fetchone()
if e:
cu.execute("UPDATE pgz_sport.statistika_saveza SET registriranih=%s WHERE id=%s", (data['total'], e[0]))
else:
cu.execute("INSERT INTO pgz_sport.statistika_saveza (savez_id, godina, registriranih) VALUES (%s,%s,%s)",
(sid, 2025, data['total']))
n_upd += 1
print(f"Statistika_saveza 2025 updated: {n_upd}")
# Kategorizirani
KAT = json.load(open('/opt/pgz-sport/data/kategorizirani_2025.json'))
print(f"\nKategorizirani: {len(KAT)}")
cu.execute("ALTER TABLE pgz_sport.clanovi ADD COLUMN IF NOT EXISTS hoo_kategorija TEXT")
cu.execute("ALTER TABLE pgz_sport.clanovi ADD COLUMN IF NOT EXISTS hoo_kategorija_od DATE")
cu.execute("ALTER TABLE pgz_sport.clanovi ADD COLUMN IF NOT EXISTS hoo_kategorija_do DATE")
n_matched = 0; n_inserted = 0; n_dup = 0
seen = set()
def parse_d(s):
try:
d, m, y = s.split('.')
return f"{y}-{int(m):02d}-{int(d):02d}"
except: return None
for k in KAT:
ime = k['ime'].strip()
prezime = k['prezime'].strip()
sport = k['sport'].strip().lower()
klub = k['klub'].strip().strip('"').strip()
mjesto = k['mjesto'].strip()
hoo_kat = k['hoo_kategorija']
od = parse_d(k['vrijedi_od'])
do = parse_d(k['vrijedi_do'])
key = (ime.lower(), prezime.lower(), sport)
if key in seen: n_dup += 1; continue
seen.add(key)
cu.execute("""SELECT id FROM pgz_sport.clanovi WHERE LOWER(ime)=LOWER(%s) AND LOWER(prezime)=LOWER(%s)
AND (sport IS NULL OR LOWER(sport)=LOWER(%s)) LIMIT 1""", (ime, prezime, sport))
row = cu.fetchone()
if row:
cu.execute("""UPDATE pgz_sport.clanovi SET hoo_kategorija=%s, hoo_kategorija_od=%s,
hoo_kategorija_do=%s, sport=COALESCE(sport, %s) WHERE id=%s""",
(hoo_kat, od, do, sport, row[0]))
n_matched += 1
else:
klub_id = None
cu.execute("SELECT id FROM pgz_sport.klubovi WHERE LOWER(naziv) LIKE LOWER(%s) AND region IS NOT NULL LIMIT 1",
(f"%{klub[:30]}%",))
kr = cu.fetchone()
if kr: klub_id = kr[0]
cu.execute("""INSERT INTO pgz_sport.clanovi (ime, prezime, sport, mjesto_rodenja, klub_id,
hoo_kategorija, hoo_kategorija_od, hoo_kategorija_do)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s)""",
(ime, prezime, sport, mjesto, klub_id, hoo_kat, od, do))
n_inserted += 1
print(f"Matched: {n_matched}, Inserted: {n_inserted}, Dup: {n_dup}")
cu.execute("SELECT count(*) FROM pgz_sport.clanovi WHERE hoo_kategorija IS NOT NULL")
print(f"\nSportaša s HOO kategorijom: {cu.fetchone()[0]}")
cu.execute("SELECT hoo_kategorija, count(*) FROM pgz_sport.clanovi WHERE hoo_kategorija IS NOT NULL GROUP BY hoo_kategorija ORDER BY hoo_kategorija")
for r in cu.fetchall():
print(f" HOO {r[0]}: {r[1]}")
cu.execute("SELECT count(*) FROM pgz_sport.clanovi")
print(f"\nUkupno sportaša: {cu.fetchone()[0]}")
# Sport totals 2025 result
cu.execute("""SELECT s.naziv, ss.registriranih FROM pgz_sport.statistika_saveza ss
JOIN pgz_sport.savezi s ON s.id=ss.savez_id
WHERE ss.godina=2025 AND ss.registriranih > 0 ORDER BY ss.registriranih DESC LIMIT 12""")
print("\n2025 statistika_saveza top 12:")
for r in cu.fetchall():
print(f" {r[0]:50} {r[1]:>6}")
conn.close()