68 lines
2.3 KiB
Python
68 lines
2.3 KiB
Python
#!/usr/bin/env python3
|
|
# clan_oib_enricher.py — match clanovi s civic.persons po imenu+prezimenu+grad
|
|
import os, sys, logging, re
|
|
import psycopg2
|
|
from psycopg2.extras import RealDictCursor
|
|
|
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s [clan_oib] %(message)s')
|
|
log = logging.getLogger("clan_oib")
|
|
|
|
DSN = f"host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet password={os.environ['DB_PASSWORD']}"
|
|
|
|
def main():
|
|
conn = psycopg2.connect(DSN, cursor_factory=RealDictCursor)
|
|
conn.autocommit = True
|
|
cur = conn.cursor()
|
|
|
|
cur.execute("""
|
|
SELECT id, ime, prezime, grad, datum_rodenja
|
|
FROM pgz_sport.clanovi
|
|
WHERE (oib IS NULL OR length(oib) != 11)
|
|
AND ime IS NOT NULL AND prezime IS NOT NULL
|
|
AND length(ime) > 2 AND length(prezime) > 2
|
|
ORDER BY id
|
|
LIMIT 5000
|
|
""")
|
|
clanovi = cur.fetchall()
|
|
log.info(f"Members for OIB enrichment: {len(clanovi)}")
|
|
|
|
enriched = 0
|
|
for c in clanovi:
|
|
# Match s civic.persons po ime + prezime + (grad ili datum)
|
|
sql = """
|
|
SELECT oib, name
|
|
FROM civic.persons
|
|
WHERE oib IS NOT NULL AND length(oib) = 11
|
|
AND lower(name) LIKE %s
|
|
LIMIT 5
|
|
"""
|
|
try:
|
|
cur.execute(sql, (f'%{c["ime"]}%{c["prezime"]}%',))
|
|
matches = cur.fetchall()
|
|
except Exception as e:
|
|
# unaccent might not exist
|
|
cur.execute("ROLLBACK")
|
|
sql2 = """
|
|
SELECT oib, name FROM civic.persons
|
|
WHERE oib IS NOT NULL AND length(oib) = 11
|
|
AND lower(name) LIKE %s
|
|
LIMIT 5
|
|
"""
|
|
cur.execute(sql2, (f'%{c["ime"].lower()}%{c["prezime"].lower()}%',))
|
|
matches = cur.fetchall()
|
|
|
|
if len(matches) == 1:
|
|
# Unique match — update
|
|
cur.execute("UPDATE pgz_sport.clanovi SET oib = %s WHERE id = %s AND (oib IS NULL OR length(oib) != 11)",
|
|
(matches[0]['oib'], c['id']))
|
|
enriched += 1
|
|
if enriched % 100 == 0:
|
|
log.info(f"Progress: {enriched} enriched")
|
|
|
|
log.info(f"FINAL: {enriched} clanovi enriched (unique matches only)")
|
|
cur.close()
|
|
conn.close()
|
|
|
|
if __name__ == "__main__":
|
|
main()
|