#!/usr/bin/env python3
"""Seed cultural_qa training data from dabi.knowledge.

Reads facts from the Alan Ford, šatrovački (slang), čakavski / fjumanski
(dialect) and Rijeka-proverb categories of ``dabi.knowledge`` and inserts
templated question/answer pairs into ``dabi.training_qa``.
"""
import hashlib  # no longer used by main(); kept so the file-level import set is unchanged
import logging
import os

import psycopg2

logging.basicConfig(level=logging.INFO, format='%(asctime)s [cult_qa] %(message)s')
log = logging.getLogger("cult_qa")

# NOTE(security): the fallback DSN embeds a plaintext password in source control.
# Set CULT_QA_DSN in the environment to override it; the literal is kept only so
# existing invocations keep working. Consider rotating this credential.
DSN = os.environ.get(
    "CULT_QA_DSN",
    "host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet password=R1net2026!SecureDB#v7",
)

# Character names searched for (case-insensitively) inside Alan Ford facts.
# Order matters: the first name found in the fact wins.
_ALAN_FORD_CHARACTERS = (
    'Alan Ford', 'Bob Rock', 'Sir Oliver', 'Broj Jedan', 'Grunf',
    'Jeremija', 'Šef', 'Margot', 'Superhik', 'Notar', 'Cifra Sluga',
)

# At most this many generated questions are stored per fact.
_MAX_QUESTIONS_PER_FACT = 2

# How many insert failures are logged in detail before going quiet.
_MAX_LOGGED_ERRORS = 3

_SELECT_FACTS_SQL = """
    SELECT id, fact, category FROM dabi.knowledge
    WHERE category IN ('alan_ford_priority','satrovacki_priority','cakavski_priority',
                       'fjumanski_priority','rijeka_izreka','lingvistika_qa',
                       'alan_ford_v3','alan_ford_qa','rijeka_alan','rijeka_humor',
                       'rijeka_lokalni','rijeka_qa','satrovacki_dict','satrovacki_jezik')
      AND fact IS NOT NULL AND length(fact) > 30
    LIMIT 1000
"""

_INSERT_QA_SQL = """
    INSERT INTO dabi.training_qa (question, answer, category, source_type, created_at)
    VALUES (%s, %s, %s, 'cultural_seed', now())
    ON CONFLICT DO NOTHING
"""


def _leading_term(fact: str):
    """Return the first word of *fact* as a dictionary term, or ``None``.

    The first whitespace-delimited token is treated as the term being defined.
    Tokens of two characters or fewer are rejected, trailing ``= , : ; .`` are
    stripped, and a token that is empty after stripping yields ``None``.
    """
    words = fact.split()
    if not words or len(words[0]) <= 2:
        return None
    term = words[0].rstrip('=,:;.')
    # A token made only of punctuation (e.g. "...") would otherwise produce
    # questions such as "Što znači ?".
    return term or None


def _questions_for(fact: str, cat: str) -> list:
    """Build the templated questions for one fact, based on its category.

    Args:
        fact: The fact text, used as the answer and scanned for names/terms.
        cat: The ``dabi.knowledge`` category of the fact.

    Returns:
        A list of question strings. It is empty for categories that have no
        template (e.g. ``lingvistika_qa``, ``rijeka_qa``) and for dictionary
        facts with no usable leading term.
    """
    if 'alan_ford' in cat:
        fact_lower = fact.lower()
        for char in _ALAN_FORD_CHARACTERS:
            if char.lower() in fact_lower:
                return [f"Tko je {char}?", f"Što znaš o liku {char}?"]
        # No known character mentioned: fall back to a generic question.
        return ["Što znaš o Alan Fordu?"]

    if 'satrovacki' in cat:
        term = _leading_term(fact)
        if term is None:
            return []
        return [f"Što znači {term}?", f"Što je {term} na šatrovačkom?"]

    if 'cakavski' in cat or 'fjumanski' in cat:
        term = _leading_term(fact)
        if term is None:
            return []
        return [f"Što znači {term}?", f"Što je {term} u riječkom dijalektu?"]

    if 'rijeka_izreka' in cat:
        return ["Reci mi neku riječku izreku.", "Koje su tradicionalne riječke izreke?"]

    return []


def main() -> None:
    """Fetch cultural facts and insert generated Q&A pairs.

    Connects with ``DSN`` in autocommit mode, so each insert is committed on
    its own and a failed insert does not affect the others. Insert failures
    are best-effort: they are counted, the first few are logged, and the run
    continues. The cursor and connection are closed on every exit path.

    Raises:
        psycopg2.Error: If connecting or the initial SELECT fails.
    """
    conn = psycopg2.connect(DSN)
    try:
        conn.autocommit = True
        with conn.cursor() as cur:
            cur.execute(_SELECT_FACTS_SQL)
            rows = cur.fetchall()
            log.info("Cultural facts: %d", len(rows))

            inserted = 0
            failed = 0
            # NOTE(review): the original computed a sha256 "cult:{id}:{question}"
            # hash per pair but never stored it, so deduplication relies solely on
            # whatever unique constraint dabi.training_qa defines for ON CONFLICT.
            for _fid, fact, cat in rows:
                # The prefix before the first underscore names the target
                # category, e.g. 'alan_ford_qa' -> 'cultural_alan'.
                target_category = 'cultural_' + cat.split('_')[0]
                for q in _questions_for(fact, cat)[:_MAX_QUESTIONS_PER_FACT]:
                    try:
                        cur.execute(_INSERT_QA_SQL, (q[:300], fact[:500], target_category))
                    except psycopg2.Error as e:
                        failed += 1
                        if failed <= _MAX_LOGGED_ERRORS:
                            log.warning("insert err: %s", e)
                    else:
                        # rowcount is 0 when ON CONFLICT skipped the row.
                        inserted += cur.rowcount

            if failed:
                log.warning("Insert failures: %d", failed)
            log.info("Inserted: %d cultural Q&A pairs", inserted)
    finally:
        conn.close()


if __name__ == "__main__":
    main()