#!/usr/bin/env python3
"""Extended cultural Q&A seed.

Selects culturally flavoured facts (Alan Ford, šatrovački, čakavski,
fjumanski, Rijeka lore) from ``dabi.knowledge``, derives question variants
for each fact and inserts them into ``dabi.training_qa`` with the fact text
as the answer.  At most ``MAX_QUESTIONS_PER_FACT`` questions are stored per
fact.
"""
import hashlib  # noqa: F401 - kept: file-level import from the original script
import logging
import os
import re
from typing import List, Optional

import psycopg2

logging.basicConfig(level=logging.INFO, format='%(asctime)s [cult_qa2] %(message)s')

# SECURITY NOTE(review): the database password is hard-coded in source.  Set
# the RINET_DSN environment variable to override it; the literal below is kept
# only as a backward-compatible default and should be moved out of the code.
DSN = os.environ.get(
    "RINET_DSN",
    "host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet password=R1net2026!SecureDB#v7",
)

# Number of generated questions that are actually saved for one fact.
MAX_QUESTIONS_PER_FACT = 3

# Local Rijeka + dialect facts: selected either by category or by marker words
# in the fact text (\m / \M are PostgreSQL word-boundary escapes).
SELECT_FACTS_SQL = """
    SELECT id, fact, category FROM dabi.knowledge
    WHERE (category IN ('alan_ford_priority','satrovacki_priority','cakavski_priority',
                        'fjumanski_priority','rijeka_izreka','lingvistika_qa',
                        'alan_ford_v3','alan_ford_qa','rijeka_alan','rijeka_humor',
                        'rijeka_lokalni','rijeka_qa','satrovacki_dict','satrovacki_jezik',
                        'pgz_administracija','pgz_promet','rijeka_lokali','rijeka_lokal')
           OR fact ~ '\\m(žišku|brodo|rista|vopi|kantun|ponistra|šugaman)\\M'
           OR fact ~ '\\m(Alan Ford|Bob Rock|Sir Oliver|TNT|Grunf)\\M')
      AND fact IS NOT NULL AND length(fact) > 30 AND length(fact) < 1500
    LIMIT 2000
"""

INSERT_QA_SQL = """
    INSERT INTO dabi.training_qa (question, answer, category, source_type, created_at)
    VALUES (%s, %s, %s, 'cultural_seed_v2', now())
    ON CONFLICT DO NOTHING
"""

# Alan Ford characters, checked in this order; only the first hit is used.
_CHARACTERS = (
    'Alan Ford', 'Bob Rock', 'Sir Oliver', 'Broj Jedan', 'Grunf', 'Jeremija',
    'Šef', 'Margot', 'Superhik', 'Notar', 'Cifra Sluga', 'Don Galon',
    'Debela Gilda',
)

# Category fragments that mark a fact as a dialect/slang dictionary entry.
_DIALECT_KEYS = ('satrovacki', 'cakavski', 'fjumanski', 'lokalni')

# Leading words that are never treated as the term being defined.
_SKIP_WORDS = frozenset(
    ['ova', 'taj', 'jest', 'znači', 'što', 'kako', 'tko', 'gdje']
)

# Places whose mention triggers the generic Rijeka question.
_RIJEKA_MARKERS = ('rijeka', 'kvarner', 'trsat', 'preluk')

# Fallback question per category fragment, checked in this order.
_FALLBACK_QUESTIONS = (
    ('alan_ford', 'Pričaj mi nešto o Alan Fordu.'),
    ('cakavski', 'Pričaj mi o čakavskom dijalektu.'),
    ('satrovacki', 'Što je šatrovački?'),
    ('fjumanski', 'Što je fjumanski?'),
    ('rijeka', 'Što je posebno za Rijeku?'),
)

_WORD_RE = re.compile(r'\b\w+\b')


def _build_questions(fact: str, category: Optional[str]) -> List[str]:
    """Return every candidate question for one fact, in priority order.

    Args:
        fact: The fact text that will serve as the answer.
        category: The fact's category; ``None`` is treated as an empty string.

    Returns:
        A possibly empty list of questions.  The caller decides how many of
        them are stored.
    """
    fact_lower = fact.lower()
    category_lower = (category or "").lower()
    questions: List[str] = []

    # Alan Ford characters: three variants for the first character mentioned.
    for name in _CHARACTERS:
        if name.lower() in fact_lower:
            questions.extend(
                [f"Tko je {name}?", f"Što znaš o {name}?", f"Kakav je lik {name}?"]
            )
            break

    # Šatrovački/čakavski/fjumanski: treat the first "interesting" word among
    # the first three words of the fact as the term being explained.
    if any(key in category_lower for key in _DIALECT_KEYS):
        for word in _WORD_RE.findall(fact)[:3]:
            if len(word) >= 3 and word.lower() not in _SKIP_WORDS:
                questions.append(f"Što znači riječ {word}?")
                questions.append(f"Što je {word}?")
                break

    # Rijeka sayings.
    if 'izrek' in category_lower or 'izrek' in fact_lower:
        questions.append("Reci mi neku riječku izreku.")
        questions.append("Imaš li primjer riječke poslovice?")

    # General Rijeka context.
    if any(marker in fact_lower for marker in _RIJEKA_MARKERS):
        questions.append("Što mi možeš reći o Rijeci?")

    # Fallback question based on the category, only when nothing else matched.
    if not questions:
        for key, question in _FALLBACK_QUESTIONS:
            if key in category_lower:
                questions.append(question)
                break

    return questions


def _seed(cur) -> int:
    """Generate and insert Q&A rows using *cur*; return the inserted row count.

    A failed INSERT is logged and skipped so one bad row does not abort the
    run (the connection is expected to be in autocommit mode).
    """
    cur.execute(SELECT_FACTS_SQL)
    rows = cur.fetchall()
    logging.info("Cultural facts proširen: %s", len(rows))

    inserted = 0
    for fact_id, fact, raw_category in rows:
        # Rows matched only through the fact-text regexes may carry a NULL
        # category; normalise it so the string operations below cannot fail.
        category = raw_category or ""
        label = 'cultural_' + category.split('_')[0][:20]

        # NOTE(review): the original computed a sha256 hash of
        # "cv2:<fact id>:<question>" here but never stored it, so
        # ON CONFLICT DO NOTHING relies on whatever unique constraint
        # dabi.training_qa defines - confirm deduplication works as intended.
        for question in _build_questions(fact, category)[:MAX_QUESTIONS_PER_FACT]:
            try:
                cur.execute(INSERT_QA_SQL, (question[:300], fact[:800], label))
                inserted += cur.rowcount
            except psycopg2.Error as exc:
                logging.warning(
                    "Insert failed for fact %s (%r): %s", fact_id, question, exc
                )
    return inserted


def main() -> None:
    """Connect to the database, seed the cultural Q&A rows and log the totals."""
    conn = psycopg2.connect(DSN)
    try:
        # Autocommit keeps each INSERT independent, so a failed statement
        # does not poison the following ones.
        conn.autocommit = True
        with conn.cursor() as cur:
            inserted = _seed(cur)
        logging.info("Inserted: %s cultural Q&A v2", inserted)
    finally:
        conn.close()


if __name__ == "__main__":
    main()