#!/usr/bin/env python3
"""data.gov.hr — Open Data PGZ.

Queries the data.gov.hr CKAN ``package_search`` action for a fixed list of
search terms, turns every distinct dataset into a short text "fact" and
passes the facts to ``upsert_facts`` from the shared ``_common`` module.
"""
import json
import sys
import time
import urllib.parse
import urllib.request

# The shared harvester helpers live outside the normal import path.
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")

import psycopg2
from _common import upsert_facts, DSN, UA

API = "https://data.gov.hr/api/3/action"


def search(query, rows=50):
    """Run a ``package_search`` request and return the list of datasets.

    Args:
        query: Free-text search term; it is URL-quoted before being sent.
        rows: Value of the ``rows`` query parameter.

    Returns:
        The ``result.results`` list of the decoded JSON response, or an
        empty list when the request or decoding fails (the error is printed,
        never raised) or when the response carries no results.
    """
    url = f"{API}/package_search?q={urllib.parse.quote(query)}&rows={rows}"
    try:
        req = urllib.request.Request(url, headers={"User-Agent": UA})
        with urllib.request.urlopen(req, timeout=30) as r:
            payload = json.loads(r.read())
        # ``or`` guards against JSON null, which ``.get(key, default)`` would
        # hand back as None instead of the default.
        return (payload.get("result") or {}).get("results") or []
    except Exception as e:
        # Deliberate best-effort: one failed query must not stop the harvest.
        print(f"search err: {e}")
        return []


def _package_to_fact(pkg):
    """Build the fact record for one dataset dict, or None if it is too short.

    Missing or null ``title``, ``notes``, ``organization``, ``tags`` and
    ``name`` fields are treated as empty instead of raising.
    """
    title = pkg.get("title") or ""
    notes = (pkg.get("notes") or "")[:600]
    org = (pkg.get("organization") or {}).get("title") or ""
    tags = ", ".join((t.get("name") or "") for t in (pkg.get("tags") or []))
    fact = f"[OpenData] {title} | Org: {org} | {notes} | Tags: {tags}"[:1200]
    # Facts of 50 characters or fewer are dropped.
    if len(fact) <= 50:
        return None
    return {
        "fact": fact,
        "url": f"https://data.gov.hr/dataset/{pkg.get('name') or ''}",
        "title": title,
    }


def main():
    """Harvest every query, store the facts and print a JSON summary.

    Datasets are de-duplicated across queries by their ``id``. The database
    connection is closed even when a query or an upsert raises.
    """
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    queries = [
        "Primorsko-goranska", "Rijeka", "Opatija", "Crikvenica", "Krk",
        "Cres", "Lošinj", "Rab", "Delnice", "Bakar", "Kvarner",
    ]
    total_inserted = 0
    seen = set()
    try:
        for q in queries:
            results = search(q, rows=50)
            facts = []
            for pkg in results:
                pkg_id = pkg.get("id", "")
                if pkg_id in seen:
                    continue
                seen.add(pkg_id)
                fact = _package_to_fact(pkg)
                if fact is not None:
                    facts.append(fact)
            # NOTE(review): upsert_facts is assumed to return the number of
            # newly stored facts — confirm against _common.
            n = upsert_facts(
                conn,
                facts,
                source_name="data_gov_hr_pgz",
                category="opendata_pgz",
                confidence=0.85,
            )
            total_inserted += n
            print(f" query='{q}' -> {len(results)} results, {n} new facts")
            # Pause between queries to stay polite towards the API.
            time.sleep(1)
    finally:
        conn.close()
    print(f"=== TOTAL: {total_inserted} ===")
    print(json.dumps({"queries": len(queries), "total_facts": total_inserted}))


if __name__ == "__main__":
    main()