Files
pgz-sport/scrapers/harvesters/civic_data_pgz.py
T
damir f07fdad919 Crisis V7 MEGA: sufinanciranje_sport + panel + CRM auth
DB:
- pgz_sport.sufinanciranje_sport.je_klub flag (RSS programi/totals false)
- pgz_sport.sufinanciranje_sport.klub_id matched

Endpoints:
- /v2/potpore/by-year: samo_klubovi=True default + davatelj filter

Frontend:
- sport2.html PANEL FORCE HIDE CSS (right:-100vw default)
- crm_v2.html: redirect to /login only on actual 401, not on page load
2026-05-05 15:02:47 +02:00

71 lines
2.2 KiB
Python

#!/usr/bin/env python3
"""data.gov.hr — Open Data PGZ."""
import sys, json, time
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
from _common import upsert_facts, DSN, UA
import urllib.request
import psycopg2
API = "https://data.gov.hr/api/3/action"
def search(query, rows=50):
    """Run a ``package_search`` request and return the matching packages.

    Args:
        query: Free-text search term; it is percent-encoded before being
            placed in the URL.
        rows: Maximum number of results requested from the API.

    Returns:
        The list found under ``result.results`` in the JSON response, or an
        empty list when the request or the decoding fails (the error is
        printed, not raised).
    """
    encoded_query = urllib.parse.quote(query)
    url = f"{API}/package_search?q={encoded_query}&rows={rows}"
    try:
        request = urllib.request.Request(url, headers={"User-Agent": UA})
        with urllib.request.urlopen(request, timeout=30) as response:
            payload = json.loads(response.read())
        result = payload.get("result", {})
        return result.get("results", [])
    except Exception as e:
        # Best-effort harvest: one failed query must not abort the run.
        print(f"search err: {e}")
        return []
import urllib.parse
def _package_to_fact(pkg):
    """Convert one package dict from ``search`` into a fact record.

    Returns a dict with ``fact``, ``url`` and ``title`` keys, or ``None``
    when the assembled fact text is 50 characters or shorter.
    """
    # Optional fields can come back as JSON null rather than being absent,
    # so ``.get(key, default)`` alone would still yield None; coerce each
    # value before slicing / chaining on it.
    title = pkg.get("title") or ""
    notes = (pkg.get("notes") or "")[:600]
    org = (pkg.get("organization") or {}).get("title") or ""
    tags = ", ".join((t.get("name") or "") for t in (pkg.get("tags") or []))
    fact = f"[OpenData] {title} | Org: {org} | {notes} | Tags: {tags}"[:1200]
    if len(fact) <= 50:
        return None
    return {
        "fact": fact,
        "url": f"https://data.gov.hr/dataset/{pkg.get('name') or ''}",
        "title": title,
    }


def main():
    """Harvest open-data packages for a fixed list of place names.

    For each query the results of ``search`` are de-duplicated across all
    queries by package ``id``, converted to fact records, and passed as one
    batch to ``upsert_facts``. Per-query progress, a total line and a final
    JSON summary are printed to stdout.
    """
    queries = [
        "Primorsko-goranska", "Rijeka", "Opatija", "Crikvenica", "Krk",
        "Cres", "Lošinj", "Rab", "Delnice", "Bakar", "Kvarner",
    ]
    total_inserted = 0
    seen = set()
    conn = psycopg2.connect(DSN)
    try:
        conn.autocommit = True
        for q in queries:
            results = search(q, rows=50)
            ff = []
            for pkg in results:
                pkg_id = pkg.get("id", "")
                # The same dataset commonly matches several place names.
                if pkg_id in seen:
                    continue
                seen.add(pkg_id)
                record = _package_to_fact(pkg)
                if record is not None:
                    ff.append(record)
            n = upsert_facts(conn, ff, source_name="data_gov_hr_pgz",
                             category="opendata_pgz", confidence=0.85)
            total_inserted += n
            print(f"  query='{q}' -> {len(results)} results, {n} new facts")
            # Pause between queries to stay gentle on the remote API.
            time.sleep(1)
    finally:
        # Close the connection even if a query or an upsert raises.
        conn.close()
    print(f"=== TOTAL: {total_inserted} ===")
    print(json.dumps({"queries": len(queries), "total_facts": total_inserted}))


if __name__ == "__main__":
    main()