f07fdad919
DB:
- pgz_sport.sufinanciranje_sport.je_klub flag (RSS programi/totals false)
- pgz_sport.sufinanciranje_sport.klub_id matched
Endpoints:
- /v2/potpore/by-year: samo_klubovi=True default + davatelj filter
Frontend:
- sport2.html PANEL FORCE HIDE CSS (right:-100vw default)
- crm_v2.html: redirect to /login only on actual 401, not on page load
71 lines
2.2 KiB
Python
71 lines
2.2 KiB
Python
#!/usr/bin/env python3
|
|
"""data.gov.hr — Open Data PGZ."""
|
|
import sys, json, time
|
|
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
|
|
from _common import upsert_facts, DSN, UA
|
|
import urllib.request
|
|
import psycopg2
|
|
|
|
# Base URL of the data.gov.hr action API (v3); search() appends the action
# name (e.g. "/package_search") and its query string to this prefix.
API = "https://data.gov.hr/api/3/action"
|
|
|
|
|
|
def search(query, rows=50):
    """Run a ``package_search`` query against the API and return its hits.

    Args:
        query: Free-text search term; it is percent-encoded before being
            placed in the URL.
        rows: Value sent as the ``rows`` query parameter (page size).

    Returns:
        The list found under ``result.results`` in the JSON response, or an
        empty list when the request or the decoding fails. Failures are
        reported with a printed ``search err: ...`` line instead of raising.
    """
    endpoint = "{}/package_search?q={}&rows={}".format(
        API, urllib.parse.quote(query), rows
    )
    try:
        request = urllib.request.Request(endpoint, headers={"User-Agent": UA})
        with urllib.request.urlopen(request, timeout=30) as response:
            payload = json.loads(response.read())
            envelope = payload.get("result", {})
            return envelope.get("results", [])
    except Exception as exc:
        # Best-effort harvesting: one failed query must not abort the run.
        print(f"search err: {exc}")
        return []
|
|
|
|
import urllib.parse
|
|
|
|
|
|
def _package_to_fact(pkg):
    """Convert one dataset dict from the search results into a fact record.

    The API may send JSON ``null`` for optional fields (``notes``,
    ``organization``, ``tags``, a tag's ``name``). ``dict.get(key, default)``
    only applies the default when the key is *missing*, so every field is
    normalised with ``or`` to avoid ``TypeError``/``AttributeError`` on
    ``None``.

    Args:
        pkg: Dataset dict as returned by ``search``.

    Returns:
        A dict with ``fact``, ``url`` and ``title`` keys, or ``None`` when the
        assembled fact text is 50 characters or shorter.
    """
    title = pkg.get("title") or ""
    notes = (pkg.get("notes") or "")[:600]
    org = (pkg.get("organization") or {}).get("title") or ""
    tags = ", ".join((t.get("name") or "") for t in (pkg.get("tags") or []))

    fact = f"[OpenData] {title} | Org: {org} | {notes} | Tags: {tags}"[:1200]
    if len(fact) <= 50:
        return None
    return {
        "fact": fact,
        "url": f"https://data.gov.hr/dataset/{pkg.get('name', '')}",
        "title": title,
    }


def main():
    """Harvest dataset metadata for a fixed list of place names.

    For each query term the search results are de-duplicated by package id
    across the whole run, converted to fact records and passed to
    ``upsert_facts``. The value ``upsert_facts`` returns is summed and
    reported as the number of new facts (presumably an insert count; verify
    against ``_common.upsert_facts``). A one-second pause separates queries.

    The database connection is closed even if a query or upsert raises.
    A per-query line, a total line and a JSON summary are printed to stdout.
    """
    queries = [
        "Primorsko-goranska", "Rijeka", "Opatija", "Crikvenica", "Krk",
        "Cres", "Lošinj", "Rab", "Delnice", "Bakar", "Kvarner",
    ]

    total_inserted = 0
    seen = set()  # package ids already handled by an earlier query

    conn = psycopg2.connect(DSN)
    try:
        conn.autocommit = True

        for q in queries:
            results = search(q, rows=50)
            ff = []
            for pkg in results:
                pkg_id = pkg.get("id", "")
                if pkg_id in seen:
                    continue
                seen.add(pkg_id)

                record = _package_to_fact(pkg)
                if record is not None:
                    ff.append(record)

            n = upsert_facts(conn, ff, source_name="data_gov_hr_pgz",
                             category="opendata_pgz", confidence=0.85)
            total_inserted += n
            print(f" query='{q}' -> {len(results)} results, {n} new facts")
            time.sleep(1)  # be polite to the API between queries
    finally:
        # Release the connection on both the success and the failure path.
        conn.close()

    print(f"=== TOTAL: {total_inserted} ===")
    print(json.dumps({"queries": len(queries), "total_facts": total_inserted}))
|
|
|
|
|
|
# Run the harvest only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|