Files
damir f07fdad919 Crisis V7 MEGA: sufinanciranje_sport + panel + CRM auth
DB:
- pgz_sport.sufinanciranje_sport.je_klub flag (RSS programi/totals false)
- pgz_sport.sufinanciranje_sport.klub_id matched

Endpoints:
- /v2/potpore/by-year: samo_klubovi=True default + davatelj filter

Frontend:
- sport2.html PANEL FORCE HIDE CSS (right:-100vw default)
- crm_v2.html: redirect to /login only on actual 401, not on page load
2026-05-05 15:02:47 +02:00

84 lines
2.8 KiB
Python

#!/usr/bin/env python3
"""Naselja PGZ — sela, zaseoci, otocna mjesta."""
import sys, json, time
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
from _common import chunk_text, upsert_facts, DSN, UA
from urllib.parse import urlencode, quote
import urllib.request
import psycopg2
API_HR = "https://hr.wikipedia.org/w/api.php"
def wiki_cat_members(cat, limit=200):
    """Return the titles of the members of a Wikipedia category.

    Queries the ``list=categorymembers`` module of the MediaWiki API at
    ``API_HR`` and follows the ``continue`` token returned by the API, so a
    ``limit`` larger than what a single response carries is still honoured.

    NOTE(review): no ``cmtype`` filter is sent, so the API presumably also
    returns subcategory and file titles -- confirm this is intended.

    Args:
        cat: Full category title, including its namespace prefix.
        limit: Maximum number of titles to return.

    Returns:
        A list of at most ``limit`` title strings. The lookup is best-effort:
        on a network or decoding error the problem is reported on stderr and
        the titles gathered so far (possibly none) are returned.
    """
    cap = int(limit)
    titles = []
    params = {"action": "query", "list": "categorymembers", "cmtitle": cat,
              "format": "json"}
    try:
        while len(titles) < cap:
            # Ask only for what is still missing; the server may return fewer.
            params["cmlimit"] = str(cap - len(titles))
            url = API_HR + "?" + urlencode(params)
            req = urllib.request.Request(url, headers={"User-Agent": UA})
            with urllib.request.urlopen(req, timeout=20) as r:
                d = json.loads(r.read())
            members = d.get("query", {}).get("categorymembers", [])
            titles.extend(m["title"] for m in members)
            cont = d.get("continue")
            # Stop when the API signals the end, or when a batch comes back
            # empty (guards against looping on a stuck continuation token).
            if not cont or not members:
                break
            # The continuation values are merged into the next request.
            params.update(cont)
    except Exception as exc:
        # Keep the best-effort contract, but leave a trace so that a failed
        # request is distinguishable from a genuinely empty category.
        print(f"wiki_cat_members({cat!r}) failed: {exc!r}", file=sys.stderr)
    return titles[:cap] if cap > 0 else []
def wiki_extract(title, timeout=15):
    """Fetch the plain-text extract of one Wikipedia page.

    Sends a ``prop=extracts`` query with ``explaintext=1`` and ``redirects=1``
    to the MediaWiki API at ``API_HR``.

    Args:
        title: Title of the page to fetch.
        timeout: Timeout in seconds passed to ``urllib.request.urlopen``.

    Returns:
        The extract of the first page in the response (``""`` when that page
        has no ``extract`` field), or ``None`` when the page id is ``"-1"``,
        the response lists no pages, or the request fails. Failures are
        reported on stderr so they can be told apart from missing pages.
    """
    params = {"action": "query", "prop": "extracts", "explaintext": "1",
              "redirects": "1", "format": "json", "titles": title}
    url = API_HR + "?" + urlencode(params)
    try:
        req = urllib.request.Request(url, headers={"User-Agent": UA})
        with urllib.request.urlopen(req, timeout=timeout) as r:
            d = json.loads(r.read())
        pages = d.get("query", {}).get("pages", {})
        # Only one title is requested, so only the first entry matters.
        first = next(iter(pages.items()), None)
        if first is None:
            return None
        pid, page = first
        if pid == "-1":
            return None
        return page.get("extract", "")
    except Exception as exc:
        # Best-effort: report the failure instead of hiding it, then signal
        # "no text" to the caller exactly as before.
        print(f"wiki_extract({title!r}) failed: {exc!r}", file=sys.stderr)
        return None
# Croatian-Wikipedia category titles whose members are harvested by main().
# A title that appears in several of these categories is processed only once.
CATEGORIES = [
    "Kategorija:Naselja_u_Primorsko-goranskoj_županiji",
    "Kategorija:Naselja_u_Hrvatskoj_(otok_Krk)",
    "Kategorija:Naselja_u_Hrvatskoj_(otok_Cres)",
    "Kategorija:Naselja_u_Hrvatskoj_(otok_Lošinj)",
    "Kategorija:Naselja_u_Hrvatskoj_(otok_Rab)",
    "Kategorija:Gorski_kotar",
]
def main():
    """Harvest every category in ``CATEGORIES`` and store the page chunks.

    For each category the member titles are listed; every title not seen
    before is fetched as plain text. Texts shorter than 200 characters are
    skipped. The rest is split with ``chunk_text(text, 600)`` and the chunks
    longer than 100 characters are handed to ``upsert_facts``. A progress line
    per category and a final summary (human-readable and JSON) are printed to
    stdout.
    """
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    total = 0
    pages = 0
    seen = set()
    try:
        for cat in CATEGORIES:
            members = wiki_cat_members(cat, limit=200)
            print(f" {cat[:50]:50} {len(members):>3} members")
            for title in members:
                if title in seen:
                    continue
                seen.add(title)
                text = wiki_extract(title)
                # Throttle after every request, including those whose result
                # is discarded below, so skipped pages cannot cause a burst.
                time.sleep(0.3)
                if not text or len(text) < 200:
                    continue
                pages += 1
                page_url = f"https://hr.wikipedia.org/wiki/{quote(title)}"
                facts = [{"fact": c, "url": page_url, "title": title}
                         for c in chunk_text(text, 600) if len(c) > 100]
                n = upsert_facts(conn, facts, source_name="wikipedia_pgz_naselja",
                                 category="naselja_pgz", confidence=0.86)
                total += n
    finally:
        # Release the database connection even when a fetch or upsert raises.
        conn.close()
    print(f"=== TOTAL pages={pages} facts={total} ===")
    print(json.dumps({"pages": pages, "total_facts": total}))
# Run the harvest only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()