V8 MEGA: meta endpoints + manifestacije + HNS V8 harvester batch

Endpoints:
- /v2/potpore/meta — dropdown options (sportovi, vrste, davatelji, godine)
- /v2/potpore/by-year — sport, vrsta filters
- /v2/manifestacije/meta — mjesta, razine, organizatori
- /v2/manifestacije — lista s filterima

HNS:
- 20 PGŽ priority klubova batch harvester pokrenut (HNK Goranin, HNK Orijent 1919, HNK Rijeka, NK Crikvenica, ...)
- ETA 30 min
This commit is contained in:
2026-05-05 18:10:02 +02:00
parent f07fdad919
commit a428363d42
6 changed files with 360 additions and 6 deletions
+66 -1
View File
@@ -4959,8 +4959,24 @@ def proracun_sport(godina: int = None):
# ═══════════════════════════════════════════════════════
# POTPORE — by year filter
# ═══════════════════════════════════════════════════════
@router.get("/potpore/meta")
def potpore_meta():
    """Return the dropdown options used by the Financije (finance) section.

    Runs four aggregate queries against pgz_sport.sufinanciranje_sport and
    returns a dict with:
      - "sportovi":  distinct non-NULL sport values, sorted by sport
      - "vrste":     distinct non-NULL vrsta values, sorted by vrsta
      - "davatelji": non-NULL izvor values, most frequent first
      - "godine":    the raw per-year rows (godina, broj, suma), newest first
    """
    sport_rows = db_query("SELECT DISTINCT sport FROM pgz_sport.sufinanciranje_sport WHERE sport IS NOT NULL ORDER BY sport")
    vrsta_rows = db_query("SELECT DISTINCT vrsta FROM pgz_sport.sufinanciranje_sport WHERE vrsta IS NOT NULL ORDER BY vrsta")
    izvor_rows = db_query("SELECT DISTINCT izvor, count(*) AS broj FROM pgz_sport.sufinanciranje_sport WHERE izvor IS NOT NULL GROUP BY izvor ORDER BY broj DESC")
    year_rows = db_query("SELECT DISTINCT godina, count(*) AS broj, sum(iznos_eur)::numeric(12,2) AS suma FROM pgz_sport.sufinanciranje_sport GROUP BY godina ORDER BY godina DESC")

    options = {}
    options["sportovi"] = [row["sport"] for row in sport_rows]
    options["vrste"] = [row["vrsta"] for row in vrsta_rows]
    # Only the provider name is exposed; the per-provider count is used
    # solely for ordering.
    options["davatelji"] = [row["izvor"] for row in izvor_rows]
    # Year rows are passed through unchanged so the client also gets
    # the count and the sum for each year.
    options["godine"] = year_rows
    return options
@router.get("/potpore/by-year")
def potpore_by_year(godina: int = None, q: str = "", samo_klubovi: bool = True, davatelj: str = None):
def potpore_by_year(godina: int = None, q: str = "", samo_klubovi: bool = True, davatelj: str = None, sport: str = None, vrsta: str = None):
"""Sufinanciranje za specifičnu godinu — samo_klubovi=True izbacuje programe/totals/services."""
import datetime
yr = godina or datetime.date.today().year
@@ -4972,6 +4988,12 @@ def potpore_by_year(godina: int = None, q: str = "", samo_klubovi: bool = True,
if samo_klubovi:
where.append("(je_klub IS NULL OR je_klub = true)")
if sport:
where.append("LOWER(sport) = LOWER(%s)")
params.append(sport)
if vrsta:
where.append("LOWER(vrsta) = LOWER(%s)")
params.append(vrsta)
if davatelj == 'rijeka':
where.append("izvor ILIKE '%%rijeka.hr%%'")
elif davatelj == 'pgz':
@@ -5952,3 +5974,46 @@ def v2_clan_hns_profile(clan_id: int):
"hns_url": hns_url,
}
@router.get("/manifestacije/meta")
def manifestacije_meta():
    """Return the dropdown options for the manifestacije (events) filters.

    Returns a dict with:
      - "mjesta":       up to 100 non-NULL mjesto values, most frequent first
      - "razine":       distinct non-NULL razina values, sorted by razina
      - "organizatori": up to 50 non-NULL organizator values, most frequent first
    """
    place_rows = db_query("SELECT DISTINCT mjesto, count(*) AS broj FROM pgz_sport.manifestacije WHERE mjesto IS NOT NULL GROUP BY mjesto ORDER BY broj DESC LIMIT 100")
    level_rows = db_query("SELECT DISTINCT razina FROM pgz_sport.manifestacije WHERE razina IS NOT NULL ORDER BY razina")
    organizer_rows = db_query("SELECT DISTINCT organizator, count(*) AS broj FROM pgz_sport.manifestacije WHERE organizator IS NOT NULL GROUP BY organizator ORDER BY broj DESC LIMIT 50")

    # The counts only drive the ordering; the response carries plain names.
    options = {}
    options["mjesta"] = [row["mjesto"] for row in place_rows]
    options["razine"] = [row["razina"] for row in level_rows]
    options["organizatori"] = [row["organizator"] for row in organizer_rows]
    return options
@router.get("/manifestacije")
def manifestacije_list(mjesto: str = None, razina: str = None, organizator: str = None, q: str = None, limit: int = 200):
    """List events (manifestacije) with optional filters.

    Args:
        mjesto: Exact match on the event's mjesto.
        razina: Exact match on the event's razina.
        organizator: Case-insensitive substring match on organizator.
        q: Case-insensitive substring match on the event's naziv or napomena.
        limit: Maximum number of rows returned; negative values are treated
            as 0 because PostgreSQL rejects a negative LIMIT.

    Returns:
        {"count": <number of rows>, "rows": <rows from db_query>}, ordered
        by event name, each row joined with its federation (savez) name/id.
    """
    # NOTE(review): `aktivna` is left unqualified exactly as before because
    # the table that owns it is not visible here - presumably manifestacije;
    # confirm and qualify it as m.aktivna.
    where = ["aktivna = true"]
    params = []
    # Every filter column is qualified with the `m` alias: the query joins
    # pgz_sport.savezi, which also has a `naziv` column, so an unqualified
    # `naziv` makes PostgreSQL fail with "column reference is ambiguous".
    if mjesto:
        where.append("m.mjesto = %s")
        params.append(mjesto)
    if razina:
        where.append("m.razina = %s")
        params.append(razina)
    if organizator:
        where.append("m.organizator ILIKE %s")
        params.append(f"%{organizator}%")
    if q:
        where.append("(m.naziv ILIKE %s OR m.napomena ILIKE %s)")
        params.extend([f"%{q}%", f"%{q}%"])
    where_sql = " AND ".join(where)
    # Only fixed fragments are interpolated into the SQL text; every
    # user-supplied value travels as a bound parameter.
    rows = db_query(f"""
        SELECT m.id, m.naziv, m.mjesto, m.organizator, m.razina, m.broj_ucesnika,
               m.godina_od, m.spol_kategorija, m.napomena, m.source_url,
               s.naziv AS savez_naziv, s.id AS savez_id
        FROM pgz_sport.manifestacije m
        LEFT JOIN pgz_sport.savezi s ON s.id = m.savez_id
        WHERE {where_sql}
        ORDER BY m.naziv
        LIMIT %s
    """, params + [max(limit, 0)])
    return {"count": len(rows), "rows": rows}
+67
View File
@@ -0,0 +1,67 @@
#!/usr/bin/env python3
"""EU projekti i fondovi PGZ."""
import sys, json, time
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
from _common import (fetch, extract_text, extract_title, chunk_text,
upsert_facts, find_internal_links, DSN)
from urllib.parse import urlparse
import psycopg2
# Seed URLs for the EU projects/funds harvest.
# Each key is the source label handed to crawl(); each value is the list of
# start URLs for that source. Insertion order is the crawl order.
# NOTE(review): several labels do not obviously match the domain they point
# to - confirm the label/URL pairing with whoever curated this list.
EU = {
    "eu_fondovi_pgz": ["https://strukturnifondovi.hr/"],
    "ri_eu_kreativnost": ["https://rijeka2020.eu/"],
    "rijeka_smartcity": ["https://www.rijeka.hr/smart-city/"],
    "agencija_unutarnji": ["https://www.amenita.hr/"],
    "ri_lokalna_akcijska": ["https://lag-rijeka.hr/"],
    "agencija_jadranska": ["https://www.adriatic-ionian.eu/"],
    "interreg_kvarner": ["https://www.italy-croatia.eu/"],
    "horizon_pgz": ["https://horizon-europe.gov.hr/"],
    "epro_kvarner": ["https://www.epro-kvarner.hr/"],
    "leader_pgz": ["https://www.leader-rijeka.hr/"],
}
def crawl(name, urls, max_pages=10):
    """Crawl one source breadth-first and store its page text as facts.

    Args:
        name: Source label; passed to upsert_facts as source_name and used
            as the prefix of the per-page title fact.
        urls: Seed URLs that start the crawl queue.
        max_pages: Upper bound on the number of URLs attempted. A URL counts
            as visited before it is fetched, so failed fetches count too.

    Returns:
        dict with "name", "visited" (number of URLs attempted) and "facts"
        (sum of the values returned by upsert_facts).
    """
    conn = psycopg2.connect(DSN)
    visited = set()
    queue = list(urls)
    facts = 0
    try:
        conn.autocommit = True
        while queue and len(visited) < max_pages:
            url = queue.pop(0)
            if url in visited:
                continue
            visited.add(url)
            html, status = fetch(url, timeout=15)
            if not html or status != 200:
                continue
            title = extract_title(html)
            text = extract_text(html)
            # Pages with little or no extracted text are skipped entirely.
            if not text or len(text) < 200:
                continue
            ff = []
            if title and len(title) > 8:
                ff.append({"fact": f"{name} - {title}", "url": url, "title": title})
            for c in chunk_text(text, 800):
                # Very short chunks are dropped.
                if len(c) > 100:
                    ff.append({"fact": c, "url": url, "title": title})
            facts += upsert_facts(conn, ff, source_name=name,
                                  category="eu_projekti_pgz", confidence=0.84)
            # Follow only links on the same hostname and keep the queue small.
            base = urlparse(url).hostname
            for link in find_internal_links(html, url):
                if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 25:
                    queue.append(link)
            # Pause between successfully processed pages.
            time.sleep(0.5)
    finally:
        # Close the connection even when fetching, parsing or the upsert
        # raises: main() swallows the exception and moves to the next
        # source, so without this each failed source leaked a connection.
        conn.close()
    return {"name": name, "visited": len(visited), "facts": facts}
def main():
    """Crawl every EU source and print per-source lines plus a JSON summary.

    A source whose crawl raises is reported as FAIL and left out of the
    totals; the remaining sources are still processed.
    """
    completed = []
    for name, seeds in EU.items():
        try:
            r = crawl(name, seeds, max_pages=10)
            print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f")
            completed.append(r)
        except Exception as e:
            # Best-effort batch: one broken source must not stop the run.
            print(f" {name:25} FAIL: {str(e)[:60]}")

    total = 0
    for report in completed:
        total += report.get("facts", 0)
    print(f"=== TOTAL: {total} ===")
    summary = {"eu_count": len(completed), "total_facts": total}
    print(json.dumps(summary))


if __name__ == "__main__":
    main()
+68
View File
@@ -0,0 +1,68 @@
#!/usr/bin/env python3
"""Gov.hr deep — DZS PxWeb, sudreg portal, sukobinteresa."""
import sys, json, time
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
from _common import (fetch, extract_text, extract_title, chunk_text,
upsert_facts, find_internal_links, DSN)
from urllib.parse import urlparse
import psycopg2
# Seed URLs for the government registries harvest.
# Each key is the source label handed to crawl(); each value is the list of
# start URLs for that source. Insertion order is the crawl order.
GOV = {
    "dzs_kvarner": ["https://podaci.dzs.hr/hr/podaci/stanovnistvo/popis-stanovnistva/"],
    "sukobinteresa_pgz": ["https://www.sukobinteresa.hr/"],
    "drzavna_revizija": ["https://www.revizija.hr/"],
    "drzavna_komisija_kn": ["https://www.dkom.hr/"],
    "fina_pgz": ["https://www.fina.hr/poslovni-subjekti"],
    "fina_javne_objave": ["https://www.fina.hr/javne-objave-uprave"],
    "porezna_uprava_ri": ["https://www.porezna-uprava.hr/"],
    "ministarstvo_pgz": ["https://www.gov.hr/hr/ministarstva"],
    "vlada_hr_pgz": ["https://vlada.gov.hr/"],
    "uprava_polic_pgz": ["https://policija.gov.hr/policijska-uprava-primorsko-goranska/"],
    "vis_hr": ["https://www.vis-hr.com/"],
}
def crawl(name, urls, max_pages=10):
    """Crawl one source breadth-first and store its page text as facts.

    Args:
        name: Source label; passed to upsert_facts as source_name and used
            as the prefix of the per-page title fact.
        urls: Seed URLs that start the crawl queue.
        max_pages: Upper bound on the number of URLs attempted. A URL counts
            as visited before it is fetched, so failed fetches count too.

    Returns:
        dict with "name", "visited" (number of URLs attempted) and "facts"
        (sum of the values returned by upsert_facts).
    """
    conn = psycopg2.connect(DSN)
    visited = set()
    queue = list(urls)
    facts = 0
    try:
        conn.autocommit = True
        while queue and len(visited) < max_pages:
            url = queue.pop(0)
            if url in visited:
                continue
            visited.add(url)
            html, status = fetch(url, timeout=15)
            if not html or status != 200:
                continue
            title = extract_title(html)
            text = extract_text(html)
            # Pages with little or no extracted text are skipped entirely.
            if not text or len(text) < 200:
                continue
            ff = []
            if title and len(title) > 8:
                ff.append({"fact": f"{name} - {title}", "url": url, "title": title})
            for c in chunk_text(text, 800):
                # Very short chunks are dropped.
                if len(c) > 100:
                    ff.append({"fact": c, "url": url, "title": title})
            facts += upsert_facts(conn, ff, source_name=name,
                                  category="gov_registri_pgz", confidence=0.88)
            # Follow only links on the same hostname and keep the queue small.
            base = urlparse(url).hostname
            for link in find_internal_links(html, url):
                if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 30:
                    queue.append(link)
            # Pause between successfully processed pages.
            time.sleep(0.5)
    finally:
        # Close the connection even when fetching, parsing or the upsert
        # raises: main() swallows the exception and moves to the next
        # source, so without this each failed source leaked a connection.
        conn.close()
    return {"name": name, "visited": len(visited), "facts": facts}
def main():
    """Crawl every GOV source and print per-source lines plus a JSON summary.

    A source whose crawl raises is reported as FAIL and left out of the
    totals; the remaining sources are still processed.
    """
    completed = []
    for name, seeds in GOV.items():
        try:
            r = crawl(name, seeds, max_pages=10)
            print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f")
            completed.append(r)
        except Exception as e:
            # Best-effort batch: one broken source must not stop the run.
            print(f" {name:25} FAIL: {str(e)[:60]}")

    total = 0
    for report in completed:
        total += report.get("facts", 0)
    print(f"=== TOTAL: {total} ===")
    summary = {"gov_count": len(completed), "total_facts": total}
    print(json.dumps(summary))


if __name__ == "__main__":
    main()
+68
View File
@@ -0,0 +1,68 @@
#!/usr/bin/env python3
"""Hoteli, restorani, smjestaj PGZ."""
import sys, json, time
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
from _common import (fetch, extract_text, extract_title, chunk_text,
upsert_facts, find_internal_links, DSN)
from urllib.parse import urlparse
import psycopg2
# Seed URLs for the hotels / accommodation harvest.
# Each key is the source label handed to crawl(); each value is the list of
# start URLs for that source. Insertion order is the crawl order.
# NOTE(review): several labels do not obviously match the domain they point
# to (e.g. "kanjon_zrmanja", "hotel_neboder") - confirm the pairing.
HOTELS = {
    "milenij_hoteli": ["https://www.milenijhoteli.hr/"],
    "uniline_kvarner": ["https://www.uniline.hr/destinacije/kvarner"],
    "ri_botel": ["https://www.botel-marina.hr/"],
    "jadran_hoteli": ["https://www.jadran-crikvenica.hr/"],
    "valamar_kvarner": ["https://www.valamar.com/hr/odredista/kvarner"],
    "kongres_centar": ["https://www.adriatic-grandhotel.com/"],
    "lovran_riviera": ["https://www.liburnia.hr/"],
    "remisens_grupa": ["https://www.remisens.com/"],
    "imperijal_opatija": ["https://www.amadriapark.com/"],
    "hotel_neboder": ["https://www.jadran-hoteli.hr/"],
    "kanjon_zrmanja": ["https://www.kvarnertravel.hr/"],
}
def crawl(name, urls, max_pages=8):
    """Crawl one source breadth-first and store its page text as facts.

    Args:
        name: Source label; passed to upsert_facts as source_name and used
            as the prefix of the per-page title fact.
        urls: Seed URLs that start the crawl queue.
        max_pages: Upper bound on the number of URLs attempted. A URL counts
            as visited before it is fetched, so failed fetches count too.

    Returns:
        dict with "name", "visited" (number of URLs attempted) and "facts"
        (sum of the values returned by upsert_facts).
    """
    conn = psycopg2.connect(DSN)
    visited = set()
    queue = list(urls)
    facts = 0
    try:
        conn.autocommit = True
        while queue and len(visited) < max_pages:
            url = queue.pop(0)
            if url in visited:
                continue
            visited.add(url)
            html, status = fetch(url, timeout=15)
            if not html or status != 200:
                continue
            title = extract_title(html)
            text = extract_text(html)
            # Pages with little or no extracted text are skipped entirely.
            if not text or len(text) < 200:
                continue
            ff = []
            if title and len(title) > 8:
                ff.append({"fact": f"{name} - {title}", "url": url, "title": title})
            for c in chunk_text(text, 800):
                # Very short chunks are dropped.
                if len(c) > 100:
                    ff.append({"fact": c, "url": url, "title": title})
            facts += upsert_facts(conn, ff, source_name=name,
                                  category="hoteli_pgz", confidence=0.83)
            # Follow only links on the same hostname and keep the queue small.
            base = urlparse(url).hostname
            for link in find_internal_links(html, url):
                if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 25:
                    queue.append(link)
            # Pause between successfully processed pages.
            time.sleep(0.5)
    finally:
        # Close the connection even when fetching, parsing or the upsert
        # raises: main() swallows the exception and moves to the next
        # source, so without this each failed source leaked a connection.
        conn.close()
    return {"name": name, "visited": len(visited), "facts": facts}
def main():
    """Crawl every HOTELS source and print per-source lines plus a JSON summary.

    A source whose crawl raises is reported as FAIL and left out of the
    totals; the remaining sources are still processed.
    """
    completed = []
    for name, seeds in HOTELS.items():
        try:
            r = crawl(name, seeds, max_pages=8)
            print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f")
            completed.append(r)
        except Exception as e:
            # Best-effort batch: one broken source must not stop the run.
            print(f" {name:25} FAIL: {str(e)[:60]}")

    total = 0
    for report in completed:
        total += report.get("facts", 0)
    print(f"=== TOTAL: {total} ===")
    summary = {"hotel_count": len(completed), "total_facts": total}
    print(json.dumps(summary))


if __name__ == "__main__":
    main()
+68
View File
@@ -0,0 +1,68 @@
#!/usr/bin/env python3
"""Politicke stranke i skupstine PGZ."""
import sys, json, time
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
from _common import (fetch, extract_text, extract_title, chunk_text,
upsert_facts, find_internal_links, DSN)
from urllib.parse import urlparse
import psycopg2
# Seed URLs for the political parties / assemblies harvest.
# Each key is the source label handed to crawl(); each value is the list of
# start URLs for that source. Insertion order is the crawl order.
POL = {
    "skupstina_pgz": ["https://www.pgz.hr/skupstina-zupanije/"],
    "rijeka_gradsko_vije": ["https://www.rijeka.hr/gradsko-vijece/"],
    "stranka_lkr_pgz": ["https://www.lista-rijeka.com/"],
    "sdp_rijeka": ["https://www.sdp.hr/podruznice/sdp-rijeka/"],
    "hdz_pgz": ["https://hdz.hr/zupanije/primorsko-goranska/"],
    "most_pgz": ["https://most.hr/regionalni-uredi"],
    "akcija_mladih": ["https://akcijamladih.hr/"],
    "rastimo_zajedno": ["https://rastimozajedno.hr/"],
    "javnost_pgz": ["https://www.izbori.hr/"],
    "zupan_obrenovic": ["https://www.pgz.hr/zupan/"],
    "gradonacelnica_iv_r": ["https://www.rijeka.hr/gradonacelnica/"],
}
def crawl(name, urls, max_pages=10):
    """Crawl one source breadth-first and store its page text as facts.

    Args:
        name: Source label; passed to upsert_facts as source_name and used
            as the prefix of the per-page title fact.
        urls: Seed URLs that start the crawl queue.
        max_pages: Upper bound on the number of URLs attempted. A URL counts
            as visited before it is fetched, so failed fetches count too.

    Returns:
        dict with "name", "visited" (number of URLs attempted) and "facts"
        (sum of the values returned by upsert_facts).
    """
    conn = psycopg2.connect(DSN)
    visited = set()
    queue = list(urls)
    facts = 0
    try:
        conn.autocommit = True
        while queue and len(visited) < max_pages:
            url = queue.pop(0)
            if url in visited:
                continue
            visited.add(url)
            html, status = fetch(url, timeout=15)
            if not html or status != 200:
                continue
            title = extract_title(html)
            text = extract_text(html)
            # Pages with little or no extracted text are skipped entirely.
            if not text or len(text) < 200:
                continue
            ff = []
            if title and len(title) > 8:
                ff.append({"fact": f"{name} - {title}", "url": url, "title": title})
            for c in chunk_text(text, 800):
                # Very short chunks are dropped.
                if len(c) > 100:
                    ff.append({"fact": c, "url": url, "title": title})
            facts += upsert_facts(conn, ff, source_name=name,
                                  category="politika_pgz", confidence=0.85)
            # Follow only links on the same hostname and keep the queue small.
            base = urlparse(url).hostname
            for link in find_internal_links(html, url):
                if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 30:
                    queue.append(link)
            # Pause between successfully processed pages.
            time.sleep(0.5)
    finally:
        # Close the connection even when fetching, parsing or the upsert
        # raises: main() swallows the exception and moves to the next
        # source, so without this each failed source leaked a connection.
        conn.close()
    return {"name": name, "visited": len(visited), "facts": facts}
def main():
    """Crawl every POL source and print per-source lines plus a JSON summary.

    A source whose crawl raises is reported as FAIL and left out of the
    totals; the remaining sources are still processed.
    """
    completed = []
    for name, seeds in POL.items():
        try:
            r = crawl(name, seeds, max_pages=10)
            print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f")
            completed.append(r)
        except Exception as e:
            # Best-effort batch: one broken source must not stop the run.
            print(f" {name:25} FAIL: {str(e)[:60]}")

    total = 0
    for report in completed:
        total += report.get("facts", 0)
    print(f"=== TOTAL: {total} ===")
    summary = {"pol_count": len(completed), "total_facts": total}
    print(json.dumps(summary))


if __name__ == "__main__":
    main()
+23 -5
View File
@@ -1397,6 +1397,7 @@ function applySaveziFilter(){
else if(fKat==='gradski') rows = rows.filter(s => /gradsk/i.test(s.razina||''));
if(pgz==='1') rows = rows.filter(s => s.pgz_relevant);
if(_sort.savezi) rows = sortRows(rows, _sort.savezi.key, _sort.savezi.dir);
_filtersUpdateCount('savezi', rows.length);
$('#sav-cnt').textContent = rows.length+' saveza';
$('#sav-out').innerHTML = _state.viewSavezi==='card' ? renderSaveziGrid(rows) : renderSaveziTable(rows);
}
@@ -1887,13 +1888,28 @@ async function loadSportasi(){
const root = $('#pg-sportasi');
if(!_cache.clanovi){
root.innerHTML = '<div class="loading">Učitavanje sportaša…</div>';
// PGŽ filter: switch to v2 priority-sort (joins club, marks priority)
const url = window._pgz_filter_priority
? '/v2/clanovi/priority-sort?only=true&limit=2000'
: '/clanovi-full?limit=500';
// BUG-E (2026-05-05): explicit filter via /v2/sportasi/filtered.
// Priority + HNS profil + godina_rod_od/do are server-side params.
const f = _filters.sportasi;
const useFiltered = f.priority || f.hns_profil || f.godina_od || f.godina_do;
let url;
if(useFiltered){
const qs = new URLSearchParams();
qs.set('limit','2000');
if(f.priority) qs.set('samo_priority','true');
if(f.hns_profil) qs.set('samo_s_hns','true');
if(f.godina_od) qs.set('godina_rod_od', String(f.godina_od));
if(f.godina_do) qs.set('godina_rod_do', String(f.godina_do));
url = '/v2/sportasi/filtered?'+qs.toString();
} else if(window._pgz_filter_priority){
url = '/v2/clanovi/priority-sort?only=true&limit=2000';
} else {
url = '/clanovi-full?limit=500';
}
const d = await api(url);
if(!d){ root.innerHTML='<div class="empty">Greška pri dohvatu</div>'; return; }
_cache.clanovi = d.rows || [];
_filters.sportasi.total = (d.rows||[]).length;
}
renderSportasiShell();
applySportasiFilter();
@@ -1901,9 +1917,10 @@ async function loadSportasi(){
function renderSportasiShell(){
const root = $('#pg-sportasi');
const sports = Array.from(new Set((_cache.clanovi||[]).map(c=>c.sport).filter(Boolean))).sort();
const klubovi = Array.from(new Map((_cache.clanovi||[]).filter(c=>c.klub_id).map(c=>[c.klub_id, c.klub_naziv_godisnjak||('Klub #'+c.klub_id)])).entries()).sort((a,b)=>String(a[1]).localeCompare(String(b[1]),'hr'));
const klubovi = Array.from(new Map((_cache.clanovi||[]).filter(c=>c.klub_id).map(c=>[c.klub_id, c.klub_naziv_godisnjak||c.klub_naziv||('Klub #'+c.klub_id)])).entries()).sort((a,b)=>String(a[1]).localeCompare(String(b[1]),'hr'));
const kats = Array.from(new Set((_cache.clanovi||[]).flatMap(c => (c.kategorije && c.kategorije.length ? c.kategorije : [c.kategorija]).filter(Boolean)))).sort();
root.innerHTML = `
${_filtersBar('sportasi')}
<div class="toolbar">
<input type="search" id="sp-q" placeholder="🔍 Ime ili prezime…">
<select id="sp-sport"><option value="">Svi sportovi</option>${sports.map(s=>'<option value="'+esc(s)+'">'+esc(s)+'</option>').join('')}</select>
@@ -2050,6 +2067,7 @@ function applySportasiFilter(){
if(_state.spExtraAktivan==='false') rows = rows.filter(c => !c.aktivan);
if(_state.spExtraStipendiran) rows = rows.filter(c => c.stipendiran);
if(_sort.sportasi) rows = sortRows(rows, _sort.sportasi.key, _sort.sportasi.dir);
_filtersUpdateCount('sportasi', rows.length);
$('#sp-cnt').textContent = rows.length+' sportaša';
const top = rows.slice(0, 300);
$('#sp-out').innerHTML = _state.viewSportasi==='card' ? renderSportasiGrid(top) : renderSportasiTable(top);