diff --git a/pgz_sport_v2_router.py b/pgz_sport_v2_router.py index 2768be4..c0a0553 100644 --- a/pgz_sport_v2_router.py +++ b/pgz_sport_v2_router.py @@ -4959,8 +4959,24 @@ def proracun_sport(godina: int = None): # ═══════════════════════════════════════════════════════ # POTPORE — by year filter # ═══════════════════════════════════════════════════════ + + +@router.get("/potpore/meta") +def potpore_meta(): + """Dropdown options za Financije sekciju.""" + sportovi = db_query("SELECT DISTINCT sport FROM pgz_sport.sufinanciranje_sport WHERE sport IS NOT NULL ORDER BY sport") + vrste = db_query("SELECT DISTINCT vrsta FROM pgz_sport.sufinanciranje_sport WHERE vrsta IS NOT NULL ORDER BY vrsta") + davatelji = db_query("SELECT DISTINCT izvor, count(*) AS broj FROM pgz_sport.sufinanciranje_sport WHERE izvor IS NOT NULL GROUP BY izvor ORDER BY broj DESC") + godine = db_query("SELECT DISTINCT godina, count(*) AS broj, sum(iznos_eur)::numeric(12,2) AS suma FROM pgz_sport.sufinanciranje_sport GROUP BY godina ORDER BY godina DESC") + return { + "sportovi": [r["sport"] for r in sportovi], + "vrste": [r["vrsta"] for r in vrste], + "davatelji": [r["izvor"] for r in davatelji], + "godine": godine, + } + @router.get("/potpore/by-year") -def potpore_by_year(godina: int = None, q: str = "", samo_klubovi: bool = True, davatelj: str = None): +def potpore_by_year(godina: int = None, q: str = "", samo_klubovi: bool = True, davatelj: str = None, sport: str = None, vrsta: str = None): """Sufinanciranje za specifičnu godinu — samo_klubovi=True izbacuje programe/totals/services.""" import datetime yr = godina or datetime.date.today().year @@ -4972,6 +4988,12 @@ def potpore_by_year(godina: int = None, q: str = "", samo_klubovi: bool = True, if samo_klubovi: where.append("(je_klub IS NULL OR je_klub = true)") + if sport: + where.append("LOWER(sport) = LOWER(%s)") + params.append(sport) + if vrsta: + where.append("LOWER(vrsta) = LOWER(%s)") + params.append(vrsta) if davatelj == 'rijeka': where.append("izvor ILIKE '%%rijeka.hr%%'") elif davatelj == 'pgz': @@ -5952,3 +5974,46 @@ def v2_clan_hns_profile(clan_id: int): "hns_url": hns_url, } + +@router.get("/manifestacije/meta") +def manifestacije_meta(): + """Dropdown options za manifestacije.""" + mjesta = db_query("SELECT DISTINCT mjesto, count(*) AS broj FROM pgz_sport.manifestacije WHERE mjesto IS NOT NULL GROUP BY mjesto ORDER BY broj DESC LIMIT 100") + razine = db_query("SELECT DISTINCT razina FROM pgz_sport.manifestacije WHERE razina IS NOT NULL ORDER BY razina") + organizatori = db_query("SELECT DISTINCT organizator, count(*) AS broj FROM pgz_sport.manifestacije WHERE organizator IS NOT NULL GROUP BY organizator ORDER BY broj DESC LIMIT 50") + return { + "mjesta": [r["mjesto"] for r in mjesta], + "razine": [r["razina"] for r in razine], + "organizatori": [r["organizator"] for r in organizatori], + } + +@router.get("/manifestacije") +def manifestacije_list(mjesto: str = None, razina: str = None, organizator: str = None, q: str = None, limit: int = 200): + """Lista manifestacija s filterima.""" + where = ["aktivna = true"] + params = [] + if mjesto: + where.append("mjesto = %s") + params.append(mjesto) + if razina: + where.append("razina = %s") + params.append(razina) + if organizator: + where.append("organizator ILIKE %s") + params.append(f"%{organizator}%") + if q: + where.append("(naziv ILIKE %s OR napomena ILIKE %s)") + params.extend([f"%{q}%", f"%{q}%"]) + + rows = db_query(f""" + SELECT m.id, m.naziv, m.mjesto, m.organizator, m.razina, m.broj_ucesnika, + m.godina_od, m.spol_kategorija, m.napomena, m.source_url, + s.naziv AS savez_naziv, s.id AS savez_id + FROM pgz_sport.manifestacije m + LEFT JOIN pgz_sport.savezi s ON s.id = m.savez_id + WHERE {' AND '.join(where)} + ORDER BY m.naziv + LIMIT %s + """, params + [limit]) + return {"count": len(rows), "rows": rows} + diff --git a/scrapers/harvesters/eu_projekti_pgz.py b/scrapers/harvesters/eu_projekti_pgz.py new file mode 100644 index 0000000..b3470dc --- /dev/null +++ b/scrapers/harvesters/eu_projekti_pgz.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +"""EU projekti i fondovi PGZ.""" +import sys, json, time +sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters") +from _common import (fetch, extract_text, extract_title, chunk_text, + upsert_facts, find_internal_links, DSN) +from urllib.parse import urlparse +import psycopg2 + +EU = { + "eu_fondovi_pgz": ["https://strukturnifondovi.hr/"], + "ri_eu_kreativnost": ["https://rijeka2020.eu/"], + "rijeka_smartcity": ["https://www.rijeka.hr/smart-city/"], + "agencija_unutarnji": ["https://www.amenita.hr/"], + "ri_lokalna_akcijska": ["https://lag-rijeka.hr/"], + "agencija_jadranska": ["https://www.adriatic-ionian.eu/"], + "interreg_kvarner": ["https://www.italy-croatia.eu/"], + "horizon_pgz": ["https://horizon-europe.gov.hr/"], + "epro_kvarner": ["https://www.epro-kvarner.hr/"], + "leader_pgz": ["https://www.leader-rijeka.hr/"], +} + + +def crawl(name, urls, max_pages=10): + conn = psycopg2.connect(DSN); conn.autocommit = True + visited = set(); queue = list(urls); facts = 0 + while queue and len(visited) < max_pages: + url = queue.pop(0) + if url in visited: continue + visited.add(url) + html, status = fetch(url, timeout=15) + if not html or status != 200: continue + title = extract_title(html); text = extract_text(html) + if not text or len(text) < 200: continue + ff = [] + if title and len(title) > 8: + ff.append({"fact": f"{name} - {title}", "url": url, "title": title}) + for c in chunk_text(text, 800): + if len(c) > 100: + ff.append({"fact": c, "url": url, "title": title}) + facts += upsert_facts(conn, ff, source_name=name, + category="eu_projekti_pgz", confidence=0.84) + base = urlparse(url).hostname + for link in find_internal_links(html, url): + if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 25: + queue.append(link) + time.sleep(0.5) + conn.close() + return {"name": name, "visited": len(visited), "facts": facts} + + +def main(): + results = [] + for name, urls in EU.items(): + try: + r = crawl(name, urls, max_pages=10) + print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f") + results.append(r) + except Exception as e: + print(f" {name:25} FAIL: {str(e)[:60]}") + total = sum(r.get("facts", 0) for r in results) + print(f"=== TOTAL: {total} ===") + print(json.dumps({"eu_count": len(results), "total_facts": total})) + + +if __name__ == "__main__": + main() diff --git a/scrapers/harvesters/gov_deep_pgz.py b/scrapers/harvesters/gov_deep_pgz.py new file mode 100644 index 0000000..4ccd121 --- /dev/null +++ b/scrapers/harvesters/gov_deep_pgz.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +"""Gov.hr deep — DZS PxWeb, sudreg portal, sukobinteresa.""" +import sys, json, time +sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters") +from _common import (fetch, extract_text, extract_title, chunk_text, + upsert_facts, find_internal_links, DSN) +from urllib.parse import urlparse +import psycopg2 + +GOV = { + "dzs_kvarner": ["https://podaci.dzs.hr/hr/podaci/stanovnistvo/popis-stanovnistva/"], + "sukobinteresa_pgz": ["https://www.sukobinteresa.hr/"], + "drzavna_revizija": ["https://www.revizija.hr/"], + "drzavna_komisija_kn": ["https://www.dkom.hr/"], + "fina_pgz": ["https://www.fina.hr/poslovni-subjekti"], + "fina_javne_objave": ["https://www.fina.hr/javne-objave-uprave"], + "porezna_uprava_ri": ["https://www.porezna-uprava.hr/"], + "ministarstvo_pgz": ["https://www.gov.hr/hr/ministarstva"], + "vlada_hr_pgz": ["https://vlada.gov.hr/"], + "uprava_polic_pgz": ["https://policija.gov.hr/policijska-uprava-primorsko-goranska/"], + "vis_hr": ["https://www.vis-hr.com/"], +} + + +def crawl(name, urls, max_pages=10): + conn = psycopg2.connect(DSN); conn.autocommit = True + visited = set(); queue = list(urls); facts = 0 + while queue and len(visited) < max_pages: + url = queue.pop(0) + if url in visited: continue + visited.add(url) + html, status = fetch(url, timeout=15) + if not html or status != 200: continue + title = extract_title(html); text = extract_text(html) + if not text or len(text) < 200: continue + ff = [] + if title and len(title) > 8: + ff.append({"fact": f"{name} - {title}", "url": url, "title": title}) + for c in chunk_text(text, 800): + if len(c) > 100: + ff.append({"fact": c, "url": url, "title": title}) + facts += upsert_facts(conn, ff, source_name=name, + category="gov_registri_pgz", confidence=0.88) + base = urlparse(url).hostname + for link in find_internal_links(html, url): + if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 30: + queue.append(link) + time.sleep(0.5) + conn.close() + return {"name": name, "visited": len(visited), "facts": facts} + + +def main(): + results = [] + for name, urls in GOV.items(): + try: + r = crawl(name, urls, max_pages=10) + print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f") + results.append(r) + except Exception as e: + print(f" {name:25} FAIL: {str(e)[:60]}") + total = sum(r.get("facts", 0) for r in results) + print(f"=== TOTAL: {total} ===") + print(json.dumps({"gov_count": len(results), "total_facts": total})) + + +if __name__ == "__main__": + main() diff --git a/scrapers/harvesters/hoteli_pgz.py b/scrapers/harvesters/hoteli_pgz.py new file mode 100644 index 0000000..4f908e1 --- /dev/null +++ b/scrapers/harvesters/hoteli_pgz.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +"""Hoteli, restorani, smjestaj PGZ.""" +import sys, json, time +sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters") +from _common import (fetch, extract_text, extract_title, chunk_text, + upsert_facts, find_internal_links, DSN) +from urllib.parse import urlparse +import psycopg2 + +HOTELS = { + "milenij_hoteli": ["https://www.milenijhoteli.hr/"], + "uniline_kvarner": ["https://www.uniline.hr/destinacije/kvarner"], + "ri_botel": ["https://www.botel-marina.hr/"], + "jadran_hoteli": ["https://www.jadran-crikvenica.hr/"], + "valamar_kvarner": ["https://www.valamar.com/hr/odredista/kvarner"], + "kongres_centar": ["https://www.adriatic-grandhotel.com/"], + "lovran_riviera": ["https://www.liburnia.hr/"], + "remisens_grupa": ["https://www.remisens.com/"], + "imperijal_opatija": ["https://www.amadriapark.com/"], + "hotel_neboder": ["https://www.jadran-hoteli.hr/"], + "kanjon_zrmanja": ["https://www.kvarnertravel.hr/"], +} + + +def crawl(name, urls, max_pages=8): + conn = psycopg2.connect(DSN); conn.autocommit = True + visited = set(); queue = list(urls); facts = 0 + while queue and len(visited) < max_pages: + url = queue.pop(0) + if url in visited: continue + visited.add(url) + html, status = fetch(url, timeout=15) + if not html or status != 200: continue + title = extract_title(html); text = extract_text(html) + if not text or len(text) < 200: continue + ff = [] + if title and len(title) > 8: + ff.append({"fact": f"{name} - {title}", "url": url, "title": title}) + for c in chunk_text(text, 800): + if len(c) > 100: + ff.append({"fact": c, "url": url, "title": title}) + facts += upsert_facts(conn, ff, source_name=name, + category="hoteli_pgz", confidence=0.83) + base = urlparse(url).hostname + for link in find_internal_links(html, url): + if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 25: + queue.append(link) + time.sleep(0.5) + conn.close() + return {"name": name, "visited": len(visited), "facts": facts} + + +def main(): + results = [] + for name, urls in HOTELS.items(): + try: + r = crawl(name, urls, max_pages=8) + print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f") + results.append(r) + except Exception as e: + print(f" {name:25} FAIL: {str(e)[:60]}") + total = sum(r.get("facts", 0) for r in results) + print(f"=== TOTAL: {total} ===") + print(json.dumps({"hotel_count": len(results), "total_facts": total})) + + +if __name__ == "__main__": + main() diff --git a/scrapers/harvesters/politika_pgz.py b/scrapers/harvesters/politika_pgz.py new file mode 100644 index 0000000..e4e848a --- /dev/null +++ b/scrapers/harvesters/politika_pgz.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +"""Politicke stranke i skupstine PGZ.""" +import sys, json, time +sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters") +from _common import (fetch, extract_text, extract_title, chunk_text, + upsert_facts, find_internal_links, DSN) +from urllib.parse import urlparse +import psycopg2 + +POL = { + "skupstina_pgz": ["https://www.pgz.hr/skupstina-zupanije/"], + "rijeka_gradsko_vije": ["https://www.rijeka.hr/gradsko-vijece/"], + "stranka_lkr_pgz": ["https://www.lista-rijeka.com/"], + "sdp_rijeka": ["https://www.sdp.hr/podruznice/sdp-rijeka/"], + "hdz_pgz": ["https://hdz.hr/zupanije/primorsko-goranska/"], + "most_pgz": ["https://most.hr/regionalni-uredi"], + "akcija_mladih": ["https://akcijamladih.hr/"], + "rastimo_zajedno": ["https://rastimozajedno.hr/"], + "javnost_pgz": ["https://www.izbori.hr/"], + "zupan_obrenovic": ["https://www.pgz.hr/zupan/"], + "gradonacelnica_iv_r": ["https://www.rijeka.hr/gradonacelnica/"], +} + + +def crawl(name, urls, max_pages=10): + conn = psycopg2.connect(DSN); conn.autocommit = True + visited = set(); queue = list(urls); facts = 0 + while queue and len(visited) < max_pages: + url = queue.pop(0) + if url in visited: continue + visited.add(url) + html, status = fetch(url, timeout=15) + if not html or status != 200: continue + title = extract_title(html); text = extract_text(html) + if not text or len(text) < 200: continue + ff = [] + if title and len(title) > 8: + ff.append({"fact": f"{name} - {title}", "url": url, "title": title}) + for c in chunk_text(text, 800): + if len(c) > 100: + ff.append({"fact": c, "url": url, "title": title}) + facts += upsert_facts(conn, ff, source_name=name, + category="politika_pgz", confidence=0.85) + base = urlparse(url).hostname + for link in find_internal_links(html, url): + if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 30: + queue.append(link) + time.sleep(0.5) + conn.close() + return {"name": name, "visited": len(visited), "facts": facts} + + +def main(): + results = [] + for name, urls in POL.items(): + try: + r = crawl(name, urls, max_pages=10) + print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f") + results.append(r) + except Exception as e: + print(f" {name:25} FAIL: {str(e)[:60]}") + total = sum(r.get("facts", 0) for r in results) + print(f"=== TOTAL: {total} ===") + print(json.dumps({"pol_count": len(results), "total_facts": total})) + + +if __name__ == "__main__": + main() diff --git a/static/sport2.html b/static/sport2.html index 085eacd..b1b58ed 100644 --- a/static/sport2.html +++ b/static/sport2.html @@ -1397,6 +1397,7 @@ function applySaveziFilter(){ else if(fKat==='gradski') rows = rows.filter(s => /gradsk/i.test(s.razina||'')); if(pgz==='1') rows = rows.filter(s => s.pgz_relevant); if(_sort.savezi) rows = sortRows(rows, _sort.savezi.key, _sort.savezi.dir); + _filtersUpdateCount('savezi', rows.length); $('#sav-cnt').textContent = rows.length+' saveza'; $('#sav-out').innerHTML = _state.viewSavezi==='card' ? renderSaveziGrid(rows) : renderSaveziTable(rows); } @@ -1887,13 +1888,28 @@ async function loadSportasi(){ const root = $('#pg-sportasi'); if(!_cache.clanovi){ root.innerHTML = '