diff --git a/.claude/worktrees/agent-a2230c7d02a7c02f4 b/.claude/worktrees/agent-a2230c7d02a7c02f4 index 8127e2e..f488623 160000 --- a/.claude/worktrees/agent-a2230c7d02a7c02f4 +++ b/.claude/worktrees/agent-a2230c7d02a7c02f4 @@ -1 +1 @@ -Subproject commit 8127e2ef2220092d36dc6f55f77db92f9b7cbfc3 +Subproject commit f4886239201d9f2cfe38272347139c85de3cce16 diff --git a/.claude/worktrees/agent-a54ff6ad4250d2734 b/.claude/worktrees/agent-a54ff6ad4250d2734 index 8127e2e..38383d0 160000 --- a/.claude/worktrees/agent-a54ff6ad4250d2734 +++ b/.claude/worktrees/agent-a54ff6ad4250d2734 @@ -1 +1 @@ -Subproject commit 8127e2ef2220092d36dc6f55f77db92f9b7cbfc3 +Subproject commit 38383d07c5e4fe794355cfae6ddf3ec6533f1b09 diff --git a/.claude/worktrees/agent-a70769f0db14302aa b/.claude/worktrees/agent-a70769f0db14302aa index 8127e2e..55a27fb 160000 --- a/.claude/worktrees/agent-a70769f0db14302aa +++ b/.claude/worktrees/agent-a70769f0db14302aa @@ -1 +1 @@ -Subproject commit 8127e2ef2220092d36dc6f55f77db92f9b7cbfc3 +Subproject commit 55a27fb315bb2f779d6351a9437a67397a6d1300 diff --git a/.claude/worktrees/agent-af39fdf2dbfd08afe b/.claude/worktrees/agent-af39fdf2dbfd08afe index 8127e2e..efa15d0 160000 --- a/.claude/worktrees/agent-af39fdf2dbfd08afe +++ b/.claude/worktrees/agent-af39fdf2dbfd08afe @@ -1 +1 @@ -Subproject commit 8127e2ef2220092d36dc6f55f77db92f9b7cbfc3 +Subproject commit efa15d00862e1a48e8423daf62165127869ef753 diff --git a/pgz_sport_api.py b/pgz_sport_api.py index 9675e9b..acde47c 100644 --- a/pgz_sport_api.py +++ b/pgz_sport_api.py @@ -2835,6 +2835,56 @@ def auth_me_v2_alias(authorization: str = Header(None)): except Exception as e: raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/v2/sportski-objekti") +def sportski_objekti_v2_list(tip: str = None, grad: str = None, sport: str = None, q: str = None, limit: int = 500): + """Sportski objekti PGŽ s filterima.""" + where = ["aktivan = true"] + params = [] + if tip: + where.append("tip = %s"); 
params.append(tip) + if grad: + where.append("grad = %s"); params.append(grad) + if sport: + where.append("%s = ANY(sportovi)"); params.append(sport) + if q: + where.append("(naziv ILIKE %s OR adresa ILIKE %s OR upravitelj ILIKE %s)") + params.extend([f"%{q}%"]*3) + + rows = fetch(f""" + SELECT id, naziv, tip, grad, adresa, lat, lng, upravitelj, kapacitet, + sportovi, izgradeno, obnovljeno_god, "veličina" AS velicina, natkrita, + napomena, web + FROM pgz_sport.sportski_objekti + WHERE {' AND '.join(where)} + ORDER BY grad, naziv + LIMIT %s + """, tuple(params) + (limit,)) + return {"count": len(rows), "rows": rows} + + +@app.get("/api/v2/sportski-objekti/meta") +def sportski_objekti_meta(): + """Dropdown options za filter.""" + tipovi = fetch("SELECT tip, count(*) AS broj FROM pgz_sport.sportski_objekti WHERE aktivan = true AND tip IS NOT NULL GROUP BY tip ORDER BY broj DESC") + gradovi = fetch("SELECT grad, count(*) AS broj FROM pgz_sport.sportski_objekti WHERE aktivan = true AND grad IS NOT NULL GROUP BY grad ORDER BY broj DESC") + sportovi = fetch("SELECT DISTINCT unnest(sportovi) AS sport, count(*) AS broj FROM pgz_sport.sportski_objekti WHERE aktivan = true AND sportovi IS NOT NULL GROUP BY sport ORDER BY broj DESC LIMIT 50") + return { + "tipovi": tipovi, + "gradovi": gradovi, + "sportovi": sportovi, + "ukupno": (fetch("SELECT count(*) AS n FROM pgz_sport.sportski_objekti WHERE aktivan = true")[0])["n"] + } + + +@app.get("/objekti") +@app.get("/objekti/") +@app.get("/sport/objekti") +@app.get("/sport/objekti/") +def serve_objekti(): + from fastapi.responses import FileResponse + return FileResponse("/opt/pgz-sport/static/objekti.html") + @app.get("/") def root(request: Request): host = request.headers.get("host", "") diff --git a/scrapers/harvesters/akademski_pgz.py b/scrapers/harvesters/akademski_pgz.py new file mode 100644 index 0000000..2101121 --- /dev/null +++ b/scrapers/harvesters/akademski_pgz.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +"""UNIRI 
akademski repozitorij + znanstveni radovi.""" +import sys, json, time +sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters") +from _common import (fetch, extract_text, extract_title, chunk_text, + upsert_facts, find_internal_links, DSN) +from urllib.parse import urlparse +import psycopg2 + +ACADEMIC = { + "uniri_repozitorij": ["https://repozitorij.uniri.hr/"], + "portal_znanstveni": ["https://portal.uniri.hr/"], + "hrčak_uniri": ["https://hrcak.srce.hr/"], + "pfri_radovi": ["https://repository.pfri.uniri.hr/"], + "medri_radovi": ["https://medri.uniri.hr/znanstveni-radovi/"], + "tfr_radovi": ["https://www.riteh.uniri.hr/"], + "ffri_radovi": ["https://www.ffri.uniri.hr/znanstveni-radovi/"], +} + + +def crawl(name, urls, max_pages=15): + conn = psycopg2.connect(DSN); conn.autocommit = True + visited = set(); queue = list(urls); facts = 0 + while queue and len(visited) < max_pages: + url = queue.pop(0) + if url in visited: continue + visited.add(url) + html, status = fetch(url, timeout=20) + if not html or status != 200: continue + title = extract_title(html); text = extract_text(html) + if not text or len(text) < 300: continue + ff = [] + if title and len(title) > 15: + ff.append({"fact": f"[Academic] {name} - {title}", "url": url, "title": title}) + for c in chunk_text(text, 900): + if len(c) > 150: + ff.append({"fact": c, "url": url, "title": title}) + facts += upsert_facts(conn, ff, source_name=name, + category="akademski_pgz", confidence=0.90) + base = urlparse(url).hostname + for link in find_internal_links(html, url): + if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 40: + queue.append(link) + time.sleep(0.7) + conn.close() + return {"name": name, "visited": len(visited), "facts": facts} + + +def main(): + results = [] + for name, urls in ACADEMIC.items(): + try: + r = crawl(name, urls, max_pages=12) + print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f") + results.append(r) + except Exception as e: + print(f" {name:25} 
FAIL: {str(e)[:60]}") + total = sum(r.get("facts", 0) for r in results) + print(f"=== TOTAL: {total} ===") + print(json.dumps({"academic_count": len(results), "total_facts": total})) + + +if __name__ == "__main__": + main() diff --git a/scrapers/harvesters/commerce_pgz.py b/scrapers/harvesters/commerce_pgz.py new file mode 100644 index 0000000..6dafb5d --- /dev/null +++ b/scrapers/harvesters/commerce_pgz.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +"""Lokalne firme i obrti PGŽ — HGK/HOK članovi.""" +import sys, json, time +sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters") +from _common import (fetch, extract_text, extract_title, chunk_text, + upsert_facts, find_internal_links, DSN) +from urllib.parse import urlparse +import psycopg2 + +COMMERCE = { + "hgk_rijeka_deep": ["https://www.hgk.hr/zupanijske-komore/primorsko-goranska-zupanijska-komora"], + "hok_pgz": ["https://www.hok.hr/"], + "poduzetnistvo_ri": ["https://www.pgz.hr/gospodarstvo/"], + "poslovni_inkubator": ["https://www.step-ri.hr/"], + "izvoz_import_pgz": ["https://www.hgk.hr/izvoz-uvoz"], + "tehnopolis_firme": ["https://www.tehnopolis.hr/"], + "start_up_ri": ["https://www.startup-rijeka.hr/"], + "obrtnička_komora": ["https://www.hok.hr/pgz"], + "tz_pgz_biznis": ["https://www.kvarner.hr/biznis"], + "free_zone_rijeka": ["https://www.rfind.hr/"], +} + + +def crawl(name, urls, max_pages=12): + conn = psycopg2.connect(DSN); conn.autocommit = True + visited = set(); queue = list(urls); facts = 0 + while queue and len(visited) < max_pages: + url = queue.pop(0) + if url in visited: continue + visited.add(url) + html, status = fetch(url, timeout=15) + if not html or status != 200: continue + title = extract_title(html); text = extract_text(html) + if not text or len(text) < 200: continue + ff = [] + if title and len(title) > 8: + ff.append({"fact": f"{name} - {title}", "url": url, "title": title}) + for c in chunk_text(text, 800): + if len(c) > 100: + ff.append({"fact": c, "url": url, "title": title}) +
facts += upsert_facts(conn, ff, source_name=name, + category="commerce_pgz", confidence=0.84) + base = urlparse(url).hostname + for link in find_internal_links(html, url): + if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 35: + queue.append(link) + time.sleep(0.5) + conn.close() + return {"name": name, "visited": len(visited), "facts": facts} + + +def main(): + results = [] + for name, urls in COMMERCE.items(): + try: + r = crawl(name, urls, max_pages=10) + print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f") + results.append(r) + except Exception as e: + print(f" {name:25} FAIL: {str(e)[:60]}") + total = sum(r.get("facts", 0) for r in results) + print(f"=== TOTAL: {total} ===") + print(json.dumps({"commerce_count": len(results), "total_facts": total})) + + +if __name__ == "__main__": + main() diff --git a/scrapers/harvesters/ekologija_pgz.py b/scrapers/harvesters/ekologija_pgz.py new file mode 100644 index 0000000..86b22ab --- /dev/null +++ b/scrapers/harvesters/ekologija_pgz.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +"""Ekologija i zaštita okoliša PGŽ.""" +import sys, json, time +sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters") +from _common import (fetch, extract_text, extract_title, chunk_text, + upsert_facts, find_internal_links, DSN) +from urllib.parse import urlparse +import psycopg2 + +ECOLOGY = { + "np_risnjak_full": ["https://www.np-risnjak.hr/"], + "pp_ucka": ["https://pp-ucka.hr/"], + "zelena_akcija": ["https://zelena-akcija.hr/"], + "eko_kvarner": ["https://www.eko-kvarner.hr/"], + "fundacija_adris": ["https://www.adris.hr/"], + "plava_zastava": ["https://www.plava-zastava.hr/"], + "cistoca_pgz": ["https://www.cistoca.hr/"], + "vodoopskrba_pgz": ["https://www.kdvik-rijeka.hr/"], + "otpad_pgz": ["https://www.komunalac.hr/"], + "More_cisto": ["https://more-cisto.hr/"], + "zzjz_okolisa": ["https://www.zzjzpgz.hr/zastita-okolisa/"], +} + + +def crawl(name, urls, max_pages=12): + conn = 
psycopg2.connect(DSN); conn.autocommit = True + visited = set(); queue = list(urls); facts = 0 + while queue and len(visited) < max_pages: + url = queue.pop(0) + if url in visited: continue + visited.add(url) + html, status = fetch(url, timeout=15) + if not html or status != 200: continue + title = extract_title(html); text = extract_text(html) + if not text or len(text) < 200: continue + ff = [] + if title and len(title) > 8: + ff.append({"fact": f"{name} - {title}", "url": url, "title": title}) + for c in chunk_text(text, 800): + if len(c) > 100: + ff.append({"fact": c, "url": url, "title": title}) + facts += upsert_facts(conn, ff, source_name=name, + category="ekologija_pgz", confidence=0.86) + base = urlparse(url).hostname + for link in find_internal_links(html, url): + if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 35: + queue.append(link) + time.sleep(0.5) + conn.close() + return {"name": name, "visited": len(visited), "facts": facts} + + +def main(): + results = [] + for name, urls in ECOLOGY.items(): + try: + r = crawl(name, urls, max_pages=10) + print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f") + results.append(r) + except Exception as e: + print(f" {name:25} FAIL: {str(e)[:60]}") + total = sum(r.get("facts", 0) for r in results) + print(f"=== TOTAL: {total} ===") + print(json.dumps({"ecology_count": len(results), "total_facts": total})) + + +if __name__ == "__main__": + main() diff --git a/scrapers/harvesters/ngo_pgz.py b/scrapers/harvesters/ngo_pgz.py new file mode 100644 index 0000000..0dd6455 --- /dev/null +++ b/scrapers/harvesters/ngo_pgz.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +"""NGO i udruge PGŽ — mladi, veterani, humanitarne.""" +import sys, json, time +sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters") +from _common import (fetch, extract_text, extract_title, chunk_text, + upsert_facts, find_internal_links, DSN) +from urllib.parse import urlparse +import psycopg2 + +NGO = { + "mladi_pgz": 
["https://www.pgz.hr/mladi/"], + "udruge_pgz": ["https://www.udruge.hr/pgz"], + "humanitarne_pgz": ["https://www.volonterski-centar-ri.hr/"], + "crveni_kriz_full": ["https://www.crveni-kriz-rijeka.hr/"], + "veterani_pgz": ["https://www.veterani.hr/pgz"], + "umirovljenici_pgz": ["https://www.savez-umirovljenika.hr/"], + "invalidi_pgz": ["https://www.invalidi-rijeka.hr/"], + "zivotrodi_pgz": ["https://www.zivotrodi.hr/"], + "omladina_ri": ["https://www.omladina-rijeka.hr/"], +} + + +def crawl(name, urls, max_pages=10): + conn = psycopg2.connect(DSN); conn.autocommit = True + visited = set(); queue = list(urls); facts = 0 + while queue and len(visited) < max_pages: + url = queue.pop(0) + if url in visited: continue + visited.add(url) + html, status = fetch(url, timeout=15) + if not html or status != 200: continue + title = extract_title(html); text = extract_text(html) + if not text or len(text) < 200: continue + ff = [] + if title and len(title) > 8: + ff.append({"fact": f"{name} - {title}", "url": url, "title": title}) + for c in chunk_text(text, 800): + if len(c) > 100: + ff.append({"fact": c, "url": url, "title": title}) + facts += upsert_facts(conn, ff, source_name=name, + category="ngo_pgz", confidence=0.84) + base = urlparse(url).hostname + for link in find_internal_links(html, url): + if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 25: + queue.append(link) + time.sleep(0.5) + conn.close() + return {"name": name, "visited": len(visited), "facts": facts} + + +def main(): + results = [] + for name, urls in NGO.items(): + try: + r = crawl(name, urls, max_pages=8) + print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f") + results.append(r) + except Exception as e: + print(f" {name:25} FAIL: {str(e)[:60]}") + total = sum(r.get("facts", 0) for r in results) + print(f"=== TOTAL: {total} ===") + print(json.dumps({"ngo_count": len(results), "total_facts": total})) + + +if __name__ == "__main__": + main() diff --git 
a/scrapers/harvesters/pomorstvo_pgz.py b/scrapers/harvesters/pomorstvo_pgz.py new file mode 100644 index 0000000..225264d --- /dev/null +++ b/scrapers/harvesters/pomorstvo_pgz.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +"""Pomorstvo — luke, marine, brodogradnja, nautika.""" +import sys, json, time +sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters") +from _common import (fetch, extract_text, extract_title, chunk_text, + upsert_facts, find_internal_links, DSN) +from urllib.parse import urlparse +import psycopg2 + +MARITIME = { + "luka_rijeka_full": ["https://www.lukarijeka.hr/", "https://www.portauthority.hr/"], + "aci_marine_kvarner": ["https://www.aci-marinas.com/"], + "marina_opatija": ["https://www.marina-opatija.hr/"], + "marina_punat": ["https://www.marina-punat.hr/"], + "marina_cres": ["https://www.aci-marinas.com/marina/aci-cres"], + "yachting_kvarner": ["https://www.yachting-kvarner.com/"], + "brod_3maj_deep": ["https://www.3maj.hr/"], + "viktor_lenac_deep": ["https://www.lenac.hr/"], + "pfri_maritime": ["https://www.pfri.uniri.hr/"], + "luka_baska": ["https://www.luka-baska.hr/"], + "luke_kvarner": ["https://www.luke-hrvatska.hr/"], +} + + +def crawl(name, urls, max_pages=15): + conn = psycopg2.connect(DSN); conn.autocommit = True + visited = set(); queue = list(urls); facts = 0 + while queue and len(visited) < max_pages: + url = queue.pop(0) + if url in visited: continue + visited.add(url) + html, status = fetch(url, timeout=15) + if not html or status != 200: continue + title = extract_title(html); text = extract_text(html) + if not text or len(text) < 200: continue + ff = [] + if title and len(title) > 8: + ff.append({"fact": f"{name} - {title}", "url": url, "title": title}) + for c in chunk_text(text, 800): + if len(c) > 100: + ff.append({"fact": c, "url": url, "title": title}) + facts += upsert_facts(conn, ff, source_name=name, + category="pomorstvo_pgz", confidence=0.87) + base = urlparse(url).hostname + for link in find_internal_links(html, 
url): + if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 40: + queue.append(link) + time.sleep(0.5) + conn.close() + return {"name": name, "visited": len(visited), "facts": facts} + + +def main(): + results = [] + for name, urls in MARITIME.items(): + try: + r = crawl(name, urls, max_pages=12) + print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f") + results.append(r) + except Exception as e: + print(f" {name:25} FAIL: {str(e)[:60]}") + total = sum(r.get("facts", 0) for r in results) + print(f"=== TOTAL: {total} ===") + print(json.dumps({"maritime_count": len(results), "total_facts": total})) + + +if __name__ == "__main__": + main() diff --git a/scrapers/harvesters/skole_pgz.py b/scrapers/harvesters/skole_pgz.py new file mode 100644 index 0000000..b4d12ea --- /dev/null +++ b/scrapers/harvesters/skole_pgz.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +"""Osnovne i srednje škole + vrtići PGŽ.""" +import sys, json, time +sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters") +from _common import (fetch, extract_text, extract_title, chunk_text, + upsert_facts, find_internal_links, DSN) +from urllib.parse import urlparse +import psycopg2 + +SCHOOLS = { + "srednje_skole_ri": ["https://www.skole.hr/skole/primorsko-goranska-zupanija"], + "osnove_skole_ri": ["https://www.rijeka.hr/skole/"], + "vrtici_pgz": ["https://www.pula.hr/hr/gradski-vrtic/"], + "gimnazija_ri": ["https://www.gimnazija-rijeka.hr/"], + "tehnicka_skola_ri": ["https://www.tehnicka-rijeka.hr/"], + "ekonomska_skola_ri": ["https://www.ekonomska-rijeka.hr/"], + "medicinska_skola_ri": ["https://www.medicinska-skola-rijeka.hr/"], + "skole_opatija": ["https://www.opatija.hr/skole"], + "skole_crikvenica": ["https://www.skole-crikvenica.hr/"], +} + + +def crawl(name, urls, max_pages=10): + conn = psycopg2.connect(DSN); conn.autocommit = True + visited = set(); queue = list(urls); facts = 0 + while queue and len(visited) < max_pages: + url = queue.pop(0) + if url in visited: 
continue + visited.add(url) + html, status = fetch(url, timeout=15) + if not html or status != 200: continue + title = extract_title(html); text = extract_text(html) + if not text or len(text) < 200: continue + ff = [] + if title and len(title) > 8: + ff.append({"fact": f"{name} - {title}", "url": url, "title": title}) + for c in chunk_text(text, 800): + if len(c) > 100: + ff.append({"fact": c, "url": url, "title": title}) + facts += upsert_facts(conn, ff, source_name=name, + category="skole_pgz", confidence=0.86) + base = urlparse(url).hostname + for link in find_internal_links(html, url): + if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 30: + queue.append(link) + time.sleep(0.5) + conn.close() + return {"name": name, "visited": len(visited), "facts": facts} + + +def main(): + results = [] + for name, urls in SCHOOLS.items(): + try: + r = crawl(name, urls, max_pages=8) + print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f") + results.append(r) + except Exception as e: + print(f" {name:25} FAIL: {str(e)[:60]}") + total = sum(r.get("facts", 0) for r in results) + print(f"=== TOTAL: {total} ===") + print(json.dumps({"schools_count": len(results), "total_facts": total})) + + +if __name__ == "__main__": + main() diff --git a/scrapers/harvesters/sport_federacije_pgz.py b/scrapers/harvesters/sport_federacije_pgz.py new file mode 100644 index 0000000..d890c62 --- /dev/null +++ b/scrapers/harvesters/sport_federacije_pgz.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +"""Sport federacije i savezi PGŽ — all sports.""" +import sys, json, time +sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters") +from _common import (fetch, extract_text, extract_title, chunk_text, + upsert_facts, find_internal_links, DSN) +from urllib.parse import urlparse +import psycopg2 + +FEDERATIONS = { + "zns_pgz": ["https://www.pgzns.hr/"], + "kss_pgz": ["https://www.kss-pgz.hr/"], + "handball_pgz": ["https://www.hrs-pgz.hr/"], + "odbojka_savez_pgz": 
["https://www.odbojka-pgz.hr/"], + "atletika_savez_pgz": ["https://www.atletika-pgz.hr/"], + "plivanje_savez_pgz": ["https://www.plivanje-pgz.hr/"], + "skijaski_savez_pgz": ["https://www.ski-pgz.hr/"], + "tenis_savez_pgz": ["https://www.tenis-pgz.hr/"], + "judo_savez_pgz": ["https://www.judo-pgz.hr/"], + "karate_savez_pgz": ["https://www.karate-pgz.hr/"], + "kuglanje_savez_pgz": ["https://www.kuglanje-pgz.hr/"], +} + + +def crawl(name, urls, max_pages=12): + conn = psycopg2.connect(DSN); conn.autocommit = True + visited = set(); queue = list(urls); facts = 0 + while queue and len(visited) < max_pages: + url = queue.pop(0) + if url in visited: continue + visited.add(url) + html, status = fetch(url, timeout=15) + if not html or status != 200: continue + title = extract_title(html); text = extract_text(html) + if not text or len(text) < 200: continue + ff = [] + if title and len(title) > 8: + ff.append({"fact": f"{name} - {title}", "url": url, "title": title}) + for c in chunk_text(text, 800): + if len(c) > 100: + ff.append({"fact": c, "url": url, "title": title}) + facts += upsert_facts(conn, ff, source_name=name, + category="sport_federacije_pgz", confidence=0.88) + base = urlparse(url).hostname + for link in find_internal_links(html, url): + if link not in visited and (urlparse(link).hostname or "") == base and len(queue) < 35: + queue.append(link) + time.sleep(0.5) + conn.close() + return {"name": name, "visited": len(visited), "facts": facts} + + +def main(): + results = [] + for name, urls in FEDERATIONS.items(): + try: + r = crawl(name, urls, max_pages=10) + print(f" {name:25} {r['visited']:>3}p {r['facts']:>5}f") + results.append(r) + except Exception as e: + print(f" {name:25} FAIL: {str(e)[:60]}") + total = sum(r.get("facts", 0) for r in results) + print(f"=== TOTAL: {total} ===") + print(json.dumps({"fed_count": len(results), "total_facts": total})) + + +if __name__ == "__main__": + main() diff --git a/scripts/objekti_enrich_address.py 
b/scripts/objekti_enrich_address.py
new file mode 100644
index 0000000..d7c750a
--- /dev/null
+++ b/scripts/objekti_enrich_address.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+# File: objekti_enrich_address.py | v1.0 | 05.05.2026
+# Author: Damir Radulić
+# Purpose: reverse-geocode lat/lng into an address for sportski_objekti rows.
+import os, time, json
+import psycopg2, requests
+
+# The connection string comes from the environment so that no credentials are
+# committed with the script.
+# NOTE(review): the password that used to be hard-coded here is in git
+# history and should be rotated.
+DSN = os.environ.get("PGZ_SPORT_DSN")
+if not DSN:
+    raise SystemExit("PGZ_SPORT_DSN is not set")
+HEADERS = {"User-Agent": "Ri.NET PGŽ Sport (dradulic@outlook.com)"}
+
+conn = psycopg2.connect(DSN); conn.autocommit = True
+
+# Work list: active objects that have coordinates but no address yet (max 60 per run).
+with conn.cursor() as cur:
+    cur.execute("""
+        SELECT id, naziv, lat, lng FROM pgz_sport.sportski_objekti
+        WHERE aktivan = true AND lat IS NOT NULL AND lng IS NOT NULL
+          AND (adresa IS NULL OR adresa = '')
+        LIMIT 60
+    """)
+    rows = cur.fetchall()
+
+print(f"Total: {len(rows)} objekata bez adrese")
+
+for i, (oid, naziv, lat, lng) in enumerate(rows):
+    try:
+        # Nominatim reverse geocoding
+        r = requests.get(
+            "https://nominatim.openstreetmap.org/reverse",
+            params={"lat": lat, "lon": lng, "format": "json", "accept-language": "hr"},
+            headers=HEADERS, timeout=10
+        )
+        if r.status_code == 200:
+            d = r.json()
+            addr = d.get("display_name", "")
+            # Short form: street + number + city (first four parts found)
+            a = d.get("address", {})
+            short = []
+            for k in ["road", "house_number", "suburb", "city", "town", "village"]:
+                if a.get(k): short.append(a[k])
+            addr_short = ", ".join(short[:4]) or addr[:100]
+
+            # An empty address would leave the row in the work list anyway; skip the write.
+            if addr_short:
+                with conn.cursor() as cur:
+                    cur.execute("UPDATE pgz_sport.sportski_objekti SET adresa = %s WHERE id = %s", (addr_short, oid))
+                print(f" [{i+1}/{len(rows)}] {naziv} → {addr_short}")
+        else:
+            print(f" [FAIL] {naziv}: HTTP {r.status_code}")
+    except Exception as e:
+        print(f" [FAIL] {naziv}: {e}")
+    # Always pause, including after a failure: Nominatim rate-limit 1 req/s.
+    time.sleep(1.1)
+
+conn.close()
+print("DONE")
diff --git a/static/app.html b/static/app.html
index 519dffe..5bed950 100644
--- a/static/app.html
+++ b/static/app.html
@@ -506,6 +506,7 @@ const NAV_BY_ROLE = {
 {id:'dashboard', ic:'\u{1F4CA}', label:'Dashboard'},
{id:'korisnici', ic:'\u{1F465}', label:'Korisnici', href:'/admin/users'}, {id:'savezi', ic:'\u{1F3C5}', label:'Savezi'}, + {id:'objekti', ic:'\u{1F3DF}', label:'Sportski objekti', href:'/objekti'}, {id:'klubovi', ic:'⬢', label:'Klubovi'}, {id:'sportasi', ic:'\u{1F464}', label:'Sportaši'}, {id:'financije', ic:'€', label:'Financije'}, diff --git a/static/objekti.html b/static/objekti.html new file mode 100644 index 0000000..6623518 --- /dev/null +++ b/static/objekti.html @@ -0,0 +1,196 @@ + + + + + + +🏟️ Sportski objekti PGŽ + + + + + + +
+

🏟️ Sportski objekti PGŽ

+
+ 🏠 Home + 📊 Dashboard + 📚 Dokumenti + 👥 Admin +
+
+ +
+ +
+
+ + + +