Sportski objekti: API + Leaflet map page + address enrichment
DB: pgz_sport.sportski_objekti (103 objekti, 103 s geo, 60 s adresom, 31 tip) API: - /api/v2/sportski-objekti (filter: tip, grad, sport, q) - /api/v2/sportski-objekti/meta (tipovi, gradovi, sportovi, ukupno) Frontend: - /static/objekti.html — Leaflet (OpenStreetMap) interactive map - 3 dropdown filter (tip, grad, sport) + search - Side panel s listom + map markers s ikonama (🏟️⚽🏊⛵🎿🎳⛸️🎯🥌🏃) - Popup: naziv, tip, kapacitet, adresa, upravitelj, izgradeno, sportovi, web link, Google Maps link - /objekti, /sport/objekti, /sport/api/v2/sportski-objekti routes Sidebar app.html: +Sportski objekti link Background: scripts/objekti_enrich_address.py (Nominatim reverse-geocode 60 objekata bez adrese)
This commit is contained in:
Submodule .claude/worktrees/agent-a2230c7d02a7c02f4 updated: 8127e2ef22...f488623920
Submodule .claude/worktrees/agent-a54ff6ad4250d2734 updated: 8127e2ef22...38383d07c5
Submodule .claude/worktrees/agent-a70769f0db14302aa updated: 8127e2ef22...55a27fb315
Submodule .claude/worktrees/agent-af39fdf2dbfd08afe updated: 8127e2ef22...efa15d0086
@@ -2835,6 +2835,56 @@ def auth_me_v2_alias(authorization: str = Header(None)):
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@app.get("/api/v2/sportski-objekti")
def sportski_objekti_v2_list(tip: str = None, grad: str = None, sport: str = None, q: str = None, limit: int = 500):
    """List active sports facilities, optionally filtered.

    Args:
        tip: Exact match on the ``tip`` column.
        grad: Exact match on the ``grad`` column.
        sport: Value that must be present in the ``sportovi`` array column.
        q: Case-insensitive substring searched in ``naziv``, ``adresa`` and
            ``upravitelj``. LIKE metacharacters are matched literally.
        limit: Maximum number of rows; clamped to the range 0..1000.

    Returns:
        ``{"count": <number of rows>, "rows": <result of fetch()>}``.
    """
    # A negative LIMIT is rejected by PostgreSQL and an unbounded one lets a
    # single request pull the whole table, so clamp before querying.
    max_limit = 1000
    limit = max(0, min(limit, max_limit))

    where = ["aktivan = true"]
    params = []
    if tip:
        where.append("tip = %s")
        params.append(tip)
    if grad:
        where.append("grad = %s")
        params.append(grad)
    if sport:
        where.append("%s = ANY(sportovi)")
        params.append(sport)
    if q:
        # Escape backslash first, then the LIKE wildcards, so that user input
        # such as "100%" or "a_b" is searched literally (backslash is the
        # default LIKE escape character in PostgreSQL).
        literal = q.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        where.append("(naziv ILIKE %s OR adresa ILIKE %s OR upravitelj ILIKE %s)")
        params.extend([f"%{literal}%"] * 3)

    # Only fixed SQL fragments are interpolated into the statement text;
    # every user-supplied value travels as a bound parameter.
    rows = fetch(f"""
        SELECT id, naziv, tip, grad, adresa, lat, lng, upravitelj, kapacitet,
               sportovi, izgradeno, obnovljeno_god, "veličina" AS velicina, natkrita,
               napomena, web
        FROM pgz_sport.sportski_objekti
        WHERE {' AND '.join(where)}
        ORDER BY grad, naziv
        LIMIT %s
    """, tuple(params) + (limit,))
    return {"count": len(rows), "rows": rows}
|
||||
|
||||
|
||||
@app.get("/api/v2/sportski-objekti/meta")
def sportski_objekti_meta():
    """Return the option lists used by the filter dropdowns.

    Runs four queries against ``pgz_sport.sportski_objekti`` (active rows
    only): counts per ``tip``, counts per ``grad``, counts per unnested
    ``sportovi`` element (top 50) and the overall row count.
    """
    by_type = fetch("SELECT tip, count(*) AS broj FROM pgz_sport.sportski_objekti WHERE aktivan = true AND tip IS NOT NULL GROUP BY tip ORDER BY broj DESC")
    by_city = fetch("SELECT grad, count(*) AS broj FROM pgz_sport.sportski_objekti WHERE aktivan = true AND grad IS NOT NULL GROUP BY grad ORDER BY broj DESC")
    by_sport = fetch("SELECT DISTINCT unnest(sportovi) AS sport, count(*) AS broj FROM pgz_sport.sportski_objekti WHERE aktivan = true AND sportovi IS NOT NULL GROUP BY sport ORDER BY broj DESC LIMIT 50")
    # The count query yields a single row; its "n" column is the total.
    total_row = fetch("SELECT count(*) AS n FROM pgz_sport.sportski_objekti WHERE aktivan = true")[0]

    payload = {}
    payload["tipovi"] = by_type
    payload["gradovi"] = by_city
    payload["sportovi"] = by_sport
    payload["ukupno"] = total_row["n"]
    return payload
|
||||
|
||||
|
||||
@app.get("/objekti")
@app.get("/objekti/")
@app.get("/sport/objekti")
@app.get("/sport/objekti/")
def serve_objekti():
    """Serve the static sports-facilities map page on all four route aliases."""
    # Imported locally, as in the original, so the block does not rely on a
    # file-level import that may not exist.
    from fastapi.responses import FileResponse as _FileResponse

    page_path = "/opt/pgz-sport/static/objekti.html"
    return _FileResponse(page_path)
|
||||
|
||||
@app.get("/")
|
||||
def root(request: Request):
|
||||
host = request.headers.get("host", "")
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python3
|
||||
"""UNIRI akademski repozitorij + znanstveni radovi."""
|
||||
import sys, json, time
|
||||
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
|
||||
from _common import (fetch, extract_text, extract_title, chunk_text,
|
||||
upsert_facts, find_internal_links, DSN)
|
||||
from urllib.parse import urlparse
|
||||
import psycopg2
|
||||
|
||||
# Seed URLs per academic source; the key is passed to crawl() as the source name.
ACADEMIC = {
    "uniri_repozitorij": [
        "https://repozitorij.uniri.hr/",
    ],
    "portal_znanstveni": [
        "https://portal.uniri.hr/",
    ],
    "hrčak_uniri": [
        "https://hrcak.srce.hr/",
    ],
    "pfri_radovi": [
        "https://repository.pfri.uniri.hr/",
    ],
    "medri_radovi": [
        "https://medri.uniri.hr/znanstveni-radovi/",
    ],
    "tfr_radovi": [
        "https://www.riteh.uniri.hr/",
    ],
    "ffri_radovi": [
        "https://www.ffri.uniri.hr/znanstveni-radovi/",
    ],
}
|
||||
|
||||
|
||||
def crawl(name, urls, max_pages=15):
    """Breadth-first crawl of one academic source, storing extracted facts.

    Args:
        name: Source name; stored with every fact and used in the title fact.
        urls: Seed URLs that start the crawl queue.
        max_pages: Upper bound on the number of URLs taken from the queue.

    Returns:
        ``{"name": name, "visited": <URLs attempted>, "facts": <sum of
        upsert_facts() return values>}``.
    """
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    visited = set()
    queue = list(urls)
    facts = 0
    try:
        while queue and len(visited) < max_pages:
            url = queue.pop(0)
            if url in visited:
                continue
            visited.add(url)

            html, status = fetch(url, timeout=20)
            if not html or status != 200:
                continue
            title = extract_title(html)
            text = extract_text(html)
            if not text or len(text) < 300:
                continue

            ff = []
            if title and len(title) > 15:
                ff.append({"fact": f"[Academic] {name} - {title}", "url": url, "title": title})
            for c in chunk_text(text, 900):
                if len(c) > 150:
                    ff.append({"fact": c, "url": url, "title": title})
            facts += upsert_facts(conn, ff, source_name=name,
                                  category="akademski_pgz", confidence=0.90)

            # Follow only links on the same host as the current page.
            base = urlparse(url).hostname
            for link in find_internal_links(html, url):
                if len(queue) >= 40:
                    break  # queue is capped; nothing more can be added
                # Skip links already fetched or already waiting, so repeated
                # navigation links do not use up the capped queue.
                if link in visited or link in queue:
                    continue
                if (urlparse(link).hostname or "") == base:
                    queue.append(link)
            time.sleep(0.7)  # pause between successfully processed pages
    finally:
        # Release the connection even when fetching or storing raises.
        conn.close()
    return {"name": name, "visited": len(visited), "facts": facts}
|
||||
|
||||
|
||||
def main():
    """Crawl every ACADEMIC source, printing a line per source and a JSON summary."""
    collected = []
    for source, seeds in ACADEMIC.items():
        try:
            outcome = crawl(source, seeds, max_pages=12)
            print(f" {source:25} {outcome['visited']:>3}p {outcome['facts']:>5}f")
            collected.append(outcome)
        except Exception as err:
            # Best effort: a failing source is reported and the run continues.
            print(f" {source:25} FAIL: {str(err)[:60]}")

    grand_total = 0
    for outcome in collected:
        grand_total += outcome.get("facts", 0)
    print(f"=== TOTAL: {grand_total} ===")
    summary = {"academic_count": len(collected), "total_facts": grand_total}
    print(json.dumps(summary))


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Lokalne firme i obrti PGŽ — HGK/HOK članovi."""
|
||||
import sys, json, time
|
||||
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
|
||||
from _common import (fetch, extract_text, extract_title, chunk_text,
|
||||
upsert_facts, find_internal_links, DSN)
|
||||
from urllib.parse import urlparse
|
||||
import psycopg2
|
||||
|
||||
# Seed URLs per commerce source; the key is passed to crawl() as the source name.
COMMERCE = {
    "hgk_rijeka_deep": [
        "https://www.hgk.hr/zupanijske-komore/primorsko-goranska-zupanijska-komora",
    ],
    "hok_pgz": [
        "https://www.hok.hr/",
    ],
    "poduzetnistvo_ri": [
        "https://www.pgz.hr/gospodarstvo/",
    ],
    "poslovni_inkubator": [
        "https://www.step-ri.hr/",
    ],
    "izvoz_import_pgz": [
        "https://www.hgk.hr/izvoz-uvoz",
    ],
    "tehnopolis_firme": [
        "https://www.tehnopolis.hr/",
    ],
    "start_up_ri": [
        "https://www.startup-rijeka.hr/",
    ],
    "obrtnička_komora": [
        "https://www.hok.hr/pgz",
    ],
    "tz_pgz_biznis": [
        "https://www.kvarner.hr/biznis",
    ],
    "free_zone_rijeka": [
        "https://www.rfind.hr/",
    ],
}
|
||||
|
||||
|
||||
def crawl(name, urls, max_pages=12):
    """Breadth-first crawl of one commerce source, storing extracted facts.

    Args:
        name: Source name; stored with every fact and used in the title fact.
        urls: Seed URLs that start the crawl queue.
        max_pages: Upper bound on the number of URLs taken from the queue.

    Returns:
        ``{"name": name, "visited": <URLs attempted>, "facts": <sum of
        upsert_facts() return values>}``.
    """
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    visited = set()
    queue = list(urls)
    facts = 0
    try:
        while queue and len(visited) < max_pages:
            url = queue.pop(0)
            if url in visited:
                continue
            visited.add(url)

            html, status = fetch(url, timeout=15)
            if not html or status != 200:
                continue
            title = extract_title(html)
            text = extract_text(html)
            if not text or len(text) < 200:
                continue

            ff = []
            if title and len(title) > 8:
                ff.append({"fact": f"{name} - {title}", "url": url, "title": title})
            for c in chunk_text(text, 800):
                if len(c) > 100:
                    ff.append({"fact": c, "url": url, "title": title})
            facts += upsert_facts(conn, ff, source_name=name,
                                  category="commerce_pgz", confidence=0.84)

            # Follow only links on the same host as the current page.
            base = urlparse(url).hostname
            for link in find_internal_links(html, url):
                if len(queue) >= 35:
                    break  # queue is capped; nothing more can be added
                # Skip links already fetched or already waiting, so repeated
                # navigation links do not use up the capped queue.
                if link in visited or link in queue:
                    continue
                if (urlparse(link).hostname or "") == base:
                    queue.append(link)
            time.sleep(0.5)  # pause between successfully processed pages
    finally:
        # Release the connection even when fetching or storing raises.
        conn.close()
    return {"name": name, "visited": len(visited), "facts": facts}
|
||||
|
||||
|
||||
def main():
    """Crawl every COMMERCE source, printing a line per source and a JSON summary."""
    collected = []
    for source, seeds in COMMERCE.items():
        try:
            outcome = crawl(source, seeds, max_pages=10)
            print(f" {source:25} {outcome['visited']:>3}p {outcome['facts']:>5}f")
            collected.append(outcome)
        except Exception as err:
            # Best effort: a failing source is reported and the run continues.
            print(f" {source:25} FAIL: {str(err)[:60]}")

    grand_total = 0
    for outcome in collected:
        grand_total += outcome.get("facts", 0)
    print(f"=== TOTAL: {grand_total} ===")
    summary = {"commerce_count": len(collected), "total_facts": grand_total}
    print(json.dumps(summary))


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Ekologija i zaštita okoliša PGŽ."""
|
||||
import sys, json, time
|
||||
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
|
||||
from _common import (fetch, extract_text, extract_title, chunk_text,
|
||||
upsert_facts, find_internal_links, DSN)
|
||||
from urllib.parse import urlparse
|
||||
import psycopg2
|
||||
|
||||
# Seed URLs per ecology source; the key is passed to crawl() as the source name.
ECOLOGY = {
    "np_risnjak_full": [
        "https://www.np-risnjak.hr/",
    ],
    "pp_ucka": [
        "https://pp-ucka.hr/",
    ],
    "zelena_akcija": [
        "https://zelena-akcija.hr/",
    ],
    "eko_kvarner": [
        "https://www.eko-kvarner.hr/",
    ],
    "fundacija_adris": [
        "https://www.adris.hr/",
    ],
    "plava_zastava": [
        "https://www.plava-zastava.hr/",
    ],
    "cistoca_pgz": [
        "https://www.cistoca.hr/",
    ],
    "vodoopskrba_pgz": [
        "https://www.kdvik-rijeka.hr/",
    ],
    "otpad_pgz": [
        "https://www.komunalac.hr/",
    ],
    "More_cisto": [
        "https://more-cisto.hr/",
    ],
    "zzjz_okolisa": [
        "https://www.zzjzpgz.hr/zastita-okolisa/",
    ],
}
|
||||
|
||||
|
||||
def crawl(name, urls, max_pages=12):
    """Breadth-first crawl of one ecology source, storing extracted facts.

    Args:
        name: Source name; stored with every fact and used in the title fact.
        urls: Seed URLs that start the crawl queue.
        max_pages: Upper bound on the number of URLs taken from the queue.

    Returns:
        ``{"name": name, "visited": <URLs attempted>, "facts": <sum of
        upsert_facts() return values>}``.
    """
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    visited = set()
    queue = list(urls)
    facts = 0
    try:
        while queue and len(visited) < max_pages:
            url = queue.pop(0)
            if url in visited:
                continue
            visited.add(url)

            html, status = fetch(url, timeout=15)
            if not html or status != 200:
                continue
            title = extract_title(html)
            text = extract_text(html)
            if not text or len(text) < 200:
                continue

            ff = []
            if title and len(title) > 8:
                ff.append({"fact": f"{name} - {title}", "url": url, "title": title})
            for c in chunk_text(text, 800):
                if len(c) > 100:
                    ff.append({"fact": c, "url": url, "title": title})
            facts += upsert_facts(conn, ff, source_name=name,
                                  category="ekologija_pgz", confidence=0.86)

            # Follow only links on the same host as the current page.
            base = urlparse(url).hostname
            for link in find_internal_links(html, url):
                if len(queue) >= 35:
                    break  # queue is capped; nothing more can be added
                # Skip links already fetched or already waiting, so repeated
                # navigation links do not use up the capped queue.
                if link in visited or link in queue:
                    continue
                if (urlparse(link).hostname or "") == base:
                    queue.append(link)
            time.sleep(0.5)  # pause between successfully processed pages
    finally:
        # Release the connection even when fetching or storing raises.
        conn.close()
    return {"name": name, "visited": len(visited), "facts": facts}
|
||||
|
||||
|
||||
def main():
    """Crawl every ECOLOGY source, printing a line per source and a JSON summary."""
    collected = []
    for source, seeds in ECOLOGY.items():
        try:
            outcome = crawl(source, seeds, max_pages=10)
            print(f" {source:25} {outcome['visited']:>3}p {outcome['facts']:>5}f")
            collected.append(outcome)
        except Exception as err:
            # Best effort: a failing source is reported and the run continues.
            print(f" {source:25} FAIL: {str(err)[:60]}")

    grand_total = 0
    for outcome in collected:
        grand_total += outcome.get("facts", 0)
    print(f"=== TOTAL: {grand_total} ===")
    summary = {"ecology_count": len(collected), "total_facts": grand_total}
    print(json.dumps(summary))


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python3
|
||||
"""NGO i udruge PGŽ — mladi, veterani, humanitarne."""
|
||||
import sys, json, time
|
||||
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
|
||||
from _common import (fetch, extract_text, extract_title, chunk_text,
|
||||
upsert_facts, find_internal_links, DSN)
|
||||
from urllib.parse import urlparse
|
||||
import psycopg2
|
||||
|
||||
# Seed URLs per NGO source; the key is passed to crawl() as the source name.
NGO = {
    "mladi_pgz": [
        "https://www.pgz.hr/mladi/",
    ],
    "udruge_pgz": [
        "https://www.udruge.hr/pgz",
    ],
    "humanitarne_pgz": [
        "https://www.volonterski-centar-ri.hr/",
    ],
    "crveni_kriz_full": [
        "https://www.crveni-kriz-rijeka.hr/",
    ],
    "veterani_pgz": [
        "https://www.veterani.hr/pgz",
    ],
    "umirovljenici_pgz": [
        "https://www.savez-umirovljenika.hr/",
    ],
    "invalidi_pgz": [
        "https://www.invalidi-rijeka.hr/",
    ],
    "zivotrodi_pgz": [
        "https://www.zivotrodi.hr/",
    ],
    "omladina_ri": [
        "https://www.omladina-rijeka.hr/",
    ],
}
|
||||
|
||||
|
||||
def crawl(name, urls, max_pages=10):
    """Breadth-first crawl of one NGO source, storing extracted facts.

    Args:
        name: Source name; stored with every fact and used in the title fact.
        urls: Seed URLs that start the crawl queue.
        max_pages: Upper bound on the number of URLs taken from the queue.

    Returns:
        ``{"name": name, "visited": <URLs attempted>, "facts": <sum of
        upsert_facts() return values>}``.
    """
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    visited = set()
    queue = list(urls)
    facts = 0
    try:
        while queue and len(visited) < max_pages:
            url = queue.pop(0)
            if url in visited:
                continue
            visited.add(url)

            html, status = fetch(url, timeout=15)
            if not html or status != 200:
                continue
            title = extract_title(html)
            text = extract_text(html)
            if not text or len(text) < 200:
                continue

            ff = []
            if title and len(title) > 8:
                ff.append({"fact": f"{name} - {title}", "url": url, "title": title})
            for c in chunk_text(text, 800):
                if len(c) > 100:
                    ff.append({"fact": c, "url": url, "title": title})
            facts += upsert_facts(conn, ff, source_name=name,
                                  category="ngo_pgz", confidence=0.84)

            # Follow only links on the same host as the current page.
            base = urlparse(url).hostname
            for link in find_internal_links(html, url):
                if len(queue) >= 25:
                    break  # queue is capped; nothing more can be added
                # Skip links already fetched or already waiting, so repeated
                # navigation links do not use up the capped queue.
                if link in visited or link in queue:
                    continue
                if (urlparse(link).hostname or "") == base:
                    queue.append(link)
            time.sleep(0.5)  # pause between successfully processed pages
    finally:
        # Release the connection even when fetching or storing raises.
        conn.close()
    return {"name": name, "visited": len(visited), "facts": facts}
|
||||
|
||||
|
||||
def main():
    """Crawl every NGO source, printing a line per source and a JSON summary."""
    collected = []
    for source, seeds in NGO.items():
        try:
            outcome = crawl(source, seeds, max_pages=8)
            print(f" {source:25} {outcome['visited']:>3}p {outcome['facts']:>5}f")
            collected.append(outcome)
        except Exception as err:
            # Best effort: a failing source is reported and the run continues.
            print(f" {source:25} FAIL: {str(err)[:60]}")

    grand_total = 0
    for outcome in collected:
        grand_total += outcome.get("facts", 0)
    print(f"=== TOTAL: {grand_total} ===")
    summary = {"ngo_count": len(collected), "total_facts": grand_total}
    print(json.dumps(summary))


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Pomorstvo — luke, marine, brodogradnja, nautika."""
|
||||
import sys, json, time
|
||||
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
|
||||
from _common import (fetch, extract_text, extract_title, chunk_text,
|
||||
upsert_facts, find_internal_links, DSN)
|
||||
from urllib.parse import urlparse
|
||||
import psycopg2
|
||||
|
||||
# Seed URLs per maritime source; the key is passed to crawl() as the source name.
MARITIME = {
    "luka_rijeka_full": [
        "https://www.lukarijeka.hr/",
        "https://www.portauthority.hr/",
    ],
    "aci_marine_kvarner": [
        "https://www.aci-marinas.com/",
    ],
    "marina_opatija": [
        "https://www.marina-opatija.hr/",
    ],
    "marina_punat": [
        "https://www.marina-punat.hr/",
    ],
    "marina_cres": [
        "https://www.aci-marinas.com/marina/aci-cres",
    ],
    "yachting_kvarner": [
        "https://www.yachting-kvarner.com/",
    ],
    "brod_3maj_deep": [
        "https://www.3maj.hr/",
    ],
    "viktor_lenac_deep": [
        "https://www.lenac.hr/",
    ],
    "pfri_maritime": [
        "https://www.pfri.uniri.hr/",
    ],
    "luka_baska": [
        "https://www.luka-baska.hr/",
    ],
    "luke_kvarner": [
        "https://www.luke-hrvatska.hr/",
    ],
}
|
||||
|
||||
|
||||
def crawl(name, urls, max_pages=15):
    """Breadth-first crawl of one maritime source, storing extracted facts.

    Args:
        name: Source name; stored with every fact and used in the title fact.
        urls: Seed URLs that start the crawl queue.
        max_pages: Upper bound on the number of URLs taken from the queue.

    Returns:
        ``{"name": name, "visited": <URLs attempted>, "facts": <sum of
        upsert_facts() return values>}``.
    """
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    visited = set()
    queue = list(urls)
    facts = 0
    try:
        while queue and len(visited) < max_pages:
            url = queue.pop(0)
            if url in visited:
                continue
            visited.add(url)

            html, status = fetch(url, timeout=15)
            if not html or status != 200:
                continue
            title = extract_title(html)
            text = extract_text(html)
            if not text or len(text) < 200:
                continue

            ff = []
            if title and len(title) > 8:
                ff.append({"fact": f"{name} - {title}", "url": url, "title": title})
            for c in chunk_text(text, 800):
                if len(c) > 100:
                    ff.append({"fact": c, "url": url, "title": title})
            facts += upsert_facts(conn, ff, source_name=name,
                                  category="pomorstvo_pgz", confidence=0.87)

            # Follow only links on the same host as the current page.
            base = urlparse(url).hostname
            for link in find_internal_links(html, url):
                if len(queue) >= 40:
                    break  # queue is capped; nothing more can be added
                # Skip links already fetched or already waiting, so repeated
                # navigation links do not use up the capped queue.
                if link in visited or link in queue:
                    continue
                if (urlparse(link).hostname or "") == base:
                    queue.append(link)
            time.sleep(0.5)  # pause between successfully processed pages
    finally:
        # Release the connection even when fetching or storing raises.
        conn.close()
    return {"name": name, "visited": len(visited), "facts": facts}
|
||||
|
||||
|
||||
def main():
    """Crawl every MARITIME source, printing a line per source and a JSON summary."""
    collected = []
    for source, seeds in MARITIME.items():
        try:
            outcome = crawl(source, seeds, max_pages=12)
            print(f" {source:25} {outcome['visited']:>3}p {outcome['facts']:>5}f")
            collected.append(outcome)
        except Exception as err:
            # Best effort: a failing source is reported and the run continues.
            print(f" {source:25} FAIL: {str(err)[:60]}")

    grand_total = 0
    for outcome in collected:
        grand_total += outcome.get("facts", 0)
    print(f"=== TOTAL: {grand_total} ===")
    summary = {"maritime_count": len(collected), "total_facts": grand_total}
    print(json.dumps(summary))


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Osnovne i srednje škole + vrtići PGŽ."""
|
||||
import sys, json, time
|
||||
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
|
||||
from _common import (fetch, extract_text, extract_title, chunk_text,
|
||||
upsert_facts, find_internal_links, DSN)
|
||||
from urllib.parse import urlparse
|
||||
import psycopg2
|
||||
|
||||
# Seed URLs per school source; the key is passed to crawl() as the source name.
SCHOOLS = {
    "srednje_skole_ri": [
        "https://www.skole.hr/skole/primorsko-goranska-zupanija",
    ],
    "osnove_skole_ri": [
        "https://www.rijeka.hr/skole/",
    ],
    # NOTE(review): this seed points at pula.hr, which looks like the city of
    # Pula rather than a PGŽ site — confirm the intended URL.
    "vrtici_pgz": [
        "https://www.pula.hr/hr/gradski-vrtic/",
    ],
    "gimnazija_ri": [
        "https://www.gimnazija-rijeka.hr/",
    ],
    "tehnicka_skola_ri": [
        "https://www.tehnicka-rijeka.hr/",
    ],
    "ekonomska_skola_ri": [
        "https://www.ekonomska-rijeka.hr/",
    ],
    "medicinska_skola_ri": [
        "https://www.medicinska-skola-rijeka.hr/",
    ],
    "skole_opatija": [
        "https://www.opatija.hr/skole",
    ],
    "skole_crikvenica": [
        "https://www.skole-crikvenica.hr/",
    ],
}
|
||||
|
||||
|
||||
def crawl(name, urls, max_pages=10):
    """Breadth-first crawl of one school source, storing extracted facts.

    Args:
        name: Source name; stored with every fact and used in the title fact.
        urls: Seed URLs that start the crawl queue.
        max_pages: Upper bound on the number of URLs taken from the queue.

    Returns:
        ``{"name": name, "visited": <URLs attempted>, "facts": <sum of
        upsert_facts() return values>}``.
    """
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    visited = set()
    queue = list(urls)
    facts = 0
    try:
        while queue and len(visited) < max_pages:
            url = queue.pop(0)
            if url in visited:
                continue
            visited.add(url)

            html, status = fetch(url, timeout=15)
            if not html or status != 200:
                continue
            title = extract_title(html)
            text = extract_text(html)
            if not text or len(text) < 200:
                continue

            ff = []
            if title and len(title) > 8:
                ff.append({"fact": f"{name} - {title}", "url": url, "title": title})
            for c in chunk_text(text, 800):
                if len(c) > 100:
                    ff.append({"fact": c, "url": url, "title": title})
            facts += upsert_facts(conn, ff, source_name=name,
                                  category="skole_pgz", confidence=0.86)

            # Follow only links on the same host as the current page.
            base = urlparse(url).hostname
            for link in find_internal_links(html, url):
                if len(queue) >= 30:
                    break  # queue is capped; nothing more can be added
                # Skip links already fetched or already waiting, so repeated
                # navigation links do not use up the capped queue.
                if link in visited or link in queue:
                    continue
                if (urlparse(link).hostname or "") == base:
                    queue.append(link)
            time.sleep(0.5)  # pause between successfully processed pages
    finally:
        # Release the connection even when fetching or storing raises.
        conn.close()
    return {"name": name, "visited": len(visited), "facts": facts}
|
||||
|
||||
|
||||
def main():
    """Crawl every SCHOOLS source, printing a line per source and a JSON summary."""
    collected = []
    for source, seeds in SCHOOLS.items():
        try:
            outcome = crawl(source, seeds, max_pages=8)
            print(f" {source:25} {outcome['visited']:>3}p {outcome['facts']:>5}f")
            collected.append(outcome)
        except Exception as err:
            # Best effort: a failing source is reported and the run continues.
            print(f" {source:25} FAIL: {str(err)[:60]}")

    grand_total = 0
    for outcome in collected:
        grand_total += outcome.get("facts", 0)
    print(f"=== TOTAL: {grand_total} ===")
    summary = {"schools_count": len(collected), "total_facts": grand_total}
    print(json.dumps(summary))


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Sport federacije i savezi PGŽ — all sports."""
|
||||
import sys, json, time
|
||||
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
|
||||
from _common import (fetch, extract_text, extract_title, chunk_text,
|
||||
upsert_facts, find_internal_links, DSN)
|
||||
from urllib.parse import urlparse
|
||||
import psycopg2
|
||||
|
||||
# Seed URLs per sports federation; the key is passed to crawl() as the source name.
FEDERATIONS = {
    "zns_pgz": [
        "https://www.pgzns.hr/",
    ],
    "kss_pgz": [
        "https://www.kss-pgz.hr/",
    ],
    "handball_pgz": [
        "https://www.hrs-pgz.hr/",
    ],
    "odbojka_savez_pgz": [
        "https://www.odbojka-pgz.hr/",
    ],
    "atletika_savez_pgz": [
        "https://www.atletika-pgz.hr/",
    ],
    "plivanje_savez_pgz": [
        "https://www.plivanje-pgz.hr/",
    ],
    "skijaski_savez_pgz": [
        "https://www.ski-pgz.hr/",
    ],
    "tenis_savez_pgz": [
        "https://www.tenis-pgz.hr/",
    ],
    "judo_savez_pgz": [
        "https://www.judo-pgz.hr/",
    ],
    "karate_savez_pgz": [
        "https://www.karate-pgz.hr/",
    ],
    "kuglanje_savez_pgz": [
        "https://www.kuglanje-pgz.hr/",
    ],
}
|
||||
|
||||
|
||||
def crawl(name, urls, max_pages=12):
    """Breadth-first crawl of one sports-federation source, storing extracted facts.

    Args:
        name: Source name; stored with every fact and used in the title fact.
        urls: Seed URLs that start the crawl queue.
        max_pages: Upper bound on the number of URLs taken from the queue.

    Returns:
        ``{"name": name, "visited": <URLs attempted>, "facts": <sum of
        upsert_facts() return values>}``.
    """
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    visited = set()
    queue = list(urls)
    facts = 0
    try:
        while queue and len(visited) < max_pages:
            url = queue.pop(0)
            if url in visited:
                continue
            visited.add(url)

            html, status = fetch(url, timeout=15)
            if not html or status != 200:
                continue
            title = extract_title(html)
            text = extract_text(html)
            if not text or len(text) < 200:
                continue

            ff = []
            if title and len(title) > 8:
                ff.append({"fact": f"{name} - {title}", "url": url, "title": title})
            for c in chunk_text(text, 800):
                if len(c) > 100:
                    ff.append({"fact": c, "url": url, "title": title})
            facts += upsert_facts(conn, ff, source_name=name,
                                  category="sport_federacije_pgz", confidence=0.88)

            # Follow only links on the same host as the current page.
            base = urlparse(url).hostname
            for link in find_internal_links(html, url):
                if len(queue) >= 35:
                    break  # queue is capped; nothing more can be added
                # Skip links already fetched or already waiting, so repeated
                # navigation links do not use up the capped queue.
                if link in visited or link in queue:
                    continue
                if (urlparse(link).hostname or "") == base:
                    queue.append(link)
            time.sleep(0.5)  # pause between successfully processed pages
    finally:
        # Release the connection even when fetching or storing raises.
        conn.close()
    return {"name": name, "visited": len(visited), "facts": facts}
|
||||
|
||||
|
||||
def main():
    """Crawl every FEDERATIONS source, printing a line per source and a JSON summary."""
    collected = []
    for source, seeds in FEDERATIONS.items():
        try:
            outcome = crawl(source, seeds, max_pages=10)
            print(f" {source:25} {outcome['visited']:>3}p {outcome['facts']:>5}f")
            collected.append(outcome)
        except Exception as err:
            # Best effort: a failing source is reported and the run continues.
            print(f" {source:25} FAIL: {str(err)[:60]}")

    grand_total = 0
    for outcome in collected:
        grand_total += outcome.get("facts", 0)
    print(f"=== TOTAL: {grand_total} ===")
    summary = {"fed_count": len(collected), "total_facts": grand_total}
    print(json.dumps(summary))


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python3
|
||||
# Fajl: objekti_enrich_address.py | v1.0 | 05.05.2026
|
||||
# Author: Damir Radulić
|
||||
# Svrha: Reverse-geocode lat/lng → adresa za sportski_objekti
|
||||
import os, time, json
|
||||
import psycopg2, requests
|
||||
|
||||
DSN = "host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet password=R1net2026!SecureDB#v7"
|
||||
HEADERS = {"User-Agent": "Ri.NET PGŽ Sport (dradulic@outlook.com)"}
|
||||
|
||||
conn = psycopg2.connect(DSN); conn.autocommit = True
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""
|
||||
SELECT id, naziv, lat, lng FROM pgz_sport.sportski_objekti
|
||||
WHERE aktivan = true AND lat IS NOT NULL AND lng IS NOT NULL
|
||||
AND (adresa IS NULL OR adresa = '')
|
||||
LIMIT 60
|
||||
""")
|
||||
rows = cur.fetchall()
|
||||
|
||||
print(f"Total: {len(rows)} objekata bez adrese")
|
||||
|
||||
for i, (oid, naziv, lat, lng) in enumerate(rows):
|
||||
try:
|
||||
# Nominatim reverse geocoding
|
||||
r = requests.get(
|
||||
f"https://nominatim.openstreetmap.org/reverse",
|
||||
params={"lat": lat, "lon": lng, "format": "json", "accept-language": "hr"},
|
||||
headers=HEADERS, timeout=10
|
||||
)
|
||||
if r.status_code == 200:
|
||||
d = r.json()
|
||||
addr = d.get("display_name", "")
|
||||
# Krat: ulica + broj + grad
|
||||
a = d.get("address", {})
|
||||
short = []
|
||||
for k in ["road", "house_number", "suburb", "city", "town", "village"]:
|
||||
if a.get(k): short.append(a[k])
|
||||
addr_short = ", ".join(short[:4]) or addr[:100]
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("UPDATE pgz_sport.sportski_objekti SET adresa = %s WHERE id = %s", (addr_short, oid))
|
||||
print(f" [{i+1}/{len(rows)}] {naziv} → {addr_short}")
|
||||
time.sleep(1.1) # Nominatim rate-limit 1 req/s
|
||||
except Exception as e:
|
||||
print(f" [FAIL] {naziv}: {e}")
|
||||
|
||||
print("DONE")
|
||||
@@ -506,6 +506,7 @@ const NAV_BY_ROLE = {
|
||||
{id:'dashboard', ic:'\u{1F4CA}', label:'Dashboard'},
|
||||
{id:'korisnici', ic:'\u{1F465}', label:'Korisnici', href:'/admin/users'},
|
||||
{id:'savezi', ic:'\u{1F3C5}', label:'Savezi'},
|
||||
{id:'objekti', ic:'\u{1F3DF}', label:'Sportski objekti', href:'/objekti'},
|
||||
{id:'klubovi', ic:'⬢', label:'Klubovi'},
|
||||
{id:'sportasi', ic:'\u{1F464}', label:'Sportaši'},
|
||||
{id:'financije', ic:'€', label:'Financije'},
|
||||
|
||||
@@ -0,0 +1,196 @@
|
||||
<!DOCTYPE html>
|
||||
<!--
|
||||
objekti.html — Sportski objekti PGŽ (Leaflet / OpenStreetMap karta + filteri)
|
||||
Author: Damir Radulić | v1.0 | 05.05.2026
|
||||
-->
|
||||
<html lang="hr">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||||
<title>🏟️ Sportski objekti PGŽ</title>
|
||||
<link rel="icon" href="/favicon.ico">
|
||||
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.9.4/dist/leaflet.css">
|
||||
<script src="https://unpkg.com/leaflet@1.9.4/dist/leaflet.js"></script>
|
||||
<style>
|
||||
*{box-sizing:border-box;margin:0;padding:0}
|
||||
body{font:14px system-ui;background:#06080d;color:#e0e0e0}
|
||||
header{background:#0a0e15;padding:12px 20px;border-bottom:1px solid #2a2a2e;display:flex;justify-content:space-between;align-items:center}
|
||||
header h1{font-size:18px;color:#5fb6ff}
|
||||
header a{color:#888;text-decoration:none;margin-left:14px;font-size:13px}
|
||||
header a:hover{color:#fff}
|
||||
.container{display:grid;grid-template-columns:380px 1fr;height:calc(100vh - 50px)}
|
||||
.sidebar{background:#0c1016;border-right:1px solid #1a1a1e;overflow-y:auto;padding:14px}
|
||||
.filters{display:flex;flex-direction:column;gap:8px;margin-bottom:14px;padding-bottom:14px;border-bottom:1px solid #1a1a1e}
|
||||
.filters label{font-size:11px;color:#888;text-transform:uppercase}
|
||||
.filters select, .filters input{
|
||||
background:#1a1a1e;border:1px solid #2a2a2e;color:#fff;padding:8px 10px;border-radius:5px;font-size:13px;width:100%
|
||||
}
|
||||
.stats{font-size:12px;color:#888;padding:8px 0;border-bottom:1px solid #1a1a1e;margin-bottom:8px}
|
||||
.stats b{color:#5fb6ff}
|
||||
.obj-list{display:flex;flex-direction:column;gap:6px}
|
||||
.obj-item{background:#0c1016;border:1px solid #1a1a1e;border-radius:5px;padding:10px;cursor:pointer;transition:all .15s}
|
||||
.obj-item:hover{border-color:#5fb6ff;background:#0f1620}
|
||||
.obj-item.active{border-color:#fbbf24;background:#1a1610}
|
||||
.obj-name{font-weight:600;color:#fff;margin-bottom:3px;font-size:13px}
|
||||
.obj-meta{font-size:10px;color:#888;display:flex;gap:6px;flex-wrap:wrap}
|
||||
.obj-meta span{background:#1a1a1e;padding:1px 6px;border-radius:3px}
|
||||
#map{flex:1;background:#000}
|
||||
.leaflet-container{background:#1a1a1e}
|
||||
.popup-title{font-weight:700;font-size:14px;margin-bottom:4px;color:#000}
|
||||
.popup-meta{font-size:11px;color:#666;margin-bottom:4px}
|
||||
.popup-link{display:inline-block;margin-top:6px;padding:4px 8px;background:#1a73e8;color:#fff;text-decoration:none;border-radius:3px;font-size:11px}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1><a href="/" style="color:#5fb6ff;text-decoration:none">🏟️ Sportski objekti PGŽ</a></h1>
|
||||
<div>
|
||||
<a href="/">🏠 Home</a>
|
||||
<a href="/static/sport2.html#dashboard">📊 Dashboard</a>
|
||||
<a href="/sport/dokumenti">📚 Dokumenti</a>
|
||||
<a href="/admin/users">👥 Admin</a>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<div class="container">
|
||||
<div class="sidebar">
|
||||
<div class="filters">
|
||||
<div>
|
||||
<label>Tip objekta</label>
|
||||
<select id="f-tip" onchange="loadObjekti()">
|
||||
<option value="">Svi tipovi</option>
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<label>Grad</label>
|
||||
<select id="f-grad" onchange="loadObjekti()">
|
||||
<option value="">Svi gradovi</option>
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<label>Sport</label>
|
||||
<select id="f-sport" onchange="loadObjekti()">
|
||||
<option value="">Svi sportovi</option>
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<label>Pretraga</label>
|
||||
<input type="search" id="f-q" placeholder="Naziv, adresa…" onkeyup="if(event.key==='Enter') loadObjekti()">
|
||||
</div>
|
||||
</div>
|
||||
<div class="stats" id="stats">Učitavanje…</div>
|
||||
<div class="obj-list" id="obj-list"></div>
|
||||
</div>
|
||||
<div id="map"></div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Map centred on Rijeka at zoom 10, with OpenStreetMap raster tiles.
const map = L.map('map').setView([45.3271, 14.4422], 10);
const osmTiles = L.tileLayer('https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', {
  maxZoom: 19,
  attribution: '© OpenStreetMap'
});
osmTiles.addTo(map);

// All facility markers live in one layer group so they can be cleared at once.
let markers = L.layerGroup();
markers.addTo(map);

// Emoji shown for each facility type (`tip`); callers fall back to a default.
const tipIcons = {
  'dvorana': '🏟️',
  'stadion': '⚽',
  'bazen': '🏊',
  'kompleks': '🏛️',
  'marina': '⛵',
  'skijalište': '🎿',
  'kuglana': '🎳',
  'tenis kompleks': '🎾',
  'klizalište': '⛸️',
  'strelište': '🎯',
  'boćalište': '🥌',
  'atletska staza': '🏃',
  'centar': '🏟️',
  'sanjkalište': '🛷',
  'hipodrom': '🐎'
};
|
||||
|
||||
// Fetch the filter metadata and fill the three dropdowns (type, city, sport).
// Options are created as DOM nodes, so values coming from the database are
// never parsed as HTML.
async function loadMeta(){
  const r = await fetch('/sport/api/v2/sportski-objekti/meta');
  if(!r.ok) throw new Error('meta HTTP ' + r.status);
  const m = await r.json();

  // Append one <option> per item to the <select> with the given id.
  const fill = (selectId, items, valueOf, labelOf) => {
    const sel = document.getElementById(selectId);
    const frag = document.createDocumentFragment();
    (items || []).forEach(it => frag.appendChild(new Option(labelOf(it), valueOf(it))));
    sel.appendChild(frag);
  };

  fill('f-tip', m.tipovi, t => t.tip, t => `${tipIcons[t.tip]||'•'} ${t.tip} (${t.broj})`);
  fill('f-grad', m.gradovi, g => g.grad, g => `${g.grad} (${g.broj})`);
  fill('f-sport', m.sportovi, s => s.sport, s => `${s.sport} (${s.broj})`);
}
|
||||
|
||||
// Query the API with the current filter values, then rebuild the map markers
// and the sidebar list. Every database value is HTML-escaped before it is
// placed into markup.
async function loadObjekti(){
  // Escape text for safe use inside HTML content and attribute values.
  const esc = v => String(v == null ? '' : v).replace(/[&<>"']/g,
    c => ({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'})[c]);
  // Accept only http(s) links so a stored "javascript:" URL cannot run.
  const safeUrl = u => {
    try {
      const p = new URL(u, location.href);
      return (p.protocol === 'http:' || p.protocol === 'https:') ? p.href : '';
    } catch (e) {
      return '';
    }
  };

  const params = new URLSearchParams();
  const tip = document.getElementById('f-tip').value;
  const grad = document.getElementById('f-grad').value;
  const sport = document.getElementById('f-sport').value;
  const q = document.getElementById('f-q').value;
  if(tip) params.set('tip', tip);
  if(grad) params.set('grad', grad);
  if(sport) params.set('sport', sport);
  if(q) params.set('q', q);
  params.set('limit', '500');

  const statsEl = document.getElementById('stats');
  const r = await fetch('/sport/api/v2/sportski-objekti?'+params.toString());
  if(!r.ok){
    // Leave the previous markers and list in place and report the failure.
    statsEl.textContent = `Greška pri učitavanju (HTTP ${r.status})`;
    return;
  }
  const d = await r.json();
  const rows = Array.isArray(d.rows) ? d.rows : [];

  statsEl.innerHTML = `<b>${esc(d.count)}</b> objekata po filtru`;

  // Markers
  markers.clearLayers();
  const list = document.getElementById('obj-list');
  const bounds = [];
  const items = [];

  rows.forEach(o => {
    if(o.lat && o.lng){
      const icon = tipIcons[o.tip] || '📍';
      const m = L.marker([o.lat, o.lng], {
        title: o.naziv,
        icon: L.divIcon({
          html: `<div style="background:#1a73e8;color:#fff;padding:2px 5px;border-radius:50%;border:2px solid #fff;box-shadow:0 2px 4px rgba(0,0,0,.5);font-size:14px;width:30px;height:30px;display:flex;align-items:center;justify-content:center">${icon}</div>`,
          className: '', iconSize: [30, 30], iconAnchor: [15, 15]
        })
      });
      const web = o.web ? safeUrl(o.web) : '';
      const gmaps = 'https://www.google.com/maps?q=' + encodeURIComponent(o.lat + ',' + o.lng);
      m.bindPopup([
        `<div class="popup-title">${esc(o.naziv)}</div>`,
        `<div class="popup-meta">${icon} ${esc(o.tip)} · ${esc(o.grad)}${o.kapacitet ? ' · ' + esc(o.kapacitet) + ' mjesta' : ''}</div>`,
        o.adresa ? `<div class="popup-meta">📍 ${esc(o.adresa)}</div>` : '',
        o.upravitelj ? `<div class="popup-meta">👤 ${esc(o.upravitelj)}</div>` : '',
        o.izgradeno ? `<div class="popup-meta">🏗 Izgrađeno: ${esc(o.izgradeno)}</div>` : '',
        o.sportovi && o.sportovi.length ? `<div class="popup-meta">⚽ ${esc(o.sportovi.join(', '))}</div>` : '',
        web ? `<a href="${esc(web)}" target="_blank" rel="noopener noreferrer" class="popup-link">🌐 Web</a>` : '',
        `<a href="${esc(gmaps)}" target="_blank" rel="noopener noreferrer" class="popup-link">🗺️ Google Maps</a>`
      ].join('\n'), {maxWidth: 320});
      markers.addLayer(m);
      bounds.push([o.lat, o.lng]);
    }

    // Coordinates go into an inline handler, so force them to plain numbers.
    const lat = Number(o.lat) || 0;
    const lng = Number(o.lng) || 0;
    items.push(`
      <div class="obj-item" onclick="zoomTo(${lat}, ${lng})">
        <div class="obj-name">${tipIcons[o.tip]||'•'} ${esc(o.naziv)}</div>
        <div class="obj-meta">
          <span>${esc(o.tip)}</span>
          ${o.grad ? '<span>'+esc(o.grad)+'</span>' : ''}
          ${o.kapacitet ? '<span>'+esc(o.kapacitet)+' mj</span>' : ''}
        </div>
      </div>
    `);
  });

  // Single assignment instead of `innerHTML +=` per row, which re-parsed the
  // whole list on every iteration.
  list.innerHTML = items.join('');

  if(bounds.length > 0){
    map.fitBounds(bounds, {padding: [40, 40], maxZoom: 13});
  }
}
|
||||
|
||||
// Centre the map on the given coordinates and open the popup of the marker
// located exactly there. Does nothing when either coordinate is 0 or missing
// (the list passes 0 for facilities without coordinates).
function zoomTo(lat, lng){
  if(!(lat && lng)) return;
  map.setView([lat, lng], 16);
  markers.eachLayer(m => {
    const p = m.getLatLng();
    // Match on both coordinates; comparing latitude alone could open the
    // popup of a different facility that shares the same latitude.
    if(p.lat === lat && p.lng === lng) m.openPopup();
  });
}
|
||||
|
||||
// Load the filter options first, then the facilities. A metadata failure is
// logged but must not prevent the map and list from loading.
loadMeta()
  .catch(err => console.error('loadMeta failed:', err))
  .then(() => loadObjekti())
  .catch(err => console.error('loadObjekti failed:', err));
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user