PGŽ Sport Platform — Round 1+2 baseline (sport2.html + API)
This commit is contained in:
Executable
+193
@@ -0,0 +1,193 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
geocode_objekti_v2.py — precision geocoding for pgz_sport.sportski_objekti
|
||||
|
||||
Re-geocodes all objects via Nominatim using {naziv} + {grad} + ", Hrvatska" queries.
|
||||
Verifies result is within PGŽ bounds (44.5-45.6, 14.0-15.1) and NOT a duplicated
|
||||
"city centroid" (where multiple objects share identical coordinates from a previous
|
||||
fallback pass). Updates lat/lng only when a more precise result is found.
|
||||
|
||||
Usage: python3 geocode_objekti_v2.py [--dry-run] [--only-duplicates] [--id ID]
|
||||
"""
|
||||
import os, sys, time, json, urllib.parse, argparse
|
||||
import urllib.request
|
||||
import psycopg2
|
||||
|
||||
# PostgreSQL connection settings; every value can be overridden through the
# matching PG_* environment variable.
PG = {
    'host': os.environ.get('PG_HOST', '10.10.0.2'),
    'port': int(os.environ.get('PG_PORT', '6432')),
    'dbname': os.environ.get('PG_DB', 'rinet_v3'),
    'user': os.environ.get('PG_USER', 'rinet'),
    'password': os.environ.get('PG_PASS', ''),
}

# Accepted bounding box as (min, max) pairs for latitude and longitude.
PGZ_LAT = (44.5, 45.6)
PGZ_LNG = (14.0, 15.1)

# User-Agent header sent with every Nominatim request.
UA = 'pgz-sport/2.0 (dradulic@outlook.com)'
|
||||
|
||||
def nominatim(q, country='hr', limit=3):
    """Query the Nominatim search endpoint and return the decoded JSON payload.

    Args:
        q: Free-text search query.
        country: Value sent as the ``countrycodes`` filter.
        limit: Maximum number of results requested.

    Returns:
        The parsed JSON response, or an empty list when the request or the
        decoding fails (the error is printed rather than raised).
    """
    # urlencode escapes every parameter, not just the query text.
    params = urllib.parse.urlencode(
        {
            'q': q,
            'format': 'json',
            'limit': limit,
            'countrycodes': country,
            'addressdetails': 1,
        },
        quote_via=urllib.parse.quote,  # keep %20 for spaces instead of '+'
    )
    url = 'https://nominatim.openstreetmap.org/search?' + params
    req = urllib.request.Request(url, headers={'User-Agent': UA})
    try:
        with urllib.request.urlopen(req, timeout=10) as r:
            return json.loads(r.read().decode())
    except Exception as e:
        # Deliberately best-effort: a failed lookup is reported and treated
        # as "no results" so the caller can move on to its next query.
        print(f' ! nominatim error: {e}')
        return []
|
||||
|
||||
def in_pgz(lat, lng):
    """Return True when (lat, lng) falls inside the PGZ_LAT / PGZ_LNG box.

    Both ends of each range are inclusive.
    """
    lat_min, lat_max = PGZ_LAT
    lng_min, lng_max = PGZ_LNG
    if not (lat_min <= lat <= lat_max):
        return False
    return lng_min <= lng <= lng_max
|
||||
|
||||
def best_result(results):
    """Pick the highest-scoring Nominatim result that lies inside the bounds.

    A candidate's score is its priority (looked up by ``type``, then by
    ``class``, defaulting to 50) plus ten times its ``importance``. Candidates
    whose coordinates are missing, null or unparsable, or that fall outside
    the bounding box checked by ``in_pgz``, are ignored. On equal scores the
    earliest candidate wins.

    Args:
        results: Sequence of Nominatim result dicts; may be empty or None.

    Returns:
        ``(lat, lng, raw_result)`` for the best candidate, or None when no
        candidate qualifies.
    """
    if not results:
        return None
    type_priority = {
        'sports_centre': 100, 'stadium': 95, 'pitch': 90, 'swimming_pool': 90,
        'sports_hall': 95, 'leisure': 80, 'building': 70, 'tourism': 60,
        'highway': 30, 'place': 20,
    }
    best = None
    best_score = -1
    for r in results:
        try:
            lat = float(r['lat'])
            lng = float(r['lon'])
        except (KeyError, TypeError, ValueError):
            # TypeError covers null coordinates (float(None)) and entries
            # that are not mappings at all.
            continue
        if not in_pgz(lat, lng):
            continue
        cls = r.get('class', '')
        typ = r.get('type', '')
        # importance is Nominatim's intrinsic relevance score; a null or
        # malformed value must not abort the whole selection.
        try:
            importance = float(r.get('importance', 0))
        except (TypeError, ValueError):
            importance = 0.0
        score = type_priority.get(typ, type_priority.get(cls, 50)) + importance * 10
        if score > best_score:
            best_score = score
            best = (lat, lng, r)
    return best
|
||||
|
||||
def queries_for(naziv, grad, adresa):
    """Build the search queries to try, ordered from most to least specific.

    Args:
        naziv: Object name (may be None or blank).
        grad: Town name (may be None or blank).
        adresa: Street address (may be None or blank).

    Returns:
        A list of unique query strings in the order they should be attempted.
    """
    name = (naziv or '').strip()
    town = (grad or '').strip()
    street = (adresa or '').strip()

    # Generic facility-type prefixes; only the first one that matches is
    # removed to obtain a shorter variant of the name.
    droppable = (
        'Sportska dvorana ', 'Gradska sportska dvorana ',
        'Multifunkcionalna dvorana za sport i turizam ',
        'Stadion ', 'Bazen ', 'Bazeni ', 'Dvorana ',
        'Boćalište ', 'Kuglana ', 'Marina ',
    )
    trimmed = next(
        (name[len(p):].strip() for p in droppable if name.startswith(p)),
        name,
    )

    candidates = []
    if street and town:
        candidates.append(f'{street}, {town}, Hrvatska')
    if name and town:
        candidates.append(f'{name}, {town}, Hrvatska')
    if trimmed and trimmed != name and town:
        candidates.append(f'{trimmed}, {town}, Hrvatska')
    if name:
        candidates.append(f'{name}, Hrvatska')
    if town and street:
        candidates.append(f'{street}, {town}')

    # dict keys keep insertion order, so this drops repeats without reordering.
    return list(dict.fromkeys(candidates))
|
||||
|
||||
def _find_position(naziv, grad, adresa):
    """Try each candidate query in order and return the first precise hit.

    Returns ``(lat, lng, query)``, or None when no query yields a usable
    result inside the bounds.
    """
    for q in queries_for(naziv, grad, adresa):
        results = nominatim(q)
        time.sleep(1.05)  # Nominatim 1 req/s policy
        best = best_result(results)
        if not best:
            print(f' "{q}" -> no result in PGŽ bounds')
            continue
        lat, lng, raw = best
        # Skip queries that just resolve to a place/town center
        if raw.get('class') == 'place' and raw.get('type') in (
                'city', 'town', 'village', 'suburb', 'locality'):
            print(f' "{q}" -> {raw.get("display_name","")[:60]} (place type, skip)')
            continue
        print(f' "{q}" -> {lat},{lng} [{raw.get("class")}/{raw.get("type")}]')
        return lat, lng, q
    return None


def main():
    """Re-geocode rows of pgz_sport.sportski_objekti and store better coordinates.

    Command-line flags:
        --dry-run          print the updates that would be made, write nothing
        --only-duplicates  process only rows sharing lat/lng with another row
        --id ID            process a single row (takes precedence over the above)

    A row is updated only when the new position differs from the stored one
    by at least 0.0005 degrees on either axis. Each update is committed
    individually, so an interrupted run keeps the work done so far.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--dry-run', action='store_true')
    ap.add_argument('--only-duplicates', action='store_true',
                    help='only re-geocode objects sharing coordinates with another object')
    ap.add_argument('--id', type=int, help='single object ID to re-geocode')
    args = ap.parse_args()

    conn = psycopg2.connect(**PG)
    try:
        with conn.cursor() as cur:
            # "is not None" so that an explicit --id 0 is honoured as well.
            if args.id is not None:
                cur.execute("SELECT id, naziv, grad, adresa, lat, lng FROM pgz_sport.sportski_objekti WHERE id=%s", (args.id,))
            elif args.only_duplicates:
                cur.execute("""
                    WITH dup AS (
                        SELECT lat, lng FROM pgz_sport.sportski_objekti
                        WHERE lat IS NOT NULL
                        GROUP BY lat, lng HAVING count(*)>1
                    )
                    SELECT s.id, s.naziv, s.grad, s.adresa, s.lat, s.lng
                    FROM pgz_sport.sportski_objekti s
                    JOIN dup d USING (lat, lng)
                    ORDER BY s.id
                """)
            else:
                cur.execute("SELECT id, naziv, grad, adresa, lat, lng FROM pgz_sport.sportski_objekti ORDER BY id")
            rows = cur.fetchall()
            print(f'== Processing {len(rows)} objects (dry_run={args.dry_run}) ==')

            updated = 0
            skipped = 0
            failed = []
            for i, (oid, naziv, grad, adresa, oldlat, oldlng) in enumerate(rows, 1):
                print(f'[{i}/{len(rows)}] #{oid} {naziv} ({grad}) — current: {oldlat},{oldlng}')
                new_pos = _find_position(naziv, grad, adresa)
                if not new_pos:
                    failed.append((oid, naziv, grad))
                    print(' ✗ no precise match found')
                    continue

                nlat, nlng, _ = new_pos
                # Detect meaningful change (>50m). 0.0005° ≈ 55m at this latitude.
                if oldlat is not None and oldlng is not None:
                    dlat = abs(float(oldlat) - nlat)
                    dlng = abs(float(oldlng) - nlng)
                    if dlat < 0.0005 and dlng < 0.0005:
                        print(f' = unchanged (within 50m)')
                        skipped += 1
                        continue

                if args.dry_run:
                    print(f' [DRY] would UPDATE id={oid} -> {nlat},{nlng}')
                else:
                    cur.execute("""
                        UPDATE pgz_sport.sportski_objekti
                        SET lat=%s, lng=%s
                        WHERE id=%s
                    """, (nlat, nlng, oid))
                    conn.commit()
                    print(f' ✓ UPDATED -> {nlat},{nlng}')
                # Counted in dry-run mode too, so the summary shows how many
                # rows would change.
                updated += 1

            print('')
            print(f'== Summary: {updated} updated, {skipped} unchanged, {len(failed)} failed ==')
            if failed:
                print('Failed:')
                for oid, n, g in failed:
                    print(f' #{oid} {n} ({g})')
    finally:
        # Release the connection even when a query or lookup raises.
        conn.close()
|
||||
|
||||
# Run the command-line tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user