#!/usr/bin/env python3
"""
geocode_v3_osm.py — fuzzy-match objekti against OSM sports facilities

Strategy:
  1) Pull all named sports leisure objects from OSM via Overpass API in PGŽ bounds.
  2) For each pgz_sport.sportski_objekti row, compute a similarity match against
     OSM names.
  3) When a confident match is found AND new coords differ from current by >100m,
     update the DB.
"""
import json
import math
import os
import re
import time
import unicodedata
import urllib.parse
import urllib.request
from difflib import SequenceMatcher

# Connection parameters, overridable through PG_* environment variables.
PG = dict(host=os.environ.get('PG_HOST', '10.10.0.2'),
          port=int(os.environ.get('PG_PORT', '6432')),
          dbname=os.environ.get('PG_DB', 'rinet_v3'),
          user=os.environ.get('PG_USER', 'rinet'),
          password=os.environ.get('PG_PASS', ''))
UA = 'pgz-sport/2.0 (dradulic@outlook.com)'

OVERPASS = """[out:json][timeout:60];
(
  node["leisure"~"sports_centre|sports_hall|stadium|pitch|swimming_pool|ice_rink"](44.5,14.0,45.6,15.1);
  way["leisure"~"sports_centre|sports_hall|stadium|pitch|swimming_pool|ice_rink"](44.5,14.0,45.6,15.1);
  node["sport"]["name"](44.5,14.0,45.6,15.1);
  way["sport"]["name"](44.5,14.0,45.6,15.1);
  node["amenity"~"sports_centre|gymnasium"](44.5,14.0,45.6,15.1);
  way["amenity"~"sports_centre|gymnasium"](44.5,14.0,45.6,15.1);
);
out center tags;"""

# Matching thresholds used by main().
MIN_SIMILARITY = 0.55   # best score below this is treated as "no match"
CITY_BOOST = 0.05       # bonus when the DB city appears in the OSM name/addr:city
MIN_MOVE_M = 100        # rows already within this distance are left untouched

# Generic facility words/abbreviations that carry no identifying information.
# They are written without diacritics because normalize() folds diacritics
# before stripping them.
_STOP_PHRASES = (
    'sportska dvorana', 'gradska sportska dvorana',
    'multifunkcionalna dvorana', 'sportski centar', 'gradski stadion',
    'sportski kompleks', 'srednja skola', 'srednje skole', 'osnovna skola',
    'os', 'ss', 'dr', 'prof', 'centar', 'stadion', 'dvorana', 'bazen',
    'bazeni',
)
# Longest alternatives first so "gradska sportska dvorana" wins over
# "sportska dvorana"; \b anchors keep short tokens such as "os" or "dr" from
# eating the inside of ordinary words ("kos", "aleksandr").
_STOP_RE = re.compile(
    r'\b(?:'
    + '|'.join(re.escape(p)
               for p in sorted(_STOP_PHRASES, key=len, reverse=True))
    + r')\b'
)
_PUNCT_RE = re.compile(r'[^\w\s]')
_WS_RE = re.compile(r'\s+')


def fetch_osm():
    """POST the OVERPASS query to the public Overpass API and return the parsed JSON."""
    req = urllib.request.Request(
        'https://overpass-api.de/api/interpreter',
        data=urllib.parse.urlencode({'data': OVERPASS}).encode(),
        headers={'User-Agent': UA,
                 'Content-Type': 'application/x-www-form-urlencoded'})
    with urllib.request.urlopen(req, timeout=120) as r:
        return json.loads(r.read().decode())


def _fold_diacritics(s):
    """Return *s* with combining marks removed (š→s, č→c, ž→z) and đ→d."""
    # "đ" has no Unicode decomposition, so it needs an explicit mapping.
    s = s.replace('đ', 'd').replace('Đ', 'D')
    decomposed = unicodedata.normalize('NFKD', s)
    return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))


def normalize(s):
    """Reduce a facility name to its distinguishing words for fuzzy matching.

    Lower-cases, folds diacritics, turns punctuation into spaces, removes the
    generic phrases in _STOP_PHRASES (whole words only) and collapses
    whitespace.  ``None`` and empty input yield ``''``.
    """
    s = _fold_diacritics((s or '').lower())
    s = _PUNCT_RE.sub(' ', s)
    # Collapse whitespace *before* stripping so multi-word phrases still match
    # when punctuation removal left a double space between their words.
    s = _WS_RE.sub(' ', s)
    s = _STOP_RE.sub(' ', s)
    return _WS_RE.sub(' ', s).strip()


def similarity(a, b):
    """Return the SequenceMatcher ratio (0..1) of the normalized names."""
    return SequenceMatcher(None, normalize(a), normalize(b)).ratio()


def haversine(lat1, lng1, lat2, lng2):
    """Distance in meters."""
    R = 6371000
    p1 = math.radians(lat1)
    p2 = math.radians(lat2)
    dp = math.radians(lat2 - lat1)
    dl = math.radians(lng2 - lng1)
    a = math.sin(dp / 2) ** 2 + math.cos(p1) * math.cos(p2) * math.sin(dl / 2) ** 2
    # Clamp: rounding can push `a` marginally above 1 for near-antipodal points.
    return 2 * R * math.asin(math.sqrt(min(1.0, a)))


def _extract_named_elements(osm):
    """Return the OSM elements that have a name and usable coordinates.

    Nodes carry lat/lon directly; ways only have them under 'center'
    (requested with ``out center``).  Each result also carries pre-computed
    'norm', 'name_lower' and 'city_lower' strings so the matching loop does
    not redo that work for every DB row.
    """
    elems = []
    for e in osm.get('elements', []):
        tags = e.get('tags', {})
        name = tags.get('name')
        if not name:
            continue
        center = e.get('center', {})
        lat = e.get('lat')
        if lat is None:
            lat = center.get('lat')
        lon = e.get('lon')
        if lon is None:
            lon = center.get('lon')
        if lat is None or lon is None:
            continue
        elems.append({
            'name': name, 'lat': lat, 'lng': lon, 'tags': tags,
            'norm': normalize(name),
            'name_lower': name.lower(),
            'city_lower': (tags.get('addr:city', '') or '').lower(),
        })
    return elems


def _best_match(nname, grad, elems):
    """Return ``(element, score)`` of the best OSM match for a normalized name.

    The score is the SequenceMatcher ratio plus CITY_BOOST when *grad* occurs
    in the OSM name or its addr:city tag.  Returns ``(None, 0.0)`` when no
    element scores above zero.
    """
    grad_lower = (grad or '').lower()
    best, best_sim = None, 0.0
    for e in elems:
        sim = SequenceMatcher(None, nname, e['norm']).ratio()
        if grad_lower and (grad_lower in e['name_lower']
                           or grad_lower in e['city_lower']):
            sim += CITY_BOOST
        if sim > best_sim:
            best_sim = sim
            best = e
    return best, best_sim


def main():
    """Fetch OSM facilities, match every DB row and update moved coordinates.

    A row is updated only when its best match scores at least MIN_SIMILARITY
    and the matched location is MIN_MOVE_M or more away from the stored one
    (or the row has no coordinates yet).  Each update is committed on its own,
    so an interrupted run keeps the rows already processed.
    """
    # Imported here so the pure matching helpers above stay importable on
    # machines without the PostgreSQL driver installed.
    import psycopg2
    import psycopg2.extras

    print('Fetching OSM sports data...')
    elems = _extract_named_elements(fetch_osm())
    print(f'OSM named sports elements: {len(elems)}')

    updated = 0
    skipped_close = 0
    skipped_low = 0

    conn = psycopg2.connect(**PG)
    try:
        with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            cur.execute("SELECT id, naziv, grad, lat, lng FROM pgz_sport.sportski_objekti ORDER BY id")
            objekti = cur.fetchall()
            print(f'DB objekti: {len(objekti)}')

            for o in objekti:
                nname = normalize(o['naziv'])
                if not nname:
                    # Nothing distinctive left in the name; cannot match.
                    continue

                best, best_sim = _best_match(nname, o['grad'], elems)

                # Require strong match
                if best is None or best_sim < MIN_SIMILARITY:
                    skipped_low += 1
                    continue

                # Skip if already within 100m
                if o['lat'] is not None and o['lng'] is not None:
                    d = haversine(float(o['lat']), float(o['lng']),
                                  best['lat'], best['lng'])
                    if d < MIN_MOVE_M:
                        skipped_close += 1
                        continue

                # Apply update
                print(f" #{o['id']:3} {o['naziv'][:55]:55} -> '{best['name'][:40]}' sim={best_sim:.2f} {best['lat']:.6f},{best['lng']:.6f}")
                cur.execute("UPDATE pgz_sport.sportski_objekti SET lat=%s, lng=%s WHERE id=%s",
                            (best['lat'], best['lng'], o['id']))
                conn.commit()
                updated += 1
    finally:
        # Always release the connection, even when a query or match fails.
        conn.close()

    print(f'\nUpdated: {updated} Already-close: {skipped_close} Low-similarity: {skipped_low}')


if __name__ == '__main__':
    main()