CC1 R3B-P3 — geocoding precision (Crikvenica + OSM cross-check)
- New scripts/geocode_v3_osm.py: matches DB objekti against OSM Overpass sports facilities
- Applied 53 OSM updates, then reverted bad cross-city matches to hand-curated coords
- Crikvenica venues now precise (Gradska dvorana, SS Antun Barac, Stadion, Sport+ Centar)
- Atletska dvorana Luciano Sušanj fixed to Kantrida
- Skate park Delta, Boulder dvorana, Boćarski Podvežica reverted from wrong matches
- Google Places API not available (project disabled), Overpass + curated fallback used

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Executable
+128
@@ -0,0 +1,128 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
geocode_v3_osm.py — fuzzy-match objekti against OSM sports facilities
|
||||||
|
|
||||||
|
Strategy:
|
||||||
|
1) Pull all named sports leisure objects from OSM via Overpass API in PGŽ bounds.
|
||||||
|
2) For each pgz_sport.sportski_objekti row, compute a similarity match against OSM names.
|
||||||
|
3) When a confident match is found AND new coords differ from current by >100m,
|
||||||
|
update the DB.
|
||||||
|
"""
|
||||||
|
import os, time, json, urllib.parse, urllib.request
|
||||||
|
import psycopg2, psycopg2.extras
|
||||||
|
import re
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
|
||||||
|
# PostgreSQL connection keyword arguments. Every value can be overridden via
# the corresponding PG_* environment variable; the literals are the fallbacks.
PG = {
    'host': os.environ.get('PG_HOST', '10.10.0.2'),
    'port': int(os.environ.get('PG_PORT', '6432')),
    'dbname': os.environ.get('PG_DB', 'rinet_v3'),
    'user': os.environ.get('PG_USER', 'rinet'),
    'password': os.environ.get('PG_PASS', ''),
}

# Value sent as the HTTP User-Agent header on outgoing requests.
UA = 'pgz-sport/2.0 (dradulic@outlook.com)'
|
||||||
|
|
||||||
|
# Overpass QL query: JSON output, 60 s server-side timeout. Every statement is
# restricted to the same bounding box (south, west, north, east). The final
# "out center tags" asks for tags plus a center point for ways, so both nodes
# and ways yield a single coordinate pair.
OVERPASS = (
    '[out:json][timeout:60];\n'
    '(\n'
    # Facilities identified by their leisure=* value (named or not).
    'node["leisure"~"sports_centre|sports_hall|stadium|pitch|swimming_pool|ice_rink"](44.5,14.0,45.6,15.1);\n'
    'way["leisure"~"sports_centre|sports_hall|stadium|pitch|swimming_pool|ice_rink"](44.5,14.0,45.6,15.1);\n'
    # Anything carrying both a sport=* tag and a name.
    'node["sport"]["name"](44.5,14.0,45.6,15.1);\n'
    'way["sport"]["name"](44.5,14.0,45.6,15.1);\n'
    # Facilities tagged through amenity=* instead of leisure=*.
    'node["amenity"~"sports_centre|gymnasium"](44.5,14.0,45.6,15.1);\n'
    'way["amenity"~"sports_centre|gymnasium"](44.5,14.0,45.6,15.1);\n'
    ');\n'
    'out center tags;'
)
|
||||||
|
|
||||||
|
def fetch_osm(endpoint='https://overpass-api.de/api/interpreter', timeout=120):
    """POST the OVERPASS query to an Overpass API endpoint and return the parsed JSON.

    Args:
        endpoint: URL of the Overpass interpreter. Defaults to the public
            overpass-api.de instance; pass a mirror if that one is overloaded.
        timeout: Client-side socket timeout in seconds for the HTTP request.

    Returns:
        The decoded JSON response (a dict with an 'elements' list on success).

    Raises:
        urllib.error.URLError: On network failure or a non-2xx HTTP status.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    form_body = urllib.parse.urlencode({'data': OVERPASS}).encode()
    request = urllib.request.Request(
        endpoint,
        data=form_body,
        headers={'User-Agent': UA, 'Content-Type': 'application/x-www-form-urlencoded'},
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        payload = json.loads(response.read().decode())

    # Overpass reports server-side timeouts / out-of-memory conditions with
    # HTTP 200 and a "remark" field; the element list is then incomplete.
    # Surface that instead of silently matching against partial data.
    remark = payload.get('remark') if isinstance(payload, dict) else None
    if remark:
        print(f'WARNING: Overpass returned a remark (result may be incomplete): {remark}')
    return payload
|
||||||
|
|
||||||
|
# Generic Croatian facility words/phrases (already diacritic-folded) that carry
# no identifying information and would otherwise dominate the similarity score.
_STOP_PHRASES = (
    'gradska sportska dvorana', 'multifunkcionalna dvorana', 'sportska dvorana',
    'sportski kompleks', 'sportski centar', 'gradski stadion',
    'srednja skola', 'srednje skole', 'osnovna skola',
    'os', 'ss', 'dr', 'prof',
    'centar', 'stadion', 'dvorana', 'bazeni', 'bazen',
)

# Longest alternatives first so a full phrase wins over its own suffix/prefix;
# \b anchors keep short tokens such as "os" from matching inside other words.
_STOP_RE = re.compile(
    r'\b(?:'
    + '|'.join(re.escape(p) for p in sorted(_STOP_PHRASES, key=len, reverse=True))
    + r')\b'
)


def normalize(s):
    """Return a canonical lowercase form of a facility name for fuzzy matching.

    Steps: lowercase, fold diacritics to ASCII (so "škola" equals "skola"),
    replace punctuation with spaces, drop generic facility words as whole
    words only, and collapse whitespace.

    Args:
        s: Facility name; None or an empty string is accepted.

    Returns:
        The normalized name, or '' when nothing identifying remains.
    """
    import unicodedata

    # 'đ' has no Unicode decomposition, so map it by hand before NFKD folding.
    text = (s or '').lower().replace('đ', 'd')
    text = ''.join(
        ch for ch in unicodedata.normalize('NFKD', text)
        if not unicodedata.combining(ch)
    )
    text = re.sub(r'[^\w\s]', ' ', text)
    # Collapse whitespace first so multi-word stop phrases match even when
    # punctuation removal left several spaces between their words.
    text = ' '.join(text.split())
    text = _STOP_RE.sub(' ', text)
    return ' '.join(text.split())
|
||||||
|
|
||||||
|
def similarity(a, b):
    """Return the difflib similarity ratio between the normalized forms of a and b.

    Both names are passed through normalize() first; the result is
    SequenceMatcher.ratio() with a as the first sequence and b as the second.
    """
    left = normalize(a)
    right = normalize(b)
    matcher = SequenceMatcher(None, left, right)
    return matcher.ratio()
|
||||||
|
|
||||||
|
def haversine(lat1, lng1, lat2, lng2):
    """Return the great-circle distance in meters between two points.

    Args:
        lat1, lng1: Latitude and longitude of the first point, in degrees.
        lat2, lng2: Latitude and longitude of the second point, in degrees.

    Returns:
        Distance in meters on a sphere of radius 6,371,000 m.
    """
    import math

    earth_radius_m = 6371000
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    half_dphi = math.radians(lat2 - lat1) / 2
    half_dlambda = math.radians(lng2 - lng1) / 2

    a = (math.sin(half_dphi) ** 2
         + math.cos(phi1) * math.cos(phi2) * math.sin(half_dlambda) ** 2)
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt/asin raise ValueError; clamp it.
    a = min(1.0, max(0.0, a))
    return 2 * earth_radius_m * math.asin(math.sqrt(a))
|
||||||
|
|
||||||
|
def _named_elements(osm):
    """Reduce an Overpass response to named elements that have coordinates."""
    elems = []
    for e in osm.get('elements', []):
        tags = e.get('tags', {})
        name = tags.get('name')
        if not name:
            continue
        # Nodes carry lat/lon directly; ways only have them under 'center'.
        center = e.get('center', {})
        lat = e.get('lat')
        if lat is None:
            lat = center.get('lat')
        lon = e.get('lon')
        if lon is None:
            lon = center.get('lon')
        if lat is None or lon is None:
            continue
        elems.append({
            'name': name,
            'norm': normalize(name),  # normalized once, reused for every DB row
            'lat': lat,
            'lng': lon,
            'tags': tags,
        })
    return elems


def _best_match(obj, nname, elems):
    """Return (element, score) of the OSM element most similar to a DB row."""
    grad = (obj['grad'] or '').strip().lower()
    best = None
    best_sim = 0.0
    for e in elems:
        osm_city = (e['tags'].get('addr:city') or '').strip().lower()
        # When OSM explicitly places the element in a different city, a name
        # match is almost certainly a same-named venue elsewhere; skip it.
        if grad and osm_city and grad not in osm_city and osm_city not in grad:
            continue
        sim = SequenceMatcher(None, nname, e['norm']).ratio()
        # Small boost when the row's city appears in the OSM name or addr:city.
        if grad and (grad in e['name'].lower() or grad in osm_city):
            sim += 0.05
        if sim > best_sim:
            best_sim = sim
            best = e
    return best, best_sim


def main(min_similarity=0.55, min_shift_m=100, max_shift_m=None, dry_run=False):
    """Fuzzy-match pgz_sport.sportski_objekti rows against OSM and update coordinates.

    For every DB row the best-scoring OSM element is selected; the row's
    lat/lng are overwritten when the score reaches `min_similarity` and the
    new position is at least `min_shift_m` away from the stored one (rows
    without stored coordinates are always updated on a confident match).

    Args:
        min_similarity: Minimum score (name ratio plus optional 0.05 city
            boost) required to accept a match.
        min_shift_m: Matches closer than this many meters to the stored
            coordinates are treated as already correct and skipped.
        max_shift_m: If set, matches farther than this many meters from the
            stored coordinates are rejected as implausible. None disables it.
        dry_run: When True, print the proposed updates without writing them.
    """
    print('Fetching OSM sports data...')
    elems = _named_elements(fetch_osm())
    print(f'OSM named sports elements: {len(elems)}')

    conn = psycopg2.connect(**PG)
    try:
        cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        try:
            cur.execute("SELECT id, naziv, grad, lat, lng FROM pgz_sport.sportski_objekti ORDER BY id")
            objekti = cur.fetchall()
            print(f'DB objekti: {len(objekti)}')

            updated = 0
            skipped_close = 0
            skipped_low = 0
            skipped_far = 0
            for o in objekti:
                nname = normalize(o['naziv'])
                if not nname:
                    continue

                best, best_sim = _best_match(o, nname, elems)
                # Require a strong match.
                if best is None or best_sim < min_similarity:
                    skipped_low += 1
                    continue

                # Rows with stored coordinates are only moved when the shift
                # is meaningful (and, optionally, plausible).
                if o['lat'] and o['lng']:
                    d = haversine(float(o['lat']), float(o['lng']), best['lat'], best['lng'])
                    if d < min_shift_m:
                        skipped_close += 1
                        continue
                    if max_shift_m is not None and d > max_shift_m:
                        skipped_far += 1
                        continue

                print(f" #{o['id']:3} {o['naziv'][:55]:55} -> '{best['name'][:40]}' sim={best_sim:.2f} {best['lat']:.6f},{best['lng']:.6f}")
                if not dry_run:
                    cur.execute("UPDATE pgz_sport.sportski_objekti SET lat=%s, lng=%s WHERE id=%s",
                                (best['lat'], best['lng'], o['id']))
                    # Commit per row so earlier updates survive a later failure.
                    conn.commit()
                updated += 1

            summary = f'\nUpdated: {updated} Already-close: {skipped_close} Low-similarity: {skipped_low}'
            if max_shift_m is not None:
                summary += f' Too-far: {skipped_far}'
            print(summary)
        finally:
            cur.close()
    finally:
        # Always release the connection, even when a query or match step fails.
        conn.close()
|
||||||
|
|
||||||
|
# Run the geocoding pass only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user