Crisis V6: panel expand + klub matching + ne-klub filter + samo_klubovi default
DB:
- pgz_sport.potpore_nositelji.je_klub flag (false za RSS programs/savezi)
- Re-match klub_id case-insensitive trim normalize
Endpoint:
- /api/dashboard/top-primatelji: samo_klubovi=True default
Frontend:
- sport2.html #panel/#dpanel: 70vw / 1100px max-width za HNS karijera
- mobile responsive za panel
This commit is contained in:
@@ -0,0 +1,147 @@
|
||||
#!/usr/bin/env python3
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
# Fajl: sub1_hns_fix_and_extract.py | v1.0.0 | 05.05.2026
|
||||
# Lokacija: /opt/pgz-sport/scripts/sub1_hns_fix_and_extract.py
|
||||
# Autor: dradulic@outlook.com / damir@rinet.one
|
||||
# Svrha: SUB1 finalize — (a) rollback false positives,
|
||||
# (b) extract hns_klub_id iz već postojećeg source_url,
|
||||
# (c) verify presence preko HEAD i upsert.
|
||||
# ═══════════════════════════════════════════════════════════════════
|
||||
"""SUB1 fix-up: false-positive rollback + source_url-based extraction."""
|
||||
import os, re, sys, time, json, subprocess, urllib.request
|
||||
from datetime import datetime
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
# SECURITY(review): the fallback values below embed a database password and a
# Telegram bot token in source control. They are kept unchanged here so that
# existing deployments keep working, but they should be rotated and the
# defaults removed in favour of the RINET_DSN / TG_BOT_TOKEN / TG_CHAT
# environment variables.
DSN = os.getenv(
    "RINET_DSN",
    "host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet password=R1net2026!SecureDB#v7",
)
TG = os.getenv("TG_BOT_TOKEN", "8535797835:AAFItT-92jzZ9NWFafLxn0dLa1_n2s-JE5Y")
TG_CHAT = os.getenv("TG_CHAT", "7969491558")

# User-Agent header sent with every outgoing HTTP request made by this script.
UA = "PGZ-Sport-Bot/1.0 (+https://api.rinet.one/sport/; contact dradulic@outlook.com)"

# One log file per run, named with minute resolution and opened in append mode.
LOG_PATH = f"/var/log/pgz-sport-debug/sub1_fix_{datetime.now().strftime('%Y%m%d_%H%M')}.log"

# Create the log directory if it is missing so a fresh host does not fail on
# the open() below.
os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)

# Explicit UTF-8: log lines contain Croatian letters and an em dash, which can
# raise UnicodeEncodeError when the process runs under a non-UTF-8 locale.
LOG = open(LOG_PATH, "a", encoding="utf-8")
|
||||
|
||||
# Known false-positive HNS matches, keyed by pgz_sport.klubovi.id.
# main() clears the HNS mapping of each listed club and marks it as not found;
# the value is the human-readable reason written to the log.
FALSE_POS = {
    2572: (
        "NK Hajduk Tovarnik "
        "(matched HNK Hajduk Split — different club)"
    ),
    600: (
        "Ženski NK XXL Kraljevica "
        "(matched men's NK Kraljevica — wrong sex)"
    ),
}
|
||||
|
||||
def log(msg, telegram=False):
    """Write a timestamped line to stdout and the run log; optionally notify Telegram.

    Args:
        msg: Text to record.
        telegram: When true, also POST the message (truncated to 3500
            characters) to the chat ``TG_CHAT`` through the Telegram Bot API,
            using the ``curl`` binary.

    Telegram delivery is best-effort: a missing ``curl`` binary or a timeout
    is noted in the log and never interrupts the run.
    """
    line = f"[{datetime.now().isoformat(timespec='seconds')}] {msg}"
    print(line, flush=True)
    LOG.write(line + "\n")
    LOG.flush()

    if not telegram:
        return

    try:
        subprocess.run(
            [
                "curl", "-s", "-X", "POST",
                f"https://api.telegram.org/bot{TG}/sendMessage",
                "-d", f"chat_id={TG_CHAT}",
                "--data-urlencode", f"text={msg[:3500]}",
            ],
            timeout=8,
            capture_output=True,
        )
    except (OSError, subprocess.SubprocessError) as exc:
        # Only the exception type is recorded: the exception text of a
        # subprocess failure contains the full command line, which includes
        # the bot token.
        failure = (
            f"[{datetime.now().isoformat(timespec='seconds')}] "
            f"telegram notify failed: {type(exc).__name__}"
        )
        print(failure, file=sys.stderr, flush=True)
        LOG.write(failure + "\n")
        LOG.flush()
|
||||
|
||||
def http_head_or_get(url, timeout=12):
    """Fetch *url* with a GET request and report what came back.

    Returns a ``(status, title)`` tuple:

    * response received: the response status and the stripped text of the
      first ``<h1>`` element in the body (``None`` when there is no such
      element);
    * ``HTTPError`` raised: the error's HTTP code and ``None``;
    * any other exception: ``0`` and the exception text.
    """
    try:
        request = urllib.request.Request(url, headers={"User-Agent": UA})
        with urllib.request.urlopen(request, timeout=timeout) as response:
            # Undecodable bytes are replaced rather than raising.
            body = response.read().decode("utf-8", errors="replace")
            heading = re.search(r'<h1[^>]*>([^<]+)</h1>', body)
            if heading is None:
                return response.status, None
            return response.status, heading.group(1).strip()
    except urllib.error.HTTPError as http_err:
        return http_err.code, None
    except Exception as err:
        return 0, str(err)
|
||||
|
||||
# Matches the club path of a semafor URL: group(1) is the numeric club id and
# group(2) the slug. The slug segment, including the slash before it, is
# optional so that a bare ".../klubovi/123" URL (which the SQL candidate filter
# in main() accepts) still yields its id; group(2) is then None.
URL_RE = re.compile(r'/klubovi/(\d+)(?:/([a-z0-9-]*))?')
|
||||
|
||||
# Folds the Croatian diacritics to their ASCII base letters for slug building.
_CROATIAN_FOLD = str.maketrans("čćšžđ", "ccszd")


def _slug_from_title(title):
    """Turn a page title into a lower-case, dash-separated ASCII slug."""
    folded = title.lower().translate(_CROATIAN_FOLD)
    return re.sub(r'[^a-z0-9]+', '-', folded).strip('-')


def _rollback_false_positives(cur):
    """Clear the HNS mapping of every club in FALSE_POS; return rows changed."""
    rolled_back = 0
    for kid, reason in FALSE_POS.items():
        cur.execute("""
            UPDATE pgz_sport.klubovi
            SET hns_klub_id = NULL,
                hns_slug = NULL,
                scrape_source = 'hns_not_found',
                last_scraped_at = now()
            WHERE id = %s
        """, (kid,))
        # Count only rows that really exist, so the summary is not inflated
        # by ids that are absent from the table.
        if cur.rowcount:
            log(f" ROLLBACK [{kid}] — {reason}")
            rolled_back += 1
        else:
            log(f" ROLLBACK skipped [{kid}] — no such row")
    return rolled_back


def _extract_from_source_urls(cur):
    """Map clubs to HNS ids parsed from their stored semafor source_url.

    Each candidate id is verified by fetching the club page before the row is
    updated. Returns ``(extracted, verify_fail)`` counters.
    """
    cur.execute("""
        SELECT id, naziv, source_url
        FROM pgz_sport.klubovi
        WHERE sport='nogomet' AND pgz_sufinanciran=true
          AND hns_klub_id IS NULL
          AND source_url ~ 'semafor\\.hns\\.family/klubovi/[0-9]+'
        ORDER BY id
    """)
    rows = cur.fetchall()
    log(f"Source-URL extraction candidates: {len(rows)}")

    extracted = 0
    verify_fail = 0
    for row in rows:
        kid, naziv, url = row['id'], row['naziv'], row['source_url']

        # The rollback leaves source_url untouched, so a rolled-back club can
        # reappear here with hns_klub_id NULL; re-extracting from that URL
        # would restore the very mapping that was just cleared.
        if kid in FALSE_POS:
            log(f" SKIP [{kid}] rolled back as false positive")
            continue

        m = URL_RE.search(url)
        if not m:
            log(f" SKIP [{kid}] no match in {url}")
            continue
        hns_id = int(m.group(1))
        slug = m.group(2) or None

        # Verify that the club page exists and carries a heading.
        verify_url = f"https://semafor.hns.family/klubovi/{hns_id}/"
        status, title = http_head_or_get(verify_url)
        time.sleep(0.8)  # pause between requests to the remote site
        if status != 200 or not title:
            log(f" VERIFY FAIL [{kid}] {naziv} -> {hns_id}: status={status} title={title}")
            verify_fail += 1
            continue

        # If the stored URL had no slug, infer one from the page title.
        if not slug:
            slug = _slug_from_title(title)
        canonical = (
            f"https://semafor.hns.family/klubovi/{hns_id}/{slug}/"
            if slug else verify_url
        )

        try:
            cur.execute("""
                UPDATE pgz_sport.klubovi
                SET hns_klub_id = %s,
                    hns_slug = %s,
                    source_url = %s,
                    scrape_source = 'hns_semafor',
                    last_scraped_at = now()
                WHERE id = %s
            """, (hns_id, slug, canonical, kid))
        except psycopg2.Error as e:
            # One failing row must not abort the remaining candidates.
            log(f" UPDATE fail [{kid}]: {e}")
        else:
            log(f" EXTRACT [{kid}] {naziv} -> HNS {hns_id} '{title}' (slug={slug})")
            extracted += 1

    return extracted, verify_fail


def _final_stats(cur):
    """Return mapped / marked-not-found / untouched counts for the filtered clubs."""
    cur.execute("""
        SELECT
          COUNT(*) FILTER (WHERE hns_klub_id IS NOT NULL) AS mapped,
          COUNT(*) FILTER (WHERE hns_klub_id IS NULL AND scrape_source='hns_not_found') AS marked_nf,
          COUNT(*) FILTER (WHERE hns_klub_id IS NULL AND (scrape_source IS NULL OR scrape_source != 'hns_not_found')) AS untouched
        FROM pgz_sport.klubovi
        WHERE sport='nogomet' AND pgz_sufinanciran=true
          AND naziv !~* 'Malonogometni|Mini Nogomet|Mali Nogomet|Američkog|Plaž|Pijesku|HMNK|MNK|preteča|povijesn|1903|1906|1908|1917|1919|1926|1929'
    """)
    return cur.fetchone()


def main():
    """Run the SUB1 fix-up in three phases and report the outcome.

    1. Roll back the known false-positive matches listed in FALSE_POS.
    2. Extract ``hns_klub_id`` from rows whose ``source_url`` already points
       at a semafor club page, verifying each id over HTTP first.
    3. Query the final counts, log them, and send the summary to Telegram.

    The connection runs in autocommit mode, so every UPDATE is committed on
    its own and a failure part-way through leaves earlier rows applied.
    """
    log(f"=== SUB1 fix start; log={LOG_PATH} ===")
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    try:
        cur = conn.cursor(cursor_factory=RealDictCursor)
        rb = _rollback_false_positives(cur)
        extracted, verify_fail = _extract_from_source_urls(cur)
        stats = _final_stats(cur)
    finally:
        # Release the database connection even when a phase raises.
        conn.close()

    log(f"=== Final state (real football, PGŽ priority): mapped={stats['mapped']}, marked_not_found={stats['marked_nf']}, untouched={stats['untouched']} ===")

    msg = (f"SUB1 fix done: rollback={rb}, source_url-extracted={extracted}, "
           f"verify_fail={verify_fail}. Final mapped={stats['mapped']} / "
           f"not_found={stats['marked_nf']} / untouched={stats['untouched']}")
    log(msg, telegram=True)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user