Files
damir 007825acee Bug hunt V7:
DB:
- Aggressive je_klub=false flag for programs/treninzi/totals (>100K€ no klub_id)
- 53 ne-klubovi flagged false (RSS Rijeka ukupni, Stručni rad, Potpora loptačkim, etc)

Frontend (sport2.html):
- Panel back button (← Natrag) + history stack
- window._panelHistory + pushPanelState + panelBack functions
- closePanel resets history
2026-05-05 14:56:53 +02:00

74 lines
2.5 KiB
Python

#!/usr/bin/env python3
"""Lokalni news RSS PGZ."""
import sys, json, time, re
sys.path.insert(0, "/opt/pgz-sport/scrapers/harvesters")
from _common import (fetch, extract_text, chunk_text, upsert_facts, DSN)
from html import unescape
import psycopg2
# RSS feeds to harvest, as (portal, url) pairs.
# main() walks this list in order; the portal key becomes part of the stored
# source name ("news_<portal>") and is printed in the per-feed status line.
FEEDS = [
("novi_list", "https://www.novilist.hr/rss/rijeka.xml"),
("novi_list_pgz", "https://www.novilist.hr/rss/regija.xml"),
("rijeka_danas", "https://rijekadanas.com/feed/"),
("rijeka_in", "https://rijekain.hr/feed/"),
("primorske_novice","https://primorskenovice.hr/feed/"),
("kvarner_news", "https://www.kvarner.news/feed/"),
("oradio", "https://otvoreniradio.hr/rss/sve.xml"),
("rijeka_today", "https://www.rijekatoday.com/feed/"),
]
# Patterns are compiled once at import time instead of once per item/field.
# <item> may carry attributes (RSS 1.0 uses <item rdf:about="...">).
_ITEM_RE = re.compile(r"<item(?:\s[^>]*)?>(.*?)</item>", re.S | re.I)
_CDATA_RE = re.compile(r"<!\[CDATA\[(.*?)\]\]>", re.S)
# A tag must start with a letter, "/", "!" or "?" right after "<", so plain
# text such as "1 < 2 > 0" is not mistaken for markup.
_TAG_RE = re.compile(r"<[A-Za-z/!?][^>]*>")
_WS_RE = re.compile(r"\s+")


def _clean_field(raw):
    """Turn the raw inner text of an RSS element into plain text.

    CDATA sections hold literal HTML: tags are stripped, then entities are
    decoded. Text outside CDATA is XML-escaped, so it is decoded first; if
    that reveals HTML markup (``&lt;p&gt;`` -> ``<p>``) the markup is stripped
    and the HTML's own entities are decoded as well. Whitespace is collapsed
    last, so entity-encoded whitespace such as ``&nbsp;`` is normalised too.
    """
    pieces = []
    # split() with one capture group alternates: outside, CDATA, outside, ...
    for idx, part in enumerate(_CDATA_RE.split(raw)):
        if idx % 2:
            pieces.append(unescape(_TAG_RE.sub(" ", part)))
            continue
        # Drop real inline child elements, then undo the XML escaping.
        decoded = unescape(_TAG_RE.sub(" ", part))
        revealed = _TAG_RE.sub(" ", decoded)
        if revealed != decoded:
            # The payload was escaped HTML, so its entities are still encoded.
            # The second decode is limited to this case so that plain values
            # (e.g. link query strings) are only ever decoded once.
            revealed = unescape(revealed)
        pieces.append(revealed)
    return _WS_RE.sub(" ", "".join(pieces)).strip()


def _grab(item, tag):
    """Return the cleaned text of the first ``<tag>`` in *item*, or ``""``."""
    found = re.search(
        rf"<{tag}(?:\s[^>]*)?>(.*?)</{tag}>", item, re.S | re.I
    )
    return _clean_field(found.group(1)) if found else ""


def parse_rss(xml):
    """Extract the items of an RSS document with a tolerant regex scan.

    Args:
        xml: Feed document as text. A falsy value yields an empty list.

    Returns:
        A list with one dict per ``<item>``, in document order, with the keys
        ``title``, ``link``, ``description`` and ``pubDate``. Each value is
        plain text (markup removed, entities decoded, whitespace collapsed),
        or ``""`` when the element is missing.
    """
    if not xml:
        return []
    return [
        {
            "title": _grab(body, "title"),
            "link": _grab(body, "link"),
            "description": _grab(body, "description"),
            "pubDate": _grab(body, "pubDate"),
        }
        for body in (m.group(1) for m in _ITEM_RE.finditer(xml))
    ]
def main():
    """Harvest every feed in FEEDS and store its headlines as facts.

    For each ``(portal, url)`` pair the feed is fetched and parsed with
    ``parse_rss``. Every item with a title or description becomes a fact
    dict (``fact``, ``url``, ``title``) and the batch is handed to
    ``upsert_facts`` under the source name ``news_<portal>``. One status
    line is printed per feed, followed by a total line and a one-line JSON
    summary.
    """
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    total_inserted = 0
    # try/finally so the connection is closed even if a fetch, parse or
    # upsert raises part-way through the feed list.
    try:
        for portal, url in FEEDS:
            xml, _status = fetch(url, timeout=15)
            if not xml:
                print(f" {portal:20} fetch FAIL")
                continue
            items = parse_rss(xml)
            if not items:
                print(f" {portal:20} parse 0 items")
                continue
            ff = []
            for it in items:
                title = it.get("title", "")
                desc = it.get("description", "")
                if not title and not desc:
                    continue
                # Description is capped at 400 characters per fact.
                fact = f"{title} - {desc[:400]}".strip()
                # Skip facts too short to be useful.
                if len(fact) < 30:
                    continue
                ff.append({"fact": fact, "url": it.get("link", ""), "title": title})
            # NOTE(review): the return value is treated as the number of rows
            # inserted - confirm against _common.upsert_facts.
            n = upsert_facts(conn, ff, source_name=f"news_{portal}",
                             category="news_pgz_rss", confidence=0.84)
            total_inserted += n
            print(f" {portal:20} items={len(items):>3} inserted={n:>3}")
            # Pause between feeds; failed or empty feeds skip it via `continue`.
            time.sleep(1)
    finally:
        conn.close()
    print(f"=== TOTAL inserted: {total_inserted} ===")
    print(json.dumps({"feeds": len(FEEDS), "inserted": total_inserted}))
# Run the harvester only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()