PGŽ Sport Platform — Round 1+2 baseline (sport2.html + API)

This commit is contained in:
Damir Radulić
2026-05-04 23:39:08 +02:00
commit a7ec0a86be
1820 changed files with 694455 additions and 0 deletions
+20
View File
@@ -0,0 +1,20 @@
#!/usr/bin/env python3
# rijeka_sport_scraper.py — sport.rijeka.hr + rijeka.hr/sport
"""Run the shared ``pgz_sport_deep`` harvester against Rijeka sport sites.

This script does not implement any scraping itself. It imports
``pgz_sport_deep``, replaces that module's ``ROOTS`` list with the Rijeka
URLs below, reconfigures root logging with a ``[rijeka_sport]`` tag, and,
when executed as a script, calls ``pgz_sport_deep.harvest()``.

NOTE(review): this relies on ``harvest()`` looking up ``ROOTS`` on the
module at call time — confirm against pgz_sport_deep.
"""
import logging
import os
import sys

# Put the scrapers directory first on the import path so that
# ``pgz_sport_deep`` resolves from there.
_SCRAPER_DIR = '/opt/pgz-sport/scrapers'
sys.path.insert(0, _SCRAPER_DIR)

import pgz_sport_deep
from pgz_sport_deep import (
    harvest as base_harvest,
    fetch,
    extract_text,
    find_links,
    find_pdf_links,
)

# logging.basicConfig() is a no-op when the root logger already has
# handlers, so drop any existing ones first (presumably installed while
# importing pgz_sport_deep — verify) and then apply this script's format.
_LOG_FORMAT = '%(asctime)s [rijeka_sport] %(message)s'
_root_logger = logging.getLogger()
_root_logger.handlers.clear()
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)

# Override roots
pgz_sport_deep.ROOTS = [
    "https://www.rijeka.hr/teme-za-gradane/sport-i-rekreacija/",
    "https://www.rijeka.hr/sport/",
    "https://sport.rijeka.hr",
    "https://www.rijekasport.hr",
]


def main():
    """Invoke the base harvester after ROOTS has been overridden above."""
    pgz_sport_deep.harvest()


if __name__ == "__main__":
    main()