PGŽ Sport Platform — Round 1+2 baseline (sport2.html + API)
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env python3
# rijeka_sport_scraper.py — sport.rijeka.hr + rijeka.hr/sport
#
# Thin launcher: points the shared pgz_sport_deep harvester at the Rijeka
# sport sites by replacing its ROOTS list, then runs it.
import logging
import os  # not referenced in this script; kept as in the original
import sys

# pgz_sport_deep is resolved through this directory, so the path entry has
# to be in place before the imports below run.
sys.path.insert(0, '/opt/pgz-sport/scrapers')

import pgz_sport_deep  # noqa: E402
from pgz_sport_deep import (  # noqa: E402
    harvest as base_harvest,
    fetch,
    extract_text,
    find_links,
    find_pdf_links,
)

# logging.basicConfig() does nothing when the root logger already has
# handlers, so drop whatever is installed at this point (presumably set up
# while importing pgz_sport_deep — verify) before applying our own format.
_root_logger = logging.getLogger()
_root_logger.handlers.clear()
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [rijeka_sport] %(message)s',
)

# Override roots
_RIJEKA_ROOTS = (
    "https://www.rijeka.hr/teme-za-gradane/sport-i-rekreacija/",
    "https://www.rijeka.hr/sport/",
    "https://sport.rijeka.hr",
    "https://www.rijekasport.hr",
)
# NOTE(review): this only takes effect if harvest() looks up
# pgz_sport_deep.ROOTS at call time — confirm against pgz_sport_deep.
pgz_sport_deep.ROOTS = list(_RIJEKA_ROOTS)


def main():
    """Run the shared harvester with the Rijeka roots installed above."""
    pgz_sport_deep.harvest()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user