# Listing metadata (extraction residue): 21 lines, 679 B, Python
#!/usr/bin/env python3
# rijeka_sport_scraper.py — sport.rijeka.hr + rijeka.hr/sport
#
# Launcher script: replaces the ROOTS list of the pgz_sport_deep module with
# the Rijeka sport URLs below and, when executed directly, calls
# pgz_sport_deep.harvest().
# NOTE(review): presumably harvest() reads pgz_sport_deep.ROOTS at call time;
# confirm against pgz_sport_deep, which is not visible from this file.

import logging
import os
import sys

# The pgz_sport_deep module is resolved through this directory, so the path
# entry has to be in place before the imports that follow.
sys.path.insert(0, '/opt/pgz-sport/scrapers')

import pgz_sport_deep
from pgz_sport_deep import (
    harvest as base_harvest,
    fetch,
    extract_text,
    find_links,
    find_pdf_links,
)

# logging.basicConfig() does nothing when the root logger already has
# handlers, so remove any existing ones first; this happens after the
# pgz_sport_deep import so that this script's format is the one installed.
_root_logger = logging.getLogger()
_root_logger.handlers.clear()
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [rijeka_sport] %(message)s',
)

# Override roots
pgz_sport_deep.ROOTS = [
    "https://www.rijeka.hr/teme-za-gradane/sport-i-rekreacija/",
    "https://www.rijeka.hr/sport/",
    "https://sport.rijeka.hr",
    "https://www.rijekasport.hr",
]

if __name__ == "__main__":
    pgz_sport_deep.harvest()