#!/usr/bin/env python3
"""HKS-CBF + FIBA LiveStats basketball harvester."""
import re
import sys
from urllib.parse import quote_plus, urljoin

# The shared harvester base class lives outside any package, so its directory
# must be on sys.path before the import below.
sys.path.insert(0, '/opt/pgz-sport/scripts/sport_harvesters')
from __base import SportHarvester

# Site whose search page is used to discover a club's profile link.
_SEARCH_BASE_URL = "https://www.hks-cbf.hr/"
# Value passed as ``timeout=`` to ``page.goto``.
# NOTE(review): presumably a Playwright page, where this is milliseconds — confirm.
_NAV_TIMEOUT = 20000
# Only the first few matching links are inspected.
_MAX_CANDIDATE_LINKS = 3
# Number of clubs processed when no limit is given on the command line.
_DEFAULT_LIMIT = 50


class HKSHarvester(SportHarvester):
    """Harvester that looks up basketball clubs on hks-cbf.hr."""

    SPORT = 'košarka'
    SOURCE = 'hks_cbf'

    def scrape_klub(self, page, klub):
        """Search hks-cbf.hr for a club and remember the first club link found.

        Args:
            page: Browser page object exposing ``goto`` and ``locator``.
            klub: Mapping with at least the keys ``'id'`` and ``'naziv'``.

        The first matching link is written to ``pgz_sport.klubovi.source_url``
        unless that column already holds a non-empty value. Any exception is
        logged and swallowed so that one failing club does not stop the run.
        """
        # quote_plus escapes '&', '#', '?', '+' and non-ASCII characters that a
        # bare space-to-plus replacement would leave to corrupt the query.
        url = f"{_SEARCH_BASE_URL}?s={quote_plus(klub['naziv'])}"
        self.log(f" 🏀 Klub {klub['id']} {klub['naziv']} → {url}")
        try:
            page.goto(url, wait_until="domcontentloaded", timeout=_NAV_TIMEOUT)
            # Look for /klubovi/ or /klub/ links in the search results.
            candidates = page.locator('a[href*="/klubovi/"], a[href*="/klub/"]').all()
            for anchor in candidates[:_MAX_CANDIDATE_LINKS]:
                href = anchor.get_attribute('href')
                if not href or 'klub' not in href:
                    continue
                # Resolve relative links so the stored URL is usable on its
                # own; absolute hrefs come back unchanged.
                source_url = urljoin(url, href)
                self.log(f" Found: {source_url}")
                # COALESCE/NULLIF keeps an existing non-empty source_url.
                # NOTE(review): no commit is issued here — presumably handled
                # by the base class or an autocommit connection; confirm.
                with self.conn.cursor() as cur:
                    cur.execute("UPDATE pgz_sport.klubovi SET source_url = COALESCE(NULLIF(source_url,''), %s) WHERE id = %s", (source_url, klub['id']))
                break
        except Exception as e:
            # Deliberate best-effort: report the failure and move on.
            self.log(f" ❌ {e}")


if __name__ == '__main__':
    # Optional first CLI argument: maximum number of clubs to process.
    HKSHarvester().run(limit=int(sys.argv[1]) if len(sys.argv) > 1 else _DEFAULT_LIMIT)