Files
pgz-sport/scripts/hns_avatar_harvester.py

68 lines
2.6 KiB
Python

#!/usr/bin/env python3
from dotenv import load_dotenv
load_dotenv('/opt/rinet-gpu/.env.master')
# auto-added by patch_scrapers_with_dotenv.sh
# Fajl: hns_avatar_harvester.py | v1.0 | 05.05.2026
# Author: Damir Radulić
# Lokacija: /opt/pgz-sport/scripts/hns_avatar_harvester.py
# Svrha: Dohvati avatar URL za svakog igrača sa HNS profila
import os, time, re, json, sys
import psycopg2
import requests
from bs4 import BeautifulSoup
# Database connection string. An explicit RINET_DSN takes precedence; the
# fallback is built only when RINET_DSN is absent, so DB_PASSWORD is required
# (KeyError if missing) only in that case. Previously the fallback f-string
# was evaluated eagerly and raised KeyError even when RINET_DSN was set.
DSN = os.environ.get("RINET_DSN")
if DSN is None:
    DSN = f"host=10.10.0.2 port=6432 dbname=rinet_v3 user=rinet password={os.environ['DB_PASSWORD']}"

# HTTP headers sent with every profile-page request.
HEADERS = {"User-Agent": "Mozilla/5.0 (Ri.NET PGŽ Sport Bot)"}

# Module-level connection shared by the whole script. With autocommit on,
# every statement is committed as soon as it executes.
conn = psycopg2.connect(DSN)
conn.autocommit = True
def _absolute_src(src):
    """Return *src* as an absolute URL.

    Protocol-relative values (``//host/path``) only lack a scheme, so they
    get ``https:`` prepended. Root-relative values (``/path``) are prefixed
    with ``https://hns.family``. Anything else is returned unchanged.
    """
    if src.startswith("//"):
        # Must be tested before the single-slash case, which it also matches.
        return "https:" + src
    if src.startswith("/"):
        return "https://hns.family" + src
    return src


def fetch_avatar(hns_id, slug=""):
    """Fetch a player's profile page and return the URL of the player photo.

    Args:
        hns_id: Player identifier used in the profile URL path.
        slug: Optional name slug appended as an extra path segment; skipped
            when empty.

    Returns:
        The image URL as a string, or ``None`` when the page does not answer
        with HTTP 200, no matching ``<img>`` with a ``src`` is found, or any
        error occurs while fetching or parsing.
    """
    url = f"https://semafor.hns.family/igraci/{hns_id}/"
    if slug:
        url += f"{slug}/"
    try:
        r = requests.get(url, headers=HEADERS, timeout=15)
        if r.status_code != 200:
            return None
        soup = BeautifulSoup(r.text, "html.parser")
        # Player photo selectors, tried in order; the first hit wins.
        for sel in (".playerPhoto img", ".player-photo img", ".playerHeader img", "img.player_photo"):
            img = soup.select_one(sel)
            if img and img.get("src"):
                return _absolute_src(img["src"])
        # Generic fallback: first <img> inside a header-like container.
        hdr = soup.select_one(".playerHeader, .player-header, .basic_info")
        if hdr:
            img = hdr.find("img")
            if img and img.get("src"):
                return _absolute_src(img["src"])
        return None
    except Exception as exc:
        # Deliberately broad: one bad profile must not abort the caller's
        # batch. The failure is reported instead of being silently dropped.
        print(f"fetch_avatar: {url}: {exc!r}", file=sys.stderr)
        return None
# Harvest run: select players that have an HNS id but no photo yet, fetch each
# avatar URL via fetch_avatar() and store it in pgz_sport.clanovi.foto_url.

# Single-pass transliteration of the characters the slug must not contain.
_SLUG_TRANSLATION = str.maketrans(
    {"ć": "c", "č": "c", "š": "s", "ž": "z", "đ": "d", " ": "-"}
)
_SLUG_DISALLOWED = re.compile(r"[^a-z0-9-]")


def _name_slug(ime, prezime):
    """Build the lowercase ASCII ``first-last`` slug for a player.

    NULL name parts are treated as empty (previously they leaked the text
    ``none`` into the slug). Surrounding whitespace and dangling dashes are
    trimmed, so a player without usable names yields ``""``.
    """
    raw = f"{(ime or '').strip()}-{(prezime or '').strip()}".lower()
    cleaned = _SLUG_DISALLOWED.sub("", raw.translate(_SLUG_TRANSLATION))
    return cleaned.strip("-")


# NOTE(review): there is no ORDER BY, so which 200 rows are returned is up to
# the database. Rows whose fetch fails keep foto_url NULL and therefore stay
# eligible for selection on later runs.
with conn.cursor() as cur:
    cur.execute("""
        SELECT id, hns_igrac_id, ime, prezime
        FROM pgz_sport.clanovi
        WHERE hns_igrac_id IS NOT NULL AND foto_url IS NULL
        LIMIT 200
    """)
    rows = cur.fetchall()

total = len(rows)
print(f"Total: {total} igrača za avatar fetch")

hits = 0
for i, (cid, hns_id, ime, prezime) in enumerate(rows):
    avatar = fetch_avatar(hns_id, _name_slug(ime, prezime))
    if avatar:
        with conn.cursor() as cur:
            cur.execute("UPDATE pgz_sport.clanovi SET foto_url=%s WHERE id=%s", (avatar, cid))
        hits += 1
        # Progress line lives inside the success branch: slicing avatar is
        # only valid when a URL was actually found.
        if i % 10 == 0:
            print(f" [{i+1}/{total}] {ime} {prezime} -> {avatar[:80]}")
    # Throttle: pause after every request, hit or miss.
    time.sleep(0.5)

print(f"\nDONE: {hits}/{total} avatar URL-ova spremljen")