@router.post("/dokumenti/upload")
async def upload_dokument(
    file: UploadFile = File(...),
    title: str = Form(...),
    vrsta: str = Form("ostalo"),
    razina: Optional[str] = Form(None),
    organizacija: Optional[str] = Form(None),
    sport: Optional[str] = Form(None),
    izvor_url: Optional[str] = Form(None),
    godina: Optional[int] = Form(None),
    kratak_opis: Optional[str] = Form(None),
    authorization: Optional[str] = Header(None),
):
    """Upload a new document: store the file on disk and insert a DB row.

    Accepted extensions are .pdf, .doc, .docx, .txt and .rtf, with a maximum
    size of 32 MB. Text content is extracted on a best-effort basis
    (``pdftotext`` for PDF, raw UTF-8 decode for TXT); other types are stored
    without text.

    Returns:
        A dict with the keys ``ok``, ``dokument_id``, ``fname``, ``title``,
        ``vrsta``, ``chars``, ``size``, ``content_type`` and ``sha12``.

    Raises:
        HTTPException: 400 when the upload is empty, larger than 32 MB, or
            has an unsupported extension.
    """
    import asyncio as _aio
    import functools as _ft
    import hashlib as _hl
    import logging as _logging
    import pathlib
    import subprocess as _sp

    log = _logging.getLogger(__name__)
    max_bytes = 32 * 1024 * 1024
    allowed = (".pdf", ".doc", ".docx", ".txt", ".rtf")

    # NOTE(review): `authorization` is accepted but never validated in this
    # handler, so the endpoint is effectively open unless authentication is
    # enforced elsewhere (router dependency / middleware) — confirm.

    # Read at most one byte past the limit so an oversized upload is rejected
    # without first pulling the whole body into a bytes object.
    raw = await file.read(max_bytes + 1)
    if not raw:
        raise HTTPException(400, "Prazna datoteka")
    if len(raw) > max_bytes:
        raise HTTPException(400, "Datoteka prevelika (max 32 MB)")

    original_name = file.filename or ""
    _, dot, ext = original_name.rpartition(".")
    suf = f".{ext.lower()}" if dot else ""
    if suf not in allowed:
        raise HTTPException(400, f"Tip nije podržan: {suf}. Dozvoljeno: PDF/DOC/DOCX/TXT/RTF")

    out_dir = pathlib.Path("/opt/pgz-sport/_data/dokumenti_uploads")
    out_dir.mkdir(parents=True, exist_ok=True)
    sha = _hl.sha256(raw).hexdigest()[:12]
    # Keep only filesystem-safe characters; path separators become "_", so
    # the client-supplied name cannot escape out_dir.
    safe = re.sub(r"[^A-Za-z0-9._-]+", "_", original_name or "upload")[:120]
    fname = f"{int(time.time())}_{sha}_{safe}"
    # Truncation (or an upper-case extension) may have dropped the suffix.
    if not fname.endswith(suf):
        fname += suf
    fpath = out_dir / fname
    fpath.write_bytes(raw)

    # Text extraction (best-effort): a failure must not reject the upload.
    sadrzaj = ""
    try:
        if suf == ".pdf":
            # Run pdftotext in a worker thread so a slow conversion (up to
            # the 60 s timeout) does not block the event loop.
            loop = _aio.get_running_loop()
            proc = await loop.run_in_executor(
                None,
                _ft.partial(
                    _sp.run,
                    ["pdftotext", "-layout", "-q", str(fpath), "-"],
                    capture_output=True,
                    timeout=60,
                ),
            )
            sadrzaj = proc.stdout.decode("utf-8", "ignore")
        elif suf == ".txt":
            sadrzaj = raw.decode("utf-8", "ignore")
        # doc/docx/rtf: no extractor available, stored without text.
    except (OSError, _sp.SubprocessError) as exc:
        # OSError covers a missing pdftotext binary; SubprocessError covers
        # the timeout.
        log.warning("Text extraction failed for %s: %s", fname, exc)
    # PostgreSQL text values cannot contain NUL; strip it so a stray byte in
    # the document does not make the INSERT fail.
    sadrzaj = sadrzaj.replace("\x00", "")

    try:
        row = db_one("""
        INSERT INTO pgz_sport.dokumenti
          (title, kratak_opis, vrsta, razina, organizacija, sport, izvor_url,
           godina, fname, sadrzaj, scraped_at, aktivan)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,now(),true)
        RETURNING id, title, vrsta, razina, fname, length(sadrzaj) AS chars
    """, (title, kratak_opis, vrsta, razina, organizacija, sport, izvor_url,
          godina, fname, sadrzaj or None))
    except Exception:
        # Do not leave an orphaned file behind when the row was not created.
        try:
            fpath.unlink()
        except OSError as exc:
            log.warning("Could not remove orphaned upload %s: %s", fpath, exc)
        raise

    # Audit (best-effort): the upload already succeeded, so only log failures.
    try:
        from erp.audit_helper import audit as _audit
        _audit("pgz_sport.dokumenti", "upload", row["id"],
               korisnik="api", field="title",
               new=f"{title} ({len(raw)} B, sha={sha})")
    except Exception as exc:
        log.warning("Audit of upload %s failed: %s", fname, exc)

    return {"ok": True, "dokument_id": row["id"], "fname": fname,
            "title": row["title"], "vrsta": row["vrsta"], "chars": row["chars"],
            "size": len(raw), "content_type": file.content_type,
            "sha12": sha}
get_dokument_full(did: int): + """Full dokument view + RAG chunks (renamed from duplicate /dokumenti/{did:int}). + Old route bila duplikat — sad je eksplicitno /full za bogatiji prikaz.""" d = db_one("""SELECT id, title AS naziv, kratak_opis, sadrzaj, vrsta, razina, organizacija, sport, sluzbeni_glasnik, izvor_url, pdf_url, kljucne_rijeci, izdano_datum, godina