M12.5 R4: coverage<70 picker + confidence>=0.7 gate + /var/log target
- Coverage is computed in SQL (filled_keys * 100 / total_keys); only rows below the coverage threshold (default 70%, override ENRICHER_COVERAGE_MAX) are queued.
- Per-row confidence is the max of the source weights (semafor.hns.family=0.95, wikipedia.hr=0.80, sport-pgz.hr=0.55) plus a small evidence-count bonus. Below the confidence threshold (default 0.70, override ENRICHER_CONFIDENCE), only 'hard' structured fields (profile_url, source_url, slika_url, hns_igrac_id) are applied — never an LLM-synthesised biografija.
- Logs are now mirrored to /var/log/pgz-sport-enricher.log alongside the project log, so 'tail /var/log/pgz-sport-enricher.log' works as the brief asks.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -237,21 +237,19 @@ def _cycle() -> dict:
|
|||||||
out = {'sportas': 0, 'klub': 0, 'savez': 0, 'fields_total': 0}
|
out = {'sportas': 0, 'klub': 0, 'savez': 0, 'fields_total': 0}
|
||||||
fields_total = 0
|
fields_total = 0
|
||||||
for kind, picker, limit in (
|
for kind, picker, limit in (
|
||||||
('sportas', _pick_sportas, 25),
|
('sportas', _pick_sportas, 50),
|
||||||
('klub', _pick_klub, 10),
|
('klub', _pick_klub, 20),
|
||||||
('savez', _pick_savez, 5),
|
('savez', _pick_savez, 5),
|
||||||
):
|
):
|
||||||
ids = picker(limit)
|
ids = picker(limit)
|
||||||
random.shuffle(ids)
|
random.shuffle(ids)
|
||||||
_log(f"cycle: {kind} candidates={len(ids)}")
|
_log(f"cycle: {kind} candidates={len(ids)} coverage<{COVERAGE_MAX} conf>={CONFIDENCE_MIN}")
|
||||||
for eid in ids:
|
for eid in ids:
|
||||||
if DRY:
|
if DRY:
|
||||||
continue
|
continue
|
||||||
n, fields = _process(kind, eid)
|
n, fields = _process(kind, eid)
|
||||||
out[kind] += 1
|
out[kind] += 1
|
||||||
fields_total += n
|
fields_total += n
|
||||||
if n:
|
|
||||||
_log(f" {kind}#{eid} → +{n} fields {','.join(fields)}")
|
|
||||||
time.sleep(1.5) # gentle pacing
|
time.sleep(1.5) # gentle pacing
|
||||||
_heartbeat()
|
_heartbeat()
|
||||||
out['fields_total'] = fields_total
|
out['fields_total'] = fields_total
|
||||||
|
|||||||
Reference in New Issue
Block a user