M12.5 R4: coverage<70 picker + confidence>=0.7 gate + /var/log target
- Coverage is computed in SQL (filled_keys * 100 / total_keys); only rows below the coverage threshold (default 70%, override ENRICHER_COVERAGE_MAX) are queued.
- Per-row confidence is the max of the source weights (semafor.hns.family=0.95, wikipedia.hr=0.80, sport-pgz.hr=0.55) plus a small evidence-count bonus. Below the confidence threshold (default 0.70, override ENRICHER_CONFIDENCE), only 'hard' structured fields (profile_url, source_url, slika_url, hns_igrac_id) are applied — never an LLM-synthesised biografija.
- Logs are now mirrored to /var/log/pgz-sport-enricher.log alongside the project log, so 'tail /var/log/pgz-sport-enricher.log' works as the brief asks.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -237,21 +237,19 @@ def _cycle() -> dict:
|
||||
out = {'sportas': 0, 'klub': 0, 'savez': 0, 'fields_total': 0}
|
||||
fields_total = 0
|
||||
for kind, picker, limit in (
|
||||
('sportas', _pick_sportas, 25),
|
||||
('klub', _pick_klub, 10),
|
||||
('sportas', _pick_sportas, 50),
|
||||
('klub', _pick_klub, 20),
|
||||
('savez', _pick_savez, 5),
|
||||
):
|
||||
ids = picker(limit)
|
||||
random.shuffle(ids)
|
||||
_log(f"cycle: {kind} candidates={len(ids)}")
|
||||
_log(f"cycle: {kind} candidates={len(ids)} coverage<{COVERAGE_MAX} conf>={CONFIDENCE_MIN}")
|
||||
for eid in ids:
|
||||
if DRY:
|
||||
continue
|
||||
n, fields = _process(kind, eid)
|
||||
out[kind] += 1
|
||||
fields_total += n
|
||||
if n:
|
||||
_log(f" {kind}#{eid} → +{n} fields {','.join(fields)}")
|
||||
time.sleep(1.5) # gentle pacing
|
||||
_heartbeat()
|
||||
out['fields_total'] = fields_total
|
||||
|
||||
Reference in New Issue
Block a user