#!/usr/bin/env python3
"""
swarm_monitor.py — Watch CC agent tmux sessions and flag stuck/idle ones.

Once a minute the visible pane of every session in SESSIONS is captured via
``tmux capture-pane`` and its last few lines are matched against
STUCK_PATTERNS.  The first time a session enters a given state the event is
appended to LOG; for the states listed in ALERT_KINDS a Telegram message is
sent as well.
"""
import json
import os
import re
import subprocess
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import Optional

LOG = Path("/var/log/pgz-sport-debug/swarm_monitor.log")
SESSIONS = ['cc1', 'cc2', 'cc3', 'cc4', 'cc5', 'cc6', 'cc-di']

# Stuck patterns, checked in order: the first one that matches wins.
STUCK_PATTERNS = [
    (r"Could not load extra usage status", "session_expired"),
    (r"You've used \d+% of your weekly limit", "near_limit"),
    (r"Stop and wait for limit to reset", "limit_prompt"),
    (r"Add funds to continue", "funds_prompt"),
    (r"Switch to Team plan", "plan_prompt"),
    (r"^❯\s*$", "idle"),  # idle prompt with no input
]

# Number of trailing pane lines inspected for stuck patterns.
TAIL_LINES = 10
# Seconds between two polling rounds.
POLL_SECONDS = 60
# States that trigger a Telegram alert in addition to the log entry.
ALERT_KINDS = ('session_expired', 'limit_prompt')

# SECURITY NOTE(review): the fallback bot token below is committed in source.
# It is kept only so existing deployments keep alerting; rotate it and supply
# TELEGRAM_BOT_TOKEN / TELEGRAM_CHAT_ID through the environment instead.
TELEGRAM_BOT_TOKEN = os.environ.get(
    "TELEGRAM_BOT_TOKEN", "8535797835:AAFItT-92jzZ9NWFafLxn0dLa1_n2s-JE5Y")
TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_CHAT_ID", "7969491558")

# Already-reported states, keyed "<session>:<kind>" -> kind.  Entries for a
# session are dropped as soon as that session looks active again.
last_state = {}


def log(msg):
    """Append a timestamped line to LOG.

    If the log file cannot be written the line goes to stderr instead, so a
    logging problem never takes the monitor loop down.
    """
    ts = datetime.now().isoformat(timespec='seconds')
    line = f"[{ts}] {msg}\n"
    try:
        with open(LOG, "a", encoding="utf-8") as f:
            f.write(line)
    except OSError:
        sys.stderr.write(line)


def capture(session: str) -> Optional[str]:
    """Return the visible text of window 0 of tmux session *session*.

    Returns None when the pane cannot be read: tmux is missing, the call
    times out, the output cannot be decoded, or tmux exits non-zero (for
    example because the session does not exist).
    """
    try:
        r = subprocess.run(
            ['tmux', 'capture-pane', '-p', '-t', f'{session}:0'],
            capture_output=True, text=True, timeout=3)
    except (OSError, subprocess.SubprocessError, UnicodeError):
        return None
    if r.returncode != 0:
        # A failed capture yields empty stdout; reporting it as "unreadable"
        # keeps check() from mistaking it for an active agent.
        return None
    return r.stdout


def detect_kind(text: str) -> Optional[str]:
    """Classify pane *text*.

    Only the last TAIL_LINES lines (after stripping surrounding whitespace)
    are examined.  Returns the kind of the first matching entry of
    STUCK_PATTERNS, or None when nothing matches.
    """
    tail = '\n'.join(text.strip().split('\n')[-TAIL_LINES:])
    for pat, kind in STUCK_PATTERNS:
        if re.search(pat, tail, re.M):
            return kind
    return None


def _send_alert(session: str, kind: str) -> None:
    """Best-effort Telegram notification that *session* is in state *kind*."""
    url = f"https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendMessage"
    try:
        r = subprocess.run([
            "curl", "-s", "-X", "POST", url,
            "-d", f"chat_id={TELEGRAM_CHAT_ID}",
            "--data-urlencode",
            f"text=⚠️ CC agent {session} {kind} — needs attention",
        ], capture_output=True, timeout=5)
    except (OSError, subprocess.SubprocessError) as e:
        # Log only the exception type: its message can embed the full curl
        # command line, which contains the bot token.
        log(f"ALERT FAIL {session} = {kind}: {type(e).__name__}")
        return
    if r.returncode != 0:
        log(f"ALERT FAIL {session} = {kind}: curl exit {r.returncode}")


def check():
    """Run one polling round over all SESSIONS.

    Sessions whose pane cannot be captured are skipped and their recorded
    state is left untouched.  A session with no matching pattern is treated
    as active and its entries in last_state are cleared.  A newly seen
    (session, kind) pair is logged once and, for ALERT_KINDS, alerted.
    """
    for s in SESSIONS:
        text = capture(s)
        if text is None:
            continue

        kind = detect_kind(text)
        if kind is None:
            # Active state — clear flags
            prefix = f"{s}:"
            for k in [k for k in last_state if k.startswith(prefix)]:
                del last_state[k]
            continue

        key = f"{s}:{kind}"
        # Don't spam — only report the first time a state is seen.
        if last_state.get(key) == kind:
            continue
        last_state[key] = kind
        log(f"DETECT {s} = {kind}")
        # Original note: alerts are "rate-limited via auto_triage already";
        # that mechanism is not visible in this file — TODO confirm.
        if kind in ALERT_KINDS:
            _send_alert(s, kind)


def main():
    """Poll forever, logging (rather than propagating) per-round failures."""
    log("swarm_monitor starting")
    while True:
        try:
            check()
        except Exception as e:
            log(f"check fail: {e}")
        time.sleep(POLL_SECONDS)


if __name__ == "__main__":
    main()