#!/usr/bin/env python3
"""
Agent Health Check - Proactive Monitoring
Runs every 4 hours to detect agent issues before they impact business
"""

import json
import subprocess
import sys
from datetime import datetime
from pathlib import Path

TELEGRAM_TARGET = "telegram:8269921691"
# NOTE: not referenced by any check in this script at present.
ERROR_THRESHOLD = 3  # Alert if consecutive errors > this

# Layout of one whitespace-separated row of `openclaw cron list`, as consumed
# by check_agent_health(): at least 8 fields, with the status in field 7.
_MIN_COLUMNS = 8
_ID_COLUMN = 0
_NAME_COLUMN = 1
_SCHEDULE_COLUMN = 2
_STATUS_COLUMN = 7


def run_command(cmd):
    """Run a command and return its captured standard output.

    Args:
        cmd: Either a shell command string (executed through the shell) or a
            sequence of arguments (executed directly, with no shell parsing).

    Returns:
        The command's stdout as text. The exit status is not checked. An
        empty string is returned when the command cannot be started.
    """
    try:
        result = subprocess.run(
            cmd,
            shell=isinstance(cmd, str),
            capture_output=True,
            text=True,
        )
    except OSError as exc:
        # Without a shell, a missing executable raises instead of producing
        # empty output; keep the "always returns a string" contract.
        print(f"Could not run {cmd!r}: {exc}", file=sys.stderr)
        return ""
    return result.stdout


def send_alert(message):
    """Send `message` to TELEGRAM_TARGET through the `openclaw` CLI.

    The arguments are passed as a list rather than a shell string, so quotes,
    `$`, backticks, or newlines in the message reach the CLI unmodified
    instead of being interpreted by a shell.

    Delivery is best-effort: the CLI's exit status is not checked, and a
    failure to start it is reported on stderr rather than raised.
    """
    argv = [
        "openclaw", "message", "send",
        "--channel", "telegram",
        "--target", TELEGRAM_TARGET,
        "--message", str(message),
    ]
    try:
        subprocess.run(argv, capture_output=True)
    except OSError as exc:
        print(f"Could not send alert: {exc}", file=sys.stderr)


def check_agent_health(listing=None):
    """Check all cron jobs and identify issues.

    Args:
        listing: Optional text in the format printed by `openclaw cron list`.
            When omitted, the command is run to obtain it.

    Returns:
        A tuple `(operational, issues)` of lists of dicts with the keys
        'id', 'name', 'schedule' and 'status'. A job is an issue when its
        status field is exactly 'error'; every other job is operational.
    """
    if listing is None:
        listing = run_command(["openclaw", "cron", "list"])

    operational = []
    issues = []
    # The first line of the listing is the column header.
    for row in listing.strip().splitlines()[1:]:
        # Fields are split on whitespace, so a value that itself contains
        # spaces would shift the columns that follow it.
        fields = row.split()
        if len(fields) < _MIN_COLUMNS:
            # Blank or truncated row.
            continue

        job_info = {
            'id': fields[_ID_COLUMN],
            'name': fields[_NAME_COLUMN],
            'schedule': fields[_SCHEDULE_COLUMN],
            'status': fields[_STATUS_COLUMN],
        }
        if job_info['status'] == 'error':
            issues.append(job_info)
        else:
            operational.append(job_info)

    return operational, issues


def generate_report():
    """Generate the health report and alert if any job is in error.

    Returns:
        The number of jobs with issues (0 when everything is operational).
    """
    operational, issues = check_agent_health()

    sections = [
        f"🔔 *AGENT HEALTH CHECK* - {datetime.now().strftime('%I:%M %p')}\n\n",
        f"✅ Operational: {len(operational)}\n",
        f"⚠️ Issues: {len(issues)}\n\n",
    ]
    if issues:
        sections.append("*Issues Detected:*\n")
        sections.extend(
            f"• {issue['name']} ({issue['status']})\n" for issue in issues
        )
        sections.append("\n_Reviewing details..._")
    else:
        sections.append("All agents operational! ✅\n")
    report = "".join(sections)

    # Only issues trigger a Telegram message; the all-clear text is built
    # but not sent.
    if issues:
        send_alert(report)

    return len(issues)


if __name__ == "__main__":
    issue_count = generate_report()
    # A non-zero exit status tells the scheduler that issues were found.
    sys.exit(0 if issue_count == 0 else 1)