feat: Add Chatwoot Agent Bot prototype and FAQ knowledge base
- Created chatwoot-agent-bot/ with a Node.js webhook server
- Bot detects intent (greeting, billing, technical, features, account)
- Auto-responds from the FAQ knowledge base or escalates to a human
- FAQ-KB.md: living knowledge base that grows with customer questions
- CHATWOOT-SETUP.md: complete deployment and configuration guide
- Supports Telegram notifications on escalation
- Bot runs on port 3001, ready for Chatwoot webhook integration
This commit is contained in:
197
agents/sales-prospector/prospector-v4.py
Executable file
197
agents/sales-prospector/prospector-v4.py
Executable file
@@ -0,0 +1,197 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Sales Prospector v4 - Complete version that finds and saves leads"""
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import os

# Resolve all data directories relative to this script so the agent can be
# launched from any working directory.
SCRIPT_DIR = Path(__file__).parent.absolute()
STATE_DIR = SCRIPT_DIR / "state"   # persisted crawl state (resume support)
LOG_DIR = SCRIPT_DIR / "logs"      # daily log files
LEADS_DIR = SCRIPT_DIR / "leads"   # one JSON file per discovered lead
for d in [STATE_DIR, LOG_DIR, LEADS_DIR]:
    d.mkdir(parents=True, exist_ok=True)

STATE_FILE = STATE_DIR / "prospector-v4-state.json"
LOG_FILE = LOG_DIR / f"prospector-v4-{datetime.now().strftime('%Y%m%d')}.log"

# Metro areas rotated through round-robin, one per cycle.
METROS = ["Charlotte NC", "Atlanta GA", "Orlando FL", "Phoenix AZ"]

# SECURITY: a live-looking API key was hard-coded here and committed to the
# repository — it should be revoked and rotated. The environment variable now
# takes precedence; the embedded value remains only as a backward-compatible
# fallback so existing deployments keep working.
TWENTY_TOKEN = os.environ.get(
    "TWENTY_TOKEN",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzMzMjg0NDMsImV4cCI6MTgwNDc4MTY0MiwianRpIjoiMjBmMTJjOTAtNGQwNy00YmY2LWIzOTctNmM2ZTczOWYxOGM4In0.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930",
)
TWENTY_BASE = os.environ.get("TWENTY_BASE", "https://salesforce.hoaledgeriq.com/rest")
|
||||
|
||||
def log(msg):
    """Echo *msg* to stdout with a timestamp and append it to today's log file."""
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    entry = f"[{stamp}] {msg}"
    print(entry)
    with open(LOG_FILE, 'a') as handle:
        handle.write(entry + '\n')
|
||||
|
||||
def load_state():
    """Return the persisted crawl state, or a fresh default when none exists."""
    if not STATE_FILE.exists():
        return {"metro_index": 0, "processed_domains": [], "leads_found": 0}
    with open(STATE_FILE) as handle:
        return json.load(handle)
|
||||
|
||||
def save_state(state):
    """Persist *state* to STATE_FILE as compact JSON."""
    with open(STATE_FILE, 'w') as handle:
        handle.write(json.dumps(state))
|
||||
|
||||
def search_web(query):
    """Run an `openclaw web-search` for *query*; return up to 5 result URLs.

    Returns an empty list on a non-zero exit, timeout, or any other failure.
    """
    log(f"SEARCH: {query}")
    try:
        proc = subprocess.run(
            ['openclaw', 'web-search', query, '--count', '5'],
            capture_output=True, text=True, timeout=60,
        )
        if proc.returncode == 0:
            hits = []
            for raw in proc.stdout.split('\n'):
                raw = raw.strip()
                if raw.startswith('http'):
                    # Keep only the URL when the line has trailing text.
                    hits.append(raw.split()[0] if ' ' in raw else raw)
            return hits[:5]
    except Exception as exc:
        log(f"Search error: {exc}")
    return []
|
||||
|
||||
def fetch_page(url):
    """Fetch *url* via `openclaw web-fetch` (capped at 2000 chars); return "" on failure."""
    try:
        proc = subprocess.run(
            ['openclaw', 'web-fetch', url, '--max-chars', '2000'],
            capture_output=True, text=True, timeout=30,
        )
    except Exception as exc:
        log(f"Fetch error: {exc}")
        return ""
    return proc.stdout if proc.returncode == 0 else ""
|
||||
|
||||
def extract_emails(text):
    """Extract up to 3 plausible business email addresses from *text*.

    Drops placeholder domains, freemail providers, noreply addresses, and
    very short matches; lowercases survivors and de-duplicates them in
    first-seen order. (The previous `list(set(...))[:3]` truncated in hash
    order, which varies between runs due to string-hash randomization, so
    the same page could yield different emails on different runs.)
    """
    if not text:
        return []
    pattern = r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}'
    emails = re.findall(pattern, text)
    # Substrings that mark an address as non-actionable.
    bad = ['example.com', 'test.com', 'domain.com', 'email.com', 'noreply',
           '@gmail.com', '@yahoo.com', '@hotmail.com', '@aol.com']
    filtered = [e.lower() for e in emails
                if len(e) > 10 and not any(b in e.lower() for b in bad)]
    # dict.fromkeys gives a deterministic, order-preserving de-dup.
    return list(dict.fromkeys(filtered))[:3]
|
||||
|
||||
def extract_phones(text):
    """Extract up to 2 US phone numbers from *text* as bare 10-digit strings.

    De-duplicates deterministically in first-seen order. (The previous
    `list(set(...))[:2]` truncated in hash order, which varies between
    runs, so results were nondeterministic when more than two distinct
    numbers appeared on a page.)
    """
    if not text:
        return []
    pattern = r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
    digits = [re.sub(r'[^\d]', '', p) for p in re.findall(pattern, text)]
    return list(dict.fromkeys(digits))[:2]
|
||||
|
||||
def extract_domain(url):
    """Return the lowercased host of *url* with any leading "www." stripped.

    Falls back to naive string surgery if urlparse raises.
    """
    try:
        from urllib.parse import urlparse
        dom = urlparse(url).netloc.lower()
        return dom[4:] if dom.startswith('www.') else dom
    # Narrowed from a bare `except:`, which also swallowed SystemExit
    # and KeyboardInterrupt.
    except Exception:
        return url.replace('https://', '').replace('http://', '').split('/')[0]
|
||||
|
||||
def assess_quality(emails, phones):
    """Grade a lead by contact richness: each email scores 3, each phone 2.

    Returns "HOT" (>= 7), "WARM" (>= 4), or "COLD".
    """
    score = 3 * len(emails) + 2 * len(phones)
    if score >= 7:
        return "HOT"
    if score >= 4:
        return "WARM"
    return "COLD"
|
||||
|
||||
def push_to_crm(lead):
    """POST *lead* as a note to the Twenty CRM REST API.

    Returns True on an HTTP 2xx response, False otherwise. The previous
    implementation shelled out to `curl -s`, whose exit code is 0 even for
    HTTP 4xx/5xx responses, so API rejections were logged as "CRM SUCCESS".
    Using urllib gives a real status check with no external binary.
    """
    import urllib.request
    try:
        body = f"## {lead['quality']} Lead: {lead['hoa_name']}\n\n**Metro:** {lead['metro']}\n**Website:** {lead['url']}\n**Domain:** {lead['domain']}\n"
        if lead.get('emails'):
            body += f"**Emails:** {', '.join(lead['emails'])}\n"
        if lead.get('phones'):
            body += f"**Phones:** {', '.join(lead['phones'])}\n"
        body += f"\n_Found: {datetime.now().strftime('%Y-%m-%d %H:%M')}_"

        note_data = {"title": f"{lead['quality']}: {lead['hoa_name']}", "body": body}
        req = urllib.request.Request(
            f'{TWENTY_BASE}/notes',
            data=json.dumps(note_data).encode('utf-8'),
            headers={
                'Authorization': f'Bearer {TWENTY_TOKEN}',
                'Content-Type': 'application/json',
            },
            method='POST',
        )
        with urllib.request.urlopen(req, timeout=10) as resp:
            if 200 <= resp.status < 300:
                log(f"CRM SUCCESS: {lead['hoa_name']}")
                return True
    except Exception as e:
        log(f"CRM error: {e}")
    return False
|
||||
|
||||
def save_lead(lead):
    """Write *lead* into LEADS_DIR as pretty-printed JSON named after its domain."""
    target = LEADS_DIR / f"{lead['domain'].replace('/', '_')}.json"
    with open(target, 'w') as handle:
        handle.write(json.dumps(lead, indent=2))
    log(f"SAVED: {target.name}")
|
||||
|
||||
def main():
    """Endless prospecting loop: search one metro, harvest contacts, persist leads.

    Each cycle targets one metro from METROS (round-robin), runs a few web
    searches for HOA sites, extracts emails/phones from up to 6 new domains,
    saves qualifying leads locally and pushes them to the CRM, then sleeps
    before the next cycle. State survives restarts via STATE_FILE.
    """
    import time  # hoisted here; was re-imported inside the loop every cycle

    log("=== Prospector v4 Started ===")
    state = load_state()
    cycle = 0

    while True:
        cycle += 1
        metro = METROS[state['metro_index']]
        log(f"=== CYCLE {cycle}: {metro} ===")

        queries = [
            f'{metro} HOA contact email',
            f'{metro} homeowners association',
            f'{metro} HOA management company',
        ]

        found_urls = []
        for query in queries:
            urls = search_web(query)
            found_urls.extend(urls)

        log(f"Found {len(found_urls)} URLs to check")

        # Process each URL (capped at 6 per cycle to limit request volume).
        new_leads = 0
        for url in found_urls[:6]:
            domain = extract_domain(url)
            # NOTE(review): processed_domains grows without bound across
            # restarts; fine at this scale, worth pruning eventually.
            if domain in state['processed_domains'] or not domain:
                continue

            state['processed_domains'].append(domain)
            log(f"FETCH: {url[:60]}...")

            content = fetch_page(url)
            if not content:
                continue

            emails = extract_emails(content)
            phones = extract_phones(content)

            if emails or phones:
                # Derive a display name from the domain's first label.
                hoa_name = domain.split('.')[0].replace('-', ' ').title() + " HOA"
                lead = {
                    'hoa_name': hoa_name,
                    'metro': metro,
                    'url': url,
                    'domain': domain,
                    'emails': emails,
                    'phones': phones,
                    'quality': assess_quality(emails, phones),
                    'found_at': datetime.now().isoformat()
                }

                save_lead(lead)
                push_to_crm(lead)
                state['leads_found'] += 1
                new_leads += 1
                log(f"LEAD: {hoa_name} ({lead['quality']}) - {len(emails)} emails, {len(phones)} phones")
            else:
                log(f"No contacts on {domain}")

        save_state(state)
        log(f"Cycle complete: {new_leads} new leads, total: {state['leads_found']}")

        # Rotate to the next metro for the following cycle.
        state['metro_index'] = (state['metro_index'] + 1) % len(METROS)

        # Throttle: slower during business hours (09:00-18:00 local).
        hour = datetime.now().hour
        delay = 120 if 9 <= hour < 18 else 60
        log(f"Sleeping {delay}s...")
        time.sleep(delay)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user