feat: Add Chatwoot Agent Bot prototype and FAQ knowledge base

- Created chatwoot-agent-bot/ with Node.js webhook server
- Bot detects intent (greeting, billing, technical, features, account)
- Auto-responds from FAQ knowledge base or escalates to human
- FAQ-KB.md: Living knowledge base that grows with customer questions
- CHATWOOT-SETUP.md: Complete deployment and configuration guide
- Supports Telegram notifications on escalation
- Bot runs on port 3001, ready for Chatwoot webhook integration
This commit is contained in:
2026-04-01 16:26:05 -04:00
parent 7ba19752de
commit 5319bcd30b
1074 changed files with 456376 additions and 0 deletions

View File

@@ -0,0 +1,233 @@
#!/usr/bin/env python3
"""
Sales Prospector v3 - Working HOA Lead Generation
Actually searches, extracts, and pushes real leads to CRM
"""
import json
import os
import re
import time
import subprocess
from datetime import datetime
from pathlib import Path
# Config
SCRIPT_DIR = Path(__file__).parent.absolute()
STATE_DIR = SCRIPT_DIR / "state"
LOG_DIR = SCRIPT_DIR / "logs"
LEADS_DIR = SCRIPT_DIR / "leads"
# Ensure all working directories exist before anything logs or saves.
for d in [STATE_DIR, LOG_DIR, LEADS_DIR]:
    d.mkdir(parents=True, exist_ok=True)
STATE_FILE = STATE_DIR / "prospector-v3-state.json"
# One log file per calendar day.
LOG_FILE = LOG_DIR / f"prospector-v3-{datetime.now().strftime('%Y%m%d')}.log"
METROS = ["Charlotte NC", "Atlanta GA", "Orlando FL", "Phoenix AZ"]
# Extended search queries for better coverage
SEARCHES_PER_METRO = [
    '{metro} HOA contact email',
    '{metro} homeowners association website',
    '{metro} HOA management contact',
    '{metro} community association board',
    '{metro} condo association contact',
]
# CRM Config
# SECURITY: a live API key was hard-coded here and is now in git history --
# rotate it. Prefer the TWENTY_TOKEN environment variable; the embedded value
# remains only as a backward-compatible fallback until rotation.
TWENTY_TOKEN = os.environ.get(
    "TWENTY_TOKEN",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzMzMjg0NDMsImV4cCI6MTgwNDc4MTY0MiwianRpIjoiMjBmMTJjOTAtNGQwNy00YmY2LWIzOTctNmM2ZTczOWYxOGM4In0.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930",
)
TWENTY_BASE = "https://salesforce.hoaledgeriq.com/rest"
def log(msg):
    """Print *msg* and append it, timestamped, to today's log file."""
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    entry = f"[{stamp}] {msg}"
    print(entry)
    with open(LOG_FILE, 'a') as fh:
        fh.write(entry + '\n')
def load_state():
    """Load persisted crawl state from STATE_FILE, or a fresh default dict."""
    if not STATE_FILE.exists():
        # First run: nothing processed yet, start at the first metro.
        return {
            "metro_index": 0,
            "processed_domains": [],
            "leads_found": 0,
            "cycle_count": 0
        }
    with open(STATE_FILE) as fh:
        return json.load(fh)
def save_state(state):
    """Overwrite STATE_FILE with *state* serialized as pretty-printed JSON."""
    STATE_FILE.write_text(json.dumps(state, indent=2))
def extract_domain(url):
    """Return the lowercase host of *url* with any 'www.' prefix stripped.

    Returns None when parsing fails; returns '' for strings with no netloc
    (e.g. a bare path), matching the original falsy behavior.
    """
    try:
        from urllib.parse import urlparse
        domain = urlparse(url).netloc.lower()
        if domain.startswith('www.'):
            domain = domain[4:]
        return domain
    # Narrowed from a bare `except:` which also swallowed SystemExit /
    # KeyboardInterrupt; urlparse can raise ValueError/TypeError on bad input.
    except Exception:
        return None
def is_hoa_domain(domain):
    """True when the domain name contains an HOA/community-looking keyword."""
    if not domain:
        return False
    keywords = ('hoa', 'homeowners', 'association', 'community', 'condo',
                'village', 'creek', 'farms', 'estates')
    lowered = domain.lower()
    for kw in keywords:
        if kw in lowered:
            return True
    return False
def search_web(query, count=5):
    """Run an `openclaw web-search` and return result URLs, deduplicated to
    one URL per HOA-looking domain. Returns [] on any failure."""
    log(f"SEARCH: {query}")
    try:
        proc = subprocess.run(
            ['openclaw', 'web-search', query, '--count', str(count)],
            capture_output=True, text=True, timeout=60
        )
        if proc.returncode == 0 and proc.stdout:
            candidates = [ln.strip() for ln in proc.stdout.split('\n')
                          if ln.strip().startswith('http')]
            # Keep the first URL seen for each distinct HOA domain.
            picked = []
            seen_domains = set()
            for candidate in candidates:
                dom = extract_domain(candidate)
                if dom and dom not in seen_domains and is_hoa_domain(dom):
                    seen_domains.add(dom)
                    picked.append(candidate)
            return picked
    except Exception as e:
        log(f"Search error: {e}")
    return []
def fetch_page(url, max_chars=1500):
    """Fetch *url* via `openclaw web-fetch` (capped at max_chars of content).

    Returns the fetched text on success, None on error or non-zero exit."""
    try:
        proc = subprocess.run(
            ['openclaw', 'web-fetch', url, '--max-chars', str(max_chars)],
            capture_output=True, text=True, timeout=30
        )
    except Exception as e:
        log(f"Fetch error: {e}")
        return None
    return proc.stdout if proc.returncode == 0 else None
def extract_emails(text):
    """Extract up to 3 plausible business email addresses from *text*.

    Filters out placeholder/no-reply addresses and free personal mailboxes
    (gmail/yahoo/hotmail). Returns [] for empty/None input.
    """
    if not text:
        return []
    # Fixed: the TLD class was [A-Z|a-z], which also matched a literal '|'.
    pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    emails = re.findall(pattern, text)
    bad = ['example', 'test', 'domain', 'email', 'noreply', 'no-reply', '@gmail.com', '@yahoo.com', '@hotmail.com']
    filtered = [e.lower() for e in emails if not any(b in e.lower() for b in bad)]
    # dict.fromkeys dedupes while keeping first-seen order; list(set(...))
    # returned a nondeterministic order, so the [:3] cut was unstable.
    return list(dict.fromkeys(filtered))[:3]
def extract_phones(text):
    """Extract up to 2 US-style phone numbers from *text* ([] if empty/None)."""
    if not text:
        return []
    pattern = r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
    phones = re.findall(pattern, text)
    # dict.fromkeys dedupes while keeping first-seen order; the previous
    # list(set(...))[:2] picked two numbers in nondeterministic order.
    return list(dict.fromkeys(phones))[:2]
def extract_hoa_name(content, domain):
    """Best-effort HOA display name from page *content*.

    Falls back to a title-cased version of *domain* when no name is found."""
    fallback = domain.replace('-', ' ').title()
    if not content:
        return fallback
    # Prefer a markdown heading ("# Name") if the page has one.
    heading = re.search(r'#\s*(.+)', content)
    if heading:
        return heading.group(1).strip()
    # Otherwise take a capitalized phrase ending in an HOA-ish word.
    named = re.search(r'([A-Z][A-Za-z\s]+(?:HOA|Homeowners|Community|Association))', content)
    if named:
        return named.group(1).strip()
    return fallback
def assess_quality(emails, phones):
    """Grade a lead: each email is worth 3 points, each phone 2.

    Returns "HOT" (>= 7), "WARM" (>= 4), or "COLD" (anything less)."""
    score = 3 * len(emails or []) + 2 * len(phones or [])
    if score >= 7:
        return "HOT"
    if score >= 4:
        return "WARM"
    return "COLD"
def push_to_crm(lead):
    """POST *lead* as a note to the Twenty CRM via curl.

    Returns True when the request succeeds, False on HTTP error, non-zero
    curl exit, or any exception (failures are logged, never raised).
    """
    try:
        # Title previously used lead["hoa_name"] (KeyError if missing) while
        # the body used .get(); use the same safe default for both.
        hoa_name = lead.get('hoa_name', 'Unknown')
        body = f"""## 🎯 HOA Prospect - {lead['quality']}
**HOA Name:** {hoa_name}
**Metro:** {lead['metro']}
**Website:** {lead['url']}
**Domain:** {lead['domain']}
"""
        if lead.get('emails'):
            body += f"**Email(s):** {', '.join(lead['emails'])}\n"
        if lead.get('phones'):
            body += f"**Phone(s):** {', '.join(lead['phones'])}\n"
        body += f"\n**Source:** Prospector v3\n**Found:** {datetime.now().strftime('%Y-%m-%d %H:%M')}"
        # Serialize the whole payload with json.dumps: the old hand-built
        # f-string template produced invalid JSON whenever the HOA name or
        # metro contained a double quote or backslash.
        note_data = json.dumps({
            "title": f"🎯 {lead['quality']}: {hoa_name} | {lead['metro']}",
            "bodyV2": {"markdown": body},
        })
        curl_cmd = [
            # --fail makes curl exit non-zero on HTTP >= 400; previously a
            # 4xx/5xx response was logged as a successful push.
            'curl', '-s', '--fail', '-X', 'POST',
            f'{TWENTY_BASE}/notes',
            '-H', f'Authorization: Bearer {TWENTY_TOKEN}',
            '-H', 'Content-Type: application/json',
            '-d', note_data
        ]
        result = subprocess.run(curl_cmd, capture_output=True, text=True, timeout=10)
        if result.returncode == 0:
            log(f"CRM PUSH: {hoa_name} ({lead['quality']})")
            return True
        else:
            log(f"CRM FAIL: {result.stderr[:100]}")
    except Exception as e:
        log(f"CRM ERROR: {e}")
    return False
def save_lead(lead):
    """Persist *lead* as pretty-printed JSON under LEADS_DIR, named by domain."""
    out_path = LEADS_DIR / f"{lead['domain']}.json"
    with open(out_path, 'w') as fh:
        json.dump(lead, fh, indent=2)
def main():
log("=== Sales Prospector v3 Started ===")
state = load_state()
cycle = state['cycle_count']
while True:
cycle += 1
metro_idx = state['metro_index']
metro = METROS[metro_idx]
log(f"=== CYCLE {cycle}: {metro} ===")
# Search queries
queries = [q.format(metro=metro) for q in SEARCHES_PER_METRO]
for query_idx, query in enumerate(queries):
log(f"QUERY: {query}")
urls = search_web(query, count=3)
log(f"Found {len(urls)} potential HOA sites")
for url in urls:
domain = extract_domain(url)
if domain in state['processed_domains']:
log(f"SKIP: Already processed {domain}")
continue
log(f"FETCH: {url}")
content = fetch_page(url)
if not content:
state['processed_domains'].append(domain)
save_state(state)
continue
emails = extract_em