feat: Add Chatwoot Agent Bot prototype and FAQ knowledge base

- Created chatwoot-agent-bot/ with Node.js webhook server
- Bot detects intent (greeting, billing, technical, features, account)
- Auto-responds from FAQ knowledge base or escalates to human
- FAQ-KB.md: Living knowledge base that grows with customer questions
- CHATWOOT-SETUP.md: Complete deployment and configuration guide
- Supports Telegram notifications on escalation
- Bot runs on port 3001, ready for Chatwoot webhook integration
This commit is contained in:
2026-04-01 16:26:05 -04:00
parent 7ba19752de
commit 5319bcd30b
1074 changed files with 456376 additions and 0 deletions

373
agents/junior-ae/junior-ae-v5.py Executable file
View File

@@ -0,0 +1,373 @@
#!/usr/bin/env python3
"""
JAE v5.1 - Website & Budget Research Agent (Fixed for CRM API)
- Properly handles CRM's bodyV2 blocknote format
- Uses temp field for temperature
- Processes ALL leads with website research
- Tracks processed leads to avoid re-processing
- Slow, deliberate pace (1-2 min/lead)
"""
import json
import os
import re
import ssl
import time
import urllib.request
from datetime import datetime
from pathlib import Path
from urllib.parse import urljoin

# Directory containing this script; state/ and logs/ are created beside it.
SCRIPT_DIR = Path(__file__).parent
for d in [SCRIPT_DIR / "state", SCRIPT_DIR / "logs"]:
    d.mkdir(parents=True, exist_ok=True)

# Persistent set of already-processed lead IDs, plus one log file per day.
STATE_FILE = SCRIPT_DIR / "state" / "jae-v5-state.json"
LOG_FILE = SCRIPT_DIR / "logs" / f"jae-v5-{datetime.now().strftime('%Y%m%d')}.log"

CRM_URL = "https://salesforce.hoaledgeriq.com/rest"
# SECURITY: prefer supplying the token via the CRM_TOKEN environment variable.
# The hardcoded JWT below is kept only as a backward-compatible fallback and
# should be rotated and removed from source control.
CRM_TOKEN = os.environ.get(
    "CRM_TOKEN",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzczMzI4NDQzLCJleHAiOjE4MDQ3ODE2NDIsImp0aSI6IjIwZjEyYzkwLTRkMDctNGJmNi1iMzk3LTZjNmU3MzlmMThjOCJ9.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930",
)

# SECURITY: certificate verification is deliberately disabled below, which
# permits man-in-the-middle interception. Kept for behavioral compatibility;
# fix the server certificate chain instead if at all possible.
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
def log(msg):
    """Print *msg* prefixed with a HH:MM:SS timestamp and append it to LOG_FILE."""
    stamp = datetime.now().strftime('%H:%M:%S')
    line = f"[{stamp}] {msg}"
    print(line)
    with open(LOG_FILE, 'a') as fh:
        fh.write(line + "\n")
def load_state():
    """Return persisted state from STATE_FILE, or a fresh default dict."""
    if not STATE_FILE.exists():
        return {"processed_ids": [], "last_run": None}
    return json.loads(STATE_FILE.read_text())
def save_state(s):
    """Persist state dict *s* to STATE_FILE as pretty-printed JSON."""
    serialized = json.dumps(s, indent=2)
    STATE_FILE.write_text(serialized)
def fetch_all_notes():
    """Fetch ALL notes from CRM with pagination.

    Walks the /notes endpoint 200 records per page using cursor-based
    pagination (`after=<endCursor>`), newest first, and returns the
    accumulated list of note dicts.  Best-effort: on any request or parse
    error the loop stops and whatever was fetched so far is returned —
    this function never raises.
    """
    all_notes = []
    has_more = True
    end_cursor = None
    log("Fetching all leads from CRM (with pagination)...")
    while has_more:
        try:
            # Newest first; `after` resumes from the previous page's cursor.
            url = f"{CRM_URL}/notes?limit=200&order[createdAt]=desc"
            if end_cursor:
                url += f"&after={end_cursor}"
            req = urllib.request.Request(
                url,
                headers={"Authorization": f"Bearer {CRM_TOKEN}", "Accept": "application/json"}
            )
            # Custom opener so the module-level ssl_context (verification
            # disabled) is applied to this HTTPS request.
            opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
            with opener.open(req, timeout=30) as r:
                data = json.loads(r.read().decode())
                notes = data.get('data', {}).get('notes', [])
                all_notes.extend(notes)
                # Check pagination
                # NOTE(review): pageInfo is read from the TOP level of the
                # response while the notes list sits under data['data'] —
                # confirm against the CRM API that pageInfo is not nested
                # under 'data' as well.
                page_info = data.get('pageInfo', {})
                has_more = page_info.get('hasNextPage', False)
                end_cursor = page_info.get('endCursor')
                log(f" Fetched {len(notes)} leads (total: {len(all_notes)})")
                if not has_more:
                    break
        except Exception as e:
            # Best-effort: log and return the partial result.
            log(f"Fetch error: {e}")
            break
    log(f"Total leads fetched: {len(all_notes)}")
    return all_notes
def get_existing_temp(note):
    """Return the note's temperature: 'HOT', 'WARM', or 'COLD'.

    Prefers an explicit, valid 'temp' field on the note; otherwise falls
    back to a 'HOT:'/'WARM:'/'COLD:' prefix on the title; defaults to
    'COLD' when neither is present.
    """
    # BUG FIX: the old code defaulted a missing 'temp' field to 'COLD',
    # which always passed validation and made the title fallback below
    # unreachable. Default to None so the fallback can actually run.
    temp = note.get('temp')
    if temp and temp.upper() in ('HOT', 'WARM', 'COLD'):
        return temp.upper()
    # Fallback: temperature encoded as a title prefix, e.g. "HOT: acme.com".
    title = note.get('title', '').upper()
    for level in ('HOT', 'WARM', 'COLD'):
        if title.startswith(f'{level}:'):
            return level
    return 'COLD'
def extract_url_from_note(note):
    """Return a URL found in the note's bodyV2 markdown or title, else None."""
    body = note.get('bodyV2', {})
    markdown = body.get('markdown', '') if isinstance(body, dict) else ''
    # Prefer an explicit URL embedded in the markdown body.
    hits = re.findall(r'https?://[^\s<>"{}|\\^`\[\]]+', markdown)
    if hits:
        # Strip trailing sentence punctuation that regexes tend to swallow.
        return hits[0].rstrip('.,;:')
    # Otherwise derive one from a "TEMP: domain.tld" style title.
    m = re.search(
        r'(?:HOT|WARM|COLD):\s*([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
        note.get('title', ''),
        re.IGNORECASE,
    )
    return f"https://{m.group(1)}" if m else None
def search_budget_on_site(base_url):
    """Fetch *base_url* and scan the HTML for budget documents and a unit count.

    Returns:
        (found_budget: bool, unit_count: int | None, details: str)
        `details` is a "; "-joined summary of everything that was found.
        Never raises: any network/parse error yields (False, None, <error>).
    """
    log(f" 🔍 Searching: {base_url}")
    try:
        # BUG FIX: the old code used a no-op ternary here — both branches
        # returned base_url unchanged.
        req = urllib.request.Request(
            base_url,
            headers={'User-Agent': 'Mozilla/5.0 (compatible; JAE-Bot/1.0)'}
        )
        opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
        with opener.open(req, timeout=15) as r:
            content = r.read().decode('utf-8', errors='ignore')
            found_budget = False
            unit_count = None
            details = []
            # Strongest signal: a direct link to a budget-like PDF.
            pdf_patterns = ['budget', 'financial', 'reserve', 'statement']
            for pattern in pdf_patterns:
                if pattern in content.lower():
                    pdf_match = re.search(rf'href="([^"]*{pattern}[^"]*\.pdf)"', content, re.IGNORECASE)
                    if pdf_match:
                        found_budget = True
                        details.append(f"Found budget PDF: {pdf_match.group(1)}")
                        log(f" ✅ Budget PDF found: {pdf_match.group(1)}")
                        break
            # Weaker signal: the word "budget" mentioned anywhere on the page.
            if not found_budget and 'budget' in content.lower():
                found_budget = True
                details.append("Budget mentioned on page")
                log(f" ✅ Budget found (mentioned)")
            # Try to extract a plausible community size (units/homes/lots).
            unit_patterns = [
                r'(\d{1,4})\s*(?:homes|units|lots|properties|residences)',
                r'(\d{1,4})\s*-?\s*(?:home|unit|lot|property|residence)\s*(?:community|association|complex)',
                r'community\s*of\s*(\d{1,4})',
                r'(\d{1,4})\s*home\s*owners',
            ]
            for pattern in unit_patterns:
                match = re.search(pattern, content, re.IGNORECASE)
                if match:
                    # BUG FIX: was a bare `except: pass`, and an out-of-range
                    # match leaked into unit_count without resetting it.
                    try:
                        candidate = int(match.group(1))
                    except ValueError:
                        continue
                    if 10 <= candidate <= 5000:  # Reasonable range
                        unit_count = candidate
                        details.append(f"Unit count: {unit_count}")
                        log(f" 📊 Found unit count: {unit_count}")
                        break
            if not details:
                details.append("No budget found")
            return found_budget, unit_count, "; ".join(details)
    except Exception as e:
        # Best-effort: truncate the error so log lines stay readable.
        log(f" ⚠️ Site access issue: {str(e)[:100]}")
        return False, None, f"Site access error: {str(e)[:100]}"
def elevate_temp(current_temp, levels):
    """Raise *current_temp* by *levels* steps along COLD → WARM → HOT, capped at HOT.

    An unrecognized temperature is treated as COLD before elevating.
    """
    ladder = ('COLD', 'WARM', 'HOT')
    start = ladder.index(current_temp) if current_temp in ladder else 0
    return ladder[min(start + levels, len(ladder) - 1)]
def update_note_with_research(note, new_temp, unit_count, research_notes):
    """Update note with research findings using CRM API.

    Rewrites the title with the new temperature prefix, sets the `temp`
    field, and appends a timestamped research section to the bodyV2
    markdown while preserving the existing blocknote content.  PATCHes the
    note in place via the CRM REST API.

    Returns:
        True on a successful update, False on any error (logged, never raised).
    """
    try:
        note_id = note.get('id')
        current_title = note.get('title', '')
        bodyV2 = note.get('bodyV2', {})
        # Get existing markdown/blocknote; bodyV2 may not be a dict on some notes.
        markdown = bodyV2.get('markdown', '') if isinstance(bodyV2, dict) else ''
        blocknote = bodyV2.get('blocknote', '') if isinstance(bodyV2, dict) else ''
        # Remove old temperature prefix from title, then re-prefix with the new one.
        clean_title = re.sub(r'^(HOT|WARM|COLD):\s*', '', current_title)
        new_title = f"{new_temp}: {clean_title}"
        # Append the research findings as a new markdown section.
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
        research_section = f"\n\n---\n**JAE v5 Research ({timestamp}):** {research_notes}"
        if unit_count:
            research_section += f"\n**Units:** {unit_count}"
        new_markdown = markdown + research_section
        # Keep existing blocknote structure, just update markdown
        # (per the module docstring, the CRM expects this bodyV2 shape).
        new_bodyV2 = {
            "blocknote": blocknote,
            "markdown": new_markdown
        }
        # Prepare patch data - only update what's needed
        patch_data = json.dumps({
            "title": new_title,
            "temp": new_temp,
            "bodyV2": new_bodyV2
        }).encode()
        req = urllib.request.Request(
            f"{CRM_URL}/notes/{note_id}",
            data=patch_data,
            headers={
                "Authorization": f"Bearer {CRM_TOKEN}",
                "Content-Type": "application/json"
            },
            method='PATCH'
        )
        # Custom opener so the module-level ssl_context (verification disabled) is used.
        opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
        with opener.open(req, timeout=20) as r:
            # Response body is not inspected; reaching here without an
            # exception is treated as success.
            log(f" ✅ Note updated: {new_title}")
        return True
    except Exception as e:
        log(f" ✗ Update error: {e}")
        return False
def create_opportunity(note, temp):
    """Create opportunity for HOT/WARM leads.

    POSTs a new opportunity to the CRM, linked to the note's person via
    `pointOfContactId`.  Skips (returns False) when the note has no
    personId.

    NOTE(review): the `temp` parameter is currently unused in the body —
    either wire it into the opportunity payload or drop it from callers.

    Returns:
        True when the opportunity was created, False otherwise (errors are
        logged, never raised).
    """
    try:
        person_id = note.get('personId')
        if not person_id:
            log(f" ⚠️ Skip upgrade: No person ID")
            return False
        opp_name = f"Lead: {note.get('title', '')}"
        opp_data = {
            "name": opp_name[:100],  # CRM name field appears length-limited; truncated defensively
            "stage": "NEW",
            "pointOfContactId": person_id,
            # NOTE(review): hardcoded owner UUID — presumably the sales rep's
            # workspace member ID; confirm and consider making configurable.
            "ownerId": "ecf52aad-4827-40c9-9475-b68f3ca9a924"
        }
        req = urllib.request.Request(
            f"{CRM_URL}/opportunities",
            data=json.dumps(opp_data).encode(),
            headers={"Authorization": f"Bearer {CRM_TOKEN}", "Content-Type": "application/json"}
        )
        # Custom opener so the module-level ssl_context (verification disabled) is used.
        opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
        with opener.open(req, timeout=20) as r:
            opp = json.loads(r.read().decode())
            log(f" ✅ UPGRADED to Opportunity: {opp.get('id', 'N/A')}")
        return True
    except Exception as e:
        log(f" ✗ Create opp error: {e}")
        return False
def _mark_processed(state, processed_ids, note_id):
    """Record *note_id* as processed and persist state (keeps the newest 2000 IDs)."""
    processed_ids.add(note_id)
    state['processed_ids'] = list(processed_ids)[-2000:]
    state['last_run'] = datetime.now().isoformat()
    save_state(state)


def main():
    """Entry point: fetch leads, research each website, elevate temps, persist state.

    For every previously-unseen note: extract a website URL, look for budget
    documents / unit counts on it, elevate the lead temperature accordingly,
    update the note, and create an opportunity for HOT/WARM leads.  State is
    saved after every lead so an interrupted run never reprocesses.
    """
    log("=" * 60)
    log("JAE v5.1 Starting - Website & Budget Research")
    log("=" * 60)
    state = load_state()
    processed_ids = set(state.get('processed_ids', []))
    notes = fetch_all_notes()
    # Only handle leads we have not seen in a previous run.
    unprocessed = [n for n in notes if n.get('id') not in processed_ids]
    log(f"\nTotal leads in CRM: {len(notes)}")
    log(f"Already processed: {len(processed_ids)}")
    log(f"New leads to process: {len(unprocessed)}")
    log("=" * 60)
    if not unprocessed:
        log("✅ No new leads to process")
        return
    pace_seconds = 5  # pause between leads, to stay gentle on the CRM and target sites
    upgraded = 0
    processed_count = 0
    for i, note in enumerate(unprocessed, 1):
        note_id = note.get('id')
        title = note.get('title', '')
        log(f"\n[{i}/{len(unprocessed)}] Processing: {title[:60]}...")
        current_temp = get_existing_temp(note)
        log(f" Current temp: {current_temp}")
        url = extract_url_from_note(note)
        if not url:
            # No website to research: still mark as processed so we skip it next run.
            log(f" ⚠️ No website found - keeping {current_temp}")
            processed_count += 1
            _mark_processed(state, processed_ids, note_id)
            continue
        log(f" 🌐 Website found: {url}")
        found_budget, unit_count, details = search_budget_on_site(url)
        # A budget document is the strongest buying signal → jump two levels.
        if found_budget:
            elevation, reason = 2, "Budget PDF found"
        else:
            elevation, reason = 1, "Website exists, no budget"
        new_temp = elevate_temp(current_temp, elevation)
        log(f" 📈 Elevating: {current_temp} -> {new_temp} ({reason})")
        update_note_with_research(note, new_temp, unit_count, details)
        # HOT/WARM leads get promoted to a CRM opportunity.
        if new_temp in ('HOT', 'WARM'):
            if create_opportunity(note, new_temp):
                upgraded += 1
        else:
            log(f" Keeping as COLD")
        processed_count += 1
        _mark_processed(state, processed_ids, note_id)
        # BUG FIX: the old message claimed a 90s wait while actually sleeping
        # only 5s; the message now reflects the real pace.
        log(f" ⏳ Waiting {pace_seconds}s before next lead...")
        time.sleep(pace_seconds)
    log("\n" + "=" * 60)
    log(f"JAE v5 Complete: {processed_count} processed, {upgraded} upgraded")
    log("=" * 60)


if __name__ == "__main__":
    main()