feat: Add Chatwoot Agent Bot prototype and FAQ knowledge base

- Created chatwoot-agent-bot/ with Node.js webhook server
- Bot detects intent (greeting, billing, technical, features, account)
- Auto-responds from FAQ knowledge base or escalates to human
- FAQ-KB.md: Living knowledge base that grows with customer questions
- CHATWOOT-SETUP.md: Complete deployment and configuration guide
- Supports Telegram notifications on escalation
- Bot runs on port 3001, ready for Chatwoot webhook integration
This commit is contained in:
2026-04-01 16:26:05 -04:00
parent 7ba19752de
commit 5319bcd30b
1074 changed files with 456376 additions and 0 deletions

373
agents/junior-ae/junior-ae-v5.py Executable file
View File

@@ -0,0 +1,373 @@
#!/usr/bin/env python3
"""
JAE v5.1 - Website & Budget Research Agent (Fixed for CRM API)
- Properly handles CRM's bodyV2 blocknote format
- Uses temp field for temperature
- Processes ALL leads with website research
- Tracks processed leads to avoid re-processing
- Slow, deliberate pace (1-2 min/lead)
"""
import json
import os
import re
import ssl
import time
import urllib.request
from datetime import datetime
from pathlib import Path
from urllib.parse import urljoin

# Directory containing this script; state/ and logs/ are created beside it.
SCRIPT_DIR = Path(__file__).parent
for d in [SCRIPT_DIR / "state", SCRIPT_DIR / "logs"]:
    d.mkdir(parents=True, exist_ok=True)

# Persistent set of already-processed lead IDs, plus one log file per day.
STATE_FILE = SCRIPT_DIR / "state" / "jae-v5-state.json"
LOG_FILE = SCRIPT_DIR / "logs" / f"jae-v5-{datetime.now().strftime('%Y%m%d')}.log"

CRM_URL = "https://salesforce.hoaledgeriq.com/rest"
# SECURITY: prefer supplying the token via the CRM_TOKEN environment variable.
# The hardcoded JWT below is kept only as a backward-compatible fallback and
# should be rotated and removed from source control.
CRM_TOKEN = os.environ.get(
    "CRM_TOKEN",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzczMzI4NDQzLCJleHAiOjE4MDQ3ODE2NDIsImp0aSI6IjIwZjEyYzkwLTRkMDctNGJmNi1iMzk3LTZjNmU3MzlmMThjOCJ9.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930",
)

# SECURITY: certificate verification is deliberately disabled below, which
# permits man-in-the-middle interception. Kept for behavioral compatibility;
# fix the server certificate chain instead if at all possible.
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
def log(msg):
    """Print *msg* prefixed with a HH:MM:SS timestamp and append it to LOG_FILE."""
    stamp = datetime.now().strftime('%H:%M:%S')
    line = f"[{stamp}] {msg}"
    print(line)
    with open(LOG_FILE, 'a') as fh:
        fh.write(line + "\n")
def load_state():
    """Return persisted state from STATE_FILE, or a fresh default dict."""
    if not STATE_FILE.exists():
        return {"processed_ids": [], "last_run": None}
    return json.loads(STATE_FILE.read_text())
def save_state(s):
    """Persist state dict *s* to STATE_FILE as pretty-printed JSON."""
    serialized = json.dumps(s, indent=2)
    STATE_FILE.write_text(serialized)
def fetch_all_notes():
    """Fetch ALL notes from CRM with pagination.

    Walks the /notes endpoint 200 records per page using cursor-based
    pagination (`after=<endCursor>`), newest first, and returns the
    accumulated list of note dicts.  Best-effort: on any request or parse
    error the loop stops and whatever was fetched so far is returned —
    this function never raises.
    """
    all_notes = []
    has_more = True
    end_cursor = None
    log("Fetching all leads from CRM (with pagination)...")
    while has_more:
        try:
            # Newest first; `after` resumes from the previous page's cursor.
            url = f"{CRM_URL}/notes?limit=200&order[createdAt]=desc"
            if end_cursor:
                url += f"&after={end_cursor}"
            req = urllib.request.Request(
                url,
                headers={"Authorization": f"Bearer {CRM_TOKEN}", "Accept": "application/json"}
            )
            # Custom opener so the module-level ssl_context (verification
            # disabled) is applied to this HTTPS request.
            opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
            with opener.open(req, timeout=30) as r:
                data = json.loads(r.read().decode())
                notes = data.get('data', {}).get('notes', [])
                all_notes.extend(notes)
                # Check pagination
                # NOTE(review): pageInfo is read from the TOP level of the
                # response while the notes list sits under data['data'] —
                # confirm against the CRM API that pageInfo is not nested
                # under 'data' as well.
                page_info = data.get('pageInfo', {})
                has_more = page_info.get('hasNextPage', False)
                end_cursor = page_info.get('endCursor')
                log(f" Fetched {len(notes)} leads (total: {len(all_notes)})")
                if not has_more:
                    break
        except Exception as e:
            # Best-effort: log and return the partial result.
            log(f"Fetch error: {e}")
            break
    log(f"Total leads fetched: {len(all_notes)}")
    return all_notes
def get_existing_temp(note):
    """Return the note's temperature: 'HOT', 'WARM', or 'COLD'.

    Prefers an explicit, valid 'temp' field on the note; otherwise falls
    back to a 'HOT:'/'WARM:'/'COLD:' prefix on the title; defaults to
    'COLD' when neither is present.
    """
    # BUG FIX: the old code defaulted a missing 'temp' field to 'COLD',
    # which always passed validation and made the title fallback below
    # unreachable. Default to None so the fallback can actually run.
    temp = note.get('temp')
    if temp and temp.upper() in ('HOT', 'WARM', 'COLD'):
        return temp.upper()
    # Fallback: temperature encoded as a title prefix, e.g. "HOT: acme.com".
    title = note.get('title', '').upper()
    for level in ('HOT', 'WARM', 'COLD'):
        if title.startswith(f'{level}:'):
            return level
    return 'COLD'
def extract_url_from_note(note):
    """Return a URL found in the note's bodyV2 markdown or title, else None."""
    body = note.get('bodyV2', {})
    markdown = body.get('markdown', '') if isinstance(body, dict) else ''
    # Prefer an explicit URL embedded in the markdown body.
    hits = re.findall(r'https?://[^\s<>"{}|\\^`\[\]]+', markdown)
    if hits:
        # Strip trailing sentence punctuation that regexes tend to swallow.
        return hits[0].rstrip('.,;:')
    # Otherwise derive one from a "TEMP: domain.tld" style title.
    m = re.search(
        r'(?:HOT|WARM|COLD):\s*([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
        note.get('title', ''),
        re.IGNORECASE,
    )
    return f"https://{m.group(1)}" if m else None
def search_budget_on_site(base_url):
    """Fetch *base_url* and scan the HTML for budget documents and a unit count.

    Returns:
        (found_budget: bool, unit_count: int | None, details: str)
        `details` is a "; "-joined summary of everything that was found.
        Never raises: any network/parse error yields (False, None, <error>).
    """
    log(f" 🔍 Searching: {base_url}")
    try:
        # BUG FIX: the old code used a no-op ternary here — both branches
        # returned base_url unchanged.
        req = urllib.request.Request(
            base_url,
            headers={'User-Agent': 'Mozilla/5.0 (compatible; JAE-Bot/1.0)'}
        )
        opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
        with opener.open(req, timeout=15) as r:
            content = r.read().decode('utf-8', errors='ignore')
            found_budget = False
            unit_count = None
            details = []
            # Strongest signal: a direct link to a budget-like PDF.
            pdf_patterns = ['budget', 'financial', 'reserve', 'statement']
            for pattern in pdf_patterns:
                if pattern in content.lower():
                    pdf_match = re.search(rf'href="([^"]*{pattern}[^"]*\.pdf)"', content, re.IGNORECASE)
                    if pdf_match:
                        found_budget = True
                        details.append(f"Found budget PDF: {pdf_match.group(1)}")
                        log(f" ✅ Budget PDF found: {pdf_match.group(1)}")
                        break
            # Weaker signal: the word "budget" mentioned anywhere on the page.
            if not found_budget and 'budget' in content.lower():
                found_budget = True
                details.append("Budget mentioned on page")
                log(f" ✅ Budget found (mentioned)")
            # Try to extract a plausible community size (units/homes/lots).
            unit_patterns = [
                r'(\d{1,4})\s*(?:homes|units|lots|properties|residences)',
                r'(\d{1,4})\s*-?\s*(?:home|unit|lot|property|residence)\s*(?:community|association|complex)',
                r'community\s*of\s*(\d{1,4})',
                r'(\d{1,4})\s*home\s*owners',
            ]
            for pattern in unit_patterns:
                match = re.search(pattern, content, re.IGNORECASE)
                if match:
                    # BUG FIX: was a bare `except: pass`, and an out-of-range
                    # match leaked into unit_count without resetting it.
                    try:
                        candidate = int(match.group(1))
                    except ValueError:
                        continue
                    if 10 <= candidate <= 5000:  # Reasonable range
                        unit_count = candidate
                        details.append(f"Unit count: {unit_count}")
                        log(f" 📊 Found unit count: {unit_count}")
                        break
            if not details:
                details.append("No budget found")
            return found_budget, unit_count, "; ".join(details)
    except Exception as e:
        # Best-effort: truncate the error so log lines stay readable.
        log(f" ⚠️ Site access issue: {str(e)[:100]}")
        return False, None, f"Site access error: {str(e)[:100]}"
def elevate_temp(current_temp, levels):
    """Raise *current_temp* by *levels* steps along COLD → WARM → HOT, capped at HOT.

    An unrecognized temperature is treated as COLD before elevating.
    """
    ladder = ('COLD', 'WARM', 'HOT')
    start = ladder.index(current_temp) if current_temp in ladder else 0
    return ladder[min(start + levels, len(ladder) - 1)]
def update_note_with_research(note, new_temp, unit_count, research_notes):
    """Update note with research findings using CRM API.

    Rewrites the title with the new temperature prefix, sets the `temp`
    field, and appends a timestamped research section to the bodyV2
    markdown while preserving the existing blocknote content.  PATCHes the
    note in place via the CRM REST API.

    Returns:
        True on a successful update, False on any error (logged, never raised).
    """
    try:
        note_id = note.get('id')
        current_title = note.get('title', '')
        bodyV2 = note.get('bodyV2', {})
        # Get existing markdown/blocknote; bodyV2 may not be a dict on some notes.
        markdown = bodyV2.get('markdown', '') if isinstance(bodyV2, dict) else ''
        blocknote = bodyV2.get('blocknote', '') if isinstance(bodyV2, dict) else ''
        # Remove old temperature prefix from title, then re-prefix with the new one.
        clean_title = re.sub(r'^(HOT|WARM|COLD):\s*', '', current_title)
        new_title = f"{new_temp}: {clean_title}"
        # Append the research findings as a new markdown section.
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
        research_section = f"\n\n---\n**JAE v5 Research ({timestamp}):** {research_notes}"
        if unit_count:
            research_section += f"\n**Units:** {unit_count}"
        new_markdown = markdown + research_section
        # Keep existing blocknote structure, just update markdown
        # (per the module docstring, the CRM expects this bodyV2 shape).
        new_bodyV2 = {
            "blocknote": blocknote,
            "markdown": new_markdown
        }
        # Prepare patch data - only update what's needed
        patch_data = json.dumps({
            "title": new_title,
            "temp": new_temp,
            "bodyV2": new_bodyV2
        }).encode()
        req = urllib.request.Request(
            f"{CRM_URL}/notes/{note_id}",
            data=patch_data,
            headers={
                "Authorization": f"Bearer {CRM_TOKEN}",
                "Content-Type": "application/json"
            },
            method='PATCH'
        )
        # Custom opener so the module-level ssl_context (verification disabled) is used.
        opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
        with opener.open(req, timeout=20) as r:
            # Response body is not inspected; reaching here without an
            # exception is treated as success.
            log(f" ✅ Note updated: {new_title}")
        return True
    except Exception as e:
        log(f" ✗ Update error: {e}")
        return False
def create_opportunity(note, temp):
    """Create opportunity for HOT/WARM leads.

    POSTs a new opportunity to the CRM, linked to the note's person via
    `pointOfContactId`.  Skips (returns False) when the note has no
    personId.

    NOTE(review): the `temp` parameter is currently unused in the body —
    either wire it into the opportunity payload or drop it from callers.

    Returns:
        True when the opportunity was created, False otherwise (errors are
        logged, never raised).
    """
    try:
        person_id = note.get('personId')
        if not person_id:
            log(f" ⚠️ Skip upgrade: No person ID")
            return False
        opp_name = f"Lead: {note.get('title', '')}"
        opp_data = {
            "name": opp_name[:100],  # CRM name field appears length-limited; truncated defensively
            "stage": "NEW",
            "pointOfContactId": person_id,
            # NOTE(review): hardcoded owner UUID — presumably the sales rep's
            # workspace member ID; confirm and consider making configurable.
            "ownerId": "ecf52aad-4827-40c9-9475-b68f3ca9a924"
        }
        req = urllib.request.Request(
            f"{CRM_URL}/opportunities",
            data=json.dumps(opp_data).encode(),
            headers={"Authorization": f"Bearer {CRM_TOKEN}", "Content-Type": "application/json"}
        )
        # Custom opener so the module-level ssl_context (verification disabled) is used.
        opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
        with opener.open(req, timeout=20) as r:
            opp = json.loads(r.read().decode())
            log(f" ✅ UPGRADED to Opportunity: {opp.get('id', 'N/A')}")
        return True
    except Exception as e:
        log(f" ✗ Create opp error: {e}")
        return False
def _mark_processed(state, processed_ids, note_id):
    """Record *note_id* as processed and persist state (keeps the newest 2000 IDs)."""
    processed_ids.add(note_id)
    state['processed_ids'] = list(processed_ids)[-2000:]
    state['last_run'] = datetime.now().isoformat()
    save_state(state)


def main():
    """Entry point: fetch leads, research each website, elevate temps, persist state.

    For every previously-unseen note: extract a website URL, look for budget
    documents / unit counts on it, elevate the lead temperature accordingly,
    update the note, and create an opportunity for HOT/WARM leads.  State is
    saved after every lead so an interrupted run never reprocesses.
    """
    log("=" * 60)
    log("JAE v5.1 Starting - Website & Budget Research")
    log("=" * 60)
    state = load_state()
    processed_ids = set(state.get('processed_ids', []))
    notes = fetch_all_notes()
    # Only handle leads we have not seen in a previous run.
    unprocessed = [n for n in notes if n.get('id') not in processed_ids]
    log(f"\nTotal leads in CRM: {len(notes)}")
    log(f"Already processed: {len(processed_ids)}")
    log(f"New leads to process: {len(unprocessed)}")
    log("=" * 60)
    if not unprocessed:
        log("✅ No new leads to process")
        return
    pace_seconds = 5  # pause between leads, to stay gentle on the CRM and target sites
    upgraded = 0
    processed_count = 0
    for i, note in enumerate(unprocessed, 1):
        note_id = note.get('id')
        title = note.get('title', '')
        log(f"\n[{i}/{len(unprocessed)}] Processing: {title[:60]}...")
        current_temp = get_existing_temp(note)
        log(f" Current temp: {current_temp}")
        url = extract_url_from_note(note)
        if not url:
            # No website to research: still mark as processed so we skip it next run.
            log(f" ⚠️ No website found - keeping {current_temp}")
            processed_count += 1
            _mark_processed(state, processed_ids, note_id)
            continue
        log(f" 🌐 Website found: {url}")
        found_budget, unit_count, details = search_budget_on_site(url)
        # A budget document is the strongest buying signal → jump two levels.
        if found_budget:
            elevation, reason = 2, "Budget PDF found"
        else:
            elevation, reason = 1, "Website exists, no budget"
        new_temp = elevate_temp(current_temp, elevation)
        log(f" 📈 Elevating: {current_temp} -> {new_temp} ({reason})")
        update_note_with_research(note, new_temp, unit_count, details)
        # HOT/WARM leads get promoted to a CRM opportunity.
        if new_temp in ('HOT', 'WARM'):
            if create_opportunity(note, new_temp):
                upgraded += 1
        else:
            log(f" Keeping as COLD")
        processed_count += 1
        _mark_processed(state, processed_ids, note_id)
        # BUG FIX: the old message claimed a 90s wait while actually sleeping
        # only 5s; the message now reflects the real pace.
        log(f" ⏳ Waiting {pace_seconds}s before next lead...")
        time.sleep(pace_seconds)
    log("\n" + "=" * 60)
    log(f"JAE v5 Complete: {processed_count} processed, {upgraded} upgraded")
    log("=" * 60)


if __name__ == "__main__":
    main()