feat: Add Chatwoot Agent Bot prototype and FAQ knowledge base
- Created chatwoot-agent-bot/ with Node.js webhook server
- Bot detects intent (greeting, billing, technical, features, account)
- Auto-responds from FAQ knowledge base or escalates to a human
- FAQ-KB.md: living knowledge base that grows with customer questions
- CHATWOOT-SETUP.md: complete deployment and configuration guide
- Supports Telegram notifications on escalation
- Bot runs on port 3001, ready for Chatwoot webhook integration
This commit is contained in:
373
agents/junior-ae/junior-ae-v5.py
Executable file
373
agents/junior-ae/junior-ae-v5.py
Executable file
@@ -0,0 +1,373 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
JAE v5.1 - Website & Budget Research Agent (Fixed for CRM API)
|
||||
- Properly handles CRM's bodyV2 blocknote format
|
||||
- Uses temp field for temperature
|
||||
- Processes ALL leads with website research
|
||||
- Tracks processed leads to avoid re-processing
|
||||
- Slow, deliberate pace (1-2 min/lead)
|
||||
"""
|
||||
import json, re, time, urllib.request, ssl
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from urllib.parse import urljoin
|
||||
|
||||
# Working directories (state + logs) live next to this script.
SCRIPT_DIR = Path(__file__).parent
for d in [SCRIPT_DIR / "state", SCRIPT_DIR / "logs"]:
    d.mkdir(parents=True, exist_ok=True)

# State records which lead ids were already researched; log file rotates daily.
STATE_FILE = SCRIPT_DIR / "state" / "jae-v5-state.json"
LOG_FILE = SCRIPT_DIR / "logs" / f"jae-v5-{datetime.now().strftime('%Y%m%d')}.log"
CRM_URL = "https://salesforce.hoaledgeriq.com/rest"
# SECURITY(review): a long-lived API key (JWT) is hard-coded and committed to
# source control — rotate this credential and load it from an environment
# variable or secret store instead.
CRM_TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzczMzI4NDQzLCJleHAiOjE4MDQ3ODE2NDIsImp0aSI6IjIwZjEyYzkwLTRkMDctNGJmNi1iMzk3LTZjNmU3MzlmMThjOCJ9.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930"

# SECURITY(review): TLS certificate verification is disabled for every request
# this agent makes (fetch, PATCH, opportunity creation, website scraping).
# Acceptable only for a self-signed CRM host on a trusted network; otherwise
# re-enable verification.
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
|
||||
|
||||
def log(msg):
    """Echo *msg* to stdout with a HH:MM:SS stamp and append it to today's log file."""
    stamp = datetime.now().strftime('%H:%M:%S')
    line = f"[{stamp}] {msg}"
    print(line)
    with open(LOG_FILE, 'a') as fh:
        fh.write(line + "\n")
|
||||
|
||||
def load_state():
    """Load persisted agent state; return a fresh default when no file exists."""
    if not STATE_FILE.exists():
        return {"processed_ids": [], "last_run": None}
    return json.loads(STATE_FILE.read_text())
|
||||
|
||||
def save_state(s):
    """Persist agent state to disk as pretty-printed JSON."""
    serialized = json.dumps(s, indent=2)
    STATE_FILE.write_text(serialized)
|
||||
|
||||
def fetch_all_notes():
    """Fetch ALL notes from CRM with pagination.

    Pages through GET /notes (200 per request, newest first), following the
    cursor in the response's pageInfo. Any request/parse error ends pagination
    early and returns whatever was collected so far (best-effort).
    """
    all_notes = []
    has_more = True
    end_cursor = None

    log("Fetching all leads from CRM (with pagination)...")

    while has_more:
        try:
            url = f"{CRM_URL}/notes?limit=200&order[createdAt]=desc"
            if end_cursor:
                url += f"&after={end_cursor}"

            req = urllib.request.Request(
                url,
                headers={"Authorization": f"Bearer {CRM_TOKEN}", "Accept": "application/json"}
            )
            # NOTE(review): module-level ssl_context has certificate checks disabled.
            opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
            with opener.open(req, timeout=30) as r:
                data = json.loads(r.read().decode())
                notes = data.get('data', {}).get('notes', [])
                all_notes.extend(notes)

                # Check pagination
                page_info = data.get('pageInfo', {})
                has_more = page_info.get('hasNextPage', False)
                end_cursor = page_info.get('endCursor')

                log(f" Fetched {len(notes)} leads (total: {len(all_notes)})")

                if not has_more:
                    break  # redundant with the while condition, but harmless

        except Exception as e:
            # Best-effort: stop paging on any failure, return partial results.
            log(f"Fetch error: {e}")
            break

    log(f"Total leads fetched: {len(all_notes)}")
    return all_notes
|
||||
|
||||
def get_existing_temp(note):
    """Return the note's temperature ('HOT'/'WARM'/'COLD'); defaults to COLD.

    The dedicated `temp` field wins when it holds a recognised value;
    otherwise a "TEMP:" prefix on the title is consulted.
    """
    valid = ('HOT', 'WARM', 'COLD')

    # Prefer the dedicated temp field when it carries a recognised level.
    raw = note.get('temp', 'COLD')
    if raw:
        candidate = raw.upper()
        if candidate in valid:
            return candidate

    # Fall back to a temperature prefix on the title.
    heading = note.get('title', '').upper()
    for level in valid:
        if heading.startswith(level + ':'):
            return level

    return 'COLD'
|
||||
|
||||
def extract_url_from_note(note):
    """Pull a website URL from a note's bodyV2 markdown or its title.

    Returns the first URL found (body takes priority), or None when the
    note contains neither a literal URL nor a "TEMP: domain.com" title.
    """
    # Body first: scan the bodyV2 markdown for any literal http(s) URL.
    body = note.get('bodyV2', {})
    text = body.get('markdown', '') if isinstance(body, dict) else ''

    hits = re.findall(r'https?://[^\s<>"{}|\\^`\[\]]+', text)
    if hits:
        # Trim trailing sentence punctuation that the pattern swallows.
        return hits[0].rstrip('.,;:')

    # Fall back to a bare domain in a "TEMP: domain.com" style title.
    m = re.search(
        r'(?:HOT|WARM|COLD):\s*([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
        note.get('title', ''),
        re.IGNORECASE,
    )
    return f"https://{m.group(1)}" if m else None
|
||||
|
||||
def search_budget_on_site(base_url):
    """
    Fetch *base_url* and scan the HTML for budget documents and a unit count.

    Returns: (found_budget: bool, unit_count: int|None, details: str)

    Any network/parse failure is logged and reported as a non-match rather
    than raised. NOTE(review): requests go through the module ssl_context,
    which has TLS verification disabled.
    """
    log(f" 🔍 Searching: {base_url}")

    try:
        # FIX: the original `base_url if not base_url.endswith('/') else base_url`
        # was a no-op conditional (both branches identical).
        req = urllib.request.Request(
            base_url,
            headers={'User-Agent': 'Mozilla/5.0 (compatible; JAE-Bot/1.0)'}
        )
        opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
        with opener.open(req, timeout=15) as r:
            content = r.read().decode('utf-8', errors='ignore')

        found_budget = False
        unit_count = None
        details = []
        lowered = content.lower()  # hoisted: reused by several checks below

        # Strongest signal: a direct link to a budget-ish PDF.
        pdf_patterns = ['budget', 'financial', 'reserve', 'statement']
        for pattern in pdf_patterns:
            if pattern in lowered:
                pdf_match = re.search(rf'href="([^"]*{pattern}[^"]*\.pdf)"', content, re.IGNORECASE)
                if pdf_match:
                    found_budget = True
                    details.append(f"Found budget PDF: {pdf_match.group(1)}")
                    log(f" ✅ Budget PDF found: {pdf_match.group(1)}")
                    break

        # Weaker signal: the word "budget" appears anywhere on the page.
        if not found_budget and 'budget' in lowered:
            found_budget = True
            details.append("Budget mentioned on page")
            log(f" ✅ Budget found (mentioned)")

        # Try to read a community/unit count off the page.
        unit_patterns = [
            r'(\d{1,4})\s*(?:homes|units|lots|properties|residences)',
            r'(\d{1,4})\s*-?\s*(?:home|unit|lot|property|residence)\s*(?:community|association|complex)',
            r'community\s*of\s*(\d{1,4})',
            r'(\d{1,4})\s*home\s*owners',
        ]
        for pattern in unit_patterns:
            match = re.search(pattern, content, re.IGNORECASE)
            if match:
                # Group is \d{1,4}, so int() cannot fail — the original
                # bare `except: pass` was dead code.
                candidate = int(match.group(1))
                if 10 <= candidate <= 5000:  # Reasonable range
                    unit_count = candidate
                    details.append(f"Unit count: {unit_count}")
                    log(f" 📊 Found unit count: {unit_count}")
                    break
                # BUG FIX: the original assigned unit_count before the range
                # check, so an out-of-range match could still be returned as
                # the unit count. Out-of-range matches are now discarded.

        if not details:
            details.append("No budget found")

        return found_budget, unit_count, "; ".join(details)

    except Exception as e:
        log(f" ⚠️ Site access issue: {str(e)[:100]}")
        return False, None, f"Site access error: {str(e)[:100]}"
|
||||
|
||||
def elevate_temp(current_temp, levels):
    """Raise a temperature by *levels* steps on COLD→WARM→HOT, capped at HOT.

    Unknown temperatures are treated as COLD.
    """
    ladder = ('COLD', 'WARM', 'HOT')
    start = ladder.index(current_temp) if current_temp in ladder else 0
    return ladder[min(start + levels, len(ladder) - 1)]
|
||||
|
||||
def update_note_with_research(note, new_temp, unit_count, research_notes):
    """Update note with research findings using CRM API.

    Rewrites the title's temperature prefix to *new_temp*, sets the `temp`
    field, and appends a timestamped research section to the bodyV2 markdown
    via PATCH /notes/{id}. Returns True on success, False on any error
    (logged, never raised).
    """
    try:
        note_id = note.get('id')
        current_title = note.get('title', '')
        bodyV2 = note.get('bodyV2', {})

        # Get existing markdown (bodyV2 may not be a dict on malformed notes)
        markdown = bodyV2.get('markdown', '') if isinstance(bodyV2, dict) else ''
        blocknote = bodyV2.get('blocknote', '') if isinstance(bodyV2, dict) else ''

        # Remove old temperature prefix from title before applying the new one
        clean_title = re.sub(r'^(HOT|WARM|COLD):\s*', '', current_title)
        new_title = f"{new_temp}: {clean_title}"

        # Add research to markdown (appended so earlier history is preserved)
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
        research_section = f"\n\n---\n**JAE v5 Research ({timestamp}):** {research_notes}"
        if unit_count:
            research_section += f"\n**Units:** {unit_count}"

        new_markdown = markdown + research_section

        # Keep existing blocknote structure, just update markdown
        new_bodyV2 = {
            "blocknote": blocknote,
            "markdown": new_markdown
        }

        # Prepare patch data - only update what's needed
        patch_data = json.dumps({
            "title": new_title,
            "temp": new_temp,
            "bodyV2": new_bodyV2
        }).encode()

        req = urllib.request.Request(
            f"{CRM_URL}/notes/{note_id}",
            data=patch_data,
            headers={
                "Authorization": f"Bearer {CRM_TOKEN}",
                "Content-Type": "application/json"
            },
            method='PATCH'
        )

        # NOTE(review): module ssl_context disables certificate verification.
        opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
        with opener.open(req, timeout=20) as r:
            # Response body is ignored; a non-error status is treated as success.
            log(f" ✅ Note updated: {new_title}")
            return True

    except Exception as e:
        log(f" ✗ Update error: {e}")
        return False
|
||||
|
||||
def create_opportunity(note, temp):
    """Create opportunity for HOT/WARM leads.

    POSTs a new opportunity linked to the note's person via /opportunities.
    Notes without a personId are skipped. Returns True on success, False
    otherwise (errors logged, never raised). The *temp* argument is currently
    unused by the request payload.
    """
    try:
        person_id = note.get('personId')
        if not person_id:
            log(f" ⚠️ Skip upgrade: No person ID")
            return False

        opp_name = f"Lead: {note.get('title', '')}"

        opp_data = {
            "name": opp_name[:100],  # truncated — CRM name field appears length-limited
            "stage": "NEW",
            "pointOfContactId": person_id,
            # NOTE(review): hard-coded owner id — confirm this user still
            # exists in the workspace, or make it configurable.
            "ownerId": "ecf52aad-4827-40c9-9475-b68f3ca9a924"
        }

        req = urllib.request.Request(
            f"{CRM_URL}/opportunities",
            data=json.dumps(opp_data).encode(),
            headers={"Authorization": f"Bearer {CRM_TOKEN}", "Content-Type": "application/json"}
        )
        # NOTE(review): module ssl_context disables certificate verification.
        opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
        with opener.open(req, timeout=20) as r:
            opp = json.loads(r.read().decode())
            log(f" ✅ UPGRADED to Opportunity: {opp.get('id', 'N/A')}")
            return True
    except Exception as e:
        log(f" ✗ Create opp error: {e}")
        return False
|
||||
|
||||
def main():
    """Run one research pass: fetch leads, research websites, elevate temps.

    For each not-yet-processed note: extract a website URL, scan it for
    budget evidence, elevate the temperature (budget = +2, site only = +1),
    PATCH the note, and create an opportunity for HOT/WARM leads. State is
    persisted after every lead so an interrupted run resumes cleanly.
    """
    log("=" * 60)
    log("JAE v5.1 Starting - Website & Budget Research")
    log("=" * 60)

    state = load_state()
    processed_ids = set(state.get('processed_ids', []))

    notes = fetch_all_notes()

    # Filter to unprocessed only
    unprocessed = [n for n in notes if n.get('id') not in processed_ids]
    log(f"\nTotal leads in CRM: {len(notes)}")
    log(f"Already processed: {len(processed_ids)}")
    log(f"New leads to process: {len(unprocessed)}")
    log("=" * 60)

    if not unprocessed:
        log("✅ No new leads to process")
        return

    upgraded = 0
    processed_count = 0

    for i, note in enumerate(unprocessed, 1):
        note_id = note.get('id')
        title = note.get('title', '')

        log(f"\n[{i}/{len(unprocessed)}] Processing: {title[:60]}...")

        # Get existing temperature
        current_temp = get_existing_temp(note)
        log(f" Current temp: {current_temp}")

        # Extract URL; leads without a website keep their temperature.
        url = extract_url_from_note(note)

        if not url:
            log(f" ⚠️ No website found - keeping {current_temp}")
            processed_count += 1
            _mark_processed(state, processed_ids, note_id)
            continue

        log(f" 🌐 Website found: {url}")

        # Research website
        found_budget, unit_count, details = search_budget_on_site(url)

        # Budget evidence elevates two levels; a live site alone elevates one.
        if found_budget:
            elevation = 2
            reason = "Budget PDF found"
        else:
            elevation = 1
            reason = "Website exists, no budget"

        new_temp = elevate_temp(current_temp, elevation)
        log(f" 📈 Elevating: {current_temp} → {new_temp} ({reason})")

        # Update note
        update_note_with_research(note, new_temp, unit_count, details)

        # Create opportunity if HOT or WARM
        if new_temp in ['HOT', 'WARM']:
            if create_opportunity(note, new_temp):
                upgraded += 1
        else:
            log(f" Keeping as COLD")

        # Save state after every lead so interruptions don't lose progress
        processed_count += 1
        _mark_processed(state, processed_ids, note_id)

        # Pace between leads to stay clear of rate limits.
        # BUG FIX: the old log line claimed "Waiting 90s" while the code
        # slept 5s; the message now reports the real delay.
        # NOTE(review): the module docstring promises 1-2 min/lead — if 90s
        # pacing is the actual requirement, raise pace_seconds to 90.
        pace_seconds = 5
        log(f" ⏳ Waiting {pace_seconds}s before next lead...")
        time.sleep(pace_seconds)

    log("\n" + "=" * 60)
    log(f"JAE v5 Complete: {processed_count} processed, {upgraded} upgraded")
    log("=" * 60)


def _mark_processed(state, processed_ids, note_id):
    """Record *note_id* as processed and persist state (keeps the last 2000 ids)."""
    processed_ids.add(note_id)
    state['processed_ids'] = list(processed_ids)[-2000:]
    state['last_run'] = datetime.now().isoformat()
    save_state(state)
|
||||
|
||||
# Script entry point: run a single research pass when executed directly.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user