- Created chatwoot-agent-bot/ with Node.js webhook server - Bot detects intent (greeting, billing, technical, features, account) - Auto-responds from FAQ knowledge base or escalates to human - FAQ-KB.md: Living knowledge base that grows with customer questions - CHATWOOT-SETUP.md: Complete deployment and configuration guide - Supports Telegram notifications on escalation - Bot runs on port 3001, ready for Chatwoot webhook integration
374 lines
13 KiB
Python
Executable File
374 lines
13 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
JAE v5.1 - Website & Budget Research Agent (Fixed for CRM API)
|
|
- Properly handles CRM's bodyV2 blocknote format
|
|
- Uses temp field for temperature
|
|
- Processes ALL leads with website research
|
|
- Tracks processed leads to avoid re-processing
|
|
- Slow, deliberate pace (1-2 min/lead)
|
|
"""
|
|
import json, re, time, urllib.request, ssl
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from urllib.parse import urljoin
|
|
|
|
# Directory layout: state/ holds the processed-lead tracker, logs/ the daily log.
SCRIPT_DIR = Path(__file__).parent
for d in [SCRIPT_DIR / "state", SCRIPT_DIR / "logs"]:
    d.mkdir(parents=True, exist_ok=True)

STATE_FILE = SCRIPT_DIR / "state" / "jae-v5-state.json"
LOG_FILE = SCRIPT_DIR / "logs" / f"jae-v5-{datetime.now().strftime('%Y%m%d')}.log"
CRM_URL = "https://salesforce.hoaledgeriq.com/rest"
# SECURITY(review): hard-coded API bearer token committed to source. Move it to
# an environment variable or secrets store, and rotate this credential since it
# is now exposed.
CRM_TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzczMzI4NDQzLCJleHAiOjE4MDQ3ODE2NDIsImp0aSI6IjIwZjEyYzkwLTRkMDctNGJmNi1iMzk3LTZjNmU3MzlmMThjOCJ9.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930"

# SECURITY(review): certificate verification is disabled for every HTTPS request
# this script makes (CRM and researched websites). Acceptable only if the CRM
# endpoint uses a cert the host cannot validate — confirm, otherwise verify.
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
|
|
|
|
def log(msg):
    """Print *msg* with an HH:MM:SS timestamp and append the same line to today's log file."""
    stamp = datetime.now().strftime('%H:%M:%S')
    line = f"[{stamp}] {msg}"
    print(line)
    with open(LOG_FILE, 'a') as fh:
        fh.write(line + "\n")
|
|
|
|
def load_state():
    """Load the persisted run state, or return a fresh default when none exists yet."""
    if not STATE_FILE.exists():
        return {"processed_ids": [], "last_run": None}
    return json.loads(STATE_FILE.read_text())
|
|
|
|
def save_state(s):
    """Persist the run-state dict *s* to disk as pretty-printed JSON."""
    payload = json.dumps(s, indent=2)
    STATE_FILE.write_text(payload)
|
|
|
|
def fetch_all_notes():
    """Fetch ALL notes from the CRM using cursor pagination.

    Returns a list of note dicts. Pagination stops on the first fetch error,
    on an empty page, or when the API reports no further pages — so a partial
    list may be returned on failure (by design: the caller processes whatever
    was fetched).
    """
    all_notes = []
    has_more = True
    end_cursor = None

    log("Fetching all leads from CRM (with pagination)...")

    while has_more:
        try:
            url = f"{CRM_URL}/notes?limit=200&order[createdAt]=desc"
            if end_cursor:
                url += f"&after={end_cursor}"

            req = urllib.request.Request(
                url,
                headers={"Authorization": f"Bearer {CRM_TOKEN}", "Accept": "application/json"}
            )
            opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
            with opener.open(req, timeout=30) as r:
                data = json.loads(r.read().decode())

            notes = data.get('data', {}).get('notes', [])
            all_notes.extend(notes)

            # NOTE(review): pageInfo is read from the top level of the response
            # while the notes list is nested under 'data' — confirm against the
            # CRM's actual response shape; if pageInfo is nested too, pagination
            # silently stops after one page.
            page_info = data.get('pageInfo', {})
            has_more = page_info.get('hasNextPage', False)
            end_cursor = page_info.get('endCursor')

            log(f"  Fetched {len(notes)} leads (total: {len(all_notes)})")

            # Bug fix: guard against an infinite loop. The original kept
            # looping if the API claimed more pages but returned no cursor
            # (the URL would never advance) or returned an empty page.
            if not notes or (has_more and not end_cursor):
                break
            if not has_more:
                break

        except Exception as e:
            # Best-effort: log and return whatever was fetched so far.
            log(f"Fetch error: {e}")
            break

    log(f"Total leads fetched: {len(all_notes)}")
    return all_notes
|
|
|
|
def get_existing_temp(note):
    """Extract a lead's existing temperature from a note dict.

    Precedence: a valid 'temp' field ('HOT'/'WARM'/'COLD', any case), then a
    'HOT:'/'WARM:'/'COLD:' prefix on the title, then the default 'COLD'.
    """
    # Check the temp field first. Bug fix: the original used
    # note.get('temp', 'COLD'), whose default passed the validity check and
    # made the title fallback unreachable whenever the field was absent.
    temp = note.get('temp')
    if isinstance(temp, str) and temp.upper() in ('HOT', 'WARM', 'COLD'):
        return temp.upper()

    # Fallback to a temperature prefix on the title.
    title = note.get('title', '').upper()
    for level in ('HOT', 'WARM', 'COLD'):
        if title.startswith(f'{level}:'):
            return level

    return 'COLD'
|
|
|
|
def extract_url_from_note(note):
    """Pull a website URL out of a note's bodyV2 markdown, falling back to its title.

    Returns a full URL string, or None when neither source yields one.
    """
    title = note.get('title', '')
    body = note.get('bodyV2', {})

    # bodyV2 may be missing or malformed; only a dict carries a markdown field.
    markdown = body.get('markdown', '') if isinstance(body, dict) else ''

    # Prefer an explicit URL anywhere in the markdown; strip trailing punctuation.
    found = re.findall(r'https?://[^\s<>"{}|\\^`\[\]]+', markdown)
    if found:
        return found[0].rstrip('.,;:')

    # Otherwise look for a bare domain in a "HOT:/WARM:/COLD: domain" title.
    m = re.search(r'(?:HOT|WARM|COLD):\s*([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', title, re.IGNORECASE)
    if m:
        return f"https://{m.group(1)}"

    return None
|
|
|
|
def search_budget_on_site(base_url):
    """
    Fetch a website's landing page and look for budget evidence and a unit count.

    Returns: (found_budget: bool, unit_count: int | None, details: str)
    found_budget is True for a linked budget/financial PDF or even a textual
    mention of "budget" on the page; unit_count is only set when a plausible
    value (10-5000) is found.
    """
    log(f"  🔍 Searching: {base_url}")

    try:
        # Bug fix: the original passed
        # `base_url if not base_url.endswith('/') else base_url` — a
        # tautology that always yields base_url; use it directly.
        req = urllib.request.Request(
            base_url,
            headers={'User-Agent': 'Mozilla/5.0 (compatible; JAE-Bot/1.0)'}
        )
        opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
        with opener.open(req, timeout=15) as r:
            content = r.read().decode('utf-8', errors='ignore')

        found_budget = False
        unit_count = None
        details = []
        lowered = content.lower()

        # Look for budget-related PDFs linked from the page.
        for keyword in ('budget', 'financial', 'reserve', 'statement'):
            if keyword in lowered:
                pdf_match = re.search(rf'href="([^"]*{keyword}[^"]*\.pdf)"', content, re.IGNORECASE)
                if pdf_match:
                    found_budget = True
                    details.append(f"Found budget PDF: {pdf_match.group(1)}")
                    log(f"  ✅ Budget PDF found: {pdf_match.group(1)}")
                    break

        # If no direct PDF link, a textual mention still counts as a (weak) hit.
        if not found_budget and 'budget' in lowered:
            found_budget = True
            details.append("Budget mentioned on page")
            log(f"  ✅ Budget found (mentioned)")

        # Look for unit count patterns ("123 homes", "community of 450", ...).
        unit_patterns = [
            r'(\d{1,4})\s*(?:homes|units|lots|properties|residences)',
            r'(\d{1,4})\s*-?\s*(?:home|unit|lot|property|residence)\s*(?:community|association|complex)',
            r'community\s*of\s*(\d{1,4})',
            r'(\d{1,4})\s*home\s*owners',
        ]

        for pattern in unit_patterns:
            match = re.search(pattern, content, re.IGNORECASE)
            if not match:
                continue
            try:
                candidate = int(match.group(1))
            except ValueError:
                continue
            # Bug fix: only assign a count inside the plausible range. The
            # original stored the match in unit_count before range-checking,
            # so an out-of-range value (e.g. a year like 2024) was returned.
            if 10 <= candidate <= 5000:
                unit_count = candidate
                details.append(f"Unit count: {unit_count}")
                log(f"  📊 Found unit count: {unit_count}")
                break

        if not details:
            details.append("No budget found")

        return found_budget, unit_count, "; ".join(details)

    except Exception as e:
        # Unreachable/broken sites are expected; report, don't raise.
        log(f"  ⚠️ Site access issue: {str(e)[:100]}")
        return False, None, f"Site access error: {str(e)[:100]}"
|
|
|
|
def elevate_temp(current_temp, levels):
    """Raise a temperature by *levels* steps along COLD → WARM → HOT, capped at HOT.

    Unknown temperatures are treated as COLD before elevation.
    """
    scale = ['COLD', 'WARM', 'HOT']
    start = scale.index(current_temp) if current_temp in scale else 0
    return scale[min(start + levels, len(scale) - 1)]
|
|
|
|
def update_note_with_research(note, new_temp, unit_count, research_notes):
    """Update note with research findings using CRM API"""
    # PATCHes the note in place: re-prefixes the title with new_temp, sets the
    # 'temp' field, and appends a timestamped research section to the bodyV2
    # markdown. Returns True on success, False on any error (logged, never
    # raised). A falsy unit_count (None or 0) is simply omitted.
    try:
        note_id = note.get('id')
        current_title = note.get('title', '')
        bodyV2 = note.get('bodyV2', {})

        # Get existing markdown/blocknote; bodyV2 may be malformed, so guard on dict.
        markdown = bodyV2.get('markdown', '') if isinstance(bodyV2, dict) else ''
        blocknote = bodyV2.get('blocknote', '') if isinstance(bodyV2, dict) else ''

        # Remove old temperature prefix from title, then apply the new one.
        clean_title = re.sub(r'^(HOT|WARM|COLD):\s*', '', current_title)
        new_title = f"{new_temp}: {clean_title}"

        # Add research to markdown (appended, so earlier research sections are kept).
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
        research_section = f"\n\n---\n**JAE v5 Research ({timestamp}):** {research_notes}"
        if unit_count:
            research_section += f"\n**Units:** {unit_count}"

        new_markdown = markdown + research_section

        # Keep existing blocknote structure, just update markdown.
        # NOTE(review): blocknote is sent back unchanged, so the CRM's rich-text
        # view may not reflect the appended markdown — confirm how the CRM
        # reconciles the two bodyV2 representations.
        new_bodyV2 = {
            "blocknote": blocknote,
            "markdown": new_markdown
        }

        # Prepare patch data - only update what's needed
        patch_data = json.dumps({
            "title": new_title,
            "temp": new_temp,
            "bodyV2": new_bodyV2
        }).encode()

        req = urllib.request.Request(
            f"{CRM_URL}/notes/{note_id}",
            data=patch_data,
            headers={
                "Authorization": f"Bearer {CRM_TOKEN}",
                "Content-Type": "application/json"
            },
            method='PATCH'
        )

        # Any non-2xx response raises inside open() and is caught below.
        opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
        with opener.open(req, timeout=20) as r:
            log(f"  ✅ Note updated: {new_title}")
            return True

    except Exception as e:
        log(f"  ✗ Update error: {e}")
        return False
|
|
|
|
def create_opportunity(note, temp):
    """Create opportunity for HOT/WARM leads"""
    # POSTs a new opportunity linked to the note's person. Returns True on
    # success, False when the note has no personId or the API call fails.
    # NOTE(review): the 'temp' parameter is currently unused in the body; it is
    # kept for interface stability with callers (main passes new_temp).
    try:
        person_id = note.get('personId')
        if not person_id:
            log(f"  ⚠️ Skip upgrade: No person ID")
            return False

        opp_name = f"Lead: {note.get('title', '')}"

        opp_data = {
            "name": opp_name[:100],  # truncate to the CRM's name length cap
            "stage": "NEW",
            "pointOfContactId": person_id,
            # NOTE(review): hard-coded owner UUID — confirm this is the
            # intended default owner for auto-created opportunities.
            "ownerId": "ecf52aad-4827-40c9-9475-b68f3ca9a924"
        }

        req = urllib.request.Request(
            f"{CRM_URL}/opportunities",
            data=json.dumps(opp_data).encode(),
            headers={"Authorization": f"Bearer {CRM_TOKEN}", "Content-Type": "application/json"}
        )
        opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))
        with opener.open(req, timeout=20) as r:
            opp = json.loads(r.read().decode())
            log(f"  ✅ UPGRADED to Opportunity: {opp.get('id', 'N/A')}")
            return True
    except Exception as e:
        log(f"  ✗ Create opp error: {e}")
        return False
|
|
|
|
def main():
    """Run one research pass: fetch leads, research each new one, update the CRM.

    Progress is checkpointed to the state file after every lead, so an
    interrupted run resumes without re-processing earlier leads.
    """
    log("=" * 60)
    log("JAE v5.1 Starting - Website & Budget Research")
    log("=" * 60)

    state = load_state()
    processed_ids = set(state.get('processed_ids', []))

    notes = fetch_all_notes()

    # Filter to unprocessed only
    unprocessed = [n for n in notes if n.get('id') not in processed_ids]
    log(f"\nTotal leads in CRM: {len(notes)}")
    log(f"Already processed: {len(processed_ids)}")
    log(f"New leads to process: {len(unprocessed)}")
    log("=" * 60)

    if not unprocessed:
        log("✅ No new leads to process")
        return

    upgraded = 0
    processed_count = 0

    def _checkpoint(note_id):
        # Mark the lead processed and persist immediately, keeping only the
        # most recent 2000 ids so the state file stays bounded. (Was duplicated
        # inline in two places in the original loop.)
        nonlocal processed_count
        processed_count += 1
        processed_ids.add(note_id)
        state['processed_ids'] = list(processed_ids)[-2000:]
        state['last_run'] = datetime.now().isoformat()
        save_state(state)

    for i, note in enumerate(unprocessed, 1):
        note_id = note.get('id')
        title = note.get('title', '')

        log(f"\n[{i}/{len(unprocessed)}] Processing: {title[:60]}...")

        # Get existing temperature
        current_temp = get_existing_temp(note)
        log(f"  Current temp: {current_temp}")

        # Extract URL; leads without a website keep their temperature.
        url = extract_url_from_note(note)

        if not url:
            log(f"  ⚠️ No website found - keeping {current_temp}")
            _checkpoint(note_id)
            continue

        log(f"  🌐 Website found: {url}")

        # Research website
        found_budget, unit_count, details = search_budget_on_site(url)

        # Elevation policy: budget evidence is worth two levels, a live
        # website with no budget one level.
        if found_budget:
            elevation = 2
            reason = "Budget PDF found"
        else:
            elevation = 1
            reason = "Website exists, no budget"

        new_temp = elevate_temp(current_temp, elevation)
        log(f"  📈 Elevating: {current_temp} → {new_temp} ({reason})")

        # Update note (failure is logged inside; we continue regardless).
        update_note_with_research(note, new_temp, unit_count, details)

        # Create opportunity if HOT or WARM
        if new_temp in ['HOT', 'WARM']:
            if create_opportunity(note, new_temp):
                upgraded += 1
        else:
            log(f"  Keeping as COLD")

        _checkpoint(note_id)

        # Pace: 90 seconds between leads (gentle, no rate limits).
        # Bug fix: the original logged "Waiting 90s" but slept only 5 seconds,
        # contradicting the module's stated 1-2 min/lead pacing.
        log(f"  ⏳ Waiting 90s before next lead...")
        time.sleep(90)

    log("\n" + "=" * 60)
    log(f"JAE v5 Complete: {processed_count} processed, {upgraded} upgraded")
    log("=" * 60)


if __name__ == "__main__":
    main()
|