feat: Add Chatwoot Agent Bot prototype and FAQ knowledge base
- Created chatwoot-agent-bot/ with a Node.js webhook server
- Bot detects intent (greeting, billing, technical, features, account)
- Auto-responds from the FAQ knowledge base or escalates to a human
- FAQ-KB.md: living knowledge base that grows with customer questions
- CHATWOOT-SETUP.md: complete deployment and configuration guide
- Supports Telegram notifications on escalation
- Bot runs on port 3001, ready for Chatwoot webhook integration
This commit is contained in:
241
agents/sales-prospector/prospector-v2.py
Normal file
241
agents/sales-prospector/prospector-v2.py
Normal file
@@ -0,0 +1,241 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Sales Prospector v2 - Intelligent HOA Lead Generation
|
||||
Searches for HOA websites, crawls for contact info, extracts board/mgmt contacts
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse, urljoin
|
||||
from pathlib import Path
|
||||
|
||||
# Config
# All working directories live next to this script.
SCRIPT_DIR = Path(__file__).parent.absolute()
STATE_DIR, LOG_DIR, LEADS_DIR = (
    SCRIPT_DIR / subdir for subdir in ("state", "logs", "leads")
)

# Create the working directories at import time so later file writes find
# their parent directory in place.
for d in (STATE_DIR, LOG_DIR, LEADS_DIR):
    d.mkdir(parents=True, exist_ok=True)

# Persisted run state (JSON).
STATE_FILE = STATE_DIR / "prospector-v2-state.json"
# Log file name carries the date on which the module was loaded.
LOG_FILE = LOG_DIR / f"prospector-v2-{datetime.now().strftime('%Y%m%d')}.log"
|
||||
|
||||
# Metro areas to prospect.
METROS = [
    "Charlotte NC",
    "Atlanta GA",
    "Orlando FL",
    "Phoenix AZ",
]

# Search config
# Query templates; "{metro}" is presumably filled in with an entry from
# METROS by the caller (the substitution is not in this part of the file).
SEARCHES_PER_METRO = [
    '{metro} HOA "board of directors"',
    '{metro} homeowners association contact',
    '{metro} HOA management company',
    '{metro} HOA board members',
    '{metro} community association management',
]

# Keywords for validating HOA sites (matched as substrings of the domain).
HOA_KEYWORDS = [
    'hoa',
    'homeowners',
    'association',
    'board',
    'community',
    'management',
    'condo',
    'townhome',
]
|
||||
|
||||
# CRM Config
# SECURITY: an API token used to be hard-coded on this line. A credential that
# has been committed to version control must be treated as leaked: revoke it
# in the CRM and issue a replacement. The token is now read from the
# TWENTY_TOKEN environment variable and is an empty string when that variable
# is not set, so CRM requests will be rejected until it is configured.
TWENTY_TOKEN = os.environ.get("TWENTY_TOKEN", "")
TWENTY_BASE = "https://salesforce.hoaledgeriq.com/rest"
|
||||
|
||||
def log(msg):
    """Echo a timestamped message to stdout and append it to LOG_FILE."""
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    entry = f"[{stamp}] {msg}"
    print(entry)
    # Append mode: earlier entries in the same log file are kept.
    with open(LOG_FILE, 'a') as handle:
        print(entry, file=handle)
|
||||
|
||||
def load_state():
    """Return the run state stored in STATE_FILE, or a fresh default state.

    When STATE_FILE does not exist, a new state dict with zeroed counters and
    empty domain lists is returned; otherwise the file's JSON content is
    returned as-is.
    """
    if not STATE_FILE.exists():
        fresh_state = {
            "metro_index": 0,
            "search_index": 0,
            "processed_domains": [],
            "leads_found": 0,
            "domains_queue": [],  # Domains found but not yet crawled
            "current_domain": None,
            "cycle_count": 0
        }
        return fresh_state

    with open(STATE_FILE) as handle:
        return json.load(handle)
|
||||
|
||||
def save_state(state):
    """Persist the run state to STATE_FILE as indented JSON.

    The state is serialized in memory first and written to a sibling temporary
    file, which is then moved over STATE_FILE. A serialization error or a crash
    in the middle of the write therefore cannot leave a truncated state file
    behind (the previous version streamed JSON directly into STATE_FILE).

    Args:
        state: JSON-serializable state dict.

    Raises:
        TypeError / ValueError: if ``state`` cannot be serialized to JSON.
        OSError: if the file cannot be written or replaced.
    """
    # Serialize before touching the disk so a bad value never clobbers the file.
    payload = json.dumps(state, indent=2)
    tmp_path = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    with open(tmp_path, 'w') as f:
        f.write(payload)
    # os.replace swaps the new file into place in a single step.
    os.replace(tmp_path, STATE_FILE)
|
||||
|
||||
def get_throttle_delay():
    """Return the throttle delay in seconds for the current local hour.

    Hours 9 through 17 count as business hours and yield 120 seconds (2 min);
    every other hour yields 60 seconds (1 min).
    """
    business_hours = range(9, 18)
    return 120 if datetime.now().hour in business_hours else 60
|
||||
|
||||
def extract_domain(url):
    """Extract a clean, lower-cased host name from a URL.

    Uses the parsed ``hostname`` rather than ``netloc`` so that credentials
    (``user:pass@``) and port numbers (``:8080``) never end up in the domain,
    then drops a leading ``www.``.

    Args:
        url: URL string to parse.

    Returns:
        The host name without ``www.``; an empty string when the URL has no
        network location (e.g. no scheme); ``None`` when ``url`` is not a
        string or cannot be parsed.
    """
    if not isinstance(url, str):
        return None
    try:
        host = urlparse(url).hostname or ""
    except ValueError:
        # urlparse rejects malformed network locations such as "http://[bad".
        return None
    host = host.lower()
    if host.startswith('www.'):
        host = host[4:]
    return host
|
||||
|
||||
def is_hoa_domain(domain):
    """Return True if the lower-cased domain contains any HOA_KEYWORDS entry.

    Empty or missing domains are never considered HOA sites.
    """
    if not domain:
        return False
    haystack = domain.lower()
    for keyword in HOA_KEYWORDS:
        if keyword in haystack:
            return True
    return False
|
||||
|
||||
def _extract_urls(text):
    """Collect the unique URLs found in search output, in first-seen order."""
    found = {}
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if line.startswith(('http://', 'https://')):
            # Bare URL line: keep only the URL token, not any text after it.
            candidate = line.split()[0]
        else:
            # Also extract from markdown format, e.g. "[title](https://...)".
            url_match = re.search(r'https?://[^\s\)\]\"\']+', line)
            if not url_match:
                continue
            candidate = url_match.group(0)
        # Drop sentence punctuation that got glued onto the URL.
        candidate = candidate.rstrip('.,;')
        found.setdefault(candidate, None)
    return list(found)


def search_web(query, count=10):
    """Run a web search via the ``openclaw web-search`` CLI and return URLs.

    Args:
        query: Search query string.
        count: Number of results requested from the CLI.

    Returns:
        A list of unique URLs in the order they appear in the CLI output.
        (The previous version de-duplicated through a set, which shuffled the
        ordering, and could return whole output lines instead of just the
        URL.) Returns an empty list when the CLI fails, times out or prints
        nothing.
    """
    log(f"SEARCH: {query}")
    try:
        # Use openclaw CLI for web search
        result = subprocess.run(
            ['openclaw', 'web-search', query, '--count', str(count)],
            capture_output=True,
            text=True,
            timeout=60
        )
    except Exception as e:
        # Best-effort: a missing binary or a timeout must not stop the run.
        log(f"Search error: {e}")
        return []

    if result.returncode != 0:
        log(f"Search error: openclaw exited with code {result.returncode}")
        return []
    if not result.stdout:
        return []
    return _extract_urls(result.stdout)
|
||||
|
||||
def fetch_page(url, max_chars=3000):
    """Fetch page content through the ``openclaw web-fetch`` CLI.

    Returns the command's stdout when it exits with code 0. Returns None when
    the exit code is non-zero or when running the command raises (the
    exception is logged).
    """
    try:
        command = ['openclaw', 'web-fetch', url, '--max-chars', str(max_chars)]
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=30
        )
    except Exception as e:
        log(f"Fetch error for {url}: {e}")
        return None
    else:
        succeeded = completed.returncode == 0
        return completed.stdout if succeeded else None
|
||||
|
||||
def extract_emails(text):
    """Extract unique email addresses from text.

    Args:
        text: Text to scan; may be None or empty.

    Returns:
        Addresses in first-seen order with exact duplicates removed.
        Addresses on placeholder domains (example.com, test.com, domain.com,
        email.com, and their subdomains) are dropped.
    """
    if not text:
        return []
    # The TLD class is [A-Za-z]; the earlier [A-Z|a-z] also accepted a
    # literal "|" inside the TLD.
    pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    placeholder_domains = ('example.com', 'test.com', 'domain.com', 'email.com')

    unique = {}
    for email in re.findall(pattern, text):
        # Compare the domain part itself: a plain substring test would also
        # reject real domains such as "contest.com" (contains "test.com").
        domain = email.rsplit('@', 1)[1].lower()
        if any(domain == d or domain.endswith('.' + d) for d in placeholder_domains):
            continue
        unique.setdefault(email, None)
    return list(unique)
|
||||
|
||||
def extract_phones(text):
    """Extract unique US-style phone numbers from text.

    Recognizes ``(555) 123-4567``, ``555-123-4567``, ``555.123.4567`` and the
    same forms with a leading ``1`` / ``+1`` country code.

    Args:
        text: Text to scan; may be None or empty.

    Returns:
        Matched numbers as written in the text, in first-seen order. A number
        that appears more than once (even in different formats, or with and
        without the country code) is returned only once, in its first form.
    """
    if not text:
        return []
    # One combined pattern instead of three overlapping ones, so
    # "+1 (555) 123-4567" is no longer reported a second time as
    # "(555) 123-4567". The digit look-arounds stop the pattern from matching
    # a slice of a longer digit run (IDs, tracking numbers, ...).
    pattern = r'(?<!\d)(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]\d{3}[-.\s]\d{4}(?!\d)'

    by_digits = {}
    for match in re.finditer(pattern, text):
        number = match.group(0)
        # Key on the 10 national digits so format variants collapse together.
        national = re.sub(r'\D', '', number)[-10:]
        by_digits.setdefault(national, number)
    return list(by_digits.values())
|
||||
|
||||
def extract_names_and_titles(text):
    """Extract potential board member names with titles.

    Scans the text line by line for patterns like "John Smith, President" or
    "Board Member: Jane Doe". At most one result is produced per line.

    Args:
        text: Text to scan; may be None or empty.

    Returns:
        A list of ``{"name": ..., "title": ...}`` dicts, where ``title`` is
        the title-cased title found on the line and ``name`` is the first run
        of 2-3 capitalized words on that line outside the title itself.
    """
    if not text:
        return []

    titles = ['president', 'vice president', 'vp', 'treasurer', 'secretary', 'board member',
              'director', 'manager', 'community manager', 'property manager']

    # Longest title first so "vice president" wins over "president" and
    # "community manager" over "manager". Word boundaries stop "vp" from
    # matching inside words such as "RSVP"; a plural "s" is tolerated.
    alternatives = '|'.join(
        re.escape(t) for t in sorted(titles, key=len, reverse=True)
    )
    title_re = re.compile(r'\b(' + alternatives + r')s?\b', re.IGNORECASE)
    # Simple: capture 2-3 capitalized words near the title
    name_re = re.compile(r'([A-Z][a-z]+\s[A-Z][a-z]+(?:\s[A-Z][a-z]+)?)')

    results = []
    for line in text.split('\n'):
        title_match = title_re.search(line)
        if not title_match:
            continue
        # Blank out the title words before looking for the name; otherwise
        # "Board Member: Jane Doe" yields "Board Member" as the name.
        remainder = title_re.sub(' | ', line)
        name_match = name_re.search(remainder)
        if name_match:
            results.append({
                "name": name_match.group(1),
                "title": title_match.group(1).lower().title(),
            })

    return results
|
||||
|
||||
def extract_hoa_info(domain, content):
    """Extract HOA name and details from content.

    Args:
        domain: Domain the content came from (currently not used).
        content: Page text, expected to contain markdown-style headings; may
            be None or empty.

    Returns:
        A dict with keys ``name`` (text of the first heading longer than 3
        characters among the first 20 lines), ``homes`` (home/unit count as a
        digit string with thousands separators removed) and ``location``
        (never populated here). Values stay None when nothing is found.
    """
    info = {
        "name": None,
        "homes": None,
        "location": None
    }

    if not content:
        return info

    # Try to find HOA name from title or first heading
    for line in content.split('\n')[:20]:
        stripped = line.strip()
        # Only a line that starts with "#" is a markdown header; a "#"
        # elsewhere on the line (URL fragment, "Unit #5") is not.
        if stripped.startswith('#'):
            heading = stripped.strip('#').strip()
            if len(heading) > 3:
                info['name'] = heading
                break

    # Look for home count patterns. The number may carry thousands
    # separators ("1,200 homes"), which plain \d+ would cut down to "200".
    home_patterns = [
        r'(\d[\d,]*)\s+(?:homes|units|properties|residences|households)',
        r'(?:over|more than)\s+(\d[\d,]*)\s+(?:homes|units)',
    ]
    for pattern in home_patterns:
        match = re.search(pattern, content, re.IGNORECASE)
        if match:
            info['homes'] = match.group(1).replace(',', '')
            break

    return info
|
||||
|
||||
def assess_quality(emails, phones, names, info):
    """Grade a lead as "HOT", "WARM" or "COLD" from the data found for it.

    Each available piece of data adds points: emails 3, phones 2, names 2,
    HOA name 1, home count 2. A total of 7 or more is HOT, 4 to 6 is WARM,
    anything lower is COLD.
    """
    weighted_signals = (
        (emails, 3),
        (phones, 2),
        (names, 2),
        (info.get('name'), 1),
        (info.get('homes'), 2),
    )
    score = sum(points for value, points in weighted_signals if value)

    if score >= 7:
        return "HOT"
    if score >= 4:
        return "WARM"
    return "COLD"
|
||||
|
||||
def push_to_crm(lead):
|
||||
"""Push lead to Twenty CRM"""
|
||||
try:
|
||||
body = f"""## HOA Prospect - {lead['quality']}
|
||||
|
||||
**Name:** {lead.get('hoa_name
|
||||
Reference in New Issue
Block a user