feat: Add Chatwoot Agent Bot prototype and FAQ knowledge base
- Created chatwoot-agent-bot/ with Node.js webhook server
- Bot detects intent (greeting, billing, technical, features, account)
- Auto-responds from FAQ knowledge base or escalates to a human
- FAQ-KB.md: living knowledge base that grows with customer questions
- CHATWOOT-SETUP.md: complete deployment and configuration guide
- Supports Telegram notifications on escalation
- Bot runs on port 3001, ready for Chatwoot webhook integration
This commit is contained in:
233
agents/sales-prospector/prospector-v3.py
Executable file
233
agents/sales-prospector/prospector-v3.py
Executable file
@@ -0,0 +1,233 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Sales Prospector v3 - Working HOA Lead Generation
|
||||
Actually searches, extracts, and pushes real leads to CRM
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# Config
|
||||
SCRIPT_DIR = Path(__file__).parent.absolute()
|
||||
STATE_DIR = SCRIPT_DIR / "state"
|
||||
LOG_DIR = SCRIPT_DIR / "logs"
|
||||
LEADS_DIR = SCRIPT_DIR / "leads"
|
||||
for d in [STATE_DIR, LOG_DIR, LEADS_DIR]:
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
STATE_FILE = STATE_DIR / "prospector-v3-state.json"
|
||||
LOG_FILE = LOG_DIR / f"prospector-v3-{datetime.now().strftime('%Y%m%d')}.log"
|
||||
|
||||
METROS = ["Charlotte NC", "Atlanta GA", "Orlando FL", "Phoenix AZ"]
|
||||
|
||||
# Extended search queries for better coverage
|
||||
SEARCHES_PER_METRO = [
|
||||
'{metro} HOA contact email',
|
||||
'{metro} homeowners association website',
|
||||
'{metro} HOA management contact',
|
||||
'{metro} community association board',
|
||||
'{metro} condo association contact',
|
||||
]
|
||||
|
||||
# CRM Config
|
||||
TWENTY_TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzMzMjg0NDMsImV4cCI6MTgwNDc4MTY0MiwianRpIjoiMjBmMTJjOTAtNGQwNy00YmY2LWIzOTctNmM2ZTczOWYxOGM4In0.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930"
|
||||
TWENTY_BASE = "https://salesforce.hoaledgeriq.com/rest"
|
||||
|
||||
def log(msg):
|
||||
ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
||||
line = f"[{ts}] {msg}"
|
||||
print(line)
|
||||
with open(LOG_FILE, 'a') as f:
|
||||
f.write(line + '\n')
|
||||
|
||||
def load_state():
|
||||
if STATE_FILE.exists():
|
||||
with open(STATE_FILE) as f:
|
||||
return json.load(f)
|
||||
return {
|
||||
"metro_index": 0,
|
||||
"processed_domains": [],
|
||||
"leads_found": 0,
|
||||
"cycle_count": 0
|
||||
}
|
||||
|
||||
def save_state(state):
|
||||
with open(STATE_FILE, 'w') as f:
|
||||
json.dump(state, f, indent=2)
|
||||
|
||||
def extract_domain(url):
|
||||
try:
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(url)
|
||||
domain = parsed.netloc.lower()
|
||||
if domain.startswith('www.'):
|
||||
domain = domain[4:]
|
||||
return domain
|
||||
except:
|
||||
return None
|
||||
|
||||
def is_hoa_domain(domain):
|
||||
if not domain:
|
||||
return False
|
||||
domain_lower = domain.lower()
|
||||
hoa_keywords = ['hoa', 'homeowners', 'association', 'community', 'condo', 'village', 'creek', 'farms', 'estates']
|
||||
return any(kw in domain_lower for kw in hoa_keywords)
|
||||
|
||||
def search_web(query, count=5):
|
||||
log(f"SEARCH: {query}")
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['openclaw', 'web-search', query, '--count', str(count)],
|
||||
capture_output=True, text=True, timeout=60
|
||||
)
|
||||
if result.returncode == 0 and result.stdout:
|
||||
urls = []
|
||||
for line in result.stdout.split('\n'):
|
||||
if line.strip().startswith('http'):
|
||||
urls.append(line.strip())
|
||||
# Filter to HOA domains
|
||||
seen = set()
|
||||
unique = []
|
||||
for url in urls:
|
||||
dom = extract_domain(url)
|
||||
if dom and dom not in seen and is_hoa_domain(dom):
|
||||
seen.add(dom)
|
||||
unique.append(url)
|
||||
return unique
|
||||
except Exception as e:
|
||||
log(f"Search error: {e}")
|
||||
return []
|
||||
|
||||
def fetch_page(url, max_chars=1500):
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['openclaw', 'web-fetch', url, '--max-chars', str(max_chars)],
|
||||
capture_output=True, text=True, timeout=30
|
||||
)
|
||||
if result.returncode == 0:
|
||||
return result.stdout
|
||||
except Exception as e:
|
||||
log(f"Fetch error: {e}")
|
||||
return None
|
||||
|
||||
def extract_emails(text):
|
||||
if not text:
|
||||
return []
|
||||
pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
|
||||
emails = re.findall(pattern, text)
|
||||
bad = ['example', 'test', 'domain', 'email', 'noreply', 'no-reply', '@gmail.com', '@yahoo.com', '@hotmail.com']
|
||||
filtered = [e.lower() for e in emails if not any(b in e.lower() for b in bad)]
|
||||
return list(set(filtered))[:3]
|
||||
|
||||
def extract_phones(text):
|
||||
if not text:
|
||||
return []
|
||||
pattern = r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
|
||||
phones = re.findall(pattern, text)
|
||||
return list(set(phones))[:2]
|
||||
|
||||
def extract_hoa_name(content, domain):
|
||||
if not content:
|
||||
return domain.replace('-', ' ').title()
|
||||
# Look for title
|
||||
match = re.search(r'#\s*(.+)', content)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
# Look for HOA name pattern
|
||||
match = re.search(r'([A-Z][A-Za-z\s]+(?:HOA|Homeowners|Community|Association))', content)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
return domain.replace('-', ' ').title()
|
||||
|
||||
def assess_quality(emails, phones):
|
||||
score = 0
|
||||
if emails:
|
||||
score += len(emails) * 3
|
||||
if phones:
|
||||
score += len(phones) * 2
|
||||
if score >= 7:
|
||||
return "HOT"
|
||||
elif score >= 4:
|
||||
return "WARM"
|
||||
return "COLD"
|
||||
|
||||
def push_to_crm(lead):
|
||||
try:
|
||||
body = f"""## 🎯 HOA Prospect - {lead['quality']}
|
||||
|
||||
**HOA Name:** {lead.get('hoa_name', 'Unknown')}
|
||||
**Metro:** {lead['metro']}
|
||||
**Website:** {lead['url']}
|
||||
**Domain:** {lead['domain']}
|
||||
"""
|
||||
if lead.get('emails'):
|
||||
body += f"**Email(s):** {', '.join(lead['emails'])}\n"
|
||||
if lead.get('phones'):
|
||||
body += f"**Phone(s):** {', '.join(lead['phones'])}\n"
|
||||
|
||||
body += f"\n**Source:** Prospector v3\n**Found:** {datetime.now().strftime('%Y-%m-%d %H:%M')}"
|
||||
|
||||
esc_body = json.dumps(body)
|
||||
note_data = f'{{"title":"🎯 {lead["quality"]}: {lead["hoa_name"]} | {lead["metro"]}","bodyV2":{{"markdown":{esc_body}}}}}'
|
||||
|
||||
curl_cmd = [
|
||||
'curl', '-s', '-X', 'POST',
|
||||
f'{TWENTY_BASE}/notes',
|
||||
'-H', f'Authorization: Bearer {TWENTY_TOKEN}',
|
||||
'-H', 'Content-Type: application/json',
|
||||
'-d', note_data
|
||||
]
|
||||
|
||||
result = subprocess.run(curl_cmd, capture_output=True, text=True, timeout=10)
|
||||
if result.returncode == 0:
|
||||
log(f"CRM PUSH: {lead['hoa_name']} ({lead['quality']})")
|
||||
return True
|
||||
else:
|
||||
log(f"CRM FAIL: {result.stderr[:100]}")
|
||||
except Exception as e:
|
||||
log(f"CRM ERROR: {e}")
|
||||
return False
|
||||
|
||||
def save_lead(lead):
|
||||
lead_file = LEADS_DIR / f"{lead['domain']}.json"
|
||||
with open(lead_file, 'w') as f:
|
||||
json.dump(lead, f, indent=2)
|
||||
|
||||
def main():
|
||||
log("=== Sales Prospector v3 Started ===")
|
||||
state = load_state()
|
||||
cycle = state['cycle_count']
|
||||
|
||||
while True:
|
||||
cycle += 1
|
||||
metro_idx = state['metro_index']
|
||||
metro = METROS[metro_idx]
|
||||
|
||||
log(f"=== CYCLE {cycle}: {metro} ===")
|
||||
|
||||
# Search queries
|
||||
queries = [q.format(metro=metro) for q in SEARCHES_PER_METRO]
|
||||
|
||||
for query_idx, query in enumerate(queries):
|
||||
log(f"QUERY: {query}")
|
||||
urls = search_web(query, count=3)
|
||||
log(f"Found {len(urls)} potential HOA sites")
|
||||
|
||||
for url in urls:
|
||||
domain = extract_domain(url)
|
||||
if domain in state['processed_domains']:
|
||||
log(f"SKIP: Already processed {domain}")
|
||||
continue
|
||||
|
||||
log(f"FETCH: {url}")
|
||||
content = fetch_page(url)
|
||||
if not content:
|
||||
state['processed_domains'].append(domain)
|
||||
save_state(state)
|
||||
continue
|
||||
|
||||
emails = extract_em
|
||||
Reference in New Issue
Block a user