feat: Add Chatwoot Agent Bot prototype and FAQ knowledge base

- Created chatwoot-agent-bot/ with Node.js webhook server
- Bot detects intent (greeting, billing, technical, features, account)
- Auto-responds from FAQ knowledge base or escalates to human
- FAQ-KB.md: Living knowledge base that grows with customer questions
- CHATWOOT-SETUP.md: Complete deployment and configuration guide
- Supports Telegram notifications on escalation
- Bot runs on port 3001, ready for Chatwoot webhook integration
This commit is contained in:
2026-04-01 16:26:05 -04:00
parent 7ba19752de
commit 5319bcd30b
1074 changed files with 456376 additions and 0 deletions

View File

@@ -0,0 +1,233 @@
#!/usr/bin/env python3
"""
Sales Prospector v3 - Working HOA Lead Generation
Actually searches, extracts, and pushes real leads to CRM
"""
import json
import os
import re
import time
import subprocess
from datetime import datetime
from pathlib import Path
# Config
SCRIPT_DIR = Path(__file__).parent.absolute()
STATE_DIR = SCRIPT_DIR / "state"
LOG_DIR = SCRIPT_DIR / "logs"
LEADS_DIR = SCRIPT_DIR / "leads"
# Ensure all working directories exist before anything logs or saves.
for d in [STATE_DIR, LOG_DIR, LEADS_DIR]:
    d.mkdir(parents=True, exist_ok=True)
STATE_FILE = STATE_DIR / "prospector-v3-state.json"
# One log file per calendar day.
LOG_FILE = LOG_DIR / f"prospector-v3-{datetime.now().strftime('%Y%m%d')}.log"
METROS = ["Charlotte NC", "Atlanta GA", "Orlando FL", "Phoenix AZ"]
# Extended search queries for better coverage
SEARCHES_PER_METRO = [
    '{metro} HOA contact email',
    '{metro} homeowners association website',
    '{metro} HOA management contact',
    '{metro} community association board',
    '{metro} condo association contact',
]
# CRM Config
# SECURITY: a live API key was hard-coded here and is now in git history --
# rotate it. Prefer the TWENTY_TOKEN environment variable; the embedded value
# remains only as a backward-compatible fallback until rotation.
TWENTY_TOKEN = os.environ.get(
    "TWENTY_TOKEN",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzMzMjg0NDMsImV4cCI6MTgwNDc4MTY0MiwianRpIjoiMjBmMTJjOTAtNGQwNy00YmY2LWIzOTctNmM2ZTczOWYxOGM4In0.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930",
)
TWENTY_BASE = "https://salesforce.hoaledgeriq.com/rest"
def log(msg):
    """Print *msg* and append it, timestamped, to today's log file."""
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    entry = f"[{stamp}] {msg}"
    print(entry)
    with open(LOG_FILE, 'a') as fh:
        fh.write(entry + '\n')
def load_state():
    """Load persisted crawl state from STATE_FILE, or a fresh default dict."""
    if not STATE_FILE.exists():
        # First run: nothing processed yet, start at the first metro.
        return {
            "metro_index": 0,
            "processed_domains": [],
            "leads_found": 0,
            "cycle_count": 0
        }
    with open(STATE_FILE) as fh:
        return json.load(fh)
def save_state(state):
    """Overwrite STATE_FILE with *state* serialized as pretty-printed JSON."""
    STATE_FILE.write_text(json.dumps(state, indent=2))
def extract_domain(url):
    """Return the lowercase host of *url* with any 'www.' prefix stripped.

    Returns None when parsing fails; returns '' for strings with no netloc
    (e.g. a bare path), matching the original falsy behavior.
    """
    try:
        from urllib.parse import urlparse
        domain = urlparse(url).netloc.lower()
        if domain.startswith('www.'):
            domain = domain[4:]
        return domain
    # Narrowed from a bare `except:` which also swallowed SystemExit /
    # KeyboardInterrupt; urlparse can raise ValueError/TypeError on bad input.
    except Exception:
        return None
def is_hoa_domain(domain):
    """True when the domain name contains an HOA/community-looking keyword."""
    if not domain:
        return False
    keywords = ('hoa', 'homeowners', 'association', 'community', 'condo',
                'village', 'creek', 'farms', 'estates')
    lowered = domain.lower()
    for kw in keywords:
        if kw in lowered:
            return True
    return False
def search_web(query, count=5):
    """Run an `openclaw web-search` and return result URLs, deduplicated to
    one URL per HOA-looking domain. Returns [] on any failure."""
    log(f"SEARCH: {query}")
    try:
        proc = subprocess.run(
            ['openclaw', 'web-search', query, '--count', str(count)],
            capture_output=True, text=True, timeout=60
        )
        if proc.returncode == 0 and proc.stdout:
            candidates = [ln.strip() for ln in proc.stdout.split('\n')
                          if ln.strip().startswith('http')]
            # Keep the first URL seen for each distinct HOA domain.
            picked = []
            seen_domains = set()
            for candidate in candidates:
                dom = extract_domain(candidate)
                if dom and dom not in seen_domains and is_hoa_domain(dom):
                    seen_domains.add(dom)
                    picked.append(candidate)
            return picked
    except Exception as e:
        log(f"Search error: {e}")
    return []
def fetch_page(url, max_chars=1500):
    """Fetch *url* via `openclaw web-fetch` (capped at max_chars of content).

    Returns the fetched text on success, None on error or non-zero exit."""
    try:
        proc = subprocess.run(
            ['openclaw', 'web-fetch', url, '--max-chars', str(max_chars)],
            capture_output=True, text=True, timeout=30
        )
    except Exception as e:
        log(f"Fetch error: {e}")
        return None
    return proc.stdout if proc.returncode == 0 else None
def extract_emails(text):
    """Extract up to 3 plausible business email addresses from *text*.

    Filters out placeholder/no-reply addresses and free personal mailboxes
    (gmail/yahoo/hotmail). Returns [] for empty/None input.
    """
    if not text:
        return []
    # Fixed: the TLD class was [A-Z|a-z], which also matched a literal '|'.
    pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    emails = re.findall(pattern, text)
    bad = ['example', 'test', 'domain', 'email', 'noreply', 'no-reply', '@gmail.com', '@yahoo.com', '@hotmail.com']
    filtered = [e.lower() for e in emails if not any(b in e.lower() for b in bad)]
    # dict.fromkeys dedupes while keeping first-seen order; list(set(...))
    # returned a nondeterministic order, so the [:3] cut was unstable.
    return list(dict.fromkeys(filtered))[:3]
def extract_phones(text):
    """Extract up to 2 US-style phone numbers from *text* ([] if empty/None)."""
    if not text:
        return []
    pattern = r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
    phones = re.findall(pattern, text)
    # dict.fromkeys dedupes while keeping first-seen order; the previous
    # list(set(...))[:2] picked two numbers in nondeterministic order.
    return list(dict.fromkeys(phones))[:2]
def extract_hoa_name(content, domain):
    """Best-effort HOA display name from page *content*.

    Falls back to a title-cased version of *domain* when no name is found."""
    fallback = domain.replace('-', ' ').title()
    if not content:
        return fallback
    # Prefer a markdown heading ("# Name") if the page has one.
    heading = re.search(r'#\s*(.+)', content)
    if heading:
        return heading.group(1).strip()
    # Otherwise take a capitalized phrase ending in an HOA-ish word.
    named = re.search(r'([A-Z][A-Za-z\s]+(?:HOA|Homeowners|Community|Association))', content)
    if named:
        return named.group(1).strip()
    return fallback
def assess_quality(emails, phones):
    """Grade a lead: each email is worth 3 points, each phone 2.

    Returns "HOT" (>= 7), "WARM" (>= 4), or "COLD" (anything less)."""
    score = 3 * len(emails or []) + 2 * len(phones or [])
    if score >= 7:
        return "HOT"
    if score >= 4:
        return "WARM"
    return "COLD"
def push_to_crm(lead):
    """POST *lead* as a note to the Twenty CRM via curl.

    Returns True when the request succeeds, False on HTTP error, non-zero
    curl exit, or any exception (failures are logged, never raised).
    """
    try:
        # Title previously used lead["hoa_name"] (KeyError if missing) while
        # the body used .get(); use the same safe default for both.
        hoa_name = lead.get('hoa_name', 'Unknown')
        body = f"""## 🎯 HOA Prospect - {lead['quality']}
**HOA Name:** {hoa_name}
**Metro:** {lead['metro']}
**Website:** {lead['url']}
**Domain:** {lead['domain']}
"""
        if lead.get('emails'):
            body += f"**Email(s):** {', '.join(lead['emails'])}\n"
        if lead.get('phones'):
            body += f"**Phone(s):** {', '.join(lead['phones'])}\n"
        body += f"\n**Source:** Prospector v3\n**Found:** {datetime.now().strftime('%Y-%m-%d %H:%M')}"
        # Serialize the whole payload with json.dumps: the old hand-built
        # f-string template produced invalid JSON whenever the HOA name or
        # metro contained a double quote or backslash.
        note_data = json.dumps({
            "title": f"🎯 {lead['quality']}: {hoa_name} | {lead['metro']}",
            "bodyV2": {"markdown": body},
        })
        curl_cmd = [
            # --fail makes curl exit non-zero on HTTP >= 400; previously a
            # 4xx/5xx response was logged as a successful push.
            'curl', '-s', '--fail', '-X', 'POST',
            f'{TWENTY_BASE}/notes',
            '-H', f'Authorization: Bearer {TWENTY_TOKEN}',
            '-H', 'Content-Type: application/json',
            '-d', note_data
        ]
        result = subprocess.run(curl_cmd, capture_output=True, text=True, timeout=10)
        if result.returncode == 0:
            log(f"CRM PUSH: {hoa_name} ({lead['quality']})")
            return True
        else:
            log(f"CRM FAIL: {result.stderr[:100]}")
    except Exception as e:
        log(f"CRM ERROR: {e}")
    return False
def save_lead(lead):
    """Persist *lead* as pretty-printed JSON under LEADS_DIR, named by domain."""
    out_path = LEADS_DIR / f"{lead['domain']}.json"
    with open(out_path, 'w') as fh:
        json.dump(lead, fh, indent=2)
def main():
log("=== Sales Prospector v3 Started ===")
state = load_state()
cycle = state['cycle_count']
while True:
cycle += 1
metro_idx = state['metro_index']
metro = METROS[metro_idx]
log(f"=== CYCLE {cycle}: {metro} ===")
# Search queries
queries = [q.format(metro=metro) for q in SEARCHES_PER_METRO]
for query_idx, query in enumerate(queries):
log(f"QUERY: {query}")
urls = search_web(query, count=3)
log(f"Found {len(urls)} potential HOA sites")
for url in urls:
domain = extract_domain(url)
if domain in state['processed_domains']:
log(f"SKIP: Already processed {domain}")
continue
log(f"FETCH: {url}")
content = fetch_page(url)
if not content:
state['processed_domains'].append(domain)
save_state(state)
continue
emails = extract_em