feat: Add Chatwoot Agent Bot prototype and FAQ knowledge base

- Created chatwoot-agent-bot/ with Node.js webhook server
- Bot detects intent (greeting, billing, technical, features, account)
- Auto-responds from FAQ knowledge base or escalates to human
- FAQ-KB.md: Living knowledge base that grows with customer questions
- CHATWOOT-SETUP.md: Complete deployment and configuration guide
- Supports Telegram notifications on escalation
- Bot runs on port 3001, ready for Chatwoot webhook integration
This commit is contained in:
2026-04-01 16:26:05 -04:00
parent 7ba19752de
commit 5319bcd30b
1074 changed files with 456376 additions and 0 deletions

View File

@@ -0,0 +1,178 @@
#!/usr/bin/env python3
"""Sales Prospector v5 - Built-in urllib, finds HOA leads"""
import json
import os
import re
import time
import urllib.error
import urllib.parse
import urllib.request
from datetime import datetime
from pathlib import Path
# Working directories live next to this script and are created on import.
SCRIPT_DIR = Path(__file__).parent
STATE_DIR = SCRIPT_DIR / "state"
LOG_DIR = SCRIPT_DIR / "logs"
LEADS_DIR = SCRIPT_DIR / "leads"
for d in [STATE_DIR, LOG_DIR, LEADS_DIR]:
    d.mkdir(parents=True, exist_ok=True)

# Persisted run state, and a log file named after the date at import time.
STATE_FILE = STATE_DIR / "prospector-v5-state.json"
LOG_FILE = LOG_DIR / f"prospector-v5-{datetime.now().strftime('%Y%m%d')}.log"

# Metro areas searched in rotation.
METROS = ["Charlotte NC", "Atlanta GA", "Orlando FL", "Phoenix AZ", "Austin TX", "Denver CO",
          "Nashville TN", "Raleigh NC", "Tampa FL", "Dallas TX", "Houston TX", "Miami FL"]

# SECURITY: credentials must come from the environment, never from source
# control. Set BRAVE_KEY and TWENTY_TOKEN before running; when unset they are
# empty strings and the corresponding API calls will be rejected (and logged).
# NOTE(review): literal values that were previously committed here should be
# treated as leaked and rotated.
BRAVE_KEY = os.environ.get("BRAVE_KEY", "")
TWENTY_TOKEN = os.environ.get("TWENTY_TOKEN", "")
TWENTY_BASE = "https://salesforce.hoaledgeriq.com/rest"
def log(msg):
    """Print a timestamped message and append it to LOG_FILE.

    Args:
        msg: Text to record; it is prefixed with the current ``HH:MM:SS`` time.
    """
    ts = datetime.now().strftime('%H:%M:%S')
    line = f"[{ts}] {msg}"
    print(line)
    # Explicit UTF-8 so a non-ASCII URL or error message cannot raise
    # UnicodeEncodeError where the platform default encoding is narrower.
    with open(LOG_FILE, 'a', encoding='utf-8') as f:
        f.write(line + '\n')
def load_state():
    """Load the persisted prospector state, falling back to a fresh one.

    Returns:
        dict with the keys ``metro_idx``, ``domains``, ``leads`` and ``cycle``.
        Keys missing from the saved file keep their default value; an
        unreadable or malformed file is logged and yields the fresh state.
    """
    state = {"metro_idx": 0, "domains": [], "leads": 0, "cycle": 0}
    if not STATE_FILE.exists():
        return state
    try:
        saved = json.loads(STATE_FILE.read_text())
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
        log(f"STATE: ignoring unreadable {STATE_FILE.name}: {e}")
        return state
    if isinstance(saved, dict):
        state.update(saved)
    return state
def save_state(s):
    """Persist the state dict *s* to STATE_FILE as indented JSON.

    The JSON is first written to a sibling ``.tmp`` file and then renamed over
    STATE_FILE, so an interruption during the write cannot leave a truncated
    state file behind.

    Args:
        s: JSON-serialisable state dict.
    """
    tmp = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    tmp.write_text(json.dumps(s, indent=2))
    tmp.replace(STATE_FILE)
def search_brave(q, count=8):
    """Query the Brave web-search API and return the result URLs.

    Args:
        q: Search query string.
        count: Number of results to request (default 8).

    Returns:
        List of non-empty result URLs; an empty list if the request or the
        response parsing fails (the error is logged).
    """
    log(f"SEARCH: {q}")
    params = urllib.parse.urlencode({'q': q, 'count': count})
    url = f"https://api.search.brave.com/res/v1/web/search?{params}"
    req = urllib.request.Request(url, headers={"X-Subscription-Token": BRAVE_KEY, "Accept": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=30) as r:
            data = json.loads(r.read().decode())
        # Tolerate "web": null / "results": null as well as missing keys.
        results = (data.get('web') or {}).get('results') or []
        # Skip entries without a usable URL so callers never receive None.
        urls = [x['url'] for x in results if isinstance(x, dict) and x.get('url')]
        log(f" -> {len(urls)} URLs")
        return urls
    except Exception as e:
        # Best-effort boundary: any network or parse failure means "no results".
        log(f" -> Error: {e}")
        return []
def fetch(url, max_chars=2500, max_bytes=1_000_000):
    """Download *url* and return its text with markup removed.

    Args:
        url: Page address to request.
        max_chars: Maximum length of the returned text (default 2500).
        max_bytes: Upper bound on the number of response bytes read, so a very
            large response cannot exhaust memory.

    Returns:
        The page text with ``<script>``/``<style>`` blocks and all tags
        removed and whitespace runs collapsed to single spaces, truncated to
        *max_chars*; ``""`` if the request fails (the error is logged).
    """
    log(f"FETCH: {url[:50]}...")
    try:
        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=10) as r:
            # Always decoded as UTF-8; undecodable bytes are dropped.
            html = r.read(max_bytes).decode('utf-8', errors='ignore')
        # IGNORECASE so <SCRIPT>/<Style> blocks are stripped too.
        flags = re.DOTALL | re.IGNORECASE
        text = re.sub(r'<script.*?</script>', '', html, flags=flags)
        text = re.sub(r'<style.*?</style>', '', text, flags=flags)
        text = re.sub(r'<[^>]+>', ' ', text)
        return re.sub(r'\s+', ' ', text)[:max_chars]
    except Exception as e:
        # Best-effort boundary: a page that cannot be fetched is just skipped.
        log(f" -> {e}")
        return ""
def extract_domain(url):
    """Return the lower-cased host of *url* without a leading ``www.``.

    Args:
        url: Absolute URL string.

    Returns:
        The host name (``""`` when the URL has no network location), or
        ``None`` when *url* is not a string or cannot be parsed.
    """
    if not isinstance(url, str):
        return None
    try:
        # .hostname (unlike .netloc) is already lower-cased and excludes any
        # "user:pass@" prefix and ":port" suffix, so they cannot leak into the
        # value used for de-duplication and file names.
        host = urllib.parse.urlparse(url).hostname or ""
    except ValueError:
        # urlparse raises ValueError for malformed bracketed hosts.
        return None
    return host[4:] if host.startswith('www.') else host
def is_hoa(d):
    """Keyword heuristic: does the domain string *d* look like an HOA site?

    Returns True when the lower-cased *d* contains at least one wanted keyword
    and none of the blocked ones; False otherwise, including for empty input.
    """
    if not d:
        return False
    lowered = d.lower()
    blocked = ('google', 'facebook', 'yelp', 'bbb', 'wiki', 'reddit', 'linkedin', 'blog')
    wanted = ('hoa', 'homeowners', 'association', 'community', 'condo', 'village', 'creek')
    # A single blocked keyword vetoes the domain regardless of wanted hits.
    for word in blocked:
        if word in lowered:
            return False
    for word in wanted:
        if word in lowered:
            return True
    return False
def extract_emails(t):
    """Extract up to three distinct contact e-mail addresses from text *t*.

    Addresses are lower-cased before filtering so the blacklist is
    case-insensitive, and duplicates are removed in order of first appearance
    so the same input always yields the same result.

    Args:
        t: Text to scan; may be empty or None.

    Returns:
        List of at most three lower-cased addresses, each longer than 12
        characters and containing none of the blacklisted fragments.
    """
    if not t:
        return []
    found = re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', t)
    bad = ['example', 'test', 'noreply', 'info@', 'support@', 'admin@', '@gmail.com']
    # dict.fromkeys de-duplicates while keeping first-seen order.
    unique = dict.fromkeys(e.lower() for e in found)
    kept = [e for e in unique if len(e) > 12 and not any(b in e for b in bad)]
    return kept[:3]
def extract_phones(t):
    """Extract up to two distinct 10-digit phone numbers from text *t*.

    Args:
        t: Text to scan; may be empty or None.

    Returns:
        List of at most two digit-only strings, in order of first appearance.
    """
    if not t:
        return []
    # The digit lookarounds stop the pattern from matching a 10-digit slice
    # of a longer digit run (IDs, timestamps, numbers with a fused prefix).
    matches = re.findall(r'(?<!\d)\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)', t)
    digits = (re.sub(r'[^\d]', '', m) for m in matches)
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(digits))[:2]
def save_lead(lead):
    """Write *lead* as indented JSON into LEADS_DIR and log the save.

    The file is named after ``lead['domain']`` with any ``/`` replaced by
    ``_`` and a ``.json`` suffix.
    """
    domain = lead['domain']
    safe_name = domain.replace('/', '_')
    target = LEADS_DIR / f"{safe_name}.json"
    payload = json.dumps(lead, indent=2)
    target.write_text(payload)
    log(f"SAVED: {domain}")
def push_crm(lead):
    """POST *lead* as a markdown note to ``{TWENTY_BASE}/notes``.

    Returns:
        True when the response status is 200 or 201; False for any other
        status or when building or sending the request raises (logged).
    """
    try:
        markdown = (
            f"## {lead['quality']} Lead\n\n"
            f"**HOA:** {lead['name']}\n"
            f"**Metro:** {lead['metro']}\n"
            f"**Site:** {lead['url']}\n"
            f"**Emails:** {', '.join(lead['emails'])}\n"
            f"**Phones:** {', '.join(lead['phones'])}"
        )
        payload = {
            "title": f"{lead['quality']}: {lead['domain']}",
            "bodyV2": {"markdown": markdown},
        }
        encoded = json.dumps(payload).encode('utf-8')
        request = urllib.request.Request(
            f"{TWENTY_BASE}/notes",
            headers={"Authorization": f"Bearer {TWENTY_TOKEN}", "Content-Type": "application/json"},
            data=encoded,
            method='POST',
        )
        with urllib.request.urlopen(request, timeout=10) as response:
            status = response.status
            log(f"CRM: {lead['domain']} ({status})")
            return status in (200, 201)
    except Exception as err:
        log(f"CRM error: {err}")
        return False
def main(target_leads=25):
    """Run prospecting cycles over METROS until *target_leads* leads exist.

    Each cycle takes the next metro, runs every query template through
    search_brave, and for the first five results of each query fetches pages
    whose domain passes is_hoa and has not been seen before. A page yielding
    at least one e-mail or phone number becomes a lead that is saved to disk
    and pushed to the CRM. State is persisted after every cycle, including
    the cycle in which the target is reached.

    NOTE(review): the original indentation was lost, so the loop nesting is
    reconstructed here. This version returns once the target is reached
    instead of looping forever; confirm against the intended behaviour.

    Args:
        target_leads: Total number of leads (across runs, as stored in the
            state file) at which prospecting stops. Defaults to 25.
    """
    log("=== Prospector v5 Started ===")
    s = load_state()
    queries = ["{metro} HOA contact email", "{metro} homeowners association",
               "{metro} HOA management", "{metro} community association board"]
    # Set mirror of s['domains'] for O(1) membership tests; the list itself
    # stays in the state because it is what gets serialised to JSON.
    seen = set(s['domains'])
    while s['leads'] < target_leads:
        s['cycle'] += 1
        metro = METROS[s['metro_idx'] % len(METROS)]
        log(f"CYCLE {s['cycle']}: {metro}")
        start = time.time()
        found = 0
        for tmpl in queries:
            if s['leads'] >= target_leads:
                break
            q = tmpl.format(metro=metro)
            urls = search_brave(q, 8)
            for url in urls[:5]:
                dom = extract_domain(url)
                if not dom or not is_hoa(dom) or dom in seen:
                    continue
                # Mark the domain as visited even if the fetch fails, so it
                # is not retried on every cycle.
                seen.add(dom)
                s['domains'].append(dom)
                content = fetch(url)
                if not content:
                    continue
                emails = extract_emails(content)
                phones = extract_phones(content)
                if not (emails or phones):
                    continue
                name = dom.split('.')[0].replace('-', ' ').title() + " HOA"
                qual = "HOT" if len(emails) >= 2 else "WARM" if emails else "COLD"
                lead = {'name': name, 'metro': metro, 'url': url, 'domain': dom,
                        'emails': emails, 'phones': phones, 'quality': qual,
                        'found': datetime.now().isoformat()}
                save_lead(lead)
                push_crm(lead)
                s['leads'] += 1
                found += 1
                log(f"LEAD {s['leads']}: {name} ({qual}) - {len(emails)} emails")
                if s['leads'] >= target_leads:
                    break
        s['metro_idx'] = (s['metro_idx'] + 1) % len(METROS)
        save_state(s)
        elapsed = time.time() - start
        log(f"Done: {found} leads, {s['leads']} total, {elapsed:.1f}s")
        # Back off only when a cycle was fruitless and more work remains.
        if found == 0 and s['leads'] < target_leads:
            time.sleep(20)
    log(f"TARGET REACHED: {s['leads']} leads!")


if __name__ == "__main__":
    main()