- Created chatwoot-agent-bot/ with Node.js webhook server - Bot detects intent (greeting, billing, technical, features, account) - Auto-responds from FAQ knowledge base or escalates to human - FAQ-KB.md: Living knowledge base that grows with customer questions - CHATWOOT-SETUP.md: Complete deployment and configuration guide - Supports Telegram notifications on escalation - Bot runs on port 3001, ready for Chatwoot webhook integration
132 lines
5.9 KiB
Python
132 lines
5.9 KiB
Python
#!/usr/bin/env python3
|
|
"""Sales Prospector v7 - Less aggressive filtering"""
|
|
import json
import os
import re
import time
import urllib.parse
import urllib.request
from datetime import datetime
from pathlib import Path
|
|
|
|
# Working directories live next to this script and are created on import.
SCRIPT_DIR = Path(__file__).parent
STATE_DIR = SCRIPT_DIR / "state"
LOG_DIR = SCRIPT_DIR / "logs"
LEADS_DIR = SCRIPT_DIR / "leads"
for d in (STATE_DIR, LOG_DIR, LEADS_DIR):
    d.mkdir(parents=True, exist_ok=True)

# Persistent run state, and the log file named after the date at import time.
STATE_FILE = STATE_DIR / "prospector-v7-state.json"
LOG_FILE = LOG_DIR / f"prospector-v7-{datetime.now().strftime('%Y%m%d')}.log"

# Metro areas the main loop cycles through, one per cycle.
METROS = [
    "Charlotte NC",
    "Atlanta GA",
    "Orlando FL",
    "Phoenix AZ",
    "Austin TX",
    "Denver CO",
    "Nashville TN",
    "Raleigh NC",
    "Tampa FL",
    "Dallas TX",
    "Houston TX",
    "Miami FL",
]
|
|
|
|
# SECURITY: these credentials are committed in source. They should be rotated
# and supplied only through the environment; the literals remain here solely
# as a fallback so existing deployments keep working until that is done.
# An empty environment value is treated as unset.
BRAVE_KEY = os.environ.get("BRAVE_KEY") or "BSACPtwjz5lrsXC10pwjFVqzFGN2gr4"
TWENTY_TOKEN = os.environ.get("TWENTY_TOKEN") or "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzczMzI4NDQzLCJleHAiOjE4MDQ3ODE2NDIsImp0aSI6IjIwZjEyYzkwLTRkMDctNGJmNi1iMzk3LTZjNmU3MzlmMThjOCJ9.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930"
# Base URL of the CRM REST API that push_crm() posts notes to.
TWENTY_BASE = "https://salesforce.hoaledgeriq.com/rest"
# Time of the most recent search request; read and updated by rate_limited_sleep().
LAST_SEARCH = 0
|
|
|
|
def log(msg):
    """Print a timestamped message and append it to the current day's log file.

    The log path is derived from the current date on every call, so a process
    that keeps running past midnight starts writing to the new day's file
    instead of staying on the file chosen at import time. The file is opened
    as UTF-8 so messages containing non-ASCII text (page content, error
    messages) do not depend on the locale encoding.
    """
    now = datetime.now()
    line = f"[{now.strftime('%H:%M:%S')}] {msg}"
    print(line)
    log_path = LOG_DIR / f"prospector-v7-{now.strftime('%Y%m%d')}.log"
    with open(log_path, 'a', encoding='utf-8') as f:
        f.write(line + "\n")
|
|
|
|
def rate_limited_sleep(min_interval=1.5):
    """Block until at least ``min_interval`` seconds separate consecutive calls.

    The time of the previous call is kept in the module-level ``LAST_SEARCH``.
    A monotonic clock is used so that wall-clock adjustments (NTP, DST, manual
    changes) cannot produce a negative or inflated elapsed time.

    Args:
        min_interval: Minimum spacing between calls, in seconds.
    """
    global LAST_SEARCH
    elapsed = time.monotonic() - LAST_SEARCH
    if elapsed < min_interval:
        time.sleep(min_interval - elapsed)
    LAST_SEARCH = time.monotonic()
|
|
|
|
def load_state():
    """Load the persisted run state, falling back to defaults when unusable.

    Returns:
        A dict with at least the keys ``metro_idx``, ``domains``, ``leads``
        and ``cycle``. Keys missing from the saved file are filled in from
        the defaults; a missing, unreadable, malformed or non-object state
        file yields the defaults unchanged.
    """
    defaults = {"metro_idx": 0, "domains": [], "leads": 0, "cycle": 0}
    if not STATE_FILE.exists():
        return defaults
    try:
        saved = json.loads(STATE_FILE.read_text())
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        log(f"State file unreadable, starting fresh: {e}")
        return defaults
    if not isinstance(saved, dict):
        log("State file has unexpected shape, starting fresh")
        return defaults
    # Saved values win; defaults only fill in keys the file lacks.
    return {**defaults, **saved}
|
|
|
|
def save_state(s):
    """Persist the run state ``s`` to ``STATE_FILE`` as indented JSON.

    The data is written to a sibling temporary file first and then moved over
    the real file, so an interruption mid-write cannot leave a truncated
    state file behind.

    Returns:
        The number of characters written, as ``Path.write_text`` reports it.
    """
    tmp_file = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    written = tmp_file.write_text(json.dumps(s, indent=2))
    tmp_file.replace(STATE_FILE)
    return written
|
|
|
|
def search_brave(query, count=10):
    """Run a Brave web search and return the URLs of the results.

    Waits for the rate limiter before issuing the request and logs the query
    and the outcome.

    Args:
        query: Search string; it is URL-quoted before being sent.
        count: Value passed as the ``count`` query parameter.

    Returns:
        A list of non-empty ``url`` values taken from ``web.results`` in the
        JSON response, or an empty list if anything goes wrong (the error is
        logged, not raised).
    """
    rate_limited_sleep()
    log(f"SEARCH: {query}")
    try:
        url = f"https://api.search.brave.com/res/v1/web/search?q={urllib.parse.quote(query)}&count={count}"
        req = urllib.request.Request(url, headers={"X-Subscription-Token": BRAVE_KEY, "Accept": "application/json"})
        # Close the response deterministically; this runs in an endless loop.
        with urllib.request.urlopen(req, timeout=30) as resp:
            payload = json.loads(resp.read().decode())
        results = payload.get('web', {}).get('results', [])
        urls = [x.get('url') for x in results if x.get('url')]
        log(f" -> {len(urls)} URLs")
        return urls
    except Exception as e:
        log(f" -> Error: {e}")
        return []
|
|
|
|
def fetch_page(url):
    """Download ``url`` and return up to 3000 characters of its visible text.

    ``<script>`` and ``<style>`` elements are removed, remaining tags are
    replaced by spaces and runs of whitespace are collapsed to one space.

    Returns:
        The cleaned text truncated to 3000 characters, or ``""`` if the page
        could not be fetched.
    """
    try:
        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=10) as resp:
            html = resp.read().decode('utf-8', errors='ignore')
    except Exception:
        # Best-effort fetch: any failure means "no text". Catching Exception
        # rather than using a bare except lets KeyboardInterrupt/SystemExit
        # stop the program.
        return ""
    text = re.sub(r'<script[^>]*>.*?</script>', '', html, flags=re.DOTALL | re.IGNORECASE)
    text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE)
    text = re.sub(r'<[^>]+>', ' ', text)
    return re.sub(r'\s+', ' ', text)[:3000]
|
|
|
|
def extract_domain(url):
    """Return the lower-cased network location of ``url`` without a ``www.`` prefix.

    Returns:
        The netloc (including any port) with a leading ``www.`` removed, an
        empty string when the URL has no netloc, or ``None`` when the URL
        cannot be parsed.
    """
    try:
        host = urllib.parse.urlparse(url).netloc.lower()
        return host[4:] if host.startswith('www.') else host
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed URL (e.g. unbalanced IPv6 bracket);
        # TypeError/AttributeError: non-string input.
        return None
|
|
|
|
def is_hoa(d):
    """Tell whether domain ``d`` looks like an HOA site.

    A domain qualifies when it contains at least one HOA-related keyword and
    none of the excluded site markers; matching is a case-insensitive
    substring test. Falsy input yields ``False``.
    """
    if not d:
        return False
    host = d.lower()
    excluded = ('google', 'facebook', 'yelp', 'bbb', 'wiki', 'reddit', 'linkedin', 'blog', 'news')
    wanted = ('hoa', 'homeowners', 'association', 'community', 'condo', 'village', 'creek', 'estates')
    # An excluded marker vetoes the domain regardless of keywords.
    for marker in excluded:
        if marker in host:
            return False
    for keyword in wanted:
        if keyword in host:
            return True
    return False
|
|
|
|
def extract_emails(text):
    """Extract up to five distinct e-mail addresses from ``text``.

    Addresses are returned in order of first appearance, so the same page
    always yields the same result. Duplicates are detected
    case-insensitively (the first spelling seen is kept), and the
    placeholder/no-reply filter is case-insensitive as well.

    Returns:
        A list of at most five addresses; empty for falsy input.
    """
    if not text:
        return []
    pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    # Only filter obvious spam
    blocked = ('example.com', 'test.com', 'noreply@', 'no-reply@')
    unique = {}  # lower-cased address -> first spelling seen (insertion-ordered)
    for email in re.findall(pattern, text):
        key = email.lower()
        if len(email) <= 8 or key in unique:
            continue
        if any(b in key for b in blocked):
            continue
        unique[key] = email
    return list(unique.values())[:5]
|
|
|
|
def save_lead(lead):
    """Write ``lead`` as indented JSON to ``LEADS_DIR/<domain>.json`` and log it.

    Any ``/`` in the lead's domain is replaced by ``_`` in the file name.
    """
    domain = lead['domain']
    target = LEADS_DIR / (domain.replace('/', '_') + ".json")
    payload = json.dumps(lead, indent=2)
    target.write_text(payload)
    log(f"SAVED: {domain}")
|
|
|
|
def push_crm(lead):
    """POST ``lead`` to the CRM as a note.

    Args:
        lead: Dict providing ``quality``, ``domain``, ``name``, ``metro``,
            ``url`` and ``emails`` (an iterable of strings).

    Returns:
        ``True`` when the request completed without raising, ``False``
        otherwise. Failures are logged rather than raised so one bad push
        does not stop the caller.
    """
    try:
        note = {"title": f"{lead['quality']}: {lead['domain']}", "bodyV2": {"markdown": f"## {lead['quality']} Lead\n\n**HOA:** {lead['name']}\n**Metro:** {lead['metro']}\n**Site:** {lead['url']}\n**Emails:** {', '.join(lead['emails'])}"}}
        req = urllib.request.Request(
            f"{TWENTY_BASE}/notes",
            headers={"Authorization": f"Bearer {TWENTY_TOKEN}", "Content-Type": "application/json"},
            data=json.dumps(note).encode(),
            method='POST',
        )
        # The response body is not needed; the context manager just closes it.
        with urllib.request.urlopen(req, timeout=10):
            pass
        log(f"CRM: {lead['domain']}")
        return True
    except Exception as e:
        log(f"CRM ERROR: {e}")
        return False
|
|
|
|
def main():
    """Run the prospecting loop forever, one metro per cycle.

    Each cycle runs every query template for the current metro, takes the
    first five result URLs of each search, and for every unseen domain that
    passes ``is_hoa`` fetches the page and extracts e-mail addresses. A page
    with at least one address becomes a lead that is saved to disk and pushed
    to the CRM. State is saved after every lead and at the end of every
    cycle, so an interrupted run does not re-process leads it already pushed.
    """
    log("=== Prospector v7 Started ===")
    s = load_state()
    queries = ["{metro} HOA contact email", "{metro} homeowners association", "{metro} HOA management"]
    max_leads = 25
    # Set mirror of s['domains'] for O(1) "already seen" checks; the list
    # itself is kept because it is what gets persisted.
    seen = set(s['domains'])

    # NOTE(review): once s['leads'] reaches max_leads this loop keeps cycling
    # every 20 s without searching. Confirm whether it should exit instead.
    while True:
        s['cycle'] += 1
        metro = METROS[s['metro_idx'] % len(METROS)]
        log(f"CYCLE {s['cycle']}: {metro}")
        start, found = time.time(), 0

        for tmpl in queries:
            if s['leads'] >= max_leads:
                break
            for url in search_brave(tmpl.format(metro=metro), 8)[:5]:
                if s['leads'] >= max_leads:
                    break
                dom = extract_domain(url)
                if not dom or not is_hoa(dom) or dom in seen:
                    continue
                # Record the domain before fetching so it is never retried.
                seen.add(dom)
                s['domains'].append(dom)
                text = fetch_page(url)
                emails = extract_emails(text) if text else []
                if not emails:
                    continue
                name = dom.split('.')[0].replace('-', ' ').title() + " HOA"
                lead = {'name': name, 'metro': metro, 'url': url, 'domain': dom, 'emails': emails, 'quality': "WARM", 'found': datetime.now().isoformat()}
                save_lead(lead)
                push_crm(lead)
                s['leads'] += 1
                found += 1
                # Persist right away: a crash later in the cycle must not
                # cause this lead to be pushed to the CRM again on restart.
                save_state(s)
                log(f"LEAD {s['leads']}: {name} ({len(emails)} emails)")

        s['metro_idx'] = (s['metro_idx'] + 1) % len(METROS)
        save_state(s)
        log(f"Done: {found} leads, {s['leads']} total, {time.time()-start:.1f}s")
        # Back off when a cycle produced nothing.
        if found == 0:
            time.sleep(20)
|
|
|
|
# Start the prospector only when run as a script, not when imported.
if __name__ == "__main__":
    main()
|