Files
HOALedgerIQ_Website/agents/sales-prospector/prospector-v14.py
olsch01 5319bcd30b feat: Add Chatwoot Agent Bot prototype and FAQ knowledge base
- Created chatwoot-agent-bot/ with Node.js webhook server
- Bot detects intent (greeting, billing, technical, features, account)
- Auto-responds from FAQ knowledge base or escalates to human
- FAQ-KB.md: Living knowledge base that grows with customer questions
- CHATWOOT-SETUP.md: Complete deployment and configuration guide
- Supports Telegram notifications on escalation
- Bot runs on port 3001, ready for Chatwoot webhook integration
2026-04-01 16:26:05 -04:00

209 lines
9.9 KiB
Python

#!/usr/bin/env python3
"""Sales Prospector v14 - 50 metros + suburbs"""
import json, re, time, random, urllib.request, urllib.parse
from datetime import datetime
from pathlib import Path
import ssl

# SECURITY NOTE(review): this disables TLS certificate verification for every
# HTTPS request the script makes, exposing it to man-in-the-middle attacks.
# Prefer installing the self-hosted endpoints' CA into the trust store.
ssl._create_default_https_context = ssl._create_unverified_context

# Ensure the working directories exist next to this script.
for d in [Path(__file__).parent / x for x in ["state", "logs", "leads"]]:
    d.mkdir(parents=True, exist_ok=True)

# Persisted crawl state and a per-day log file.
STATE_FILE = Path(__file__).parent / "state" / "prospector-v14-state.json"
LOG_FILE = Path(__file__).parent / "logs" / f"prospector-v14-{datetime.now().strftime('%Y%m%d')}.log"
# TOP 50 METROS + surrounding cities: (metro name with state code, [suburbs]).
# The metro's last token is treated as the state code by main() when building
# suburb search strings.
METROS = [
    # Top 20 major metros
    ("New York NY", ["Manhattan", "Brooklyn", "Queens", "Bronx", "Staten Island", "Jersey City", "Newark"]),
    ("Los Angeles CA", ["Santa Monica", "Pasadena", "Burbank", "Glendale", "Long Beach", "Anaheim"]),
    ("Chicago IL", ["Evanston", "Oak Park", "Naperville", "Schaumburg", "Skokie"]),
    ("Houston TX", ["Sugar Land", "The Woodlands", "Katy", "Pearland", "Baytown"]),
    ("Phoenix AZ", ["Scottsdale", "Tempe", "Mesa", "Chandler", "Glendale"]),
    ("Philadelphia PA", ["Camden", "Chester", "Upper Darby"]),
    ("San Antonio TX", ["New Braunfels", "Schertz", "Cibolo"]),
    ("San Diego CA", ["Chula Vista", "Oceanside", "Escondido", "Carlsbad"]),
    ("Dallas TX", ["Fort Worth", "Arlington", "Plano", "Irving", "Frisco", "McKinney"]),
    ("San Jose CA", ["Sunnyvale", "Santa Clara", "Mountain View", "Palo Alto"]),
    ("Austin TX", ["Round Rock", "Cedar Park", "Georgetown", "Pflugerville"]),
    ("Jacksonville FL", ["Orange Park", "St. Augustine", "Ponte Vedra"]),
    ("Columbus OH", ["Dublin", "Westerville", "Gahanna", "Reynoldsburg"]),
    ("Charlotte NC", ["Matthews", "Mint Hill", "Huntersville", "Concord", "Gastonia"]),
    ("Indianapolis IN", ["Carmel", "Fishers", "Noblesville", "Greenwood"]),
    ("San Francisco CA", ["Oakland", "Berkeley", "Richmond", "Walnut Creek"]),
    ("Seattle WA", ["Bellevue", "Redmond", "Tacoma", "Kirkland", "Renton"]),
    ("Denver CO", ["Aurora", "Lakewood", "Thornton", "Westminster", "Boulder"]),
    ("Oklahoma City OK", ["Edmond", "Norman", "Moore", "Midwest City"]),
    ("Boston MA", ["Cambridge", "Somerville", "Brookline", "Newton"]),
    # Next 30 metros
    # BUG FIX: a second "Oklahoma City OK" entry duplicated the one above,
    # wasting a full rotation slot on an already-covered metro; removed.
    ("Portland OR", ["Beaverton", "Gresham", "Hillsboro", "Lake Oswego"]),
    ("Las Vegas NV", ["Henderson", "North Las Vegas", "Summerlin"]),
    ("Nashville TN", ["Franklin", "Brentwood", "Hendersonville", "Murfreesboro"]),
    ("Detroit MI", ["Warren", "Sterling Heights", "Dearborn", "Livonia"]),
    ("Memphis TN", ["Germantown", "Collierville", "Bartlett"]),
    ("Louisville KY", ["Jeffersonville", "New Albany", "Elizabethtown"]),
    ("Milwaukee WI", ["Waukesha", "West Allis", "Wauwatosa"]),
    ("Baltimore MD", ["Columbia", "Ellicott City", "Towson"]),
    ("Albuquerque NM", ["Rio Rancho", "Santa Fe", "Los Lunas"]),
    ("Tucson AZ", ["Marana", "Oro Valley", "Sahuarita"]),
    ("Mesa AZ", ["Gilbert", "Chandler", "Tempe"]),
    ("Fresno CA", ["Clovis", "Madera", "Sanger"]),
    ("Atlanta GA", ["Sandy Springs", "Roswell", "Johns Creek", "Alpharetta", "Marietta"]),
    ("Sacramento CA", ["Elk Grove", "Roseville", "Folsom", "Davis"]),
    ("Kansas City MO", ["Overland Park", "Olathe", "Independence", "Leawood"]),
    ("Colorado Springs CO", ["Fountain", "Monument", "Woodland Park"]),
    ("Raleigh NC", ["Cary", "Apex", "Holly Springs", "Wake Forest"]),
    ("Omaha NE", ["Bellevue", "Papillion", "La Vista"]),
    ("Miami FL", ["Miami Beach", "Coral Gables", "Hialeah", "Fort Lauderdale"]),
    ("Long Beach CA", ["Lakewood", "Signal Hill"]),
    ("Virginia Beach VA", ["Norfolk", "Chesapeake", "Newport News", "Hampton"]),
    ("Oakland CA", ["Berkeley", "Alameda", "San Leandro"]),
    ("Minneapolis MN", ["St. Paul", "Bloomington", "Plymouth", "Edina"]),
    ("Tulsa OK", ["Broken Arrow", "Bixby", "Jenks"]),
    ("Tampa FL", ["St. Petersburg", "Clearwater", "Brandon", "Lutz"]),
    ("Arlington TX", ["Grand Prairie", "Euless", "Bedford"]),
    # NOTE(review): Overland Park / Lenexa / Shawnee are Kansas City suburbs,
    # not Wichita -- verify against the intended metro list.
    ("Wichita KS", ["Overland Park", "Lenexa", "Shawnee"]),
    ("Bakersfield CA", ["Delano", "Oildale", "Rosedale"]),
    ("Aurora CO", ["Centennial", "Parker", "Englewood"]),
    ("Anaheim CA", ["Fullerton", "Orange", "Garden Grove", "Brea"]),
    ("Santa Ana CA", ["Irvine", "Costa Mesa", "Tustin", "Newport Beach"]),
    ("Corpus Christi TX", ["Portland", "Kingsville", "Alice"]),
    ("Riverside CA", ["Moreno Valley", "Corona", "Jurupa Valley", "Norco"]),
    ("Lexington KY", ["Georgetown", "Richmond", "Winchester"]),
    ("Stockton CA", ["Lodi", "Tracy", "Manteca"]),
]

# Self-hosted SearXNG instance used for web searches.
SEARXNG = "https://search.sensetostyle.com"
# SECURITY NOTE(review): API key committed to source control -- rotate it and
# load from an environment variable instead of hardcoding.
TWENTY_TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzczMzI4NDQzLCJleHAiOjE4MDQ3ODE2NDIsImp0aSI6IjIwZjEyYzkwLTRkMDctNGJmNi1iMzk3LTZjNmU3MzlmMThjOCJ9.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930"
# Twenty CRM REST endpoint base.
TWENTY_BASE = "https://salesforce.hoaledgeriq.com/rest"
# Timestamp of the last outbound request (used by throttle()).
LAST_REQ = 0
def log(m):
    """Print *m* prefixed with an HH:MM:SS timestamp and append it to today's log file."""
    stamp = datetime.now().strftime('%H:%M:%S')
    line = f"[{stamp}] {m}"
    print(line)
    with open(LOG_FILE, 'a') as fh:
        fh.write(line + "\n")
def throttle():
    """Enforce a randomized 3-6 second gap between consecutive outbound requests.

    The first-ever call (LAST_REQ == 0) never sleeps; it just stamps the clock.
    """
    global LAST_REQ
    gap = random.uniform(3, 6)
    if LAST_REQ > 0 and (time.time() - LAST_REQ) < gap:
        time.sleep(gap - (time.time() - LAST_REQ))
    LAST_REQ = time.time()
def load():
    """Load persisted crawl state from STATE_FILE.

    The on-disk 'crm' list is converted back into a set of already-pushed
    domains. Returns a fresh state dict when the file is absent -- and also
    (robustness fix) when it is corrupt or unreadable, instead of crashing
    at startup on a half-written JSON file.
    """
    if STATE_FILE.exists():
        try:
            s = json.loads(STATE_FILE.read_text())
            s['crm'] = set(s.get('crm', []))
            return s
        except (json.JSONDecodeError, OSError):
            pass  # fall through to a fresh state rather than abort
    return {"m": 0, "crm": set(), "leads": 0, "cycle": 0}
def save(s):
    """Write state *s* to STATE_FILE as JSON; the 'crm' set becomes a list for serialization."""
    serializable = dict(s)
    serializable['crm'] = list(s['crm'])
    STATE_FILE.write_text(json.dumps(serializable, indent=2))
def search(q):
    """Query the SearXNG instance for *q* and return up to 12 deduped result URLs.

    Results linking back to the search host itself or archive.org are dropped.
    Best-effort: any network or parse failure yields an empty list.
    (Fix: narrowed the bare `except:` so KeyboardInterrupt/SystemExit are not
    swallowed.)
    """
    throttle()
    try:
        url = f"{SEARXNG}/search?q={urllib.parse.quote(q)}"
        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=15) as r:
            html = r.read().decode('utf-8', errors='ignore')
        urls = [m for m in re.findall(r'href="(https?://[^"]+)"', html)
                if 'sensetostyle' not in m and 'archive.org' not in m]
        # dict.fromkeys dedupes while preserving first-seen order.
        return list(dict.fromkeys(urls))[:12]
    except Exception:
        return []
def get_dom(url):
    """Return the hostname of *url*, lowercased with a leading 'www.' stripped.

    Returns '' for strings that parse but have no netloc, and None when parsing
    fails entirely. (Fix: narrowed the bare `except:` to the exception types
    urlparse/str handling can actually raise.)
    """
    try:
        host = urllib.parse.urlparse(url).netloc.lower()
        return host[4:] if host.startswith('www.') else host
    except (AttributeError, TypeError, ValueError):
        return None
def is_hoa(d):
    """Heuristic filter: does domain *d* look like an HOA / property-management site?

    A domain qualifies when it contains at least one allow-list keyword and no
    deny-list term (search engines, social sites, listing portals, etc.).
    Falsy input is rejected outright.
    """
    if not d:
        return False
    domain = d.lower()
    deny = ('sensetostyle', 'archive.org', 'google', 'facebook', 'yelp', 'bbb',
            'wiki', 'reddit', 'linkedin', 'trulia', 'realtor', 'zillow')
    if any(term in domain for term in deny):
        return False
    allow = ('hoa', 'homeowners', 'association', 'community', 'condo', 'village',
             'mgmt', 'management', 'properties', 'realty')
    return any(keyword in domain for keyword in allow)
def fetch(url):
throttle()
try:
with urllib.request.urlopen(urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"}), timeout=10) as r:
t = re.sub(r'<script.*?script>', '', r.read().decode('utf-8', errors='ignore'), flags=re.DOTALL|re.I)
t = re.sub(r'<style.*?style>', '', t, flags=re.DOTALL|re.I)
return re.sub(r'\s+', ' ', resub(r'<[^>]+>', ' ', t))[:2000]
except: return ""
def get_emails(t):
    """Extract up to 3 unique, lowercased email addresses from text *t*.

    Addresses of 8 characters or fewer are discarded as likely noise.
    Fixes: results are now sorted so output is deterministic (the original
    iterated an unordered set, so which 3 survived varied run to run); the
    redundant `'@' in e` check (the regex guarantees an '@') and the dead
    `or []` tail were removed.
    """
    if not t:
        return []
    found = re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', t)
    return sorted({e.lower() for e in found if len(e) > 8})[:3]
def crm_push(lead):
    """POST *lead* to the Twenty CRM as a markdown note. Returns True on success.

    Fixes: the HTTP response is now closed via a context manager (the original
    leaked the connection), and failures are logged instead of silently
    swallowed so dropped leads are visible in the log.
    """
    try:
        note = {"title": f"{lead['q']}: {lead['d']}",
                "bodyV2": {"markdown": f"## {lead['q']} Lead\n\n**HOA:** {lead['n']}\n**Metro:** {lead['m']}\n**City:** {lead['c']}\n**Site:** {lead['u']}\n**Emails:** {', '.join(lead['e']) or 'None'}"}}
        req = urllib.request.Request(
            f"{TWENTY_BASE}/notes",
            headers={"Authorization": f"Bearer {TWENTY_TOKEN}", "Content-Type": "application/json"},
            data=json.dumps(note).encode(), method='POST')
        with urllib.request.urlopen(req, timeout=10):
            pass  # success is indicated by the request not raising
        log(f"CRM: {lead['d']}")
        return True
    except Exception as e:
        log(f"CRM push failed for {lead.get('d', '?')}: {e}")
        return False
def main():
    """Rotate through METROS indefinitely, scraping HOA leads into the CRM.

    Each cycle searches one metro plus its first three suburbs with three query
    templates, pushes qualifying domains to the CRM, persists state, and stops
    once 750 total leads have been collected. Sleeps 20s after an empty cycle.
    """
    log("=== v14 STARTED - 50 Metros + Suburbs ===")
    s = load()
    queries = ["{loc} HOA", "{loc} homeowners association", "{loc} HOA management"]
    while True:
        s['cycle'] += 1
        metro_name, suburbs = METROS[s['m'] % len(METROS)]
        # Suburb searches reuse the metro's trailing state code (e.g. "Plano TX").
        state = metro_name.split()[-1]
        search_locations = [metro_name] + [f"{sub} {state}" for sub in suburbs[:3]]
        log(f"CYCLE {s['cycle']}: {metro_name} (+{len(suburbs)} suburbs) | Leads: {s['leads']}")
        new = 0
        for city in search_locations[:4]:  # metro + 3 suburbs
            if s['leads'] >= 750:
                break
            for qt in queries:
                if s['leads'] >= 750:
                    break
                urls = search(qt.format(loc=city))
                if urls:
                    log(f" | {city}: {len(urls)} URLs")
                for url in urls[:4]:
                    if s['leads'] >= 750:
                        break
                    dom = get_dom(url)
                    if not dom or dom in s['crm'] or not is_hoa(dom):
                        continue
                    txt = fetch(url)
                    # Hoisted: the original called get_emails(txt) three times
                    # per lead (once for 'e', twice for the quality grade).
                    emails = get_emails(txt)
                    lead = {'n': dom.split('.')[0].replace('-', ' ').title()[:30],
                            'm': metro_name, 'c': city, 'u': url, 'd': dom,
                            'e': emails,
                            'q': "HOT" if len(emails) >= 2 else "WARM" if emails else "COLD"}
                    if crm_push(lead):
                        s['crm'].add(dom)
                        s['leads'] += 1
                        new += 1
                        log(f"LEAD {s['leads']}: {lead['n']}")
        s['m'] = (s['m'] + 1) % len(METROS)
        save(s)
        log(f"Done: {new} new | {s['leads']} total")
        if s['leads'] >= 750:
            log("TARGET 750!")
            break
        if new == 0:
            time.sleep(20)


if __name__ == "__main__":
    main()