- Created chatwoot-agent-bot/ with Node.js webhook server - Bot detects intent (greeting, billing, technical, features, account) - Auto-responds from FAQ knowledge base or escalates to human - FAQ-KB.md: Living knowledge base that grows with customer questions - CHATWOOT-SETUP.md: Complete deployment and configuration guide - Supports Telegram notifications on escalation - Bot runs on port 3001, ready for Chatwoot webhook integration
209 lines
9.9 KiB
Python
#!/usr/bin/env python3
|
|
"""Sales Prospector v14 - 50 metros + suburbs"""
|
|
import json
import os
import random
import re
import ssl
import time
import urllib.parse
import urllib.request
from datetime import datetime
from pathlib import Path
|
|
# SECURITY NOTE(review): this disables TLS certificate verification for ALL
# https requests made by this process — man-in-the-middle is possible.
# Presumably done for the self-hosted SearXNG/CRM certs; confirm and prefer
# adding the CA to the trust store instead.
ssl._create_default_https_context = ssl._create_unverified_context

# Ensure the working directories exist next to this script.
for d in [Path(__file__).parent / x for x in ["state", "logs", "leads"]]:
    d.mkdir(parents=True, exist_ok=True)

# Persistent resume state (metro cursor, seen domains, lead counters).
STATE_FILE = Path(__file__).parent / "state" / "prospector-v14-state.json"
# One log file per calendar day; log() appends to it.
LOG_FILE = Path(__file__).parent / "logs" / f"prospector-v14-{datetime.now().strftime('%Y%m%d')}.log"
|
|
|
|
# Major US metros + surrounding suburb cities to prospect.
# Each entry is ("Metro ST", [suburbs...]); main() searches the metro plus
# the first three suburbs per cycle.
# FIX: the original list contained "Oklahoma City OK" twice (once in each
# group), which burned a whole prospecting cycle on a repeat metro.
METROS = [
    # Top 20 major metros
    ("New York NY", ["Manhattan", "Brooklyn", "Queens", "Bronx", "Staten Island", "Jersey City", "Newark"]),
    ("Los Angeles CA", ["Santa Monica", "Pasadena", "Burbank", "Glendale", "Long Beach", "Anaheim"]),
    ("Chicago IL", ["Evanston", "Oak Park", "Naperville", "Schaumburg", "Skokie"]),
    ("Houston TX", ["Sugar Land", "The Woodlands", "Katy", "Pearland", "Baytown"]),
    ("Phoenix AZ", ["Scottsdale", "Tempe", "Mesa", "Chandler", "Glendale"]),
    ("Philadelphia PA", ["Camden", "Chester", "Upper Darby"]),
    ("San Antonio TX", ["New Braunfels", "Schertz", "Cibolo"]),
    ("San Diego CA", ["Chula Vista", "Oceanside", "Escondido", "Carlsbad"]),
    ("Dallas TX", ["Fort Worth", "Arlington", "Plano", "Irving", "Frisco", "McKinney"]),
    ("San Jose CA", ["Sunnyvale", "Santa Clara", "Mountain View", "Palo Alto"]),
    ("Austin TX", ["Round Rock", "Cedar Park", "Georgetown", "Pflugerville"]),
    ("Jacksonville FL", ["Orange Park", "St. Augustine", "Ponte Vedra"]),
    ("Columbus OH", ["Dublin", "Westerville", "Gahanna", "Reynoldsburg"]),
    ("Charlotte NC", ["Matthews", "Mint Hill", "Huntersville", "Concord", "Gastonia"]),
    ("Indianapolis IN", ["Carmel", "Fishers", "Noblesville", "Greenwood"]),
    ("San Francisco CA", ["Oakland", "Berkeley", "Richmond", "Walnut Creek"]),
    ("Seattle WA", ["Bellevue", "Redmond", "Tacoma", "Kirkland", "Renton"]),
    ("Denver CO", ["Aurora", "Lakewood", "Thornton", "Westminster", "Boulder"]),
    ("Oklahoma City OK", ["Edmond", "Norman", "Moore", "Midwest City"]),
    ("Boston MA", ["Cambridge", "Somerville", "Brookline", "Newton"]),
    # Remaining metros (duplicate Oklahoma City entry removed)
    ("Portland OR", ["Beaverton", "Gresham", "Hillsboro", "Lake Oswego"]),
    ("Las Vegas NV", ["Henderson", "North Las Vegas", "Summerlin"]),
    ("Nashville TN", ["Franklin", "Brentwood", "Hendersonville", "Murfreesboro"]),
    ("Detroit MI", ["Warren", "Sterling Heights", "Dearborn", "Livonia"]),
    ("Memphis TN", ["Germantown", "Collierville", "Bartlett"]),
    ("Louisville KY", ["Jeffersonville", "New Albany", "Elizabethtown"]),
    ("Milwaukee WI", ["Waukesha", "West Allis", "Wauwatosa"]),
    ("Baltimore MD", ["Columbia", "Ellicott City", "Towson"]),
    ("Albuquerque NM", ["Rio Rancho", "Santa Fe", "Los Lunas"]),
    ("Tucson AZ", ["Marana", "Oro Valley", "Sahuarita"]),
    ("Mesa AZ", ["Gilbert", "Chandler", "Tempe"]),
    ("Fresno CA", ["Clovis", "Madera", "Sanger"]),
    ("Atlanta GA", ["Sandy Springs", "Roswell", "Johns Creek", "Alpharetta", "Marietta"]),
    ("Sacramento CA", ["Elk Grove", "Roseville", "Folsom", "Davis"]),
    ("Kansas City MO", ["Overland Park", "Olathe", "Independence", "Leawood"]),
    ("Colorado Springs CO", ["Fountain", "Monument", "Woodland Park"]),
    ("Raleigh NC", ["Cary", "Apex", "Holly Springs", "Wake Forest"]),
    ("Omaha NE", ["Bellevue", "Papillion", "La Vista"]),
    ("Miami FL", ["Miami Beach", "Coral Gables", "Hialeah", "Fort Lauderdale"]),
    ("Long Beach CA", ["Lakewood", "Signal Hill"]),
    ("Virginia Beach VA", ["Norfolk", "Chesapeake", "Newport News", "Hampton"]),
    ("Oakland CA", ["Berkeley", "Alameda", "San Leandro"]),
    ("Minneapolis MN", ["St. Paul", "Bloomington", "Plymouth", "Edina"]),
    ("Tulsa OK", ["Broken Arrow", "Bixby", "Jenks"]),
    ("Tampa FL", ["St. Petersburg", "Clearwater", "Brandon", "Lutz"]),
    ("Arlington TX", ["Grand Prairie", "Euless", "Bedford"]),
    ("Wichita KS", ["Overland Park", "Lenexa", "Shawnee"]),
    ("Bakersfield CA", ["Delano", "Oildale", "Rosedale"]),
    ("Aurora CO", ["Centennial", "Parker", "Englewood"]),
    ("Anaheim CA", ["Fullerton", "Orange", "Garden Grove", "Brea"]),
    ("Santa Ana CA", ["Irvine", "Costa Mesa", "Tustin", "Newport Beach"]),
    ("Corpus Christi TX", ["Portland", "Kingsville", "Alice"]),
    ("Riverside CA", ["Moreno Valley", "Corona", "Jurupa Valley", "Norco"]),
    ("Lexington KY", ["Georgetown", "Richmond", "Winchester"]),
    ("Stockton CA", ["Lodi", "Tracy", "Manteca"]),
]
|
|
|
|
# Self-hosted SearXNG search endpoint; override via SEARXNG_URL env var.
SEARXNG = os.environ.get("SEARXNG_URL", "https://search.sensetostyle.com")

# SECURITY: this API bearer token was hardcoded in source. It can now be
# supplied via the TWENTY_TOKEN env var; the embedded default is kept only
# for backward compatibility and should be rotated and removed from VCS.
TWENTY_TOKEN = os.environ.get(
    "TWENTY_TOKEN",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzczMzI4NDQzLCJleHAiOjE4MDQ3ODE2NDIsImp0aSI6IjIwZjEyYzkwLTRkMDctNGJmNi1iMzk3LTZjNmU3MzlmMThjOCJ9.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930",
)

# Twenty CRM REST base URL; override via TWENTY_BASE env var.
TWENTY_BASE = os.environ.get("TWENTY_BASE", "https://salesforce.hoaledgeriq.com/rest")

# Wall-clock timestamp of the last outbound HTTP request (used by throttle()).
LAST_REQ = 0
|
|
|
|
def log(m):
    """Echo a timestamped message to stdout and append it to today's log file."""
    stamp = datetime.now().strftime('%H:%M:%S')
    line = f"[{stamp}] {m}"
    print(line)
    with open(LOG_FILE, 'a') as handle:
        handle.write(line + "\n")
|
|
|
|
def throttle():
    """Space outbound requests by a randomized 3-6 second gap.

    Sleeps only for the remainder of the gap if some time has already
    elapsed since the previous request; records the new request time.
    """
    global LAST_REQ
    gap = random.uniform(3, 6)
    if LAST_REQ > 0:
        elapsed = time.time() - LAST_REQ
        if elapsed < gap:
            time.sleep(gap - elapsed)
    LAST_REQ = time.time()
|
|
|
|
def load():
    """Restore prospecting state from STATE_FILE, or return a fresh state.

    State keys: 'm' metro cursor, 'crm' set of pushed domains,
    'leads' total lead count, 'cycle' cycle counter.
    """
    if not STATE_FILE.exists():
        return {"m": 0, "crm": set(), "leads": 0, "cycle": 0}
    state = json.loads(STATE_FILE.read_text())
    # JSON has no set type, so 'crm' round-trips through disk as a list.
    state['crm'] = set(state.get('crm', []))
    return state
|
|
|
|
def save(s):
    """Persist state *s* to STATE_FILE, converting the 'crm' set to a JSON-safe list."""
    snapshot = dict(s)
    snapshot['crm'] = list(s['crm'])
    STATE_FILE.write_text(json.dumps(snapshot, indent=2))
|
|
|
|
def search(q):
    """Query the SearXNG instance for *q*; return up to 12 unique result URLs.

    Links pointing back at the search host or archive.org are filtered out.
    Returns [] on any network/parse failure (best-effort).
    """
    throttle()
    try:
        url = f"{SEARXNG}/search?q={urllib.parse.quote(q)}"
        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=15) as r:
            html = r.read().decode('utf-8', errors='ignore')
        urls = [m for m in re.findall(r'href="(https?://[^"]+)"', html)
                if 'sensetostyle' not in m and 'archive.org' not in m]
        # dict.fromkeys dedupes while preserving first-seen order.
        return list(dict.fromkeys(urls))[:12]
    except Exception:  # FIX: narrowed from bare `except:` so Ctrl-C/SystemExit propagate
        return []
|
|
|
|
def get_dom(url):
    """Return the domain of *url*, lowercased with a leading 'www.' stripped.

    Returns None if the URL cannot be parsed (urlparse raises ValueError on
    malformed input); an URL with no netloc yields "".
    """
    try:
        dom = urllib.parse.urlparse(url).netloc.lower()
    except ValueError:  # FIX: narrowed from bare `except:`
        return None
    return dom[4:] if dom.startswith('www.') else dom
|
|
|
|
def is_hoa(d):
    """Heuristic: True when domain *d* looks like an HOA / property-management site.

    Requires at least one positive keyword and no blocklisted substring;
    falsy input (None, "") is rejected.
    """
    if not d:
        return False
    domain = d.lower()
    blocklist = ('sensetostyle', 'archive.org', 'google', 'facebook', 'yelp',
                 'bbb', 'wiki', 'reddit', 'linkedin', 'trulia', 'realtor',
                 'zillow')
    if any(b in domain for b in blocklist):
        return False
    keywords = ('hoa', 'homeowners', 'association', 'community', 'condo',
                'village', 'mgmt', 'management', 'properties', 'realty')
    return any(k in domain for k in keywords)
|
|
|
|
def fetch(url):
    """Download *url* and return its visible text, capped at 2000 characters.

    Strips <script>/<style> blocks and all remaining HTML tags, then
    collapses whitespace. Returns "" on any failure (best-effort scraper).

    BUG FIX: the tag-stripping call was spelled `resub` instead of `re.sub`.
    The resulting NameError was swallowed by the bare `except:`, so fetch()
    silently returned "" for EVERY page — no emails were ever extracted and
    every lead was graded COLD.
    """
    throttle()
    try:
        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=10) as r:
            raw = r.read().decode('utf-8', errors='ignore')
        text = re.sub(r'<script.*?script>', '', raw, flags=re.DOTALL | re.I)
        text = re.sub(r'<style.*?style>', '', text, flags=re.DOTALL | re.I)
        text = re.sub(r'<[^>]+>', ' ', text)
        return re.sub(r'\s+', ' ', text)[:2000]
    except Exception:  # narrowed from bare `except:` so Ctrl-C still works
        return ""
|
|
|
def get_emails(t):
    """Extract up to 3 unique email addresses from text *t*, lowercased.

    Returns [] for empty/None input. Matches of 8 characters or fewer are
    discarded as likely junk. Order is deterministic (first occurrence wins)
    — the original used set(), whose ordering is arbitrary. The redundant
    `'@' in e` check was dropped: the regex already guarantees an '@'.
    """
    if not t:
        return []
    found = re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', t)
    uniq = dict.fromkeys(e.lower() for e in found if len(e) > 8)
    return list(uniq)[:3]
|
|
|
|
def crm_push(lead):
    """Create a note for *lead* in the Twenty CRM via its REST API.

    Expects lead keys: 'q' quality, 'd' domain, 'n' name, 'm' metro,
    'c' city, 'u' url, 'e' list of emails. Returns True on success,
    False on any failure.
    """
    try:
        note = {"title": f"{lead['q']}: {lead['d']}",
                "bodyV2": {"markdown": f"## {lead['q']} Lead\n\n**HOA:** {lead['n']}\n**Metro:** {lead['m']}\n**City:** {lead['c']}\n**Site:** {lead['u']}\n**Emails:** {', '.join(lead['e']) or 'None'}"}}
        req = urllib.request.Request(
            f"{TWENTY_BASE}/notes",
            headers={"Authorization": f"Bearer {TWENTY_TOKEN}",
                     "Content-Type": "application/json"},
            data=json.dumps(note).encode(), method='POST')
        # `with` closes the response — the original leaked the connection.
        with urllib.request.urlopen(req, timeout=10):
            pass
        log(f"CRM: {lead['d']}")
        return True
    except Exception as exc:  # FIX: was bare `except:` that hid every CRM failure
        log(f"CRM push failed ({lead.get('d', '?')}): {exc}")
        return False
|
|
|
|
def main():
    """Endless prospecting loop.

    Each cycle takes one metro (plus its first 3 suburbs), runs three search
    query templates per location, scrapes candidate HOA sites, grades them by
    email count, and pushes new leads to the CRM. State is saved after every
    cycle; the loop exits once the lead target is reached.

    FIX: get_emails(txt) was evaluated three times per lead (once for 'e'
    and twice inside the quality expression) — it is now computed once.
    The lead target (previously the literal 750 repeated five times) is a
    single named local.
    """
    log("=== v14 STARTED - 50 Metros + Suburbs ===")
    s = load()

    target = 750  # stop once this many total leads have been pushed
    queries = ["{loc} HOA", "{loc} homeowners association", "{loc} HOA management"]

    while True:
        s['cycle'] += 1
        metro_name, suburbs = METROS[s['m'] % len(METROS)]

        # Search the metro itself plus 3 suburbs, tagging suburbs with the
        # metro's trailing state abbreviation (e.g. "Evanston IL").
        state_tag = metro_name.split()[-1]
        search_locations = [metro_name] + [f"{sub} {state_tag}" for sub in suburbs[:3]]

        log(f"CYCLE {s['cycle']}: {metro_name} (+{len(suburbs)} suburbs) | Leads: {s['leads']}")

        new = 0
        for city in search_locations[:4]:  # metro + 3 suburbs
            if s['leads'] >= target:
                break
            for qt in queries:
                if s['leads'] >= target:
                    break
                urls = search(qt.format(loc=city))
                if urls:
                    log(f" | {city}: {len(urls)} URLs")

                for url in urls[:4]:
                    if s['leads'] >= target:
                        break
                    dom = get_dom(url)
                    # Skip unparseable, already-pushed, or non-HOA domains.
                    if not dom or dom in s['crm'] or not is_hoa(dom):
                        continue

                    txt = fetch(url)
                    emails = get_emails(txt)
                    quality = "HOT" if len(emails) >= 2 else "WARM" if emails else "COLD"
                    lead = {'n': dom.split('.')[0].replace('-', ' ').title()[:30],
                            'm': metro_name, 'c': city, 'u': url, 'd': dom,
                            'e': emails, 'q': quality}

                    if crm_push(lead):
                        s['crm'].add(dom)
                        s['leads'] += 1
                        new += 1
                        log(f"LEAD {s['leads']}: {lead['n']}")

        # Advance the metro cursor and checkpoint state every cycle.
        s['m'] = (s['m'] + 1) % len(METROS)
        save(s)
        log(f"Done: {new} new | {s['leads']} total")

        if s['leads'] >= target:
            log("TARGET 750!")
            break
        if new == 0:
            time.sleep(20)  # back off when a whole cycle produced nothing
|
|
|
|
# Script entry point: run the prospecting loop (blocks until the lead
# target is reached).
if __name__ == "__main__":
    main()
|