feat: Add Chatwoot Agent Bot prototype and FAQ knowledge base
- Created chatwoot-agent-bot/ with a Node.js webhook server
- Bot detects intent (greeting, billing, technical, features, account)
- Auto-responds from the FAQ knowledge base or escalates to a human
- FAQ-KB.md: living knowledge base that grows with customer questions
- CHATWOOT-SETUP.md: complete deployment and configuration guide
- Supports Telegram notifications on escalation
- Bot runs on port 3001, ready for Chatwoot webhook integration
This commit is contained in:
144
agents/sales-prospector/prospector-v12.py
Normal file
144
agents/sales-prospector/prospector-v12.py
Normal file
@@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Sales Prospector v12b - Aggressive SearXNG harvesting"""
|
||||
import json
import os
import random
import re
import ssl
import time
import urllib.parse
import urllib.request
from datetime import datetime
from pathlib import Path
|
||||
# SECURITY: unless PROSPECTOR_VERIFY_TLS=1 is set, the default HTTPS context
# factory is swapped for the unverified one, so HTTPS requests in this process
# (including the CRM call that carries the bearer token) skip certificate
# checks. The default stays "unverified" only to preserve existing behaviour.
# NOTE(review): this patches private ssl attributes; prefer passing an explicit
# context to urlopen() for the hosts that really need it.
if os.environ.get("PROSPECTOR_VERIFY_TLS") != "1":
    ssl._create_default_https_context = ssl._create_unverified_context

# Directory containing this script; all working folders live next to it.
_BASE_DIR = Path(__file__).parent

# Create the working folders at import time so the paths below always resolve
# into existing directories.
for _name in ("state", "logs", "leads"):
    (_BASE_DIR / _name).mkdir(parents=True, exist_ok=True)

# JSON file holding the persisted run state.
STATE_FILE = _BASE_DIR / "state" / "prospector-v12-state.json"
# Daily log file. The date is evaluated once, when the module is loaded, so a
# process running past midnight keeps writing to the start day's file.
LOG_FILE = _BASE_DIR / "logs" / f"prospector-v12-{datetime.now().strftime('%Y%m%d')}.log"
|
||||
|
||||
# Metro areas searched in turn; main() indexes this list with the persisted
# counter s['m'].
METROS = [
    "Charlotte NC", "Atlanta GA", "Orlando FL", "Phoenix AZ", "Austin TX",
    "Denver CO", "Nashville TN", "Raleigh NC", "Tampa FL", "Dallas TX",
    "Houston TX", "Miami FL", "Seattle WA", "Portland OR", "Las Vegas NV",
    "San Antonio TX", "Indianapolis IN", "Columbus OH", "Kansas City MO",
    "Salt Lake City UT", "San Diego CA", "Sacramento CA", "San Jose CA",
    "New Orleans LA", "Oklahoma City OK",
]

# Base URL of the SearXNG instance queried by search().
SEARXNG = "https://search.sensetostyle.com"

# Bearer token sent to the CRM by crm_push().
# SECURITY: the fallback literal is a credential committed to source control.
# Set the TWENTY_TOKEN environment variable to override it, then rotate this
# key and delete the fallback.
TWENTY_TOKEN = os.environ.get(
    "TWENTY_TOKEN",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzczMzI4NDQzLCJleHAiOjE4MDQ3ODE2NDIsImp0aSI6IjIwZjEyYzkwLTRkMDctNGJmNi1iMzk3LTZjNmU3MzlmMThjOCJ9.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930",
)

# REST root of the CRM; crm_push() posts to f"{TWENTY_BASE}/notes".
TWENTY_BASE = "https://salesforce.hoaledgeriq.com/rest"

# Timestamp (time.time()) of the last throttled request; 0 means "none yet".
LAST_REQ = 0
|
||||
|
||||
def log(m):
    """Print *m* with an HH:MM:SS prefix and append the same line to LOG_FILE.

    The file is written as UTF-8 so that messages containing non-ASCII text
    (for example exception messages or scraped names) cannot fail on platforms
    whose default encoding is narrower.
    """
    ts = datetime.now().strftime('%H:%M:%S')
    line = f"[{ts}] {m}"
    print(line)
    with open(LOG_FILE, 'a', encoding='utf-8') as f:
        f.write(line + "\n")
|
||||
|
||||
def throttle():
    """Space out requests by a random 2-4 second gap.

    Sleeps only for the part of the randomly chosen delay that has not already
    elapsed since the previous call, then records the current time in the
    module-level LAST_REQ. The very first call (LAST_REQ == 0) never sleeps.
    """
    global LAST_REQ
    delay = random.uniform(2, 4)
    if LAST_REQ > 0:
        # Read the clock once: evaluating it separately for the check and for
        # the sleep argument could yield a negative value, and time.sleep()
        # raises ValueError on negative input.
        remaining = delay - (time.time() - LAST_REQ)
        if remaining > 0:
            # Cap at the chosen delay so a wall clock that jumped backwards
            # cannot stretch the pause.
            time.sleep(min(remaining, delay))
    LAST_REQ = time.time()
|
||||
|
||||
def load():
    """Return the persisted run state, or a fresh one if no state file exists.

    The state is a dict with the keys:
      'm'     - counter used by main() to pick the current metro
      'crm'   - set of domains already recorded (stored as a list in the file)
      'leads' - number of leads pushed so far
      'cycle' - number of cycles run so far

    Keys missing from the file are filled with their defaults so callers can
    rely on all four being present.

    Raises:
        json.JSONDecodeError: if the state file is not valid JSON. This is not
            swallowed, because silently starting from an empty state would
            re-push domains that were already recorded.
    """
    state = {"m": 0, "crm": set(), "leads": 0, "cycle": 0}
    if STATE_FILE.exists():
        state.update(json.loads(STATE_FILE.read_text()))
        # JSON has no set type; a null/absent value becomes an empty set.
        state['crm'] = set(state.get('crm') or [])
    return state
|
||||
|
||||
def save(s):
    """Write state dict *s* to STATE_FILE as indented JSON.

    *s* itself is not modified; the 'crm' set is serialised as a sorted list so
    the file content is stable between runs. The data is written to a sibling
    temporary file first and then moved over STATE_FILE, so an interrupted
    write cannot leave a truncated state file behind.
    """
    payload = {**s, 'crm': sorted(s['crm'])}
    tmp_path = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    tmp_path.write_text(json.dumps(payload, indent=2))
    tmp_path.replace(STATE_FILE)
|
||||
|
||||
def search(q):
    """Query the SearXNG instance for *q* and return up to 15 result URLs.

    The result page is fetched as HTML and every absolute http(s) href is
    collected, skipping links that contain 'sensetostyle' or 'archive.org'.
    Duplicates are dropped while keeping first-seen order.

    Returns:
        list[str]: at most 15 URLs; an empty list if the request fails.
    """
    throttle()
    try:
        url = f"{SEARXNG}/search?q={urllib.parse.quote(q)}"
        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=15) as r:
            html = r.read().decode('utf-8', errors='ignore')
    except Exception as e:
        # Best effort: a failed search must not stop the harvesting loop, but
        # it is logged, and KeyboardInterrupt/SystemExit are no longer
        # swallowed the way a bare ``except:`` would.
        log(f"SEARCH FAIL: {e}")
        return []

    links = [
        m for m in re.findall(r'href="(https?://[^"]+)"', html)
        if 'sensetostyle' not in m and 'archive.org' not in m
    ]
    # dict.fromkeys de-duplicates while preserving order.
    return list(dict.fromkeys(links))[:15]
|
||||
|
||||
def get_dom(url):
    """Return the lower-cased host of *url* without a leading 'www.'.

    The parsed ``hostname`` is used rather than ``netloc`` so that a port or
    ``user:password@`` prefix does not become part of the domain key used for
    de-duplication.

    Returns:
        str | None: the host, '' when the URL has no host part, or None when
        the URL cannot be parsed. Both failure values are falsy.
    """
    try:
        host = urllib.parse.urlparse(url).hostname or ""
        return host[4:] if host.startswith('www.') else host
    except (ValueError, TypeError, AttributeError):
        return None
|
||||
|
||||
def is_hoa(d, *,
           good=('hoa', 'homeowners', 'association', 'community', 'condo',
                 'village', 'mgmt', 'management', 'hood'),
           bad=('sensetostyle', 'archive.org', 'google', 'facebook', 'yelp',
                'bbb', 'wiki', 'reddit', 'linkedin')):
    """Return True if domain *d* looks like an HOA / community site.

    The check is a case-insensitive substring match: the domain must contain
    at least one *good* keyword and none of the *bad* ones.

    Args:
        d: domain name; a falsy value (None, '') yields False.
        good: keywords of which at least one must occur in the domain.
        bad: keywords that disqualify the domain when present.
    """
    if not d:
        return False
    dl = d.lower()
    if any(b in dl for b in bad):
        return False
    return any(k in dl for k in good)
|
||||
|
||||
def fetch(url, *, max_chars=1500):
    """Download *url* and return its visible text, truncated to *max_chars*.

    <script> and <style> elements are removed, remaining tags are replaced by
    spaces and runs of whitespace are collapsed to a single space.

    Args:
        url: page to download.
        max_chars: maximum length of the returned text (default 1500).

    Returns:
        str: the cleaned text, or '' if the page could not be retrieved.
    """
    throttle()
    try:
        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=8) as r:
            page = r.read().decode('utf-8', errors='ignore')
    except Exception as e:
        # Best effort: unreachable pages are expected, so report and move on.
        # Unlike a bare ``except:`` this lets KeyboardInterrupt/SystemExit
        # propagate.
        log(f"FETCH FAIL: {e}")
        return ""

    page = re.sub(r'<script.*?script>', '', page, flags=re.DOTALL | re.I)
    page = re.sub(r'<style.*?style>', '', page, flags=re.DOTALL | re.I)
    text = re.sub(r'<[^>]+>', ' ', page)
    return re.sub(r'\s+', ' ', text)[:max_chars]
|
||||
|
||||
def get_emails(t):
    """Return up to three distinct e-mail addresses found in text *t*.

    Addresses are lower-cased, must be longer than 8 characters, and are
    returned in the order they first appear. (Going through a ``set`` made
    both the order and the choice of the three addresses vary between runs.)

    Args:
        t: text to scan; a falsy value yields an empty list.

    Returns:
        list[str]: zero to three addresses.
    """
    found = re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', t or "")
    # dict.fromkeys de-duplicates while preserving first-seen order.
    unique = dict.fromkeys(e.lower() for e in found if len(e) > 8)
    return list(unique)[:3]
|
||||
|
||||
def crm_push(lead):
    """Create a note for *lead* in the CRM and report whether it succeeded.

    Args:
        lead: dict with the keys 'q' (quality label), 'd' (domain), 'n' (name),
            'm' (metro), 'u' (site URL) and 'e' (iterable of e-mail strings).

    Returns:
        bool: True if the POST to f"{TWENTY_BASE}/notes" completed without
        raising, False otherwise (the error is logged).
    """
    try:
        note = {
            "title": f"{lead['q']}: {lead['d']}",
            "bodyV2": {"markdown": f"## {lead['q']} Lead\n\n**HOA:** {lead['n']}\n**Metro:** {lead['m']}\n**Site:** {lead['u']}\n**Emails:** {', '.join(lead['e']) or 'None'}"},
        }
        req = urllib.request.Request(
            f"{TWENTY_BASE}/notes",
            headers={"Authorization": f"Bearer {TWENTY_TOKEN}", "Content-Type": "application/json"},
            data=json.dumps(note).encode(),
            method='POST',
        )
        # The response body is not needed, but the response must be closed so
        # the connection is released.
        with urllib.request.urlopen(req, timeout=10):
            pass
        log(f"CRM: {lead['d']}")
        return True
    except Exception as e:
        log(f"FAIL: {e}")
        return False
|
||||
|
||||
def _quality(emails):
    """Grade a lead by how many e-mail addresses were found on its page."""
    if len(emails) >= 2:
        return "HOT"
    return "WARM" if emails else "COLD"


def main(target=200):
    """Harvest HOA leads metro by metro until *target* leads are recorded.

    Each cycle takes the next metro from METROS, runs every query template
    through search(), and for the first five result URLs of each query whose
    domain is new and passes is_hoa(), fetches the page, extracts e-mails and
    pushes a note to the CRM. State is persisted after every successful push
    and at the end of each cycle.

    Args:
        target: total number of leads at which the loop stops (default 200).
    """
    log("=== v12 RESTART ===")
    s = load()
    queries = ["{m} HOA", "{m} homeowners association", "{m} HOA management contact",
               "{m} condo association", "{m} community management", "{m} HOA board"]

    while True:
        s['cycle'] += 1
        metro = METROS[s['m'] % len(METROS)]
        log(f"CYCLE {s['cycle']}: {metro} | Leads: {s['leads']}")

        new = 0
        for qt in queries:
            if s['leads'] >= target:
                break
            urls = search(qt.format(m=metro))
            if urls:
                log(f" Got {len(urls)} URLs")

            for url in urls[:5]:
                if s['leads'] >= target:
                    break
                dom = get_dom(url)
                if not dom or dom in s['crm'] or not is_hoa(dom):
                    continue

                # Extract the addresses once and reuse them for both the lead
                # payload and its quality grade.
                emails = get_emails(fetch(url))
                lead = {'n': dom.split('.')[0].replace('-', ' ').title()[:30] + " HOA",
                        'm': metro, 'u': url, 'd': dom,
                        'e': emails,
                        'q': _quality(emails)}

                if crm_push(lead):
                    s['crm'].add(dom)
                    s['leads'] += 1
                    new += 1
                    log(f"LEAD {s['leads']}: {lead['n']}")
                    # Persist immediately: if the process dies mid-cycle, a
                    # restart must not push this domain to the CRM again.
                    save(s)

        s['m'] = (s['m'] + 1) % len(METROS)
        save(s)
        log(f"Done: {new} new | {s['leads']} total")

        if s['leads'] >= target:
            log(f"TARGET {target}!")
            break
        # Nothing found for this metro: pause briefly before the next one.
        if new == 0:
            time.sleep(10)
|
||||
|
||||
# Script entry point: start the harvesting loop only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user