#!/usr/bin/env python3
"""Sales Prospector v8 - HOA website = lead (emails optional bonus).

Cycles through METROS, runs Brave web searches for HOA-looking sites, treats
every new HOA domain as a lead (emails found on the page only upgrade lead
quality), saves each lead as a JSON file, and pushes a note to the Twenty CRM.
Persists crawl state between runs in STATE_FILE.
"""
import json
import os
import re
import time
import urllib.parse
import urllib.request
from datetime import datetime
from pathlib import Path

SCRIPT_DIR = Path(__file__).parent
STATE_DIR, LOG_DIR, LEADS_DIR = SCRIPT_DIR / "state", SCRIPT_DIR / "logs", SCRIPT_DIR / "leads"
for d in [STATE_DIR, LOG_DIR, LEADS_DIR]:
    d.mkdir(parents=True, exist_ok=True)

STATE_FILE = STATE_DIR / "prospector-v8-state.json"
LOG_FILE = LOG_DIR / f"prospector-v8-{datetime.now().strftime('%Y%m%d')}.log"

METROS = ["Charlotte NC", "Atlanta GA", "Orlando FL", "Phoenix AZ", "Austin TX",
          "Denver CO", "Nashville TN", "Raleigh NC", "Tampa FL", "Dallas TX",
          "Houston TX", "Miami FL"]

# SECURITY NOTE(review): these credentials were hardcoded in source. They can
# now be overridden via environment variables; rotate both secrets and delete
# the fallback literals once the environment is configured.
BRAVE_KEY = os.environ.get("BRAVE_API_KEY", "BSACPtwjz5lrsXC10pwjFVqzFGN2gr4")
TWENTY_TOKEN = os.environ.get(
    "TWENTY_API_TOKEN",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzczMzI4NDQzLCJleHAiOjE4MDQ3ODE2NDIsImp0aSI6IjIwZjEyYzkwLTRkMDctNGJmNi1iMzk3LTZjNmU3MzlmMThjOCJ9.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930",
)
TWENTY_BASE = "https://salesforce.hoaledgeriq.com/rest"

LAST_SEARCH = 0  # epoch seconds of the most recent Brave search (rate limiting)


def log(msg):
    """Print *msg* with a HH:MM:SS timestamp and append it to today's log file."""
    ts = datetime.now().strftime('%H:%M:%S')
    print(f"[{ts}] {msg}")
    with open(LOG_FILE, 'a') as f:
        f.write(f"[{ts}] {msg}\n")


def rate_limited_sleep():
    """Block so consecutive Brave searches are at least 1 second apart."""
    global LAST_SEARCH
    elapsed = time.time() - LAST_SEARCH
    if elapsed < 1.0:
        time.sleep(1.0 - elapsed)
    LAST_SEARCH = time.time()


def load_state():
    """Load persisted crawl state; converts the JSON domain list back to a set.

    Returns a fresh default state dict when no state file exists yet.
    """
    if STATE_FILE.exists():
        s = json.loads(STATE_FILE.read_text())
        s['domains'] = set(s.get('domains', []))
        return s
    return {"metro_idx": 0, "domains": set(), "leads": 0, "cycle": 0}


def save_state(s):
    """Persist *s* to STATE_FILE without mutating the caller's dict.

    The original implementation swapped the 'domains' set to a list, wrote,
    then swapped back; writing a shallow copy avoids that round-trip.
    Domains are sorted for deterministic, diff-friendly output.
    """
    out = dict(s)
    out['domains'] = sorted(s['domains'])
    STATE_FILE.write_text(json.dumps(out, indent=2))


def search_brave(query, count=10):
    """Run a Brave web search and return a list of result URLs ([] on error)."""
    rate_limited_sleep()
    log(f"SEARCH: {query}")
    try:
        url = (f"https://api.search.brave.com/res/v1/web/search"
               f"?q={urllib.parse.quote(query)}&count={count}")
        req = urllib.request.Request(
            url, headers={"X-Subscription-Token": BRAVE_KEY,
                          "Accept": "application/json"})
        # Context manager ensures the HTTP response is closed (original leaked it).
        with urllib.request.urlopen(req, timeout=30) as r:
            data = json.loads(r.read().decode())
        urls = [x.get('url')
                for x in data.get('web', {}).get('results', []) if x.get('url')]
        log(f" -> {len(urls)} URLs")
        return urls
    except Exception as e:
        log(f" -> Error: {e}")
        return []


def fetch_page(url):
    """Fetch *url* and return up to 2500 chars of visible text ('' on any error)."""
    try:
        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=8) as r:
            html = r.read().decode('utf-8', errors='ignore')
        # BUGFIX: the original patterns were corrupted to r']*>.*?' (the angle
        # brackets were lost); strip whole <script>/<style> elements so their
        # contents don't pollute the extracted text, then drop remaining tags.
        text = re.sub(r'<script[^>]*>.*?</script>', '', html,
                      flags=re.DOTALL | re.IGNORECASE)
        text = re.sub(r'<style[^>]*>.*?</style>', '', text,
                      flags=re.DOTALL | re.IGNORECASE)
        text = re.sub(r'<[^>]+>', ' ', text)
        return re.sub(r'\s+', ' ', text)[:2500]
    except Exception:  # best-effort fetch: any failure yields empty text
        return ""


def extract_domain(url):
    """Return the hostname of *url*, lowercased, without a leading 'www.'."""
    try:
        d = urllib.parse.urlparse(url).netloc.lower()
        return d[4:] if d.startswith('www.') else d
    except Exception:
        return None


def is_hoa(d):
    """Heuristic: domain looks HOA-related and is not a known aggregator site."""
    if not d:
        return False
    dl = d.lower()
    good = ['hoa', 'homeowners', 'association', 'community', 'condo',
            'village', 'creek', 'estates']
    bad = ['google', 'facebook', 'yelp', 'bbb', 'wiki', 'reddit',
           'linkedin', 'blog']
    return any(k in dl for k in good) and not any(b in dl for b in bad)


def extract_emails(text):
    """Return up to 3 unique, plausible email addresses found in *text*.

    Filters obvious placeholders/no-reply addresses. Dedup preserves
    first-seen order (the original used list(set(...)), which made the
    3 kept emails — and thus lead quality — nondeterministic).
    """
    if not text:
        return []
    pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    ems = re.findall(pattern, text)
    bad = ['example.com', 'test.com', 'noreply@']
    filtered = [e.lower() for e in ems
                if len(e) > 8 and '@' in e and not any(b in e for b in bad)]
    return list(dict.fromkeys(filtered))[:3]


def save_lead(lead):
    """Write *lead* as a JSON file in LEADS_DIR, keyed by its domain."""
    f = LEADS_DIR / f"{lead['domain'].replace('/', '_')}.json"
    f.write_text(json.dumps(lead, indent=2))
    log(f"SAVED: {lead['domain']}")


def push_crm(lead):
    """POST *lead* as a note to the Twenty CRM. Returns True on success."""
    try:
        emails_str = ', '.join(lead['emails']) if lead['emails'] else 'None found'
        note = {
            "title": f"{lead['quality']}: {lead['domain']}",
            "bodyV2": {
                "markdown": (f"## {lead['quality']} HOA Lead\n\n"
                             f"**Name:** {lead['name']}\n"
                             f"**Metro:** {lead['metro']}\n"
                             f"**Site:** {lead['url']}\n"
                             f"**Emails:** {emails_str}"),
            },
        }
        req = urllib.request.Request(
            f"{TWENTY_BASE}/notes",
            headers={"Authorization": f"Bearer {TWENTY_TOKEN}",
                     "Content-Type": "application/json"},
            data=json.dumps(note).encode(), method='POST')
        with urllib.request.urlopen(req, timeout=10):
            pass  # response body unused; context manager closes it
        log(f"CRM: {lead['domain']}")
        return True
    except Exception as e:
        log(f"CRM error: {e}")
        return False


def main():
    """Main prospecting loop: search each metro until 25 total leads are found."""
    log("=== Prospector v8 Started ===")
    s = load_state()
    if isinstance(s['domains'], list):  # defensive: tolerate old state shape
        s['domains'] = set(s['domains'])
    queries = ["{metro} HOA", "{metro} homeowners association",
               "{metro} HOA management"]
    while True:
        s['cycle'] += 1
        metro = METROS[s['metro_idx'] % len(METROS)]
        log(f"CYCLE {s['cycle']}: {metro}")
        start, found = time.time(), 0
        for tmpl in queries:
            if s['leads'] >= 25:
                break
            for url in search_brave(tmpl.format(metro=metro), 10)[:6]:
                if s['leads'] >= 25:
                    break
                dom = extract_domain(url)
                if not dom or dom in s['domains']:
                    continue
                if not is_hoa(dom):
                    s['domains'].add(dom)  # remember rejects so we skip them later
                    continue
                s['domains'].add(dom)
                text = fetch_page(url)
                emails = extract_emails(text) if text else []
                # ANY HOA domain is a lead; emails only raise its quality tier.
                name = (dom.split('.')[0].replace('-', ' ')
                        .replace('_', ' ').title() + " HOA")
                qual = "HOT" if len(emails) >= 2 else "WARM" if emails else "COLD"
                lead = {'name': name, 'metro': metro, 'url': url, 'domain': dom,
                        'emails': emails, 'quality': qual,
                        'found': datetime.now().isoformat()}
                save_lead(lead)
                push_crm(lead)
                s['leads'], found = s['leads'] + 1, found + 1
                log(f"LEAD {s['leads']}: {name} ({qual})"
                    + (f" - {len(emails)} emails" if emails else ""))
        s['metro_idx'] = (s['metro_idx'] + 1) % len(METROS)
        save_state(s)
        log(f"Done: {found} leads, {s['leads']} total, {time.time()-start:.1f}s")
        if s['leads'] >= 25:
            log(f"TARGET REACHED: 25 leads!")
            break
        if found == 0:  # empty cycle: back off before trying the next metro
            time.sleep(15)


if __name__ == "__main__":
    main()