#!/usr/bin/env python3
"""
Sales Prospector v3 - Working HOA Lead Generation
Actually searches, extracts, and pushes real leads to CRM
"""

import json
import os
import re
import time
import subprocess
from datetime import datetime
from pathlib import Path
from urllib.parse import urlparse

# Config
SCRIPT_DIR = Path(__file__).parent.absolute()
STATE_DIR = SCRIPT_DIR / "state"
LOG_DIR = SCRIPT_DIR / "logs"
LEADS_DIR = SCRIPT_DIR / "leads"

# Working directories are created next to the script at import time.
for d in [STATE_DIR, LOG_DIR, LEADS_DIR]:
    d.mkdir(parents=True, exist_ok=True)

STATE_FILE = STATE_DIR / "prospector-v3-state.json"
# The log file name is computed once at import, so a long-running process
# keeps writing to the file named after its start date.
LOG_FILE = LOG_DIR / f"prospector-v3-{datetime.now().strftime('%Y%m%d')}.log"

METROS = ["Charlotte NC", "Atlanta GA", "Orlando FL", "Phoenix AZ"]

# Extended search queries for better coverage
SEARCHES_PER_METRO = [
    '{metro} HOA contact email',
    '{metro} homeowners association website',
    '{metro} HOA management contact',
    '{metro} community association board',
    '{metro} condo association contact',
]

# CRM Config
# SECURITY: a live API credential was committed to this file. It can now be
# supplied through the TWENTY_TOKEN environment variable; the literal below is
# kept only so existing deployments keep working.
# TODO: rotate this key and delete the fallback once the env var is deployed.
_LEGACY_TWENTY_TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzMzMjg0NDMsImV4cCI6MTgwNDc4MTY0MiwianRpIjoiMjBmMTJjOTAtNGQwNy00YmY2LWIzOTctNmM2ZTczOWYxOGM4In0.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930"
TWENTY_TOKEN = os.environ.get("TWENTY_TOKEN") or _LEGACY_TWENTY_TOKEN
TWENTY_BASE = "https://salesforce.hoaledgeriq.com/rest"


def log(msg):
    """Print *msg* with a timestamp prefix and append the same line to LOG_FILE."""
    ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    line = f"[{ts}] {msg}"
    print(line)
    # Explicit UTF-8: messages may contain emoji or other non-ASCII text,
    # which the platform default encoding may be unable to write.
    with open(LOG_FILE, 'a', encoding='utf-8') as f:
        f.write(line + '\n')


def _default_state():
    """Return a fresh state dict with every key initialised."""
    return {
        "metro_index": 0,
        "processed_domains": [],
        "leads_found": 0,
        "cycle_count": 0
    }


def load_state():
    """Load the persisted prospector state.

    Returns:
        dict: the contents of STATE_FILE merged over the defaults, so keys
        missing from an older or partial state file still exist. When the
        file does not exist, the defaults alone are returned.

    Raises:
        json.JSONDecodeError: if STATE_FILE exists but is not valid JSON.
            This is deliberately not swallowed: silently resetting would
            discard the list of already-processed domains.
    """
    state = _default_state()
    if STATE_FILE.exists():
        with open(STATE_FILE, encoding='utf-8') as f:
            state.update(json.load(f))
    return state


def save_state(state):
    """Persist *state* to STATE_FILE as indented JSON.

    The data is written to a sibling temporary file and then moved into place
    with os.replace, so an interruption mid-write cannot leave a truncated
    state file behind.
    """
    tmp_file = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    with open(tmp_file, 'w', encoding='utf-8') as f:
        json.dump(state, f, indent=2)
    os.replace(tmp_file, STATE_FILE)


def extract_domain(url):
    """Return the lower-cased network location of *url* without a leading 'www.'.

    Returns:
        str: the host part (including any port), or '' when *url* has no
        network location (for example, no scheme).
        None: when *url* is not a string or cannot be parsed.
    """
    if not isinstance(url, str):
        return None
    try:
        domain = urlparse(url).netloc.lower()
    except ValueError:
        # urlparse rejects malformed authorities such as an unbalanced '['.
        return None
    if domain.startswith('www.'):
        domain = domain[4:]
    return domain
def is_hoa_domain(domain):
    """Return True when *domain* contains any of the HOA-related keywords.

    The check is a case-insensitive substring match; an empty or None domain
    yields False.
    """
    if not domain:
        return False
    domain_lower = domain.lower()
    hoa_keywords = ['hoa', 'homeowners', 'association', 'community',
                    'condo', 'village', 'creek', 'farms', 'estates']
    return any(kw in domain_lower for kw in hoa_keywords)


def search_web(query, count=5):
    """Run `openclaw web-search` for *query* and return candidate HOA URLs.

    Args:
        query: search string passed to the CLI.
        count: value passed to the CLI's --count option.

    Returns:
        list[str]: output lines that start with 'http', keeping only the
        first URL per domain and only domains accepted by is_hoa_domain.
        An empty list is returned on a non-zero exit, empty output, or any
        error (which is logged).
    """
    log(f"SEARCH: {query}")
    try:
        result = subprocess.run(
            ['openclaw', 'web-search', query, '--count', str(count)],
            capture_output=True,
            text=True,
            timeout=60
        )
        if result.returncode == 0 and result.stdout:
            # Filter to HOA domains, one URL per domain
            seen = set()
            unique = []
            for line in result.stdout.split('\n'):
                url = line.strip()
                if not url.startswith('http'):
                    continue
                dom = extract_domain(url)
                if dom and dom not in seen and is_hoa_domain(dom):
                    seen.add(dom)
                    unique.append(url)
            return unique
    except Exception as e:
        # Best-effort boundary: a failed search must not stop the caller.
        log(f"Search error: {e}")
    return []


def fetch_page(url, max_chars=1500):
    """Fetch *url* through `openclaw web-fetch` and return its text output.

    Args:
        url: page to fetch.
        max_chars: value passed to the CLI's --max-chars option.

    Returns:
        str | None: the command's stdout on exit code 0 (possibly empty),
        otherwise None. Errors are logged and also yield None.
    """
    try:
        result = subprocess.run(
            ['openclaw', 'web-fetch', url, '--max-chars', str(max_chars)],
            capture_output=True,
            text=True,
            timeout=30
        )
        if result.returncode == 0:
            return result.stdout
    except Exception as e:
        log(f"Fetch error: {e}")
    return None


def extract_emails(text):
    """Return up to three distinct, lower-cased e-mail addresses from *text*.

    Addresses containing any of the placeholder / free-mail markers in `bad`
    are dropped. Results keep first-seen order, so the same input always
    yields the same addresses.
    """
    if not text:
        return []
    # The TLD class is letters only (a '|' inside [...] is a literal pipe).
    pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    bad = ['example', 'test', 'domain', 'email', 'noreply', 'no-reply',
           '@gmail.com', '@yahoo.com', '@hotmail.com']
    # NOTE(review): these are substring checks, so a legitimate address whose
    # domain merely contains e.g. "test" ("...westestates.org") is dropped too.
    seen = set()
    emails = []
    for raw in re.findall(pattern, text):
        email = raw.lower()
        if email in seen or any(b in email for b in bad):
            continue
        seen.add(email)
        emails.append(email)
    return emails[:3]


def extract_phones(text):
    """Return up to two distinct 10-digit phone numbers found in *text*.

    Matches are returned exactly as written, in first-seen order. The pattern
    refuses to match when it is directly preceded or followed by another
    digit, so a slice of a longer digit run (an ID, a timestamp) is not
    reported as a phone number.
    """
    if not text:
        return []
    pattern = r'(?<!\d)\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)'
    seen = set()
    phones = []
    for phone in re.findall(pattern, text):
        if phone not in seen:
            seen.add(phone)
            phones.append(phone)
    return phones[:2]


def extract_hoa_name(content, domain):
    """Derive a display name for an HOA from page *content*, else from *domain*.

    Lookup order:
      1. the first line that starts with one or more '#' (a heading);
      2. the first capitalised phrase ending in HOA / Homeowners /
         Community / Association;
      3. *domain* with '-' replaced by spaces, title-cased.
    """
    if not content:
        return domain.replace('-', ' ').title()

    # Look for title. Anchored to the start of a line so that a '#' inside a
    # URL fragment is not taken for a heading, and all leading '#' are consumed.
    match = re.search(r'^[ \t]*#+[ \t]*([^#\s].*)$', content, re.MULTILINE)
    if match:
        return match.group(1).strip()

    # Look for HOA name pattern
    match = re.search(r'([A-Z][A-Za-z\s]+(?:HOA|Homeowners|Community|Association))', content)
    if match:
        return match.group(1).strip()

    return domain.replace('-', ' ').title()


def assess_quality(emails, phones):
    """Grade a lead from its contact details.

    Each e-mail is worth 3 points and each phone number 2 points.

    Returns:
        str: "HOT" for a score of 7 or more, "WARM" for 4 or more,
        otherwise "COLD".
    """
    score = 0
    if emails:
        score += len(emails) * 3
    if phones:
        score += len(phones) * 2

    if score >= 7:
        return "HOT"
    elif score >= 4:
        return "WARM"
    return "COLD"


def _build_note_payload(lead):
    """Serialise *lead* into the JSON request body for a CRM note."""
    hoa_name = lead.get('hoa_name', 'Unknown')
    body = f"""## 🎯 HOA Prospect - {lead['quality']}

**HOA Name:** {hoa_name}
**Metro:** {lead['metro']}
**Website:** {lead['url']}
**Domain:** {lead['domain']}
"""
    if lead.get('emails'):
        body += f"**Email(s):** {', '.join(lead['emails'])}\n"
    if lead.get('phones'):
        body += f"**Phone(s):** {', '.join(lead['phones'])}\n"
    body += f"\n**Source:** Prospector v3\n**Found:** {datetime.now().strftime('%Y-%m-%d %H:%M')}"

    title = f"🎯 {lead['quality']}: {hoa_name} | {lead['metro']}"
    # Serialise the whole document with json.dumps so quotes or backslashes
    # in a scraped HOA name cannot produce invalid JSON.
    return json.dumps({"title": title, "bodyV2": {"markdown": body}})


def push_to_crm(lead):
    """POST *lead* to the CRM as a note via curl.

    Args:
        lead: dict with 'quality', 'metro', 'url' and 'domain', and
            optionally 'hoa_name', 'emails' and 'phones'.

    Returns:
        bool: True when curl exits with status 0, otherwise False. Every
        failure, including a malformed *lead*, is logged and reported as
        False rather than raised.
    """
    try:
        hoa_name = lead.get('hoa_name', 'Unknown')
        note_data = _build_note_payload(lead)

        # --fail makes curl exit non-zero on HTTP 4xx/5xx; without it a
        # rejected request would be logged as a success. -S keeps the error
        # text on stderr despite -s.
        # NOTE(review): the bearer token is passed on the command line and is
        # therefore visible in the process list while curl runs.
        curl_cmd = [
            'curl', '-sS', '--fail', '-X', 'POST',
            f'{TWENTY_BASE}/notes',
            '-H', f'Authorization: Bearer {TWENTY_TOKEN}',
            '-H', 'Content-Type: application/json',
            '-d', note_data
        ]

        result = subprocess.run(curl_cmd, capture_output=True, text=True, timeout=10)
        if result.returncode == 0:
            log(f"CRM PUSH: {hoa_name} ({lead['quality']})")
            return True
        else:
            log(f"CRM FAIL: {result.stderr[:100]}")
    except Exception as e:
        log(f"CRM ERROR: {e}")
    return False


def save_lead(lead):
    """Write *lead* as indented JSON to LEADS_DIR/<domain>.json."""
    lead_file = LEADS_DIR / f"{lead['domain']}.json"
    with open(lead_file, 'w', encoding='utf-8') as f:
        json.dump(lead, f, indent=2)


# NOTE(review): main() starts at this point in the original text but runs past
# the end of the visible section, so it is not reproduced or modified here.
Already processed {domain}") continue log(f"FETCH: {url}") content = fetch_page(url) if not content: state['processed_domains'].append(domain) save_state(state) continue emails = extract_em