#!/usr/bin/env python3
"""Sales Prospector v4 - finds HOA leads via web search and saves them locally and to the CRM."""
import json
import os
import re
import subprocess
import time
from datetime import datetime
from pathlib import Path

SCRIPT_DIR = Path(__file__).parent.absolute()
STATE_DIR = SCRIPT_DIR / "state"
LOG_DIR = SCRIPT_DIR / "logs"
LEADS_DIR = SCRIPT_DIR / "leads"
for d in (STATE_DIR, LOG_DIR, LEADS_DIR):
    d.mkdir(parents=True, exist_ok=True)

STATE_FILE = STATE_DIR / "prospector-v4-state.json"
LOG_FILE = LOG_DIR / f"prospector-v4-{datetime.now().strftime('%Y%m%d')}.log"

METROS = ["Charlotte NC", "Atlanta GA", "Orlando FL", "Phoenix AZ"]

# SECURITY: this bearer token was hard-coded in source control. Prefer the
# TWENTY_TOKEN environment variable; the embedded value is kept only as a
# backward-compatible fallback and should be rotated and removed.
TWENTY_TOKEN = os.environ.get(
    "TWENTY_TOKEN",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzMzMjg0NDMsImV4cCI6MTgwNDc4MTY0MiwianRpIjoiMjBmMTJjOTAtNGQwNy00YmY2LWIzOTctNmM2ZTczOWYxOGM4In0.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930",
)
TWENTY_BASE = "https://salesforce.hoaledgeriq.com/rest"


def log(msg):
    """Print *msg* with a timestamp and append it to today's log file."""
    ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    line = f"[{ts}] {msg}"
    print(line)
    with open(LOG_FILE, 'a') as f:
        f.write(line + '\n')


def load_state():
    """Load persisted crawl state, or return a fresh default state dict."""
    if STATE_FILE.exists():
        with open(STATE_FILE) as f:
            return json.load(f)
    return {"metro_index": 0, "processed_domains": [], "leads_found": 0}


def save_state(state):
    """Persist the crawl state dict to STATE_FILE as JSON."""
    with open(STATE_FILE, 'w') as f:
        json.dump(state, f)


def search_web(query):
    """Run an `openclaw web-search` for *query* and return up to 5 result URLs.

    Returns [] on any failure (non-zero exit, timeout, missing binary).
    """
    log(f"SEARCH: {query}")
    try:
        result = subprocess.run(
            ['openclaw', 'web-search', query, '--count', '5'],
            capture_output=True, text=True, timeout=60,
        )
        if result.returncode == 0:
            urls = []
            for line in result.stdout.split('\n'):
                line = line.strip()
                if line.startswith('http'):
                    # Lines may be "URL title..."; keep only the URL token.
                    urls.append(line.split()[0] if ' ' in line else line)
            return urls[:5]
    except Exception as e:
        log(f"Search error: {e}")
    return []


def fetch_page(url):
    """Fetch up to 2000 chars of *url* via `openclaw web-fetch`; '' on failure."""
    try:
        result = subprocess.run(
            ['openclaw', 'web-fetch', url, '--max-chars', '2000'],
            capture_output=True, text=True, timeout=30,
        )
        if result.returncode == 0:
            return result.stdout
    except Exception as e:
        log(f"Fetch error: {e}")
    return ""


def extract_emails(text):
    """Return up to 3 unique, lowercased emails from *text*.

    Filters out placeholder domains, no-reply addresses, and personal
    providers (gmail/yahoo/hotmail/aol), plus very short matches.
    """
    if not text:
        return []
    pattern = r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}'
    emails = re.findall(pattern, text)
    bad = ['example.com', 'test.com', 'domain.com', 'email.com', 'noreply',
           '@gmail.com', '@yahoo.com', '@hotmail.com', '@aol.com']
    filtered = [e.lower() for e in emails
                if len(e) > 10 and not any(b in e.lower() for b in bad)]
    return list(set(filtered))[:3]


def extract_phones(text):
    """Return up to 2 unique US phone numbers from *text*, digits only."""
    if not text:
        return []
    pattern = r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
    phones = re.findall(pattern, text)
    return list(set([re.sub(r'[^\d]', '', p) for p in phones]))[:2]


def extract_domain(url):
    """Return the host of *url*, lowercased, with any leading 'www.' stripped."""
    try:
        from urllib.parse import urlparse
        dom = urlparse(url).netloc.lower()
        return dom[4:] if dom.startswith('www.') else dom
    except Exception:
        # Crude fallback: strip the scheme and take everything before the path.
        return url.replace('https://', '').replace('http://', '').split('/')[0]


def assess_quality(emails, phones):
    """Score a lead: 3 pts per email + 2 per phone -> HOT (>=7) / WARM (>=4) / COLD."""
    score = len(emails) * 3 + len(phones) * 2
    return "HOT" if score >= 7 else "WARM" if score >= 4 else "COLD"


def push_to_crm(lead):
    """POST *lead* as a note to the Twenty CRM; return True on success."""
    try:
        body = (
            f"## {lead['quality']} Lead: {lead['hoa_name']}\n\n"
            f"**Metro:** {lead['metro']}\n"
            f"**Website:** {lead['url']}\n"
            f"**Domain:** {lead['domain']}\n"
        )
        if lead.get('emails'):
            body += f"**Emails:** {', '.join(lead['emails'])}\n"
        if lead.get('phones'):
            body += f"**Phones:** {', '.join(lead['phones'])}\n"
        body += f"\n_Found: {datetime.now().strftime('%Y-%m-%d %H:%M')}_"
        note_data = {"title": f"{lead['quality']}: {lead['hoa_name']}", "body": body}
        # --fail makes curl exit non-zero on HTTP 4xx/5xx; without it a silent
        # (-s) curl exits 0 even when the API rejects the request.
        curl_cmd = [
            'curl', '-s', '--fail', '-X', 'POST', f'{TWENTY_BASE}/notes',
            '-H', f'Authorization: Bearer {TWENTY_TOKEN}',
            '-H', 'Content-Type: application/json',
            '-d', json.dumps(note_data),
        ]
        result = subprocess.run(curl_cmd, capture_output=True, text=True, timeout=10)
        if result.returncode == 0:
            log(f"CRM SUCCESS: {lead['hoa_name']}")
            return True
    except Exception as e:
        log(f"CRM error: {e}")
    return False


def save_lead(lead):
    """Write *lead* to LEADS_DIR as <domain>.json (slashes sanitized)."""
    lead_file = LEADS_DIR / f"{lead['domain'].replace('/', '_')}.json"
    with open(lead_file, 'w') as f:
        json.dump(lead, f, indent=2)
    log(f"SAVED: {lead_file.name}")


def main():
    """Run the prospecting loop: search each metro, extract contacts, save leads.

    Runs forever; state (metro rotation, seen domains, lead count) is
    persisted after every cycle so the script can be restarted safely.
    """
    log("=== Prospector v4 Started ===")
    state = load_state()
    cycle = 0
    while True:
        cycle += 1
        metro = METROS[state['metro_index']]
        log(f"=== CYCLE {cycle}: {metro} ===")
        queries = [
            f'{metro} HOA contact email',
            f'{metro} homeowners association',
            f'{metro} HOA management company',
        ]
        found_urls = []
        for query in queries:
            found_urls.extend(search_web(query))
        log(f"Found {len(found_urls)} URLs to check")

        # Process each URL (capped per cycle to stay polite).
        new_leads = 0
        for url in found_urls[:6]:
            domain = extract_domain(url)
            if domain in state['processed_domains'] or not domain:
                continue
            state['processed_domains'].append(domain)
            log(f"FETCH: {url[:60]}...")
            content = fetch_page(url)
            if not content:
                continue
            emails = extract_emails(content)
            phones = extract_phones(content)
            if emails or phones:
                hoa_name = domain.split('.')[0].replace('-', ' ').title() + " HOA"
                lead = {
                    'hoa_name': hoa_name,
                    'metro': metro,
                    'url': url,
                    'domain': domain,
                    'emails': emails,
                    'phones': phones,
                    'quality': assess_quality(emails, phones),
                    'found_at': datetime.now().isoformat(),
                }
                save_lead(lead)
                push_to_crm(lead)
                state['leads_found'] += 1
                new_leads += 1
                log(f"LEAD: {hoa_name} ({lead['quality']}) - "
                    f"{len(emails)} emails, {len(phones)} phones")
            else:
                log(f"No contacts on {domain}")

        save_state(state)
        log(f"Cycle complete: {new_leads} new leads, total: {state['leads_found']}")

        # Rotate to the next metro.
        state['metro_index'] = (state['metro_index'] + 1) % len(METROS)

        # Throttle harder during business hours.
        hour = datetime.now().hour
        delay = 120 if 9 <= hour < 18 else 60
        log(f"Sleeping {delay}s...")
        time.sleep(delay)


if __name__ == "__main__":
    main()