#!/usr/bin/env python3
"""
JAE v5.1 - Website & Budget Research Agent (Fixed for CRM API)

- Properly handles CRM's bodyV2 blocknote format
- Uses temp field for temperature
- Processes ALL leads with website research
- Tracks processed leads to avoid re-processing
- Slow, deliberate pace (1-2 min/lead)

The CRM API token may be supplied through the ``JAE_CRM_TOKEN`` environment
variable; when it is unset the built-in token is used.
"""

import json
import os
import re
import ssl
import time
import urllib.request
from datetime import datetime
from pathlib import Path
from urllib.parse import quote, urljoin

SCRIPT_DIR = Path(__file__).parent
for d in [SCRIPT_DIR / "state", SCRIPT_DIR / "logs"]:
    d.mkdir(parents=True, exist_ok=True)

STATE_FILE = SCRIPT_DIR / "state" / "jae-v5-state.json"
LOG_FILE = SCRIPT_DIR / "logs" / f"jae-v5-{datetime.now().strftime('%Y%m%d')}.log"

CRM_URL = "https://salesforce.hoaledgeriq.com/rest"
# SECURITY NOTE(review): a live API token is committed in source. The
# environment variable takes precedence so the token can be rotated without a
# code change; the literal fallback is kept only for backward compatibility
# and should be removed (and the token revoked) once deployments set the
# variable.
CRM_TOKEN = os.environ.get(
    "JAE_CRM_TOKEN",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzczMzI4NDQzLCJleHAiOjE4MDQ3ODE2NDIsImp0aSI6IjIwZjEyYzkwLTRkMDctNGJmNi1iMzk3LTZjNmU3MzlmMThjOCJ9.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930",
)

# SECURITY NOTE(review): certificate and hostname verification are disabled
# for every request, including the CRM calls that carry the bearer token.
# Left unchanged because some lead websites presumably have broken
# certificates; consider a verifying context for the CRM calls.
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE

# Temperature levels, coldest first.
TEMP_ORDER = ['COLD', 'WARM', 'HOT']

# Pause between leads. The module docstring and the log message both
# advertise a 90 second pace, so the sleep and the message share this value.
LEAD_DELAY_SECONDS = 90

# Maximum number of processed note ids remembered in the state file.
MAX_TRACKED_IDS = 2000


def log(msg):
    """Print *msg* with a HH:MM:SS timestamp and append it to LOG_FILE."""
    ts = datetime.now().strftime('%H:%M:%S')
    line = f"[{ts}] {msg}"
    print(line)
    # Messages contain emoji; an explicit encoding avoids UnicodeEncodeError
    # on platforms whose default text encoding is not UTF-8.
    with open(LOG_FILE, 'a', encoding='utf-8') as f:
        f.write(line + "\n")


def load_state():
    """Return the persisted state dict, or a fresh one if none exists.

    A corrupt state file is deliberately allowed to raise: silently starting
    from an empty state would re-process every lead.
    """
    if STATE_FILE.exists():
        return json.loads(STATE_FILE.read_text(encoding='utf-8'))
    return {"processed_ids": [], "last_run": None}


def save_state(s):
    """Write the state dict *s* to STATE_FILE as indented JSON.

    The data is written to a sibling temp file and then moved into place so
    that an interrupted run cannot leave a truncated state file behind.
    """
    tmp_file = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    tmp_file.write_text(json.dumps(s, indent=2), encoding='utf-8')
    tmp_file.replace(STATE_FILE)


def _build_opener():
    """Return a urllib opener that uses the module's shared ssl_context."""
    return urllib.request.build_opener(
        urllib.request.HTTPSHandler(context=ssl_context)
    )


def fetch_all_notes():
    """Fetch ALL notes from CRM with pagination.

    Returns the list of note dicts collected so far; on any error the error
    is logged and the notes fetched up to that point are returned.
    """
    all_notes = []
    end_cursor = None
    opener = _build_opener()

    log("Fetching all leads from CRM (with pagination)...")

    while True:
        try:
            # NOTE(review): query parameter names (limit / order / after) are
            # kept as originally written - confirm against the CRM API docs.
            url = f"{CRM_URL}/notes?limit=200&order[createdAt]=desc"
            if end_cursor:
                # Cursors are opaque strings and may contain characters such
                # as '+', '/' or '=' that must be escaped in a query string.
                url += f"&after={quote(str(end_cursor), safe='')}"

            req = urllib.request.Request(
                url,
                headers={
                    "Authorization": f"Bearer {CRM_TOKEN}",
                    "Accept": "application/json",
                },
            )
            with opener.open(req, timeout=30) as r:
                data = json.loads(r.read().decode())

            notes = (data.get('data') or {}).get('notes') or []
            all_notes.extend(notes)

            # Check pagination
            page_info = data.get('pageInfo') or {}
            has_more = page_info.get('hasNextPage', False)
            next_cursor = page_info.get('endCursor')

            log(f"  Fetched {len(notes)} leads (total: {len(all_notes)})")

            if not has_more:
                break
            if not next_cursor or next_cursor == end_cursor:
                # Without an advancing cursor the same page would be
                # requested forever.
                log("  Pagination cursor did not advance - stopping fetch")
                break
            end_cursor = next_cursor

        except Exception as e:
            # Best effort: keep whatever was fetched before the failure.
            log(f"Fetch error: {e}")
            break

    log(f"Total leads fetched: {len(all_notes)}")
    return all_notes


def get_existing_temp(note):
    """Extract existing temperature from note.

    The ``temp`` field wins when it holds a valid level (case-insensitive);
    otherwise a ``HOT:`` / ``WARM:`` / ``COLD:`` title prefix is used.
    Returns 'COLD' when neither source yields a level.
    """
    # Check temp field first. No default here: a default of 'COLD' would
    # short-circuit the title fallback whenever the field is absent.
    temp = note.get('temp')
    if isinstance(temp, str) and temp.upper() in TEMP_ORDER:
        return temp.upper()

    # Fallback to title
    title = (note.get('title') or '').upper()
    for level in ('HOT', 'WARM', 'COLD'):
        if title.startswith(f"{level}:"):
            return level
    return 'COLD'


def extract_url_from_note(note):
    """Extract URL from note body or title.

    Returns the first http(s) URL found in ``bodyV2.markdown``; failing that,
    a ``https://`` URL built from a ``<TEMP>: domain.tld`` title; else None.
    """
    title = note.get('title') or ''
    bodyV2 = note.get('bodyV2', {})

    # Try to extract from bodyV2 markdown
    markdown = bodyV2.get('markdown', '') if isinstance(bodyV2, dict) else ''
    if not isinstance(markdown, str):
        markdown = ''

    # Search in markdown for URLs
    url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
    matches = re.findall(url_pattern, markdown)
    if matches:
        # ')' is stripped too: a markdown link "[text](url)" would otherwise
        # leave the closing parenthesis glued to the URL.
        return matches[0].rstrip('.,;:)')

    # Try title pattern: "COLD: domain.com"
    domain_match = re.search(
        r'(?:HOT|WARM|COLD):\s*([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
        title,
        re.IGNORECASE,
    )
    if domain_match:
        return f"https://{domain_match.group(1)}"

    return None


def search_budget_on_site(base_url):
    """
    Search website for budget PDF
    Returns: (found_budget: bool, unit_count: int|None, details: str)

    Only the page at *base_url* itself is fetched. ``unit_count`` is the first
    number in the 10-5000 range that matches one of the unit patterns, or
    None. On any fetch/parse error the result is
    ``(False, None, "Site access error: ...")``.
    """
    log(f"  🔍 Searching: {base_url}")

    try:
        req = urllib.request.Request(
            base_url,
            headers={'User-Agent': 'Mozilla/5.0 (compatible; JAE-Bot/1.0)'},
        )
        with _build_opener().open(req, timeout=15) as r:
            content = r.read().decode('utf-8', errors='ignore')

        content_lower = content.lower()
        found_budget = False
        unit_count = None
        details = []

        # Look for budget PDFs
        pdf_patterns = ['budget', 'financial', 'reserve', 'statement']
        for pattern in pdf_patterns:
            if pattern in content_lower:
                pdf_match = re.search(
                    rf'href="([^"]*{pattern}[^"]*\.pdf)"', content, re.IGNORECASE
                )
                if pdf_match:
                    found_budget = True
                    details.append(f"Found budget PDF: {pdf_match.group(1)}")
                    log(f"  ✅ Budget PDF found: {pdf_match.group(1)}")
                    break

        # If no direct PDF link, check for budget mentions
        if not found_budget and 'budget' in content_lower:
            found_budget = True
            details.append("Budget mentioned on page")
            log(f"  ✅ Budget found (mentioned)")

        # Look for unit count patterns
        unit_patterns = [
            r'(\d{1,4})\s*(?:homes|units|lots|properties|residences)',
            r'(\d{1,4})\s*-?\s*(?:home|unit|lot|property|residence)\s*(?:community|association|complex)',
            r'community\s*of\s*(\d{1,4})',
            r'(\d{1,4})\s*home\s*owners',
        ]

        for pattern in unit_patterns:
            for match in re.finditer(pattern, content, re.IGNORECASE):
                try:
                    candidate = int(match.group(1))
                except ValueError:
                    continue
                # Only accept a plausible size; rejected candidates must not
                # leak into the returned unit_count.
                if 10 <= candidate <= 5000:  # Reasonable range
                    unit_count = candidate
                    details.append(f"Unit count: {unit_count}")
                    log(f"  📊 Found unit count: {unit_count}")
                    break
            if unit_count is not None:
                break

        if not details:
            details.append("No budget found")

        return found_budget, unit_count, "; ".join(details)

    except Exception as e:
        log(f"  ⚠️ Site access issue: {str(e)[:100]}")
        return False, None, f"Site access error: {str(e)[:100]}"


def elevate_temp(current_temp, levels):
    """Elevate temperature by N levels.

    The result is clamped to the COLD..HOT range; an unrecognised
    *current_temp* is treated as COLD.
    """
    try:
        current_idx = TEMP_ORDER.index(current_temp)
    except ValueError:
        current_idx = 0

    # Clamp at both ends: a negative index would wrap around to HOT.
    new_idx = max(0, min(current_idx + levels, len(TEMP_ORDER) - 1))
    return TEMP_ORDER[new_idx]


def update_note_with_research(note, new_temp, unit_count, research_notes):
    """Update note with research findings using CRM API.

    PATCHes the note's title (re-prefixed with *new_temp*), ``temp`` field
    and ``bodyV2`` (existing markdown plus an appended research section).
    Returns True on success, False if the note has no id or the request fails.
    """
    try:
        note_id = note.get('id')
        if not note_id:
            log("  ✗ Update error: note has no id")
            return False

        current_title = note.get('title') or ''
        bodyV2 = note.get('bodyV2', {})

        # Get existing markdown
        markdown = bodyV2.get('markdown', '') if isinstance(bodyV2, dict) else ''
        blocknote = bodyV2.get('blocknote', '') if isinstance(bodyV2, dict) else ''

        # Remove old temperature prefix from title. Case-insensitive so it
        # agrees with how the prefix is recognised when reading the note.
        clean_title = re.sub(
            r'^(HOT|WARM|COLD):\s*', '', current_title, flags=re.IGNORECASE
        )
        new_title = f"{new_temp}: {clean_title}"

        # Add research to markdown
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
        research_section = f"\n\n---\n**JAE v5 Research ({timestamp}):** {research_notes}"
        if unit_count:
            research_section += f"\n**Units:** {unit_count}"

        new_markdown = (markdown or '') + research_section

        # Keep existing blocknote structure, just update markdown
        new_bodyV2 = {
            "blocknote": blocknote,
            "markdown": new_markdown,
        }

        # Prepare patch data - only update what's needed
        patch_data = json.dumps({
            "title": new_title,
            "temp": new_temp,
            "bodyV2": new_bodyV2,
        }).encode()

        req = urllib.request.Request(
            f"{CRM_URL}/notes/{note_id}",
            data=patch_data,
            headers={
                "Authorization": f"Bearer {CRM_TOKEN}",
                "Content-Type": "application/json",
            },
            method='PATCH',
        )
        with _build_opener().open(req, timeout=20):
            log(f"  ✅ Note updated: {new_title}")
            return True
    except Exception as e:
        log(f"  ✗ Update error: {e}")
        return False


def create_opportunity(note, temp):
    """Create opportunity for HOT/WARM leads.

    Requires the note to carry a ``personId``. *temp* is accepted for
    interface compatibility but is not used. Returns True when the POST
    succeeds, False otherwise.
    """
    try:
        person_id = note.get('personId')
        if not person_id:
            log(f"  ⚠️ Skip upgrade: No person ID")
            return False

        opp_name = f"Lead: {note.get('title', '')}"

        opp_data = {
            "name": opp_name[:100],
            "stage": "NEW",
            "pointOfContactId": person_id,
            "ownerId": "ecf52aad-4827-40c9-9475-b68f3ca9a924",
        }

        req = urllib.request.Request(
            f"{CRM_URL}/opportunities",
            data=json.dumps(opp_data).encode(),
            headers={
                "Authorization": f"Bearer {CRM_TOKEN}",
                "Content-Type": "application/json",
            },
            method='POST',
        )
        with _build_opener().open(req, timeout=20) as r:
            opp = json.loads(r.read().decode())
            # The request already succeeded; an unexpected response shape
            # should not turn that into a reported failure.
            opp_id = opp.get('id', 'N/A') if isinstance(opp, dict) else 'N/A'
            log(f"  ✅ UPGRADED to Opportunity: {opp_id}")
            return True
    except Exception as e:
        log(f"  ✗ Create opp error: {e}")
        return False


def _mark_processed(state, processed_ids, note_id):
    """Record *note_id* as processed and persist the state file.

    Ids are kept in processing order so that the MAX_TRACKED_IDS cap drops
    the oldest entries rather than arbitrary ones.
    """
    if note_id not in processed_ids:
        processed_ids.add(note_id)
        state['processed_ids'].append(note_id)
    state['processed_ids'] = state['processed_ids'][-MAX_TRACKED_IDS:]
    state['last_run'] = datetime.now().isoformat()
    save_state(state)


def main():
    """Research every unprocessed lead and update the CRM accordingly.

    For each note not yet recorded in the state file: read its temperature,
    find a website URL, scan the site, elevate the temperature, PATCH the
    note, create an opportunity, then record the note as processed.
    """
    log("=" * 60)
    log("JAE v5.1 Starting - Website & Budget Research")
    log("=" * 60)

    state = load_state()
    state['processed_ids'] = list(state.get('processed_ids') or [])
    processed_ids = set(state['processed_ids'])

    notes = fetch_all_notes()

    # Filter to unprocessed only
    unprocessed = [n for n in notes if n.get('id') not in processed_ids]

    log(f"\nTotal leads in CRM: {len(notes)}")
    log(f"Already processed: {len(processed_ids)}")
    log(f"New leads to process: {len(unprocessed)}")
    log("=" * 60)

    if not unprocessed:
        log("✅ No new leads to process")
        return

    upgraded = 0
    processed_count = 0

    for i, note in enumerate(unprocessed, 1):
        note_id = note.get('id')
        title = note.get('title') or ''

        log(f"\n[{i}/{len(unprocessed)}] Processing: {title[:60]}...")

        # Get existing temperature
        current_temp = get_existing_temp(note)
        log(f"  Current temp: {current_temp}")

        # Extract URL
        url = extract_url_from_note(note)

        if not url:
            log(f"  ⚠️ No website found - keeping {current_temp}")
            processed_count += 1
            _mark_processed(state, processed_ids, note_id)
            continue

        log(f"  🌐 Website found: {url}")

        # Research website
        found_budget, unit_count, details = search_budget_on_site(url)

        # Calculate elevation
        # NOTE(review): a site access error also lands in the "no budget"
        # branch and still elevates by one level - confirm this is intended.
        if found_budget:
            elevation = 2
            reason = "Budget found on website"
        else:
            elevation = 1
            reason = "Website exists, no budget"

        new_temp = elevate_temp(current_temp, elevation)
        log(f"  📈 Elevating: {current_temp} → {new_temp} ({reason})")

        # Update note
        # NOTE(review): the result is ignored, so a failed update is still
        # recorded as processed and never retried.
        update_note_with_research(note, new_temp, unit_count, details)

        # Create opportunity if HOT or WARM
        if new_temp in ['HOT', 'WARM']:
            if create_opportunity(note, new_temp):
                upgraded += 1
        else:
            log(f"  Keeping as COLD")

        # Save state
        processed_count += 1
        _mark_processed(state, processed_ids, note_id)

        # Pace: deliberate pause between leads (gentle, no rate limits)
        log(f"  ⏳ Waiting {LEAD_DELAY_SECONDS}s before next lead...")
        time.sleep(LEAD_DELAY_SECONDS)

    log("\n" + "=" * 60)
    log(f"JAE v5 Complete: {processed_count} processed, {upgraded} upgraded")
    log("=" * 60)


if __name__ == "__main__":
    main()