#!/usr/bin/env python3
"""Junior AE v2 - Browser-like website validation"""
import json
import os
import re
import ssl
import time
import urllib.error
import urllib.request
from datetime import datetime, timedelta
from pathlib import Path

# SECURITY NOTE(review): this disables TLS certificate verification for ALL
# https requests made by this process. Kept to preserve existing behavior
# (targets may use self-signed certs), but confirm this is intentional.
ssl._create_default_https_context = ssl._create_unverified_context

SCRIPT_DIR = Path(__file__).parent
for d in [SCRIPT_DIR / "state", SCRIPT_DIR / "logs"]:
    d.mkdir(parents=True, exist_ok=True)

STATE_FILE = SCRIPT_DIR / "state" / "jae-v2-state.json"
LOG_FILE = SCRIPT_DIR / "logs" / f"jae-v2-{datetime.now().strftime('%Y%m%d')}.log"

CRM_URL = "https://salesforce.hoaledgeriq.com/rest"
# SECURITY: an API bearer token was hardcoded in source. The environment
# variable now takes precedence; the literal is kept only as a fallback so
# existing deployments keep working. Rotate this credential and remove the
# fallback once the env var is set everywhere.
CRM_TOKEN = os.environ.get(
    "CRM_TOKEN",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5M2FmNGFmNS0zZWQ0LTQ1ZDMtOWE5Zi01MDMzZjc3YTY3MjMiLCJ0eXBlIjoiQVBJX0tFWSIsIndvcmtzcGFjZUlkIjoiOTNhZjRhZjUtM2VkNC00NWQzLTlhOWYtNTAzM2Y3N2E2NzIzIiwiaWF0IjoxNzczMzI4NDQzLCJleHAiOjE4MDQ3ODE2NDIsImp0aSI6IjIwZjEyYzkwLTRkMDctNGJmNi1iMzk3LTZjNmU3MzlmMThjOCJ9.zeM5NvwCSGEcz99m2LYtgb0sVD6WUXcCF7SwonFg930",
)


def log(msg):
    """Print *msg* with an HH:MM:SS timestamp and append it to today's log file."""
    ts = datetime.now().strftime('%H:%M:%S')
    print(f"[{ts}] {msg}")
    with open(LOG_FILE, 'a') as f:
        f.write(f"[{ts}] {msg}\n")


def load_state():
    """Return the persisted state dict, or a fresh default.

    The default looks back 2 hours so a first run still picks up recent notes.
    """
    if STATE_FILE.exists():
        return json.loads(STATE_FILE.read_text())
    return {
        "last_check": (datetime.now() - timedelta(hours=2)).isoformat(),
        "processed": 0,
        "upgraded": 0,
    }


def save_state(s):
    """Persist state dict *s* to STATE_FILE as pretty-printed JSON."""
    STATE_FILE.write_text(json.dumps(s, indent=2))


def fetch_notes():
    """Fetch the 50 most recent CRM notes (newest first).

    Best-effort: any network/parse error is logged and an empty list is
    returned so the caller's loop keeps running.
    """
    try:
        req = urllib.request.Request(
            f"{CRM_URL}/notes?limit=50&order[createdAt]=desc",
            headers={
                "Authorization": f"Bearer {CRM_TOKEN}",
                "Accept": "application/json",
            },
        )
        with urllib.request.urlopen(req, timeout=15) as r:
            return json.loads(r.read().decode()).get('data', {}).get('notes', [])
    except Exception as e:
        log(f"Fetch error: {e}")
        return []


def get_temp(title):
    """Extract a lead temperature ('HOT'/'WARM'/'COLD') from a note title.

    Case-insensitive substring match, HOT taking precedence over WARM over
    COLD; returns None when no temperature keyword is present.
    """
    t = title.upper()
    if 'HOT' in t:
        return 'HOT'
    if 'WARM' in t:
        return 'WARM'
    if 'COLD' in t:
        return 'COLD'
    return None


def extract_url(body):
    """Pull a website URL out of a note body.

    Prefers a URL labelled ``Site:`` (including the markdown form
    ``**Site:** URL``); otherwise falls back to the first bare http(s) URL.
    Returns None when *body* is empty or contains no URL.
    """
    if not body:
        return None
    # FIX: the original pattern r'Site:\s*(https?://...)' could never match
    # the markdown label "**Site:** URL" — the trailing '**' after the colon
    # broke the match, so a labelled URL lost priority to any earlier bare
    # URL via the fallback below. \** tolerates the markdown asterisks.
    m = re.search(r'Site:\**\s*(https?://[^\s<]+)', str(body))
    if m:
        return m.group(1).strip()
    # Fallback - any HTTP URL
    m = re.search(r'(https?://[^\s<"]+)', str(body))
    return m.group(1) if m else None


def validate_website(url):
    """Browser-like validation - GET request, check for real website content.

    Returns a ``(ok: bool, reason: str)`` tuple. A site passes when it
    answers HTTP 200 with more than 500 bytes of content that carries basic
    HTML markers.
    """
    if not url:
        return False, "no_url"
    if not url.startswith('http'):
        url = 'https://' + url
    try:
        # Browser-like headers so bot-hostile servers respond normally.
        req = urllib.request.Request(
            url,
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.5",
                "Accept-Encoding": "identity",
                "Connection": "keep-alive",
            },
        )
        with urllib.request.urlopen(
            req, timeout=15, context=ssl._create_unverified_context()
        ) as r:
            content = r.read()
            code = r.getcode()
        # Must return 200
        if code != 200:
            return False, f"http_{code}"
        # Must have content > 500 bytes
        if len(content) < 500:
            return False, "too_small"
        # Parse HTML
        html = content.decode('utf-8', errors='ignore')[:3000].lower()
        # NOTE(review): SOURCE was truncated mid-function at this point
        # ("has_title = '"). The marker checks and error handling below are a
        # conservative reconstruction of the stated intent ("check for real
        # website markers") — confirm against the original file.
        has_title = '<title' in html
        has_markup = '<html' in html or '<body' in html or '<div' in html
        if not (has_title or has_markup):
            return False, "no_html_markers"
        return True, "ok"
    except urllib.error.HTTPError as e:
        # urlopen raises on non-2xx; report the status code.
        return False, f"http_{e.code}"
    except Exception as e:
        # DNS failure, timeout, TLS error, bad redirect, etc.
        return False, f"error_{type(e).__name__}"