#!/usr/bin/env python3
"""Reddit Scout - HOA Edition

Monitors r/HOA for relevant discussions.
Runs twice daily: 8am and 3pm.
"""
import json
import urllib.request
from datetime import datetime
from pathlib import Path

SCRIPT_DIR = Path(__file__).parent
for d in [SCRIPT_DIR / "state", SCRIPT_DIR / "logs", SCRIPT_DIR / "digests"]:
    d.mkdir(parents=True, exist_ok=True)

STATE_FILE = SCRIPT_DIR / "state" / "scout-state.json"
LOG_FILE = SCRIPT_DIR / "logs" / f"scout-{datetime.now().strftime('%Y%m%d')}.log"
DIGEST_FILE = SCRIPT_DIR / "digests" / f"digest-{datetime.now().strftime('%Y%m%d-%H%M')}.json"

KEYWORDS = ["budget", "reserve", "assessment", "investments", "planning", "spreadsheet"]
NEGATIVE = ["scam", "worst"]
IGNORE_AFTER = 48  # hours; posts older than this are skipped


def log(msg):
    ts = datetime.now().strftime('%H:%M:%S')
    print(f"[{ts}] {msg}")
    with open(LOG_FILE, 'a') as f:
        f.write(f"[{ts}] {msg}\n")


def load_state():
    if STATE_FILE.exists():
        return json.loads(STATE_FILE.read_text())
    return {"processed_ids": [], "total_scanned": 0, "total_matches": 0}


def save_state(s):
    STATE_FILE.write_text(json.dumps(s, indent=2))


def fetch_reddit_posts():
    """Fetch posts from r/HOA using Reddit's public JSON API."""
    try:
        # Reddit's public JSON endpoint (no auth needed for read)
        url = "https://www.reddit.com/r/HOA/new.json?limit=50"
        req = urllib.request.Request(
            url,
            headers={"User-Agent": "Mozilla/5.0 (HOA Scout Bot 1.0)"}
        )
        with urllib.request.urlopen(req, timeout=20) as r:
            data = json.loads(r.read().decode())
        return data.get('data', {}).get('children', [])
    except Exception as e:
        log(f"Reddit fetch error: {e}")
        return []


def score_post(post_data):
    """Score post by relevance to HOA Ledger IQ."""
    title = post_data.get('title', '').lower()
    selftext = post_data.get('selftext', '').lower()
    score = 0

    # Keyword matches (0-5 points)
    matched = [k for k in KEYWORDS if k in title or k in selftext]
    score += min(len(matched) * 2, 5)

    # Engagement (0-3 points)
    upvotes = post_data.get('ups', 0)
    if upvotes > 50:
        score += 3
    elif upvotes > 20:
        score += 2
    elif upvotes > 5:
        score += 1

    # Comments (0-3 points)
    comments = post_data.get('num_comments', 0)
    if comments > 20:
        score += 3
    elif comments > 5:
        score += 2
    elif comments > 0:
        score += 1

    # Question post bonus (+2)
    if any(q in title for q in ['?', 'how', 'what', 'best', 'recommend']):
        score += 2

    # Negative keyword penalty
    if any(n in title or n in selftext for n in NEGATIVE):
        score -= 3

    return max(score, 0), matched
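
# Worked scoring example (hypothetical post, illustrative values only):
# a post titled "What's the best reserve budget spreadsheet?" with 25
# upvotes and 8 comments matches 3 keywords (min(3 * 2, 5) = 5 pts),
# earns 2 engagement pts, 2 comment pts, and the +2 question bonus,
# for 11 total, comfortably above the inclusion threshold in scout().
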
def analyze_sentiment(text):
    """Basic keyword-count sentiment analysis."""
    text = text.lower()
    frustrated = ['stupid', 'ridiculous', 'nightmare', 'horrible', 'terrible', 'angry']
    positive = ['great', 'helpful', 'thank', 'appreciate', 'awesome']
    fcount = sum(1 for w in frustrated if w in text)
    pcount = sum(1 for w in positive if w in text)
    if fcount > pcount:
        return "frustrated"
    if pcount > fcount:
        return "positive"
    return "neutral"


def is_recent(created_utc):
    """Check if post is within the last IGNORE_AFTER hours."""
    post_time = datetime.fromtimestamp(created_utc)
    age_hours = (datetime.now() - post_time).total_seconds() / 3600
    return age_hours <= IGNORE_AFTER


def format_digest(posts):
    """Format posts into digest message."""
    if not posts:
        return "🏘️ No relevant HOA discussions found in last scan."

    lines = [
        "🏘️ *REDDIT SCOUT — HOA DIGEST*",
        f"📅 {datetime.now().strftime('%b %d, %Y at %I:%M %p')}",
        f"🎯 *{len(posts)} relevant posts found*",
        "",
        "━━━━━━━━━━━━━━━",
        "*TOP OPPORTUNITIES*",
        "━━━━━━━━━━━━━━━",
        ""
    ]

    for i, p in enumerate(posts[:10], 1):
        data = p['data']
        age = (datetime.now() - datetime.fromtimestamp(data['created_utc'])).total_seconds() // 3600
        lines.append(f"*{i}.* [{p['score']} pts] r/HOA ({int(age)}h ago)")
        lines.append(f"📌 *{data['title'][:60]}*")
        lines.append(f"   ⬆️ {data.get('ups', 0)}  💬 {data.get('num_comments', 0)}")
        lines.append(f"   🔑 {', '.join(p['keywords'])}")
        lines.append(f"   💡 {p['sentiment']} | Reply: {'YES' if p['score'] > 8 else 'maybe'}")
        lines.append(f"   🔗 https://reddit.com{data['permalink']}")
        lines.append("")

    # Sentiment summary
    sentiments = [p['sentiment'] for p in posts]
    total = len(sentiments)
    frustrated = sentiments.count('frustrated')
    positive = sentiments.count('positive')
    lines.extend([
        "━━━━━━━━━━━━━━━",
        "*SENTIMENT*",
        "━━━━━━━━━━━━━━━",
        f"😤 Frustrated: {frustrated}/{total}",
        f"😊 Positive: {positive}/{total}",
        f"😐 Neutral: {total - frustrated - positive}/{total}"
    ])
    return '\n'.join(lines)


def send_digest(message):
    """Send digest to OpenClaw."""
    log(message[:200] + "...")  # OpenClaw will pick up stdout/log
    return True


def scout():
    """Main scouting function."""
    log("=== Reddit Scout Starting ===")
    s = load_state()

    # Fetch posts
    posts_raw = fetch_reddit_posts()
    log(f"Fetched {len(posts_raw)} posts from r/HOA")

    matches = []
    for child in posts_raw:
        try:
            data = child.get('data', {})
            post_id = data.get('id')

            # Skip already processed
            if post_id in s['processed_ids']:
                continue

            # Skip old posts
            if not is_recent(data.get('created_utc', 0)):
                continue

            # Score post
            score, keywords = score_post(data)

            # Only keep posts scoring at least 5
            if score < 5:
                s['processed_ids'].append(post_id)
                continue

            # Analyze sentiment (the space keeps body/title words from merging)
            sentiment = analyze_sentiment(data.get('selftext', '') + ' ' + data.get('title', ''))

            matches.append({
                'data': data,
                'score': score,
                'keywords': keywords,
                'sentiment': sentiment
            })
            s['processed_ids'].append(post_id)
        except Exception as e:
            log(f"Error processing post: {e}")
            continue

    # Sort by score
    matches.sort(key=lambda x: x['score'], reverse=True)

    # Generate and send digest
    digest = format_digest(matches)
    send_digest(digest)

    s['total_scanned'] += len(posts_raw)
    s['total_matches'] += len(matches)
    # Cap the processed-ID history; anything older than IGNORE_AFTER is
    # filtered out by is_recent() anyway, so the state file stays small.
    s['processed_ids'] = s['processed_ids'][-1000:]
    save_state(s)

    # Save digest to file for reference
    DIGEST_FILE.write_text(json.dumps({
        'timestamp': datetime.now().isoformat(),
        'matches': len(matches),
        'posts': matches
    }, indent=2))

    log(f"=== Done: {len(matches)} matches | Total scanned: {s['total_scanned']} ===")


def main():
    scout()


if __name__ == "__main__":
    main()
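
# Scheduling sketch (not part of the script itself): the header says the
# scout runs twice daily at 8am and 3pm. Assuming a Unix host with cron,
# an entry like the following would do it (the script path is hypothetical):
#
#   0 8,15 * * * /usr/bin/python3 /path/to/reddit_scout.py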