- Created chatwoot-agent-bot/ with a Node.js webhook server
- Bot detects intent (greeting, billing, technical, features, account)
- Auto-responds from the FAQ knowledge base or escalates to a human
- FAQ-KB.md: living knowledge base that grows with customer questions
- CHATWOOT-SETUP.md: complete deployment and configuration guide
- Supports Telegram notifications on escalation
- Bot runs on port 3001, ready for Chatwoot webhook integration
225 lines
7.2 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Reddit Scout - HOA Edition
|
|
Monitors r/HOA for relevant discussions
|
|
Runs twice daily: 8am and 3pm
|
|
"""
|
|
import json, re, time, urllib.request
|
|
from datetime import datetime, timedelta
|
|
from pathlib import Path
|
|
|
|
# --- Paths and runtime artifacts ------------------------------------------
SCRIPT_DIR = Path(__file__).parent

# Ensure the working subdirectories exist before any file I/O happens.
for _sub in ("state", "logs", "digests"):
    (SCRIPT_DIR / _sub).mkdir(parents=True, exist_ok=True)

STATE_FILE = SCRIPT_DIR / "state" / "scout-state.json"
LOG_FILE = SCRIPT_DIR / "logs" / f"scout-{datetime.now().strftime('%Y%m%d')}.log"
DIGEST_FILE = SCRIPT_DIR / "digests" / f"digest-{datetime.now().strftime('%Y%m%d-%H%M')}.json"

# --- Scoring configuration ------------------------------------------------
# Substrings that mark a post as relevant to HOA Ledger IQ.
KEYWORDS = ["budget", "reserve", "assessment", "investments", "planning", "spreadsheet"]
# Substrings that penalize a post's score.
NEGATIVE = ["scam", "worst"]
# Posts older than this many hours are skipped entirely.
IGNORE_AFTER = 48  # hours
def log(msg):
    """Echo *msg* to stdout and append it, timestamped, to today's log file."""
    stamp = datetime.now().strftime('%H:%M:%S')
    line = f"[{stamp}] {msg}"
    print(line)
    with open(LOG_FILE, 'a') as handle:
        handle.write(line + "\n")
def load_state():
    """Return the persisted scout state, or a fresh default when none exists."""
    if not STATE_FILE.exists():
        return {"processed_ids": [], "total_scanned": 0, "total_matches": 0}
    return json.loads(STATE_FILE.read_text())
def save_state(s):
    """Persist scout state *s* to disk as pretty-printed JSON."""
    serialized = json.dumps(s, indent=2)
    STATE_FILE.write_text(serialized)
def fetch_reddit_posts():
    """Fetch the newest r/HOA posts via Reddit's public JSON endpoint.

    No authentication is needed for read-only access. Returns the raw
    list of "children" entries from the listing, or [] on any failure
    (errors are logged, never raised — a failed scan just yields nothing).
    """
    endpoint = "https://www.reddit.com/r/HOA/new.json?limit=50"
    # A browser-like UA avoids Reddit's throttling of default Python agents.
    headers = {"User-Agent": "Mozilla/5.0 (HOA Scout Bot 1.0)"}
    try:
        request = urllib.request.Request(endpoint, headers=headers)
        with urllib.request.urlopen(request, timeout=20) as response:
            payload = json.loads(response.read().decode())
        return payload.get('data', {}).get('children', [])
    except Exception as e:
        log(f"Reddit fetch error: {e}")
        return []
def score_post(post_data):
    """Score a Reddit post's relevance to HOA Ledger IQ.

    Combines keyword hits, upvote/comment engagement tiers, a bonus for
    question-style titles, and a penalty for negative keywords.
    Returns a ``(score, matched_keywords)`` tuple; score is clamped at >= 0.
    """
    title = post_data.get('title', '').lower()
    selftext = post_data.get('selftext', '').lower()

    # Keyword matches: 2 points each, capped at 5.
    matched = [kw for kw in KEYWORDS if kw in title or kw in selftext]
    score = min(len(matched) * 2, 5)

    # Engagement tier from upvotes (0-3 points).
    upvotes = post_data.get('ups', 0)
    if upvotes > 50:
        score += 3
    elif upvotes > 20:
        score += 2
    elif upvotes > 5:
        score += 1

    # Discussion activity from comment count (0-3 points).
    comments = post_data.get('num_comments', 0)
    if comments > 20:
        score += 3
    elif comments > 5:
        score += 2
    elif comments > 0:
        score += 1

    # Question-style titles are prime reply opportunities (+2).
    question_markers = ('?', 'how', 'what', 'best', 'recommend')
    if any(marker in title for marker in question_markers):
        score += 2

    # Penalize posts containing negative keywords (-3).
    if any(neg in title or neg in selftext for neg in NEGATIVE):
        score -= 3

    return max(score, 0), matched
def analyze_sentiment(text):
    """Classify *text* as 'frustrated', 'positive', or 'neutral'.

    Naive bag-of-words check: whichever marker list scores more distinct
    hits wins; ties (including zero hits) fall through to 'neutral'.
    """
    lowered = text.lower()
    frustrated_markers = ['stupid', 'ridiculous', 'nightmare', 'horrible', 'terrible', 'angry']
    positive_markers = ['great', 'helpful', 'thank', 'appreciate', 'awesome']

    frustrated_hits = sum(marker in lowered for marker in frustrated_markers)
    positive_hits = sum(marker in lowered for marker in positive_markers)

    if frustrated_hits > positive_hits:
        return "frustrated"
    if positive_hits > frustrated_hits:
        return "positive"
    return "neutral"
def is_recent(created_utc):
    """Return True when a post is at most IGNORE_AFTER hours old.

    *created_utc* is an epoch timestamp (Reddit's ``created_utc`` field);
    both sides of the comparison use local time, so the age is consistent.
    """
    posted_at = datetime.fromtimestamp(created_utc)
    elapsed_hours = (datetime.now() - posted_at).total_seconds() / 3600
    return elapsed_hours <= IGNORE_AFTER
def format_digest(posts):
    """Render scored posts as a human-readable digest string.

    Shows at most the top 10 entries plus an aggregate sentiment summary;
    returns a short "nothing found" notice when *posts* is empty. Each
    entry in *posts* is a dict with 'data', 'score', 'keywords', 'sentiment'.
    """
    if not posts:
        return "🏘️ No relevant HOA discussions found in last scan."

    out = [
        "🏘️ *REDDIT SCOUT — HOA DIGEST*",
        f"📅 {datetime.now().strftime('%b %d, %Y at %I:%M %p')}",
        f"🎯 *{len(posts)} relevant posts found*",
        "",
        "━━━━━━━━━━━━━━━",
        "*TOP OPPORTUNITIES*",
        "━━━━━━━━━━━━━━━",
        "",
    ]

    for rank, entry in enumerate(posts[:10], 1):
        data = entry['data']
        age_hours = int((datetime.now() - datetime.fromtimestamp(data['created_utc'])).total_seconds() // 3600)
        # Score above 8 is treated as a clear reply opportunity.
        reply_hint = 'YES' if entry['score'] > 8 else 'maybe'
        out.extend([
            f"*{rank}.* [{entry['score']} pts] r/HOA ({age_hours}h ago)",
            f"📌 *{data['title'][:60]}*",
            f"   ⬆️ {data.get('ups', 0)} 💬 {data.get('num_comments', 0)}",
            f"   🔑 {', '.join(entry['keywords'])}",
            f"   💡 {entry['sentiment']} | Reply: {reply_hint}",
            f"   🔗 https://reddit.com{data['permalink']}",
            "",
        ])

    # Aggregate sentiment across every match (not just the top 10 shown).
    sentiments = [entry['sentiment'] for entry in posts]
    total = len(sentiments)
    n_frustrated = sentiments.count('frustrated')
    n_positive = sentiments.count('positive')
    out += [
        "━━━━━━━━━━━━━━━",
        "*SENTIMENT*",
        "━━━━━━━━━━━━━━━",
        f"😤 Frustrated: {n_frustrated}/{total}",
        f"😊 Positive: {n_positive}/{total}",
        f"😐 Neutral: {total - n_frustrated - n_positive}/{total}",
    ]

    return '\n'.join(out)
def send_digest(message):
    """Publish the digest by logging a preview to stdout and the log file.

    OpenClaw picks the digest up from stdout / the log, so no direct
    delivery happens here. Always returns True.
    """
    # Fix: only append an ellipsis when the preview is actually truncated
    # (the old code tacked "..." onto every message, even short ones).
    preview = message[:200] + "..." if len(message) > 200 else message
    log(preview)
    return True
def scout():
    """Run one scan cycle: fetch, filter, score, digest, and persist state.

    Side effects: writes the digest file, updates the state file, and logs
    progress. Network failures inside ``fetch_reddit_posts`` degrade to an
    empty scan rather than raising.
    """
    log("=== Reddit Scout Starting ===")
    state = load_state()
    # Set mirror of processed_ids: O(1) membership instead of a list scan
    # per post (the list form was O(n) and grew every run).
    seen = set(state['processed_ids'])

    posts_raw = fetch_reddit_posts()
    log(f"Fetched {len(posts_raw)} posts from r/HOA")

    matches = []
    for child in posts_raw:
        try:
            data = child.get('data', {})
            post_id = data.get('id')

            # Skip posts already handled in a previous run.
            if post_id in seen:
                continue

            # Skip posts older than the freshness window.
            if not is_recent(data.get('created_utc', 0)):
                continue

            score, keywords = score_post(data)

            # Mark as processed either way; only posts scoring >= 5 are kept.
            seen.add(post_id)
            state['processed_ids'].append(post_id)
            if score < 5:
                continue

            sentiment = analyze_sentiment(data.get('selftext', '') + data.get('title', ''))
            matches.append({
                'data': data,
                'score': score,
                'keywords': keywords,
                'sentiment': sentiment,
            })
        except Exception as e:
            # One malformed post must not abort the whole scan.
            log(f"Error processing post: {e}")
            continue

    # Highest-scoring opportunities first.
    matches.sort(key=lambda m: m['score'], reverse=True)

    digest = format_digest(matches)
    send_digest(digest)

    state['total_scanned'] += len(posts_raw)
    state['total_matches'] += len(matches)
    # Fix: cap the processed-id history so the state file cannot grow without
    # bound. Anything beyond the retained tail is far older than IGNORE_AFTER
    # hours and would be skipped by the age filter anyway.
    state['processed_ids'] = state['processed_ids'][-5000:]
    save_state(state)

    # Persist this run's digest payload for later reference/debugging.
    DIGEST_FILE.write_text(json.dumps({
        'timestamp': datetime.now().isoformat(),
        'matches': len(matches),
        'posts': matches,
    }, indent=2))

    log(f"=== Done: {len(matches)} matches | Total scanned: {state['total_scanned']} ===")
def main():
    """Entry point: perform a single scouting pass."""
    scout()
# Run a scan only when executed as a script (importing the module is a no-op).
if __name__ == "__main__":
    main()