Initial project commit

This commit is contained in:
2025-08-22 09:38:43 -04:00
commit b032ebd3af
5 changed files with 147 additions and 0 deletions

16
Dockerfile Normal file
View File

@@ -0,0 +1,16 @@
# syntax=docker/dockerfile:1
# Use a lightweight Python base image
FROM python:3.9-slim

# Set working directory inside the container (WORKDIR creates it if missing)
WORKDIR /app

# Install dependencies: only PRAW is external; others are stdlib.
# Pin the version for reproducible builds (hadolint DL3013).
RUN pip install --no-cache-dir praw==7.7.1

# Create an unprivileged runtime user and the writable data directory
# (the script writes data/wsb_scanner.log and data/wsb_mentions.db)
RUN useradd --system --create-home --uid 10001 appuser \
    && mkdir -p /app/data \
    && chown -R appuser:appuser /app

# Copy your script and watchlist into the container
COPY --chown=appuser:appuser wsb_ticker_scanner.py .
COPY --chown=appuser:appuser watchlist.json .

# Do not run as root at runtime
USER appuser

# Command to run the script (this will execute once when the container starts)
CMD ["python", "wsb_ticker_scanner.py"]

BIN
data/wsb_mentions.db Normal file

Binary file not shown.

7
data/wsb_scanner.log Normal file
View File

@@ -0,0 +1,7 @@
2025-08-22 13:32:28,394 - INFO - Starting WSB ticker scan
2025-08-22 13:32:28,394 - INFO - Loaded 5 tickers from watchlist
2025-08-22 13:32:28,609 - INFO - Authenticated with Reddit API for r/wallstreetbets
2025-08-22 13:33:04,173 - INFO - Reached 24-hour cutoff after 34 posts
2025-08-22 13:33:04,176 - INFO - Scanned 34 posts; found mentions for 8 unique tickers
2025-08-22 13:33:04,196 - INFO - Data inserted into DB successfully
2025-08-22 13:33:04,197 - INFO - Scan complete

3
watchlist.json Normal file
View File

@@ -0,0 +1,3 @@
{
"tickers": ["TSLA", "OPEN", "EOSE", "AVGO", "UNH"]
}

121
wsb_ticker_scanner.py Normal file
View File

@@ -0,0 +1,121 @@
# Standard library
import datetime
import json
import logging
import os
import re
import sqlite3
import time
from collections import Counter

# Third-party
import praw
# Tunable config variables.
# SECURITY: the Reddit credentials below were committed to the repository in
# plain text, so they must be treated as compromised and rotated. They are
# now read from the environment first; the literal values remain only as a
# backward-compatible fallback until rotation is done.
CLIENT_ID = os.environ.get('REDDIT_CLIENT_ID', 'TLca7oyo1uA5IBMhLouIag')
CLIENT_SECRET = os.environ.get('REDDIT_CLIENT_SECRET', '6bb9HIt4_K8FV92UOWmwM8cFjoyiTQ')
USER_AGENT = 'wsb_ticker_scanner v1.0'  # Change if needed for uniqueness
SUBREDDIT = 'wallstreetbets'
DB_FILE = 'data/wsb_mentions.db'        # SQLite database path (relative to CWD)
WATCHLIST_FILE = 'watchlist.json'       # JSON file with a "tickers" list
MAX_RETRIES = 3       # Max API retry attempts
RETRY_BACKOFF = 5     # Base seconds between retries (doubles each attempt)
THROTTLE_SLEEP = 1    # Seconds to sleep between post fetches to avoid rate limits
# Set up logging to a file inside the mounted data directory
logging.basicConfig(filename='data/wsb_scanner.log', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')
logging.info('Starting WSB ticker scan')
try:
    # Load watchlist (set of tickers for fast membership lookup).
    try:
        with open(WATCHLIST_FILE, 'r') as f:
            watchlist_data = json.load(f)
        watchlist = set(watchlist_data.get('tickers', []))
        logging.info(f'Loaded {len(watchlist)} tickers from watchlist')
    except FileNotFoundError:
        watchlist = set()
        logging.warning('Watchlist file not found; using empty watchlist')
    except json.JSONDecodeError:
        watchlist = set()
        logging.error('Invalid JSON in watchlist; using empty watchlist')

    # Authenticate with Reddit API (read-only, script-type app).
    reddit = praw.Reddit(client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         user_agent=USER_AGENT)
    subreddit = reddit.subreddit(SUBREDDIT)
    logging.info(f'Authenticated with Reddit API for r/{SUBREDDIT}')

    # Prepare for scanning: 24-hour cutoff (Unix timestamp).
    cutoff_time = time.time() - 86400  # 24 hours ago
    mention_counter = Counter()

    def fetch_with_retry(func, *args, **kwargs):
        """Call func(*args, **kwargs), retrying PRAW API errors with
        exponential backoff (RETRY_BACKOFF * 2**attempt seconds).
        Re-raises the last error after MAX_RETRIES attempts."""
        for attempt in range(MAX_RETRIES):
            try:
                return func(*args, **kwargs)
            except praw.exceptions.PRAWException as e:  # e.g. rate limits
                if attempt == MAX_RETRIES - 1:
                    raise
                wait_time = RETRY_BACKOFF * (2 ** attempt)  # Exponential backoff
                logging.warning(f'API error: {e}. Retrying in {wait_time}s (attempt {attempt+1}/{MAX_RETRIES})')
                time.sleep(wait_time)

    def _read_post_fields(submission):
        """Read the fields we need from a submission. PRAW objects are lazy:
        it is this attribute access that can hit the network, so this is the
        call that must be wrapped in retry logic."""
        return submission.created_utc, submission.title, submission.selftext

    # Scan new posts until the 24-hour cutoff.
    post_count = 0
    for submission in subreddit.new(limit=None):
        # BUG FIX: the original passed `lambda: submission`, which just returns
        # the lazy object without any network I/O, so retries never applied.
        # Wrap the lazy attribute access instead.
        created_utc, title, selftext = fetch_with_retry(_read_post_fields, submission)
        if created_utc < cutoff_time:
            logging.info(f'Reached 24-hour cutoff after {post_count} posts')
            break
        # Combine title and body text; selftext may be empty/None for links.
        text = title + ' ' + (selftext or '')
        # Find tickers: $ followed by 1-5 uppercase letters.
        tickers = re.findall(r'\$([A-Z]{1,5})', text)
        mention_counter.update(tickers)
        post_count += 1
        time.sleep(THROTTLE_SLEEP)  # Throttle to be safe
    logging.info(f'Scanned {post_count} posts; found mentions for {len(mention_counter)} unique tickers')

    # Get today's date for the DB insert.
    today = datetime.date.today().isoformat()

    # FIX: the original leaked the connection if any statement raised
    # (conn.close() was unreachable on error). try/finally guarantees close;
    # `with conn` commits on success and rolls back on exception.
    conn = sqlite3.connect(DB_FILE)
    try:
        with conn:
            # Create table if not exists (UNIQUE constraint for idempotency).
            conn.execute('''
                CREATE TABLE IF NOT EXISTS ticker_mentions (
                    date TEXT,
                    ticker TEXT,
                    count INTEGER,
                    watched INTEGER,
                    UNIQUE(date, ticker)
                )
            ''')
            # Insert or replace counts for today.
            for ticker, count in mention_counter.items():
                watched = 1 if ticker in watchlist else 0
                conn.execute('''
                    INSERT OR REPLACE INTO ticker_mentions (date, ticker, count, watched)
                    VALUES (?, ?, ?, ?)
                ''', (today, ticker, count, watched))
    finally:
        conn.close()
    logging.info('Data inserted into DB successfully')
except Exception as e:
    logging.error(f'Script failed: {e}')
    raise  # Re-raise for cron to capture if needed
logging.info('Scan complete')