From b032ebd3afb05afa1c75f1f2f82450c27180c6f2 Mon Sep 17 00:00:00 2001
From: Chris
Date: Fri, 22 Aug 2025 09:38:43 -0400
Subject: [PATCH] Initial project commit

---
Review notes (this section is ignored by git am):
- wsb_ticker_scanner.py no longer embeds Reddit API credentials; they are read
  from REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET. The previously committed
  values must be revoked and regenerated.
- Retry now wraps the listing fetch and also covers prawcore errors, the
  ticker regex no longer truncates longer words, and the SQLite connection is
  always closed.
- The index hash of wsb_ticker_scanner.py below predates these amendments.

 Dockerfile            |  16 ++++
 data/wsb_mentions.db  | Bin 0 -> 12288 bytes
 data/wsb_scanner.log  |   7 ++
 watchlist.json        |   3 +
 wsb_ticker_scanner.py | 178 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 204 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 data/wsb_mentions.db
 create mode 100644 data/wsb_scanner.log
 create mode 100644 watchlist.json
 create mode 100644 wsb_ticker_scanner.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..a14dc65
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,16 @@
+# Use a lightweight Python base image
+FROM python:3.9-slim
+
+# Set working directory inside the container
+WORKDIR /app
+
+# Install dependencies: only PRAW is external; others are stdlib
+RUN pip install --no-cache-dir praw
+RUN mkdir -p /app/data # Create data directory
+
+# Copy your script and watchlist into the container
+COPY wsb_ticker_scanner.py .
+COPY watchlist.json .
+
+# Command to run the script (this will execute once when the container starts)
+CMD ["python", "wsb_ticker_scanner.py"]
diff --git a/data/wsb_mentions.db b/data/wsb_mentions.db
new file mode 100644
index 0000000000000000000000000000000000000000..b84b01245171495c52a1e2418ad28553cf04b5eb
GIT binary patch
literal 12288
zcmeI$&r2IY6bJB`&2F*^`dqi-b@rx2i>4I32qv;hXxNx!EiDLPn`~%k%%ZL+y|v)Y
zzo4i72fg&-)kFVeC`^TALd<0zE%IJX0Vr^52olJGp0vudPEmbO~5z=AOHafKmY;|fB*y_
z009U<;Qs`^?D4{UvBge$Y2MqeKSK
zAOHafKmY;|fB*y_009U<00IzrmVjeho-<*WR!XmzSJswGrJdS~g5}NeOsH9}hXpq;
zquIpGR<+?;T5zZJ=DlNTOKt^jPRnam{BYK=JU6>y@V4%9EpfY9u1w04*wkZmPCw}{
zji^W8>6Csw>wrjt00bZa0SG_<0uX=z1Rwwb2tXhs;FzAnvvmO5HDw*(A#zg&7>}tU
mKwisJH2_OnZ7VRfEL8!>&6=K@*+CWn3>li6c-U0k|E~Z86P{@R

literal 0
HcmV?d00001

diff --git a/data/wsb_scanner.log b/data/wsb_scanner.log
new file mode 100644
index 0000000..a59ddfb
--- /dev/null
+++ b/data/wsb_scanner.log
@@ -0,0 +1,7 @@
+2025-08-22 13:32:28,394 - INFO - Starting WSB ticker scan
+2025-08-22 13:32:28,394 - INFO - Loaded 5 tickers from watchlist
+2025-08-22 13:32:28,609 - INFO - Authenticated with Reddit API for r/wallstreetbets
+2025-08-22 13:33:04,173 - INFO - Reached 24-hour cutoff after 34 posts
+2025-08-22 13:33:04,176 - INFO - Scanned 34 posts; found mentions for 8 unique tickers
+2025-08-22 13:33:04,196 - INFO - Data inserted into DB successfully
+2025-08-22 13:33:04,197 - INFO - Scan complete
diff --git a/watchlist.json b/watchlist.json
new file mode 100644
index 0000000..c234d23
--- /dev/null
+++ b/watchlist.json
@@ -0,0 +1,3 @@
+{
+  "tickers": ["TSLA", "OPEN", "EOSE", "AVGO", "UNH"]
+}
diff --git a/wsb_ticker_scanner.py b/wsb_ticker_scanner.py
new file mode 100644
index 0000000..3cb12b5
--- /dev/null
+++ b/wsb_ticker_scanner.py
@@ -0,0 +1,178 @@
+"""Count $TICKER mentions in recent r/wallstreetbets posts.
+
+Scans the subreddit's newest posts from the last 24 hours, tallies every
+"$XYZ"-style ticker found in titles and bodies, and upserts the totals for
+today's date into a SQLite table.
+
+Reddit API credentials are read from the REDDIT_CLIENT_ID and
+REDDIT_CLIENT_SECRET environment variables so they never live in version
+control, e.g.:
+
+    docker run -e REDDIT_CLIENT_ID=... -e REDDIT_CLIENT_SECRET=... IMAGE
+"""
+import praw
+import prawcore
+import re
+from collections import Counter
+from contextlib import closing
+import sqlite3
+import json
+import datetime
+import os
+import time
+import logging
+
+# Tunable config variables
+CLIENT_ID = os.environ.get('REDDIT_CLIENT_ID')
+CLIENT_SECRET = os.environ.get('REDDIT_CLIENT_SECRET')
+USER_AGENT = 'wsb_ticker_scanner v1.0'  # Change if needed for uniqueness
+SUBREDDIT = 'wallstreetbets'
+DB_FILE = 'data/wsb_mentions.db'
+LOG_FILE = 'data/wsb_scanner.log'
+WATCHLIST_FILE = 'watchlist.json'
+MAX_RETRIES = 3  # Max API retry attempts
+RETRY_BACKOFF = 5  # Seconds to wait between retries (increases exponentially)
+THROTTLE_SLEEP = 1  # Seconds to sleep between post fetches to avoid rate limits
+LOOKBACK_SECONDS = 86400  # Scan window: 24 hours
+
+# $ followed by 1-5 uppercase letters. The trailing \b rejects longer words
+# such as "$TSLAXYZ" instead of truncating them to a bogus 5-letter ticker.
+TICKER_PATTERN = re.compile(r'\$([A-Z]{1,5})\b')
+
+# PRAW raises PRAWException for API-level errors, while HTTP and network
+# failures (timeouts, 429s, 5xx) surface as prawcore exceptions.
+RETRYABLE_ERRORS = (praw.exceptions.PRAWException,
+                    prawcore.exceptions.PrawcoreException)
+
+
+def load_watchlist(path=WATCHLIST_FILE):
+    """Return the set of watched tickers listed under "tickers" in *path*.
+
+    A missing or malformed file is logged and treated as an empty watchlist
+    so the scan can still run.
+    """
+    try:
+        with open(path, 'r') as f:
+            watchlist_data = json.load(f)
+    except FileNotFoundError:
+        logging.warning('Watchlist file not found; using empty watchlist')
+        return set()
+    except json.JSONDecodeError:
+        logging.error('Invalid JSON in watchlist; using empty watchlist')
+        return set()
+    watchlist = set(watchlist_data.get('tickers', []))
+    logging.info(f'Loaded {len(watchlist)} tickers from watchlist')
+    return watchlist
+
+
+def fetch_with_retry(func, *args, **kwargs):
+    """Call func(*args, **kwargs), retrying Reddit API errors with backoff.
+
+    Waits RETRY_BACKOFF * 2**attempt seconds between tries and re-raises the
+    last error once MAX_RETRIES attempts have failed.
+    """
+    for attempt in range(MAX_RETRIES):
+        try:
+            return func(*args, **kwargs)
+        except RETRYABLE_ERRORS as e:
+            if attempt == MAX_RETRIES - 1:
+                raise
+            wait_time = RETRY_BACKOFF * (2 ** attempt)  # Exponential backoff
+            logging.warning(f'API error: {e}. Retrying in {wait_time}s '
+                            f'(attempt {attempt+1}/{MAX_RETRIES})')
+            time.sleep(wait_time)
+
+
+def count_mentions(subreddit, cutoff_time):
+    """Count ticker mentions in new posts created at or after *cutoff_time*.
+
+    *subreddit* is a PRAW Subreddit; *cutoff_time* is a Unix timestamp.
+    Returns a Counter mapping ticker symbol to number of mentions.
+    """
+    mention_counter = Counter()
+    post_count = 0
+    # The listing fetches pages lazily inside next(), so that is the call
+    # that actually hits the network and needs the retry wrapper.
+    posts = iter(subreddit.new(limit=None))
+    while True:
+        try:
+            submission = fetch_with_retry(next, posts)
+        except StopIteration:
+            break
+
+        if submission.created_utc < cutoff_time:
+            logging.info(f'Reached 24-hour cutoff after {post_count} posts')
+            break
+
+        # Combine title and body text
+        text = submission.title + ' ' + (submission.selftext or '')
+        mention_counter.update(TICKER_PATTERN.findall(text))
+
+        post_count += 1
+        time.sleep(THROTTLE_SLEEP)  # Throttle to be safe
+
+    logging.info(f'Scanned {post_count} posts; found mentions for '
+                 f'{len(mention_counter)} unique tickers')
+    return mention_counter
+
+
+def store_counts(mention_counter, watchlist, today):
+    """Insert or replace the per-ticker counts for *today* (ISO date string)."""
+    rows = [(today, ticker, count, 1 if ticker in watchlist else 0)
+            for ticker, count in mention_counter.items()]
+    # closing() releases the connection even if a statement fails; the inner
+    # "with conn" commits on success and rolls back on error.
+    with closing(sqlite3.connect(DB_FILE)) as conn:
+        with conn:
+            # UNIQUE(date, ticker) makes same-day re-runs idempotent
+            conn.execute('''
+                CREATE TABLE IF NOT EXISTS ticker_mentions (
+                    date TEXT,
+                    ticker TEXT,
+                    count INTEGER,
+                    watched INTEGER,
+                    UNIQUE(date, ticker)
+                )
+            ''')
+            conn.executemany('''
+                INSERT OR REPLACE INTO ticker_mentions (date, ticker, count, watched)
+                VALUES (?, ?, ?, ?)
+            ''', rows)
+    logging.info('Data inserted into DB successfully')
+
+
+def main():
+    """Run one scan: load config, count mentions, and persist the results."""
+    # basicConfig raises FileNotFoundError if the log directory is missing
+    os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
+    logging.basicConfig(filename=LOG_FILE, level=logging.INFO,
+                        format='%(asctime)s - %(levelname)s - %(message)s')
+    logging.info('Starting WSB ticker scan')
+
+    try:
+        if not CLIENT_ID or not CLIENT_SECRET:
+            raise RuntimeError('REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET must be set')
+
+        watchlist = load_watchlist()
+
+        # Authenticate with Reddit API
+        reddit = praw.Reddit(client_id=CLIENT_ID,
+                             client_secret=CLIENT_SECRET,
+                             user_agent=USER_AGENT)
+        subreddit = reddit.subreddit(SUBREDDIT)
+        logging.info(f'Authenticated with Reddit API for r/{SUBREDDIT}')
+
+        cutoff_time = time.time() - LOOKBACK_SECONDS
+        mention_counter = count_mentions(subreddit, cutoff_time)
+
+        today = datetime.date.today().isoformat()
+        store_counts(mention_counter, watchlist, today)
+    except Exception as e:
+        logging.exception(f'Script failed: {e}')
+        raise  # Re-raise for cron to capture if needed
+
+    logging.info('Scan complete')
+
+
+if __name__ == '__main__':
+    main()