Initial project commit

This commit is contained in:
2025-08-22 09:38:43 -04:00
commit b032ebd3af
5 changed files with 147 additions and 0 deletions

16
Dockerfile Normal file
View File

@@ -0,0 +1,16 @@
# syntax=docker/dockerfile:1
# Use a lightweight Python base image
FROM python:3.9-slim

# Set working directory inside the container (WORKDIR creates it if missing)
WORKDIR /app

# Install dependencies: only PRAW is external; others are stdlib.
# Pin the version for reproducible builds (hadolint DL3013).
RUN pip install --no-cache-dir praw==7.7.1

# Create an unprivileged runtime user and the writable data directory
# (the script writes data/wsb_scanner.log and data/wsb_mentions.db)
RUN useradd --system --create-home --uid 10001 appuser \
    && mkdir -p /app/data \
    && chown -R appuser:appuser /app

# Copy your script and watchlist into the container
COPY --chown=appuser:appuser wsb_ticker_scanner.py .
COPY --chown=appuser:appuser watchlist.json .

# Do not run as root at runtime
USER appuser

# Command to run the script (this will execute once when the container starts)
CMD ["python", "wsb_ticker_scanner.py"]

BIN
data/wsb_mentions.db Normal file

Binary file not shown.

7
data/wsb_scanner.log Normal file
View File

@@ -0,0 +1,7 @@
2025-08-22 13:32:28,394 - INFO - Starting WSB ticker scan
2025-08-22 13:32:28,394 - INFO - Loaded 5 tickers from watchlist
2025-08-22 13:32:28,609 - INFO - Authenticated with Reddit API for r/wallstreetbets
2025-08-22 13:33:04,173 - INFO - Reached 24-hour cutoff after 34 posts
2025-08-22 13:33:04,176 - INFO - Scanned 34 posts; found mentions for 8 unique tickers
2025-08-22 13:33:04,196 - INFO - Data inserted into DB successfully
2025-08-22 13:33:04,197 - INFO - Scan complete

3
watchlist.json Normal file
View File

@@ -0,0 +1,3 @@
{
"tickers": ["TSLA", "OPEN", "EOSE", "AVGO", "UNH"]
}

121
wsb_ticker_scanner.py Normal file
View File

@@ -0,0 +1,121 @@
# Standard library
import datetime
import json
import logging
import os
import re
import sqlite3
import time
from collections import Counter

# Third-party
import praw
# Tunable config variables.
# SECURITY: the Reddit credentials below were committed to the repository in
# plain text, so they must be treated as compromised and rotated. They are
# now read from the environment first; the literal values remain only as a
# backward-compatible fallback until rotation is done.
CLIENT_ID = os.environ.get('REDDIT_CLIENT_ID', 'TLca7oyo1uA5IBMhLouIag')
CLIENT_SECRET = os.environ.get('REDDIT_CLIENT_SECRET', '6bb9HIt4_K8FV92UOWmwM8cFjoyiTQ')
USER_AGENT = 'wsb_ticker_scanner v1.0'  # Change if needed for uniqueness
SUBREDDIT = 'wallstreetbets'
DB_FILE = 'data/wsb_mentions.db'        # SQLite database path (relative to CWD)
WATCHLIST_FILE = 'watchlist.json'       # JSON file with a "tickers" list
MAX_RETRIES = 3       # Max API retry attempts
RETRY_BACKOFF = 5     # Base seconds between retries (doubles each attempt)
THROTTLE_SLEEP = 1    # Seconds to sleep between post fetches to avoid rate limits
# Set up logging to a file inside the mounted data directory
logging.basicConfig(filename='data/wsb_scanner.log', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')
logging.info('Starting WSB ticker scan')
try:
    # Load watchlist (set of tickers for fast membership lookup).
    try:
        with open(WATCHLIST_FILE, 'r') as f:
            watchlist_data = json.load(f)
        watchlist = set(watchlist_data.get('tickers', []))
        logging.info(f'Loaded {len(watchlist)} tickers from watchlist')
    except FileNotFoundError:
        watchlist = set()
        logging.warning('Watchlist file not found; using empty watchlist')
    except json.JSONDecodeError:
        watchlist = set()
        logging.error('Invalid JSON in watchlist; using empty watchlist')

    # Authenticate with Reddit API (read-only, script-type app).
    reddit = praw.Reddit(client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         user_agent=USER_AGENT)
    subreddit = reddit.subreddit(SUBREDDIT)
    logging.info(f'Authenticated with Reddit API for r/{SUBREDDIT}')

    # Prepare for scanning: 24-hour cutoff (Unix timestamp).
    cutoff_time = time.time() - 86400  # 24 hours ago
    mention_counter = Counter()

    def fetch_with_retry(func, *args, **kwargs):
        """Call func(*args, **kwargs), retrying PRAW API errors with
        exponential backoff (RETRY_BACKOFF * 2**attempt seconds).
        Re-raises the last error after MAX_RETRIES attempts."""
        for attempt in range(MAX_RETRIES):
            try:
                return func(*args, **kwargs)
            except praw.exceptions.PRAWException as e:  # e.g. rate limits
                if attempt == MAX_RETRIES - 1:
                    raise
                wait_time = RETRY_BACKOFF * (2 ** attempt)  # Exponential backoff
                logging.warning(f'API error: {e}. Retrying in {wait_time}s (attempt {attempt+1}/{MAX_RETRIES})')
                time.sleep(wait_time)

    def _read_post_fields(submission):
        """Read the fields we need from a submission. PRAW objects are lazy:
        it is this attribute access that can hit the network, so this is the
        call that must be wrapped in retry logic."""
        return submission.created_utc, submission.title, submission.selftext

    # Scan new posts until the 24-hour cutoff.
    post_count = 0
    for submission in subreddit.new(limit=None):
        # BUG FIX: the original passed `lambda: submission`, which just returns
        # the lazy object without any network I/O, so retries never applied.
        # Wrap the lazy attribute access instead.
        created_utc, title, selftext = fetch_with_retry(_read_post_fields, submission)
        if created_utc < cutoff_time:
            logging.info(f'Reached 24-hour cutoff after {post_count} posts')
            break
        # Combine title and body text; selftext may be empty/None for links.
        text = title + ' ' + (selftext or '')
        # Find tickers: $ followed by 1-5 uppercase letters.
        tickers = re.findall(r'\$([A-Z]{1,5})', text)
        mention_counter.update(tickers)
        post_count += 1
        time.sleep(THROTTLE_SLEEP)  # Throttle to be safe
    logging.info(f'Scanned {post_count} posts; found mentions for {len(mention_counter)} unique tickers')

    # Get today's date for the DB insert.
    today = datetime.date.today().isoformat()

    # FIX: the original leaked the connection if any statement raised
    # (conn.close() was unreachable on error). try/finally guarantees close;
    # `with conn` commits on success and rolls back on exception.
    conn = sqlite3.connect(DB_FILE)
    try:
        with conn:
            # Create table if not exists (UNIQUE constraint for idempotency).
            conn.execute('''
                CREATE TABLE IF NOT EXISTS ticker_mentions (
                    date TEXT,
                    ticker TEXT,
                    count INTEGER,
                    watched INTEGER,
                    UNIQUE(date, ticker)
                )
            ''')
            # Insert or replace counts for today.
            for ticker, count in mention_counter.items():
                watched = 1 if ticker in watchlist else 0
                conn.execute('''
                    INSERT OR REPLACE INTO ticker_mentions (date, ticker, count, watched)
                    VALUES (?, ?, ?, ?)
                ''', (today, ticker, count, watched))
    finally:
        conn.close()
    logging.info('Data inserted into DB successfully')
except Exception as e:
    logging.error(f'Script failed: {e}')
    raise  # Re-raise for cron to capture if needed
logging.info('Scan complete')