Initial project commit
Dockerfile (Normal file, 16 lines)
@@ -0,0 +1,16 @@
# Use a lightweight Python base image
FROM python:3.9-slim

# Set working directory inside the container
WORKDIR /app

# Install dependencies: only PRAW is external; others are stdlib
RUN pip install --no-cache-dir praw
RUN mkdir -p /app/data  # Create data directory

# Copy your script and watchlist into the container
COPY wsb_ticker_scanner.py .
COPY watchlist.json .

# Command to run the script (this will execute once when the container starts)
CMD ["python", "wsb_ticker_scanner.py"]
data/wsb_mentions.db (Normal file, BIN)
Binary file not shown.
data/wsb_scanner.log (Normal file, 7 lines)
@@ -0,0 +1,7 @@
2025-08-22 13:32:28,394 - INFO - Starting WSB ticker scan
2025-08-22 13:32:28,394 - INFO - Loaded 5 tickers from watchlist
2025-08-22 13:32:28,609 - INFO - Authenticated with Reddit API for r/wallstreetbets
2025-08-22 13:33:04,173 - INFO - Reached 24-hour cutoff after 34 posts
2025-08-22 13:33:04,176 - INFO - Scanned 34 posts; found mentions for 8 unique tickers
2025-08-22 13:33:04,196 - INFO - Data inserted into DB successfully
2025-08-22 13:33:04,197 - INFO - Scan complete
watchlist.json (Normal file, 3 lines)
@@ -0,0 +1,3 @@
{
  "tickers": ["TSLA", "OPEN", "EOSE", "AVGO", "UNH"]
}
wsb_ticker_scanner.py (Normal file, 121 lines)
@@ -0,0 +1,121 @@
import praw
import re
from collections import Counter
import sqlite3
import json
import datetime
import time
import logging

# Tunable config variables
CLIENT_ID = 'TLca7oyo1uA5IBMhLouIag'
CLIENT_SECRET = '6bb9HIt4_K8FV92UOWmwM8cFjoyiTQ'
USER_AGENT = 'wsb_ticker_scanner v1.0'  # Change if needed for uniqueness
SUBREDDIT = 'wallstreetbets'
DB_FILE = 'data/wsb_mentions.db'
WATCHLIST_FILE = 'watchlist.json'
MAX_RETRIES = 3  # Max API retry attempts
RETRY_BACKOFF = 5  # Seconds to wait between retries (increases exponentially)
THROTTLE_SLEEP = 1  # Seconds to sleep between post fetches to avoid rate limits

# Set up logging
logging.basicConfig(filename='data/wsb_scanner.log', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')
logging.info('Starting WSB ticker scan')

try:
    # Load watchlist (set of tickers for fast lookup)
    try:
        with open(WATCHLIST_FILE, 'r') as f:
            watchlist_data = json.load(f)
        watchlist = set(watchlist_data.get('tickers', []))
        logging.info(f'Loaded {len(watchlist)} tickers from watchlist')
    except FileNotFoundError:
        watchlist = set()
        logging.warning('Watchlist file not found; using empty watchlist')
    except json.JSONDecodeError:
        watchlist = set()
        logging.error('Invalid JSON in watchlist; using empty watchlist')

    # Authenticate with Reddit API
    reddit = praw.Reddit(client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         user_agent=USER_AGENT)
    subreddit = reddit.subreddit(SUBREDDIT)
    logging.info(f'Authenticated with Reddit API for r/{SUBREDDIT}')

    # Prepare for scanning: 24-hour cutoff (in Unix timestamp)
    cutoff_time = time.time() - 86400  # 24 hours ago
    mention_counter = Counter()

    # Function for retry logic
    def fetch_with_retry(func, *args, **kwargs):
        for attempt in range(MAX_RETRIES):
            try:
                return func(*args, **kwargs)
            except praw.exceptions.PRAWException as e:  # Catch API errors (e.g., rate limits)
                if attempt == MAX_RETRIES - 1:
                    raise
                wait_time = RETRY_BACKOFF * (2 ** attempt)  # Exponential backoff
                logging.warning(f'API error: {e}. Retrying in {wait_time}s (attempt {attempt+1}/{MAX_RETRIES})')
                time.sleep(wait_time)

    # Scan new posts until cutoff
    post_count = 0
    for submission in subreddit.new(limit=None):
        # Fetch submission with retry
        submission = fetch_with_retry(lambda: submission)  # In case of lazy loading issues

        if submission.created_utc < cutoff_time:
            logging.info(f'Reached 24-hour cutoff after {post_count} posts')
            break

        # Combine title and body text
        text = submission.title + ' ' + (submission.selftext or '')

        # Find tickers: $ followed by 1-5 uppercase letters
        tickers = re.findall(r'\$([A-Z]{1,5})', text)

        # Update counter
        mention_counter.update(tickers)

        post_count += 1
        time.sleep(THROTTLE_SLEEP)  # Throttle to be safe

    logging.info(f'Scanned {post_count} posts; found mentions for {len(mention_counter)} unique tickers')

    # Get today's date for DB insert
    today = datetime.date.today().isoformat()

    # Connect to SQLite DB
    conn = sqlite3.connect(DB_FILE)
    cursor = conn.cursor()

    # Create table if not exists (with UNIQUE constraint for idempotency)
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS ticker_mentions (
            date TEXT,
            ticker TEXT,
            count INTEGER,
            watched INTEGER,
            UNIQUE(date, ticker)
        )
    ''')

    # Insert or replace counts for today
    for ticker, count in mention_counter.items():
        watched = 1 if ticker in watchlist else 0
        cursor.execute('''
            INSERT OR REPLACE INTO ticker_mentions (date, ticker, count, watched)
            VALUES (?, ?, ?, ?)
        ''', (today, ticker, count, watched))

    conn.commit()
    conn.close()
    logging.info('Data inserted into DB successfully')

except Exception as e:
    logging.error(f'Script failed: {e}')
    raise  # Re-raise for cron to capture if needed

logging.info('Scan complete')