# wsb_ticker_scanner: scans recent r/wallstreetbets posts for $TICKER
# mentions over the last 24 hours and stores daily counts in SQLite.
import datetime
import json
import logging
import os
import re
import sqlite3
import time
from collections import Counter
from contextlib import closing

import praw

# Tunable config variables.
#
# SECURITY NOTE: Reddit API credentials were hard-coded in this file.
# They can now be supplied via the REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET
# environment variables; the literals remain only as a backward-compatible
# fallback and should be rotated and removed from source control.
CLIENT_ID = os.environ.get('REDDIT_CLIENT_ID', 'TLca7oyo1uA5IBMhLouIag')
CLIENT_SECRET = os.environ.get('REDDIT_CLIENT_SECRET', '6bb9HIt4_K8FV92UOWmwM8cFjoyiTQ')
USER_AGENT = 'wsb_ticker_scanner v1.0'  # Change if needed for uniqueness
SUBREDDIT = 'wallstreetbets'
DB_FILE = 'data/wsb_mentions.db'
WATCHLIST_FILE = 'watchlist.json'
MAX_RETRIES = 3     # Max API retry attempts
RETRY_BACKOFF = 5   # Base seconds between retries (doubles each attempt)
THROTTLE_SLEEP = 1  # Seconds to sleep between post fetches to avoid rate limits

# Configure file logging for the scan run.
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(
    filename='data/wsb_scanner.log',
    level=logging.INFO,
    format=LOG_FORMAT,
)
logging.info('Starting WSB ticker scan')

try:
    # Load the watchlist as a set for O(1) membership checks; fall back to
    # an empty watchlist when the file is missing or malformed so the scan
    # still runs (entries are just flagged watched=0).
    try:
        with open(WATCHLIST_FILE, 'r') as f:
            watchlist_data = json.load(f)
        watchlist = set(watchlist_data.get('tickers', []))
        logging.info(f'Loaded {len(watchlist)} tickers from watchlist')
    except FileNotFoundError:
        watchlist = set()
        logging.warning('Watchlist file not found; using empty watchlist')
    except json.JSONDecodeError:
        watchlist = set()
        logging.error('Invalid JSON in watchlist; using empty watchlist')

    # Authenticate with Reddit API (read-only; no username/password).
    reddit = praw.Reddit(client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         user_agent=USER_AGENT)
    subreddit = reddit.subreddit(SUBREDDIT)
    logging.info(f'Authenticated with Reddit API for r/{SUBREDDIT}')

    # Scan window: posts newer than 24 hours ago (Unix timestamp).
    cutoff_time = time.time() - 86400
    mention_counter = Counter()

    # Compile once, outside the loop: "$" followed by 1-5 uppercase letters.
    ticker_pattern = re.compile(r'\$([A-Z]{1,5})')

    def fetch_with_retry(func, *args, **kwargs):
        """Call *func* with exponential-backoff retries on PRAW API errors.

        Retries up to MAX_RETRIES times, sleeping RETRY_BACKOFF * 2**attempt
        seconds between attempts; re-raises the final error if all fail.
        """
        for attempt in range(MAX_RETRIES):
            try:
                return func(*args, **kwargs)
            except praw.exceptions.PRAWException as e:  # e.g. rate limits
                if attempt == MAX_RETRIES - 1:
                    raise
                wait_time = RETRY_BACKOFF * (2 ** attempt)  # Exponential backoff
                logging.warning(f'API error: {e}. Retrying in {wait_time}s (attempt {attempt+1}/{MAX_RETRIES})')
                time.sleep(wait_time)

    # Scan new posts until the 24-hour cutoff.
    post_count = 0
    for submission in subreddit.new(limit=None):
        if submission.created_utc < cutoff_time:
            logging.info(f'Reached 24-hour cutoff after {post_count} posts')
            break

        # BUG FIX: the old code did fetch_with_retry(lambda: submission),
        # which only returned the object and never touched the network.
        # Reading selftext is what can trigger PRAW's lazy fetch, so the
        # text extraction itself goes through the retry wrapper. The
        # default-argument binding avoids the late-binding-closure pitfall.
        text = fetch_with_retry(
            lambda s=submission: s.title + ' ' + (s.selftext or ''))

        # Tally every $TICKER mention in title + body.
        mention_counter.update(ticker_pattern.findall(text))

        post_count += 1
        time.sleep(THROTTLE_SLEEP)  # Throttle to be safe

    logging.info(f'Scanned {post_count} posts; found mentions for {len(mention_counter)} unique tickers')

    # Date key for today's row(s).
    today = datetime.date.today().isoformat()

    # Persist counts; closing() guarantees the connection is released even
    # if an insert fails partway through.
    with closing(sqlite3.connect(DB_FILE)) as conn:
        cursor = conn.cursor()

        # UNIQUE(date, ticker) + INSERT OR REPLACE makes reruns idempotent.
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS ticker_mentions (
            date TEXT,
            ticker TEXT,
            count INTEGER,
            watched INTEGER,
            UNIQUE(date, ticker)
        )
        ''')

        for ticker, count in mention_counter.items():
            watched = 1 if ticker in watchlist else 0
            cursor.execute('''
            INSERT OR REPLACE INTO ticker_mentions (date, ticker, count, watched)
            VALUES (?, ?, ?, ?)
            ''', (today, ticker, count, watched))

        conn.commit()
    logging.info('Data inserted into DB successfully')

except Exception as e:
    # Top-level boundary: logging.exception records the full traceback
    # (logging.error dropped it), then re-raise for cron to capture.
    logging.exception(f'Script failed: {e}')
    raise

logging.info('Scan complete')
|