fix: Improved duplicate prevention in cast iron scanner
- Better link normalization and checking
- Skip items already in seen_links with logging
- Clean up state file to last 500 items
- Always mark items as seen (deal or not)
- Added logging for skipped duplicates

Also: eBay scraping is temporarily blocked or has changed - investigating.
This commit is contained in:
@@ -27,7 +27,13 @@ def load_config():
|
||||
def load_state():
|
||||
"""Load previously seen items to avoid duplicates"""
|
||||
if STATE_FILE.exists():
|
||||
return json.loads(STATE_FILE.read_text())
|
||||
data = json.loads(STATE_FILE.read_text())
|
||||
# Ensure we have the right structure
|
||||
if isinstance(data, dict) and 'seen_links' in data:
|
||||
# Keep only last 500 items to prevent bloat
|
||||
if len(data['seen_links']) > 500:
|
||||
data['seen_links'] = data['seen_links'][-500:]
|
||||
return data
|
||||
return {"seen_links": [], "last_scan": None}
|
||||
|
||||
def save_state(state):
|
||||
@@ -112,8 +118,14 @@ def scan_all_sources():
|
||||
min_discount = config.get('min_discount_percent', 80) # Only ultra-deals now
|
||||
|
||||
for item in all_items:
|
||||
# Normalize link for comparison
|
||||
link = item.get('link', '')
|
||||
if not link:
|
||||
continue
|
||||
|
||||
# Skip if already seen
|
||||
if item['link'] in seen_links:
|
||||
if link in seen_links:
|
||||
log(f"⏭️ Skipping duplicate: {item['title'][:50]}")
|
||||
continue
|
||||
|
||||
# Check if it's a good deal
|
||||
@@ -124,8 +136,8 @@ def scan_all_sources():
|
||||
send_telegram_alert(item, fmv, discount)
|
||||
deals_found += 1
|
||||
|
||||
# Mark as seen
|
||||
seen_links.add(item['link'])
|
||||
# ALWAYS mark as seen (whether deal or not) to prevent future duplicates
|
||||
seen_links.add(link)
|
||||
|
||||
# Keep only last 1000 seen items to prevent state file from growing forever
|
||||
if len(seen_links) > 1000:
|
||||
|
||||
Reference in New Issue
Block a user