fix: Improved duplicate prevention in cast iron scanner

- Better link normalization and checking
- Skip items already in seen_links with logging
- Trim seen_links to the last 500 entries when loading the state file
- Always mark items as seen (deal or not)
- Added logging for skipped duplicates

Also: eBay scraping is temporarily blocked or has changed - still investigating
This commit is contained in:
2026-04-10 16:16:35 -04:00
parent 30703bfd45
commit 4bd829ca8c
9 changed files with 573 additions and 1002 deletions

View File

@@ -27,7 +27,13 @@ def load_config():
def load_state():
"""Load previously seen items to avoid duplicates"""
if STATE_FILE.exists():
return json.loads(STATE_FILE.read_text())
data = json.loads(STATE_FILE.read_text())
# Ensure we have the right structure
if isinstance(data, dict) and 'seen_links' in data:
# Keep only last 500 items to prevent bloat
if len(data['seen_links']) > 500:
data['seen_links'] = data['seen_links'][-500:]
return data
return {"seen_links": [], "last_scan": None}
def save_state(state):
@@ -112,8 +118,14 @@ def scan_all_sources():
min_discount = config.get('min_discount_percent', 80) # Only ultra-deals now
for item in all_items:
# Normalize link for comparison
link = item.get('link', '')
if not link:
continue
# Skip if already seen
if item['link'] in seen_links:
if link in seen_links:
log(f"⏭️ Skipping duplicate: {item['title'][:50]}")
continue
# Check if it's a good deal
@@ -124,8 +136,8 @@ def scan_all_sources():
send_telegram_alert(item, fmv, discount)
deals_found += 1
# Mark as seen
seen_links.add(item['link'])
# ALWAYS mark as seen (whether deal or not) to prevent future duplicates
seen_links.add(link)
# Keep only last 1000 seen items to prevent state file from growing forever
if len(seen_links) > 1000: