Files
HOALedgerIQ_Website/agents/cast-iron-scout/scanner.py
olsch01 4bd829ca8c fix: Improved duplicate prevention in cast iron scanner
- Better link normalization and checking
- Skip items already in seen_links with logging
- Clean up state file to last 500 items
- Always mark items as seen (deal or not)
- Added logging for skipped duplicates

Also: eBay scraping temporarily blocked/changed - investigating
2026-04-10 16:16:35 -04:00

155 lines
4.8 KiB
Python

#!/usr/bin/env python3
"""
Cast Iron Scout - Main Scanner Engine
Scans all sources for cast iron deals and sends an alert for each good deal found (one scan pass per run)
"""
import json
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from sources.ebay_working import search_ebay_cast_iron
from sources.craigslist_rss import search_craigslist_rss
from sources.facebook_scanner import search_facebook_marketplace_cast_iron
from valuation import is_good_deal, calculate_fmv
# Directory containing this script; every data path below is built relative to it.
SCRIPT_DIR = Path(__file__).parent
# JSON file with the links already processed (read by load_state, written by save_state).
STATE_FILE = SCRIPT_DIR / "state" / "seen_items.json"
# Optional JSON configuration file; load_config returns {} when it does not exist.
CONFIG_FILE = SCRIPT_DIR / "config.json"
# Per-day log file. NOTE: the date is evaluated once, when the module is imported,
# so a process that stays alive past midnight keeps writing to the file it started with.
LOG_FILE = SCRIPT_DIR / "logs" / f"scanner-{datetime.now().strftime('%Y%m%d')}.log"
def load_config():
    """Return the parsed JSON content of CONFIG_FILE, or {} if the file is absent.

    Raises whatever ``json.loads`` raises when the file exists but does not
    contain valid JSON.
    """
    if not CONFIG_FILE.exists():
        return {}
    raw_text = CONFIG_FILE.read_text()
    return json.loads(raw_text)
def load_state():
    """Load previously seen items to avoid duplicates.

    Returns:
        dict: the persisted state. It always has a ``seen_links`` list, capped
        to its last 500 entries to prevent bloat. A fresh
        ``{"seen_links": [], "last_scan": None}`` is returned when STATE_FILE
        is missing, unreadable, not valid JSON, or does not have the expected
        structure.
    """
    fresh_state = {"seen_links": [], "last_scan": None}
    if not STATE_FILE.exists():
        return fresh_state

    try:
        data = json.loads(STATE_FILE.read_text())
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses. A damaged state file must not stop every future scan,
        # so start over with an empty history instead of crashing.
        print(f"Ignoring unreadable state file {STATE_FILE}: {exc}", file=sys.stderr)
        return fresh_state

    # Ensure we have the right structure (a dict carrying a list of links).
    if not isinstance(data, dict) or not isinstance(data.get('seen_links'), list):
        return fresh_state

    # Keep only the last 500 items to prevent bloat.
    if len(data['seen_links']) > 500:
        data['seen_links'] = data['seen_links'][-500:]
    return data
def save_state(state):
    """Save state to file.

    Stamps ``state['last_scan']`` with the current local time (ISO format,
    mutating the dict passed in) and writes the state as JSON to STATE_FILE.

    Args:
        state: JSON-serialisable dict to persist.
    """
    state['last_scan'] = datetime.now().isoformat()
    # The state directory does not exist on a fresh checkout / first run.
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    # Write to a sibling temp file and rename it over the target so an
    # interrupted run cannot leave a half-written (unparseable) state file.
    tmp_file = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    tmp_file.write_text(json.dumps(state, indent=2))
    tmp_file.replace(STATE_FILE)
def log(message):
    """Log message.

    Prints the message prefixed with a ``[YYYY-MM-DD HH:MM:SS]`` timestamp and
    appends the same line to LOG_FILE.

    Args:
        message: text to log.
    """
    ts = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    log_line = f"[{ts}] {message}"
    print(log_line)
    # The logs directory may not exist yet on a fresh checkout.
    LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
    # Messages contain emoji; an explicit UTF-8 encoding keeps the write from
    # failing on systems whose default locale encoding cannot represent them.
    with open(LOG_FILE, 'a', encoding='utf-8') as f:
        f.write(log_line + '\n')
def send_telegram_alert(item, fmv, discount):
    """Send Telegram alert for a good deal.

    Builds the alert text and hands it to the ``openclaw message send``
    command line tool. Sending is best-effort: any failure is logged and the
    function returns normally.

    Args:
        item: dict with the keys ``title``, ``price`` (numeric), ``source``,
            ``found_at`` and ``link``.
        fmv: numeric value shown as "FMV" in the alert.
        discount: numeric percentage shown as the discount below FMV.
    """
    config = load_config()
    target = config.get('telegram_target', 'telegram:8269921691')
    message = f"""🔥 *CAST IRON DEAL ALERT!*
*Item:* {item['title']}
*Price:* ${item['price']:.2f}
*FMV:* ${fmv:.2f}
*Discount:* {discount:.0f}% below FMV! 💰
*Source:* {item['source']}
*Found:* {item['found_at']}
🔗 {item['link']}
_Action: Buy now / Bid / Ignore_"""
    try:
        result = subprocess.run([
            'openclaw', 'message', 'send',
            '--channel', 'telegram',
            '--target', target,
            '--message', message
        ], capture_output=True, timeout=30)
    except Exception as e:
        # Deliberately broad: a missing binary, a timeout or any other launch
        # problem must not abort the scan that triggered the alert.
        log(f"❌ Failed to send alert: {e}")
        return

    # subprocess.run does not raise on a non-zero exit status, so check it
    # explicitly instead of reporting success unconditionally.
    if result.returncode != 0:
        output = result.stderr or result.stdout or b""
        detail = output.decode('utf-8', errors='replace').strip()
        log(f"❌ Failed to send alert: openclaw exited with code {result.returncode}: {detail[:200]}")
        return

    log(f"✅ Alert sent for: {item['title'][:50]}")
def _collect_items(config):
    """Query every source and return the combined list of items found.

    A failure in one source is logged and does not prevent the remaining
    sources from being scanned.
    """
    # (label for the "found" log line, label for the error log line, search callable)
    sources = (
        ("🔍 eBay", "eBay", search_ebay_cast_iron),
        ("📘 Craigslist RSS", "Craigslist", search_craigslist_rss),
        ("📘 Facebook", "Facebook", lambda: search_facebook_marketplace_cast_iron(config)),
    )
    items = []
    for found_label, error_label, search in sources:
        try:
            found = search()
            log(f"{found_label}: Found {len(found)} items")
            items.extend(found)
        except Exception as e:
            log(f"{error_label} scan error: {e}")
    return items


def scan_all_sources():
    """Scan all sources for cast iron items.

    Loads the config and the persisted state, collects items from every
    source, sends an alert for each not-yet-seen item that ``is_good_deal``
    accepts, records every evaluated link as seen and saves the state.

    Returns:
        int: number of deals found (and alerted on) during this scan.
    """
    log("🔍 Starting cast iron scan...")
    # Load config and state
    config = load_config()
    state = load_state()
    # A dict is used as an insertion-ordered set: O(1) membership tests while
    # remembering the order in which links were seen, so that trimming below
    # really drops the OLDEST entries (a plain set has no usable order).
    seen_links = dict.fromkeys(state.get('seen_links') or [])
    max_seen = 1000

    all_items = _collect_items(config)

    deals_found = 0
    min_discount = config.get('min_discount_percent', 80)  # Only ultra-deals now
    for item in all_items:
        # The link is used verbatim as the de-duplication key.
        # NOTE: no normalization is applied, so two URLs for the same listing
        # that differ only in e.g. query parameters count as different items.
        link = item.get('link', '')
        if not link:
            continue
        # Skip if already seen
        if link in seen_links:
            log(f"⏭️ Skipping duplicate: {str(item.get('title', ''))[:50]}")
            continue
        try:
            # Check if it's a good deal
            is_deal, discount, fmv = is_good_deal(item['price'], item['title'], min_discount)
            if is_deal:
                log(f"🎯 DEAL FOUND: {item['title'][:50]} - ${item['price']} ({discount:.0f}% off)")
                send_telegram_alert(item, fmv, discount)
                deals_found += 1
        except (KeyError, TypeError, ValueError) as e:
            # One malformed item must not abort the scan: aborting here would
            # lose the state update for alerts that were already sent. The
            # item is left unmarked so it is evaluated again on the next scan.
            log(f"⚠️ Skipping malformed item {link}: {e!r}")
            continue
        # ALWAYS mark as seen (whether deal or not) to prevent future duplicates
        seen_links[link] = None

    # Keep only the most recent links so the state file cannot grow forever.
    state['seen_links'] = list(seen_links)[-max_seen:]
    save_state(state)
    log(f"Scan complete. Deals found: {deals_found}, Total items processed: {len(all_items)}")
    return deals_found
if __name__ == "__main__":
    # Run one scan pass. The process always exits with status 0; the number of
    # deals returned by scan_all_sources() does not influence the exit code.
    scan_all_sources()
    sys.exit(0)