- Added Craigslist scanner framework
- Added Facebook Marketplace placeholder
- Updated main scanner to aggregate all sources
- Added STATUS.md for development tracking
- Fixed import paths for all scanners
- Ready for HTML scraping implementation

Current status:
- eBay: RSS built but unreliable; needs HTML scraping
- Craigslist: Framework ready; debugging HTML parsing
- Facebook: Placeholder (needs Selenium)
- All sources tracked in unified scan loop
88 lines
3.5 KiB
Python
88 lines
3.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Craigslist Scanner for Cast Iron
|
|
Scans Craigslist for cast iron cookware deals
|
|
"""
|
|
import requests
|
|
from datetime import datetime
|
|
import re
|
|
|
|
# Captures the numeric part of a price such as "$1,250". The capture must start
# with a digit so the captured text always converts cleanly to a number.
_PRICE_RE = re.compile(r'\$?(\d[\d,]*)')


def _parse_craigslist_price(text):
    """Return the first price found in *text* as a float, or 0 if there is none."""
    match = _PRICE_RE.search(text)
    if match is None:
        return 0
    return float(match.group(1).replace(',', ''))


def _parse_craigslist_row(row, location):
    """Build one listing dict from a result row, or return None if it has no usable title link."""
    title_elem = row.find('a', class_='result-title')
    if title_elem is None:
        return None

    link = title_elem.get('href')
    if link is None:
        # A title without a link cannot be followed up on; skip the row.
        return None

    price_elem = row.find('span', class_='result-price')
    price = _parse_craigslist_price(price_elem.text) if price_elem is not None else 0

    # Fall back to the searched location code when the row names no neighbourhood.
    hood_elem = row.find('span', class_='result-hood')
    loc = hood_elem.text.strip() if hood_elem is not None else location

    return {
        'title': title_elem.text,
        'price': price,
        'link': link,
        'source': f'Craigslist ({location})',
        'location': loc,
        'found_at': datetime.now().isoformat(),
    }


def _parse_craigslist_listings(html, location, max_results):
    """Extract at most *max_results* listing dicts from one search-results page."""
    # Imported here rather than at module level, so importing this module
    # does not itself require bs4.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, 'html.parser')

    # NOTE(review): the 'result-row' / 'result-title' / 'result-price' /
    # 'result-hood' class names are assumed to match Craigslist's result
    # markup -- confirm against a live page.
    rows = soup.find_all('li', class_='result-row')

    listings = []
    for row in rows[:max_results]:
        try:
            listing = _parse_craigslist_row(row, location)
        except (AttributeError, KeyError, TypeError, ValueError) as e:
            # One malformed row must not discard the rest of the page, but it
            # should be visible rather than silently dropped.
            print(f"Skipping malformed Craigslist listing in {location}: {e}")
            continue
        if listing is not None:
            listings.append(listing)
    return listings


def search_craigslist_cast_iron(locations=None, search_query="cast iron skillet",
                                max_locations=5, max_results=10):
    """
    Search Craigslist for cast iron items.

    Args:
        locations: sequence of Craigslist location codes; each one is used
            verbatim as the subdomain of craigslist.org (e.g. 'atlanta',
            'newyork', 'losangeles'). Defaults to a built-in list of metro
            areas.
        search_query: free-text search phrase; it is URL-encoded before use.
        max_locations: only the first ``max_locations`` entries of
            ``locations`` are scanned (default 5, to avoid rate limiting).
            Pass None to scan all of them.
        max_results: maximum number of listings taken per location
            (default 10). Pass None for no cap.

    Returns:
        A list of dicts with the keys 'title', 'price' (a float, or 0 when no
        price was found), 'link', 'source', 'location' and 'found_at' (ISO
        timestamp from ``datetime.now()``). Locations that fail are reported
        with ``print`` and skipped, so the list may be empty.
    """
    from urllib.parse import quote

    if locations is None:
        # Major metro areas with active cast iron markets
        # NOTE(review): verify that every code resolves as a subdomain --
        # e.g. 'kansas' and 'sf' look suspicious (Craigslist's own site list
        # appears to use 'kansascity' and 'sfbay'); confirm before relying on them.
        locations = [
            'atlanta', 'austin', 'boston', 'charleston', 'chicago',
            'dallas', 'denver', 'detroit', 'houston', 'kansas',
            'lasvegas', 'losangeles', 'miami', 'minneapolis', 'nashville',
            'newjersey', 'newyork', 'orangecounty', 'philadelphia',
            'phoenix', 'pittsburgh', 'portland', 'raleigh', 'sacramento',
            'sandiego', 'sf', 'seattle', 'stlouis', 'tampa', 'washingtondc'
        ]

    items = []

    # Encode once; spaces become %20 and reserved characters such as '&' or
    # '#' can no longer break the query string.
    encoded_query = quote(search_query, safe='')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
    }

    for location in locations[:max_locations]:
        url = f"https://{location}.craigslist.org/search/sss?query={encoded_query}"
        try:
            response = requests.get(url, headers=headers, timeout=10)

            if response.status_code != 200:
                print(f"Craigslist {location} returned HTTP {response.status_code}; skipping")
                continue

            items.extend(_parse_craigslist_listings(response.text, location, max_results))
        except Exception as e:
            # Best-effort boundary: a failure in one location (network error,
            # missing parser, unexpected markup) must not abort the whole scan.
            print(f"Error scanning Craigslist {location}: {e}")

    return items
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: scan with the default settings and preview the results.
    print("🔍 Scanning Craigslist for cast iron...")
    found = search_craigslist_cast_iron()
    print(f"Found {len(found)} items")

    # Show only the first five hits, with titles cut to 50 characters.
    preview = found[:5]
    for listing in preview:
        short_title = listing['title'][:50]
        print(f" - {short_title} - ${listing['price']} ({listing['source']})")
|