# HOALedgerIQ_Website/agents/cast-iron-scout/sources/craigslist_scanner.py

#!/usr/bin/env python3
"""
Craigslist Scanner for Cast Iron
Scans Craigslist for cast iron cookware deals
"""
import re
from datetime import datetime
from urllib.parse import quote, urljoin

import requests

def search_craigslist_cast_iron(locations=None, *, query="cast iron skillet",
                                max_locations=5, max_results=10):
    """
    Search Craigslist for cast iron items.

    Args:
        locations: Craigslist site subdomains to scan (e.g. 'atlanta',
            'newyork'); each one is used as
            ``https://<location>.craigslist.org``. Defaults to a built-in
            list of major metro areas.
        query: Search phrase sent to Craigslist.
        max_locations: Only the first ``max_locations`` entries of
            ``locations`` are scanned, to avoid rate limiting. ``None``
            scans every entry.
        max_results: Maximum number of listings kept per location.
            ``None`` keeps every listing found on the page.

    Returns:
        A list of dicts with the keys 'title', 'price', 'link', 'source',
        'location' and 'found_at'. 'price' is a float, or 0 when no price
        could be parsed; 'found_at' is ``datetime.now().isoformat()``.

    The scan is best-effort: a location that fails (network error, non-200
    response, parse error) is reported on stdout and skipped, and the
    function still returns whatever the other locations produced.
    """
    if locations is None:
        # Major metro areas with active cast iron markets
        locations = [
            'atlanta', 'austin', 'boston', 'charleston', 'chicago',
            'dallas', 'denver', 'detroit', 'houston', 'kansas',
            'lasvegas', 'losangeles', 'miami', 'minneapolis', 'nashville',
            'newjersey', 'newyork', 'orangecounty', 'philadelphia',
            'phoenix', 'pittsburgh', 'portland', 'raleigh', 'sacramento',
            'sandiego', 'sf', 'seattle', 'stlouis', 'tampa', 'washingtondc'
        ]

    items = []

    # Percent-encode the whole phrase (spaces become %20) so reserved
    # characters such as '&' or '#' cannot corrupt the query string.
    encoded_query = quote(query, safe='')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
    }

    for location in list(locations)[:max_locations]:
        url = f"https://{location}.craigslist.org/search/sss?query={encoded_query}"
        try:
            response = requests.get(url, headers=headers, timeout=10)
            if response.status_code != 200:
                print(f"Error scanning Craigslist {location}: HTTP {response.status_code}")
                continue
            items.extend(_parse_listings(response.text, url, location, max_results))
        except Exception as e:
            # Per-location boundary: one failing site must not abort the scan.
            print(f"Error scanning Craigslist {location}: {e}")

    return items


# A price must contain at least one digit; thousands separators are allowed.
_PRICE_PATTERN = re.compile(r'\$?\s*(\d[\d,]*)')


def _parse_price(price_elem):
    """Return the price found in a price element as a float, or 0 if none."""
    if price_elem is None:
        return 0
    match = _PRICE_PATTERN.search(price_elem.text)
    if match is None:
        return 0
    return float(match.group(1).replace(',', ''))


def _parse_listings(html, page_url, location, max_results):
    """Extract up to ``max_results`` listing dicts from one search-results page."""
    # Imported here because the file does not import bs4 at module level.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, 'html.parser')
    listings = []

    # Each result is expected to be an <li class="result-row">.
    # NOTE(review): confirm these class names still match live Craigslist pages.
    for row in soup.find_all('li', class_='result-row')[:max_results]:
        try:
            title_elem = row.find('a', class_='result-title')
            if title_elem is None:
                continue
            href = title_elem.get('href')
            if not href:
                continue

            hood_elem = row.find('span', class_='result-hood')
            listings.append({
                'title': title_elem.text.strip(),
                'price': _parse_price(row.find('span', class_='result-price')),
                # Resolve relative hrefs against the page so links are usable.
                'link': urljoin(page_url, href),
                'source': f'Craigslist ({location})',
                'location': hood_elem.text.strip() if hood_elem is not None else location,
                'found_at': datetime.now().isoformat()
            })
        except (AttributeError, KeyError, TypeError, ValueError) as e:
            # A single malformed row should not discard the rest of the page.
            print(f"Skipping malformed Craigslist listing in {location}: {e}")

    return listings

if __name__ == "__main__":
    # Manual smoke run: scan with the defaults and preview the first few hits.
    print("🔍 Scanning Craigslist for cast iron...")
    found = search_craigslist_cast_iron()
    print(f"Found {len(found)} items")
    preview = found[:5]
    for listing in preview:
        title, price, source = listing['title'], listing['price'], listing['source']
        print(f"  - {title[:50]} - ${price} ({source})")