#!/usr/bin/env python3
"""
Modal API Fallback

Use Modal's GLM API for scraping assistance when direct scraping fails.

The module talks to a chat-completions endpoint and asks the model to return
structured listing data (eBay and other marketplaces) as JSON.

Configuration:
    The bearer token is read from ``modal_config.json`` next to this file,
    stored as ``{"token": "..."}``. Use ``save_modal_token`` to create it.

API endpoint:
    Model:    zai-org/GLM-5.1-FP8
    Endpoint: https://api.us-west-2.modal.direct/v1/chat/completions

Usage:
    scrape_with_modal(url, task)
    parse_ebay_listing(html_content)
"""
import json
import os
from pathlib import Path

CONFIG_FILE = Path(__file__).parent / "modal_config.json"

MODAL_ENDPOINT = "https://api.us-west-2.modal.direct/v1/chat/completions"
MODAL_MODEL = "zai-org/GLM-5.1-FP8"
REQUEST_TIMEOUT = 30  # seconds, passed to requests.post
MAX_HTML_CHARS = 5000  # only this many leading characters of HTML are sent


def get_modal_token(config_file=None):
    """
    Get Modal API token from config

    Args:
        config_file: Optional path of the JSON config to read. Defaults to
            ``CONFIG_FILE``.

    Returns:
        The value stored under ``"token"``, or None when the file is missing,
        unreadable, not valid JSON, not a JSON object, or has no token.
    """
    path = CONFIG_FILE if config_file is None else Path(config_file)
    try:
        config = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # Not configured yet: the normal first-run state, so stay quiet.
        return None
    except (OSError, ValueError) as exc:
        # A corrupt config must not crash callers that promise to return None.
        print(f"❌ Could not read Modal config {path}: {exc}")
        return None

    if not isinstance(config, dict):
        print(f"❌ Modal config {path} must contain a JSON object")
        return None
    return config.get("token")


def save_modal_token(token, config_file=None):
    """
    Save Modal API token

    The file is restricted to owner read/write before the secret is written,
    so the token is not left readable by other local users.

    Args:
        token: Token string to store under the ``"token"`` key.
        config_file: Optional path of the JSON config to write. Defaults to
            ``CONFIG_FILE``.
    """
    path = CONFIG_FILE if config_file is None else Path(config_file)
    try:
        path.touch(mode=0o600, exist_ok=True)
        # touch() does not change the mode of a file that already exists.
        path.chmod(0o600)
    except OSError:
        # Best effort: some filesystems do not support POSIX permissions.
        pass
    path.write_text(json.dumps({"token": token}, indent=2), encoding="utf-8")


def _chat_completion(system_prompt, user_prompt, max_tokens):
    """Send one chat request and return the reply text, or None on failure."""
    token = get_modal_token()
    if not token:
        print("❌ Modal API token not configured")
        return None

    # Imported here so that importing this optional fallback module does not
    # itself require the third-party dependency.
    import requests

    try:
        response = requests.post(
            MODAL_ENDPOINT,
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {token}",
            },
            json={
                "model": MODAL_MODEL,
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                "max_tokens": max_tokens,
            },
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f"❌ Modal API error: {response.status_code} - {response.text}")
            return None
        content = response.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
        # Network failure, non-JSON body, or a body without the expected
        # choices[0].message.content path.
        print(f"❌ Modal API exception: {e}")
        return None

    if not isinstance(content, str):
        print("❌ Modal API exception: response contained no text content")
        return None
    return content


def _parse_json_content(content):
    """Parse model output as JSON, tolerating a Markdown code fence.

    Raises ValueError (json.JSONDecodeError) when the text is not JSON.
    """
    text = content.strip()
    if text.startswith("```"):
        # Drop the opening fence line (``` or ```json) and the closing fence.
        text = text.partition("\n")[2].rsplit("```", 1)[0]
    return json.loads(text)


def scrape_with_modal(url, task="Extract product information including title, price, and condition"):
    """
    Use Modal API to scrape a URL

    NOTE(review): only the URL string is placed in the prompt; this module
    does not download the page. Whether the model can retrieve it is not
    visible from here - confirm before relying on the extracted values.

    Args:
        url: URL to scrape
        task: What to extract from the page

    Returns:
        Parsed JSON from the model reply, ``{"raw_response": <text>}`` when
        the reply is not valid JSON, or None if the request failed.
    """
    content = _chat_completion(
        "You are a web scraping assistant. Extract structured data from web pages. Return JSON only.",
        f"{task}\n\nURL: {url}\n\nReturn: title, price, condition, description",
        max_tokens=1000,
    )
    if content is None:
        return None

    try:
        return _parse_json_content(content)
    except ValueError:
        # Keep the unparsed reply available to the caller.
        return {"raw_response": content}


def parse_ebay_listing(html_content):
    """
    Use Modal API to parse eBay HTML content

    Only the first ``MAX_HTML_CHARS`` characters of the HTML are sent.

    Args:
        html_content: HTML text of an eBay page.

    Returns:
        Parsed JSON listing data, or None when the input is empty, the
        request failed, or the reply is not valid JSON.
    """
    if not html_content:
        print("❌ No HTML content to parse")
        return None

    content = _chat_completion(
        "You are an eBay listing parser. Extract product data from HTML. Return ONLY valid JSON.",
        "Extract from this eBay HTML: title, price, seller, condition, image_url, listing_url"
        f"\n\nHTML: {html_content[:MAX_HTML_CHARS]}",
        max_tokens=500,
    )
    if content is None:
        return None

    try:
        return _parse_json_content(content)
    except ValueError as e:
        print(f"❌ Modal API returned invalid JSON: {e}")
        return None


def main():
    """Prompt for a token, store it, and run one sample request."""
    print("Testing Modal API...")
    token = input("Enter your Modal API token: ").strip()
    if token:
        # An empty answer must not overwrite a previously saved token.
        save_modal_token(token)

    result = scrape_with_modal(
        "https://www.ebay.com/sch/i.html?_nkw=griswold+skillet",
        "Find cast iron skillet listings with prices",
    )

    if result:
        print(f"✅ Success: {result}")
    else:
        print("❌ Failed")


if __name__ == "__main__":
    main()