feat: Add Modal API fallback for scraping
- Created modal_api.py module for Modal GLM API integration
- Provides a fallback when direct scraping fails
- Can parse eBay listings and other marketplace data
- Configuration via modal_config.json
- Ready to use when direct scraping is blocked

API endpoint configured:
- Model: zai-org/GLM-5.1-FP8
- Endpoint: https://api.us-west-2.modal.direct/v1/chat/completions
- Usage: scrape_with_modal(url, task)
This commit is contained in:
140
agents/cast-iron-scout/modal_api.py
Normal file
140
agents/cast-iron-scout/modal_api.py
Normal file
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Modal API Fallback
|
||||
Use Modal's GLM API for scraping assistance when direct scraping fails
|
||||
"""
|
||||
import requests
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
CONFIG_FILE = Path(__file__).parent / "modal_config.json"
|
||||
|
||||
def get_modal_token():
    """Return the Modal API token stored in the config file.

    Reads ``CONFIG_FILE`` as JSON and returns the value stored under the
    ``'token'`` key.

    Returns:
        The stored token, or None when the config file is missing or
        unreadable, is not valid JSON, is not a JSON object, or has no
        ``'token'`` key.
    """
    # Read directly instead of checking exists() first: that avoids a race
    # with the file disappearing, and lets a corrupt or unreadable config
    # degrade to "no token" instead of raising into the scraping fallback.
    try:
        config = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: invalid JSON
        # (json.JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
        return None

    # Valid JSON that is not an object (e.g. a list or string) has no .get().
    if not isinstance(config, dict):
        return None
    return config.get('token')
|
||||
|
||||
def save_modal_token(token):
    """Persist the Modal API token to the config file.

    Overwrites ``CONFIG_FILE`` with a JSON object of the form
    ``{"token": <token>}``.

    Args:
        token: The API token to store.
    """
    # The token is a bearer credential, so make sure the file is owner-only
    # *before* the secret is written into it: touch() creates a missing file
    # with mode 0o600, and chmod() tightens a file that already existed.
    CONFIG_FILE.touch(mode=0o600, exist_ok=True)
    try:
        CONFIG_FILE.chmod(0o600)
    except OSError:
        # Best effort: some filesystems/owners do not allow changing the
        # mode; saving the token should still proceed as it did before.
        pass
    CONFIG_FILE.write_text(json.dumps({'token': token}, indent=2), encoding="utf-8")
|
||||
|
||||
def scrape_with_modal(url, task="Extract product information including title, price, and condition"):
    """Ask the Modal-hosted GLM chat model to extract data for a URL.

    Sends a single chat-completions request whose user message contains
    *task* and *url*, then tries to decode the model's reply as JSON. Note
    that this function does not download the page itself; the URL is only
    passed to the model as text inside the prompt.

    Args:
        url: URL to scrape.
        task: What to extract from the page.

    Returns:
        The model's reply decoded from JSON when it parses;
        ``{"raw_response": content}`` when the reply is not valid JSON;
        None when no token is configured, the API answers with a non-200
        status, or the request/response handling raises.
    """
    token = get_modal_token()
    if not token:
        print("❌ Modal API token not configured")
        return None

    try:
        response = requests.post(
            "https://api.us-west-2.modal.direct/v1/chat/completions",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {token}",
            },
            json={
                "model": "zai-org/GLM-5.1-FP8",
                "messages": [
                    {
                        "role": "system",
                        "content": "You are a web scraping assistant. Extract structured data from web pages. Return JSON only."
                    },
                    {
                        "role": "user",
                        "content": f"{task}\n\nURL: {url}\n\nReturn: title, price, condition, description"
                    },
                ],
                "max_tokens": 1000,
            },
            timeout=30,
        )

        if response.status_code != 200:
            print(f"❌ Modal API error: {response.status_code} - {response.text}")
            return None

        data = response.json()
        content = data['choices'][0]['message']['content']

        # Try to parse the reply as JSON. Only decoding failures fall back to
        # the raw text: ValueError covers json.JSONDecodeError, TypeError
        # covers a non-string reply such as None. A bare ``except`` here
        # would also swallow KeyboardInterrupt and SystemExit.
        try:
            return json.loads(content)
        except (ValueError, TypeError):
            return {"raw_response": content}

    except Exception as e:
        # Deliberate catch-all boundary: this is a best-effort fallback, so
        # network errors and unexpected response shapes are reported and
        # turned into None rather than propagated to the caller.
        print(f"❌ Modal API exception: {e}")
        return None
|
||||
|
||||
def parse_ebay_listing(html_content):
    """Ask the Modal-hosted GLM chat model to parse eBay listing HTML.

    Only the first 5000 characters of *html_content* are included in the
    prompt; anything beyond that is not seen by the model.

    Args:
        html_content: HTML text of an eBay page.

    Returns:
        The model's reply decoded from JSON, or None when no token is
        configured, the API answers with a non-200 status, the reply is
        not valid JSON, or the request/response handling raises.
    """
    token = get_modal_token()
    if not token:
        # Report the reason, consistent with scrape_with_modal, instead of
        # failing silently.
        print("❌ Modal API token not configured")
        return None

    try:
        response = requests.post(
            "https://api.us-west-2.modal.direct/v1/chat/completions",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {token}",
            },
            json={
                "model": "zai-org/GLM-5.1-FP8",
                "messages": [
                    {
                        "role": "system",
                        "content": "You are an eBay listing parser. Extract product data from HTML. Return ONLY valid JSON."
                    },
                    {
                        "role": "user",
                        "content": f"Extract from this eBay HTML: title, price, seller, condition, image_url, listing_url\n\nHTML: {html_content[:5000]}"
                    },
                ],
                "max_tokens": 500,
            },
            timeout=30,
        )

        if response.status_code != 200:
            # Surface the HTTP failure the same way scrape_with_modal does so
            # a blocked or misconfigured API is diagnosable.
            print(f"❌ Modal API error: {response.status_code} - {response.text}")
            return None

        data = response.json()
        content = data['choices'][0]['message']['content']

        # Only decoding failures mean "no structured data": ValueError covers
        # json.JSONDecodeError, TypeError covers a non-string reply. A bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
        try:
            return json.loads(content)
        except (ValueError, TypeError):
            return None

    except Exception as e:
        # Deliberate catch-all boundary for this best-effort helper.
        print(f"Error: {e}")
        return None
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: prompt for a token, store it, then run one sample
    # request against an eBay search URL and report the outcome.
    print("Testing Modal API...")

    # Strip whitespace picked up when pasting; it would otherwise end up
    # inside the Authorization header value.
    token = input("Enter your Modal API token: ").strip()
    if token:
        save_modal_token(token)
    else:
        # Blank input must not overwrite a previously saved token with "".
        print("No token entered; using any previously saved token")

    result = scrape_with_modal(
        "https://www.ebay.com/sch/i.html?_nkw=griswold+skillet",
        "Find cast iron skillet listings with prices"
    )

    if result:
        print(f"✅ Success: {result}")
    else:
        print("❌ Failed")
|
||||
Reference in New Issue
Block a user