feat: Add Modal API fallback for scraping
- Created modal_api.py module for Modal GLM API integration
- Provides fallback when direct scraping fails
- Can parse eBay listings and other marketplace data
- Configuration via modal_config.json
- Ready to use when direct scraping is blocked

API endpoint configured:
- Model: zai-org/GLM-5.1-FP8
- Endpoint: https://api.us-west-2.modal.direct/v1/chat/completions
- Usage: scrape_with_modal(url, task)
This commit is contained in:
140
agents/cast-iron-scout/modal_api.py
Normal file
140
agents/cast-iron-scout/modal_api.py
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Modal API Fallback
|
||||||
|
Use Modal's GLM API for scraping assistance when direct scraping fails
|
||||||
|
"""
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# JSON config file kept in the same directory as this module; the token
# helpers in this file read and write it.
CONFIG_FILE = Path(__file__).with_name("modal_config.json")
|
||||||
|
|
||||||
|
def get_modal_token():
    """Return the Modal API token stored in the config file, or None.

    Reads ``CONFIG_FILE`` as JSON and returns the value of its ``'token'``
    key.

    Returns:
        The stored token, or None when the config file is missing or
        unreadable, does not contain valid JSON, is not a JSON object, or
        has no ``'token'`` key.
    """
    try:
        # EAFP: read directly instead of exists()-then-read, which can race
        # with the file being removed in between.
        config = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: file missing/unreadable. ValueError: malformed JSON
        # (json.JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
        # Either way the token is simply "not configured".
        return None

    if not isinstance(config, dict):
        # Valid JSON that is not an object (e.g. a list) has no .get().
        return None
    return config.get('token')
|
||||||
|
|
||||||
|
def save_modal_token(token):
    """Save the Modal API token to the config file.

    Overwrites ``CONFIG_FILE`` with a JSON object of the form
    ``{"token": <token>}``. Because the file holds a secret, it is created
    with (and, if it already exists, switched to) owner-only permissions
    before the token is written.

    Args:
        token: The API token to store.

    Raises:
        OSError: If the config file cannot be written.
    """
    payload = json.dumps({'token': token}, indent=2)

    try:
        # Restrict access *before* the secret is written so it is never
        # readable by other users, even briefly.
        CONFIG_FILE.touch(mode=0o600, exist_ok=True)
        CONFIG_FILE.chmod(0o600)
    except OSError:
        # Best effort only: some platforms/filesystems do not support POSIX
        # permissions. A real write problem still surfaces from write_text().
        pass

    CONFIG_FILE.write_text(payload, encoding="utf-8")
|
||||||
|
|
||||||
|
def scrape_with_modal(url, task="Extract product information including title, price, and condition"):
    """
    Use Modal API to scrape a URL

    Sends the task text and the URL string to the Modal-hosted chat model
    and tries to decode the reply as JSON. This function does not download
    the page itself; only the URL is placed in the prompt.
    NOTE(review): presumably the model/endpoint can access the page behind
    the URL -- confirm, otherwise the reply is not based on the live page.

    Args:
        url: URL to scrape
        task: What to extract from the page

    Returns:
        The decoded JSON reply (normally a dict) when the model answers with
        JSON, ``{"raw_response": content}`` when the reply is not valid
        JSON, or None when no token is configured, the API answers with a
        non-200 status, or any exception occurs while calling it.
    """
    token = get_modal_token()
    if not token:
        print("❌ Modal API token not configured")
        return None

    try:
        response = requests.post(
            "https://api.us-west-2.modal.direct/v1/chat/completions",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {token}"
            },
            json={
                "model": "zai-org/GLM-5.1-FP8",
                "messages": [
                    {
                        "role": "system",
                        "content": "You are a web scraping assistant. Extract structured data from web pages. Return JSON only."
                    },
                    {
                        "role": "user",
                        "content": f"{task}\n\nURL: {url}\n\nReturn: title, price, condition, description"
                    }
                ],
                "max_tokens": 1000
            },
            timeout=30
        )

        if response.status_code != 200:
            print(f"❌ Modal API error: {response.status_code} - {response.text}")
            return None

        data = response.json()
        content = data['choices'][0]['message']['content']

        # Chat models often wrap JSON in a Markdown code fence
        # (```json ... ```) even when told to return JSON only; unwrap it
        # before decoding.
        text = content.strip() if isinstance(content, str) else content
        if isinstance(text, str) and text.startswith("```"):
            text = text.partition("\n")[2].rsplit("```", 1)[0]

        # Try to parse as JSON
        try:
            return json.loads(text)
        except (TypeError, ValueError):
            # ValueError: not valid JSON; TypeError: content was not text
            # (e.g. None). Hand back the untouched reply instead.
            return {"raw_response": content}

    except Exception as e:
        # Deliberate catch-all boundary: this is a best-effort fallback
        # path, so network errors, bad response bodies and unexpected
        # response shapes are reported and turned into None.
        print(f"❌ Modal API exception: {e}")
        return None
|
||||||
|
|
||||||
|
def parse_ebay_listing(html_content):
    """
    Use Modal API to parse eBay HTML content

    Sends the first 5000 characters of ``html_content`` to the Modal-hosted
    chat model and asks for title, price, seller, condition, image_url and
    listing_url as JSON.

    Args:
        html_content: HTML of an eBay page; only the first 5000 characters
            are included in the prompt.

    Returns:
        The decoded JSON reply (structured listing data), or None when the
        input is empty, no token is configured, the API answers with a
        non-200 status, the reply is not valid JSON, or any exception
        occurs while calling the API.
    """
    if not html_content:
        # Nothing to parse; skip the API call.
        return None

    token = get_modal_token()
    if not token:
        print("❌ Modal API token not configured")
        return None

    try:
        response = requests.post(
            "https://api.us-west-2.modal.direct/v1/chat/completions",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {token}"
            },
            json={
                "model": "zai-org/GLM-5.1-FP8",
                "messages": [
                    {
                        "role": "system",
                        "content": "You are an eBay listing parser. Extract product data from HTML. Return ONLY valid JSON."
                    },
                    {
                        "role": "user",
                        "content": f"Extract from this eBay HTML: title, price, seller, condition, image_url, listing_url\n\nHTML: {html_content[:5000]}"
                    }
                ],
                "max_tokens": 500
            },
            timeout=30
        )

        if response.status_code != 200:
            # Report the failure instead of returning None silently.
            print(f"❌ Modal API error: {response.status_code} - {response.text}")
            return None

        data = response.json()
        content = data['choices'][0]['message']['content']

        # Chat models often wrap JSON in a Markdown code fence
        # (```json ... ```) even when told to return only JSON; unwrap it
        # before decoding.
        text = content.strip() if isinstance(content, str) else content
        if isinstance(text, str) and text.startswith("```"):
            text = text.partition("\n")[2].rsplit("```", 1)[0]

        try:
            return json.loads(text)
        except (TypeError, ValueError):
            # ValueError: not valid JSON; TypeError: content was not text
            # (e.g. None). No structured data could be extracted.
            return None

    except Exception as e:
        # Deliberate catch-all boundary: this is a best-effort fallback
        # path, so any failure is reported and turned into None.
        print(f"Error: {e}")
        return None
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: prompt for a token, store it, then run one sample
    # request against an eBay search URL and print the outcome.
    print("Testing Modal API...")
    token = input("Enter your Modal API token: ").strip()

    # Only persist a non-empty token, so that pressing Enter does not
    # overwrite a previously saved token with an empty string; in that case
    # the request below uses whatever token is already configured.
    if token:
        save_modal_token(token)

    result = scrape_with_modal(
        "https://www.ebay.com/sch/i.html?_nkw=griswold+skillet",
        "Find cast iron skillet listings with prices"
    )

    if result:
        print(f"✅ Success: {result}")
    else:
        print("❌ Failed")
|
||||||
Reference in New Issue
Block a user