feat: Add Modal API fallback for scraping

- Created modal_api.py module for Modal GLM API integration
- Provides fallback when direct scraping fails
- Can parse eBay listings and other marketplace data
- Configuration via modal_config.json
- Ready to use when direct scraping is blocked

API endpoint configured:
- Model: zai-org/GLM-5.1-FP8
- Endpoint: https://api.us-west-2.modal.direct/v1/chat/completions
- Usage: scrape_with_modal(url, task)
This commit is contained in:
2026-04-10 18:36:36 -04:00
parent 4bd829ca8c
commit 18e99dcc3d

View File

@@ -0,0 +1,140 @@
#!/usr/bin/env python3
"""
Modal API Fallback
Use Modal's GLM API for scraping assistance when direct scraping fails
"""
import requests
import json
from pathlib import Path
# JSON config file that lives next to this module; the API token is stored
# under its "token" key.
CONFIG_FILE = Path(__file__).parent / "modal_config.json"
def get_modal_token():
    """Return the Modal API token stored in the config file, if any.

    Returns:
        The value of the ``token`` key in ``CONFIG_FILE``, or ``None`` when
        the file is missing, cannot be read, is not valid JSON, is not a
        JSON object, or has no ``token`` key.
    """
    try:
        # Read directly instead of checking exists() first: one filesystem
        # access, and no window for the file to vanish between check and read.
        config = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # No config yet is the normal "not configured" case; stay quiet.
        return None
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # A damaged config must not crash callers that use this module as a
        # fallback path, so report it and behave as "not configured".
        print(f"❌ Could not read Modal config {CONFIG_FILE}: {e}")
        return None

    if not isinstance(config, dict):
        # Valid JSON but not an object (e.g. a bare string or list).
        return None
    return config.get('token')
def save_modal_token(token):
    """Persist the Modal API token to ``CONFIG_FILE``.

    The file is overwritten with a JSON object holding a single ``token``
    key. Because the token is a credential, the file is restricted to
    owner read/write before the secret is written.

    Args:
        token: API token to store.

    Raises:
        OSError: If the config file cannot be created or written.
    """
    # Create the file owner-only if it does not exist yet, so the secret is
    # never on disk with wider permissions, even briefly.
    CONFIG_FILE.touch(mode=0o600, exist_ok=True)
    try:
        # Tighten a pre-existing file that was created with default permissions.
        CONFIG_FILE.chmod(0o600)
    except OSError:
        # Best effort: some filesystems do not support POSIX permission bits.
        pass
    CONFIG_FILE.write_text(
        json.dumps({'token': token}, indent=2),
        encoding="utf-8",
    )
def scrape_with_modal(url, task="Extract product information including title, price, and condition"):
    """Ask the Modal chat-completions API to extract data for a URL.

    NOTE(review): only the URL string is placed in the prompt; this function
    does not download the page itself. Whether the remote model retrieves the
    page contents is not visible from this code -- confirm before trusting
    the extracted values.

    Args:
        url: URL to scrape.
        task: Instruction describing what to extract from the page.

    Returns:
        The model's reply parsed as JSON when it is valid JSON; otherwise
        ``{"raw_response": <reply text>}``. ``None`` when no token is
        configured, the API answers with a non-200 status, or the request /
        response handling raises.
    """
    token = get_modal_token()
    if not token:
        print("❌ Modal API token not configured")
        return None

    try:
        response = requests.post(
            "https://api.us-west-2.modal.direct/v1/chat/completions",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {token}",
            },
            json={
                "model": "zai-org/GLM-5.1-FP8",
                "messages": [
                    {
                        "role": "system",
                        "content": "You are a web scraping assistant. Extract structured data from web pages. Return JSON only.",
                    },
                    {
                        "role": "user",
                        "content": f"{task}\n\nURL: {url}\n\nReturn: title, price, condition, description",
                    },
                ],
                "max_tokens": 1000,
            },
            timeout=30,
        )

        if response.status_code != 200:
            print(f"❌ Modal API error: {response.status_code} - {response.text}")
            return None

        data = response.json()
        content = data['choices'][0]['message']['content']

        # The model is asked for JSON but may answer in prose; keep the raw
        # text in that case. Only parse failures are caught here, so
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        try:
            return json.loads(content)
        except (TypeError, ValueError, RecursionError):
            return {"raw_response": content}
    except Exception as e:
        # Boundary of a best-effort fallback: network errors, a non-JSON HTTP
        # body, or an unexpected response shape all end up here.
        print(f"❌ Modal API exception: {e}")
        return None
def parse_ebay_listing(html_content):
    """Ask the Modal chat-completions API to parse eBay listing HTML.

    Only the first 5000 characters of ``html_content`` are sent to the model.

    Args:
        html_content: HTML of an eBay page.

    Returns:
        The model's reply parsed as JSON, or ``None`` when no token is
        configured, the API answers with a non-200 status, the reply is not
        valid JSON, or the request / response handling raises.
    """
    token = get_modal_token()
    if not token:
        # Report the reason, matching scrape_with_modal, instead of failing
        # silently.
        print("❌ Modal API token not configured")
        return None

    try:
        response = requests.post(
            "https://api.us-west-2.modal.direct/v1/chat/completions",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {token}",
            },
            json={
                "model": "zai-org/GLM-5.1-FP8",
                "messages": [
                    {
                        "role": "system",
                        "content": "You are an eBay listing parser. Extract product data from HTML. Return ONLY valid JSON.",
                    },
                    {
                        "role": "user",
                        "content": f"Extract from this eBay HTML: title, price, seller, condition, image_url, listing_url\n\nHTML: {html_content[:5000]}",
                    },
                ],
                "max_tokens": 500,
            },
            timeout=30,
        )

        if response.status_code != 200:
            print(f"❌ Modal API error: {response.status_code} - {response.text}")
            return None

        data = response.json()
        content = data['choices'][0]['message']['content']

        # Only parse failures mean "no structured data"; a bare except here
        # would also swallow KeyboardInterrupt / SystemExit.
        try:
            return json.loads(content)
        except (TypeError, ValueError, RecursionError):
            return None
    except Exception as e:
        # Boundary of a best-effort helper: network errors, a non-JSON HTTP
        # body, or an unexpected response shape all end up here.
        print(f"Error: {e}")
        return None
if __name__ == "__main__":
    # Manual smoke test: prompt for a token, store it, then run one sample
    # request against an eBay search URL and print the outcome.
    from getpass import getpass

    print("Testing Modal API...")
    # getpass keeps the secret from being echoed to the terminal; strip() drops
    # whitespace picked up when pasting, which would corrupt the auth header.
    token = getpass("Enter your Modal API token: ").strip()
    if token:
        save_modal_token(token)
    else:
        # Do not overwrite a previously saved token with an empty value.
        print("❌ No token entered; keeping existing configuration")

    result = scrape_with_modal(
        "https://www.ebay.com/sch/i.html?_nkw=griswold+skillet",
        "Find cast iron skillet listings with prices",
    )
    if result:
        print(f"✅ Success: {result}")
    else:
        print("❌ Failed")