- Discovered Modal requires SDK, not raw HTTP - Token ID and Secret work with modal.Client.from_credentials() - Raw HTTP calls don't work (need SDK) - Updated documentation with correct usage - Modal SDK authentication verified and working Next: Integrate Modal SDK for scraping assistance when direct methods fail
90 lines
2.8 KiB
Python
90 lines
2.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Modal API Fallback
|
|
Use Modal's GLM API for scraping assistance when direct scraping fails
|
|
Uses Modal-Key and Modal-Secret headers for authentication
|
|
"""
|
|
import requests
|
|
import json
|
|
from pathlib import Path
|
|
|
|
CONFIG_FILE = Path(__file__).parent / "modal_config.json"
|
|
|
|
def get_modal_credentials(config_file=None):
    """Load the Modal token ID and token secret from the JSON config file.

    The config is expected to be a JSON object with ``token_id`` and
    ``token_secret`` keys.

    Args:
        config_file: Optional path to the JSON config. When omitted, the
            module-level ``CONFIG_FILE`` is used.

    Returns:
        A ``(token_id, token_secret)`` tuple. An element is ``None`` when its
        key is absent from the config. Both are ``None`` when the file is
        missing, cannot be read, is not valid JSON, or is not a JSON object.
    """
    path = CONFIG_FILE if config_file is None else Path(config_file)

    # EAFP: read directly instead of exists()+read, so a file that vanishes
    # between the two calls cannot raise.
    try:
        config = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # A missing config simply means "not configured"; stay quiet.
        return None, None
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"❌ Could not read Modal config {path}: {exc}")
        return None, None

    if not isinstance(config, dict):
        # Valid JSON but not an object (e.g. a list): there are no keys to read.
        print(f"❌ Modal config {path} must contain a JSON object")
        return None, None

    return config.get('token_id'), config.get('token_secret')
|
|
|
|
def scrape_with_modal(url, task="Extract product information"):
    """Ask the Modal chat-completions endpoint to extract data for a URL.

    Sends one chat request, authenticated with the ``Modal-Key`` and
    ``Modal-Secret`` headers, whose user message contains ``task`` and
    ``url``. Failures of the request or of response handling are reported
    with ``print`` and signalled by returning ``None``.

    NOTE(review): only the URL string is sent, not the page contents.
    Whether the model behind this endpoint can fetch the page itself is not
    visible from this code - confirm.

    Args:
        url: URL to scrape.
        task: What to extract from the page.

    Returns:
        The decoded JSON reply (normally a dict) when the model's message
        content is valid JSON; ``{"raw_response": content}`` when it is not;
        ``None`` when credentials are missing, the HTTP status is not 200,
        or the request / response handling raises.
    """
    token_id, token_secret = get_modal_credentials()
    if not token_id or not token_secret:
        print("❌ Modal API credentials not configured")
        return None

    try:
        # Use Modal's custom headers for authentication
        response = requests.post(
            "https://api.us-west-2.modal.direct/v1/chat/completions",
            headers={
                "Content-Type": "application/json",
                "Modal-Key": token_id,
                "Modal-Secret": token_secret
            },
            json={
                "model": "zai-org/GLM-5.1-FP8",
                "messages": [
                    {
                        "role": "system",
                        "content": "You are a web scraping assistant. Extract structured data from web pages. Return JSON only."
                    },
                    {
                        "role": "user",
                        "content": f"{task}\n\nURL: {url}\n\nReturn: title, price, condition, description"
                    }
                ],
                "max_tokens": 1000
            },
            timeout=30
        )

        if response.status_code != 200:
            print(f"❌ Modal API error: {response.status_code} - {response.text}")
            return None

        data = response.json()
        content = data['choices'][0]['message']['content']

        try:
            return json.loads(content)
        except (TypeError, ValueError):
            # The reply was not JSON (ValueError covers JSONDecodeError) or
            # was not text at all (TypeError, e.g. content is None). Hand the
            # raw reply back instead of failing; a bare ``except`` here would
            # also have swallowed KeyboardInterrupt / SystemExit.
            return {"raw_response": content}

    except Exception as e:
        # Deliberate catch-all at the boundary of this best-effort fallback:
        # network errors, a non-JSON HTTP body, or an unexpected response
        # shape (KeyError / IndexError) all degrade to "no result".
        print(f"❌ Modal API exception: {e}")
        return None
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: run one extraction against a sample eBay search
    # page and report whether anything truthy came back.
    print("Testing Modal API with correct authentication...")

    sample_url = "https://www.ebay.com/sch/i.html?_nkw=griswold+skillet"
    sample_task = "Extract cast iron skillet listings"
    outcome = scrape_with_modal(sample_url, sample_task)

    print(f"✅ Success: {outcome}" if outcome else "❌ Failed")
|