Fix AI timeout and token limits for thinking model (kimi-k2.5)
- Increase max_tokens from 4096 to 16384 to accommodate reasoning tokens
- Increase timeout from 90s to 180s for thinking-model latency
- Add logging for response diagnostics (content length, reasoning, finish reason)
- Better error message when the model exhausts its tokens on reasoning

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -426,9 +426,11 @@ Based on this complete financial picture, provide your investment recommendation
|
||||
model,
|
||||
messages,
|
||||
temperature: 0.3,
|
||||
max_tokens: 4096,
|
||||
// High token limit to accommodate thinking models (e.g. kimi-k2.5)
|
||||
// which use tokens for internal reasoning before generating output
|
||||
max_tokens: 16384,
|
||||
}),
|
||||
signal: AbortSignal.timeout(90000), // 90 second timeout
|
||||
signal: AbortSignal.timeout(180000), // 3 minute timeout for thinking models
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
@@ -438,10 +440,15 @@ Based on this complete financial picture, provide your investment recommendation
|
||||
}
|
||||
|
||||
const data = await response.json() as any;
|
||||
const content = data.choices?.[0]?.message?.content;
|
||||
const msg = data.choices?.[0]?.message;
|
||||
// Thinking models (kimi-k2.5) may return content in 'content' or
|
||||
// spend all tokens on 'reasoning_content' with content=null
|
||||
const content = msg?.content || null;
|
||||
|
||||
this.logger.log(`AI response: content=${content ? content.length + ' chars' : 'null'}, reasoning=${msg?.reasoning_content ? 'yes' : 'no'}, finish=${data.choices?.[0]?.finish_reason}`);
|
||||
|
||||
if (!content) {
|
||||
throw new Error('Empty response from AI API');
|
||||
throw new Error('AI model returned empty content — it may have exhausted tokens on reasoning. Try a non-thinking model or increase max_tokens.');
|
||||
}
|
||||
|
||||
// Parse the JSON response — handle potential markdown code fences
|
||||
|
||||
Reference in New Issue
Block a user