fix(core): detect vLLM context overflow errors (#17763)
Co-authored-by: Aiden Cline <63023139+rekram1-node@users.noreply.github.com>
parent 1b86c27fb8
commit 56102ff642
@@ -13,7 +13,7 @@ export namespace ProviderError {
     /input token count.*exceeds the maximum/i, // Google (Gemini)
     /maximum prompt length is \d+/i, // xAI (Grok)
     /reduce the length of the messages/i, // Groq
-    /maximum context length is \d+ tokens/i, // OpenRouter, DeepSeek
+    /maximum context length is \d+ tokens/i, // OpenRouter, DeepSeek, vLLM
     /exceeds the limit of \d+/i, // GitHub Copilot
     /exceeds the available context size/i, // llama.cpp server
     /greater than the context length/i, // LM Studio
@@ -21,6 +21,8 @@ export namespace ProviderError {
     /exceeded model token limit/i, // Kimi For Coding, Moonshot
     /context[_ ]length[_ ]exceeded/i, // Generic fallback
     /request entity too large/i, // HTTP 413
+    /context length is only \d+ tokens/i, // vLLM
+    /input length.*exceeds.*context length/i, // vLLM
   ]

  function isOpenAiErrorRetryable(e: APICallError) {
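
For context, the change extends the list of context-overflow signatures so that vLLM's error messages are recognized alongside the other providers. Below is a minimal standalone sketch of how such a pattern list classifies an error message; the names (CONTEXT_OVERFLOW_PATTERNS, isContextOverflow) are hypothetical illustrations, not the repository's actual identifiers:

// Hypothetical sketch, not the repo's implementation: match a provider
// error message against the overflow patterns touched by this commit.
const CONTEXT_OVERFLOW_PATTERNS: RegExp[] = [
  /maximum context length is \d+ tokens/i, // OpenRouter, DeepSeek, vLLM
  /context length is only \d+ tokens/i, // vLLM
  /input length.*exceeds.*context length/i, // vLLM
]

function isContextOverflow(message: string): boolean {
  // Treat the message as a context overflow if any pattern matches.
  return CONTEXT_OVERFLOW_PATTERNS.some((p) => p.test(message))
}

// A typical OpenAI-compatible rejection, as vLLM also emits, now matches:
console.log(
  isContextOverflow(
    "This model's maximum context length is 8192 tokens. However, you requested 9100 tokens.",
  ),
) // true

All patterns carry the /i flag, so the check is tolerant of provider-side capitalization differences in the error text.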