Feature: 添加可选的长上下文模型阈值配置

This commit is contained in:
JoeChen
2025-07-26 12:13:55 +08:00
parent 6883fff352
commit 3bbfebb5e3
4 changed files with 11 additions and 4 deletions

View File

@@ -143,6 +143,7 @@ Here is a comprehensive example:
"background": "ollama,qwen2.5-coder:latest",
"think": "deepseek,deepseek-reasoner",
"longContext": "openrouter,google/gemini-2.5-pro-preview",
"longContextThreshold": 60000,
"webSearch": "gemini,gemini-2.5-flash"
}
}
@@ -260,6 +261,7 @@ The `Router` object defines which model to use for different scenarios:
- `background`: A model for background tasks. This can be a smaller, local model to save costs.
- `think`: A model for reasoning-heavy tasks, like Plan Mode.
- `longContext`: A model for handling long contexts (e.g., > 60K tokens).
- `longContextThreshold` (optional): The token count threshold for triggering the long context model. Defaults to 60000 if not specified.
- `webSearch`: A model for handling web search tasks. The model itself must support web search. If you're using openrouter, add the `:online` suffix to the model name.
You can also switch models dynamically in Claude Code with the `/model` command:

View File

@@ -139,6 +139,7 @@ npm install -g @musistudio/claude-code-router
"background": "ollama,qwen2.5-coder:latest",
"think": "deepseek,deepseek-reasoner",
"longContext": "openrouter,google/gemini-2.5-pro-preview",
"longContextThreshold": 60000,
"webSearch": "gemini,gemini-2.5-flash"
}
}
@@ -255,6 +256,7 @@ Transformers 允许您修改请求和响应负载,以确保与不同提供商
- `background`: 用于后台任务的模型。这可以是一个较小的本地模型以节省成本。
- `think`: 用于推理密集型任务(如计划模式)的模型。
- `longContext`: 用于处理长上下文(例如,> 60K 令牌)的模型。
- `longContextThreshold` (可选): 触发长上下文模型的令牌数阈值。如果未指定,默认为 60000。
- `webSearch`: 用于处理网络搜索任务,需要模型本身支持。如果使用`openrouter`需要在模型后面加上`:online`后缀。
您还可以使用 `/model` 命令在 Claude Code 中动态切换模型:

View File

@@ -105,7 +105,9 @@
"default": "deepseek,deepseek-chat",
"background": "ollama,qwen2.5-coder:latest",
"think": "deepseek,deepseek-reasoner",
"longContext": "openrouter,google/gemini-2.5-pro-preview"
"longContext": "openrouter,google/gemini-2.5-pro-preview",
"longContextThreshold": 60000,
"webSearch": "gemini,gemini-2.5-flash"
},
"APIKEY": "your-secret-key",
"HOST": "0.0.0.0"

View File

@@ -68,9 +68,10 @@ const getUseModel = async (req: any, tokenCount: number, config: any) => {
if (req.body.model.includes(",")) {
return req.body.model;
}
// if tokenCount is greater than 60K, use the long context model
if (tokenCount > 1000 * 60 && config.Router.longContext) {
log("Using long context model due to token count:", tokenCount);
// if tokenCount is greater than the configured threshold, use the long context model
const longContextThreshold = config.Router.longContextThreshold || 60000;
if (tokenCount > longContextThreshold && config.Router.longContext) {
log("Using long context model due to token count:", tokenCount, "threshold:", longContextThreshold);
return config.Router.longContext;
}
// If the model is claude-3-5-haiku, use the background model