diff --git a/README.md b/README.md
index 9f8c057..6095195 100644
--- a/README.md
+++ b/README.md
@@ -143,6 +143,7 @@ Here is a comprehensive example:
     "background": "ollama,qwen2.5-coder:latest",
     "think": "deepseek,deepseek-reasoner",
     "longContext": "openrouter,google/gemini-2.5-pro-preview",
+    "longContextThreshold": 60000,
     "webSearch": "gemini,gemini-2.5-flash"
   }
 }
@@ -260,6 +261,7 @@ The `Router` object defines which model to use for different scenarios:
 - `background`: A model for background tasks. This can be a smaller, local model to save costs.
 - `think`: A model for reasoning-heavy tasks, like Plan Mode.
 - `longContext`: A model for handling long contexts (e.g., > 60K tokens).
+- `longContextThreshold` (optional): The token count threshold for triggering the long context model. Defaults to 60000 if not specified.
 - `webSearch`: Used for handling web search tasks and this requires the model itself to support the feature. If you're using openrouter, you need to add the `:online` suffix after the model name.
 
 You can also switch models dynamically in Claude Code with the `/model` command:
diff --git a/README_zh.md b/README_zh.md
index b000a53..28ae60d 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -139,6 +139,7 @@ npm install -g @musistudio/claude-code-router
     "background": "ollama,qwen2.5-coder:latest",
     "think": "deepseek,deepseek-reasoner",
     "longContext": "openrouter,google/gemini-2.5-pro-preview",
+    "longContextThreshold": 60000,
     "webSearch": "gemini,gemini-2.5-flash"
   }
 }
@@ -255,6 +256,7 @@ Transformers 允许您修改请求和响应负载,以确保与不同提供商
 - `background`: 用于后台任务的模型。这可以是一个较小的本地模型以节省成本。
 - `think`: 用于推理密集型任务(如计划模式)的模型。
 - `longContext`: 用于处理长上下文(例如,> 60K 令牌)的模型。
+- `longContextThreshold` (可选): 触发长上下文模型的令牌数阈值。如果未指定,默认为 60000。
 - `webSearch`: 用于处理网络搜索任务,需要模型本身支持。如果使用`openrouter`需要在模型后面加上`:online`后缀。
 
 您还可以使用 `/model` 命令在 Claude Code 中动态切换模型:
diff --git a/config.example.json b/config.example.json
index 03b38ec..631d1ad 100644
--- a/config.example.json
+++ b/config.example.json
@@ -105,7 +105,9 @@
     "default": "deepseek,deepseek-chat",
     "background": "ollama,qwen2.5-coder:latest",
     "think": "deepseek,deepseek-reasoner",
-    "longContext": "openrouter,google/gemini-2.5-pro-preview"
+    "longContext": "openrouter,google/gemini-2.5-pro-preview",
+    "longContextThreshold": 60000,
+    "webSearch": "gemini,gemini-2.5-flash"
   },
   "APIKEY": "your-secret-key",
   "HOST": "0.0.0.0"
diff --git a/src/utils/router.ts b/src/utils/router.ts
index 00098d6..87cca28 100644
--- a/src/utils/router.ts
+++ b/src/utils/router.ts
@@ -68,9 +68,10 @@ const getUseModel = async (req: any, tokenCount: number, config: any) => {
   if (req.body.model.includes(",")) {
     return req.body.model;
   }
-  // if tokenCount is greater than 60K, use the long context model
-  if (tokenCount > 1000 * 60 && config.Router.longContext) {
-    log("Using long context model due to token count:", tokenCount);
+  // if tokenCount is greater than the configured threshold, use the long context model
+  const longContextThreshold = config.Router.longContextThreshold ?? 60000;
+  if (tokenCount > longContextThreshold && config.Router.longContext) {
+    log("Using long context model due to token count:", tokenCount, "threshold:", longContextThreshold);
     return config.Router.longContext;
   }
   // If the model is claude-3-5-haiku, use the background model