From 3bbfebb5e311b54d27f27b177e9c43a78c7896b7 Mon Sep 17 00:00:00 2001 From: JoeChen Date: Sat, 26 Jul 2025 12:13:55 +0800 Subject: [PATCH] =?UTF-8?q?Feature:=20=E6=B7=BB=E5=8A=A0=E5=8F=AF=E9=80=89?= =?UTF-8?q?=E7=9A=84=E9=95=BF=E4=B8=8A=E4=B8=8B=E6=96=87=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E9=98=88=E5=80=BC=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 ++ README_zh.md | 2 ++ config.example.json | 4 +++- src/utils/router.ts | 7 ++++--- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 9f8c057..6095195 100644 --- a/README.md +++ b/README.md @@ -143,6 +143,7 @@ Here is a comprehensive example: "background": "ollama,qwen2.5-coder:latest", "think": "deepseek,deepseek-reasoner", "longContext": "openrouter,google/gemini-2.5-pro-preview", + "longContextThreshold": 60000, "webSearch": "gemini,gemini-2.5-flash" } } @@ -260,6 +261,7 @@ The `Router` object defines which model to use for different scenarios: - `background`: A model for background tasks. This can be a smaller, local model to save costs. - `think`: A model for reasoning-heavy tasks, like Plan Mode. - `longContext`: A model for handling long contexts (e.g., > 60K tokens). +- `longContextThreshold` (optional): The token count threshold for triggering the long context model. Defaults to 60000 if not specified. - `webSearch`: Used for handling web search tasks and this requires the model itself to support the feature. If you're using openrouter, you need to add the `:online` suffix after the model name. 
You can also switch models dynamically in Claude Code with the `/model` command: diff --git a/README_zh.md b/README_zh.md index b000a53..28ae60d 100644 --- a/README_zh.md +++ b/README_zh.md @@ -139,6 +139,7 @@ npm install -g @musistudio/claude-code-router "background": "ollama,qwen2.5-coder:latest", "think": "deepseek,deepseek-reasoner", "longContext": "openrouter,google/gemini-2.5-pro-preview", + "longContextThreshold": 60000, "webSearch": "gemini,gemini-2.5-flash" } } @@ -255,6 +256,7 @@ Transformers 允许您修改请求和响应负载,以确保与不同提供商 - `background`: 用于后台任务的模型。这可以是一个较小的本地模型以节省成本。 - `think`: 用于推理密集型任务(如计划模式)的模型。 - `longContext`: 用于处理长上下文(例如,> 60K 令牌)的模型。 +- `longContextThreshold` (可选): 触发长上下文模型的令牌数阈值。如果未指定,默认为 60000。 - `webSearch`: 用于处理网络搜索任务,需要模型本身支持。如果使用`openrouter`需要在模型后面加上`:online`后缀。 您还可以使用 `/model` 命令在 Claude Code 中动态切换模型: diff --git a/config.example.json b/config.example.json index 03b38ec..631d1ad 100644 @@ -105,7 +105,9 @@ "default": "deepseek,deepseek-chat", "background": "ollama,qwen2.5-coder:latest", "think": "deepseek,deepseek-reasoner", - "longContext": "openrouter,google/gemini-2.5-pro-preview" + "longContext": "openrouter,google/gemini-2.5-pro-preview", + "longContextThreshold": 60000, + "webSearch": "gemini,gemini-2.5-flash" }, "APIKEY": "your-secret-key", "HOST": "0.0.0.0" diff --git a/src/utils/router.ts b/src/utils/router.ts index 00098d6..87cca28 100644 --- a/src/utils/router.ts +++ b/src/utils/router.ts @@ -68,9 +68,10 @@ const getUseModel = async (req: any, tokenCount: number, config: any) => { if (req.body.model.includes(",")) { return req.body.model; } - // if tokenCount is greater than 60K, use the long context model - if (tokenCount > 1000 * 60 && config.Router.longContext) { - log("Using long context model due to token count:", tokenCount); + // if tokenCount is greater than the configured threshold, use the long context model + const longContextThreshold = config.Router.longContextThreshold || 60000; + if (tokenCount > longContextThreshold && config.Router.longContext) { + log("Using long context model due to token count:", tokenCount, "threshold:", longContextThreshold); return config.Router.longContext; } // If the model is claude-3-5-haiku, use the background model