Feature: 添加可选的长上下文模型阈值配置

2025-07-26 12:13:55 +08:00
parent 6883fff352
commit 3bbfebb5e3
4 changed files with 11 additions and 4 deletions
--- a/README.md
+++ b/README.md
@@ -143,6 +143,7 @@ Here is a comprehensive example:
    "background": "ollama,qwen2.5-coder:latest",
    "think": "deepseek,deepseek-reasoner",
    "longContext": "openrouter,google/gemini-2.5-pro-preview",
+    "longContextThreshold": 60000,
    "webSearch": "gemini,gemini-2.5-flash"
  }
 }
@@ -260,6 +261,7 @@ The `Router` object defines which model to use for different scenarios:
 - `background`: A model for background tasks. This can be a smaller, local model to save costs.
 - `think`: A model for reasoning-heavy tasks, like Plan Mode.
 - `longContext`: A model for handling long contexts (e.g., > 60K tokens).
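+- `longContextThreshold` (optional): The token count threshold for the long context model. A request is routed to `longContext` only when its token count exceeds this value and `longContext` is configured. Defaults to 60000 if not specified.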
 - `webSearch`: Used for handling web search tasks and this requires the model itself to support the feature. If you're using openrouter, you need to add the `:online` suffix after the model name.

 You can also switch models dynamically in Claude Code with the `/model` command:
--- a/README_zh.md
+++ b/README_zh.md
@@ -139,6 +139,7 @@ npm install -g @musistudio/claude-code-router
    "background": "ollama,qwen2.5-coder:latest",
    "think": "deepseek,deepseek-reasoner",
    "longContext": "openrouter,google/gemini-2.5-pro-preview",
+    "longContextThreshold": 60000,
    "webSearch": "gemini,gemini-2.5-flash"
  }
 }
@@ -255,6 +256,7 @@ Transformers 允许您修改请求和响应负载，以确保与不同提供商
 -   `background`: 用于后台任务的模型。这可以是一个较小的本地模型以节省成本。
 -   `think`: 用于推理密集型任务（如计划模式）的模型。
 -   `longContext`: 用于处理长上下文（例如，> 60K 令牌）的模型。
+-   `longContextThreshold` (可选): 触发长上下文模型的令牌数阈值。仅当请求的令牌数超过该值且已配置 `longContext` 时，才会路由到长上下文模型。如果未指定，默认为 60000。
 -   `webSearch`: 用于处理网络搜索任务，需要模型本身支持。如果使用`openrouter`需要在模型后面加上`:online`后缀。

 您还可以使用 `/model` 命令在 Claude Code 中动态切换模型：
--- a/config.example.json
+++ b/config.example.json
@@ -105,7 +105,9 @@
    "default": "deepseek,deepseek-chat",
    "background": "ollama,qwen2.5-coder:latest",
    "think": "deepseek,deepseek-reasoner",
-    "longContext": "openrouter,google/gemini-2.5-pro-preview"
+    "longContext": "openrouter,google/gemini-2.5-pro-preview",
+    "longContextThreshold": 60000,
+    "webSearch": "gemini,gemini-2.5-flash"
  },
  "APIKEY": "your-secret-key",
  "HOST": "0.0.0.0"
--- a/src/utils/router.ts
+++ b/src/utils/router.ts
@@ -68,9 +68,10 @@ const getUseModel = async (req: any, tokenCount: number, config: any) => {
  if (req.body.model.includes(",")) {
    return req.body.model;
  }
-  // if tokenCount is greater than 60K, use the long context model
-  if (tokenCount > 1000 * 60 && config.Router.longContext) {
-    log("Using long context model due to token count:", tokenCount);
+  // if tokenCount is greater than the configured threshold, use the long context model
+  const longContextThreshold = config.Router.longContextThreshold || 60000;
+  if (tokenCount > longContextThreshold && config.Router.longContext) {
+    log("Using long context model due to token count:", tokenCount, "threshold:", longContextThreshold);
    return config.Router.longContext;
  }
  // If the model is claude-3-5-haiku, use the background model