switch to llms

This commit is contained in:
jinhui.li
2025-07-02 13:24:37 +08:00
parent dba8b1e6c8
commit 30c764828a
18 changed files with 1042 additions and 2968 deletions

View File

@@ -29,13 +29,11 @@ ccr code
```json ```json
{ {
"OPENAI_API_KEY": "sk-xxx",
"OPENAI_BASE_URL": "https://api.deepseek.com",
"OPENAI_MODEL": "deepseek-chat",
"Providers": [ "Providers": [
{ {
"name": "openrouter", "name": "openrouter",
"api_base_url": "https://openrouter.ai/api/v1", // IMPORTANT: api_base_url must be a complete (full) URL.
"api_base_url": "https://openrouter.ai/api/v1/chat/completions",
"api_key": "sk-xxx", "api_key": "sk-xxx",
"models": [ "models": [
"google/gemini-2.5-pro-preview", "google/gemini-2.5-pro-preview",
@@ -46,18 +44,48 @@ ccr code
}, },
{ {
"name": "deepseek", "name": "deepseek",
"api_base_url": "https://api.deepseek.com", // IMPORTANT: api_base_url must be a complete (full) URL.
"api_base_url": "https://api.deepseek.com/chat/completions",
"api_key": "sk-xxx", "api_key": "sk-xxx",
"models": ["deepseek-reasoner"] "models": ["deepseek-chat", "deepseek-reasoner"],
"transformer": {
"use": ["deepseek"],
"deepseek-chat": {
// Enhance tool usage for the deepseek-chat model using the ToolUse transformer.
"use": ["tooluse"]
}
}
}, },
{ {
"name": "ollama", "name": "ollama",
"api_base_url": "http://localhost:11434/v1", // IMPORTANT: api_base_url must be a complete (full) URL.
"api_base_url": "http://localhost:11434/v1/chat/completions",
"api_key": "ollama", "api_key": "ollama",
"models": ["qwen2.5-coder:latest"] "models": ["qwen2.5-coder:latest"]
},
{
"name": "gemini",
// IMPORTANT: api_base_url must be a complete (full) URL.
"api_base_url": "https://generativelanguage.googleapis.com/v1beta/models/",
"api_key": "sk-xxx",
"models": ["gemini-2.5-flash", "gemini-2.5-pro"],
"transformer": {
"use": ["gemini"]
}
},
{
"name": "volcengine",
// IMPORTANT: api_base_url must be a complete (full) URL.
"api_base_url": "https://ark.cn-beijing.volces.com/api/v3/chat/completions",
"api_key": "sk-xxx",
"models": ["deepseek-v3-250324", "deepseek-r1-250528"],
"transformer": {
"use": ["deepseek"]
}
} }
], ],
"Router": { "Router": {
"default": "deepseek,deepseek-chat", // IMPORTANT OPENAI_MODEL has been deprecated
"background": "ollama,qwen2.5-coder:latest", "background": "ollama,qwen2.5-coder:latest",
"think": "deepseek,deepseek-reasoner", "think": "deepseek,deepseek-reasoner",
"longContext": "openrouter,google/gemini-2.5-pro-preview" "longContext": "openrouter,google/gemini-2.5-pro-preview"
@@ -86,40 +114,12 @@ ccr code
- [x] Support change models - [x] Support change models
- [x] Github Actions - [x] Github Actions
- [ ] More robust plugin support
- [ ] More detailed logs - [ ] More detailed logs
## Plugins
You can modify or enhance Claude Code's functionality by installing plugins. The mechanism works by using middleware to modify request parameters — this allows you to rewrite prompts or add/remove tools.
To use a plugin, place it in the `~/.claude-code-router/plugins/` directory and specify the plugin name in `config.json` using the `usePlugins` option, like this:
```json
// ~/.claude-code-router/config.json
{
...,
"usePlugins": ["notebook-tools-filter", "toolcall-improvement"]
}
```
Currently, the following plugins are available:
- **notebook-tools-filter**
This plugin filters out tool calls related to Jupyter notebooks (.ipynb files). You can use it if your work does not involve Jupyter.
- **toolcall-improvement**
If your LLM doesn't handle tool usage well (for example, always returning code as plain text instead of modifying files — such as with deepseek-v3), you can use this plugin.
This plugin simply adds the following system prompt. If you have a better prompt, you can modify it.
```markdown
## **Important Instruction:**
You must use tools as frequently and accurately as possible to help the user solve their problem.
Prioritize tool usage whenever it can enhance accuracy, efficiency, or the quality of the response.
```
## Github Actions ## Github Actions
You just need to install `Claude Code Actions` in your repository according to the [official documentation](https://docs.anthropic.com/en/docs/claude-code/github-actions). For `ANTHROPIC_API_KEY`, you can use any string. Then, modify your `.github/workflows/claude.yaml` file to include claude-code-router, like this: You just need to install `Claude Code Actions` in your repository according to the [official documentation](https://docs.anthropic.com/en/docs/claude-code/github-actions). For `ANTHROPIC_API_KEY`, you can use any string. Then, modify your `.github/workflows/claude.yaml` file to include claude-code-router, like this:
```yaml ```yaml
name: Claude Code name: Claude Code
@@ -179,6 +179,7 @@ jobs:
with: with:
anthropic_api_key: "test" anthropic_api_key: "test"
``` ```
You can modify the contents of `$HOME/.claude-code-router/config.json` as needed. You can modify the contents of `$HOME/.claude-code-router/config.json` as needed.
GitHub Actions support allows you to trigger Claude Code at specific times, which opens up some interesting possibilities. GitHub Actions support allows you to trigger Claude Code at specific times, which opens up some interesting possibilities.
@@ -190,7 +191,6 @@ For example, between 00:30 and 08:30 Beijing Time, using the official DeepSeek A
So maybe in the future, I'll describe detailed tasks for Claude Code ahead of time and let it run during these discounted hours to reduce costs? So maybe in the future, I'll describe detailed tasks for Claude Code ahead of time and let it run during these discounted hours to reduce costs?
## Some tips: ## Some tips:
Now you can use deepseek-v3 models directly without using any plugins. Now you can use deepseek-v3 models directly without using any plugins.
@@ -205,7 +205,8 @@ Some interesting points: Based on my testing, including a lot of context informa
## Some articles: ## Some articles:
1. [Project Motivation and Principles](blog/en/project-motivation-and-how-it-works.md) ([中文版看这里](blog/zh/项目初衷及原理.md)) 1. [Project Motivation and Principles](blog/en/project-motivation-and-how-it-works.md) ([项目初衷及原理](blog/zh/项目初衷及原理.md))
2. [Maybe We Can Do More with the Router](blog/en/maybe-we-can-do-more-with-the-route.md) ([或许我们能在 Router 中做更多事情](blog/zh/或许我们能在Router中做更多事情.md))
## Buy me a coffee ## Buy me a coffee
@@ -227,7 +228,13 @@ Thanks to the following sponsors:
@Simon Leischnig (If you see this, feel free to contact me and I can update it with your GitHub information) @Simon Leischnig (If you see this, feel free to contact me and I can update it with your GitHub information)
[@duanshuaimin](https://github.com/duanshuaimin) [@duanshuaimin](https://github.com/duanshuaimin)
[@vrgitadmin](https://github.com/vrgitadmin) [@vrgitadmin](https://github.com/vrgitadmin)
@\*o (contact me via the email on my profile to update your GitHub username)
@\*\*聪 (contact me via the email on my profile to update your GitHub username)
@\*说 (contact me via the email on my profile to update your GitHub username)
@\*更 (contact me via the email on my profile to update your GitHub username)
@\*更 (contact me via the email on my profile to update your GitHub username)
@K\*g (contact me via the email on my profile to update your GitHub username)
@R\*R (contact me via the email on my profile to update your GitHub username)
[@bobleer](https://github.com/bobleer) (contact me via the email on my profile to update your GitHub username)
@\*苗 (contact me via the email on my profile to update your GitHub username)
@\*划 (contact me via the email on my profile to update your GitHub username)

View File

@@ -0,0 +1,105 @@
# Maybe We Can Do More with the Router
Since the release of `claude-code-router`, I've received a lot of user feedback, and quite a few issues are still open. Most of them are about support for different providers and the deepseek model's lack of proactive tool usage.
Originally, I created this project for personal use, mainly to access claude code at a lower cost. So, multi-provider support wasn't part of the initial design. But during troubleshooting, I discovered that even though most providers claim to be compatible with the OpenAI-style `/chat/completions` interface, there are many subtle differences. For example:
1. When a Gemini tool parameter's type is string, the `format` field only supports `date` and `date-time`, and there's no tool call ID.
2. OpenRouter requires `cache_control` for caching.
3. The official DeepSeek API has a `max_output` of 8192, but Volcano Engine's limit is even higher.
Aside from these, smaller providers often have quirks in their parameter handling. So I decided to create a new project, [musistudio/llms](https://github.com/musistudio/llms), to deal with these compatibility issues. It uses the OpenAI format as a base and introduces a generic Transformer interface for transforming both requests and responses.
Once a `Transformer` is implemented for each provider, it becomes possible to mix-and-match requests between them. For example, I implemented bidirectional conversion between Anthropic and OpenAI formats in `AnthropicTransformer`, which listens to the `/v1/messages` endpoint. Similarly, `GeminiTransformer` handles Gemini <-> OpenAI format conversions and listens to `/v1beta/models/:modelAndAction`.
When both requests and responses are transformed into a common format, they can interoperate seamlessly:
```
AnthropicRequest -> AnthropicTransformer -> OpenAIRequest -> GeminiTransformer -> GeminiRequest -> GeminiServer
```
```
GeminiResponse -> GeminiTransformer -> OpenAIResponse -> AnthropicTransformer -> AnthropicResponse
```
Using a middleware layer to smooth out differences may introduce some performance overhead, but the main goal here is to enable `claude-code-router` to support multiple providers.
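For reference, here is a rough sketch of what such a `Transformer` contract could look like, inferred from the `TooluseTransformer` example below. The exact interface and the shape of `UnifiedChatRequest` in [musistudio/llms](https://github.com/musistudio/llms) may differ, and the `endPoint` field name is an assumption based on the endpoints mentioned above.
```typescript
// Sketch only — everything beyond name/transformRequestIn/transformResponseOut
// is an assumption, not the published musistudio/llms API.
interface UnifiedChatRequest {
  model: string;
  messages: Array<{ role: string; content: string | null }>;
  tools?: Array<Record<string, unknown>>;
  tool_choice?: string;
}

interface Transformer {
  // Name used to reference the transformer from config, e.g. "use": ["tooluse"].
  name: string;
  // Endpoint the transformer listens on, e.g. "/v1/messages" for Anthropic
  // or "/v1beta/models/:modelAndAction" for Gemini (assumed field name).
  endPoint?: string;
  // Rewrite the unified (OpenAI-style) request before it is sent to the provider.
  transformRequestIn?(request: UnifiedChatRequest): UnifiedChatRequest;
  // Rewrite the provider's response before it is returned to the client.
  transformResponseOut?(response: Response): Promise<Response> | Response;
}
```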
As for the issue of DeepSeek's lackluster tool usage — I found that it stems from poor instruction adherence in long conversations. Initially, the model actively calls tools, but after several rounds, it starts responding with plain text instead. My first workaround was injecting a system prompt to remind the model to use tools proactively. But in long contexts, the model tends to forget this instruction.
After reading the DeepSeek documentation, I noticed it supports the `tool_choice` parameter, which can be set to `"required"` to force the model to use at least one tool. I tested this by enabling the parameter, and it significantly improved the model's tool usage. We can remove the setting when it's no longer necessary. With the help of the `Transformer` interface in [musistudio/llms](https://github.com/musistudio/llms), we can modify the request before it's sent and adjust the response after it's received.
Inspired by the Plan Mode in `claude code`, I implemented a similar Tool Mode for DeepSeek:
```typescript
export class TooluseTransformer implements Transformer {
name = "tooluse";
transformRequestIn(request: UnifiedChatRequest): UnifiedChatRequest {
if (request.tools?.length) {
request.messages.push({
role: "system",
content: `<system-reminder>Tool mode is active. The user expects you to proactively execute the most suitable tool to help complete the task.
Before invoking a tool, you must carefully evaluate whether it matches the current task. If no available tool is appropriate for the task, you MUST call the \`ExitTool\` to exit tool mode — this is the only valid way to terminate tool mode.
Always prioritize completing the user's task effectively and efficiently by using tools whenever appropriate.</system-reminder>`,
});
request.tool_choice = "required";
request.tools.unshift({
type: "function",
function: {
name: "ExitTool",
description: `Use this tool when you are in tool mode and have completed the task. This is the only valid way to exit tool mode.
IMPORTANT: Before using this tool, ensure that none of the available tools are applicable to the current task. You must evaluate all available options — only if no suitable tool can help you complete the task should you use ExitTool to terminate tool mode.
Examples:
1. Task: "Use a tool to summarize this document" — Do not use ExitTool if a summarization tool is available.
2. Task: "Whats the weather today?" — If no tool is available to answer, use ExitTool after reasoning that none can fulfill the task.`,
parameters: {
type: "object",
properties: {
response: {
type: "string",
description:
"Your response will be forwarded to the user exactly as returned — the tool will not modify or post-process it in any way.",
},
},
required: ["response"],
},
},
});
}
return request;
}
async transformResponseOut(response: Response): Promise<Response> {
if (response.headers.get("Content-Type")?.includes("application/json")) {
const jsonResponse = await response.json();
if (
jsonResponse?.choices[0]?.message.tool_calls?.length &&
jsonResponse?.choices[0]?.message.tool_calls[0]?.function?.name ===
"ExitTool"
) {
const toolCall = jsonResponse.choices[0].message.tool_calls[0];
const toolArguments = JSON.parse(toolCall.function.arguments || "{}");
jsonResponse.choices[0].message.content = toolArguments.response || "";
delete jsonResponse.choices[0].message.tool_calls;
}
// Handle non-streaming response if needed
return new Response(JSON.stringify(jsonResponse), {
status: response.status,
statusText: response.statusText,
headers: response.headers,
});
} else if (response.headers.get("Content-Type")?.includes("stream")) {
// ...
}
return response;
}
}
```
This transformer ensures the model calls at least one tool. If no tools are appropriate or the task is finished, it can exit using `ExitTool`. Since this relies on the `tool_choice` parameter, it only works with models that support it.
In practice, this approach noticeably improves tool usage for DeepSeek. The tradeoff is that sometimes the model may invoke irrelevant or unnecessary tools, which could increase latency and token usage.
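To enable it, the provider entry in `~/.claude-code-router/config.json` references the transformer by name. The deepseek provider example from the README applies the `tooluse` transformer only to the `deepseek-chat` model:
```json
{
  "name": "deepseek",
  "api_base_url": "https://api.deepseek.com/chat/completions",
  "api_key": "sk-xxx",
  "models": ["deepseek-chat", "deepseek-reasoner"],
  "transformer": {
    "use": ["deepseek"],
    "deepseek-chat": {
      "use": ["tooluse"]
    }
  }
}
```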
This update is just a small experiment — adding an `agent` to the router. Maybe there are more interesting things we can explore from here.

View File

@@ -0,0 +1,95 @@
# Maybe We Can Do More with the Router
Since the release of `claude-code-router`, I have received a lot of user feedback, and quite a few issues remain open. Most of them concern support for different providers and the `deepseek` model's reluctance to call tools.
I originally built this project so that I could use `claude code` at a lower cost, so multi-provider support was not part of the initial design. While troubleshooting real issues, I found that although almost every provider claims to be compatible with the OpenAI-style `/chat/completions` interface, the details differ a lot. For example:
1. When a Gemini tool parameter's type is string, the `format` field only supports `date` and `date-time`, and there is no tool call ID.
2. OpenRouter requires `cache_control` for caching.
3. The official DeepSeek API has a `max_output` of 8192, while Volcano Engine's limit is higher.
Beyond these, smaller providers often have their own parameter quirks. So I decided to start a new project, [musistudio/llms](https://github.com/musistudio/llms), to handle these compatibility issues. It uses a common format based on the OpenAI format and provides a `Transformer` interface for converting requests and responses. Once a `Transformer` is implemented for each provider, requests can be mixed and matched between them. For example, `AnthropicTransformer` implements bidirectional conversion between the `Anthropic` and `OpenAI` formats and listens on the `/v1/messages` endpoint, while `GeminiTransformer` implements `Gemini` <-> `OpenAI` conversion and listens on the `/v1beta/models/:modelAndAction` endpoint. Once their requests and responses are converted into this common format, the providers can call one another:
```
AnthropicRequest -> AnthropicTransformer -> OpenAIRequest -> GeminiTransformer -> GeminiRequest -> GeminiServer
```
```
GeminiResponse -> GeminiTransformer -> OpenAIResponse -> AnthropicTransformer -> AnthropicResponse
```
Smoothing out the differences with a middleware layer may introduce some performance overhead, but the original goal of this project is to let `claude-code-router` support different providers.
As for `deepseek`'s reluctance to call tools, I found it is caused by poor instruction following in long contexts. The model calls tools proactively at first, but after a few rounds of conversation it only returns plain text. My initial solution was to inject a system prompt telling the model to use tools proactively to solve the user's problem, but later testing showed that the model forgets this instruction in long contexts.
After reading the `deepseek` documentation, I found that the model supports the `tool_choice` parameter, which can force it to call at least one tool. I tried setting it to `required`, and the model's willingness to call tools increased significantly; now we only need to remove the parameter when it is no longer appropriate. The `Transformer` interface in [musistudio/llms](https://github.com/musistudio/llms) lets us modify the request before it is sent and the response after it is received, so, taking `claude code`'s `Plan Mode` as a reference, I implemented a `Tool Mode` for `deepseek`:
```typescript
export class TooluseTransformer implements Transformer {
name = "tooluse";
transformRequestIn(request: UnifiedChatRequest): UnifiedChatRequest {
if (request.tools?.length) {
request.messages.push({
role: "system",
content: `<system-reminder>Tool mode is active. The user expects you to proactively execute the most suitable tool to help complete the task.
Before invoking a tool, you must carefully evaluate whether it matches the current task. If no available tool is appropriate for the task, you MUST call the \`ExitTool\` to exit tool mode — this is the only valid way to terminate tool mode.
Always prioritize completing the user's task effectively and efficiently by using tools whenever appropriate.</system-reminder>`,
});
request.tool_choice = "required";
request.tools.unshift({
type: "function",
function: {
name: "ExitTool",
description: `Use this tool when you are in tool mode and have completed the task. This is the only valid way to exit tool mode.
IMPORTANT: Before using this tool, ensure that none of the available tools are applicable to the current task. You must evaluate all available options — only if no suitable tool can help you complete the task should you use ExitTool to terminate tool mode.
Examples:
1. Task: "Use a tool to summarize this document" — Do not use ExitTool if a summarization tool is available.
2. Task: "Whats the weather today?" — If no tool is available to answer, use ExitTool after reasoning that none can fulfill the task.`,
parameters: {
type: "object",
properties: {
response: {
type: "string",
description:
"Your response will be forwarded to the user exactly as returned — the tool will not modify or post-process it in any way.",
},
},
required: ["response"],
},
},
});
}
return request;
}
async transformResponseOut(response: Response): Promise<Response> {
if (response.headers.get("Content-Type")?.includes("application/json")) {
const jsonResponse = await response.json();
if (
jsonResponse?.choices[0]?.message.tool_calls?.length &&
jsonResponse?.choices[0]?.message.tool_calls[0]?.function?.name ===
"ExitTool"
) {
const toolCall = jsonResponse.choices[0].message.tool_calls[0];
const toolArguments = JSON.parse(toolCall.function.arguments || "{}");
jsonResponse.choices[0].message.content = toolArguments.response || "";
delete jsonResponse.choices[0].message.tool_calls;
}
// Handle non-streaming response if needed
return new Response(JSON.stringify(jsonResponse), {
status: response.status,
statusText: response.statusText,
headers: response.headers,
});
} else if (response.headers.get("Content-Type")?.includes("stream")) {
// ...
}
return response;
}
}
```
This transformer always makes the model call at least one tool; if no tool is suitable or the task is complete, it can call `ExitTool` to exit tool mode. Because it relies on the `tool_choice` parameter, it only works with models that support it. In testing, it noticeably increases how often `deepseek` calls tools; the downside is that it may make irrelevant or unnecessary tool calls, which increases task execution time and `token` consumption.
This update is just a small exploration of implementing an `agent` inside the Router; maybe there are more interesting things we can do from here...

1253
package-lock.json generated

File diff suppressed because it is too large

View File

@@ -18,17 +18,12 @@
"author": "musistudio", "author": "musistudio",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@anthropic-ai/sdk": "^0.39.0", "@musistudio/llms": "^1.0.0",
"dotenv": "^16.4.7", "dotenv": "^16.4.7",
"express": "^4.21.2",
"https-proxy-agent": "^7.0.6",
"lru-cache": "^11.1.0",
"openai": "^4.85.4",
"tiktoken": "^1.0.21", "tiktoken": "^1.0.21",
"uuid": "^11.1.0" "uuid": "^11.1.0"
}, },
"devDependencies": { "devDependencies": {
"@types/express": "^5.0.0",
"esbuild": "^0.25.1", "esbuild": "^0.25.1",
"typescript": "^5.8.2" "typescript": "^5.8.2"
}, },

View File

@@ -1,7 +0,0 @@
module.exports = async function handle(req, res) {
if (req?.body?.tools?.length) {
req.body.tools = req.body.tools.filter(
(tool) => !["NotebookRead", "NotebookEdit", "mcp__ide__executeCode"].includes(tool.name)
);
}
};

View File

@@ -1,8 +0,0 @@
module.exports = async function handle(req, res) {
if (req?.body?.tools?.length) {
req.body.system.push({
type: "text",
text: `## **Important Instruction:** \nYou must use tools as frequently and accurately as possible to help the user solve their problem.\nPrioritize tool usage whenever it can enhance accuracy, efficiency, or the quality of the response.`
})
}
};

1590
pnpm-lock.yaml generated

File diff suppressed because it is too large

View File

@@ -13,7 +13,7 @@ export const REFERENCE_COUNT_FILE = '/tmp/claude-code-reference-count.txt';
export const DEFAULT_CONFIG = { export const DEFAULT_CONFIG = {
log: false, LOG: false,
OPENAI_API_KEY: "", OPENAI_API_KEY: "",
OPENAI_BASE_URL: "", OPENAI_BASE_URL: "",
OPENAI_MODEL: "", OPENAI_MODEL: "",

View File

@@ -1,19 +1,15 @@
import { existsSync } from "fs"; import { existsSync } from "fs";
import { writeFile } from "fs/promises"; import { writeFile } from "fs/promises";
import { getOpenAICommonOptions, initConfig, initDir } from "./utils"; import { homedir } from "os";
import { join } from "path";
import { initConfig, initDir } from "./utils";
import { createServer } from "./server"; import { createServer } from "./server";
import { formatRequest } from "./middlewares/formatRequest"; import { router } from "./utils/router";
import { rewriteBody } from "./middlewares/rewriteBody";
import { router } from "./middlewares/router";
import OpenAI from "openai";
import { streamOpenAIResponse } from "./utils/stream";
import { import {
cleanupPidFile, cleanupPidFile,
isServiceRunning, isServiceRunning,
savePid, savePid,
} from "./utils/processCheck"; } from "./utils/processCheck";
import { LRUCache } from "lru-cache";
import { log } from "./utils/log";
async function initializeClaudeConfig() { async function initializeClaudeConfig() {
const homeDir = process.env.HOME; const homeDir = process.env.HOME;
@@ -39,13 +35,6 @@ interface RunOptions {
port?: number; port?: number;
} }
interface ModelProvider {
name: string;
api_base_url: string;
api_key: string;
models: string[];
}
async function run(options: RunOptions = {}) { async function run(options: RunOptions = {}) {
// Check if service is already running // Check if service is already running
if (isServiceRunning()) { if (isServiceRunning()) {
@@ -57,51 +46,6 @@ async function run(options: RunOptions = {}) {
await initDir(); await initDir();
const config = await initConfig(); const config = await initConfig();
const Providers = new Map<string, ModelProvider>();
const providerCache = new LRUCache<string, OpenAI>({
max: 10,
ttl: 2 * 60 * 60 * 1000,
});
function getProviderInstance(providerName: string): OpenAI {
const provider: ModelProvider | undefined = Providers.get(providerName);
if (provider === undefined) {
throw new Error(`Provider ${providerName} not found`);
}
let openai = providerCache.get(provider.name);
if (!openai) {
openai = new OpenAI({
baseURL: provider.api_base_url,
apiKey: provider.api_key,
...getOpenAICommonOptions(),
});
providerCache.set(provider.name, openai);
}
return openai;
}
if (Array.isArray(config.Providers)) {
config.Providers.forEach((provider) => {
try {
Providers.set(provider.name, provider);
} catch (error) {
console.error("Failed to parse model provider:", error);
}
});
}
if (config.OPENAI_API_KEY && config.OPENAI_BASE_URL && config.OPENAI_MODEL) {
const defaultProvider = {
name: "default",
api_base_url: config.OPENAI_BASE_URL,
api_key: config.OPENAI_API_KEY,
models: [config.OPENAI_MODEL],
};
Providers.set("default", defaultProvider);
} else if (Providers.size > 0) {
const defaultProvider = Providers.values().next().value!;
Providers.set("default", defaultProvider);
}
const port = options.port || 3456; const port = options.port || 3456;
// Save the PID of the background process // Save the PID of the background process
@@ -124,39 +68,16 @@ async function run(options: RunOptions = {}) {
const servicePort = process.env.SERVICE_PORT const servicePort = process.env.SERVICE_PORT
? parseInt(process.env.SERVICE_PORT) ? parseInt(process.env.SERVICE_PORT)
: port; : port;
const server = createServer({
const server = await createServer(servicePort); ...config,
server.useMiddleware((req, res, next) => { providers: config.Providers || config.providers,
req.config = config; PORT: servicePort,
next(); LOG_FILE: join(homedir(), ".claude-code-router", "claude-code-router.log"),
});
server.useMiddleware(rewriteBody);
if (
config.Router?.background &&
config.Router?.think &&
config?.Router?.longContext
) {
server.useMiddleware(router);
} else {
server.useMiddleware((req, res, next) => {
req.provider = "default";
req.body.model = config.OPENAI_MODEL;
next();
});
}
server.useMiddleware(formatRequest);
server.app.post("/v1/messages", async (req, res) => {
try {
const provider = getProviderInstance(req.provider || "default");
const completion: any = await provider.chat.completions.create(req.body);
await streamOpenAIResponse(res, completion, req.body.model, req.body);
} catch (e) {
log("Error in OpenAI API call:", e);
}
}); });
server.addHook("preHandler", async (req, reply) =>
router(req, reply, config)
);
server.start(); server.start();
console.log(`🚀 Claude Code Router is running on port ${servicePort}`);
} }
export { run }; export { run };

View File

@@ -1,209 +0,0 @@
import { Request, Response, NextFunction } from "express";
import { MessageCreateParamsBase } from "@anthropic-ai/sdk/resources/messages";
import OpenAI from "openai";
import { streamOpenAIResponse } from "../utils/stream";
import { log } from "../utils/log";
export const formatRequest = async (
req: Request,
res: Response,
next: NextFunction
) => {
let {
model,
max_tokens,
messages,
system = [],
temperature,
metadata,
tools,
stream,
}: MessageCreateParamsBase = req.body;
log("formatRequest: ", req.body);
try {
// @ts-ignore
const openAIMessages = Array.isArray(messages)
? messages.flatMap((anthropicMessage) => {
const openAiMessagesFromThisAnthropicMessage = [];
if (!Array.isArray(anthropicMessage.content)) {
// Handle simple string content
if (typeof anthropicMessage.content === "string") {
openAiMessagesFromThisAnthropicMessage.push({
role: anthropicMessage.role,
content: anthropicMessage.content,
});
}
// If content is not string and not array (e.g. null/undefined), it will result in an empty array, effectively skipping this message.
return openAiMessagesFromThisAnthropicMessage;
}
// Handle array content
if (anthropicMessage.role === "assistant") {
const assistantMessage = {
role: "assistant",
content: null, // Will be populated if text parts exist
};
let textContent = "";
// @ts-ignore
const toolCalls = []; // Corrected type here
anthropicMessage.content.forEach((contentPart) => {
if (contentPart.type === "text") {
textContent +=
(typeof contentPart.text === "string"
? contentPart.text
: JSON.stringify(contentPart.text)) + "\\n";
} else if (contentPart.type === "tool_use") {
toolCalls.push({
id: contentPart.id,
type: "function",
function: {
name: contentPart.name,
arguments: JSON.stringify(contentPart.input),
},
});
}
});
const trimmedTextContent = textContent.trim();
if (trimmedTextContent.length > 0) {
// @ts-ignore
assistantMessage.content = trimmedTextContent;
}
if (toolCalls.length > 0) {
// @ts-ignore
assistantMessage.tool_calls = toolCalls;
}
// @ts-ignore
if (
assistantMessage.content ||
// @ts-ignore
(assistantMessage.tool_calls &&
// @ts-ignore
assistantMessage.tool_calls.length > 0)
) {
openAiMessagesFromThisAnthropicMessage.push(assistantMessage);
}
} else if (anthropicMessage.role === "user") {
// For user messages, text parts are combined into one message.
// Tool results are transformed into subsequent, separate 'tool' role messages.
let userTextMessageContent = "";
// @ts-ignore
const subsequentToolMessages = [];
anthropicMessage.content.forEach((contentPart) => {
if (contentPart.type === "text") {
userTextMessageContent +=
(typeof contentPart.text === "string"
? contentPart.text
: JSON.stringify(contentPart.text)) + "\\n";
} else if (contentPart.type === "tool_result") {
// Each tool_result becomes a separate 'tool' message
subsequentToolMessages.push({
role: "tool",
tool_call_id: contentPart.tool_use_id,
content:
typeof contentPart.content === "string"
? contentPart.content
: JSON.stringify(contentPart.content),
});
}
});
const trimmedUserText = userTextMessageContent.trim();
if (trimmedUserText.length > 0) {
openAiMessagesFromThisAnthropicMessage.push({
role: "user",
content: trimmedUserText,
});
}
// @ts-ignore
openAiMessagesFromThisAnthropicMessage.push(
// @ts-ignore
...subsequentToolMessages
);
} else {
// Fallback for other roles (e.g. system, or custom roles if they were to appear here with array content)
// This will combine all text parts into a single message for that role.
let combinedContent = "";
anthropicMessage.content.forEach((contentPart) => {
if (contentPart.type === "text") {
combinedContent +=
(typeof contentPart.text === "string"
? contentPart.text
: JSON.stringify(contentPart.text)) + "\\n";
} else {
// For non-text parts in other roles, stringify them or handle as appropriate
combinedContent += JSON.stringify(contentPart) + "\\n";
}
});
const trimmedCombinedContent = combinedContent.trim();
if (trimmedCombinedContent.length > 0) {
openAiMessagesFromThisAnthropicMessage.push({
role: anthropicMessage.role, // Cast needed as role could be other than 'user'/'assistant'
content: trimmedCombinedContent,
});
}
}
return openAiMessagesFromThisAnthropicMessage;
})
: [];
const systemMessages: OpenAI.Chat.Completions.ChatCompletionMessageParam[] =
Array.isArray(system)
? system.map((item) => ({
role: "system",
content: item.text,
}))
: [{ role: "system", content: system }];
const data: any = {
model,
messages: [...systemMessages, ...openAIMessages],
temperature,
stream,
};
if (tools) {
data.tools = tools
.filter((tool) => !["StickerRequest"].includes(tool.name))
.map((item: any) => ({
type: "function",
function: {
name: item.name,
description: item.description,
parameters: item.input_schema,
},
}));
}
if (stream) {
res.setHeader("Content-Type", "text/event-stream");
}
res.setHeader("Cache-Control", "no-cache");
res.setHeader("Connection", "keep-alive");
req.body = data;
console.log(JSON.stringify(data.messages, null, 2));
} catch (error) {
console.error("Error in request processing:", error);
const errorCompletion: AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk> =
{
async *[Symbol.asyncIterator]() {
yield {
id: `error_${Date.now()}`,
created: Math.floor(Date.now() / 1000),
model,
object: "chat.completion.chunk",
choices: [
{
index: 0,
delta: {
content: `Error: ${(error as Error).message}`,
},
finish_reason: "stop",
},
],
};
},
};
await streamOpenAIResponse(res, errorCompletion, model, req.body);
}
next();
};

View File

@@ -1,45 +0,0 @@
import { Request, Response, NextFunction } from "express";
import Module from "node:module";
import { streamOpenAIResponse } from "../utils/stream";
import { log } from "../utils/log";
import { PLUGINS_DIR } from "../constants";
import path from "node:path";
import { access } from "node:fs/promises";
import { OpenAI } from "openai";
import { createClient } from "../utils";
// @ts-ignore
const originalLoad = Module._load;
// @ts-ignore
Module._load = function (request, parent, isMain) {
if (request === "claude-code-router") {
return {
streamOpenAIResponse,
log,
OpenAI,
createClient,
};
}
return originalLoad.call(this, request, parent, isMain);
};
export const rewriteBody = async (
req: Request,
res: Response,
next: NextFunction
) => {
if (!req.config.usePlugins) {
return next();
}
for (const plugin of req.config.usePlugins) {
const pluginPath = path.join(PLUGINS_DIR, `${plugin.trim()}.js`);
try {
await access(pluginPath);
const rewritePlugin = require(pluginPath);
await rewritePlugin(req, res);
} catch (e) {
console.error(e);
}
}
next();
};

View File

@@ -1,23 +1,8 @@
import express, { RequestHandler } from "express"; import Server from "@musistudio/llms";
interface Server { export const createServer = (config: any): Server => {
app: express.Application; const server = new Server({
useMiddleware: (middleware: RequestHandler) => void; initialConfig: config,
start: () => void;
}
export const createServer = async (port: number): Promise<Server> => {
const app = express();
app.use(express.json({ limit: "500mb" }));
return {
app,
useMiddleware: (middleware: RequestHandler) => {
app.use("/v1/messages", middleware);
},
start: () => {
app.listen(port, () => {
console.log(`Server is running on port ${port}`);
}); });
}, return server;
};
}; };

View File

@@ -9,13 +9,6 @@ export async function executeCodeCommand(args: string[] = []) {
// Set environment variables // Set environment variables
const env = { const env = {
...process.env, ...process.env,
HTTPS_PROXY: undefined,
HTTP_PROXY: undefined,
ALL_PROXY: undefined,
https_proxy: undefined,
http_proxy: undefined,
all_proxy: undefined,
DISABLE_PROMPT_CACHING: "1",
ANTHROPIC_AUTH_TOKEN: "test", ANTHROPIC_AUTH_TOKEN: "test",
ANTHROPIC_BASE_URL: `http://127.0.0.1:3456`, ANTHROPIC_BASE_URL: `http://127.0.0.1:3456`,
API_TIMEOUT_MS: "600000", API_TIMEOUT_MS: "600000",
@@ -29,7 +22,7 @@ export async function executeCodeCommand(args: string[] = []) {
const claudeProcess = spawn(claudePath, args, { const claudeProcess = spawn(claudePath, args, {
env, env,
stdio: "inherit", stdio: "inherit",
shell: true shell: true,
}); });
claudeProcess.on("error", (error) => { claudeProcess.on("error", (error) => {

View File

@@ -1,5 +1,3 @@
import { HttpsProxyAgent } from "https-proxy-agent";
import OpenAI, { ClientOptions } from "openai";
import fs from "node:fs/promises"; import fs from "node:fs/promises";
import readline from "node:readline"; import readline from "node:readline";
import { import {
@@ -9,16 +7,6 @@ import {
PLUGINS_DIR, PLUGINS_DIR,
} from "../constants"; } from "../constants";
export function getOpenAICommonOptions(): ClientOptions {
const options: ClientOptions = {};
if (process.env.PROXY_URL) {
options.httpAgent = new HttpsProxyAgent(process.env.PROXY_URL);
} else if (process.env.HTTPS_PROXY) {
options.httpAgent = new HttpsProxyAgent(process.env.HTTPS_PROXY);
}
return options;
}
const ensureDir = async (dir_path: string) => { const ensureDir = async (dir_path: string) => {
try { try {
await fs.access(dir_path); await fs.access(dir_path);
@@ -63,9 +51,17 @@ export const readConfigFile = async () => {
const baseUrl = await question("Enter OPENAI_BASE_URL: "); const baseUrl = await question("Enter OPENAI_BASE_URL: ");
const model = await question("Enter OPENAI_MODEL: "); const model = await question("Enter OPENAI_MODEL: ");
const config = Object.assign({}, DEFAULT_CONFIG, { const config = Object.assign({}, DEFAULT_CONFIG, {
OPENAI_API_KEY: apiKey, Providers: [
OPENAI_BASE_URL: baseUrl, {
OPENAI_MODEL: model, name: "openai",
api_base_url: baseUrl,
api_key: apiKey,
models: [model],
},
],
Router: {
default: `openai,${model}`,
},
}); });
await writeConfigFile(config); await writeConfigFile(config);
return config; return config;
@@ -82,11 +78,3 @@ export const initConfig = async () => {
Object.assign(process.env, config); Object.assign(process.env, config);
return config; return config;
}; };
export const createClient = (options: ClientOptions) => {
const client = new OpenAI({
...options,
...getOpenAICommonOptions(),
});
return client;
};

View File

@@ -1,57 +1,32 @@
import { MessageCreateParamsBase } from "@anthropic-ai/sdk/resources/messages"; import { MessageCreateParamsBase } from "@anthropic-ai/sdk/resources/messages";
import { Request, Response, NextFunction } from "express";
import { get_encoding } from "tiktoken"; import { get_encoding } from "tiktoken";
import { log } from "../utils/log"; import { log } from "./log";
const enc = get_encoding("cl100k_base"); const enc = get_encoding("cl100k_base");
const getUseModel = (req: Request, tokenCount: number) => { const getUseModel = (req: any, tokenCount: number, config: any) => {
const [provider, model] = req.body.model.split(","); if (req.body.model.includes(",")) {
if (provider && model) { return req.body.model;
return {
provider,
model,
};
} }
// if tokenCount is greater than 60K, use the long context model
// if tokenCount is greater than 32K, use the long context model if (tokenCount > 1000 * 60) {
if (tokenCount > 1000 * 32) {
log("Using long context model due to token count:", tokenCount); log("Using long context model due to token count:", tokenCount);
const [provider, model] = req.config.Router!.longContext.split(","); return config.Router!.longContext;
return {
provider,
model,
};
} }
// If the model is claude-3-5-haiku, use the background model // If the model is claude-3-5-haiku, use the background model
if (req.body.model?.startsWith("claude-3-5-haiku")) { if (req.body.model?.startsWith("claude-3-5-haiku")) {
log("Using background model for ", req.body.model); log("Using background model for ", req.body.model);
const [provider, model] = req.config.Router!.background.split(","); return config.Router!.background;
return {
provider,
model,
};
} }
// if exits thinking, use the think model // if exits thinking, use the think model
if (req.body.thinking) { if (req.body.thinking) {
log("Using think model for ", req.body.thinking); log("Using think model for ", req.body.thinking);
const [provider, model] = req.config.Router!.think.split(","); return config.Router!.think;
return {
provider,
model,
};
} }
return { return config.Router!.default;
provider: "default",
model: req.config.OPENAI_MODEL,
};
}; };
export const router = async ( export const router = async (req: any, res: any, config: any) => {
req: Request,
res: Response,
next: NextFunction
) => {
const { messages, system = [], tools }: MessageCreateParamsBase = req.body; const { messages, system = [], tools }: MessageCreateParamsBase = req.body;
try { try {
let tokenCount = 0; let tokenCount = 0;
@@ -102,14 +77,11 @@ export const router = async (
} }
}); });
} }
const { provider, model } = getUseModel(req, tokenCount); const model = getUseModel(req, tokenCount, config);
req.provider = provider;
req.body.model = model; req.body.model = model;
} catch (error) { } catch (error: any) {
log("Error in router middleware:", error.message); log("Error in router middleware:", error.message);
req.provider = "default"; req.body.model = config.Router!.default;
req.body.model = req.config.OPENAI_MODEL;
} finally {
next();
} }
return;
}; };

View File

@@ -15,7 +15,7 @@ export function showStatus() {
console.log(''); console.log('');
console.log('🚀 Ready to use! Run the following commands:'); console.log('🚀 Ready to use! Run the following commands:');
console.log(' ccr code # Start coding with Claude'); console.log(' ccr code # Start coding with Claude');
console.log(' ccr close # Stop the service'); console.log(' ccr stop # Stop the service');
} else { } else {
console.log('❌ Status: Not Running'); console.log('❌ Status: Not Running');
console.log(''); console.log('');

View File

@@ -1,343 +0,0 @@
import { Response } from "express";
import { OpenAI } from "openai";
import { log } from "./log";
interface ContentBlock {
type: string;
id?: string;
name?: string;
input?: any;
text?: string;
}
interface MessageEvent {
type: string;
message?: {
id: string;
type: string;
role: string;
content: any[];
model: string;
stop_reason: string | null;
stop_sequence: string | null;
usage: {
input_tokens: number;
output_tokens: number;
};
};
delta?: {
stop_reason?: string;
stop_sequence?: string | null;
content?: ContentBlock[];
type?: string;
text?: string;
partial_json?: string;
};
index?: number;
content_block?: ContentBlock;
usage?: {
input_tokens: number;
output_tokens: number;
};
}
export async function streamOpenAIResponse(
res: Response,
completion: any,
model: string,
body: any
) {
const write = (data: string) => {
log("response: ", data);
res.write(data);
};
const messageId = "msg_" + Date.now();
if (!body.stream) {
let content: any = [];
if (completion.choices[0].message.content) {
content = [ { text: completion.choices[0].message.content, type: "text" } ];
}
else if (completion.choices[0].message.tool_calls) {
content = completion.choices[0].message.tool_calls.map((item: any) => {
return {
type: 'tool_use',
id: item.id,
name: item.function?.name,
input: item.function?.arguments ? JSON.parse(item.function.arguments) : {},
};
});
}
const result = {
id: messageId,
type: "message",
role: "assistant",
// @ts-ignore
content: content,
stop_reason: completion.choices[0].finish_reason === 'tool_calls' ? "tool_use" : "end_turn",
stop_sequence: null,
};
try {
res.json(result);
res.end();
return;
} catch (error) {
log("Error sending response:", error);
res.status(500).send("Internal Server Error");
}
}
let contentBlockIndex = 0;
let currentContentBlocks: ContentBlock[] = [];
// Send message_start event
const messageStart: MessageEvent = {
type: "message_start",
message: {
id: messageId,
type: "message",
role: "assistant",
content: [],
model,
stop_reason: null,
stop_sequence: null,
usage: { input_tokens: 1, output_tokens: 1 },
},
};
write(`event: message_start\ndata: ${JSON.stringify(messageStart)}\n\n`);
let isToolUse = false;
let toolUseJson = "";
let hasStartedTextBlock = false;
let currentToolCallId: string | null = null;
let toolCallJsonMap = new Map<string, string>();
try {
for await (const chunk of completion) {
log("Processing chunk:", chunk);
const delta = chunk.choices[0].delta;
if (delta.tool_calls && delta.tool_calls.length > 0) {
for (const toolCall of delta.tool_calls) {
const toolCallId = toolCall.id;
// Check if this is a new tool call by ID
if (toolCallId && toolCallId !== currentToolCallId) {
// End previous tool call if one was active
if (isToolUse && currentToolCallId) {
const contentBlockStop: MessageEvent = {
type: "content_block_stop",
index: contentBlockIndex,
};
write(
`event: content_block_stop\ndata: ${JSON.stringify(
contentBlockStop
)}\n\n`
);
}
// Start new tool call block
isToolUse = true;
currentToolCallId = toolCallId;
contentBlockIndex++;
toolCallJsonMap.set(toolCallId, ""); // Initialize JSON accumulator for this tool call
const toolBlock: ContentBlock = {
type: "tool_use",
id: toolCallId,
name: toolCall.function?.name,
input: {},
};
const toolBlockStart: MessageEvent = {
type: "content_block_start",
index: contentBlockIndex,
content_block: toolBlock,
};
currentContentBlocks.push(toolBlock);
write(
`event: content_block_start\ndata: ${JSON.stringify(
toolBlockStart
)}\n\n`
);
}
// Stream tool call JSON
if (toolCall.function?.arguments && currentToolCallId) {
const jsonDelta: MessageEvent = {
type: "content_block_delta",
index: contentBlockIndex,
delta: {
type: "input_json_delta",
partial_json: toolCall.function.arguments,
},
};
// Accumulate JSON for this specific tool call
const currentJson = toolCallJsonMap.get(currentToolCallId) || "";
toolCallJsonMap.set(currentToolCallId, currentJson + toolCall.function.arguments);
toolUseJson = toolCallJsonMap.get(currentToolCallId) || "";
try {
const parsedJson = JSON.parse(toolUseJson);
currentContentBlocks[contentBlockIndex].input = parsedJson;
} catch (e) {
log("JSON parsing error (continuing to accumulate):", e);
// JSON not yet complete, continue accumulating
}
write(
`event: content_block_delta\ndata: ${JSON.stringify(jsonDelta)}\n\n`
);
}
}
} else if (delta.content) {
// Handle regular text content
if (isToolUse) {
log("Tool call ended here:", delta);
// End previous tool call block
const contentBlockStop: MessageEvent = {
type: "content_block_stop",
index: contentBlockIndex,
};
write(
`event: content_block_stop\ndata: ${JSON.stringify(
contentBlockStop
)}\n\n`
);
contentBlockIndex++;
isToolUse = false;
currentToolCallId = null;
toolUseJson = ""; // Reset for safety
}
if (!delta.content) continue;
// If text block not yet started, send content_block_start
if (!hasStartedTextBlock) {
const textBlock: ContentBlock = {
type: "text",
text: "",
};
const textBlockStart: MessageEvent = {
type: "content_block_start",
index: contentBlockIndex,
content_block: textBlock,
};
currentContentBlocks.push(textBlock);
write(
`event: content_block_start\ndata: ${JSON.stringify(
textBlockStart
)}\n\n`
);
hasStartedTextBlock = true;
}
// Send regular text content
const contentDelta: MessageEvent = {
type: "content_block_delta",
index: contentBlockIndex,
delta: {
type: "text_delta",
text: delta.content,
},
};
// Update content block text
if (currentContentBlocks[contentBlockIndex]) {
currentContentBlocks[contentBlockIndex].text += delta.content;
}
write(
`event: content_block_delta\ndata: ${JSON.stringify(
contentDelta
)}\n\n`
);
}
}
} catch (e: any) {
// If text block not yet started, send content_block_start
if (!hasStartedTextBlock) {
const textBlock: ContentBlock = {
type: "text",
text: "",
};
const textBlockStart: MessageEvent = {
type: "content_block_start",
index: contentBlockIndex,
content_block: textBlock,
};
currentContentBlocks.push(textBlock);
write(
`event: content_block_start\ndata: ${JSON.stringify(
textBlockStart
)}\n\n`
);
hasStartedTextBlock = true;
}
// Send regular text content
const contentDelta: MessageEvent = {
type: "content_block_delta",
index: contentBlockIndex,
delta: {
type: "text_delta",
text: JSON.stringify(e),
},
};
// Update content block text
if (currentContentBlocks[contentBlockIndex]) {
currentContentBlocks[contentBlockIndex].text += JSON.stringify(e);
}
write(
`event: content_block_delta\ndata: ${JSON.stringify(contentDelta)}\n\n`
);
}
// Close last content block if any is open
if (isToolUse || hasStartedTextBlock) {
const contentBlockStop: MessageEvent = {
type: "content_block_stop",
index: contentBlockIndex,
};
write(
`event: content_block_stop\ndata: ${JSON.stringify(contentBlockStop)}\n\n`
);
}
// Send message_delta event with appropriate stop_reason
const messageDelta: MessageEvent = {
type: "message_delta",
delta: {
stop_reason: isToolUse ? "tool_use" : "end_turn",
stop_sequence: null,
content: currentContentBlocks,
},
usage: { input_tokens: 100, output_tokens: 150 },
};
if (!isToolUse) {
log("body: ", body, "messageDelta: ", messageDelta);
}
write(`event: message_delta\ndata: ${JSON.stringify(messageDelta)}\n\n`);
// Send message_stop event
const messageStop: MessageEvent = {
type: "message_stop",
};
write(`event: message_stop\ndata: ${JSON.stringify(messageStop)}\n\n`);
res.end();
}