Use the router to dispatch requests to different models: background, long-context, and think

This commit is contained in:
jinhui.li
2025-06-14 19:48:29 +08:00
parent 7a5d712444
commit 9a89250d79
12 changed files with 300 additions and 145 deletions

View File

@@ -3,5 +3,6 @@
"LOG": true,
"OPENAI_API_KEY": "",
"OPENAI_BASE_URL": "",
"OPENAI_MODEL": ""
}
"OPENAI_MODEL": "",
"modelProviders": {}
}

View File

@@ -6,9 +6,16 @@
"ccr": "./dist/cli.js"
},
"scripts": {
"build": "esbuild src/cli.ts --bundle --platform=node --outfile=dist/cli.js"
"build": "esbuild src/cli.ts --bundle --platform=node --outfile=dist/cli.js",
"buildserver": "esbuild src/index.ts --bundle --platform=node --outfile=dist/index.js"
},
"keywords": ["claude", "code", "router", "llm", "anthropic"],
"keywords": [
"claude",
"code",
"router",
"llm",
"anthropic"
],
"author": "musistudio",
"license": "MIT",
"dependencies": {
@@ -16,7 +23,10 @@
"dotenv": "^16.4.7",
"express": "^4.21.2",
"https-proxy-agent": "^7.0.6",
"openai": "^4.85.4"
"lru-cache": "^11.1.0",
"openai": "^4.85.4",
"tiktoken": "^1.0.21",
"uuid": "^11.1.0"
},
"devDependencies": {
"@types/express": "^5.0.0",

104
pnpm-lock.yaml generated
View File

@@ -5,9 +5,6 @@ settings:
excludeLinksFromLockfile: false
dependencies:
'@anthropic-ai/claude-code':
specifier: ^0.2.53
version: 0.2.53
'@anthropic-ai/sdk':
specifier: ^0.39.0
version: 0.39.0
@@ -20,9 +17,18 @@ dependencies:
https-proxy-agent:
specifier: ^7.0.6
version: 7.0.6
lru-cache:
specifier: ^11.1.0
version: 11.1.0
openai:
specifier: ^4.85.4
version: 4.86.1
tiktoken:
specifier: ^1.0.21
version: 1.0.21
uuid:
specifier: ^11.1.0
version: 11.1.0
devDependencies:
'@types/express':
@@ -37,18 +43,6 @@ devDependencies:
packages:
/@anthropic-ai/claude-code@0.2.53:
resolution: {integrity: sha512-DKXGjSsu2+rc1GaAdOjRqD7fMLvyQgwi/sqf6lLHWQAarwYxR/ahbSheu7h1Ub0wm0htnuIqgNnmNZUM43w/3Q==}
engines: {node: '>=18.0.0'}
hasBin: true
requiresBuild: true
optionalDependencies:
'@img/sharp-darwin-arm64': 0.33.5
'@img/sharp-linux-arm': 0.33.5
'@img/sharp-linux-x64': 0.33.5
'@img/sharp-win32-x64': 0.33.5
dev: false
/@anthropic-ai/sdk@0.39.0:
resolution: {integrity: sha512-eMyDIPRZbt1CCLErRCi3exlAvNkBtRe+kW5vvJyef93PmNr/clstYgHhtvmkxN82nlKgzyGPCyGxrm0JQ1ZIdg==}
dependencies:
@@ -288,72 +282,6 @@ packages:
dev: true
optional: true
/@img/sharp-darwin-arm64@0.33.5:
resolution: {integrity: sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [arm64]
os: [darwin]
requiresBuild: true
optionalDependencies:
'@img/sharp-libvips-darwin-arm64': 1.0.4
dev: false
optional: true
/@img/sharp-libvips-darwin-arm64@1.0.4:
resolution: {integrity: sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg==}
cpu: [arm64]
os: [darwin]
requiresBuild: true
dev: false
optional: true
/@img/sharp-libvips-linux-arm@1.0.5:
resolution: {integrity: sha512-gvcC4ACAOPRNATg/ov8/MnbxFDJqf/pDePbBnuBDcjsI8PssmjoKMAz4LtLaVi+OnSb5FK/yIOamqDwGmXW32g==}
cpu: [arm]
os: [linux]
requiresBuild: true
dev: false
optional: true
/@img/sharp-libvips-linux-x64@1.0.4:
resolution: {integrity: sha512-MmWmQ3iPFZr0Iev+BAgVMb3ZyC4KeFc3jFxnNbEPas60e1cIfevbtuyf9nDGIzOaW9PdnDciJm+wFFaTlj5xYw==}
cpu: [x64]
os: [linux]
requiresBuild: true
dev: false
optional: true
/@img/sharp-linux-arm@0.33.5:
resolution: {integrity: sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [arm]
os: [linux]
requiresBuild: true
optionalDependencies:
'@img/sharp-libvips-linux-arm': 1.0.5
dev: false
optional: true
/@img/sharp-linux-x64@0.33.5:
resolution: {integrity: sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [x64]
os: [linux]
requiresBuild: true
optionalDependencies:
'@img/sharp-libvips-linux-x64': 1.0.4
dev: false
optional: true
/@img/sharp-win32-x64@0.33.5:
resolution: {integrity: sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
cpu: [x64]
os: [win32]
requiresBuild: true
dev: false
optional: true
/@types/body-parser@1.19.5:
resolution: {integrity: sha512-fB3Zu92ucau0iQ0JMCFQE7b/dv8Ot07NI3KaZIkIUNXq82k4eBAqUaneXfleGY9JWskeS9y+u0nXMyspcuQrCg==}
dependencies:
@@ -853,6 +781,11 @@ packages:
engines: {node: '>= 0.10'}
dev: false
/lru-cache@11.1.0:
resolution: {integrity: sha512-QIXZUBJUx+2zHUdQujWejBkcD9+cs94tLn0+YL8UrCh+D5sCXZ4c7LaEH48pNwRY3MLDgqUFyhlCyjJPf1WP0A==}
engines: {node: 20 || >=22}
dev: false
/math-intrinsics@1.1.0:
resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==}
engines: {node: '>= 0.4'}
@@ -1084,6 +1017,10 @@ packages:
engines: {node: '>= 0.8'}
dev: false
/tiktoken@1.0.21:
resolution: {integrity: sha512-/kqtlepLMptX0OgbYD9aMYbM7EFrMZCL7EoHM8Psmg2FuhXoo/bH64KqOiZGGwa6oS9TPdSEDKBnV2LuB8+5vQ==}
dev: false
/toidentifier@1.0.1:
resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==}
engines: {node: '>=0.6'}
@@ -1120,6 +1057,11 @@ packages:
engines: {node: '>= 0.4.0'}
dev: false
/uuid@11.1.0:
resolution: {integrity: sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==}
hasBin: true
dev: false
/vary@1.1.2:
resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==}
engines: {node: '>= 0.8'}

View File

@@ -3,7 +3,7 @@ import { run } from "./index";
import { closeService } from "./utils/close";
import { showStatus } from "./utils/status";
import { executeCodeCommand } from "./utils/codeCommand";
import { isServiceRunning } from "./utils/processCheck";
import { cleanupPidFile, isServiceRunning } from "./utils/processCheck";
import { version } from "../package.json";
const command = process.argv[2];
@@ -44,6 +44,8 @@ async function waitForService(
}
import { spawn } from "child_process";
import { PID_FILE, REFERENCE_COUNT_FILE } from "./constants";
import { existsSync, readFileSync } from "fs";
async function main() {
switch (command) {
@@ -51,7 +53,26 @@ async function main() {
run();
break;
case "stop":
await closeService();
try {
const pid = parseInt(readFileSync(PID_FILE, "utf-8"));
process.kill(pid);
cleanupPidFile();
if (existsSync(REFERENCE_COUNT_FILE)) {
try {
require("fs").unlinkSync(REFERENCE_COUNT_FILE);
} catch (e) {
// Ignore cleanup errors
}
}
console.log(
"claude code router service has been successfully stopped."
);
} catch (e) {
console.log(
"Failed to stop the service. It may have already been stopped."
);
cleanupPidFile();
}
break;
case "status":
showStatus();

View File

@@ -9,6 +9,8 @@ export const PLUGINS_DIR = `${HOME_DIR}/plugins`;
export const PID_FILE = path.join(HOME_DIR, '.claude-code-router.pid');
export const REFERENCE_COUNT_FILE = '/tmp/claude-code-reference-count.txt';
export const DEFAULT_CONFIG = {
log: false,

View File

@@ -4,10 +4,16 @@ import { getOpenAICommonOptions, initConfig, initDir } from "./utils";
import { createServer } from "./server";
import { formatRequest } from "./middlewares/formatRequest";
import { rewriteBody } from "./middlewares/rewriteBody";
import { router } from "./middlewares/router";
import OpenAI from "openai";
import { streamOpenAIResponse } from "./utils/stream";
import { cleanupPidFile, isServiceRunning, savePid } from "./utils/processCheck";
import {
cleanupPidFile,
isServiceRunning,
savePid,
} from "./utils/processCheck";
import { LRUCache } from "lru-cache";
import { log } from "./utils/log";
async function initializeClaudeConfig() {
const homeDir = process.env.HOME;
@@ -33,9 +39,14 @@ interface RunOptions {
port?: number;
}
async function run(options: RunOptions = {}) {
const port = options.port || 3456;
/**
 * One configured upstream model provider.
 *
 * Entries are stored in a map keyed by `name`; `api_base_url` and `api_key`
 * are handed to the OpenAI client as its `baseURL` and `apiKey`.
 */
interface ModelProvider {
  /** Identifier used to look the provider up and to key its cached client. */
  name: string;
  /** Base URL passed to the OpenAI client constructor. */
  api_base_url: string;
  /** API key passed to the OpenAI client constructor. */
  api_key: string;
  /** Model names associated with this provider. */
  models: Array<string>;
}
async function run(options: RunOptions = {}) {
// Check if service is already running
if (isServiceRunning()) {
console.log("✅ Service is already running in the background.");
@@ -44,20 +55,67 @@ async function run(options: RunOptions = {}) {
await initializeClaudeConfig();
await initDir();
await initConfig();
const config = await initConfig();
const Providers = new Map<string, ModelProvider>();
const providerCache = new LRUCache<string, OpenAI>({
max: 10,
ttl: 2 * 60 * 60 * 1000,
});
/**
 * Returns the OpenAI client for a registered provider, building it on first
 * use and reusing the cached instance afterwards.
 *
 * The cache is keyed by the provider's own `name` (not by the lookup key), so
 * a provider registered under more than one key shares a single client.
 *
 * @param providerName - Key to look up in the `Providers` map.
 * @returns The cached or newly created client for that provider.
 * @throws Error when no provider is registered under `providerName`.
 */
function getProviderInstance(providerName: string): OpenAI {
  const entry: ModelProvider | undefined = Providers.get(providerName);
  if (entry === undefined) {
    throw new Error(`Provider ${providerName} not found`);
  }

  const cached = providerCache.get(entry.name);
  if (cached) {
    return cached;
  }

  // Common options are spread last so they take precedence over the
  // per-provider fields if the two ever overlap.
  const client = new OpenAI({
    baseURL: entry.api_base_url,
    apiKey: entry.api_key,
    ...getOpenAICommonOptions(),
  });
  providerCache.set(entry.name, client);
  return client;
}
if (Array.isArray(config.Providers)) {
config.Providers.forEach((provider) => {
try {
Providers.set(provider.name, provider);
} catch (error) {
console.error("Failed to parse model provider:", error);
}
});
}
if (config.OPENAI_API_KEY && config.OPENAI_BASE_URL && config.OPENAI_MODEL) {
const defaultProvider = {
name: "default",
api_base_url: config.OPENAI_BASE_URL,
api_key: config.OPENAI_API_KEY,
models: [config.OPENAI_MODEL],
};
Providers.set("default", defaultProvider);
} else if (Providers.size > 0) {
const defaultProvider = Providers.values().next().value!;
Providers.set("default", defaultProvider);
}
const port = options.port || 3456;
// Save the PID of the background process
savePid(process.pid);
// Handle SIGINT (Ctrl+C) to clean up PID file
process.on('SIGINT', () => {
process.on("SIGINT", () => {
console.log("Received SIGINT, cleaning up...");
cleanupPidFile();
process.exit(0);
});
// Handle SIGTERM to clean up PID file
process.on('SIGTERM', () => {
process.on("SIGTERM", () => {
cleanupPidFile();
process.exit(0);
});
@@ -67,21 +125,27 @@ async function run(options: RunOptions = {}) {
? parseInt(process.env.SERVICE_PORT)
: port;
const server = createServer(servicePort);
server.useMiddleware(formatRequest);
server.useMiddleware(rewriteBody);
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
baseURL: process.env.OPENAI_BASE_URL,
...getOpenAICommonOptions(),
const server = await createServer(servicePort);
server.useMiddleware((req, res, next) => {
console.log("Middleware triggered for request:", req.body.model);
req.config = config;
next();
});
if (
config.Router?.background &&
config.Router?.think &&
config?.Router?.longContext
) {
log("Using custom router middleware");
server.useMiddleware(router);
}
server.useMiddleware(rewriteBody);
server.useMiddleware(formatRequest);
server.app.post("/v1/messages", async (req, res) => {
try {
if (process.env.OPENAI_MODEL) {
req.body.model = process.env.OPENAI_MODEL;
}
const completion: any = await openai.chat.completions.create(req.body);
const provider = getProviderInstance(req.provider || "default");
const completion: any = await provider.chat.completions.create(req.body);
await streamOpenAIResponse(res, completion, req.body.model, req.body);
} catch (e) {
console.error("Error in OpenAI API call:", e);
@@ -92,3 +156,4 @@ async function run(options: RunOptions = {}) {
}
export { run };
// run();

View File

@@ -1,5 +1,4 @@
import { Request, Response, NextFunction } from "express";
import { ContentBlockParam } from "@anthropic-ai/sdk/resources";
import { MessageCreateParamsBase } from "@anthropic-ai/sdk/resources/messages";
import OpenAI from "openai";
import { streamOpenAIResponse } from "../utils/stream";
@@ -181,6 +180,7 @@ export const formatRequest = async (
res.setHeader("Cache-Control", "no-cache");
res.setHeader("Connection", "keep-alive");
req.body = data;
console.log(JSON.stringify(data.messages, null, 2));
} catch (error) {
console.error("Error in request processing:", error);
const errorCompletion: AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk> =
@@ -189,7 +189,7 @@ export const formatRequest = async (
yield {
id: `error_${Date.now()}`,
created: Math.floor(Date.now() / 1000),
model: "gpt-3.5-turbo",
model,
object: "chat.completion.chunk",
choices: [
{

110
src/middlewares/router.ts Normal file
View File

@@ -0,0 +1,110 @@
import { MessageCreateParamsBase } from "@anthropic-ai/sdk/resources/messages";
import { Request, Response, NextFunction } from "express";
import { get_encoding } from "tiktoken";
import { log } from "../utils/log";
const enc = get_encoding("cl100k_base");
/**
 * Picks the provider and model that should serve a request.
 *
 * Rules are checked in order and the first match wins:
 *  1. `tokenCount` above 32,000 -> `Router.longContext`
 *  2. requested model starts with "claude-3-5-haiku" -> `Router.background`
 *  3. the request body has a truthy `thinking` field -> `Router.think`
 *  4. the requested model is itself written as "provider,model" -> used as-is
 *  5. otherwise -> the "default" provider with `config.OPENAI_MODEL`
 *
 * Router entries are "provider,model" strings and are split on ",".
 *
 * @param req - Incoming request; `req.config` and `req.body` are read.
 * @param tokenCount - Token count the caller computed for the request.
 * @returns The provider name and model name to use.
 */
const getUseModel = (req: Request, tokenCount: number) => {
  const longContextThreshold = 1000 * 32;

  // The requested model may be absent or malformed; narrow it once so that
  // neither the prefix check nor the split below can throw on a non-string.
  const requestedModel: unknown = req.body.model;
  const requested = typeof requestedModel === "string" ? requestedModel : "";

  // NOTE(review): `Router` is asserted non-null below; this presumes the
  // middleware is only installed when all three Router entries are configured.

  // If tokenCount is greater than 32K, use the long context model
  if (tokenCount > longContextThreshold) {
    log("Using long context model due to token count:", tokenCount);
    const [provider, model] = req.config.Router!.longContext.split(",");
    return {
      provider,
      model,
    };
  }

  // If the model is claude-3-5-haiku, use the background model
  if (requested.startsWith("claude-3-5-haiku")) {
    log("Using background model for ", req.body.model);
    const [provider, model] = req.config.Router!.background.split(",");
    return {
      provider,
      model,
    };
  }

  // If thinking is requested, use the think model
  if (req.body.thinking) {
    log("Using think model for ", req.body.thinking);
    const [provider, model] = req.config.Router!.think.split(",");
    return {
      provider,
      model,
    };
  }

  // An explicit "provider,model" in the request is honoured as-is.
  const [provider, model] = requested.split(",");
  if (provider && model) {
    return {
      provider,
      model,
    };
  }

  return {
    provider: "default",
    model: req.config.OPENAI_MODEL,
  };
};
/**
 * Counts the tokens in the `content` of a `tool_result` block.
 *
 * The content may be a plain string or an array of content blocks; only the
 * text blocks of an array are counted. Anything else contributes zero.
 */
const countToolResultTokens = (content: unknown): number => {
  if (typeof content === "string") {
    return enc.encode(content).length;
  }
  if (Array.isArray(content)) {
    let total = 0;
    for (const part of content) {
      if (part?.type === "text" && typeof part.text === "string") {
        total += enc.encode(part.text).length;
      }
    }
    return total;
  }
  return 0;
};

/**
 * Express middleware that estimates the request's token count and selects the
 * provider and model to serve it.
 *
 * Tokens are counted over message content (text, tool_use input and
 * tool_result content), the system prompt, and tool definitions, using the
 * module-level encoder. The result is passed to `getUseModel`; the chosen
 * provider is written to `req.provider` and the model to `req.body.model`.
 *
 * If anything throws while counting or selecting, the request falls back to
 * the "default" provider with `config.OPENAI_MODEL`. `next()` is always
 * called.
 *
 * @param req - Incoming request; `req.body` and `req.config` are read, and
 *   `req.provider` / `req.body.model` are written.
 * @param res - Unused.
 * @param next - Continues the middleware chain.
 */
export const router = async (
  req: Request,
  res: Response,
  next: NextFunction
) => {
  const { messages, system = [], tools }: MessageCreateParamsBase = req.body;
  try {
    let tokenCount = 0;

    if (Array.isArray(messages)) {
      for (const message of messages) {
        if (typeof message.content === "string") {
          tokenCount += enc.encode(message.content).length;
        } else if (Array.isArray(message.content)) {
          for (const contentPart of message.content) {
            if (contentPart.type === "text") {
              tokenCount += enc.encode(contentPart.text).length;
            } else if (contentPart.type === "tool_use") {
              // JSON.stringify returns undefined for an undefined input,
              // which the encoder cannot handle.
              tokenCount += enc.encode(
                JSON.stringify(contentPart.input) ?? ""
              ).length;
            } else if (contentPart.type === "tool_result") {
              // Content can be an array of blocks, not just a string; passing
              // the array to the encoder would throw and force the fallback.
              tokenCount += countToolResultTokens(contentPart.content);
            }
          }
        }
      }
    }

    if (typeof system === "string") {
      tokenCount += enc.encode(system).length;
    } else if (Array.isArray(system)) {
      for (const item of system) {
        if (item.type !== "text") continue;
        // Read as unknown so the tolerant array branch below still
        // type-checks even though the declared type is a string.
        const text: unknown = item.text;
        if (typeof text === "string") {
          tokenCount += enc.encode(text).length;
        } else if (Array.isArray(text)) {
          for (const textPart of text) {
            tokenCount += enc.encode(textPart || "").length;
          }
        }
      }
    }

    if (tools) {
      for (const tool of tools) {
        if (tool.description) {
          tokenCount += enc.encode(tool.name + tool.description).length;
        }
        if (tool.input_schema) {
          tokenCount += enc.encode(JSON.stringify(tool.input_schema)).length;
        }
      }
    }

    const { provider, model } = getUseModel(req, tokenCount);
    req.provider = provider;
    req.body.model = model;
  } catch (error: unknown) {
    // The caught value is not guaranteed to be an Error instance.
    const reason = error instanceof Error ? error.message : String(error);
    log("Error in router middleware:", reason);
    req.provider = "default";
    req.body.model = req.config.OPENAI_MODEL;
  } finally {
    next();
  }
};

View File

@@ -6,7 +6,7 @@ interface Server {
start: () => void;
}
export const createServer = (port: number): Server => {
export const createServer = async (port: number): Promise<Server> => {
const app = express();
app.use(express.json({ limit: "500mb" }));
return {

View File

@@ -1,39 +1,42 @@
import { spawn } from 'child_process';
import { isServiceRunning, incrementReferenceCount, decrementReferenceCount } from './processCheck';
import { closeService } from './close';
import { spawn } from "child_process";
import {
incrementReferenceCount,
decrementReferenceCount,
} from "./processCheck";
import { closeService } from "./close";
export async function executeCodeCommand(args: string[] = []) {
// Service check is now handled in cli.ts
// Set environment variables
const env = {
...process.env,
DISABLE_PROMPT_CACHING: "1",
ANTHROPIC_AUTH_TOKEN: "test",
ANTHROPIC_BASE_URL: `http://127.0.0.1:3456`,
API_TIMEOUT_MS: "600000",
};
// Set environment variables
const env = {
...process.env,
DISABLE_PROMPT_CACHING: '1',
ANTHROPIC_AUTH_TOKEN: 'test',
ANTHROPIC_BASE_URL: 'http://127.0.0.1:3456',
API_TIMEOUT_MS: '600000'
};
// Increment reference count when command starts
incrementReferenceCount();
// Increment reference count when command starts
incrementReferenceCount();
// Execute claude command
const claudeProcess = spawn("claude", args, {
env,
stdio: "inherit",
shell: true,
});
// Execute claude command
const claudeProcess = spawn('claude', args, {
env,
stdio: 'inherit',
shell: true
});
claudeProcess.on("error", (error) => {
console.error("Failed to start claude command:", error.message);
console.log(
"Make sure Claude Code is installed: npm install -g @anthropic-ai/claude-code"
);
decrementReferenceCount();
process.exit(1);
});
claudeProcess.on('error', (error) => {
console.error('Failed to start claude command:', error.message);
console.log('Make sure Claude Code is installed: npm install -g @anthropic-ai/claude-code');
decrementReferenceCount();
process.exit(1);
});
claudeProcess.on('close', (code) => {
decrementReferenceCount();
closeService()
process.exit(code || 0);
});
claudeProcess.on("close", (code) => {
decrementReferenceCount();
closeService();
process.exit(code || 0);
});
}

View File

@@ -13,6 +13,8 @@ export function getOpenAICommonOptions(): ClientOptions {
const options: ClientOptions = {};
if (process.env.PROXY_URL) {
options.httpAgent = new HttpsProxyAgent(process.env.PROXY_URL);
} else if (process.env.HTTPS_PROXY) {
options.httpAgent = new HttpsProxyAgent(process.env.HTTPS_PROXY);
}
return options;
}
@@ -78,6 +80,7 @@ export const writeConfigFile = async (config: any) => {
/**
 * Reads the configuration file, copies its entries onto `process.env`, and
 * hands the configuration object back to the caller.
 *
 * NOTE(review): values assigned to `process.env` are coerced to strings by
 * Node, so nested objects or arrays in the config will not be usable from the
 * environment — use the returned object for those.
 *
 * @returns The configuration object produced by `readConfigFile`.
 */
export const initConfig = async () => {
  const loadedConfig = await readConfigFile();
  Object.assign(process.env, loadedConfig);
  return loadedConfig;
};
export const createClient = (options: ClientOptions) => {

View File

@@ -1,7 +1,5 @@
import { existsSync, readFileSync, writeFileSync } from 'fs';
import { PID_FILE } from '../constants';
const REFERENCE_COUNT_FILE = '/tmp/claude-code-reference-count.txt';
import { PID_FILE, REFERENCE_COUNT_FILE } from '../constants';
export function incrementReferenceCount() {
let count = 0;