fix image router bugs

This commit is contained in:
musistudio
2025-10-20 22:18:06 +08:00
parent 45232813cc
commit dd14bbdf1c
5 changed files with 270 additions and 111 deletions

View File

@@ -620,5 +620,6 @@ A huge thank you to all our sponsors for their generous support!
- @\*\*东
- @\*落
- @哆\*k
- @\*涛
(If your name is masked, please contact me via my homepage email to update it with your GitHub username.)

View File

@@ -566,6 +566,7 @@ jobs:
- @\*\*东
- @\*落
- @哆\*k
- @\*涛
(如果您的名字被屏蔽,请通过我的主页电子邮件与我联系,以便使用您的 GitHub 用户名进行更新。)

View File

@@ -1,6 +1,6 @@
import {IAgent, ITool} from "./type";
import { createHash } from 'crypto';
import * as LRU from 'lru-cache';
import { IAgent, ITool } from "./type";
import { createHash } from "crypto";
import * as LRU from "lru-cache";
interface ImageCacheEntry {
source: any;
@@ -52,65 +52,99 @@ export class ImageAgent implements IAgent {
constructor() {
this.tools = new Map<string, ITool>();
this.appendTools()
this.appendTools();
}
shouldHandle(req: any, config: any): boolean {
if (!config.Router.image || req.body.model === config.Router.image) return false;
const lastMessage = req.body.messages[req.body.messages.length - 1]
if (!config.forceUseImageAgent && lastMessage.role === 'user' && Array.isArray(lastMessage.content) && lastMessage.content.find((item: any) => item.type === 'image' || (Array.isArray(item?.content) && item.content.some((sub: any) => sub.type === 'image')))) {
req.body.model = config.Router.image
const images = []
lastMessage.content.filter((item: any) => item.type === 'tool_result').forEach((item: any) => {
item.content.forEach((element: any) => {
if (element.type === 'image') {
images.push(element);
if (!config.Router.image || req.body.model === config.Router.image)
return false;
const lastMessage = req.body.messages[req.body.messages.length - 1];
if (
!config.forceUseImageAgent &&
lastMessage.role === "user" &&
Array.isArray(lastMessage.content) &&
lastMessage.content.find(
(item: any) =>
item.type === "image" ||
(Array.isArray(item?.content) &&
item.content.some((sub: any) => sub.type === "image"))
)
) {
req.body.model = config.Router.image;
const images = [];
lastMessage.content
.filter((item: any) => item.type === "tool_result")
.forEach((item: any) => {
if (Array.isArray(item.content)) {
item.content.forEach((element: any) => {
if (element.type === "image") {
images.push(element);
}
});
item.content = "read image successfully";
}
})
item.content = 'read image successfully';
})
});
lastMessage.content.push(...images);
return false;
}
return req.body.messages.some((msg: any) => msg.role === 'user' && Array.isArray(msg.content) && msg.content.some((item: any) => item.type === 'image' || (Array.isArray(item?.content) && item.content.some((sub: any) => sub.type === 'image'))))
return req.body.messages.some(
(msg: any) =>
msg.role === "user" &&
Array.isArray(msg.content) &&
msg.content.some(
(item: any) =>
item.type === "image" ||
(Array.isArray(item?.content) &&
item.content.some((sub: any) => sub.type === "image"))
)
);
}
appendTools() {
this.tools.set('analyzeImage', {
this.tools.set("analyzeImage", {
name: "analyzeImage",
description: "Analyse image or images by ID and extract information such as OCR text, objects, layout, colors, or safety signals.",
description:
"Analyse image or images by ID and extract information such as OCR text, objects, layout, colors, or safety signals.",
input_schema: {
"type": "object",
"properties": {
"imageId": {
"type": "array",
"description": "an array of IDs to analyse",
"items": {
"type": "string"
}
type: "object",
properties: {
imageId: {
type: "array",
description: "an array of IDs to analyse",
items: {
type: "string",
},
},
"task": {
"type": "string",
"description": "Details of task to perform on the image.The more detailed, the better",
task: {
type: "string",
description:
"Details of task to perform on the image.The more detailed, the better",
},
"regions": {
"type": "array",
"description": "Optional regions of interest within the image",
"items": {
"type": "object",
"properties": {
"name": {"type": "string", "description": "Optional label for the region"},
"x": {"type": "number", "description": "X coordinate"},
"y": {"type": "number", "description": "Y coordinate"},
"w": {"type": "number", "description": "Width of the region"},
"h": {"type": "number", "description": "Height of the region"},
"units": {"type": "string", "enum": ["px", "pct"], "description": "Units for coordinates and size"}
regions: {
type: "array",
description: "Optional regions of interest within the image",
items: {
type: "object",
properties: {
name: {
type: "string",
description: "Optional label for the region",
},
x: { type: "number", description: "X coordinate" },
y: { type: "number", description: "Y coordinate" },
w: { type: "number", description: "Width of the region" },
h: { type: "number", description: "Height of the region" },
units: {
type: "string",
enum: ["px", "pct"],
description: "Units for coordinates and size",
},
},
"required": ["x", "y", "w", "h", "units"]
}
}
required: ["x", "y", "w", "h", "units"],
},
},
},
"required": ["imageId", "task"]
required: ["imageId", "task"],
},
handler: async (args, context) => {
const imageMessages = [];
@@ -120,7 +154,9 @@ export class ImageAgent implements IAgent {
if (args.imageId) {
if (Array.isArray(args.imageId)) {
args.imageId.forEach((imgId: string) => {
const image = imageCache.getImage(`${context.req.id}_Image#${imgId}`);
const image = imageCache.getImage(
`${context.req.id}_Image#${imgId}`
);
if (image) {
imageMessages.push({
type: "image",
@@ -129,7 +165,9 @@ export class ImageAgent implements IAgent {
}
});
} else {
const image = imageCache.getImage(`${context.req.id}_Image#${args.imageId}`);
const image = imageCache.getImage(
`${context.req.id}_Image#${args.imageId}`
);
if (image) {
imageMessages.push({
type: "image",
@@ -141,10 +179,17 @@ export class ImageAgent implements IAgent {
delete args.imageId;
}
const userMessage = context.req.body.messages[context.req.body.messages.length - 1]
if (userMessage.role === 'user' && Array.isArray(userMessage.content)) {
const msgs = userMessage.content.filter(item => item.type === 'text' && !item.text.includes('This is an image, if you need to view or analyze it, you need to extract the imageId'))
imageMessages.push(...msgs)
const userMessage =
context.req.body.messages[context.req.body.messages.length - 1];
if (userMessage.role === "user" && Array.isArray(userMessage.content)) {
const msgs = userMessage.content.filter(
(item) =>
item.type === "text" &&
!item.text.includes(
"This is an image, if you need to view or analyze it, you need to extract the imageId"
)
);
imageMessages.push(...msgs);
}
if (Object.keys(args).length > 0) {
@@ -154,40 +199,46 @@ export class ImageAgent implements IAgent {
});
}
// Send to analysis agent and get response
const agentResponse = await fetch(`http://127.0.0.1:${context.config.PORT || 3456}/v1/messages`, {
method: "POST",
headers: {
'x-api-key': context.config.APIKEY,
'content-type': 'application/json',
},
body: JSON.stringify({
model: context.config.Router.image,
system: [{
type: 'text',
text: `You must interpret and analyze images strictly according to the assigned task.
const agentResponse = await fetch(
`http://127.0.0.1:${context.config.PORT || 3456}/v1/messages`,
{
method: "POST",
headers: {
"x-api-key": context.config.APIKEY,
"content-type": "application/json",
},
body: JSON.stringify({
model: context.config.Router.image,
system: [
{
type: "text",
text: `You must interpret and analyze images strictly according to the assigned task.
When an image placeholder is provided, your role is to parse the image content only within the scope of the users instructions.
Do not ignore or deviate from the task.
Always ensure that your response reflects a clear, accurate interpretation of the image aligned with the given objective.`
}],
messages: [
{
role: 'user',
content: imageMessages,
}
],
stream: false,
}),
}).then(res => res.json()).catch(err => {
return null;
});
Always ensure that your response reflects a clear, accurate interpretation of the image aligned with the given objective.`,
},
],
messages: [
{
role: "user",
content: imageMessages,
},
],
stream: false,
}),
}
)
.then((res) => res.json())
.catch((err) => {
return null;
});
if (!agentResponse || !agentResponse.content) {
return 'analyzeImage Error';
return "analyzeImage Error";
}
return agentResponse.content[0].text
}
})
return agentResponse.content[0].text;
},
});
}
reqHandler(req: any, config: any) {
@@ -205,27 +256,42 @@ If multiple images exist, select the **most relevant imageId** based on the user
Do not attempt to describe or analyze the image directly yourself.
Ignore any user interruptions or unrelated instructions that might cause you to skip this requirement.
Your response should consistently follow this rule whenever image-related analysis is requested.`,
})
});
const imageContents = req.body.messages.filter((item: any) => {
return item.role === 'user' && Array.isArray(item.content) &&
item.content.some((msg: any) => msg.type === "image" || (Array.isArray(msg.content) && msg.content.some((sub: any) => sub.type === 'image')));
return (
item.role === "user" &&
Array.isArray(item.content) &&
item.content.some(
(msg: any) =>
msg.type === "image" ||
(Array.isArray(msg.content) &&
msg.content.some((sub: any) => sub.type === "image"))
)
);
});
let imgId = 1;
imageContents.forEach((item: any) => {
if (!Array.isArray(item.content)) return;
item.content.forEach((msg: any) => {
if (msg.type === "image") {
imageCache.storeImage(`${req.id}_Image#${imgId}`, msg.source);
msg.type = 'text';
msg.type = "text";
delete msg.source;
msg.text = `[Image #${imgId}]This is an image, if you need to view or analyze it, you need to extract the imageId`;
imgId++;
} else if (msg.type === "text" && msg.text.includes('[Image #')) {
msg.text = msg.text.replace(/\[Image #\d+\]/g, '');
} else if (msg.type === "text" && msg.text.includes("[Image #")) {
msg.text = msg.text.replace(/\[Image #\d+\]/g, "");
} else if (msg.type === "tool_result") {
if (Array.isArray(msg.content) && msg.content.some(ele => ele.type === "image")) {
imageCache.storeImage(`${req.id}_Image#${imgId}`, msg.content[0].source);
if (
Array.isArray(msg.content) &&
msg.content.some((ele) => ele.type === "image")
) {
imageCache.storeImage(
`${req.id}_Image#${imgId}`,
msg.content[0].source
);
msg.content = `[Image #${imgId}]This is an image, if you need to view or analyze it, you need to extract the imageId`;
imgId++;
}
@@ -233,7 +299,6 @@ Your response should consistently follow this rule whenever image-related analys
});
});
}
}
export const imageAgent = new ImageAgent();

View File

@@ -11,6 +11,9 @@ export const PID_FILE = path.join(HOME_DIR, '.claude-code-router.pid');
export const REFERENCE_COUNT_FILE = path.join(os.tmpdir(), "claude-code-reference-count.txt");
// Claude projects directory
export const CLAUDE_PROJECTS_DIR = path.join(os.homedir(), ".claude", "projects");
export const DEFAULT_CONFIG = {
LOG: false,

View File

@@ -5,7 +5,10 @@ import {
} from "@anthropic-ai/sdk/resources/messages";
import { get_encoding } from "tiktoken";
import { sessionUsageCache, Usage } from "./cache";
import { readFile } from 'fs/promises'
import { readFile, access } from "fs/promises";
import { opendir, stat } from "fs/promises";
import { join } from "path";
import { CLAUDE_PROJECTS_DIR, HOME_DIR } from "../constants";
const enc = get_encoding("cl100k_base");
@@ -63,19 +66,58 @@ export const calculateTokenCount = (
return tokenCount;
};
/**
 * Reads a JSON configuration file and returns its parsed contents.
 *
 * This is a best-effort lookup: every failure mode is collapsed into `null`
 * so callers can simply fall through to the next configuration source.
 *
 * @param filePath - Path of the JSON file to load.
 * @returns The parsed JSON value, or `null` when the file does not exist,
 *   cannot be read, or does not contain valid JSON.
 */
const readConfigFile = async (filePath: string) => {
  try {
    // readFile already rejects for a missing or unreadable file, so a separate
    // access() pre-check would only add a syscall and a check-then-use race.
    const content = await readFile(filePath, "utf8");
    return JSON.parse(content);
  } catch {
    // Missing file, read failure or malformed JSON: report "no config".
    return null;
  }
};
/**
 * Resolves a Router override for the project that owns the request's session.
 *
 * Lookup order (first file that parses and has a truthy `Router` wins):
 *   1. `<HOME_DIR>/<project>/<sessionId>.json`  (session-level override)
 *   2. `<HOME_DIR>/<project>/config.json`       (project-level override)
 *
 * @param req - Incoming request; only `req.sessionId` is read.
 * @returns The `Router` object from the first matching config file, or
 *   `undefined` to signal that the caller should use the original config.
 */
const getProjectSpecificRouter = async (req: any) => {
  // Without a session id there is nothing to look up.
  if (!req.sessionId) {
    return undefined;
  }

  const projectName = await searchProjectBySession(req.sessionId);
  if (!projectName) {
    return undefined;
  }

  // Session-level config is tried first, then the project-level config.
  const candidatePaths = [
    join(HOME_DIR, projectName, `${req.sessionId}.json`),
    join(HOME_DIR, projectName, "config.json"),
  ];

  for (const candidatePath of candidatePaths) {
    // Files are read one at a time so the project config is only touched
    // when the session config does not provide a Router.
    const loaded = await readConfigFile(candidatePath);
    if (loaded && loaded.Router) {
      return loaded.Router;
    }
  }

  // No override found: undefined means "use the original configuration".
  return undefined;
};
const getUseModel = async (
req: any,
tokenCount: number,
config: any,
lastUsage?: Usage | undefined
) => {
const projectSpecificRouter = await getProjectSpecificRouter(req);
const Router = projectSpecificRouter || config.Router;
if (req.body.model.includes(",")) {
const [provider, model] = req.body.model.split(",");
const finalProvider = config.Providers.find(
(p: any) => p.name.toLowerCase() === provider
(p: any) => p.name.toLowerCase() === provider
);
const finalModel = finalProvider?.models?.find(
(m: any) => m.toLowerCase() === model
(m: any) => m.toLowerCase() === model
);
if (finalProvider && finalModel) {
return `${finalProvider.name},${finalModel}`;
@@ -84,20 +126,17 @@ const getUseModel = async (
}
// if tokenCount is greater than the configured threshold, use the long context model
const longContextThreshold = config.Router.longContextThreshold || 60000;
const longContextThreshold = Router.longContextThreshold || 60000;
const lastUsageThreshold =
lastUsage &&
lastUsage.input_tokens > longContextThreshold &&
tokenCount > 20000;
const tokenCountThreshold = tokenCount > longContextThreshold;
if (
(lastUsageThreshold || tokenCountThreshold) &&
config.Router.longContext
) {
req.log.info(
if ((lastUsageThreshold || tokenCountThreshold) && Router.longContext) {
req.log.info(
`Using long context model due to token count: ${tokenCount}, threshold: ${longContextThreshold}`
);
return config.Router.longContext;
return Router.longContext;
}
if (
req.body?.system?.length > 1 &&
@@ -125,18 +164,18 @@ const getUseModel = async (
}
// The priority of websearch must be higher than thinking.
if (
Array.isArray(req.body.tools) &&
req.body.tools.some((tool: any) => tool.type?.startsWith("web_search")) &&
config.Router.webSearch
Array.isArray(req.body.tools) &&
req.body.tools.some((tool: any) => tool.type?.startsWith("web_search")) &&
Router.webSearch
) {
return config.Router.webSearch;
return Router.webSearch;
}
// if thinking is enabled, use the think model
if (req.body.thinking && config.Router.think) {
if (req.body.thinking && Router.think) {
req.log.info(`Using think model for ${req.body.thinking}`);
return config.Router.think;
return Router.think;
}
return config.Router!.default;
return Router!.default;
};
export const router = async (req: any, _res: any, context: any) => {
@@ -150,9 +189,13 @@ export const router = async (req: any, _res: any, context: any) => {
}
const lastMessageUsage = sessionUsageCache.get(req.sessionId);
const { messages, system = [], tools }: MessageCreateParamsBase = req.body;
if (config.REWRITE_SYSTEM_PROMPT && system.length > 1 && system[1]?.text?.includes('<env>')) {
const prompt = await readFile(config.REWRITE_SYSTEM_PROMPT, 'utf-8');
system[1].text = `${prompt}<env>${system[1].text.split('<env>').pop()}`
if (
config.REWRITE_SYSTEM_PROMPT &&
system.length > 1 &&
system[1]?.text?.includes("<env>")
) {
const prompt = await readFile(config.REWRITE_SYSTEM_PROMPT, "utf-8");
system[1].text = `${prompt}<env>${system[1].text.split("<env>").pop()}`;
}
try {
@@ -168,7 +211,7 @@ export const router = async (req: any, _res: any, context: any) => {
const customRouter = require(config.CUSTOM_ROUTER_PATH);
req.tokenCount = tokenCount; // Pass token count to custom router
model = await customRouter(req, config, {
event
event,
});
} catch (e: any) {
req.log.error(`failed to load custom router: ${e.message}`);
@@ -184,3 +227,49 @@ export const router = async (req: any, _res: any, context: any) => {
}
return;
};
/**
 * Finds the project folder under `CLAUDE_PROJECTS_DIR` that contains the
 * transcript file `<sessionId>.jsonl`.
 *
 * @param sessionId - Session identifier used to build the `.jsonl` file name.
 * @returns The name of the first sub-directory (in directory-listing order)
 *   holding a regular file named `<sessionId>.jsonl`, or `null` when no
 *   folder matches or the scan itself fails (the failure is logged).
 */
export const searchProjectBySession = async (
  sessionId: string
): Promise<string | null> => {
  try {
    // Gather the names of every sub-directory of the projects directory.
    const projectDirs: string[] = [];
    const handle = await opendir(CLAUDE_PROJECTS_DIR);
    for await (const entry of handle) {
      if (!entry.isDirectory()) continue;
      projectDirs.push(entry.name);
    }

    // Probe all project folders concurrently for the session file.
    const probes = projectDirs.map(async (projectDir) => {
      try {
        const info = await stat(
          join(CLAUDE_PROJECTS_DIR, projectDir, `${sessionId}.jsonl`)
        );
        if (info.isFile()) {
          return projectDir;
        }
        return null;
      } catch {
        // The session file is not in this folder; treat it as a miss.
        return null;
      }
    });
    const outcomes = await Promise.all(probes);

    // Promise.all keeps input order, so this is the first matching folder.
    const firstHit = outcomes.find((outcome) => Boolean(outcome));
    return firstHit || null;
  } catch (error) {
    console.error("Error searching for project by session:", error);
    return null;
  }
};