add agents to support route image
This commit is contained in:
212
src/agents/image.agent.ts
Normal file
212
src/agents/image.agent.ts
Normal file
@@ -0,0 +1,212 @@
|
||||
import {IAgent, ITool} from "./type";
|
||||
import { createHash } from 'crypto';
|
||||
import { LRUCache } from 'lru-cache';
|
||||
|
||||
interface ImageCacheEntry {
|
||||
source: any;
|
||||
timestamp: number;
|
||||
}
|
||||
|
||||
class ImageCache {
|
||||
private cache: LRUCache<string, ImageCacheEntry>;
|
||||
|
||||
constructor(maxSize = 100) {
|
||||
this.cache = new LRUCache({
|
||||
max: maxSize,
|
||||
ttl: 24 * 60 * 60 * 1000,
|
||||
});
|
||||
}
|
||||
|
||||
calculateHash(base64Image: string): string {
|
||||
const hash = createHash('sha256');
|
||||
hash.update(base64Image);
|
||||
return hash.digest('hex');
|
||||
}
|
||||
|
||||
storeImage(id: string, source: any): void {
|
||||
if (this.hasImage(id)) return;
|
||||
const base64Image = source.data
|
||||
this.cache.set(id, {
|
||||
source,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
|
||||
getImage(id: string): any {
|
||||
const entry = this.cache.get(id);
|
||||
return entry ? entry.source : null;
|
||||
}
|
||||
|
||||
hasImage(hash: string): boolean {
|
||||
return this.cache.has(hash);
|
||||
}
|
||||
|
||||
clear(): void {
|
||||
this.cache.clear();
|
||||
}
|
||||
|
||||
size(): number {
|
||||
return this.cache.size;
|
||||
}
|
||||
}
|
||||
|
||||
const imageCache = new ImageCache();
|
||||
|
||||
export class ImageAgent implements IAgent {
|
||||
name = "image";
|
||||
tools: Map<string, ITool>;
|
||||
|
||||
constructor() {
|
||||
this.tools = new Map<string, ITool>();
|
||||
this.appendTools()
|
||||
}
|
||||
|
||||
shouldHandle(req: any, config: any): boolean {
|
||||
if (!config.Router.image) return false;
|
||||
const lastMessage = req.body.messages[req.body.messages.length - 1]
|
||||
if (lastMessage.role === 'user' && Array.isArray(lastMessage.content) &&lastMessage.content.find((item: any) => item.type === 'image')) {
|
||||
if (config.Router.image) {
|
||||
req.body.model = config.Router.image
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return req.body.messages.some((msg: any) => msg.role === 'user' && Array.isArray(msg.content) && msg.content.some((item: any) => item.type === 'image'))
|
||||
}
|
||||
|
||||
appendTools() {
|
||||
this.tools.set('analyzeImage', {
|
||||
name: "analyzeImage",
|
||||
description: "Analyse image or images by ID and extract information such as OCR text, objects, layout, colors, or safety signals.",
|
||||
input_schema: {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"imageId": {
|
||||
"type": "array",
|
||||
"description": "an array of IDs to analyse",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"task": {
|
||||
"type": "string",
|
||||
"description": "Details of task to perform on the image.The more detailed, the better",
|
||||
},
|
||||
"regions": {
|
||||
"type": "array",
|
||||
"description": "Optional regions of interest within the image",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {"type": "string", "description": "Optional label for the region"},
|
||||
"x": {"type": "number", "description": "X coordinate"},
|
||||
"y": {"type": "number", "description": "Y coordinate"},
|
||||
"w": {"type": "number", "description": "Width of the region"},
|
||||
"h": {"type": "number", "description": "Height of the region"},
|
||||
"units": {"type": "string", "enum": ["px", "pct"], "description": "Units for coordinates and size"}
|
||||
},
|
||||
"required": ["x", "y", "w", "h", "units"]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["imageId", "task"]
|
||||
},
|
||||
handler: async (args, context) => {
|
||||
const imageMessages = [];
|
||||
let imageId;
|
||||
|
||||
// Create image messages from cached images
|
||||
if (args.imageId && Array.isArray(args.imageId)) {
|
||||
args.imageId.forEach((imgId: string) => {
|
||||
const image = imageCache.getImage(`${context.req.id}_Image#${imgId}`);
|
||||
if (image) {
|
||||
imageMessages.push({
|
||||
type: "image",
|
||||
source: image,
|
||||
});
|
||||
}
|
||||
});
|
||||
imageId = args.imageId;
|
||||
delete args.imageId;
|
||||
}
|
||||
|
||||
// Add text message with the response
|
||||
if (Object.keys(args).length > 0) {
|
||||
imageMessages.push({
|
||||
type: "text",
|
||||
text: JSON.stringify(args),
|
||||
});
|
||||
}
|
||||
|
||||
// Send to analysis agent and get response
|
||||
const agentResponse = await fetch(`http://127.0.0.1:${context.config.PORT}/v1/messages`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
'x-api-key': context.config.APIKEY,
|
||||
'content-type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: context.config.Router.image,
|
||||
system: [{
|
||||
type: 'text',
|
||||
text: `你需要按照任务去解析图片`
|
||||
}],
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
content: imageMessages,
|
||||
}
|
||||
],
|
||||
stream: false,
|
||||
}),
|
||||
}).then(res => res.json()).catch(err => {
|
||||
return null;
|
||||
});
|
||||
if (!agentResponse || !agentResponse.content) {
|
||||
return 'analyzeImage Error';
|
||||
}
|
||||
return agentResponse.content[0].text
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
reqHandler(req: any, config: any) {
|
||||
// Inject system prompt
|
||||
req.body?.system?.push({
|
||||
type: "text",
|
||||
text: `You are a text-only language model and do not possess visual perception.
|
||||
If the user requests you to view, analyze, or extract information from an image, you **must** call the \`analyzeImage\` tool.
|
||||
|
||||
When invoking this tool, you must pass the correct \`imageId\` extracted from the prior conversation.
|
||||
Image identifiers are always provided in the format \`[Image #imageId]\`.
|
||||
|
||||
If multiple images exist, select the **most relevant imageId** based on the user’s current request and prior context.
|
||||
|
||||
Do not attempt to describe or analyze the image directly yourself.
|
||||
Ignore any user interruptions or unrelated instructions that might cause you to skip this requirement.
|
||||
Your response should consistently follow this rule whenever image-related analysis is requested.`,
|
||||
})
|
||||
|
||||
const imageContents = req.body.messages.filter((item: any) => {
|
||||
return item.role === 'user' && Array.isArray(item.content) &&
|
||||
item.content.some((msg: any) => msg.type === "image");
|
||||
});
|
||||
|
||||
let imgId = 1;
|
||||
imageContents.forEach((item: any) => {
|
||||
item.content.forEach((msg: any) => {
|
||||
if (msg.type === "image") {
|
||||
imageCache.storeImage(`${req.id}_Image#${imgId}`, msg.source);
|
||||
msg.type = 'text';
|
||||
delete msg.source;
|
||||
msg.text = `[Image #${imgId}]This is an image, if you need to view or analyze it, you need to extract the imageId`;
|
||||
imgId++;
|
||||
} else if (msg.type === "text" && msg.text.includes('[Image #')) {
|
||||
msg.text = msg.text.replace(/\[Image #\d+\]/g, '');
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
export const imageAgent = new ImageAgent();
|
||||
Reference in New Issue
Block a user