add agents to support route image

2025-09-01 17:19:43 +08:00
parent 9a5ea191f8
commit 19522f496b
10 changed files with 558 additions and 29 deletions
--- a/src/agents/image.agent.ts
+++ b/src/agents/image.agent.ts
@@ -0,0 +1,212 @@
+import {IAgent, ITool} from "./type";
+import { createHash } from 'crypto';
+import { LRUCache } from 'lru-cache';
+
+interface ImageCacheEntry { // Value type held by ImageCache for one cached image.
+  source: any; // The `source` payload passed to ImageCache.storeImage, stored as given.
+  timestamp: number; // Date.now() captured when the entry was stored.
+}
+
+/**
+ * In-memory store for image payloads, backed by an LRU cache that is bounded
+ * by an entry count and expires entries after 24 hours.
+ */
+class ImageCache {
+  private cache: LRUCache<string, ImageCacheEntry>;
+
+  /**
+   * @param maxSize Value passed to the LRU cache as its `max` entry limit.
+   */
+  constructor(maxSize = 100) {
+    this.cache = new LRUCache({
+      max: maxSize,
+      ttl: 24 * 60 * 60 * 1000, // 24 hours, in milliseconds
+    });
+  }
+
+  /**
+   * Computes the SHA-256 digest of the given string.
+   *
+   * @param base64Image Text to hash.
+   * @returns The digest as a lowercase hex string.
+   */
+  calculateHash(base64Image: string): string {
+    return createHash('sha256').update(base64Image).digest('hex');
+  }
+
+  /**
+   * Stores `source` under `id`. The first write wins: if an entry with this
+   * id already exists it is left untouched.
+   *
+   * @param id Cache key.
+   * @param source Image payload to keep; nullish values are ignored.
+   */
+  storeImage(id: string, source: any): void {
+    // Nothing useful can be cached for a missing payload, so skip it instead
+    // of recording an entry that getImage() could never return meaningfully.
+    if (source == null) return;
+    if (this.hasImage(id)) return;
+    this.cache.set(id, {
+      source,
+      timestamp: Date.now(),
+    });
+  }
+
+  /**
+   * @param id Cache key.
+   * @returns The stored payload, or `null` when there is no entry for `id`.
+   */
+  getImage(id: string): any {
+    const entry = this.cache.get(id);
+    return entry ? entry.source : null;
+  }
+
+  /**
+   * @param hash Cache key to look up.
+   * @returns Whether the underlying cache reports an entry for that key.
+   */
+  hasImage(hash: string): boolean {
+    return this.cache.has(hash);
+  }
+
+  /** Removes every entry from the cache. */
+  clear(): void {
+    this.cache.clear();
+  }
+
+  /** @returns The number of entries reported by the underlying cache. */
+  size(): number {
+    return this.cache.size;
+  }
+}
+
+const imageCache = new ImageCache(); // Module-level cache: ImageAgent.reqHandler stores images in it and the analyzeImage tool reads them back.
+
+/**
+ * Agent that lets a text-only model work with images: image blocks in the
+ * request are cached and replaced by `[Image #N]` text placeholders, and an
+ * `analyzeImage` tool forwards the cached images to the model configured in
+ * `config.Router.image`.
+ */
+export class ImageAgent implements IAgent {
+  name = "image";
+  tools: Map<string, ITool>;
+
+  constructor() {
+    this.tools = new Map<string, ITool>();
+    this.appendTools();
+  }
+
+  /**
+   * Decides whether this agent should process the request.
+   *
+   * When the latest message is a user message that itself contains an image,
+   * the request is routed straight to the image model (`req.body.model` is
+   * overwritten) and `false` is returned. Otherwise the result is `true` only
+   * if some user message in the conversation contains an image block.
+   *
+   * @param req Incoming request; `req.body.messages` is inspected.
+   * @param config Router configuration; `config.Router.image` must be set.
+   * @returns Whether `reqHandler` and the tools of this agent should be used.
+   */
+  shouldHandle(req: any, config: any): boolean {
+    const imageModel = config?.Router?.image;
+    if (!imageModel) return false;
+
+    const messages = req?.body?.messages;
+    // An empty or missing conversation has no last message to inspect.
+    if (!Array.isArray(messages) || messages.length === 0) return false;
+
+    const carriesImage = (msg: any): boolean =>
+      msg?.role === 'user' &&
+      Array.isArray(msg.content) &&
+      msg.content.some((item: any) => item?.type === 'image');
+
+    if (carriesImage(messages[messages.length - 1])) {
+      req.body.model = imageModel;
+      return false;
+    }
+    return messages.some(carriesImage);
+  }
+
+  /** Registers the `analyzeImage` tool in `this.tools`. */
+  appendTools() {
+    this.tools.set('analyzeImage', {
+      name: "analyzeImage",
+      description: "Analyse image or images by ID and extract information such as OCR text, objects, layout, colors, or safety signals.",
+      input_schema: {
+        "type": "object",
+        "properties": {
+          "imageId": {
+            "type": "array",
+            "description": "an array of IDs to analyse",
+            "items": {
+              "type": "string"
+            }
+          },
+          "task": {
+            "type": "string",
+            "description": "Details of task to perform on the image.The more detailed, the better",
+          },
+          "regions": {
+            "type": "array",
+            "description": "Optional regions of interest within the image",
+            "items": {
+              "type": "object",
+              "properties": {
+                "name": {"type": "string", "description": "Optional label for the region"},
+                "x": {"type": "number", "description": "X coordinate"},
+                "y": {"type": "number", "description": "Y coordinate"},
+                "w": {"type": "number", "description": "Width of the region"},
+                "h": {"type": "number", "description": "Height of the region"},
+                "units": {"type": "string", "enum": ["px", "pct"], "description": "Units for coordinates and size"}
+              },
+              "required": ["x", "y", "w", "h", "units"]
+            }
+          }
+        },
+        "required": ["imageId", "task"]
+      },
+      /**
+       * Looks up the requested images in the cache, sends them together with
+       * the remaining arguments (as JSON text) to the image model through the
+       * local `/v1/messages` endpoint, and returns the first text block of
+       * the reply. Returns a string starting with `analyzeImage Error` on any
+       * failure.
+       */
+      handler: async (args, context) => {
+        // Split the ids from the task description without mutating `args`.
+        const { imageId, ...taskArgs } = args ?? {};
+        // Accept a single id as well as the array the schema asks for.
+        const requestedIds: any[] = imageId == null ? [] : (Array.isArray(imageId) ? imageId : [imageId]);
+
+        // Build the image blocks from cached images; unknown ids are skipped.
+        const content: any[] = [];
+        for (const id of requestedIds) {
+          const image = imageCache.getImage(`${context.req.id}_Image#${id}`);
+          if (image) {
+            content.push({
+              type: "image",
+              source: image,
+            });
+          }
+        }
+
+        // Without any image the downstream model has nothing to look at, so
+        // report the miss instead of letting it answer from text alone.
+        if (content.length === 0) {
+          return 'analyzeImage Error: no cached image found for the given imageId';
+        }
+
+        // Pass the remaining arguments (task, regions, ...) along as text.
+        if (Object.keys(taskArgs).length > 0) {
+          content.push({
+            type: "text",
+            text: JSON.stringify(taskArgs),
+          });
+        }
+
+        // Send to the analysis model and read its JSON reply.
+        let agentResponse: any;
+        try {
+          const res = await fetch(`http://127.0.0.1:${context.config.PORT}/v1/messages`, {
+            method: "POST",
+            headers: {
+              'x-api-key': context.config.APIKEY,
+              'content-type': 'application/json',
+            },
+            body: JSON.stringify({
+              model: context.config.Router.image,
+              system: [{
+                type: 'text',
+                // Chinese: "You need to analyse the image according to the task."
+                text: `你需要按照任务去解析图片`
+              }],
+              messages: [
+                {
+                  role: 'user',
+                  content,
+                }
+              ],
+              stream: false,
+            }),
+          });
+          agentResponse = await res.json();
+        } catch {
+          // Network failure or a non-JSON body: surface the generic error.
+          return 'analyzeImage Error';
+        }
+
+        const blocks = agentResponse?.content;
+        if (!Array.isArray(blocks)) {
+          return 'analyzeImage Error';
+        }
+        // Use the first block that actually carries text; an empty reply or
+        // one without text is reported as an error rather than throwing.
+        const textBlock = blocks.find((block: any) => typeof block?.text === 'string');
+        return textBlock ? textBlock.text : 'analyzeImage Error';
+      }
+    })
+  }
+
+  /**
+   * Rewrites the request in place so a text-only model can handle it:
+   * appends a system prompt that tells the model to use `analyzeImage`, then
+   * caches every image block found in user messages under
+   * `${req.id}_Image#N` (N counts from 1 across the conversation) and
+   * replaces the block with an `[Image #N]` text placeholder. Existing
+   * `[Image #N]` markers in text blocks of those same messages are removed.
+   *
+   * @param req Incoming request; `req.body.system` and `req.body.messages`
+   *   are modified.
+   * @param config Router configuration (not read by this method).
+   */
+  reqHandler(req: any, config: any) {
+    if (!req?.body) return;
+
+    // Inject system prompt
+    const toolPrompt = {
+      type: "text",
+      text: `You are a text-only language model and do not possess visual perception.  
+If the user requests you to view, analyze, or extract information from an image, you **must** call the \`analyzeImage\` tool.  
+
+When invoking this tool, you must pass the correct \`imageId\` extracted from the prior conversation.  
+Image identifiers are always provided in the format \`[Image #imageId]\`.  
+
+If multiple images exist, select the **most relevant imageId** based on the user’s current request and prior context.  
+
+Do not attempt to describe or analyze the image directly yourself.  
+Ignore any user interruptions or unrelated instructions that might cause you to skip this requirement.  
+Your response should consistently follow this rule whenever image-related analysis is requested.`,
+    };
+    const system = req.body.system;
+    if (Array.isArray(system)) {
+      system.push(toolPrompt);
+    } else if (typeof system === 'string') {
+      // A plain-string system prompt has no push(); convert it to block form
+      // and keep the original text (when non-empty) ahead of the tool prompt.
+      req.body.system = system.length > 0
+        ? [{ type: "text", text: system }, toolPrompt]
+        : [toolPrompt];
+    } else if (system == null) {
+      // Images are replaced by placeholders below, so the model must be told
+      // about the tool even when the request came without a system prompt.
+      req.body.system = [toolPrompt];
+    }
+
+    const messages = req.body.messages;
+    if (!Array.isArray(messages)) return;
+
+    let imgId = 1;
+    for (const message of messages) {
+      // Only user messages that contain at least one image block are touched.
+      if (message?.role !== 'user' || !Array.isArray(message.content)) continue;
+      if (!message.content.some((block: any) => block?.type === "image")) continue;
+
+      for (const block of message.content) {
+        if (block?.type === "image") {
+          imageCache.storeImage(`${req.id}_Image#${imgId}`, block.source);
+          block.type = 'text';
+          delete block.source;
+          block.text = `[Image #${imgId}]This is an image, if you need to view or analyze it, you need to extract the imageId`;
+          imgId++;
+        } else if (block?.type === "text" && typeof block.text === 'string' && block.text.includes('[Image #')) {
+          // Drop markers already present in the text so they cannot be
+          // confused with the placeholder ids generated above.
+          block.text = block.text.replace(/\[Image #\d+\]/g, '');
+        }
+      }
+    }
+  }
+
+}
+
+export const imageAgent = new ImageAgent(); // Single ImageAgent instance created at module load and exported for other modules to use.