feat: implement modular provider architecture with Codex CLI support

Implements a flexible provider pattern that supports both Claude Agent SDK
and OpenAI Codex CLI, enabling future expansion to other AI providers
(Cursor, OpenCode, etc.) with minimal changes.

## Architecture Changes

### New Provider System
- Created provider abstraction layer with BaseProvider interface
- Model-based routing: model prefix determines provider
  - `gpt-*`, `o*` → CodexProvider (subprocess CLI)
  - `claude-*`, `opus/sonnet/haiku` → ClaudeProvider (SDK)
- Providers implement common ExecuteOptions interface

### New Files Created
- `providers/types.ts` - Shared interfaces (ExecuteOptions, ProviderMessage, etc.)
- `providers/base-provider.ts` - Abstract base class
- `providers/claude-provider.ts` - Claude Agent SDK wrapper
- `providers/codex-provider.ts` - Codex CLI subprocess executor
- `providers/codex-cli-detector.ts` - Installation & auth detection
- `providers/codex-config-manager.ts` - TOML config management
- `providers/provider-factory.ts` - Model-based provider routing
- `lib/subprocess-manager.ts` - Reusable subprocess utilities

## Features Implemented

### Codex CLI Integration
- Spawns Codex CLI as subprocess with JSONL output
- Converts Codex events to Claude SDK-compatible format
- Supports both `codex login` and OPENAI_API_KEY auth methods
- Handles: reasoning, messages, commands, todos, file changes
- Extracts text from content blocks for non-vision CLI

### Conversation History
- Added conversationHistory support to ExecuteOptions
- ClaudeProvider: yields previous messages to SDK
- CodexProvider: prepends history as text context
- Follow-up prompts maintain full conversation context

### Image Upload Support
- Images embedded as base64 for vision models
- Image paths appended to prompt text for Read tool access
- Auto-mode: copies images to feature folder
- Follow-up: combines original + new images
- Updates feature.json with image metadata

### Session Model Persistence
- Added `model` field to Session and SessionMetadata
- Sessions remember model preference across interactions
- API endpoints accept model parameter
- Auto-mode respects feature's model setting

## Modified Files

### Services
- `agent-service.ts`:
  - Added conversation history building
  - Uses ProviderFactory instead of direct SDK calls
  - Appends image paths to prompts
  - Added model parameter and persistence

- `auto-mode-service.ts`:
  - Removed OpenAI model block restriction
  - Uses ProviderFactory for all models
  - Added image support in buildFeaturePrompt
  - Follow-up: loads context, copies images, updates feature.json
  - Returns to waiting_approval after follow-up

### Routes
- `agent.ts`: Added model parameter to /send endpoint
- `sessions.ts`: Added model field to create/update
- `models.ts`: Added Codex models (gpt-5.2, gpt-5.1-codex*)

### Configuration
- `.env.example`: Added OPENAI_API_KEY and CODEX_CLI_PATH
- `.gitignore`: Added provider-specific ignores

## Bug Fixes
- Fixed image path resolution (relative → absolute)
- Fixed Codex empty prompt when images attached
- Fixed follow-up status management (in_progress → waiting_approval)
- Fixed follow-up images not appearing in prompt text
- Removed OpenAI model restrictions in auto-mode

## Testing Notes
- Codex CLI authentication verified with both methods
- Image uploads work for both Claude (vision) and Codex (Read tool)
- Follow-up prompts maintain full context
- Conversation history persists across turns
- Model switching works per-session

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Kacper
2025-12-13 03:45:41 +01:00
parent 55603cb5c7
commit a65b16cbae
15 changed files with 2404 additions and 89 deletions

View File

@@ -1,12 +1,14 @@
/**
* Agent Service - Runs Claude agents via the Claude Agent SDK
* Agent Service - Runs AI agents via provider architecture
* Manages conversation sessions and streams responses via WebSocket
*/
import { query, AbortError, type Options } from "@anthropic-ai/claude-agent-sdk";
import { AbortError } from "@anthropic-ai/claude-agent-sdk";
import path from "path";
import fs from "fs/promises";
import type { EventEmitter } from "../lib/events.js";
import { ProviderFactory } from "../providers/provider-factory.js";
import type { ExecuteOptions } from "../providers/types.js";
interface Message {
id: string;
@@ -26,6 +28,7 @@ interface Session {
isRunning: boolean;
abortController: AbortController | null;
workingDirectory: string;
model?: string;
}
interface SessionMetadata {
@@ -37,6 +40,7 @@ interface SessionMetadata {
updatedAt: string;
archived?: boolean;
tags?: string[];
model?: string;
}
export class AgentService {
@@ -91,11 +95,13 @@ export class AgentService {
message,
workingDirectory,
imagePaths,
model,
}: {
sessionId: string;
message: string;
workingDirectory?: string;
imagePaths?: string[];
model?: string;
}) {
const session = this.sessions.get(sessionId);
if (!session) {
@@ -106,6 +112,12 @@ export class AgentService {
throw new Error("Agent is already processing a message");
}
// Update session model if provided
if (model) {
session.model = model;
await this.updateSession(sessionId, { model });
}
// Read images and convert to base64
const images: Message["images"] = [];
if (imagePaths && imagePaths.length > 0) {
@@ -143,6 +155,12 @@ export class AgentService {
timestamp: new Date().toISOString(),
};
// Build conversation history from existing messages BEFORE adding current message
const conversationHistory = session.messages.map((msg) => ({
role: msg.role,
content: msg.content,
}));
session.messages.push(userMessage);
session.isRunning = true;
session.abortController = new AbortController();
@@ -156,11 +174,23 @@ export class AgentService {
await this.saveSession(sessionId, session.messages);
try {
const options: Options = {
model: "claude-opus-4-5-20251101",
// Use session model, parameter model, or default
const effectiveModel = model || session.model || "claude-opus-4-5-20251101";
// Get provider for this model
const provider = ProviderFactory.getProviderForModel(effectiveModel);
console.log(
`[AgentService] Using provider "${provider.getName()}" for model "${effectiveModel}"`
);
// Build options for provider
const options: ExecuteOptions = {
prompt: "", // Will be set below based on images
model: effectiveModel,
cwd: workingDirectory || session.workingDirectory,
systemPrompt: this.getSystemPrompt(),
maxTurns: 20,
cwd: workingDirectory || session.workingDirectory,
allowedTools: [
"Read",
"Write",
@@ -171,23 +201,28 @@ export class AgentService {
"WebSearch",
"WebFetch",
],
permissionMode: "acceptEdits",
sandbox: {
enabled: true,
autoAllowBashIfSandboxed: true,
},
abortController: session.abortController!,
conversationHistory: conversationHistory.length > 0 ? conversationHistory : undefined,
};
// Build prompt content
let promptContent: string | Array<{ type: string; text?: string; source?: object }> =
message;
// Append image paths to prompt text (like old implementation)
if (imagePaths && imagePaths.length > 0) {
let enhancedMessage = message;
// Append image file paths to the message text
enhancedMessage += "\n\nAttached images:\n";
for (const imagePath of imagePaths) {
enhancedMessage += `- ${imagePath}\n`;
}
const contentBlocks: Array<{ type: string; text?: string; source?: object }> = [];
if (message && message.trim()) {
contentBlocks.push({ type: "text", text: message });
if (enhancedMessage && enhancedMessage.trim()) {
contentBlocks.push({ type: "text", text: enhancedMessage });
}
for (const imagePath of imagePaths) {
@@ -219,25 +254,16 @@ export class AgentService {
if (contentBlocks.length > 1 || contentBlocks[0]?.type === "image") {
promptContent = contentBlocks;
} else {
promptContent = enhancedMessage;
}
}
// Build payload
const promptPayload = Array.isArray(promptContent)
? (async function* () {
yield {
type: "user" as const,
session_id: "",
message: {
role: "user" as const,
content: promptContent,
},
parent_tool_use_id: null,
};
})()
: promptContent;
// Set the prompt in options
options.prompt = promptContent;
const stream = query({ prompt: promptPayload, options });
// Execute via provider
const stream = provider.executeQuery(options);
let currentAssistantMessage: Message | null = null;
let responseText = "";
@@ -245,7 +271,7 @@ export class AgentService {
for await (const msg of stream) {
if (msg.type === "assistant") {
if (msg.message.content) {
if (msg.message?.content) {
for (const block of msg.message.content) {
if (block.type === "text") {
responseText += block.text;
@@ -270,7 +296,7 @@ export class AgentService {
});
} else if (block.type === "tool_use") {
const toolUse = {
name: block.name,
name: block.name || "unknown",
input: block.input,
};
toolUses.push(toolUse);
@@ -450,7 +476,8 @@ export class AgentService {
async createSession(
name: string,
projectPath?: string,
workingDirectory?: string
workingDirectory?: string,
model?: string
): Promise<SessionMetadata> {
const sessionId = this.generateId();
const metadata = await this.loadMetadata();
@@ -462,6 +489,7 @@ export class AgentService {
workingDirectory: workingDirectory || projectPath || process.cwd(),
createdAt: new Date().toISOString(),
updatedAt: new Date().toISOString(),
model,
};
metadata[sessionId] = session;
@@ -470,6 +498,16 @@ export class AgentService {
return session;
}
async setSessionModel(sessionId: string, model: string): Promise<boolean> {
const session = this.sessions.get(sessionId);
if (session) {
session.model = model;
await this.updateSession(sessionId, { model });
return true;
}
return false;
}
async updateSession(
sessionId: string,
updates: Partial<SessionMetadata>

View File

@@ -9,7 +9,9 @@
* - Verification and merge workflows
*/
import { query, AbortError, type Options } from "@anthropic-ai/claude-agent-sdk";
import { AbortError } from "@anthropic-ai/claude-agent-sdk";
import { ProviderFactory } from "../providers/provider-factory.js";
import type { ExecuteOptions } from "../providers/types.js";
import { exec } from "child_process";
import { promisify } from "util";
import path from "path";
@@ -33,6 +35,7 @@ interface Feature {
priority?: number;
spec?: string;
model?: string; // Model to use for this feature
imagePaths?: Array<string | { path: string; filename?: string; mimeType?: string; [key: string]: unknown }>;
}
/**
@@ -237,12 +240,17 @@ export class AutoModeService {
// Build the prompt
const prompt = this.buildFeaturePrompt(feature);
// Extract image paths from feature
const imagePaths = feature.imagePaths?.map((img) =>
typeof img === "string" ? img : img.path
);
// Get model from feature
const model = getModelString(feature);
console.log(`[AutoMode] Executing feature ${featureId} with model: ${model}`);
// Run the agent with the feature's model
await this.runAgent(workDir, featureId, prompt, abortController, undefined, model);
// Run the agent with the feature's model and images
await this.runAgent(workDir, featureId, prompt, abortController, imagePaths, model);
// Mark as waiting_approval for user review
await this.updateFeatureStatus(projectPath, featureId, "waiting_approval");
@@ -377,7 +385,126 @@ export class AutoModeService {
const model = feature ? getModelString(feature) : MODEL_MAP.opus;
console.log(`[AutoMode] Follow-up for feature ${featureId} using model: ${model}`);
await this.runAgent(workDir, featureId, prompt, abortController, imagePaths, model);
// Update feature status to in_progress
await this.updateFeatureStatus(projectPath, featureId, "in_progress");
// Copy follow-up images to feature folder
const copiedImagePaths: string[] = [];
if (imagePaths && imagePaths.length > 0) {
const featureImagesDir = path.join(
projectPath,
".automaker",
"features",
featureId,
"images"
);
await fs.mkdir(featureImagesDir, { recursive: true });
for (const imagePath of imagePaths) {
try {
// Get the filename from the path
const filename = path.basename(imagePath);
const destPath = path.join(featureImagesDir, filename);
// Copy the image
await fs.copyFile(imagePath, destPath);
// Store the relative path (like FeatureLoader does)
const relativePath = path.join(
".automaker",
"features",
featureId,
"images",
filename
);
copiedImagePaths.push(relativePath);
} catch (error) {
console.error(`[AutoMode] Failed to copy follow-up image ${imagePath}:`, error);
}
}
}
// Update feature object with new follow-up images BEFORE building prompt
if (copiedImagePaths.length > 0 && feature) {
const currentImagePaths = feature.imagePaths || [];
const newImagePaths = copiedImagePaths.map((p) => ({
path: p,
filename: path.basename(p),
mimeType: "image/png", // Default, could be improved
}));
feature.imagePaths = [...currentImagePaths, ...newImagePaths];
}
// Load previous agent output for context
const outputPath = path.join(
workDir,
".automaker",
"features",
featureId,
"agent-output.md"
);
let previousContext = "";
try {
previousContext = await fs.readFile(outputPath, "utf-8");
} catch {
// No previous context
}
// Build follow-up prompt with context (feature now includes new images)
let followUpPrompt = prompt;
if (previousContext) {
followUpPrompt = `## Follow-up Request
${this.buildFeaturePrompt(feature!)}
## Previous Work
The following is the output from the previous implementation:
${previousContext}
---
## New Instructions
${prompt}
Please continue from where you left off and address the new instructions above.`;
}
// Combine original feature images with new follow-up images
const allImagePaths: string[] = [];
// Add all images from feature (now includes both original and new)
if (feature?.imagePaths) {
const allPaths = feature.imagePaths.map((img) =>
typeof img === "string" ? img : img.path
);
allImagePaths.push(...allPaths);
}
// Save updated feature.json with new images
if (copiedImagePaths.length > 0 && feature) {
const featurePath = path.join(
projectPath,
".automaker",
"features",
featureId,
"feature.json"
);
try {
await fs.writeFile(featurePath, JSON.stringify(feature, null, 2));
} catch (error) {
console.error(`[AutoMode] Failed to save feature.json:`, error);
}
}
await this.runAgent(workDir, featureId, followUpPrompt, abortController, allImagePaths.length > 0 ? allImagePaths : undefined, model);
// Mark as waiting_approval for user review
await this.updateFeatureStatus(projectPath, featureId, "waiting_approval");
this.emitAutoModeEvent("auto_mode_feature_complete", {
featureId,
@@ -544,23 +671,25 @@ export class AutoModeService {
Format your response as a structured markdown document.`;
try {
const options: Options = {
const provider = ProviderFactory.getProviderForModel("claude-sonnet-4-20250514");
const options: ExecuteOptions = {
prompt,
model: "claude-sonnet-4-20250514",
maxTurns: 5,
cwd: projectPath,
allowedTools: ["Read", "Glob", "Grep"],
permissionMode: "acceptEdits",
abortController,
};
const stream = query({ prompt, options });
const stream = provider.executeQuery(options);
let analysisResult = "";
for await (const msg of stream) {
if (msg.type === "assistant" && msg.message.content) {
if (msg.type === "assistant" && msg.message?.content) {
for (const block of msg.message.content) {
if (block.type === "text") {
analysisResult = block.text;
analysisResult = block.text || "";
this.emitAutoModeEvent("auto_mode_progress", {
featureId: analysisFeatureId,
content: block.text,
@@ -736,6 +865,27 @@ ${feature.spec}
`;
}
// Add images note (like old implementation)
if (feature.imagePaths && feature.imagePaths.length > 0) {
const imagesList = feature.imagePaths
.map((img, idx) => {
const path = typeof img === "string" ? img : img.path;
const filename = typeof img === "string" ? path.split("/").pop() : img.filename || path.split("/").pop();
const mimeType = typeof img === "string" ? "image/*" : img.mimeType || "image/*";
return ` ${idx + 1}. ${filename} (${mimeType})\n Path: ${path}`;
})
.join("\n");
prompt += `
**📎 Context Images Attached:**
The user has attached ${feature.imagePaths.length} image(s) for context. These images are provided both visually (in the initial message) and as files you can read:
${imagesList}
You can use the Read tool to view these images at any time during implementation. Review them carefully before implementing.
`;
}
prompt += `
## Instructions
@@ -761,17 +911,62 @@ When done, summarize what you implemented and any notes for the developer.`;
): Promise<void> {
const finalModel = model || MODEL_MAP.opus;
console.log(`[AutoMode] runAgent called for feature ${featureId} with model: ${finalModel}`);
// Check if this is an OpenAI/Codex model - Claude Agent SDK doesn't support these
if (finalModel.startsWith("gpt-") || finalModel.startsWith("o")) {
const errorMessage = `OpenAI/Codex models (like "${finalModel}") are not yet supported in server mode. ` +
`Please use a Claude model (opus, sonnet, or haiku) instead. ` +
`OpenAI/Codex models are only supported in the Electron app.`;
console.error(`[AutoMode] ${errorMessage}`);
throw new Error(errorMessage);
// Get provider for this model
const provider = ProviderFactory.getProviderForModel(finalModel);
console.log(
`[AutoMode] Using provider "${provider.getName()}" for model "${finalModel}"`
);
// Build prompt content with images (like AgentService)
let promptContent: string | Array<{ type: string; text?: string; source?: object }> = prompt;
if (imagePaths && imagePaths.length > 0) {
const contentBlocks: Array<{ type: string; text?: string; source?: object }> = [];
// Add text block first
contentBlocks.push({ type: "text", text: prompt });
// Add image blocks (for vision models)
for (const imagePath of imagePaths) {
try {
// Make path absolute by prepending workDir if it's relative
const absolutePath = path.isAbsolute(imagePath)
? imagePath
: path.join(workDir, imagePath);
const imageBuffer = await fs.readFile(absolutePath);
const base64Data = imageBuffer.toString("base64");
const ext = path.extname(imagePath).toLowerCase();
const mimeTypeMap: Record<string, string> = {
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".png": "image/png",
".gif": "image/gif",
".webp": "image/webp",
};
const mediaType = mimeTypeMap[ext] || "image/png";
contentBlocks.push({
type: "image",
source: {
type: "base64",
media_type: mediaType,
data: base64Data,
},
});
} catch (error) {
console.error(`[AutoMode] Failed to load image ${imagePath}:`, error);
}
}
promptContent = contentBlocks;
}
const options: Options = {
const options: ExecuteOptions = {
prompt: promptContent,
model: finalModel,
maxTurns: 50,
cwd: workDir,
@@ -783,35 +978,24 @@ When done, summarize what you implemented and any notes for the developer.`;
"Grep",
"Bash",
],
permissionMode: "acceptEdits",
sandbox: {
enabled: true,
autoAllowBashIfSandboxed: true,
},
abortController,
};
// Build prompt - include image paths for the agent to read
let finalPrompt = prompt;
if (imagePaths && imagePaths.length > 0) {
finalPrompt = `${prompt}\n\n## Reference Images\nThe following images are available for reference. Use the Read tool to view them:\n${imagePaths.map((p) => `- ${p}`).join("\n")}`;
}
const stream = query({ prompt: finalPrompt, options });
// Execute via provider
const stream = provider.executeQuery(options);
let responseText = "";
const outputPath = path.join(workDir, ".automaker", "features", featureId, "agent-output.md");
for await (const msg of stream) {
if (msg.type === "assistant" && msg.message.content) {
if (msg.type === "assistant" && msg.message?.content) {
for (const block of msg.message.content) {
if (block.type === "text") {
responseText = block.text;
responseText = block.text || "";
// Check for authentication errors in the response
if (block.text.includes("Invalid API key") ||
if (block.text && (block.text.includes("Invalid API key") ||
block.text.includes("authentication_failed") ||
block.text.includes("Fix external API key")) {
block.text.includes("Fix external API key"))) {
throw new Error(
"Authentication failed: Invalid or expired API key. " +
"Please check your ANTHROPIC_API_KEY or run 'claude login' to re-authenticate."
@@ -830,20 +1014,10 @@ When done, summarize what you implemented and any notes for the developer.`;
});
}
}
} else if (msg.type === "assistant" && (msg as { error?: string }).error === "authentication_failed") {
// Handle authentication error from the SDK
throw new Error(
"Authentication failed: Invalid or expired API key. " +
"Please set a valid ANTHROPIC_API_KEY environment variable or run 'claude login' to authenticate."
);
} else if (msg.type === "error") {
// Handle error messages
throw new Error(msg.error || "Unknown error");
} else if (msg.type === "result" && msg.subtype === "success") {
// Check if result indicates an error
if (msg.is_error && msg.result?.includes("Invalid API key")) {
throw new Error(
"Authentication failed: Invalid or expired API key. " +
"Please set a valid ANTHROPIC_API_KEY environment variable or run 'claude login' to authenticate."
);
}
responseText = msg.result || responseText;
}
}