feat: implement modular provider architecture with Codex CLI support

Implements a flexible provider pattern that supports both Claude Agent SDK
and OpenAI Codex CLI, enabling future expansion to other AI providers
(Cursor, OpenCode, etc.) with minimal changes.

## Architecture Changes

### New Provider System
- Created provider abstraction layer with BaseProvider interface
- Model-based routing: model prefix determines provider
  - `gpt-*`, `o*` → CodexProvider (subprocess CLI)
  - `claude-*`, `opus/sonnet/haiku` → ClaudeProvider (SDK)
- Providers implement common ExecuteOptions interface

### New Files Created
- `providers/types.ts` - Shared interfaces (ExecuteOptions, ProviderMessage, etc.)
- `providers/base-provider.ts` - Abstract base class
- `providers/claude-provider.ts` - Claude Agent SDK wrapper
- `providers/codex-provider.ts` - Codex CLI subprocess executor
- `providers/codex-cli-detector.ts` - Installation & auth detection
- `providers/codex-config-manager.ts` - TOML config management
- `providers/provider-factory.ts` - Model-based provider routing
- `lib/subprocess-manager.ts` - Reusable subprocess utilities

## Features Implemented

### Codex CLI Integration
- Spawns Codex CLI as subprocess with JSONL output
- Converts Codex events to Claude SDK-compatible format
- Supports both `codex login` and OPENAI_API_KEY auth methods
- Handles: reasoning, messages, commands, todos, file changes
- Extracts plain text from content blocks for the non-vision Codex CLI

### Conversation History
- Added conversationHistory support to ExecuteOptions
- ClaudeProvider: yields previous messages to SDK
- CodexProvider: prepends history as text context
- Follow-up prompts maintain full conversation context

### Image Upload Support
- Images embedded as base64 for vision models
- Image paths appended to prompt text for Read tool access
- Auto-mode: copies images to feature folder
- Follow-up: combines original + new images
- Updates feature.json with image metadata

### Session Model Persistence
- Added `model` field to Session and SessionMetadata
- Sessions remember model preference across interactions
- API endpoints accept model parameter
- Auto-mode respects feature's model setting

## Modified Files

### Services
- `agent-service.ts`:
  - Added conversation history building
  - Uses ProviderFactory instead of direct SDK calls
  - Appends image paths to prompts
  - Added model parameter and persistence

- `auto-mode-service.ts`:
  - Removed the restriction that blocked OpenAI models
  - Uses ProviderFactory for all models
  - Added image support in buildFeaturePrompt
  - Follow-up: loads context, copies images, updates feature.json
  - Returns to waiting_approval after follow-up

### Routes
- `agent.ts`: Added model parameter to /send endpoint
- `sessions.ts`: Added model field to create/update
- `models.ts`: Added Codex models (gpt-5.2, gpt-5.1-codex*)

### Configuration
- `.env.example`: Added OPENAI_API_KEY and CODEX_CLI_PATH
- `.gitignore`: Added provider-specific ignores

## Bug Fixes
- Fixed image path resolution (relative → absolute)
- Fixed Codex empty prompt when images attached
- Fixed follow-up status management (in_progress → waiting_approval)
- Fixed follow-up images not appearing in prompt text
- Removed OpenAI model restrictions in auto-mode

## Testing Notes
- Codex CLI authentication verified with both methods
- Image uploads work for both Claude (vision) and Codex (Read tool)
- Follow-up prompts maintain full context
- Conversation history persists across turns
- Model switching works per-session

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in: Kacper, 2025-12-13 03:45:41 +01:00
parent 55603cb5c7
commit a65b16cbae
15 changed files with 2404 additions and 89 deletions

View File

@@ -1,12 +1,14 @@
/**
* Agent Service - Runs Claude agents via the Claude Agent SDK
* Agent Service - Runs AI agents via provider architecture
* Manages conversation sessions and streams responses via WebSocket
*/
import { query, AbortError, type Options } from "@anthropic-ai/claude-agent-sdk";
import { AbortError } from "@anthropic-ai/claude-agent-sdk";
import path from "path";
import fs from "fs/promises";
import type { EventEmitter } from "../lib/events.js";
import { ProviderFactory } from "../providers/provider-factory.js";
import type { ExecuteOptions } from "../providers/types.js";
interface Message {
id: string;
@@ -26,6 +28,7 @@ interface Session {
isRunning: boolean;
abortController: AbortController | null;
workingDirectory: string;
model?: string;
}
interface SessionMetadata {
@@ -37,6 +40,7 @@ interface SessionMetadata {
updatedAt: string;
archived?: boolean;
tags?: string[];
model?: string;
}
export class AgentService {
@@ -91,11 +95,13 @@ export class AgentService {
message,
workingDirectory,
imagePaths,
model,
}: {
sessionId: string;
message: string;
workingDirectory?: string;
imagePaths?: string[];
model?: string;
}) {
const session = this.sessions.get(sessionId);
if (!session) {
@@ -106,6 +112,12 @@ export class AgentService {
throw new Error("Agent is already processing a message");
}
// Update session model if provided
if (model) {
session.model = model;
await this.updateSession(sessionId, { model });
}
// Read images and convert to base64
const images: Message["images"] = [];
if (imagePaths && imagePaths.length > 0) {
@@ -143,6 +155,12 @@ export class AgentService {
timestamp: new Date().toISOString(),
};
// Build conversation history from existing messages BEFORE adding current message
const conversationHistory = session.messages.map((msg) => ({
role: msg.role,
content: msg.content,
}));
session.messages.push(userMessage);
session.isRunning = true;
session.abortController = new AbortController();
@@ -156,11 +174,23 @@ export class AgentService {
await this.saveSession(sessionId, session.messages);
try {
const options: Options = {
model: "claude-opus-4-5-20251101",
// Use session model, parameter model, or default
const effectiveModel = model || session.model || "claude-opus-4-5-20251101";
// Get provider for this model
const provider = ProviderFactory.getProviderForModel(effectiveModel);
console.log(
`[AgentService] Using provider "${provider.getName()}" for model "${effectiveModel}"`
);
// Build options for provider
const options: ExecuteOptions = {
prompt: "", // Will be set below based on images
model: effectiveModel,
cwd: workingDirectory || session.workingDirectory,
systemPrompt: this.getSystemPrompt(),
maxTurns: 20,
cwd: workingDirectory || session.workingDirectory,
allowedTools: [
"Read",
"Write",
@@ -171,23 +201,28 @@ export class AgentService {
"WebSearch",
"WebFetch",
],
permissionMode: "acceptEdits",
sandbox: {
enabled: true,
autoAllowBashIfSandboxed: true,
},
abortController: session.abortController!,
conversationHistory: conversationHistory.length > 0 ? conversationHistory : undefined,
};
// Build prompt content
let promptContent: string | Array<{ type: string; text?: string; source?: object }> =
message;
// Append image paths to prompt text (like old implementation)
if (imagePaths && imagePaths.length > 0) {
let enhancedMessage = message;
// Append image file paths to the message text
enhancedMessage += "\n\nAttached images:\n";
for (const imagePath of imagePaths) {
enhancedMessage += `- ${imagePath}\n`;
}
const contentBlocks: Array<{ type: string; text?: string; source?: object }> = [];
if (message && message.trim()) {
contentBlocks.push({ type: "text", text: message });
if (enhancedMessage && enhancedMessage.trim()) {
contentBlocks.push({ type: "text", text: enhancedMessage });
}
for (const imagePath of imagePaths) {
@@ -219,25 +254,16 @@ export class AgentService {
if (contentBlocks.length > 1 || contentBlocks[0]?.type === "image") {
promptContent = contentBlocks;
} else {
promptContent = enhancedMessage;
}
}
// Build payload
const promptPayload = Array.isArray(promptContent)
? (async function* () {
yield {
type: "user" as const,
session_id: "",
message: {
role: "user" as const,
content: promptContent,
},
parent_tool_use_id: null,
};
})()
: promptContent;
// Set the prompt in options
options.prompt = promptContent;
const stream = query({ prompt: promptPayload, options });
// Execute via provider
const stream = provider.executeQuery(options);
let currentAssistantMessage: Message | null = null;
let responseText = "";
@@ -245,7 +271,7 @@ export class AgentService {
for await (const msg of stream) {
if (msg.type === "assistant") {
if (msg.message.content) {
if (msg.message?.content) {
for (const block of msg.message.content) {
if (block.type === "text") {
responseText += block.text;
@@ -270,7 +296,7 @@ export class AgentService {
});
} else if (block.type === "tool_use") {
const toolUse = {
name: block.name,
name: block.name || "unknown",
input: block.input,
};
toolUses.push(toolUse);
@@ -450,7 +476,8 @@ export class AgentService {
async createSession(
name: string,
projectPath?: string,
workingDirectory?: string
workingDirectory?: string,
model?: string
): Promise<SessionMetadata> {
const sessionId = this.generateId();
const metadata = await this.loadMetadata();
@@ -462,6 +489,7 @@ export class AgentService {
workingDirectory: workingDirectory || projectPath || process.cwd(),
createdAt: new Date().toISOString(),
updatedAt: new Date().toISOString(),
model,
};
metadata[sessionId] = session;
@@ -470,6 +498,16 @@ export class AgentService {
return session;
}
async setSessionModel(sessionId: string, model: string): Promise<boolean> {
const session = this.sessions.get(sessionId);
if (session) {
session.model = model;
await this.updateSession(sessionId, { model });
return true;
}
return false;
}
async updateSession(
sessionId: string,
updates: Partial<SessionMetadata>