Mirror of https://github.com/leonvanzyl/autocoder.git
Synced 2026-01-31 14:43:35 +00:00

Merge pull request #104 from leonvanzyl/ollama-support

add ollama support

.env.example (+17)

@@ -19,3 +19,20 @@
 # ANTHROPIC_DEFAULT_SONNET_MODEL=glm-4.7
 # ANTHROPIC_DEFAULT_OPUS_MODEL=glm-4.7
 # ANTHROPIC_DEFAULT_HAIKU_MODEL=glm-4.5-air
+
+# Ollama Local Model Configuration (Optional)
+# To use local models via Ollama instead of Claude, uncomment and set these variables.
+# Requires Ollama v0.14.0+ with Anthropic API compatibility.
+# See: https://ollama.com/blog/claude
+#
+# ANTHROPIC_BASE_URL=http://localhost:11434
+# ANTHROPIC_AUTH_TOKEN=ollama
+# API_TIMEOUT_MS=3000000
+# ANTHROPIC_DEFAULT_SONNET_MODEL=qwen3-coder
+# ANTHROPIC_DEFAULT_OPUS_MODEL=qwen3-coder
+# ANTHROPIC_DEFAULT_HAIKU_MODEL=qwen3-coder
+#
+# Model recommendations:
+# - For best results, use a capable coding model like qwen3-coder or deepseek-coder-v2
+# - You can use the same model for all tiers, or different models per tier
+# - Larger models (70B+) work best for Opus tier, smaller (7B-20B) for Haiku
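
The commented block above is the entire Ollama switch. As a minimal sketch (not the repo's startup code), a process reading this .env would classify the backend the same way the client.py change further down does:

```python
# Sketch only: classify the configured backend from ANTHROPIC_BASE_URL,
# mirroring the detection logic added to client.py in this PR.
import os

base_url = os.getenv("ANTHROPIC_BASE_URL", "")

if "localhost:11434" in base_url or "127.0.0.1:11434" in base_url:
    mode = "ollama"      # local models served by Ollama
elif base_url:
    mode = "glm"         # any other override, e.g. a GLM endpoint
else:
    mode = "anthropic"   # no override: hosted Claude models

print(f"Backend mode: {mode}")
```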

.gitignore (vendored, +5)

@@ -76,6 +76,11 @@ ui/playwright-report/
 .dmypy.json
 dmypy.json
 
+# ===================
+# Claude Code
+# ===================
+.claude/settings.local.json
+
 # ===================
 # IDE / Editors
 # ===================

CLAUDE.md (+33)

@@ -256,6 +256,39 @@ python test_security_integration.py
 - `examples/README.md` - Comprehensive guide with use cases, testing, and troubleshooting
 - `PHASE3_SPEC.md` - Specification for mid-session approval feature (future enhancement)
 
+### Ollama Local Models (Optional)
+
+Run coding agents using local models via Ollama v0.14.0+:
+
+1. Install Ollama: https://ollama.com
+2. Start Ollama: `ollama serve`
+3. Pull a coding model: `ollama pull qwen3-coder`
+4. Configure `.env`:
+```
+ANTHROPIC_BASE_URL=http://localhost:11434
+ANTHROPIC_AUTH_TOKEN=ollama
+API_TIMEOUT_MS=3000000
+ANTHROPIC_DEFAULT_SONNET_MODEL=qwen3-coder
+ANTHROPIC_DEFAULT_OPUS_MODEL=qwen3-coder
+ANTHROPIC_DEFAULT_HAIKU_MODEL=qwen3-coder
+```
+5. Run autocoder normally - it will use your local Ollama models
+
+**Recommended coding models:**
+- `qwen3-coder` - Good balance of speed and capability
+- `deepseek-coder-v2` - Strong coding performance
+- `codellama` - Meta's code-focused model
+
+**Model tier mapping:**
+- Use the same model for all tiers, or map different models per capability level
+- Larger models (70B+) work best for Opus tier
+- Smaller models (7B-20B) work well for Haiku tier
+
+**Known limitations:**
+- Smaller context windows than Claude (model-dependent)
+- Extended context beta disabled (not supported by Ollama)
+- Performance depends on local hardware (GPU recommended)
+
 ## Claude Code Integration
 
 - `.claude/commands/create-spec.md` - `/create-spec` slash command for interactive spec creation
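
Before step 5 of the setup above, it can help to confirm the server from step 2 is actually reachable; a minimal sanity-check sketch, assuming the default port 11434 (the plain-text reply is Ollama's usual root response):

```python
# Sketch: verify the local Ollama server is up before pointing autocoder at it.
import urllib.request

try:
    with urllib.request.urlopen("http://localhost:11434", timeout=5) as resp:
        print(resp.read().decode())  # typically "Ollama is running"
except OSError as exc:
    print(f"Ollama not reachable - is `ollama serve` running? ({exc})")
```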

client.py (+10, -2)

@@ -257,9 +257,16 @@ def create_client(
         if value:
             sdk_env[var] = value
 
+    # Detect alternative API mode (Ollama or GLM)
+    base_url = sdk_env.get("ANTHROPIC_BASE_URL", "")
+    is_alternative_api = bool(base_url)
+    is_ollama = "localhost:11434" in base_url or "127.0.0.1:11434" in base_url
+
     if sdk_env:
         print(f" - API overrides: {', '.join(sdk_env.keys())}")
-        if "ANTHROPIC_BASE_URL" in sdk_env:
+        if is_ollama:
+            print(" - Ollama Mode: Using local models")
+        elif "ANTHROPIC_BASE_URL" in sdk_env:
             print(f" - GLM Mode: Using {sdk_env['ANTHROPIC_BASE_URL']}")
 
     # Create a wrapper for bash_security_hook that passes project_dir via context
@@ -336,7 +343,8 @@ def create_client(
         # Enable extended context beta for better handling of long sessions.
         # This provides up to 1M tokens of context with automatic compaction.
         # See: https://docs.anthropic.com/en/api/beta-headers
-        betas=["context-1m-2025-08-07"],
+        # Disabled for alternative APIs (Ollama, GLM) as they don't support Claude-specific betas.
+        betas=[] if is_alternative_api else ["context-1m-2025-08-07"],
         # Note on context management:
         # The Claude Agent SDK handles context management automatically through the
         # underlying Claude Code CLI. When context approaches limits, the CLI
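
The betas gating in the second hunk reads cleanly in isolation; a minimal sketch using the same names as the diff (the standalone helper function is illustrative, not in the repo):

```python
# Sketch of the gating: any base-URL override disables Claude-only beta headers.
def select_betas(base_url: str) -> list[str]:
    is_alternative_api = bool(base_url)  # Ollama or GLM endpoint configured
    return [] if is_alternative_api else ["context-1m-2025-08-07"]

assert select_betas("") == ["context-1m-2025-08-07"]  # hosted Claude: extended context on
assert select_betas("http://localhost:11434") == []   # Ollama: Claude betas unsupported
```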

[settings API router - file name not captured in the mirror]

@@ -40,7 +40,15 @@ def _parse_yolo_mode(value: str | None) -> bool:
 
 def _is_glm_mode() -> bool:
     """Check if GLM API is configured via environment variables."""
-    return bool(os.getenv("ANTHROPIC_BASE_URL"))
+    base_url = os.getenv("ANTHROPIC_BASE_URL", "")
+    # GLM mode is when ANTHROPIC_BASE_URL is set but NOT pointing to Ollama
+    return bool(base_url) and not _is_ollama_mode()
+
+
+def _is_ollama_mode() -> bool:
+    """Check if Ollama API is configured via environment variables."""
+    base_url = os.getenv("ANTHROPIC_BASE_URL", "")
+    return "localhost:11434" in base_url or "127.0.0.1:11434" in base_url
 
 
 @router.get("/models", response_model=ModelsResponse)
@@ -82,6 +90,7 @@ async def get_settings():
         yolo_mode=_parse_yolo_mode(all_settings.get("yolo_mode")),
         model=all_settings.get("model", DEFAULT_MODEL),
         glm_mode=_is_glm_mode(),
+        ollama_mode=_is_ollama_mode(),
         testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
     )
 
@@ -104,5 +113,6 @@ async def update_settings(update: SettingsUpdate):
         yolo_mode=_parse_yolo_mode(all_settings.get("yolo_mode")),
         model=all_settings.get("model", DEFAULT_MODEL),
         glm_mode=_is_glm_mode(),
+        ollama_mode=_is_ollama_mode(),
         testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
     )
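
The new flag reaches the UI through these endpoints; a hedged sketch of a client reading it, where the mount path /api/settings and port 8000 are assumptions not confirmed by the diff:

```python
# Sketch: read the new ollama_mode flag over HTTP (path and port assumed).
import json
import urllib.request

with urllib.request.urlopen("http://localhost:8000/api/settings") as resp:
    settings = json.load(resp)

if settings.get("ollama_mode"):
    print("UI shows the Ollama badge")
elif settings.get("glm_mode"):
    print("UI shows the GLM badge")
```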

[API schema models - file name not captured in the mirror]

@@ -382,6 +382,7 @@ class SettingsResponse(BaseModel):
     yolo_mode: bool = False
     model: str = DEFAULT_MODEL
     glm_mode: bool = False  # True if GLM API is configured via .env
+    ollama_mode: bool = False  # True if Ollama API is configured via .env
     testing_agent_ratio: int = 1  # Regression testing agents (0-3)
 
 

ui/public/ollama.png (BIN, new file)

Binary file not shown. Size: 3.6 KiB.

[UI app component - file name not captured in the mirror]

@@ -298,6 +298,17 @@ function App() {
             <Settings size={18} />
           </button>
 
+          {/* Ollama Mode Indicator */}
+          {settings?.ollama_mode && (
+            <div
+              className="flex items-center gap-1.5 px-2 py-1 bg-white rounded border-2 border-neo-border shadow-neo-sm"
+              title="Using Ollama local models (configured via .env)"
+            >
+              <img src="/ollama.png" alt="Ollama" className="w-5 h-5" />
+              <span className="text-xs font-bold text-neo-text">Ollama</span>
+            </div>
+          )}
+
           {/* GLM Mode Badge */}
           {settings?.glm_mode && (
             <span

[UI settings defaults - file name not captured in the mirror]

@@ -237,6 +237,7 @@ const DEFAULT_SETTINGS: Settings = {
   yolo_mode: false,
   model: 'claude-opus-4-5-20251101',
   glm_mode: false,
+  ollama_mode: false,
   testing_agent_ratio: 1,
 }

[UI type definitions - file name not captured in the mirror]

@@ -526,6 +526,7 @@ export interface Settings {
   yolo_mode: boolean
   model: string
   glm_mode: boolean
+  ollama_mode: boolean
   testing_agent_ratio: number // Regression testing agents (0-3)
 }