diff --git a/.claude/settings.local.json b/.claude/settings.local.json
new file mode 100644
index 0000000..cf757bf
--- /dev/null
+++ b/.claude/settings.local.json
@@ -0,0 +1,8 @@
+{
+ "permissions": {
+ "allow": [
+ "Bash(copy \"C:\\\\Projects\\\\autocoder\\\\assets\\\\ollama.png\" \"C:\\\\Projects\\\\autocoder\\\\ui\\\\public\\\\ollama.png\")",
+ "Bash(npm run build:*)"
+ ]
+ }
+}
diff --git a/.env.example b/.env.example
index e29bec3..aa3d9fa 100644
--- a/.env.example
+++ b/.env.example
@@ -19,3 +19,20 @@
# ANTHROPIC_DEFAULT_SONNET_MODEL=glm-4.7
# ANTHROPIC_DEFAULT_OPUS_MODEL=glm-4.7
# ANTHROPIC_DEFAULT_HAIKU_MODEL=glm-4.5-air
+
+# Ollama Local Model Configuration (Optional)
+# To use local models via Ollama instead of Claude, uncomment and set these variables.
+# Requires Ollama v0.14.0+ with Anthropic API compatibility.
+# See: https://ollama.com/blog/claude
+#
+# ANTHROPIC_BASE_URL=http://localhost:11434
+# ANTHROPIC_AUTH_TOKEN=ollama
+# API_TIMEOUT_MS=3000000
+# ANTHROPIC_DEFAULT_SONNET_MODEL=qwen3-coder
+# ANTHROPIC_DEFAULT_OPUS_MODEL=qwen3-coder
+# ANTHROPIC_DEFAULT_HAIKU_MODEL=qwen3-coder
+#
+# Model recommendations:
+# - For best results, use a capable coding model like qwen3-coder or deepseek-coder-v2
+# - You can use the same model for all tiers, or different models per tier
+# - Larger models (70B+) work best for the Opus tier; smaller models (7B-20B) for the Haiku tier
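
As a quick sanity check before wiring these variables up, it can help to confirm the Ollama server is reachable and the chosen model has been pulled. A minimal sketch against Ollama's standard `/api/tags` listing endpoint (stdlib only, not part of this patch; the environment variable names match those above):

```python
# Sketch: confirm the Ollama endpoint in ANTHROPIC_BASE_URL is reachable and
# that the configured coding model has already been pulled. Not part of the patch.
import json
import os
import urllib.request

base_url = os.getenv("ANTHROPIC_BASE_URL", "http://localhost:11434")
model = os.getenv("ANTHROPIC_DEFAULT_SONNET_MODEL", "qwen3-coder")

with urllib.request.urlopen(f"{base_url}/api/tags", timeout=5) as resp:
    tags = json.load(resp)

names = [m["name"] for m in tags.get("models", [])]
if any(n == model or n.startswith(f"{model}:") for n in names):
    print(f"Ollama is up and '{model}' is available")
else:
    print(f"'{model}' not found locally; run: ollama pull {model}")
```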
diff --git a/CLAUDE.md b/CLAUDE.md
index 29cc2a5..c7a1b93 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -256,6 +256,39 @@ python test_security_integration.py
- `examples/README.md` - Comprehensive guide with use cases, testing, and troubleshooting
- `PHASE3_SPEC.md` - Specification for mid-session approval feature (future enhancement)
+### Ollama Local Models (Optional)
+
+Run coding agents using local models via Ollama v0.14.0+:
+
+1. Install Ollama: https://ollama.com
+2. Start Ollama: `ollama serve`
+3. Pull a coding model: `ollama pull qwen3-coder`
+4. Configure `.env`:
+ ```
+ ANTHROPIC_BASE_URL=http://localhost:11434
+ ANTHROPIC_AUTH_TOKEN=ollama
+ API_TIMEOUT_MS=3000000
+ ANTHROPIC_DEFAULT_SONNET_MODEL=qwen3-coder
+ ANTHROPIC_DEFAULT_OPUS_MODEL=qwen3-coder
+ ANTHROPIC_DEFAULT_HAIKU_MODEL=qwen3-coder
+ ```
+5. Run autocoder normally - it will use your local Ollama models
+
+**Recommended coding models:**
+- `qwen3-coder` - Good balance of speed and capability
+- `deepseek-coder-v2` - Strong coding performance
+- `codellama` - Meta's code-focused model
+
+**Model tier mapping:**
+- Use the same model for all tiers, or map different models per capability level
+- Larger models (70B+) work best for Opus tier
+- Smaller models (7B-20B) work well for Haiku tier
+
+**Known limitations:**
+- Smaller context windows than Claude (model-dependent)
+- Extended context beta disabled (not supported by Ollama)
+- Performance depends on local hardware (GPU recommended)
+
## Claude Code Integration
- `.claude/commands/create-spec.md` - `/create-spec` slash command for interactive spec creation
diff --git a/assets/ollama.png b/assets/ollama.png
new file mode 100644
index 0000000..9f559ae
Binary files /dev/null and b/assets/ollama.png differ
diff --git a/client.py b/client.py
index e844aa4..7994f64 100644
--- a/client.py
+++ b/client.py
@@ -257,9 +257,16 @@ def create_client(
if value:
sdk_env[var] = value
+ # Detect alternative API mode (Ollama or GLM)
+ base_url = sdk_env.get("ANTHROPIC_BASE_URL", "")
+ is_alternative_api = bool(base_url)
+ is_ollama = "localhost:11434" in base_url or "127.0.0.1:11434" in base_url
+
if sdk_env:
print(f" - API overrides: {', '.join(sdk_env.keys())}")
- if "ANTHROPIC_BASE_URL" in sdk_env:
+ if is_ollama:
+ print(" - Ollama Mode: Using local models")
+ elif "ANTHROPIC_BASE_URL" in sdk_env:
print(f" - GLM Mode: Using {sdk_env['ANTHROPIC_BASE_URL']}")
# Create a wrapper for bash_security_hook that passes project_dir via context
@@ -336,7 +343,8 @@ def create_client(
# Enable extended context beta for better handling of long sessions.
# This provides up to 1M tokens of context with automatic compaction.
# See: https://docs.anthropic.com/en/api/beta-headers
- betas=["context-1m-2025-08-07"],
+ # Disabled for alternative APIs (Ollama, GLM) as they don't support Claude-specific betas.
+ betas=[] if is_alternative_api else ["context-1m-2025-08-07"],
# Note on context management:
# The Claude Agent SDK handles context management automatically through the
# underlying Claude Code CLI. When context approaches limits, the CLI
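
Condensed, the base-URL check added here drives both the startup log line and the beta-header selection; a standalone restatement of that flow (illustrative only, not how `create_client()` is factored, and the SDK call itself is elided):

```python
import os

# Standalone restatement of the detection logic this hunk adds; illustrative only.
base_url = os.getenv("ANTHROPIC_BASE_URL", "")
is_alternative_api = bool(base_url)  # any override: Ollama or GLM
is_ollama = "localhost:11434" in base_url or "127.0.0.1:11434" in base_url

if is_ollama:
    print("   - Ollama Mode: Using local models")
elif is_alternative_api:
    print(f"   - GLM Mode: Using {base_url}")

# Claude-specific beta headers are only requested against the real Anthropic API.
betas = [] if is_alternative_api else ["context-1m-2025-08-07"]
```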
diff --git a/server/routers/settings.py b/server/routers/settings.py
index cf16045..8f3f906 100644
--- a/server/routers/settings.py
+++ b/server/routers/settings.py
@@ -40,7 +40,15 @@ def _parse_yolo_mode(value: str | None) -> bool:
def _is_glm_mode() -> bool:
"""Check if GLM API is configured via environment variables."""
- return bool(os.getenv("ANTHROPIC_BASE_URL"))
+ base_url = os.getenv("ANTHROPIC_BASE_URL", "")
+ # GLM mode is when ANTHROPIC_BASE_URL is set but NOT pointing to Ollama
+ return bool(base_url) and not _is_ollama_mode()
+
+
+def _is_ollama_mode() -> bool:
+ """Check if Ollama API is configured via environment variables."""
+ base_url = os.getenv("ANTHROPIC_BASE_URL", "")
+ return "localhost:11434" in base_url or "127.0.0.1:11434" in base_url
@router.get("/models", response_model=ModelsResponse)
@@ -82,6 +90,7 @@ async def get_settings():
yolo_mode=_parse_yolo_mode(all_settings.get("yolo_mode")),
model=all_settings.get("model", DEFAULT_MODEL),
glm_mode=_is_glm_mode(),
+ ollama_mode=_is_ollama_mode(),
testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
)
@@ -104,5 +113,6 @@ async def update_settings(update: SettingsUpdate):
yolo_mode=_parse_yolo_mode(all_settings.get("yolo_mode")),
model=all_settings.get("model", DEFAULT_MODEL),
glm_mode=_is_glm_mode(),
+ ollama_mode=_is_ollama_mode(),
testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
)
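
Note the two flags are mutually exclusive by construction: the local Ollama address reports `ollama_mode`, and any other non-empty `ANTHROPIC_BASE_URL` falls through to `glm_mode`. A hedged mirror of the two helpers covering the three outcomes (illustrative; the GLM URL below is a placeholder, not a real endpoint):

```python
import os

def _mode_flags() -> tuple[bool, bool]:
    """Mirror of _is_glm_mode()/_is_ollama_mode() for illustration; not imported from the router."""
    base_url = os.getenv("ANTHROPIC_BASE_URL", "")
    ollama = "localhost:11434" in base_url or "127.0.0.1:11434" in base_url
    glm = bool(base_url) and not ollama
    return glm, ollama

for url, expected in [
    ("", (False, False)),                                # no override: stock Anthropic API
    ("http://localhost:11434", (False, True)),           # Ollama mode
    ("https://glm.example.invalid/api", (True, False)),  # placeholder non-Ollama override -> GLM mode
]:
    os.environ["ANTHROPIC_BASE_URL"] = url
    assert _mode_flags() == expected
```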
diff --git a/server/schemas.py b/server/schemas.py
index 844aaa1..0a2807c 100644
--- a/server/schemas.py
+++ b/server/schemas.py
@@ -382,6 +382,7 @@ class SettingsResponse(BaseModel):
yolo_mode: bool = False
model: str = DEFAULT_MODEL
glm_mode: bool = False # True if GLM API is configured via .env
+ ollama_mode: bool = False # True if Ollama API is configured via .env
testing_agent_ratio: int = 1 # Regression testing agents (0-3)
diff --git a/ui/public/ollama.png b/ui/public/ollama.png
new file mode 100644
index 0000000..9f559ae
Binary files /dev/null and b/ui/public/ollama.png differ
diff --git a/ui/src/App.tsx b/ui/src/App.tsx
index 59ed0ab..0483ab7 100644
--- a/ui/src/App.tsx
+++ b/ui/src/App.tsx
@@ -298,6 +298,17 @@ function App() {
+ Ollama
+