mirror of
https://github.com/leonvanzyl/autocoder.git
synced 2026-03-17 02:43:09 +00:00
feat: add API provider selection UI and fix stuck features on agent crash
API Provider Selection: - Add provider switcher in Settings modal (Claude, Kimi, GLM, Ollama, Custom) - Auth tokens stored locally only (registry.db), never returned by API - get_effective_sdk_env() builds provider-specific env vars for agent subprocess - All chat sessions (spec, expand, assistant) use provider settings - Backward compatible: defaults to Claude, env vars still work as override Fix Stuck Features: - Add _cleanup_stale_features() to process_manager.py - Reset in_progress features when agent stops, crashes, or fails healthcheck - Prevents features from being permanently stuck after rate limit crashes - Uses separate SQLAlchemy engine to avoid session conflicts with subprocess Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -258,15 +258,11 @@ class AssistantChatSession:
|
||||
system_cli = shutil.which("claude")
|
||||
|
||||
# Build environment overrides for API configuration
|
||||
sdk_env: dict[str, str] = {}
|
||||
for var in API_ENV_VARS:
|
||||
value = os.getenv(var)
|
||||
if value:
|
||||
sdk_env[var] = value
|
||||
from registry import get_effective_sdk_env
|
||||
sdk_env = get_effective_sdk_env()
|
||||
|
||||
# Determine model from environment or use default
|
||||
# This allows using alternative APIs (e.g., GLM via z.ai) that may not support Claude model names
|
||||
model = os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL", "claude-opus-4-5-20251101")
|
||||
# Determine model from SDK env (provider-aware) or fallback to env/default
|
||||
model = sdk_env.get("ANTHROPIC_DEFAULT_OPUS_MODEL") or os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL", "claude-opus-4-5-20251101")
|
||||
|
||||
try:
|
||||
logger.info("Creating ClaudeSDKClient...")
|
||||
|
||||
@@ -154,16 +154,11 @@ class ExpandChatSession:
|
||||
system_prompt = skill_content.replace("$ARGUMENTS", project_path)
|
||||
|
||||
# Build environment overrides for API configuration
|
||||
# Filter to only include vars that are actually set (non-None)
|
||||
sdk_env: dict[str, str] = {}
|
||||
for var in API_ENV_VARS:
|
||||
value = os.getenv(var)
|
||||
if value:
|
||||
sdk_env[var] = value
|
||||
from registry import get_effective_sdk_env
|
||||
sdk_env = get_effective_sdk_env()
|
||||
|
||||
# Determine model from environment or use default
|
||||
# This allows using alternative APIs (e.g., GLM via z.ai) that may not support Claude model names
|
||||
model = os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL", "claude-opus-4-5-20251101")
|
||||
# Determine model from SDK env (provider-aware) or fallback to env/default
|
||||
model = sdk_env.get("ANTHROPIC_DEFAULT_OPUS_MODEL") or os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL", "claude-opus-4-5-20251101")
|
||||
|
||||
# Build MCP servers config for feature creation
|
||||
mcp_servers = {
|
||||
|
||||
@@ -227,6 +227,46 @@ class AgentProcessManager:
|
||||
"""Remove lock file."""
|
||||
self.lock_file.unlink(missing_ok=True)
|
||||
|
||||
def _cleanup_stale_features(self) -> None:
|
||||
"""Clear in_progress flag for all features when agent stops/crashes.
|
||||
|
||||
When the agent process exits (normally or crash), any features left
|
||||
with in_progress=True were being worked on and didn't complete.
|
||||
Reset them so they can be picked up on next agent start.
|
||||
"""
|
||||
try:
|
||||
from autoforge_paths import get_features_db_path
|
||||
features_db = get_features_db_path(self.project_dir)
|
||||
if not features_db.exists():
|
||||
return
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from api.database import Feature
|
||||
|
||||
engine = create_engine(f"sqlite:///{features_db}")
|
||||
Session = sessionmaker(bind=engine)
|
||||
session = Session()
|
||||
try:
|
||||
stuck = session.query(Feature).filter(
|
||||
Feature.in_progress == True, # noqa: E712
|
||||
Feature.passes == False, # noqa: E712
|
||||
).all()
|
||||
if stuck:
|
||||
for f in stuck:
|
||||
f.in_progress = False
|
||||
session.commit()
|
||||
logger.info(
|
||||
"Cleaned up %d stuck feature(s) for %s",
|
||||
len(stuck), self.project_name,
|
||||
)
|
||||
finally:
|
||||
session.close()
|
||||
engine.dispose()
|
||||
except Exception as e:
|
||||
logger.warning("Failed to cleanup features for %s: %s", self.project_name, e)
|
||||
|
||||
async def _broadcast_output(self, line: str) -> None:
|
||||
"""Broadcast output line to all registered callbacks."""
|
||||
with self._callbacks_lock:
|
||||
@@ -288,6 +328,7 @@ class AgentProcessManager:
|
||||
self.status = "crashed"
|
||||
elif self.status == "running":
|
||||
self.status = "stopped"
|
||||
self._cleanup_stale_features()
|
||||
self._remove_lock()
|
||||
|
||||
async def start(
|
||||
@@ -359,12 +400,22 @@ class AgentProcessManager:
|
||||
# stdin=DEVNULL prevents blocking if Claude CLI or child process tries to read stdin
|
||||
# CREATE_NO_WINDOW on Windows prevents console window pop-ups
|
||||
# PYTHONUNBUFFERED ensures output isn't delayed
|
||||
# Build subprocess environment with API provider settings
|
||||
from registry import get_effective_sdk_env
|
||||
api_env = get_effective_sdk_env()
|
||||
subprocess_env = {
|
||||
**os.environ,
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PLAYWRIGHT_HEADLESS": "true" if playwright_headless else "false",
|
||||
**api_env,
|
||||
}
|
||||
|
||||
popen_kwargs: dict[str, Any] = {
|
||||
"stdin": subprocess.DEVNULL,
|
||||
"stdout": subprocess.PIPE,
|
||||
"stderr": subprocess.STDOUT,
|
||||
"cwd": str(self.project_dir),
|
||||
"env": {**os.environ, "PYTHONUNBUFFERED": "1", "PLAYWRIGHT_HEADLESS": "true" if playwright_headless else "false"},
|
||||
"env": subprocess_env,
|
||||
}
|
||||
if sys.platform == "win32":
|
||||
popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
|
||||
@@ -425,6 +476,7 @@ class AgentProcessManager:
|
||||
result.children_terminated, result.children_killed
|
||||
)
|
||||
|
||||
self._cleanup_stale_features()
|
||||
self._remove_lock()
|
||||
self.status = "stopped"
|
||||
self.process = None
|
||||
@@ -502,6 +554,7 @@ class AgentProcessManager:
|
||||
if poll is not None:
|
||||
# Process has terminated
|
||||
if self.status in ("running", "paused"):
|
||||
self._cleanup_stale_features()
|
||||
self.status = "crashed"
|
||||
self._remove_lock()
|
||||
return False
|
||||
|
||||
@@ -140,16 +140,11 @@ class SpecChatSession:
|
||||
system_cli = shutil.which("claude")
|
||||
|
||||
# Build environment overrides for API configuration
|
||||
# Filter to only include vars that are actually set (non-None)
|
||||
sdk_env: dict[str, str] = {}
|
||||
for var in API_ENV_VARS:
|
||||
value = os.getenv(var)
|
||||
if value:
|
||||
sdk_env[var] = value
|
||||
from registry import get_effective_sdk_env
|
||||
sdk_env = get_effective_sdk_env()
|
||||
|
||||
# Determine model from environment or use default
|
||||
# This allows using alternative APIs (e.g., GLM via z.ai) that may not support Claude model names
|
||||
model = os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL", "claude-opus-4-5-20251101")
|
||||
# Determine model from SDK env (provider-aware) or fallback to env/default
|
||||
model = sdk_env.get("ANTHROPIC_DEFAULT_OPUS_MODEL") or os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL", "claude-opus-4-5-20251101")
|
||||
|
||||
try:
|
||||
self.client = ClaudeSDKClient(
|
||||
|
||||
Reference in New Issue
Block a user