diff --git a/docs/models.md b/docs/models.md index 2f7c6fe4..a5d12ef0 100644 --- a/docs/models.md +++ b/docs/models.md @@ -4,21 +4,15 @@ | Provider | Model Name | SWE Score | Input Cost | Output Cost | | ----------- | ---------------------------------------------- | --------- | ---------- | ----------- | -| bedrock | us.anthropic.claude-3-haiku-20240307-v1:0 | 0.4 | 0.25 | 1.25 | -| bedrock | us.anthropic.claude-3-opus-20240229-v1:0 | 0.725 | 15 | 75 | -| bedrock | us.anthropic.claude-3-5-sonnet-20240620-v1:0 | 0.49 | 3 | 15 | -| bedrock | us.anthropic.claude-3-5-sonnet-20241022-v2:0 | 0.49 | 3 | 15 | -| bedrock | us.anthropic.claude-3-7-sonnet-20250219-v1:0 | 0.623 | 3 | 15 | -| bedrock | us.anthropic.claude-3-5-haiku-20241022-v1:0 | 0.4 | 0.8 | 4 | -| bedrock | us.anthropic.claude-opus-4-20250514-v1:0 | 0.725 | 15 | 75 | -| bedrock | us.anthropic.claude-sonnet-4-20250514-v1:0 | 0.727 | 3 | 15 | | anthropic | claude-sonnet-4-20250514 | 0.727 | 3 | 15 | | anthropic | claude-opus-4-20250514 | 0.725 | 15 | 75 | | anthropic | claude-3-7-sonnet-20250219 | 0.623 | 3 | 15 | | anthropic | claude-3-5-sonnet-20241022 | 0.49 | 3 | 15 | -| azure | gpt-4o | 0.332 | 2.5 | 10 | -| azure | gpt-4o-mini | 0.3 | 0.15 | 0.6 | -| azure | gpt-4-1 | — | 2 | 10 | +| claude-code | opus | 0.725 | 0 | 0 | +| claude-code | sonnet | 0.727 | 0 | 0 | +| mcp | mcp-sampling | — | 0 | 0 | +| gemini-cli | gemini-2.5-pro | 0.72 | 0 | 0 | +| gemini-cli | gemini-2.5-flash | 0.71 | 0 | 0 | | openai | gpt-4o | 0.332 | 2.5 | 10 | | openai | o1 | 0.489 | 15 | 60 | | openai | o3 | 0.5 | 2 | 8 | @@ -35,19 +29,21 @@ | google | gemini-2.5-flash-preview-04-17 | 0.604 | — | — | | google | gemini-2.0-flash | 0.518 | 0.15 | 0.6 | | google | gemini-2.0-flash-lite | — | — | — | -| perplexity | sonar-pro | — | 3 | 15 | -| perplexity | sonar-reasoning-pro | 0.211 | 2 | 8 | -| perplexity | sonar-reasoning | 0.211 | 1 | 5 | | xai | grok-3 | — | 3 | 15 | | xai | grok-3-fast | — | 5 | 25 | | xai | grok-4 | — | 3 | 15 | -| ollama | devstral:latest | — | 0 | 0 | -| ollama | qwen3:latest | — | 0 | 0 | -| ollama | qwen3:14b | — | 0 | 0 | -| ollama | qwen3:32b | — | 0 | 0 | -| ollama | mistral-small3.1:latest | — | 0 | 0 | -| ollama | llama3.3:latest | — | 0 | 0 | -| ollama | phi4:latest | — | 0 | 0 | +| groq | llama-3.3-70b-versatile | 0.55 | 0.59 | 0.79 | +| groq | llama-3.1-8b-instant | 0.32 | 0.05 | 0.08 | +| groq | llama-4-scout | 0.45 | 0.11 | 0.34 | +| groq | llama-4-maverick | 0.52 | 0.5 | 0.77 | +| groq | mixtral-8x7b-32768 | 0.35 | 0.24 | 0.24 | +| groq | qwen-qwq-32b-preview | 0.4 | 0.18 | 0.18 | +| groq | deepseek-r1-distill-llama-70b | 0.52 | 0.75 | 0.99 | +| groq | gemma2-9b-it | 0.3 | 0.2 | 0.2 | +| groq | whisper-large-v3 | — | 0.11 | 0 | +| perplexity | sonar-pro | — | 3 | 15 | +| perplexity | sonar-reasoning-pro | 0.211 | 2 | 8 | +| perplexity | sonar-reasoning | 0.211 | 1 | 5 | | openrouter | google/gemini-2.5-flash-preview-05-20 | — | 0.15 | 0.6 | | openrouter | google/gemini-2.5-flash-preview-05-20:thinking | — | 0.15 | 3.5 | | openrouter | google/gemini-2.5-pro-exp-03-25 | — | 0 | 0 | @@ -73,57 +69,16 @@ | openrouter | mistralai/devstral-small | — | 0.1 | 0.3 | | openrouter | mistralai/mistral-nemo | — | 0.03 | 0.07 | | openrouter | thudm/glm-4-32b:free | — | 0 | 0 | -| groq | llama-3.3-70b-versatile | 0.55 | 0.59 | 0.79 | -| groq | llama-3.1-8b-instant | 0.32 | 0.05 | 0.08 | -| groq | llama-4-scout | 0.45 | 0.11 | 0.34 | -| groq | llama-4-maverick | 0.52 | 0.5 | 0.77 | -| groq | mixtral-8x7b-32768 | 0.35 | 0.24 | 0.24 | -| groq | qwen-qwq-32b-preview | 0.4 | 0.18 | 0.18 | -| groq | deepseek-r1-distill-llama-70b | 0.52 | 0.75 | 0.99 | -| groq | gemma2-9b-it | 0.3 | 0.2 | 0.2 | -| groq | whisper-large-v3 | — | 0.11 | 0 | -| claude-code | opus | 0.725 | 0 | 0 | -| claude-code | sonnet | 0.727 | 0 | 0 | -| mcp | mcp-sampling | — | 0 | 0 | -| gemini-cli | gemini-2.5-pro | 0.72 | 0 | 0 | -| gemini-cli | gemini-2.5-flash | 0.71 | 0 | 0 | - -## Research Models - -| Provider | Model Name | SWE Score | Input Cost | Output Cost | -| ----------- | -------------------------------------------- | --------- | ---------- | ----------- | -| bedrock | us.anthropic.claude-3-opus-20240229-v1:0 | 0.725 | 15 | 75 | -| bedrock | us.anthropic.claude-3-5-sonnet-20240620-v1:0 | 0.49 | 3 | 15 | -| bedrock | us.anthropic.claude-3-5-sonnet-20241022-v2:0 | 0.49 | 3 | 15 | -| bedrock | us.anthropic.claude-3-7-sonnet-20250219-v1:0 | 0.623 | 3 | 15 | -| bedrock | us.anthropic.claude-opus-4-20250514-v1:0 | 0.725 | 15 | 75 | -| bedrock | us.anthropic.claude-sonnet-4-20250514-v1:0 | 0.727 | 3 | 15 | -| bedrock | us.deepseek.r1-v1:0 | — | 1.35 | 5.4 | -| openai | gpt-4o-search-preview | 0.33 | 2.5 | 10 | -| openai | gpt-4o-mini-search-preview | 0.3 | 0.15 | 0.6 | -| perplexity | sonar-pro | — | 3 | 15 | -| perplexity | sonar | — | 1 | 1 | -| perplexity | deep-research | 0.211 | 2 | 8 | -| perplexity | sonar-reasoning-pro | 0.211 | 2 | 8 | -| perplexity | sonar-reasoning | 0.211 | 1 | 5 | -| xai | grok-3 | — | 3 | 15 | -| xai | grok-3-fast | — | 5 | 25 | -| xai | grok-4 | — | 3 | 15 | -| groq | llama-3.3-70b-versatile | 0.55 | 0.59 | 0.79 | -| groq | llama-4-scout | 0.45 | 0.11 | 0.34 | -| groq | llama-4-maverick | 0.52 | 0.5 | 0.77 | -| groq | qwen-qwq-32b-preview | 0.4 | 0.18 | 0.18 | -| groq | deepseek-r1-distill-llama-70b | 0.52 | 0.75 | 0.99 | -| claude-code | opus | 0.725 | 0 | 0 | -| claude-code | sonnet | 0.727 | 0 | 0 | -| mcp | mcp-sampling | — | 0 | 0 | -| gemini-cli | gemini-2.5-pro | 0.72 | 0 | 0 | -| gemini-cli | gemini-2.5-flash | 0.71 | 0 | 0 | - -## Fallback Models - -| Provider | Model Name | SWE Score | Input Cost | Output Cost | -| ----------- | ---------------------------------------------- | --------- | ---------- | ----------- | +| ollama | devstral:latest | — | 0 | 0 | +| ollama | qwen3:latest | — | 0 | 0 | +| ollama | qwen3:14b | — | 0 | 0 | +| ollama | qwen3:32b | — | 0 | 0 | +| ollama | mistral-small3.1:latest | — | 0 | 0 | +| ollama | llama3.3:latest | — | 0 | 0 | +| ollama | phi4:latest | — | 0 | 0 | +| azure | gpt-4o | 0.332 | 2.5 | 10 | +| azure | gpt-4o-mini | 0.3 | 0.15 | 0.6 | +| azure | gpt-4-1 | — | 2 | 10 | | bedrock | us.anthropic.claude-3-haiku-20240307-v1:0 | 0.4 | 0.25 | 1.25 | | bedrock | us.anthropic.claude-3-opus-20240229-v1:0 | 0.725 | 15 | 75 | | bedrock | us.anthropic.claude-3-5-sonnet-20240620-v1:0 | 0.49 | 3 | 15 | @@ -132,13 +87,52 @@ | bedrock | us.anthropic.claude-3-5-haiku-20241022-v1:0 | 0.4 | 0.8 | 4 | | bedrock | us.anthropic.claude-opus-4-20250514-v1:0 | 0.725 | 15 | 75 | | bedrock | us.anthropic.claude-sonnet-4-20250514-v1:0 | 0.727 | 3 | 15 | + +## Research Models + +| Provider | Model Name | SWE Score | Input Cost | Output Cost | +| ----------- | -------------------------------------------- | --------- | ---------- | ----------- | +| claude-code | opus | 0.725 | 0 | 0 | +| claude-code | sonnet | 0.727 | 0 | 0 | +| mcp | mcp-sampling | — | 0 | 0 | +| gemini-cli | gemini-2.5-pro | 0.72 | 0 | 0 | +| gemini-cli | gemini-2.5-flash | 0.71 | 0 | 0 | +| openai | gpt-4o-search-preview | 0.33 | 2.5 | 10 | +| openai | gpt-4o-mini-search-preview | 0.3 | 0.15 | 0.6 | +| xai | grok-3 | — | 3 | 15 | +| xai | grok-3-fast | — | 5 | 25 | +| xai | grok-4 | — | 3 | 15 | +| groq | llama-3.3-70b-versatile | 0.55 | 0.59 | 0.79 | +| groq | llama-4-scout | 0.45 | 0.11 | 0.34 | +| groq | llama-4-maverick | 0.52 | 0.5 | 0.77 | +| groq | qwen-qwq-32b-preview | 0.4 | 0.18 | 0.18 | +| groq | deepseek-r1-distill-llama-70b | 0.52 | 0.75 | 0.99 | +| perplexity | sonar-pro | — | 3 | 15 | +| perplexity | sonar | — | 1 | 1 | +| perplexity | deep-research | 0.211 | 2 | 8 | +| perplexity | sonar-reasoning-pro | 0.211 | 2 | 8 | +| perplexity | sonar-reasoning | 0.211 | 1 | 5 | +| bedrock | us.anthropic.claude-3-opus-20240229-v1:0 | 0.725 | 15 | 75 | +| bedrock | us.anthropic.claude-3-5-sonnet-20240620-v1:0 | 0.49 | 3 | 15 | +| bedrock | us.anthropic.claude-3-5-sonnet-20241022-v2:0 | 0.49 | 3 | 15 | +| bedrock | us.anthropic.claude-3-7-sonnet-20250219-v1:0 | 0.623 | 3 | 15 | +| bedrock | us.anthropic.claude-opus-4-20250514-v1:0 | 0.725 | 15 | 75 | +| bedrock | us.anthropic.claude-sonnet-4-20250514-v1:0 | 0.727 | 3 | 15 | +| bedrock | us.deepseek.r1-v1:0 | — | 1.35 | 5.4 | + +## Fallback Models + +| Provider | Model Name | SWE Score | Input Cost | Output Cost | +| ----------- | ---------------------------------------------- | --------- | ---------- | ----------- | | anthropic | claude-sonnet-4-20250514 | 0.727 | 3 | 15 | | anthropic | claude-opus-4-20250514 | 0.725 | 15 | 75 | | anthropic | claude-3-7-sonnet-20250219 | 0.623 | 3 | 15 | | anthropic | claude-3-5-sonnet-20241022 | 0.49 | 3 | 15 | -| azure | gpt-4o | 0.332 | 2.5 | 10 | -| azure | gpt-4o-mini | 0.3 | 0.15 | 0.6 | -| azure | gpt-4-1 | — | 2 | 10 | +| claude-code | opus | 0.725 | 0 | 0 | +| claude-code | sonnet | 0.727 | 0 | 0 | +| mcp | mcp-sampling | — | 0 | 0 | +| gemini-cli | gemini-2.5-pro | 0.72 | 0 | 0 | +| gemini-cli | gemini-2.5-flash | 0.71 | 0 | 0 | | openai | gpt-4o | 0.332 | 2.5 | 10 | | openai | o3 | 0.5 | 2 | 8 | | openai | o4-mini | 0.45 | 1.1 | 4.4 | @@ -147,18 +141,18 @@ | google | gemini-2.5-flash-preview-04-17 | 0.604 | — | — | | google | gemini-2.0-flash | 0.518 | 0.15 | 0.6 | | google | gemini-2.0-flash-lite | — | — | — | -| perplexity | sonar-reasoning-pro | 0.211 | 2 | 8 | -| perplexity | sonar-reasoning | 0.211 | 1 | 5 | | xai | grok-3 | — | 3 | 15 | | xai | grok-3-fast | — | 5 | 25 | | xai | grok-4 | — | 3 | 15 | -| ollama | devstral:latest | — | 0 | 0 | -| ollama | qwen3:latest | — | 0 | 0 | -| ollama | qwen3:14b | — | 0 | 0 | -| ollama | qwen3:32b | — | 0 | 0 | -| ollama | mistral-small3.1:latest | — | 0 | 0 | -| ollama | llama3.3:latest | — | 0 | 0 | -| ollama | phi4:latest | — | 0 | 0 | +| groq | llama-3.3-70b-versatile | 0.55 | 0.59 | 0.79 | +| groq | llama-3.1-8b-instant | 0.32 | 0.05 | 0.08 | +| groq | llama-4-scout | 0.45 | 0.11 | 0.34 | +| groq | llama-4-maverick | 0.52 | 0.5 | 0.77 | +| groq | mixtral-8x7b-32768 | 0.35 | 0.24 | 0.24 | +| groq | qwen-qwq-32b-preview | 0.4 | 0.18 | 0.18 | +| groq | gemma2-9b-it | 0.3 | 0.2 | 0.2 | +| perplexity | sonar-reasoning-pro | 0.211 | 2 | 8 | +| perplexity | sonar-reasoning | 0.211 | 1 | 5 | | openrouter | google/gemini-2.5-flash-preview-05-20 | — | 0.15 | 0.6 | | openrouter | google/gemini-2.5-flash-preview-05-20:thinking | — | 0.15 | 3.5 | | openrouter | google/gemini-2.5-pro-exp-03-25 | — | 0 | 0 | @@ -182,15 +176,21 @@ | openrouter | mistralai/mistral-small-3.1-24b-instruct | — | 0.1 | 0.3 | | openrouter | mistralai/mistral-nemo | — | 0.03 | 0.07 | | openrouter | thudm/glm-4-32b:free | — | 0 | 0 | -| groq | llama-3.3-70b-versatile | 0.55 | 0.59 | 0.79 | -| groq | llama-3.1-8b-instant | 0.32 | 0.05 | 0.08 | -| groq | llama-4-scout | 0.45 | 0.11 | 0.34 | -| groq | llama-4-maverick | 0.52 | 0.5 | 0.77 | -| groq | mixtral-8x7b-32768 | 0.35 | 0.24 | 0.24 | -| groq | qwen-qwq-32b-preview | 0.4 | 0.18 | 0.18 | -| groq | gemma2-9b-it | 0.3 | 0.2 | 0.2 | -| claude-code | opus | 0.725 | 0 | 0 | -| claude-code | sonnet | 0.727 | 0 | 0 | -| mcp | mcp-sampling | — | 0 | 0 | -| gemini-cli | gemini-2.5-pro | 0.72 | 0 | 0 | -| gemini-cli | gemini-2.5-flash | 0.71 | 0 | 0 | +| ollama | devstral:latest | — | 0 | 0 | +| ollama | qwen3:latest | — | 0 | 0 | +| ollama | qwen3:14b | — | 0 | 0 | +| ollama | qwen3:32b | — | 0 | 0 | +| ollama | mistral-small3.1:latest | — | 0 | 0 | +| ollama | llama3.3:latest | — | 0 | 0 | +| ollama | phi4:latest | — | 0 | 0 | +| azure | gpt-4o | 0.332 | 2.5 | 10 | +| azure | gpt-4o-mini | 0.3 | 0.15 | 0.6 | +| azure | gpt-4-1 | — | 2 | 10 | +| bedrock | us.anthropic.claude-3-haiku-20240307-v1:0 | 0.4 | 0.25 | 1.25 | +| bedrock | us.anthropic.claude-3-opus-20240229-v1:0 | 0.725 | 15 | 75 | +| bedrock | us.anthropic.claude-3-5-sonnet-20240620-v1:0 | 0.49 | 3 | 15 | +| bedrock | us.anthropic.claude-3-5-sonnet-20241022-v2:0 | 0.49 | 3 | 15 | +| bedrock | us.anthropic.claude-3-7-sonnet-20250219-v1:0 | 0.623 | 3 | 15 | +| bedrock | us.anthropic.claude-3-5-haiku-20241022-v1:0 | 0.4 | 0.8 | 4 | +| bedrock | us.anthropic.claude-opus-4-20250514-v1:0 | 0.725 | 15 | 75 | +| bedrock | us.anthropic.claude-sonnet-4-20250514-v1:0 | 0.727 | 3 | 15 |