- Enhance E2E testing and LLM analysis reporting:
  - Add an --analyze-log flag to run_e2e.sh to re-run LLM analysis on existing logs.
  - Add test:e2e and analyze-log scripts to package.json for easier execution (a sketch of these entries follows this list).
- Correct display errors and dependency validation output:
  - Update chalk usage in add-task.js to bracket notation (chalk[color]), which is compatible with chalk v5 and resolves the "chalk.keyword is not a function" error (see the sketch after this list).
  - Change the fix-dependencies command to show a red failure box with the issue count, instead of a green success box, when validation fails.
- Refactor interactive model setup:
  - Verify that the 'No change' option is offered during the interactive model setup flow (task-master models --setup).
- Update model definitions:
  - Add a max_tokens field for gpt-4o in supported-models.json (full file below).
- Remove unused scripts:
  - Delete prepare-package.js and rule-transformer.test.js.

Release candidate
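A minimal sketch of the new package.json entries. Only the script names (test:e2e, analyze-log) and the --analyze-log flag come from this change; the script bodies and the tests/e2e/ path are assumptions for illustration:

```json
{
  "scripts": {
    "test:e2e": "./tests/e2e/run_e2e.sh",
    "analyze-log": "./tests/e2e/run_e2e.sh --analyze-log"
  }
}
```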
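And a minimal sketch of the chalk fix, assuming the color name matches one of chalk's built-in style methods; the colorize helper is hypothetical, only the chalk[color] indexing pattern is taken from the change:

```js
import chalk from 'chalk';

// chalk v5 removed chalk.keyword(), so code like chalk.keyword(color)(text)
// throws "chalk.keyword is not a function". Indexing chalk with the name of
// a built-in style method ("red", "yellow", "cyan", ...) still works in v5.
function colorize(color, text) {
  // Guard against names that are not chalk methods (e.g. arbitrary CSS keywords).
  const paint = typeof chalk[color] === 'function' ? chalk[color] : chalk.white;
  return paint(text);
}

console.log(colorize('red', 'high priority'));
console.log(colorize('yellow', 'medium priority'));
```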
supported-models.json (439 lines, 11 KiB):
```json
{
  "anthropic": [
    {
      "id": "claude-3-7-sonnet-20250219",
      "swe_score": 0.623,
      "cost_per_1m_tokens": { "input": 3.0, "output": 15.0 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 120000
    },
    {
      "id": "claude-3-5-sonnet-20241022",
      "swe_score": 0.49,
      "cost_per_1m_tokens": { "input": 3.0, "output": 15.0 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 64000
    },
    {
      "id": "claude-3-5-haiku-20241022",
      "swe_score": 0.406,
      "cost_per_1m_tokens": { "input": 0.8, "output": 4.0 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 64000
    },
    {
      "id": "claude-3-opus-20240229",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 15, "output": 75 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 64000
    }
  ],
  "openai": [
    {
      "id": "gpt-4o",
      "swe_score": 0.332,
      "cost_per_1m_tokens": { "input": 2.5, "output": 10.0 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 16384
    },
    {
      "id": "o1",
      "swe_score": 0.489,
      "cost_per_1m_tokens": { "input": 15.0, "output": 60.0 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "o3",
      "swe_score": 0.5,
      "cost_per_1m_tokens": { "input": 10.0, "output": 40.0 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "o3-mini",
      "swe_score": 0.493,
      "cost_per_1m_tokens": { "input": 1.1, "output": 4.4 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 100000
    },
    {
      "id": "o4-mini",
      "swe_score": 0.45,
      "cost_per_1m_tokens": { "input": 1.1, "output": 4.4 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "o1-mini",
      "swe_score": 0.4,
      "cost_per_1m_tokens": { "input": 1.1, "output": 4.4 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "o1-pro",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 150.0, "output": 600.0 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "gpt-4-5-preview",
      "swe_score": 0.38,
      "cost_per_1m_tokens": { "input": 75.0, "output": 150.0 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "gpt-4-1-mini",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0.4, "output": 1.6 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "gpt-4-1-nano",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0.1, "output": 0.4 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "gpt-4o-mini",
      "swe_score": 0.3,
      "cost_per_1m_tokens": { "input": 0.15, "output": 0.6 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "gpt-4o-search-preview",
      "swe_score": 0.33,
      "cost_per_1m_tokens": { "input": 2.5, "output": 10.0 },
      "allowed_roles": ["main", "fallback", "research"]
    },
    {
      "id": "gpt-4o-mini-search-preview",
      "swe_score": 0.3,
      "cost_per_1m_tokens": { "input": 0.15, "output": 0.6 },
      "allowed_roles": ["main", "fallback", "research"]
    }
  ],
  "google": [
    {
      "id": "gemini-2.5-pro-exp-03-25",
      "swe_score": 0.638,
      "cost_per_1m_tokens": null,
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "gemini-2.5-flash-preview-04-17",
      "swe_score": 0,
      "cost_per_1m_tokens": null,
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "gemini-2.0-flash",
      "swe_score": 0.754,
      "cost_per_1m_tokens": { "input": 0.15, "output": 0.6 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "gemini-2.0-flash-thinking-experimental",
      "swe_score": 0.754,
      "cost_per_1m_tokens": { "input": 0.15, "output": 0.6 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "gemini-2.0-pro",
      "swe_score": 0,
      "cost_per_1m_tokens": null,
      "allowed_roles": ["main", "fallback"]
    }
  ],
  "perplexity": [
    {
      "id": "sonar-pro",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 3, "output": 15 },
      "allowed_roles": ["research"],
      "max_tokens": 8700
    },
    {
      "id": "sonar",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 1, "output": 1 },
      "allowed_roles": ["research"],
      "max_tokens": 8700
    },
    {
      "id": "deep-research",
      "swe_score": 0.211,
      "cost_per_1m_tokens": { "input": 2, "output": 8 },
      "allowed_roles": ["research"],
      "max_tokens": 8700
    },
    {
      "id": "sonar-reasoning-pro",
      "swe_score": 0.211,
      "cost_per_1m_tokens": { "input": 2, "output": 8 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 8700
    },
    {
      "id": "sonar-reasoning",
      "swe_score": 0.211,
      "cost_per_1m_tokens": { "input": 1, "output": 5 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 8700
    }
  ],
  "xai": [
    {
      "id": "grok-3",
      "name": "Grok 3",
      "swe_score": null,
      "cost_per_1m_tokens": { "input": 3, "output": 15 },
      "allowed_roles": ["main", "fallback", "research"],
      "max_tokens": 131072
    },
    {
      "id": "grok-3-mini",
      "name": "Grok 3 Mini",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0.3, "output": 0.5 },
      "allowed_roles": ["main", "fallback", "research"],
      "max_tokens": 131072
    },
    {
      "id": "grok-3-fast",
      "name": "Grok 3 Fast",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 5, "output": 25 },
      "allowed_roles": ["main", "fallback", "research"],
      "max_tokens": 131072
    },
    {
      "id": "grok-3-mini-fast",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0.6, "output": 4 },
      "allowed_roles": ["main", "fallback", "research"],
      "max_tokens": 131072
    }
  ],
  "ollama": [
    {
      "id": "gemma3:27b",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "gemma3:12b",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "qwq",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "deepseek-r1",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "mistral-small3.1",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "llama3.3",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"]
    },
    {
      "id": "phi4",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"]
    }
  ],
  "openrouter": [
    {
      "id": "google/gemini-2.0-flash-001",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0.1, "output": 0.4 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 1048576
    },
    {
      "id": "google/gemini-2.5-pro-exp-03-25",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 1000000
    },
    {
      "id": "deepseek/deepseek-chat-v3-0324:free",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 163840
    },
    {
      "id": "deepseek/deepseek-chat-v3-0324",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0.27, "output": 1.1 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 64000
    },
    {
      "id": "deepseek/deepseek-r1:free",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 163840
    },
    {
      "id": "microsoft/mai-ds-r1:free",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 163840
    },
    {
      "id": "google/gemini-2.5-pro-preview-03-25",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 1.25, "output": 10 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 65535
    },
    {
      "id": "google/gemini-2.5-flash-preview",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0.15, "output": 0.6 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 65535
    },
    {
      "id": "google/gemini-2.5-flash-preview:thinking",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0.15, "output": 3.5 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 65535
    },
    {
      "id": "openai/o3",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 10, "output": 40 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 200000
    },
    {
      "id": "openai/o4-mini",
      "swe_score": 0.45,
      "cost_per_1m_tokens": { "input": 1.1, "output": 4.4 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 100000
    },
    {
      "id": "openai/o4-mini-high",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 1.1, "output": 4.4 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 100000
    },
    {
      "id": "openai/o1-pro",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 150, "output": 600 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 100000
    },
    {
      "id": "meta-llama/llama-3.3-70b-instruct",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 120, "output": 600 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 1048576
    },
    {
      "id": "google/gemma-3-12b-it:free",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 131072
    },
    {
      "id": "google/gemma-3-12b-it",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 50, "output": 100 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 131072
    },
    {
      "id": "google/gemma-3-27b-it:free",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 96000
    },
    {
      "id": "google/gemma-3-27b-it",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 100, "output": 200 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 131072
    },
    {
      "id": "qwen/qwq-32b:free",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 40000
    },
    {
      "id": "qwen/qwq-32b",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 150, "output": 200 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 131072
    },
    {
      "id": "qwen/qwen-max",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 1.6, "output": 6.4 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 32768
    },
    {
      "id": "qwen/qwen-turbo",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0.05, "output": 0.2 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 1000000
    },
    {
      "id": "mistralai/mistral-small-3.1-24b-instruct:free",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 96000
    },
    {
      "id": "mistralai/mistral-small-3.1-24b-instruct",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0.1, "output": 0.3 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 128000
    },
    {
      "id": "thudm/glm-4-32b:free",
      "swe_score": 0,
      "cost_per_1m_tokens": { "input": 0, "output": 0 },
      "allowed_roles": ["main", "fallback"],
      "max_tokens": 32768
    }
  ]
}
```
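For context, a hypothetical consumer-side sketch of how these definitions might be read. The findModel helper and DEFAULT_MAX_TOKENS fallback are illustrative names, not Task Master's actual API; note that several entries (e.g. most openai models) omit max_tokens entirely:

```js
import { readFileSync } from 'node:fs';

// Illustrative fallback for entries that omit max_tokens; the real default,
// if any, lives in the application code, not in this file.
const DEFAULT_MAX_TOKENS = 8192;

const providers = JSON.parse(readFileSync('./supported-models.json', 'utf8'));

// Each top-level key is a provider name mapping to an array of model entries.
function findModel(id) {
  for (const entries of Object.values(providers)) {
    const match = entries.find((m) => m.id === id);
    if (match) return match;
  }
  return undefined;
}

const gpt4o = findModel('gpt-4o');
console.log(gpt4o?.max_tokens ?? DEFAULT_MAX_TOKENS); // 16384 with this file
```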