fix: improve error handling, test options, and model configuration

- Enhance error validation in parse-prd.js and update-tasks.js
- Fix bug where mcpLog was incorrectly passed as logWrapper
- Improve error messages and response formatting
- Add --skip-verification flag to E2E tests (see usage sketch below)
- Update MCP server config that ships with init to match new API key structure
- Fix task force/append handling in parse-prd command
- Increase column width in update-tasks display
Eyal Toledano
2025-05-02 23:11:39 -04:00
parent 2e17437da3
commit 25ca1a45a0
8 changed files with 248 additions and 195 deletions
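
The --skip-verification flag is opt-out: by default the fallback verification still runs. A minimal usage sketch, assuming the main E2E runner is the tests/e2e script that sources e2e_helpers.sh (its exact filename is not shown in this commit, so the path below is a placeholder):

    ./tests/e2e/<main-e2e-script>.sh                              # full run, fallback verification included (default)
    ./tests/e2e/<main-e2e-script>.sh --skip-verification          # skip the fallback generateObjectService check
    ./tests/e2e/<main-e2e-script>.sh --analyze-log path/to/run.log  # analysis-only mode, unchanged, exits early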

View File

@@ -34,18 +34,17 @@ export async function parsePRDDirect(args, log, context = {}) {
     projectRoot
   } = args;
 
+  // Create the standard logger wrapper
   const logWrapper = createLogWrapper(log);
 
   // --- Input Validation and Path Resolution ---
-  if (!projectRoot || !path.isAbsolute(projectRoot)) {
-    logWrapper.error(
-      'parsePRDDirect requires an absolute projectRoot argument.'
-    );
+  if (!projectRoot) {
+    logWrapper.error('parsePRDDirect requires a projectRoot argument.');
     return {
       success: false,
       error: {
         code: 'MISSING_ARGUMENT',
-        message: 'projectRoot is required and must be absolute.'
+        message: 'projectRoot is required.'
       }
     };
   }
@@ -57,7 +56,7 @@ export async function parsePRDDirect(args, log, context = {}) {
     };
   }
 
-  // Resolve input and output paths relative to projectRoot if they aren't absolute
+  // Resolve input and output paths relative to projectRoot
   const inputPath = path.resolve(projectRoot, inputArg);
   const outputPath = outputArg
     ? path.resolve(projectRoot, outputArg)
@@ -101,7 +100,7 @@ export async function parsePRDDirect(args, log, context = {}) {
       // Ensure positive number
       numTasks = getDefaultNumTasks(projectRoot); // Fallback to default if parsing fails or invalid
       logWrapper.warn(
-        `Invalid numTasks value: ${numTasksArg}. Using default: 10`
+        `Invalid numTasks value: ${numTasksArg}. Using default: ${numTasks}`
       );
     }
   }
@@ -132,7 +131,7 @@ export async function parsePRDDirect(args, log, context = {}) {
     inputPath,
     outputPath,
     numTasks,
-    { session, mcpLog: logWrapper, projectRoot, useForce, useAppend },
+    { session, mcpLog, projectRoot, useForce, useAppend },
     'json'
   );
@@ -147,7 +146,6 @@ export async function parsePRDDirect(args, log, context = {}) {
         message: `Successfully parsed PRD and generated ${result.tasks.length} tasks.`,
         outputPath: outputPath,
         taskCount: result.tasks.length
-        // Optionally include tasks if needed by client: tasks: result.tasks
       }
     };
   } else {

View File

@@ -1,121 +1,122 @@
 /**
  * update-tasks.js
- * Direct function implementation for updating tasks based on new context/prompt
+ * Direct function implementation for updating tasks based on new context
  */
 
+import path from 'path';
 import { updateTasks } from '../../../../scripts/modules/task-manager.js';
 import {
   enableSilentMode,
   disableSilentMode
 } from '../../../../scripts/modules/utils.js';
 import { createLogWrapper } from '../../tools/utils.js';
 
 /**
- * Direct function wrapper for updating tasks based on new context/prompt.
+ * Direct function wrapper for updating tasks based on new context.
  *
- * @param {Object} args - Command arguments containing from, prompt, research and tasksJsonPath.
+ * @param {Object} args - Command arguments containing projectRoot, from, prompt, research options.
  * @param {Object} log - Logger object.
  * @param {Object} context - Context object containing session data.
  * @returns {Promise<Object>} - Result object with success status and data/error information.
  */
 export async function updateTasksDirect(args, log, context = {}) {
-  const { session } = context; // Extract session
-  const { tasksJsonPath, from, prompt, research, projectRoot } = args;
+  const { session } = context;
+  const { from, prompt, research, file: fileArg, projectRoot } = args;
 
-  // --- Input Validation (Keep existing checks) ---
-  if (!tasksJsonPath) {
-    log.error('updateTasksDirect called without tasksJsonPath');
-    return {
-      success: false,
-      error: { code: 'MISSING_ARGUMENT', message: 'tasksJsonPath is required' },
-      fromCache: false
-    };
-  }
-
-  if (args.id !== undefined && from === undefined) {
-    // Keep 'from' vs 'id' check
-    const errorMessage =
-      "Use 'from' parameter, not 'id', or use 'update_task' tool.";
-    log.error(errorMessage);
-    return {
-      success: false,
-      error: { code: 'PARAMETER_MISMATCH', message: errorMessage },
-      fromCache: false
-    };
-  }
-
-  if (!from) {
-    log.error('Missing from ID.');
-    return {
-      success: false,
-      error: { code: 'MISSING_FROM_ID', message: 'No from ID specified.' },
-      fromCache: false
-    };
-  }
-
-  if (!prompt) {
-    log.error('Missing prompt.');
-    return {
-      success: false,
-      error: { code: 'MISSING_PROMPT', message: 'No prompt specified.' },
-      fromCache: false
-    };
-  }
-
-  let fromId;
-  try {
-    fromId = parseInt(from, 10);
-    if (isNaN(fromId) || fromId <= 0) throw new Error();
-  } catch {
-    log.error(`Invalid from ID: ${from}`);
+  // Create the standard logger wrapper
+  const logWrapper = createLogWrapper(log);
+
+  // --- Input Validation ---
+  if (!projectRoot) {
+    logWrapper.error('updateTasksDirect requires a projectRoot argument.');
     return {
       success: false,
       error: {
-        code: 'INVALID_FROM_ID',
-        message: `Invalid from ID: ${from}. Must be a positive integer.`
-      },
-      fromCache: false
+        code: 'MISSING_ARGUMENT',
+        message: 'projectRoot is required.'
+      }
     };
   }
-  const useResearch = research === true;
-  // --- End Input Validation ---
 
-  log.info(
-    `Updating tasks from ID ${fromId}. Research: ${useResearch}. Project Root: ${projectRoot}`
+  if (!from) {
+    logWrapper.error('updateTasksDirect called without from ID');
+    return {
+      success: false,
+      error: {
+        code: 'MISSING_ARGUMENT',
+        message: 'Starting task ID (from) is required'
+      }
+    };
+  }
+
+  if (!prompt) {
+    logWrapper.error('updateTasksDirect called without prompt');
+    return {
+      success: false,
+      error: {
+        code: 'MISSING_ARGUMENT',
+        message: 'Update prompt is required'
+      }
+    };
+  }
+
+  // Resolve tasks file path
+  const tasksFile = fileArg
+    ? path.resolve(projectRoot, fileArg)
+    : path.resolve(projectRoot, 'tasks', 'tasks.json');
+
+  logWrapper.info(
+    `Updating tasks via direct function. From: ${from}, Research: ${research}, File: ${tasksFile}, ProjectRoot: ${projectRoot}`
   );
 
   enableSilentMode(); // Enable silent mode
   try {
-    // Create logger wrapper using the utility
-    const mcpLog = createLogWrapper(log);
-
-    // Execute core updateTasks function, passing session context AND projectRoot
-    await updateTasks(
-      tasksJsonPath,
-      fromId,
+    // Call the core updateTasks function
+    const result = await updateTasks(
+      tasksFile,
+      from,
       prompt,
-      useResearch,
-      // Pass context with logger wrapper, session, AND projectRoot
-      { mcpLog, session, projectRoot },
-      'json' // Explicitly request JSON format for MCP
+      research,
+      {
+        session,
+        mcpLog: logWrapper,
+        projectRoot
+      },
+      'json'
     );
 
-    // Since updateTasks modifies file and doesn't return data, create success message
-    return {
-      success: true,
-      data: {
-        message: `Successfully initiated update for tasks from ID ${fromId} based on the prompt.`,
-        fromId,
-        tasksPath: tasksJsonPath,
-        useResearch
-      },
-      fromCache: false // Modifies state
-    };
+    // updateTasks returns { success: true, updatedTasks: [...] } on success
+    if (result && result.success && Array.isArray(result.updatedTasks)) {
+      logWrapper.success(
+        `Successfully updated ${result.updatedTasks.length} tasks.`
+      );
+      return {
+        success: true,
+        data: {
+          message: `Successfully updated ${result.updatedTasks.length} tasks.`,
+          tasksFile,
+          updatedCount: result.updatedTasks.length
+        }
+      };
+    } else {
+      // Handle case where core function didn't return expected success structure
+      logWrapper.error(
+        'Core updateTasks function did not return a successful structure.'
+      );
+      return {
+        success: false,
+        error: {
+          code: 'CORE_FUNCTION_ERROR',
+          message:
+            result?.message ||
+            'Core function failed to update tasks or returned unexpected result.'
+        }
+      };
+    }
   } catch (error) {
-    log.error(`Error executing core updateTasks: ${error.message}`);
+    logWrapper.error(`Error executing core updateTasks: ${error.message}`);
     return {
       success: false,
       error: {
         code: 'UPDATE_TASKS_CORE_ERROR',
         message: error.message || 'Unknown error updating tasks'
-      },
-      fromCache: false
+      }
     };
   } finally {
     disableSilentMode(); // Ensure silent mode is disabled

View File

@@ -761,21 +761,22 @@ function setupMCPConfiguration(targetDir) {
   const newMCPServer = {
     'task-master-ai': {
       command: 'npx',
-      args: ['-y', 'task-master-mcp'],
+      args: ['-y', '--package=task-master-ai', 'task-master-ai'],
       env: {
-        ANTHROPIC_API_KEY: 'YOUR_ANTHROPIC_API_KEY',
-        PERPLEXITY_API_KEY: 'YOUR_PERPLEXITY_API_KEY',
-        MODEL: 'claude-3-7-sonnet-20250219',
-        PERPLEXITY_MODEL: 'sonar-pro',
-        MAX_TOKENS: '64000',
-        TEMPERATURE: '0.2',
-        DEFAULT_SUBTASKS: '5',
-        DEFAULT_PRIORITY: 'medium'
+        ANTHROPIC_API_KEY: 'ANTHROPIC_API_KEY_HERE',
+        PERPLEXITY_API_KEY: 'PERPLEXITY_API_KEY_HERE',
+        OPENAI_API_KEY: 'OPENAI_API_KEY_HERE',
+        GOOGLE_API_KEY: 'GOOGLE_API_KEY_HERE',
+        XAI_API_KEY: 'XAI_API_KEY_HERE',
+        OPENROUTER_API_KEY: 'OPENROUTER_API_KEY_HERE',
+        MISTRAL_API_KEY: 'MISTRAL_API_KEY_HERE',
+        AZURE_OPENAI_API_KEY: 'AZURE_OPENAI_API_KEY_HERE',
+        OLLAMA_API_KEY: 'OLLAMA_API_KEY_HERE'
       }
     }
   };
 
   // Check if mcp.json already exists
   if (fs.existsSync(mcpJsonPath)) {
     log(
       'info',

View File

@@ -514,15 +514,19 @@ function registerCommands(programInstance) {
       const outputPath = options.output;
       const force = options.force || false;
       const append = options.append || false;
+      let useForce = false;
+      let useAppend = false;
 
       // Helper function to check if tasks.json exists and confirm overwrite
       async function confirmOverwriteIfNeeded() {
         if (fs.existsSync(outputPath) && !force && !append) {
-          const shouldContinue = await confirmTaskOverwrite(outputPath);
-          if (!shouldContinue) {
-            console.log(chalk.yellow('Operation cancelled by user.'));
-            return false;
+          const overwrite = await confirmTaskOverwrite(outputPath); // Calls inquirer prompt
+          if (!overwrite) {
+            log('info', 'Operation cancelled.');
+            return false; // Exit if user selects 'N'
           }
+          // If user confirms 'y', we should set useForce = true for the parsePRD call
+          useForce = true;
         }
         return true;
       }
@@ -536,7 +540,10 @@ function registerCommands(programInstance) {
         if (!(await confirmOverwriteIfNeeded())) return;
 
         console.log(chalk.blue(`Generating ${numTasks} tasks...`));
-        await parsePRD(defaultPrdPath, outputPath, numTasks, { append });
+        await parsePRD(defaultPrdPath, outputPath, numTasks, {
+          useAppend,
+          useForce
+        });
         return;
       }

View File

@@ -275,7 +275,7 @@ async function updateTasks(
       chalk.cyan.bold('Title'),
       chalk.cyan.bold('Status')
     ],
-    colWidths: [5, 60, 10]
+    colWidths: [5, 70, 20]
   });
 
   tasksToUpdate.forEach((task) => {

View File

@@ -1,3 +0,0 @@
-Task Master PRD
-
-Create a CLI tool for task management

View File

@@ -5,6 +5,47 @@ set -u
 # Prevent errors in pipelines from being masked.
 set -o pipefail
 
+# --- Default Settings ---
+run_verification_test=true
+
+# --- Argument Parsing ---
+# Simple loop to check for the skip flag
+# Note: This needs to happen *before* the main block piped to tee
+# if we want the decision logged early. Or handle args inside.
+# Let's handle it before for clarity.
+processed_args=()
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --skip-verification)
+      run_verification_test=false
+      echo "[INFO] Argument '--skip-verification' detected. Fallback verification will be skipped."
+      shift # Consume the flag
+      ;;
+    --analyze-log)
+      # Keep the analyze-log flag handling separate for now
+      # It exits early, so doesn't conflict with the main run flags
+      processed_args+=("$1")
+      if [[ $# -gt 1 ]]; then
+        processed_args+=("$2")
+        shift 2
+      else
+        shift 1
+      fi
+      ;;
+    *)
+      # Unknown argument, pass it along or handle error
+      # For now, just pass it along in case --analyze-log needs it later
+      processed_args+=("$1")
+      shift
+      ;;
+  esac
+done
+
+# Restore processed arguments ONLY if the array is not empty
+if [ ${#processed_args[@]} -gt 0 ]; then
+  set -- "${processed_args[@]}"
+fi
+
 # --- Configuration ---
 # Assumes script is run from the project root (claude-task-master)
 TASKMASTER_SOURCE_DIR="." # Current directory is the source
@@ -24,7 +65,7 @@ source "$TASKMASTER_SOURCE_DIR/tests/e2e/e2e_helpers.sh"
 export -f log_info log_success log_error log_step _format_duration _get_elapsed_time_for_log
 
 # --- Argument Parsing for Analysis-Only Mode ---
-# Check if the first argument is --analyze-log
+# This remains the same, as it exits early if matched
 if [ "$#" -ge 1 ] && [ "$1" == "--analyze-log" ]; then
   LOG_TO_ANALYZE=""
   # Check if a log file path was provided as the second argument
@@ -171,6 +212,13 @@ log_step() {
 # called *inside* this block depend on it. If not, it can be removed.
 start_time_for_helpers=$(date +%s) # Keep if needed by helpers called inside this block
 
+# Log the verification decision
+if [ "$run_verification_test" = true ]; then
+  log_info "Fallback verification test will be run as part of this E2E test."
+else
+  log_info "Fallback verification test will be SKIPPED (--skip-verification flag detected)."
+fi
+
 # --- Dependency Checks ---
 log_step "Checking for dependencies (jq)"
 if ! command -v jq &> /dev/null; then
@@ -305,29 +353,33 @@ log_step() {
 # === End Model Commands Test ===
 
 # === Fallback Model generateObjectService Verification ===
-log_step "Starting Fallback Model (generateObjectService) Verification (Calls separate script)"
-verification_script_path="$ORIGINAL_DIR/tests/e2e/run_fallback_verification.sh"
+if [ "$run_verification_test" = true ]; then
+  log_step "Starting Fallback Model (generateObjectService) Verification (Calls separate script)"
+  verification_script_path="$ORIGINAL_DIR/tests/e2e/run_fallback_verification.sh"
 
   if [ -x "$verification_script_path" ]; then
     log_info "--- Executing Fallback Verification Script: $verification_script_path ---"
     # Execute the script directly, allowing output to flow to tee
     # Pass the current directory (the test run dir) as the argument
     "$verification_script_path" "$(pwd)"
     verification_exit_code=$? # Capture exit code immediately
     log_info "--- Finished Fallback Verification Script Execution (Exit Code: $verification_exit_code) ---"
 
     # Log success/failure based on captured exit code
     if [ $verification_exit_code -eq 0 ]; then
       log_success "Fallback verification script reported success."
     else
       log_error "Fallback verification script reported FAILURE (Exit Code: $verification_exit_code)."
       # Decide whether to exit the main script or just log the error
       # exit 1 # Uncomment to make verification failure fatal
     fi
+  else
+    log_error "Fallback verification script not found or not executable at $verification_script_path. Skipping verification."
+    # Decide whether to exit or continue
+    # exit 1
+  fi
 else
-  log_error "Fallback verification script not found or not executable at $verification_script_path. Skipping verification."
-  # Decide whether to exit or continue
-  # exit 1
+  log_info "Skipping Fallback Verification test as requested by flag."
 fi
 # === END Verification Section ===

View File

@@ -57,24 +57,19 @@ log_step() {
 # --- Signal Handling ---
 # Global variable to hold child PID
 child_pid=0
-# Keep track of the summary file for cleanup
-verification_summary_file="fallback_verification_summary.log" # Temp file in cwd
+# Use a persistent log file name
+PROGRESS_LOG_FILE="fallback_verification_progress.log"
 
 cleanup() {
   echo "" # Newline after ^C
-  log_error "Interrupt received. Cleaning up..."
+  log_error "Interrupt received. Cleaning up any running child process..."
   if [ "$child_pid" -ne 0 ]; then
     log_info "Killing child process (PID: $child_pid) and its group..."
-    # Kill the process group (timeout and task-master) - TERM first, then KILL
     kill -TERM -- "-$child_pid" 2>/dev/null || kill -KILL -- "-$child_pid" 2>/dev/null
-    child_pid=0 # Reset pid after attempting kill
+    child_pid=0
   fi
-  # Clean up temporary file if it exists
-  if [ -f "$verification_summary_file" ]; then
-    log_info "Removing temporary summary file: $verification_summary_file"
-    rm -f "$verification_summary_file"
-  fi
-  # Ensure script exits after cleanup
+  # DO NOT delete the progress log file on interrupt
+  log_info "Progress saved in: $PROGRESS_LOG_FILE"
   exit 130 # Exit with code indicating interrupt
 }
@@ -126,13 +121,10 @@ fi
echo "[INFO] Now operating inside: $(pwd)" echo "[INFO] Now operating inside: $(pwd)"
# --- Now we are inside the target run directory --- # --- Now we are inside the target run directory ---
# Define overall_start_time and test_step_count *after* changing dir
overall_start_time=$(date +%s) overall_start_time=$(date +%s)
test_step_count=0 # Local step counter for this script test_step_count=0
# Log that helpers were sourced (now that functions are available)
# No longer sourcing, just log start
log_info "Starting fallback verification script execution in $(pwd)" log_info "Starting fallback verification script execution in $(pwd)"
log_info "Progress will be logged to: $(pwd)/$PROGRESS_LOG_FILE"
# --- Dependency Checks --- # --- Dependency Checks ---
log_step "Checking for dependencies (jq) in verification script" log_step "Checking for dependencies (jq) in verification script"
@@ -143,9 +135,9 @@ fi
log_success "Dependency 'jq' found." log_success "Dependency 'jq' found."
# --- Verification Logic --- # --- Verification Logic ---
log_step "Starting Fallback Model (generateObjectService) Verification" log_step "Starting/Resuming Fallback Model (generateObjectService) Verification"
# Initialise summary file (path defined earlier) # Ensure progress log exists, create if not
echo "--- Fallback Verification Summary ---" > "$verification_summary_file" touch "$PROGRESS_LOG_FILE"
# Ensure the supported models file exists (using absolute path) # Ensure the supported models file exists (using absolute path)
if [ ! -f "$SUPPORTED_MODELS_FILE" ]; then if [ ! -f "$SUPPORTED_MODELS_FILE" ]; then
@@ -166,36 +158,41 @@ if ! jq -e '.tasks[] | select(.id == 1) | .subtasks[] | select(.id == 1)' tasks/
 fi
 log_info "Subtask 1.1 found in $(pwd)/tasks/tasks.json, proceeding with verification."
 
-# Read providers and models using jq (using absolute path to models file)
+# Read providers and models using jq
 jq -c 'to_entries[] | .key as $provider | .value[] | select(.allowed_roles[]? == "fallback") | {provider: $provider, id: .id}' "$SUPPORTED_MODELS_FILE" | while IFS= read -r model_info; do
   provider=$(echo "$model_info" | jq -r '.provider')
   model_id=$(echo "$model_info" | jq -r '.id')
   flag="" # Default flag
 
+  # Check if already tested
+  # Use grep -Fq for fixed string and quiet mode
+  if grep -Fq "${provider},${model_id}," "$PROGRESS_LOG_FILE"; then
+    log_info "--- Skipping: $provider / $model_id (already tested, result in $PROGRESS_LOG_FILE) ---"
+    continue
+  fi
+
+  log_info "--- Verifying: $provider / $model_id ---"
+
   # Determine provider flag
   if [ "$provider" == "openrouter" ]; then
     flag="--openrouter"
   elif [ "$provider" == "ollama" ]; then
     flag="--ollama"
-    # Add elif for other providers requiring flags
   fi
 
-  log_info "--- Verifying: $provider / $model_id ---"
-
   # 1. Set the main model
-  # Ensure task-master command is available (might need linking if run totally standalone)
   if ! command -v task-master &> /dev/null; then
-    log_error "task-master command not found. Ensure it's linked globally or available in PATH."
-    # Attempt to link if possible? Risky. Better to instruct user.
+    log_error "task-master command not found."
     echo "[INSTRUCTION] Please run 'npm link task-master-ai' in the project root first."
     exit 1
   fi
 
   log_info "Setting main model to $model_id ${flag:+using flag $flag}..."
   set_model_cmd="task-master models --set-main \"$model_id\" $flag"
-  if ! eval $set_model_cmd > /dev/null 2>&1; then # Hide verbose output of models cmd
-    log_error "Failed to set main model for $provider / $model_id. Skipping."
-    echo "$provider,$model_id,SET_MODEL_FAILED" >> "$verification_summary_file"
-    continue
+  model_set_status="SUCCESS"
+  if ! eval $set_model_cmd > /dev/null 2>&1; then
+    log_error "Failed to set main model for $provider / $model_id. Skipping test."
+    echo "$provider,$model_id,SET_MODEL_FAILED" >> "$PROGRESS_LOG_FILE"
+    continue # Skip the actual test if setting fails
   fi
   log_info "Set main model ok."
@@ -203,69 +200,69 @@ jq -c 'to_entries[] | .key as $provider | .value[] | select(.allowed_roles[]? ==
log_info "Running update-subtask --id=1.1 --prompt='Test generateObjectService' (timeout 120s)" log_info "Running update-subtask --id=1.1 --prompt='Test generateObjectService' (timeout 120s)"
update_subtask_output_file="update_subtask_raw_output_${provider}_${model_id//\//_}.log" update_subtask_output_file="update_subtask_raw_output_${provider}_${model_id//\//_}.log"
# Run timeout command in the background
timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." > "$update_subtask_output_file" 2>&1 & timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." > "$update_subtask_output_file" 2>&1 &
child_pid=$! # Store the PID of the background process (timeout) child_pid=$!
# Wait specifically for the child process PID
wait "$child_pid" wait "$child_pid"
update_subtask_exit_code=$? update_subtask_exit_code=$?
child_pid=0 # Reset child_pid after it finishes or is killed/interrupted child_pid=0
# 3. Check for success # 3. Check result and log persistently
# SIGINT = 130 (128 + 2), SIGTERM = 143 (128 + 15) result_status=""
# Check exit code AND grep for the success message in the output file
if [ $update_subtask_exit_code -eq 0 ] && grep -q "Successfully updated subtask #1.1" "$update_subtask_output_file"; then if [ $update_subtask_exit_code -eq 0 ] && grep -q "Successfully updated subtask #1.1" "$update_subtask_output_file"; then
# Success (Exit code 0 AND success message found)
log_success "update-subtask succeeded for $provider / $model_id (Verified Output)." log_success "update-subtask succeeded for $provider / $model_id (Verified Output)."
echo "$provider,$model_id,SUCCESS" >> "$verification_summary_file" result_status="SUCCESS"
elif [ $update_subtask_exit_code -eq 124 ]; then elif [ $update_subtask_exit_code -eq 124 ]; then
# Timeout log_error "update-subtask TIMED OUT for $provider / $model_id. Check $update_subtask_output_file."
log_error "update-subtask TIMED OUT for $provider / $model_id. Check $update_subtask_output_file." result_status="FAILED_TIMEOUT"
echo "$provider,$model_id,FAILED_TIMEOUT" >> "$verification_summary_file"
elif [ $update_subtask_exit_code -eq 130 ] || [ $update_subtask_exit_code -eq 143 ]; then elif [ $update_subtask_exit_code -eq 130 ] || [ $update_subtask_exit_code -eq 143 ]; then
# Interrupted by trap
log_error "update-subtask INTERRUPTED for $provider / $model_id." log_error "update-subtask INTERRUPTED for $provider / $model_id."
# Trap handler already exited the script. No need to write to summary. result_status="INTERRUPTED" # Record interruption
# If we reach here unexpectedly, something is wrong with the trap. # Don't exit the loop, allow script to finish or be interrupted again
else # Covers non-zero exit code OR zero exit code but missing success message else
# Other failure
log_error "update-subtask FAILED for $provider / $model_id (Exit Code: $update_subtask_exit_code). Check $update_subtask_output_file." log_error "update-subtask FAILED for $provider / $model_id (Exit Code: $update_subtask_exit_code). Check $update_subtask_output_file."
echo "$provider,$model_id,FAILED" >> "$verification_summary_file" result_status="FAILED"
fi fi
# Append result to the persistent log file
echo "$provider,$model_id,$result_status" >> "$PROGRESS_LOG_FILE"
done # End of fallback verification loop done # End of fallback verification loop
# --- Generate Final Verification Report to STDOUT --- # --- Generate Final Verification Report to STDOUT ---
# Report reads from the persistent PROGRESS_LOG_FILE
echo "" echo ""
echo "--- Fallback Model Verification Report (via $0) ---" echo "--- Fallback Model Verification Report (via $0) ---"
echo "Executed inside run directory: $(pwd)" echo "Executed inside run directory: $(pwd)"
echo "Progress log: $(pwd)/$PROGRESS_LOG_FILE"
echo "" echo ""
echo "Test Command: task-master update-subtask --id=1.1 --prompt=\"...\" (tests generateObjectService)" echo "Test Command: task-master update-subtask --id=1.1 --prompt=\"...\" (tests generateObjectService)"
echo "Models were tested by setting them as the 'main' model temporarily." echo "Models were tested by setting them as the 'main' model temporarily."
echo "Results based on exit code of the test command:" echo "Results based on exit code and output verification:"
echo "" echo ""
echo "Models CONFIRMED to support generateObjectService (Keep 'fallback' role):" echo "Models CONFIRMED to support generateObjectService (Keep 'fallback' role):"
awk -F',' '$3 == "SUCCESS" { print "- " $1 " / " $2 }' "$verification_summary_file" | sort awk -F',' '$3 == "SUCCESS" { print "- " $1 " / " $2 }' "$PROGRESS_LOG_FILE" | sort
echo "" echo ""
echo "Models FAILED generateObjectService test (Suggest REMOVING 'fallback' role from supported-models.json):" echo "Models FAILED generateObjectService test (Suggest REMOVING 'fallback' role):"
awk -F',' '$3 == "FAILED" { print "- " $1 " / " $2 }' "$verification_summary_file" | sort awk -F',' '$3 == "FAILED" { print "- " $1 " / " $2 }' "$PROGRESS_LOG_FILE" | sort
echo "" echo ""
echo "Models TIMED OUT during generateObjectService test (Likely Failure - Suggest REMOVING 'fallback' role):" echo "Models TIMED OUT during test (Suggest REMOVING 'fallback' role):"
awk -F',' '$3 == "FAILED_TIMEOUT" { print "- " $1 " / " $2 }' "$verification_summary_file" | sort awk -F',' '$3 == "FAILED_TIMEOUT" { print "- " $1 " / " $2 }' "$PROGRESS_LOG_FILE" | sort
echo "" echo ""
echo "Models where setting the model failed (Inconclusive - investigate separately):" echo "Models where setting the model failed (Inconclusive):"
awk -F',' '$3 == "SET_MODEL_FAILED" { print "- " $1 " / " $2 }' "$verification_summary_file" | sort awk -F',' '$3 == "SET_MODEL_FAILED" { print "- " $1 " / " $2 }' "$PROGRESS_LOG_FILE" | sort
echo ""
echo "Models INTERRUPTED during test (Inconclusive - Rerun):"
awk -F',' '$3 == "INTERRUPTED" { print "- " $1 " / " $2 }' "$PROGRESS_LOG_FILE" | sort
echo "" echo ""
echo "-------------------------------------------------------" echo "-------------------------------------------------------"
echo "" echo ""
# Clean up temporary summary file # Don't clean up the progress log
if [ -f "$verification_summary_file" ]; then # if [ -f "$PROGRESS_LOG_FILE" ]; then
rm "$verification_summary_file" # rm "$PROGRESS_LOG_FILE"
fi # fi
log_step "Finished Fallback Model (generateObjectService) Verification Script" log_info "Finished Fallback Model (generateObjectService) Verification Script"
# Remove trap before exiting normally # Remove trap before exiting normally
trap - INT TERM trap - INT TERM