refactor: Improve update-subtask, consolidate utils, update config
This commit introduces several improvements and refactorings across MCP tools, core logic, and configuration.
**Major Changes:**
1. **Refactor updateSubtaskById:**
- Switched from generateTextService to generateObjectService for structured AI responses, using a Zod schema (subtaskSchema) for validation.
- Revised prompts to have the AI generate relevant content based on user request and context (parent/sibling tasks), while explicitly preventing AI from handling timestamp/tag formatting.
- Implemented **local timestamp generation (new Date().toISOString()) and formatting** (using <info added on ...> tags) within the function *after* receiving the AI response. This ensures reliable and correctly formatted details are appended.
- Corrected logic to append only the locally formatted, AI-generated content block to the existing subtask.details.
2. **Consolidate MCP Utilities:**
- Moved/consolidated the withNormalizedProjectRoot HOF into mcp-server/src/tools/utils.js.
- Updated MCP tools (like update-subtask.js) to import withNormalizedProjectRoot from the new location.
3. **Refactor Project Initialization:**
- Deleted the redundant mcp-server/src/core/direct-functions/initialize-project-direct.js file.
- Updated mcp-server/src/core/task-master-core.js to import initializeProjectDirect from its correct location (./direct-functions/initialize-project.js).
**Other Changes:**
- Updated .taskmasterconfig fallback model to claude-3-7-sonnet-20250219.
- Clarified model cost representation in the models tool description (taskmaster.mdc and mcp-server/src/tools/models.js).
This commit is contained in:
@@ -20,6 +20,8 @@ MAIN_ENV_FILE="$TASKMASTER_SOURCE_DIR/.env"
|
||||
|
||||
# <<< Source the helper script >>>
|
||||
source "$TASKMASTER_SOURCE_DIR/tests/e2e/e2e_helpers.sh"
|
||||
# <<< Export helper functions for subshells >>>
|
||||
export -f log_info log_success log_error log_step _format_duration _get_elapsed_time_for_log
|
||||
|
||||
# --- Argument Parsing for Analysis-Only Mode ---
|
||||
# Check if the first argument is --analyze-log
|
||||
@@ -50,7 +52,7 @@ if [ "$#" -ge 1 ] && [ "$1" == "--analyze-log" ]; then
|
||||
fi
|
||||
echo "[INFO] Running in analysis-only mode for log: $LOG_TO_ANALYZE"
|
||||
|
||||
# --- Derive TEST_RUN_DIR from log file path ---
|
||||
# --- Derive TEST_RUN_DIR from log file path ---
|
||||
# Extract timestamp like YYYYMMDD_HHMMSS from e2e_run_YYYYMMDD_HHMMSS.log
|
||||
log_basename=$(basename "$LOG_TO_ANALYZE")
|
||||
# Ensure the sed command matches the .log suffix correctly
|
||||
@@ -74,7 +76,7 @@ if [ "$#" -ge 1 ] && [ "$1" == "--analyze-log" ]; then
|
||||
|
||||
# Save original dir before changing
|
||||
ORIGINAL_DIR=$(pwd)
|
||||
|
||||
|
||||
echo "[INFO] Changing directory to $EXPECTED_RUN_DIR_ABS for analysis context..."
|
||||
cd "$EXPECTED_RUN_DIR_ABS"
|
||||
|
||||
@@ -169,6 +171,14 @@ log_step() {
|
||||
# called *inside* this block depend on it. If not, it can be removed.
|
||||
start_time_for_helpers=$(date +%s) # Keep if needed by helpers called inside this block
|
||||
|
||||
# --- Dependency Checks ---
|
||||
log_step "Checking for dependencies (jq)"
|
||||
if ! command -v jq &> /dev/null; then
|
||||
log_error "Dependency 'jq' is not installed or not found in PATH. Please install jq (e.g., 'brew install jq' or 'sudo apt-get install jq')."
|
||||
exit 1
|
||||
fi
|
||||
log_success "Dependency 'jq' found."
|
||||
|
||||
# --- Test Setup (Output to tee) ---
|
||||
log_step "Setting up test environment"
|
||||
|
||||
@@ -241,11 +251,7 @@ log_step() {
|
||||
fi
|
||||
log_success "PRD parsed successfully."
|
||||
|
||||
log_step "Listing tasks"
|
||||
task-master list > task_list_output.log
|
||||
log_success "Task list saved to task_list_output.log"
|
||||
|
||||
log_step "Analyzing complexity"
|
||||
log_step "Expanding Task 1 (to ensure subtask 1.1 exists)"
|
||||
# Add --research flag if needed and API keys support it
|
||||
task-master analyze-complexity --research --output complexity_results.json
|
||||
if [ ! -f "complexity_results.json" ]; then
|
||||
@@ -298,7 +304,35 @@ log_step() {
|
||||
|
||||
# === End Model Commands Test ===
|
||||
|
||||
# === Multi-Provider Add-Task Test ===
|
||||
# === Fallback Model generateObjectService Verification ===
|
||||
log_step "Starting Fallback Model (generateObjectService) Verification (Calls separate script)"
|
||||
verification_script_path="$ORIGINAL_DIR/tests/e2e/run_fallback_verification.sh"
|
||||
|
||||
if [ -x "$verification_script_path" ]; then
|
||||
log_info "--- Executing Fallback Verification Script: $verification_script_path ---"
|
||||
# Execute the script directly, allowing output to flow to tee
|
||||
# Pass the current directory (the test run dir) as the argument
|
||||
"$verification_script_path" "$(pwd)"
|
||||
verification_exit_code=$? # Capture exit code immediately
|
||||
log_info "--- Finished Fallback Verification Script Execution (Exit Code: $verification_exit_code) ---"
|
||||
|
||||
# Log success/failure based on captured exit code
|
||||
if [ $verification_exit_code -eq 0 ]; then
|
||||
log_success "Fallback verification script reported success."
|
||||
else
|
||||
log_error "Fallback verification script reported FAILURE (Exit Code: $verification_exit_code)."
|
||||
# Decide whether to exit the main script or just log the error
|
||||
# exit 1 # Uncomment to make verification failure fatal
|
||||
fi
|
||||
else
|
||||
log_error "Fallback verification script not found or not executable at $verification_script_path. Skipping verification."
|
||||
# Decide whether to exit or continue
|
||||
# exit 1
|
||||
fi
|
||||
# === END Verification Section ===
|
||||
|
||||
|
||||
# === Multi-Provider Add-Task Test (Keep as is) ===
|
||||
log_step "Starting Multi-Provider Add-Task Test Sequence"
|
||||
|
||||
# Define providers, models, and flags
|
||||
@@ -308,9 +342,9 @@ log_step() {
|
||||
"claude-3-7-sonnet-20250219"
|
||||
"gpt-4o"
|
||||
"gemini-2.5-pro-exp-03-25"
|
||||
"sonar-pro"
|
||||
"sonar-pro" # Note: This is research-only, add-task might fail if not using research model
|
||||
"grok-3"
|
||||
"anthropic/claude-3.7-sonnet" # OpenRouter uses Claude 3.7
|
||||
"anthropic/claude-3.7-sonnet" # OpenRouter uses Claude 3.7
|
||||
)
|
||||
# Flags: Add provider-specific flags here, e.g., --openrouter. Use empty string if none.
|
||||
declare -a flags=("" "" "" "" "" "--openrouter")
|
||||
@@ -318,6 +352,7 @@ log_step() {
|
||||
# Consistent prompt for all providers
|
||||
add_task_prompt="Create a task to implement user authentication using OAuth 2.0 with Google as the provider. Include steps for registering the app, handling the callback, and storing user sessions."
|
||||
log_info "Using consistent prompt for add-task tests: \"$add_task_prompt\""
|
||||
echo "--- Multi-Provider Add Task Summary ---" > provider_add_task_summary.log # Initialize summary log
|
||||
|
||||
for i in "${!providers[@]}"; do
|
||||
provider="${providers[$i]}"
|
||||
@@ -341,7 +376,7 @@ log_step() {
|
||||
|
||||
# 2. Run add-task
|
||||
log_info "Running add-task with prompt..."
|
||||
add_task_output_file="add_task_raw_output_${provider}.log"
|
||||
add_task_output_file="add_task_raw_output_${provider}_${model//\//_}.log" # Sanitize ID
|
||||
# Run add-task and capture ALL output (stdout & stderr) to a file AND a variable
|
||||
add_task_cmd_output=$(task-master add-task --prompt "$add_task_prompt" 2>&1 | tee "$add_task_output_file")
|
||||
add_task_exit_code=${PIPESTATUS[0]}
|
||||
@@ -388,29 +423,30 @@ log_step() {
|
||||
echo "Provider add-task summary log available at: provider_add_task_summary.log"
|
||||
# === End Multi-Provider Add-Task Test ===
|
||||
|
||||
log_step "Listing tasks again (final)"
|
||||
task-master list --with-subtasks > task_list_final.log
|
||||
log_success "Final task list saved to task_list_final.log"
|
||||
log_step "Listing tasks again (after multi-add)"
|
||||
task-master list --with-subtasks > task_list_after_multi_add.log
|
||||
log_success "Task list after multi-add saved to task_list_after_multi_add.log"
|
||||
|
||||
# === Test Core Task Commands ===
|
||||
log_step "Listing tasks (initial)"
|
||||
task-master list > task_list_initial.log
|
||||
log_success "Initial task list saved to task_list_initial.log"
|
||||
|
||||
# === Resume Core Task Commands Test ===
|
||||
log_step "Listing tasks (for core tests)"
|
||||
task-master list > task_list_core_test_start.log
|
||||
log_success "Core test initial task list saved."
|
||||
|
||||
log_step "Getting next task"
|
||||
task-master next > next_task_initial.log
|
||||
log_success "Initial next task saved to next_task_initial.log"
|
||||
task-master next > next_task_core_test.log
|
||||
log_success "Core test next task saved."
|
||||
|
||||
log_step "Showing Task 1 details"
|
||||
task-master show 1 > task_1_details.log
|
||||
log_success "Task 1 details saved to task_1_details.log"
|
||||
task-master show 1 > task_1_details_core_test.log
|
||||
log_success "Task 1 details saved."
|
||||
|
||||
log_step "Adding dependency (Task 2 depends on Task 1)"
|
||||
task-master add-dependency --id=2 --depends-on=1
|
||||
log_success "Added dependency 2->1."
|
||||
|
||||
log_step "Validating dependencies (after add)"
|
||||
task-master validate-dependencies > validate_dependencies_after_add.log
|
||||
task-master validate-dependencies > validate_dependencies_after_add_core.log
|
||||
log_success "Dependency validation after add saved."
|
||||
|
||||
log_step "Removing dependency (Task 2 depends on Task 1)"
|
||||
@@ -418,7 +454,7 @@ log_step() {
|
||||
log_success "Removed dependency 2->1."
|
||||
|
||||
log_step "Fixing dependencies (should be no-op now)"
|
||||
task-master fix-dependencies > fix_dependencies_output.log
|
||||
task-master fix-dependencies > fix_dependencies_output_core.log
|
||||
log_success "Fix dependencies attempted."
|
||||
|
||||
# === Start New Test Section: Validate/Fix Bad Dependencies ===
|
||||
@@ -483,15 +519,20 @@ log_step() {
|
||||
|
||||
# === End New Test Section ===
|
||||
|
||||
log_step "Adding Task 11 (Manual)"
|
||||
task-master add-task --title="Manual E2E Task" --description="Add basic health check endpoint" --priority=low --dependencies=3 # Depends on backend setup
|
||||
# Assuming the new task gets ID 11 (adjust if PRD parsing changes)
|
||||
log_success "Added Task 11 manually."
|
||||
# Find the next available task ID dynamically instead of hardcoding 11, 12
|
||||
# Assuming tasks are added sequentially and we didn't remove any core tasks yet
|
||||
last_task_id=$(jq '[.tasks[].id] | max' tasks/tasks.json)
|
||||
manual_task_id=$((last_task_id + 1))
|
||||
ai_task_id=$((manual_task_id + 1))
|
||||
|
||||
log_step "Adding Task 12 (AI)"
|
||||
log_step "Adding Task $manual_task_id (Manual)"
|
||||
task-master add-task --title="Manual E2E Task" --description="Add basic health check endpoint" --priority=low --dependencies=3 # Depends on backend setup
|
||||
log_success "Added Task $manual_task_id manually."
|
||||
|
||||
log_step "Adding Task $ai_task_id (AI)"
|
||||
task-master add-task --prompt="Implement basic UI styling using CSS variables for colors and spacing" --priority=medium --dependencies=1 # Depends on frontend setup
|
||||
# Assuming the new task gets ID 12
|
||||
log_success "Added Task 12 via AI prompt."
|
||||
log_success "Added Task $ai_task_id via AI prompt."
|
||||
|
||||
|
||||
log_step "Updating Task 3 (update-task AI)"
|
||||
task-master update-task --id=3 --prompt="Update backend server setup: Ensure CORS is configured to allow requests from the frontend origin."
|
||||
@@ -524,8 +565,8 @@ log_step() {
|
||||
log_success "Set status for Task 1 to done."
|
||||
|
||||
log_step "Getting next task (after status change)"
|
||||
task-master next > next_task_after_change.log
|
||||
log_success "Next task after change saved to next_task_after_change.log"
|
||||
task-master next > next_task_after_change_core.log
|
||||
log_success "Next task after change saved."
|
||||
|
||||
# === Start New Test Section: List Filtering ===
|
||||
log_step "Listing tasks filtered by status 'done'"
|
||||
@@ -543,10 +584,10 @@ log_step() {
|
||||
task-master clear-subtasks --id=8
|
||||
log_success "Attempted to clear subtasks from Task 8."
|
||||
|
||||
log_step "Removing Tasks 11 and 12 (multi-ID)"
|
||||
log_step "Removing Tasks $manual_task_id and $ai_task_id (multi-ID)"
|
||||
# Remove the tasks we added earlier
|
||||
task-master remove-task --id=11,12 -y
|
||||
log_success "Removed tasks 11 and 12."
|
||||
task-master remove-task --id="$manual_task_id,$ai_task_id" -y
|
||||
log_success "Removed tasks $manual_task_id and $ai_task_id."
|
||||
|
||||
# === Start New Test Section: Subtasks & Dependencies ===
|
||||
|
||||
@@ -569,6 +610,11 @@ log_step() {
|
||||
log_step "Expanding Task 1 again (to have subtasks for next test)"
|
||||
task-master expand --id=1
|
||||
log_success "Attempted to expand Task 1 again."
|
||||
# Verify 1.1 exists again
|
||||
if ! jq -e '.tasks[] | select(.id == 1) | .subtasks[] | select(.id == 1)' tasks/tasks.json > /dev/null; then
|
||||
log_error "Subtask 1.1 not found in tasks.json after re-expanding Task 1."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
log_step "Adding dependency: Task 3 depends on Subtask 1.1"
|
||||
task-master add-dependency --id=3 --depends-on=1.1
|
||||
@@ -593,25 +639,17 @@ log_step() {
|
||||
log_success "Generated task files."
|
||||
# === End Core Task Commands Test ===
|
||||
|
||||
# === AI Commands (Tested earlier implicitly with add/update/expand) ===
|
||||
log_step "Analyzing complexity (AI with Research)"
|
||||
task-master analyze-complexity --research --output complexity_results.json
|
||||
if [ ! -f "complexity_results.json" ]; then log_error "Complexity analysis failed."; exit 1; fi
|
||||
log_success "Complexity analysis saved to complexity_results.json"
|
||||
# === AI Commands (Re-test some after changes) ===
|
||||
log_step "Analyzing complexity (AI with Research - Final Check)"
|
||||
task-master analyze-complexity --research --output complexity_results_final.json
|
||||
if [ ! -f "complexity_results_final.json" ]; then log_error "Final Complexity analysis failed."; exit 1; fi
|
||||
log_success "Final Complexity analysis saved."
|
||||
|
||||
log_step "Generating complexity report (Non-AI)"
|
||||
task-master complexity-report --file complexity_results.json > complexity_report_formatted.log
|
||||
log_success "Formatted complexity report saved to complexity_report_formatted.log"
|
||||
log_step "Generating complexity report (Non-AI - Final Check)"
|
||||
task-master complexity-report --file complexity_results_final.json > complexity_report_formatted_final.log
|
||||
log_success "Final Formatted complexity report saved."
|
||||
|
||||
# Expand All (Commented Out)
|
||||
# log_step "Expanding All Tasks (AI - Heavy Operation, Commented Out)"
|
||||
# task-master expand --all --research
|
||||
# log_success "Attempted to expand all tasks."
|
||||
|
||||
log_step "Expanding Task 1 (AI - Note: Subtasks were removed/cleared)"
|
||||
task-master expand --id=1
|
||||
log_success "Attempted to expand Task 1 again."
|
||||
# === End AI Commands ===
|
||||
# === End AI Commands Re-test ===
|
||||
|
||||
log_step "Listing tasks again (final)"
|
||||
task-master list --with-subtasks > task_list_final.log
|
||||
@@ -623,17 +661,7 @@ log_step() {
|
||||
ABS_TEST_RUN_DIR="$(pwd)"
|
||||
echo "Test artifacts and logs are located in: $ABS_TEST_RUN_DIR"
|
||||
echo "Key artifact files (within above dir):"
|
||||
echo " - .env (Copied from source)"
|
||||
echo " - tasks/tasks.json"
|
||||
echo " - task_list_output.log"
|
||||
echo " - complexity_results.json"
|
||||
echo " - complexity_report_formatted.log"
|
||||
echo " - task_list_after_changes.log"
|
||||
echo " - models_initial_config.log, models_final_config.log"
|
||||
echo " - task_list_final.log"
|
||||
echo " - task_list_initial.log, next_task_initial.log, task_1_details.log"
|
||||
echo " - validate_dependencies_after_add.log, fix_dependencies_output.log"
|
||||
echo " - complexity_*.log"
|
||||
ls -1 # List files in the current directory
|
||||
echo ""
|
||||
echo "Full script log also available at: $LOG_FILE (relative to project root)"
|
||||
|
||||
|
||||
273
tests/e2e/run_fallback_verification.sh
Executable file
273
tests/e2e/run_fallback_verification.sh
Executable file
@@ -0,0 +1,273 @@
|
||||
#!/bin/bash
|
||||
|
||||
# --- Fallback Model Verification Script ---
|
||||
# Purpose: Tests models marked as 'fallback' in supported-models.json
|
||||
# to see if they work with generateObjectService (via update-subtask).
|
||||
# Usage: 1. Run from within a prepared E2E test run directory:
|
||||
# ./path/to/script.sh .
|
||||
# 2. Run from project root (or anywhere) to use the latest run dir:
|
||||
# ./tests/e2e/run_fallback_verification.sh
|
||||
# 3. Run from project root (or anywhere) targeting a specific run dir:
|
||||
# ./tests/e2e/run_fallback_verification.sh /path/to/tests/e2e/_runs/run_YYYYMMDD_HHMMSS
|
||||
# Output: Prints a summary report to standard output. Errors to standard error.
|
||||
|
||||
# Treat unset variables as an error when substituting.
|
||||
set -u
|
||||
# Prevent errors in pipelines from being masked.
|
||||
set -o pipefail
|
||||
|
||||
# --- Embedded Helper Functions ---
|
||||
# Copied from e2e_helpers.sh to make this script standalone
|
||||
|
||||
# Render an integer second count as "<minutes>m<seconds>s",
# zero-padding seconds to two digits (e.g. 125 -> "2m05s").
_format_duration() {
  local secs=$1
  printf "%dm%02ds" "$((secs / 60))" "$((secs % 60))"
}
|
||||
|
||||
# Elapsed wall-clock time since $overall_start_time, rendered via
# _format_duration. Relies on overall_start_time being set by the main
# script body before any logging happens.
_get_elapsed_time_for_log() {
  local now
  now=$(date +%s)
  _format_duration "$((now - overall_start_time))"
}
|
||||
|
||||
# Informational log line to stdout: "[INFO] [<elapsed>] <timestamp> <msg>".
log_info() {
  printf '%s\n' "[INFO] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
}
|
||||
|
||||
# Success log line to stdout: "[SUCCESS] [<elapsed>] <timestamp> <msg>".
log_success() {
  printf '%s\n' "[SUCCESS] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
}
|
||||
|
||||
# Error log line — written to stderr so it survives stdout redirection.
log_error() {
  printf '%s\n' "[ERROR] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2
}
|
||||
|
||||
# Print a numbered step banner and bump the shared step counter.
# Relies on test_step_count being defined in the main script body.
log_step() {
  test_step_count=$((test_step_count + 1))
  printf '%s\n' ""
  printf '%s\n' "============================================="
  printf '%s\n' " STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
  printf '%s\n' "============================================="
}
|
||||
|
||||
# --- Signal Handling ---
# PID of the in-flight background child (0 = nothing running).
child_pid=0
# Temporary summary file in the current working directory; removed by
# cleanup() on interrupt and again at normal script exit.
verification_summary_file="fallback_verification_summary.log" # Temp file in cwd

# Interrupt handler: terminate any running child, delete the temp summary
# file, and exit with the conventional SIGINT status.
cleanup() {
  echo "" # Newline after ^C
  log_error "Interrupt received. Cleaning up..."
  if (( child_pid != 0 )); then
    log_info "Killing child process (PID: $child_pid) and its group..."
    # TERM first, escalate to KILL. NOTE(review): "-$child_pid" targets a
    # process *group*; this assumes the background child is a group leader,
    # which plain background jobs in non-interactive shells may not be —
    # confirm this actually reaches the task-master process.
    kill -TERM -- "-$child_pid" 2>/dev/null || kill -KILL -- "-$child_pid" 2>/dev/null
    child_pid=0 # Reset pid after attempting kill
  fi
  if [[ -f "$verification_summary_file" ]]; then
    log_info "Removing temporary summary file: $verification_summary_file"
    rm -f -- "$verification_summary_file"
  fi
  # Ensure script exits after cleanup
  exit 130 # Exit with code indicating interrupt
}

# Trap SIGINT (Ctrl+C) and SIGTERM
trap cleanup INT TERM
|
||||
|
||||
# --- Configuration ---
# Resolve all paths relative to this script's own location so it can be
# invoked from anywhere (assumes the script lives in tests/e2e/).
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
PROJECT_ROOT_DIR="$( cd "$SCRIPT_DIR/../.." &> /dev/null && pwd )"
SUPPORTED_MODELS_FILE="$PROJECT_ROOT_DIR/scripts/modules/supported-models.json"
BASE_RUNS_DIR="$PROJECT_ROOT_DIR/tests/e2e/_runs"

# --- Determine Target Run Directory ---
# Taken from $1 (made absolute if relative), or discovered as the most
# recently modified run_* directory under BASE_RUNS_DIR.
TARGET_RUN_DIR=""
if [[ "$#" -ge 1 && -n "$1" ]]; then
  TARGET_RUN_DIR="$1"
  [[ "$TARGET_RUN_DIR" == /* ]] || TARGET_RUN_DIR="$(pwd)/$TARGET_RUN_DIR"
  printf '%s\n' "[INFO] Using provided target run directory: $TARGET_RUN_DIR"
else
  printf '%s\n' "[INFO] No run directory provided, finding latest in $BASE_RUNS_DIR..."
  TARGET_RUN_DIR=$(ls -td "$BASE_RUNS_DIR"/run_* 2>/dev/null | head -n 1)
  if [[ -z "$TARGET_RUN_DIR" ]]; then
    printf '%s\n' "[ERROR] No run directories found matching 'run_*' in $BASE_RUNS_DIR. Cannot proceed." >&2
    exit 1
  fi
  printf '%s\n' "[INFO] Found latest run directory: $TARGET_RUN_DIR"
fi

# Validate the target directory
if [[ ! -d "$TARGET_RUN_DIR" ]]; then
  printf '%s\n' "[ERROR] Target run directory not found or is not a directory: $TARGET_RUN_DIR" >&2
  exit 1
fi

# --- Change to Target Directory ---
# Every subsequent step operates from inside the run directory.
printf '%s\n' "[INFO] Changing working directory to: $TARGET_RUN_DIR"
if ! cd "$TARGET_RUN_DIR"; then
  printf '%s\n' "[ERROR] Failed to cd into target directory: $TARGET_RUN_DIR" >&2
  exit 1
fi
printf '%s\n' "[INFO] Now operating inside: $(pwd)"
|
||||
|
||||
# --- Now we are inside the target run directory ---
# Timing and step counters are initialised *after* the cd so the log
# helpers above have everything they need from here on.
overall_start_time=$(date +%s)
test_step_count=0 # Local step counter for this script

log_info "Starting fallback verification script execution in $(pwd)"

# --- Dependency Checks ---
log_step "Checking for dependencies (jq) in verification script"
if ! command -v jq &> /dev/null; then
  log_error "Dependency 'jq' is not installed or not found in PATH."
  exit 1
fi
log_success "Dependency 'jq' found."

# --- Verification Logic ---
log_step "Starting Fallback Model (generateObjectService) Verification"
# Initialise summary file (path defined earlier)
echo "--- Fallback Verification Summary ---" > "$verification_summary_file"

# The supported-models catalogue must exist (checked via absolute path).
if [[ ! -f "$SUPPORTED_MODELS_FILE" ]]; then
  log_error "supported-models.json not found at absolute path: $SUPPORTED_MODELS_FILE."
  exit 1
fi
log_info "Using supported models file: $SUPPORTED_MODELS_FILE"

# Sanity-check the run directory: tasks.json and subtask 1.1 must exist,
# since every verification below targets `update-subtask --id=1.1`.
if [[ ! -f "tasks/tasks.json" ]]; then
  log_error "tasks/tasks.json not found in current directory ($(pwd)). Was this run directory properly initialized?"
  exit 1
fi
if ! jq -e '.tasks[] | select(.id == 1) | .subtasks[] | select(.id == 1)' tasks/tasks.json > /dev/null 2>&1; then
  log_error "Subtask 1.1 not found in tasks.json within $(pwd). Cannot perform update-subtask tests."
  exit 1
fi
log_info "Subtask 1.1 found in $(pwd)/tasks/tasks.json, proceeding with verification."
|
||||
|
||||
# Read every model with a "fallback" role from supported-models.json and
# verify it works with generateObjectService by running
# `task-master update-subtask` against subtask 1.1.
#
# BUG FIX: the original piped jq into `while read`, which ran the loop in
# a pipeline subshell. Assignments to child_pid inside the loop were
# therefore invisible to the INT/TERM trap in the parent shell, so Ctrl+C
# could never kill the in-flight task-master process. Feeding the loop
# via process substitution keeps it in the current shell.
while IFS= read -r model_info; do
  provider=$(echo "$model_info" | jq -r '.provider')
  model_id=$(echo "$model_info" | jq -r '.id')
  flag="" # Default flag

  # Determine provider flag
  if [ "$provider" == "openrouter" ]; then
    flag="--openrouter"
  elif [ "$provider" == "ollama" ]; then
    flag="--ollama"
  # Add elif for other providers requiring flags
  fi

  log_info "--- Verifying: $provider / $model_id ---"

  # 1. Set the main model
  # Ensure task-master command is available (might need linking if run totally standalone)
  if ! command -v task-master &> /dev/null; then
    log_error "task-master command not found. Ensure it's linked globally or available in PATH."
    echo "[INSTRUCTION] Please run 'npm link task-master-ai' in the project root first."
    exit 1
  fi
  log_info "Setting main model to $model_id ${flag:+using flag $flag}..."
  # Build the command as an argument array instead of eval-ing a string:
  # avoids word-splitting/quoting pitfalls with model IDs containing '/'.
  set_model_cmd=(task-master models --set-main "$model_id")
  [ -n "$flag" ] && set_model_cmd+=("$flag")
  if ! "${set_model_cmd[@]}" > /dev/null 2>&1; then # Hide verbose output of models cmd
    log_error "Failed to set main model for $provider / $model_id. Skipping."
    echo "$provider,$model_id,SET_MODEL_FAILED" >> "$verification_summary_file"
    continue
  fi
  log_info "Set main model ok."

  # 2. Run update-subtask with a 120s cap. Run in the background so the
  # PID is recorded for the trap handler while we wait on it.
  log_info "Running update-subtask --id=1.1 --prompt='Test generateObjectService' (timeout 120s)"
  update_subtask_output_file="update_subtask_raw_output_${provider}_${model_id//\//_}.log"

  timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." > "$update_subtask_output_file" 2>&1 &
  child_pid=$! # Store the PID of the background process (timeout)

  # Wait specifically for the child process PID
  wait "$child_pid"
  update_subtask_exit_code=$?
  child_pid=0 # Reset child_pid after it finishes or is killed/interrupted

  # 3. Classify the outcome.
  # `timeout` exits 124 on expiry; SIGINT = 130 (128 + 2), SIGTERM = 143 (128 + 15).
  # Success requires exit code 0 AND the success message in the output.
  if [ $update_subtask_exit_code -eq 0 ] && grep -q "Successfully updated subtask #1.1" "$update_subtask_output_file"; then
    # Success (Exit code 0 AND success message found)
    log_success "update-subtask succeeded for $provider / $model_id (Verified Output)."
    echo "$provider,$model_id,SUCCESS" >> "$verification_summary_file"
  elif [ $update_subtask_exit_code -eq 124 ]; then
    # Timeout
    log_error "update-subtask TIMED OUT for $provider / $model_id. Check $update_subtask_output_file."
    echo "$provider,$model_id,FAILED_TIMEOUT" >> "$verification_summary_file"
  elif [ $update_subtask_exit_code -eq 130 ] || [ $update_subtask_exit_code -eq 143 ]; then
    # Interrupted by trap. The trap handler normally exits the script, so
    # reaching here is unexpected — log it, write nothing to the summary.
    log_error "update-subtask INTERRUPTED for $provider / $model_id."
  else # Covers non-zero exit code OR zero exit code but missing success message
    # Other failure
    log_error "update-subtask FAILED for $provider / $model_id (Exit Code: $update_subtask_exit_code). Check $update_subtask_output_file."
    echo "$provider,$model_id,FAILED" >> "$verification_summary_file"
  fi

done < <(jq -c 'to_entries[] | .key as $provider | .value[] | select(.allowed_roles[]? == "fallback") | {provider: $provider, id: .id}' "$SUPPORTED_MODELS_FILE") # End of fallback verification loop
|
||||
|
||||
# --- Generate Final Verification Report to STDOUT ---

# Print one report section: heading, then the sorted "provider / model"
# entries whose summary status (third CSV field) matches $2, then a blank
# line.
report_section() {
  local heading=$1 status=$2
  printf '%s\n' "$heading"
  awk -F',' -v s="$status" '$3 == s { print "- " $1 " / " $2 }' "$verification_summary_file" | sort
  printf '%s\n' ""
}

printf '%s\n' ""
printf '%s\n' "--- Fallback Model Verification Report (via $0) ---"
printf '%s\n' "Executed inside run directory: $(pwd)"
printf '%s\n' ""
printf '%s\n' "Test Command: task-master update-subtask --id=1.1 --prompt=\"...\" (tests generateObjectService)"
printf '%s\n' "Models were tested by setting them as the 'main' model temporarily."
printf '%s\n' "Results based on exit code of the test command:"
printf '%s\n' ""
report_section "Models CONFIRMED to support generateObjectService (Keep 'fallback' role):" "SUCCESS"
report_section "Models FAILED generateObjectService test (Suggest REMOVING 'fallback' role from supported-models.json):" "FAILED"
report_section "Models TIMED OUT during generateObjectService test (Likely Failure - Suggest REMOVING 'fallback' role):" "FAILED_TIMEOUT"
report_section "Models where setting the model failed (Inconclusive - investigate separately):" "SET_MODEL_FAILED"
printf '%s\n' "-------------------------------------------------------"
printf '%s\n' ""

# Clean up temporary summary file
if [[ -f "$verification_summary_file" ]]; then
  rm -- "$verification_summary_file"
fi

log_step "Finished Fallback Model (generateObjectService) Verification Script"

# Remove trap before exiting normally
trap - INT TERM

exit 0 # Exit successfully after printing the report
|
||||
Reference in New Issue
Block a user