Merge branch 'next' of github.com:eyaltoledano/claude-task-master into add-complexity-score-to-task

Shrey Paharia
2025-05-03 16:34:47 +05:30
76 changed files with 3056 additions and 1933 deletions

View File

@@ -5,6 +5,47 @@ set -u
# Prevent errors in pipelines from being masked.
set -o pipefail
# --- Default Settings ---
run_verification_test=true
# --- Argument Parsing ---
# Simple loop to check for the skip flag
# Note: This needs to happen *before* the main block piped to tee
# if we want the decision logged early. Or handle args inside.
# Let's handle it before for clarity.
processed_args=()
while [[ $# -gt 0 ]]; do
case "$1" in
--skip-verification)
run_verification_test=false
echo "[INFO] Argument '--skip-verification' detected. Fallback verification will be skipped."
shift # Consume the flag
;;
--analyze-log)
# Keep the analyze-log flag handling separate for now
# It exits early, so doesn't conflict with the main run flags
processed_args+=("$1")
if [[ $# -gt 1 ]]; then
processed_args+=("$2")
shift 2
else
shift 1
fi
;;
*)
# Unknown argument, pass it along or handle error
# For now, just pass it along in case --analyze-log needs it later
processed_args+=("$1")
shift
;;
esac
done
# Restore processed arguments ONLY if the array is not empty
if [ ${#processed_args[@]} -gt 0 ]; then
set -- "${processed_args[@]}"
fi
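For reference, a minimal sketch of how these flags might be combined on the command line; the script filename and log path below are placeholders, not taken from the diff:

```bash
# Run the full E2E suite but skip the fallback verification step (hypothetical script name).
./tests/e2e/run_e2e.sh --skip-verification

# Analysis-only mode: re-analyze an existing log without re-running the tests
# (log path is illustrative; the real file follows the e2e_run_YYYYMMDD_HHMMSS.log pattern).
./tests/e2e/run_e2e.sh --analyze-log path/to/e2e_run_20250503_120000.log
```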
# --- Configuration ---
# Assumes script is run from the project root (claude-task-master)
TASKMASTER_SOURCE_DIR="." # Current directory is the source
@@ -20,9 +61,11 @@ MAIN_ENV_FILE="$TASKMASTER_SOURCE_DIR/.env"
# <<< Source the helper script >>>
source "$TASKMASTER_SOURCE_DIR/tests/e2e/e2e_helpers.sh"
# <<< Export helper functions for subshells >>>
export -f log_info log_success log_error log_step _format_duration _get_elapsed_time_for_log
# --- Argument Parsing for Analysis-Only Mode ---
# Check if the first argument is --analyze-log
# This remains the same, as it exits early if matched
if [ "$#" -ge 1 ] && [ "$1" == "--analyze-log" ]; then
LOG_TO_ANALYZE=""
# Check if a log file path was provided as the second argument
@@ -50,7 +93,7 @@ if [ "$#" -ge 1 ] && [ "$1" == "--analyze-log" ]; then
fi
echo "[INFO] Running in analysis-only mode for log: $LOG_TO_ANALYZE"
# --- Derive TEST_RUN_DIR from log file path ---
# Extract timestamp like YYYYMMDD_HHMMSS from e2e_run_YYYYMMDD_HHMMSS.log
log_basename=$(basename "$LOG_TO_ANALYZE")
# Ensure the sed command matches the .log suffix correctly
@@ -74,7 +117,7 @@ if [ "$#" -ge 1 ] && [ "$1" == "--analyze-log" ]; then
# Save original dir before changing
ORIGINAL_DIR=$(pwd)
echo "[INFO] Changing directory to $EXPECTED_RUN_DIR_ABS for analysis context..."
cd "$EXPECTED_RUN_DIR_ABS"
@@ -169,6 +212,21 @@ log_step() {
# called *inside* this block depend on it. If not, it can be removed.
start_time_for_helpers=$(date +%s) # Keep if needed by helpers called inside this block
# Log the verification decision
if [ "$run_verification_test" = true ]; then
log_info "Fallback verification test will be run as part of this E2E test."
else
log_info "Fallback verification test will be SKIPPED (--skip-verification flag detected)."
fi
# --- Dependency Checks ---
log_step "Checking for dependencies (jq)"
if ! command -v jq &> /dev/null; then
log_error "Dependency 'jq' is not installed or not found in PATH. Please install jq (e.g., 'brew install jq' or 'sudo apt-get install jq')."
exit 1
fi
log_success "Dependency 'jq' found."
# --- Test Setup (Output to tee) ---
log_step "Setting up test environment"
@@ -241,11 +299,7 @@ log_step() {
fi
log_success "PRD parsed successfully."
log_step "Listing tasks"
task-master list > task_list_output.log
log_success "Task list saved to task_list_output.log"
log_step "Analyzing complexity"
log_step "Expanding Task 1 (to ensure subtask 1.1 exists)"
# Add --research flag if needed and API keys support it
task-master analyze-complexity --research --output complexity_results.json
if [ ! -f "complexity_results.json" ]; then
@@ -298,7 +352,39 @@ log_step() {
# === End Model Commands Test ===
# === Multi-Provider Add-Task Test ===
# === Fallback Model generateObjectService Verification ===
if [ "$run_verification_test" = true ]; then
log_step "Starting Fallback Model (generateObjectService) Verification (Calls separate script)"
verification_script_path="$ORIGINAL_DIR/tests/e2e/run_fallback_verification.sh"
if [ -x "$verification_script_path" ]; then
log_info "--- Executing Fallback Verification Script: $verification_script_path ---"
# Execute the script directly, allowing output to flow to tee
# Pass the current directory (the test run dir) as the argument
"$verification_script_path" "$(pwd)"
verification_exit_code=$? # Capture exit code immediately
log_info "--- Finished Fallback Verification Script Execution (Exit Code: $verification_exit_code) ---"
# Log success/failure based on captured exit code
if [ $verification_exit_code -eq 0 ]; then
log_success "Fallback verification script reported success."
else
log_error "Fallback verification script reported FAILURE (Exit Code: $verification_exit_code)."
# Decide whether to exit the main script or just log the error
# exit 1 # Uncomment to make verification failure fatal
fi
else
log_error "Fallback verification script not found or not executable at $verification_script_path. Skipping verification."
# Decide whether to exit or continue
# exit 1
fi
else
log_info "Skipping Fallback Verification test as requested by flag."
fi
# === END Verification Section ===
# === Multi-Provider Add-Task Test (Keep as is) ===
log_step "Starting Multi-Provider Add-Task Test Sequence"
# Define providers, models, and flags
@@ -308,9 +394,9 @@ log_step() {
"claude-3-7-sonnet-20250219"
"gpt-4o"
"gemini-2.5-pro-exp-03-25"
"sonar-pro"
"sonar-pro" # Note: This is research-only, add-task might fail if not using research model
"grok-3"
"anthropic/claude-3.7-sonnet" # OpenRouter uses Claude 3.7
"anthropic/claude-3.7-sonnet" # OpenRouter uses Claude 3.7
)
# Flags: Add provider-specific flags here, e.g., --openrouter. Use empty string if none.
declare -a flags=("" "" "" "" "" "--openrouter")
@@ -318,6 +404,7 @@ log_step() {
# Consistent prompt for all providers
add_task_prompt="Create a task to implement user authentication using OAuth 2.0 with Google as the provider. Include steps for registering the app, handling the callback, and storing user sessions."
log_info "Using consistent prompt for add-task tests: \"$add_task_prompt\""
echo "--- Multi-Provider Add Task Summary ---" > provider_add_task_summary.log # Initialize summary log
for i in "${!providers[@]}"; do
provider="${providers[$i]}"
@@ -341,7 +428,7 @@ log_step() {
# 2. Run add-task
log_info "Running add-task with prompt..."
add_task_output_file="add_task_raw_output_${provider}.log"
add_task_output_file="add_task_raw_output_${provider}_${model//\//_}.log" # Sanitize ID
# Run add-task and capture ALL output (stdout & stderr) to a file AND a variable
add_task_cmd_output=$(task-master add-task --prompt "$add_task_prompt" 2>&1 | tee "$add_task_output_file")
add_task_exit_code=${PIPESTATUS[0]}
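`${PIPESTATUS[0]}` is what preserves the exit code of the first command in the pipeline; plain `$?` would report `tee`'s status instead. As a general bash note, PIPESTATUS reflects the most recent foreground pipeline of the current shell and is not propagated out of a command substitution's subshell, so the more robust form keeps the pipeline at top level. A standalone sketch, with `fake_add_task` as a hypothetical stand-in:

```bash
# Stand-in for "task-master add-task"; fails with a distinctive exit code.
fake_add_task() { echo "some provider output"; return 3; }

fake_add_task 2>&1 | tee raw_output.log
status=${PIPESTATUS[0]}   # 3 (fake_add_task); $? here would be 0 (tee)
echo "captured exit code: $status"
```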
@@ -388,29 +475,30 @@ log_step() {
echo "Provider add-task summary log available at: provider_add_task_summary.log"
# === End Multi-Provider Add-Task Test ===
log_step "Listing tasks again (final)"
task-master list --with-subtasks > task_list_final.log
log_success "Final task list saved to task_list_final.log"
log_step "Listing tasks again (after multi-add)"
task-master list --with-subtasks > task_list_after_multi_add.log
log_success "Task list after multi-add saved to task_list_after_multi_add.log"
# === Test Core Task Commands ===
log_step "Listing tasks (initial)"
task-master list > task_list_initial.log
log_success "Initial task list saved to task_list_initial.log"
# === Resume Core Task Commands Test ===
log_step "Listing tasks (for core tests)"
task-master list > task_list_core_test_start.log
log_success "Core test initial task list saved."
log_step "Getting next task"
task-master next > next_task_initial.log
log_success "Initial next task saved to next_task_initial.log"
task-master next > next_task_core_test.log
log_success "Core test next task saved."
log_step "Showing Task 1 details"
task-master show 1 > task_1_details.log
log_success "Task 1 details saved to task_1_details.log"
task-master show 1 > task_1_details_core_test.log
log_success "Task 1 details saved."
log_step "Adding dependency (Task 2 depends on Task 1)"
task-master add-dependency --id=2 --depends-on=1
log_success "Added dependency 2->1."
log_step "Validating dependencies (after add)"
task-master validate-dependencies > validate_dependencies_after_add.log
task-master validate-dependencies > validate_dependencies_after_add_core.log
log_success "Dependency validation after add saved."
log_step "Removing dependency (Task 2 depends on Task 1)"
@@ -418,7 +506,7 @@ log_step() {
log_success "Removed dependency 2->1."
log_step "Fixing dependencies (should be no-op now)"
task-master fix-dependencies > fix_dependencies_output.log
task-master fix-dependencies > fix_dependencies_output_core.log
log_success "Fix dependencies attempted."
# === Start New Test Section: Validate/Fix Bad Dependencies ===
@@ -483,15 +571,20 @@ log_step() {
# === End New Test Section ===
log_step "Adding Task 11 (Manual)"
task-master add-task --title="Manual E2E Task" --description="Add basic health check endpoint" --priority=low --dependencies=3 # Depends on backend setup
# Assuming the new task gets ID 11 (adjust if PRD parsing changes)
log_success "Added Task 11 manually."
# Find the next available task ID dynamically instead of hardcoding 11, 12
# Assuming tasks are added sequentially and we didn't remove any core tasks yet
last_task_id=$(jq '[.tasks[].id] | max' tasks/tasks.json)
manual_task_id=$((last_task_id + 1))
ai_task_id=$((manual_task_id + 1))
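The `jq` expression above collects every task ID into an array and takes the maximum, so the IDs of the newly added tasks no longer depend on the PRD producing exactly ten tasks. A standalone sketch against an assumed miniature tasks.json:

```bash
# Assumed sample data for illustration only.
cat > /tmp/tasks_sample.json <<'EOF'
{ "tasks": [ { "id": 1 }, { "id": 2 }, { "id": 7 } ] }
EOF

last_task_id=$(jq '[.tasks[].id] | max' /tmp/tasks_sample.json)   # 7
manual_task_id=$((last_task_id + 1))                              # 8
ai_task_id=$((manual_task_id + 1))                                # 9
echo "manual=$manual_task_id ai=$ai_task_id"
```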
log_step "Adding Task 12 (AI)"
log_step "Adding Task $manual_task_id (Manual)"
task-master add-task --title="Manual E2E Task" --description="Add basic health check endpoint" --priority=low --dependencies=3 # Depends on backend setup
log_success "Added Task $manual_task_id manually."
log_step "Adding Task $ai_task_id (AI)"
task-master add-task --prompt="Implement basic UI styling using CSS variables for colors and spacing" --priority=medium --dependencies=1 # Depends on frontend setup
# Assuming the new task gets ID 12
log_success "Added Task 12 via AI prompt."
log_success "Added Task $ai_task_id via AI prompt."
log_step "Updating Task 3 (update-task AI)"
task-master update-task --id=3 --prompt="Update backend server setup: Ensure CORS is configured to allow requests from the frontend origin."
@@ -524,8 +617,8 @@ log_step() {
log_success "Set status for Task 1 to done."
log_step "Getting next task (after status change)"
task-master next > next_task_after_change.log
log_success "Next task after change saved to next_task_after_change.log"
task-master next > next_task_after_change_core.log
log_success "Next task after change saved."
# === Start New Test Section: List Filtering ===
log_step "Listing tasks filtered by status 'done'"
@@ -543,10 +636,10 @@ log_step() {
task-master clear-subtasks --id=8
log_success "Attempted to clear subtasks from Task 8."
log_step "Removing Tasks 11 and 12 (multi-ID)"
log_step "Removing Tasks $manual_task_id and $ai_task_id (multi-ID)"
# Remove the tasks we added earlier
task-master remove-task --id=11,12 -y
log_success "Removed tasks 11 and 12."
task-master remove-task --id="$manual_task_id,$ai_task_id" -y
log_success "Removed tasks $manual_task_id and $ai_task_id."
# === Start New Test Section: Subtasks & Dependencies ===
@@ -569,6 +662,11 @@ log_step() {
log_step "Expanding Task 1 again (to have subtasks for next test)"
task-master expand --id=1
log_success "Attempted to expand Task 1 again."
# Verify 1.1 exists again
if ! jq -e '.tasks[] | select(.id == 1) | .subtasks[] | select(.id == 1)' tasks/tasks.json > /dev/null; then
log_error "Subtask 1.1 not found in tasks.json after re-expanding Task 1."
exit 1
fi
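`jq -e` ties the exit status to the query result (non-zero when the filter produces no output, or only `false`/`null`), which is what lets it act as a guard here. A quick standalone sketch with assumed sample data:

```bash
# Sample tasks.json assumed for illustration.
echo '{"tasks":[{"id":1,"subtasks":[{"id":1}]}]}' > /tmp/tasks_check.json

if jq -e '.tasks[] | select(.id == 1) | .subtasks[] | select(.id == 1)' /tmp/tasks_check.json > /dev/null; then
    echo "subtask 1.1 present"
else
    echo "subtask 1.1 missing"
fi
```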
log_step "Adding dependency: Task 3 depends on Subtask 1.1"
task-master add-dependency --id=3 --depends-on=1.1
@@ -593,25 +691,17 @@ log_step() {
log_success "Generated task files."
# === End Core Task Commands Test ===
# === AI Commands (Tested earlier implicitly with add/update/expand) ===
log_step "Analyzing complexity (AI with Research)"
task-master analyze-complexity --research --output complexity_results.json
if [ ! -f "complexity_results.json" ]; then log_error "Complexity analysis failed."; exit 1; fi
log_success "Complexity analysis saved to complexity_results.json"
# === AI Commands (Re-test some after changes) ===
log_step "Analyzing complexity (AI with Research - Final Check)"
task-master analyze-complexity --research --output complexity_results_final.json
if [ ! -f "complexity_results_final.json" ]; then log_error "Final Complexity analysis failed."; exit 1; fi
log_success "Final Complexity analysis saved."
log_step "Generating complexity report (Non-AI)"
task-master complexity-report --file complexity_results.json > complexity_report_formatted.log
log_success "Formatted complexity report saved to complexity_report_formatted.log"
log_step "Generating complexity report (Non-AI - Final Check)"
task-master complexity-report --file complexity_results_final.json > complexity_report_formatted_final.log
log_success "Final Formatted complexity report saved."
# Expand All (Commented Out)
# log_step "Expanding All Tasks (AI - Heavy Operation, Commented Out)"
# task-master expand --all --research
# log_success "Attempted to expand all tasks."
log_step "Expanding Task 1 (AI - Note: Subtasks were removed/cleared)"
task-master expand --id=1
log_success "Attempted to expand Task 1 again."
# === End AI Commands ===
# === End AI Commands Re-test ===
log_step "Listing tasks again (final)"
task-master list --with-subtasks > task_list_final.log
@@ -623,17 +713,7 @@ log_step() {
ABS_TEST_RUN_DIR="$(pwd)"
echo "Test artifacts and logs are located in: $ABS_TEST_RUN_DIR"
echo "Key artifact files (within above dir):"
echo " - .env (Copied from source)"
echo " - tasks/tasks.json"
echo " - task_list_output.log"
echo " - complexity_results.json"
echo " - complexity_report_formatted.log"
echo " - task_list_after_changes.log"
echo " - models_initial_config.log, models_final_config.log"
echo " - task_list_final.log"
echo " - task_list_initial.log, next_task_initial.log, task_1_details.log"
echo " - validate_dependencies_after_add.log, fix_dependencies_output.log"
echo " - complexity_*.log"
ls -1 # List files in the current directory
echo ""
echo "Full script log also available at: $LOG_FILE (relative to project root)"

View File

@@ -0,0 +1,270 @@
#!/bin/bash
# --- Fallback Model Verification Script ---
# Purpose: Tests models marked as 'fallback' in supported-models.json
# to see if they work with generateObjectService (via update-subtask).
# Usage: 1. Run from within a prepared E2E test run directory:
# ./path/to/script.sh .
# 2. Run from project root (or anywhere) to use the latest run dir:
# ./tests/e2e/run_fallback_verification.sh
# 3. Run from project root (or anywhere) targeting a specific run dir:
# ./tests/e2e/run_fallback_verification.sh /path/to/tests/e2e/_runs/run_YYYYMMDD_HHMMSS
# Output: Prints a summary report to standard output. Errors to standard error.
# Treat unset variables as an error when substituting.
set -u
# Prevent errors in pipelines from being masked.
set -o pipefail
# --- Embedded Helper Functions ---
# Copied from e2e_helpers.sh to make this script standalone
_format_duration() {
local total_seconds=$1
local minutes=$((total_seconds / 60))
local seconds=$((total_seconds % 60))
printf "%dm%02ds" "$minutes" "$seconds"
}
_get_elapsed_time_for_log() {
# Needs overall_start_time defined in the main script body
local current_time=$(date +%s)
local elapsed_seconds=$((current_time - overall_start_time))
_format_duration "$elapsed_seconds"
}
log_info() {
echo "[INFO] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
}
log_success() {
echo "[SUCCESS] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
}
log_error() {
echo "[ERROR] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2
}
log_step() {
# Needs test_step_count defined and incremented in the main script body
test_step_count=$((test_step_count + 1))
echo ""
echo "============================================="
echo " STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
echo "============================================="
}
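Because these helpers read `overall_start_time` and `test_step_count` from the enclosing script, any caller has to define those two globals first. A minimal usage sketch (output timings are illustrative):

```bash
# The embedded helpers above assume these two globals exist.
overall_start_time=$(date +%s)
test_step_count=0

log_step "Example step"        # numbered banner with elapsed time
log_info "Doing some work..."
log_success "Work finished."
```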
# --- Signal Handling ---
# Global variable to hold child PID
child_pid=0
# Use a persistent log file name
PROGRESS_LOG_FILE="fallback_verification_progress.log"
cleanup() {
echo "" # Newline after ^C
log_error "Interrupt received. Cleaning up any running child process..."
if [ "$child_pid" -ne 0 ]; then
log_info "Killing child process (PID: $child_pid) and its group..."
kill -TERM -- "-$child_pid" 2>/dev/null || kill -KILL -- "-$child_pid" 2>/dev/null
child_pid=0
fi
# DO NOT delete the progress log file on interrupt
log_info "Progress saved in: $PROGRESS_LOG_FILE"
exit 130 # Exit with code indicating interrupt
}
# Trap SIGINT (Ctrl+C) and SIGTERM
trap cleanup INT TERM
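The trap wiring follows a common pattern: remember the background child's PID, and on Ctrl+C terminate it before exiting with 130. The real script additionally signals the child's whole process group via the negative-PID form; the sketch below sticks to the simpler single-PID kill and is illustrative only:

```bash
child_pid=0

cleanup() {
    echo "[ERROR] Interrupt received, stopping child process..." >&2
    if [ "$child_pid" -ne 0 ]; then
        kill -TERM "$child_pid" 2>/dev/null || true
    fi
    exit 130
}
trap cleanup INT TERM

sleep 300 &       # stand-in for a long-running task-master call
child_pid=$!
wait "$child_pid"
```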
# --- Configuration ---
# Determine the project root relative to this script's location
# Use a robust method to find the script's own directory
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
# Assumes this script is in tests/e2e/
PROJECT_ROOT_DIR="$( cd "$SCRIPT_DIR/../.." &> /dev/null && pwd )"
SUPPORTED_MODELS_FILE="$PROJECT_ROOT_DIR/scripts/modules/supported-models.json"
BASE_RUNS_DIR="$PROJECT_ROOT_DIR/tests/e2e/_runs"
# --- Determine Target Run Directory ---
TARGET_RUN_DIR=""
if [ "$#" -ge 1 ] && [ -n "$1" ]; then
# Use provided argument if it exists
TARGET_RUN_DIR="$1"
# Make path absolute if it's relative
if [[ "$TARGET_RUN_DIR" != /* ]]; then
TARGET_RUN_DIR="$(pwd)/$TARGET_RUN_DIR"
fi
echo "[INFO] Using provided target run directory: $TARGET_RUN_DIR"
else
# Find the latest run directory
echo "[INFO] No run directory provided, finding latest in $BASE_RUNS_DIR..."
TARGET_RUN_DIR=$(ls -td "$BASE_RUNS_DIR"/run_* 2>/dev/null | head -n 1)
if [ -z "$TARGET_RUN_DIR" ]; then
echo "[ERROR] No run directories found matching 'run_*' in $BASE_RUNS_DIR. Cannot proceed." >&2
exit 1
fi
echo "[INFO] Found latest run directory: $TARGET_RUN_DIR"
fi
# Validate the target directory
if [ ! -d "$TARGET_RUN_DIR" ]; then
echo "[ERROR] Target run directory not found or is not a directory: $TARGET_RUN_DIR" >&2
exit 1
fi
# --- Change to Target Directory ---
echo "[INFO] Changing working directory to: $TARGET_RUN_DIR"
if ! cd "$TARGET_RUN_DIR"; then
echo "[ERROR] Failed to cd into target directory: $TARGET_RUN_DIR" >&2
exit 1
fi
echo "[INFO] Now operating inside: $(pwd)"
# --- Now we are inside the target run directory ---
overall_start_time=$(date +%s)
test_step_count=0
log_info "Starting fallback verification script execution in $(pwd)"
log_info "Progress will be logged to: $(pwd)/$PROGRESS_LOG_FILE"
# --- Dependency Checks ---
log_step "Checking for dependencies (jq) in verification script"
if ! command -v jq &> /dev/null; then
log_error "Dependency 'jq' is not installed or not found in PATH."
exit 1
fi
log_success "Dependency 'jq' found."
# --- Verification Logic ---
log_step "Starting/Resuming Fallback Model (generateObjectService) Verification"
# Ensure progress log exists, create if not
touch "$PROGRESS_LOG_FILE"
# Ensure the supported models file exists (using absolute path)
if [ ! -f "$SUPPORTED_MODELS_FILE" ]; then
log_error "supported-models.json not found at absolute path: $SUPPORTED_MODELS_FILE."
exit 1
fi
log_info "Using supported models file: $SUPPORTED_MODELS_FILE"
# Ensure subtask 1.1 exists (basic check, main script should guarantee)
# Check for tasks.json in the current directory (which is now the run dir)
if [ ! -f "tasks/tasks.json" ]; then
log_error "tasks/tasks.json not found in current directory ($(pwd)). Was this run directory properly initialized?"
exit 1
fi
if ! jq -e '.tasks[] | select(.id == 1) | .subtasks[] | select(.id == 1)' tasks/tasks.json > /dev/null 2>&1; then
log_error "Subtask 1.1 not found in tasks.json within $(pwd). Cannot perform update-subtask tests."
exit 1
fi
log_info "Subtask 1.1 found in $(pwd)/tasks/tasks.json, proceeding with verification."
# Read providers and models using jq
jq -c 'to_entries[] | .key as $provider | .value[] | select(.allowed_roles[]? == "fallback") | {provider: $provider, id: .id}' "$SUPPORTED_MODELS_FILE" | while IFS= read -r model_info; do
provider=$(echo "$model_info" | jq -r '.provider')
model_id=$(echo "$model_info" | jq -r '.id')
flag="" # Default flag
# Check if already tested
# Use grep -Fq for fixed string and quiet mode
if grep -Fq "${provider},${model_id}," "$PROGRESS_LOG_FILE"; then
log_info "--- Skipping: $provider / $model_id (already tested, result in $PROGRESS_LOG_FILE) ---"
continue
fi
log_info "--- Verifying: $provider / $model_id ---"
# Determine provider flag
if [ "$provider" == "openrouter" ]; then
flag="--openrouter"
elif [ "$provider" == "ollama" ]; then
flag="--ollama"
fi
# 1. Set the main model
if ! command -v task-master &> /dev/null; then
log_error "task-master command not found."
echo "[INSTRUCTION] Please run 'npm link task-master-ai' in the project root first."
exit 1
fi
log_info "Setting main model to $model_id ${flag:+using flag $flag}..."
set_model_cmd="task-master models --set-main \"$model_id\" $flag"
model_set_status="SUCCESS"
if ! eval $set_model_cmd > /dev/null 2>&1; then
log_error "Failed to set main model for $provider / $model_id. Skipping test."
echo "$provider,$model_id,SET_MODEL_FAILED" >> "$PROGRESS_LOG_FILE"
continue # Skip the actual test if setting fails
fi
log_info "Set main model ok."
# 2. Run update-subtask
log_info "Running update-subtask --id=1.1 --prompt='Test generateObjectService' (timeout 120s)"
update_subtask_output_file="update_subtask_raw_output_${provider}_${model_id//\//_}.log"
timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." > "$update_subtask_output_file" 2>&1 &
child_pid=$!
wait "$child_pid"
update_subtask_exit_code=$?
child_pid=0
# 3. Check result and log persistently
result_status=""
if [ $update_subtask_exit_code -eq 0 ] && grep -q "Successfully updated subtask #1.1" "$update_subtask_output_file"; then
log_success "update-subtask succeeded for $provider / $model_id (Verified Output)."
result_status="SUCCESS"
elif [ $update_subtask_exit_code -eq 124 ]; then
log_error "update-subtask TIMED OUT for $provider / $model_id. Check $update_subtask_output_file."
result_status="FAILED_TIMEOUT"
elif [ $update_subtask_exit_code -eq 130 ] || [ $update_subtask_exit_code -eq 143 ]; then
log_error "update-subtask INTERRUPTED for $provider / $model_id."
result_status="INTERRUPTED" # Record interruption
# Don't exit the loop, allow script to finish or be interrupted again
else
log_error "update-subtask FAILED for $provider / $model_id (Exit Code: $update_subtask_exit_code). Check $update_subtask_output_file."
result_status="FAILED"
fi
# Append result to the persistent log file
echo "$provider,$model_id,$result_status" >> "$PROGRESS_LOG_FILE"
done # End of fallback verification loop
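The `jq` filter that drives the loop above flattens supported-models.json into one `{provider, id}` object per model whose `allowed_roles` contains "fallback". A standalone sketch with an assumed miniature input shows the shape it produces:

```bash
# Assumed miniature supported-models.json, for illustration only.
cat > /tmp/supported_models_sample.json <<'EOF'
{
  "anthropic": [
    { "id": "claude-3-7-sonnet-20250219", "allowed_roles": ["main", "fallback"] }
  ],
  "perplexity": [
    { "id": "sonar-pro", "allowed_roles": ["research"] }
  ]
}
EOF

jq -c 'to_entries[] | .key as $provider | .value[]
       | select(.allowed_roles[]? == "fallback")
       | {provider: $provider, id: .id}' /tmp/supported_models_sample.json
# -> {"provider":"anthropic","id":"claude-3-7-sonnet-20250219"}
```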
# --- Generate Final Verification Report to STDOUT ---
# Report reads from the persistent PROGRESS_LOG_FILE
echo ""
echo "--- Fallback Model Verification Report (via $0) ---"
echo "Executed inside run directory: $(pwd)"
echo "Progress log: $(pwd)/$PROGRESS_LOG_FILE"
echo ""
echo "Test Command: task-master update-subtask --id=1.1 --prompt=\"...\" (tests generateObjectService)"
echo "Models were tested by setting them as the 'main' model temporarily."
echo "Results based on exit code and output verification:"
echo ""
echo "Models CONFIRMED to support generateObjectService (Keep 'fallback' role):"
awk -F',' '$3 == "SUCCESS" { print "- " $1 " / " $2 }' "$PROGRESS_LOG_FILE" | sort
echo ""
echo "Models FAILED generateObjectService test (Suggest REMOVING 'fallback' role):"
awk -F',' '$3 == "FAILED" { print "- " $1 " / " $2 }' "$PROGRESS_LOG_FILE" | sort
echo ""
echo "Models TIMED OUT during test (Suggest REMOVING 'fallback' role):"
awk -F',' '$3 == "FAILED_TIMEOUT" { print "- " $1 " / " $2 }' "$PROGRESS_LOG_FILE" | sort
echo ""
echo "Models where setting the model failed (Inconclusive):"
awk -F',' '$3 == "SET_MODEL_FAILED" { print "- " $1 " / " $2 }' "$PROGRESS_LOG_FILE" | sort
echo ""
echo "Models INTERRUPTED during test (Inconclusive - Rerun):"
awk -F',' '$3 == "INTERRUPTED" { print "- " $1 " / " $2 }' "$PROGRESS_LOG_FILE" | sort
echo ""
echo "-------------------------------------------------------"
echo ""
# Don't clean up the progress log
# if [ -f "$PROGRESS_LOG_FILE" ]; then
# rm "$PROGRESS_LOG_FILE"
# fi
log_info "Finished Fallback Model (generateObjectService) Verification Script"
# Remove trap before exiting normally
trap - INT TERM
exit 0 # Exit successfully after printing the report

View File

@@ -40,12 +40,14 @@ jest.unstable_mockModule('../../src/ai-providers/perplexity.js', () => ({
// ... Mock other providers (google, openai, etc.) similarly ...
// Mock utils logger and API key resolver
// Mock utils logger, API key resolver, AND findProjectRoot
const mockLog = jest.fn();
const mockResolveEnvVariable = jest.fn();
const mockFindProjectRoot = jest.fn();
jest.unstable_mockModule('../../scripts/modules/utils.js', () => ({
log: mockLog,
resolveEnvVariable: mockResolveEnvVariable
resolveEnvVariable: mockResolveEnvVariable,
findProjectRoot: mockFindProjectRoot
}));
// Import the module to test (AFTER mocks)
@@ -54,6 +56,8 @@ const { generateTextService } = await import(
);
describe('Unified AI Services', () => {
const fakeProjectRoot = '/fake/project/root'; // Define for reuse
beforeEach(() => {
// Clear mocks before each test
jest.clearAllMocks(); // Clears all mocks
@@ -76,6 +80,9 @@ describe('Unified AI Services', () => {
if (key === 'PERPLEXITY_API_KEY') return 'mock-perplexity-key';
return null;
});
// Set a default behavior for the new mock
mockFindProjectRoot.mockReturnValue(fakeProjectRoot);
});
describe('generateTextService', () => {
@@ -91,12 +98,16 @@ describe('Unified AI Services', () => {
const result = await generateTextService(params);
expect(result).toBe('Main provider response');
expect(mockGetMainProvider).toHaveBeenCalled();
expect(mockGetMainModelId).toHaveBeenCalled();
expect(mockGetParametersForRole).toHaveBeenCalledWith('main');
expect(mockGetMainProvider).toHaveBeenCalledWith(fakeProjectRoot);
expect(mockGetMainModelId).toHaveBeenCalledWith(fakeProjectRoot);
expect(mockGetParametersForRole).toHaveBeenCalledWith(
'main',
fakeProjectRoot
);
expect(mockResolveEnvVariable).toHaveBeenCalledWith(
'ANTHROPIC_API_KEY',
params.session
params.session,
fakeProjectRoot
);
expect(mockGenerateAnthropicText).toHaveBeenCalledTimes(1);
expect(mockGenerateAnthropicText).toHaveBeenCalledWith({
@@ -109,26 +120,43 @@ describe('Unified AI Services', () => {
{ role: 'user', content: 'Test' }
]
});
// Verify other providers NOT called
expect(mockGeneratePerplexityText).not.toHaveBeenCalled();
});
test('should fall back to fallback provider if main fails', async () => {
const mainError = new Error('Main provider failed');
mockGenerateAnthropicText
.mockRejectedValueOnce(mainError) // Main fails first
.mockResolvedValueOnce('Fallback provider response'); // Fallback succeeds
.mockRejectedValueOnce(mainError)
.mockResolvedValueOnce('Fallback provider response');
const params = { role: 'main', prompt: 'Fallback test' };
const explicitRoot = '/explicit/test/root';
const params = {
role: 'main',
prompt: 'Fallback test',
projectRoot: explicitRoot
};
const result = await generateTextService(params);
expect(result).toBe('Fallback provider response');
expect(mockGetMainProvider).toHaveBeenCalled();
expect(mockGetFallbackProvider).toHaveBeenCalled(); // Fallback was tried
expect(mockGenerateAnthropicText).toHaveBeenCalledTimes(2); // Called for main (fail) and fallback (success)
expect(mockGeneratePerplexityText).not.toHaveBeenCalled(); // Research not called
expect(mockGetMainProvider).toHaveBeenCalledWith(explicitRoot);
expect(mockGetFallbackProvider).toHaveBeenCalledWith(explicitRoot);
expect(mockGetParametersForRole).toHaveBeenCalledWith(
'main',
explicitRoot
);
expect(mockGetParametersForRole).toHaveBeenCalledWith(
'fallback',
explicitRoot
);
// Check log messages for fallback attempt
expect(mockResolveEnvVariable).toHaveBeenCalledWith(
'ANTHROPIC_API_KEY',
undefined,
explicitRoot
);
expect(mockGenerateAnthropicText).toHaveBeenCalledTimes(2);
expect(mockGeneratePerplexityText).not.toHaveBeenCalled();
expect(mockLog).toHaveBeenCalledWith(
'error',
expect.stringContaining('Service call failed for role main')
@@ -153,12 +181,40 @@ describe('Unified AI Services', () => {
const result = await generateTextService(params);
expect(result).toBe('Research provider response');
expect(mockGetMainProvider).toHaveBeenCalled();
expect(mockGetFallbackProvider).toHaveBeenCalled();
expect(mockGetResearchProvider).toHaveBeenCalled(); // Research was tried
expect(mockGenerateAnthropicText).toHaveBeenCalledTimes(2); // main, fallback
expect(mockGeneratePerplexityText).toHaveBeenCalledTimes(1); // research
expect(mockGetMainProvider).toHaveBeenCalledWith(fakeProjectRoot);
expect(mockGetFallbackProvider).toHaveBeenCalledWith(fakeProjectRoot);
expect(mockGetResearchProvider).toHaveBeenCalledWith(fakeProjectRoot);
expect(mockGetParametersForRole).toHaveBeenCalledWith(
'main',
fakeProjectRoot
);
expect(mockGetParametersForRole).toHaveBeenCalledWith(
'fallback',
fakeProjectRoot
);
expect(mockGetParametersForRole).toHaveBeenCalledWith(
'research',
fakeProjectRoot
);
expect(mockResolveEnvVariable).toHaveBeenCalledWith(
'ANTHROPIC_API_KEY',
undefined,
fakeProjectRoot
);
expect(mockResolveEnvVariable).toHaveBeenCalledWith(
'ANTHROPIC_API_KEY',
undefined,
fakeProjectRoot
);
expect(mockResolveEnvVariable).toHaveBeenCalledWith(
'PERPLEXITY_API_KEY',
undefined,
fakeProjectRoot
);
expect(mockGenerateAnthropicText).toHaveBeenCalledTimes(2);
expect(mockGeneratePerplexityText).toHaveBeenCalledTimes(1);
expect(mockLog).toHaveBeenCalledWith(
'error',
expect.stringContaining('Service call failed for role fallback')
@@ -204,6 +260,23 @@ describe('Unified AI Services', () => {
);
});
test('should use default project root or handle null if findProjectRoot returns null', async () => {
mockFindProjectRoot.mockReturnValue(null); // Simulate not finding root
mockGenerateAnthropicText.mockResolvedValue('Response with no root');
const params = { role: 'main', prompt: 'No root test' }; // No explicit root passed
await generateTextService(params);
expect(mockGetMainProvider).toHaveBeenCalledWith(null);
expect(mockGetParametersForRole).toHaveBeenCalledWith('main', null);
expect(mockResolveEnvVariable).toHaveBeenCalledWith(
'ANTHROPIC_API_KEY',
undefined,
null
);
expect(mockGenerateAnthropicText).toHaveBeenCalledTimes(1);
});
// Add more tests for edge cases:
// - Missing API keys (should throw from _resolveApiKey)
// - Unsupported provider configured (should skip and log)