Merge branch 'next' of https://github.com/eyaltoledano/claude-task-master into joedanz/flexible-brand-rules
# Conflicts:
#	scripts/modules/commands.js
#	scripts/modules/ui.js
@@ -5,6 +5,42 @@
|
||||
# It requires curl and jq to be installed.
|
||||
# It expects the project root path to be passed as the second argument.
|
||||
|
||||
# --- New Function: extract_and_sum_cost ---
|
||||
# Takes a string containing command output.
|
||||
# Extracts costs (lines with "Est. Cost: $X.YYYYYY" or similar from telemetry output)
|
||||
# from the output, sums them, and adds them to the GLOBAL total_e2e_cost variable.
|
||||
extract_and_sum_cost() {
|
||||
local command_output="$1"
|
||||
# Ensure total_e2e_cost is treated as a number, default to 0.0 if not set or invalid
|
||||
if ! [[ "$total_e2e_cost" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
|
||||
total_e2e_cost="0.0"
|
||||
fi
|
||||
|
||||
local extracted_cost_sum="0.0"
|
||||
|
||||
# Grep for lines containing "Est. Cost: $", then extract the numeric value.
|
||||
# Example line: │ Est. Cost: $0.093549 │
|
||||
# Accumulate all costs found in the command_output
|
||||
while IFS= read -r line; do
|
||||
# Extract the numeric part after 'Est. Cost: $' and before any trailing spaces/chars
|
||||
cost_value=$(echo "$line" | grep -o -E 'Est\. Cost: \$([0-9]+\.[0-9]+)' | sed -E 's/Est\. Cost: \$//g')
|
||||
if [[ -n "$cost_value" && "$cost_value" =~ ^[0-9]+\.[0-9]+$ ]]; then
|
||||
# echo "[DEBUG] Found cost value: $cost_value in line: '$line'" # For debugging
|
||||
extracted_cost_sum=$(echo "$extracted_cost_sum + $cost_value" | bc)
|
||||
# else # For debugging
|
||||
# echo "[DEBUG] No valid cost value found or extracted in line: '$line' (extracted: '$cost_value')" # For debugging
|
||||
fi
|
||||
done < <(echo "$command_output" | grep -E 'Est\. Cost: \$')
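# Note: process substitution (< <(...)) is used instead of piping into the while
# loop so that extracted_cost_sum is updated in the current shell, not a subshell.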
|
||||
|
||||
# echo "[DEBUG] Extracted sum from this command output: $extracted_cost_sum" # For debugging
|
||||
if (( $(echo "$extracted_cost_sum > 0" | bc -l) )); then
|
||||
total_e2e_cost=$(echo "$total_e2e_cost + $extracted_cost_sum" | bc)
|
||||
# echo "[DEBUG] Updated global total_e2e_cost: $total_e2e_cost" # For debugging
|
||||
fi
|
||||
# No echo here, the function modifies a global variable.
|
||||
}
|
||||
export -f extract_and_sum_cost # Export for use in other scripts if sourced
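# Illustrative usage (hypothetical command; assumes total_e2e_cost was initialized by the caller):
#   total_e2e_cost="0.0"
#   cmd_output=$(task-master add-task --prompt="..." 2>&1)
#   extract_and_sum_cost "$cmd_output"      # folds any "Est. Cost: $X" lines into total_e2e_cost
#   echo "Running total: $total_e2e_cost USD"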
|
||||
|
||||
analyze_log_with_llm() {
|
||||
local log_file="$1"
|
||||
local project_root="$2" # Expect project root as the second argument
|
||||
@@ -15,17 +51,17 @@ analyze_log_with_llm() {
|
||||
fi
|
||||
|
||||
local env_file="${project_root}/.env" # Path to .env in project root
|
||||
local supported_models_file="${project_root}/scripts/modules/supported-models.json"
|
||||
|
||||
local provider_summary_log="provider_add_task_summary.log" # File summarizing provider test outcomes
|
||||
local api_key=""
|
||||
# !!! IMPORTANT: Replace with your actual Claude API endpoint if different !!!
|
||||
local api_endpoint="https://api.anthropic.com/v1/messages"
|
||||
# !!! IMPORTANT: Ensure this matches the variable name in your .env file !!!
|
||||
local api_key_name="ANTHROPIC_API_KEY"
|
||||
local llm_analysis_model_id="claude-3-7-sonnet-20250219" # Model used for this analysis
|
||||
local llm_analysis_provider="anthropic"
|
||||
|
||||
echo "" # Add a newline before analysis starts
|
||||
|
||||
# Check for jq and curl
|
||||
if ! command -v jq &> /dev/null; then
|
||||
echo "[HELPER_ERROR] LLM Analysis requires 'jq'. Skipping analysis." >&2
|
||||
return 1
|
||||
@@ -34,34 +70,31 @@ analyze_log_with_llm() {
|
||||
echo "[HELPER_ERROR] LLM Analysis requires 'curl'. Skipping analysis." >&2
|
||||
return 1
|
||||
fi
|
||||
if ! command -v bc &> /dev/null; then
|
||||
echo "[HELPER_ERROR] LLM Analysis requires 'bc' for cost calculation. Skipping analysis." >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Check for API Key in the PROJECT ROOT's .env file
|
||||
if [ -f "$env_file" ]; then
|
||||
# Original assignment - Reading from project root .env
|
||||
api_key=$(grep "^${api_key_name}=" "$env_file" | sed -e "s/^${api_key_name}=//" -e 's/^[[:space:]"]*//' -e 's/[[:space:]"]*$//')
|
||||
fi
|
||||
|
||||
if [ -z "$api_key" ]; then
|
||||
echo "[HELPER_ERROR] ${api_key_name} not found or empty in project root .env file ($env_file). Skipping LLM analysis." >&2 # Updated error message
|
||||
echo "[HELPER_ERROR] ${api_key_name} not found or empty in project root .env file ($env_file). Skipping LLM analysis." >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
# The log file path is passed in as an argument; it must either be absolute or
# resolve relative to the caller's working directory. For simplicity, this assumes
# an absolute path or a path relative to the initial PWD, which is what the
# calling script provides (relative to the original PWD).
|
||||
if [ ! -f "$log_file" ]; then
|
||||
echo "[HELPER_ERROR] Log file not found: $log_file (PWD: $(pwd)). Check path passed to function. Skipping LLM analysis." >&2 # Updated error
|
||||
echo "[HELPER_ERROR] Log file not found: $log_file (PWD: $(pwd)). Check path passed to function. Skipping LLM analysis." >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
local log_content
|
||||
# Read entire file, handle potential errors
|
||||
log_content=$(cat "$log_file") || {
|
||||
echo "[HELPER_ERROR] Failed to read log file: $log_file. Skipping LLM analysis." >&2
|
||||
return 1
|
||||
}
|
||||
|
||||
# Prepare the prompt using a quoted heredoc for literal interpretation
|
||||
read -r -d '' prompt_template <<'EOF'
|
||||
Analyze the following E2E test log for the task-master tool. The log contains output from various 'task-master' commands executed sequentially.
|
||||
|
||||
@@ -99,41 +132,34 @@ Here is the main log content:
|
||||
|
||||
%s
|
||||
EOF
|
||||
# Note: The final %s is a placeholder for printf later
|
||||
|
||||
local full_prompt
|
||||
# Use printf to substitute the log content into the %s placeholder
|
||||
if ! printf -v full_prompt "$prompt_template" "$log_content"; then
|
||||
echo "[HELPER_ERROR] Failed to format prompt using printf." >&2
|
||||
# It's unlikely printf itself fails, but good practice
|
||||
return 1
|
||||
fi
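# Minimal sketch of the printf -v substitution above (hypothetical values):
#   template="Log follows: %s"
#   printf -v filled "$template" "contents of the log"
#   # filled is now "Log follows: contents of the log"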
|
||||
|
||||
# Construct the JSON payload for Claude Messages API
|
||||
local payload
|
||||
payload=$(jq -n --arg prompt "$full_prompt" '{
|
||||
"model": "claude-3-haiku-20240307", # Using Haiku for faster/cheaper testing
|
||||
"max_tokens": 3072, # Increased slightly
|
||||
"model": "'"$llm_analysis_model_id"'",
|
||||
"max_tokens": 3072,
|
||||
"messages": [
|
||||
{"role": "user", "content": $prompt}
|
||||
]
|
||||
# "temperature": 0.0 # Optional: Lower temperature for more deterministic JSON output
|
||||
}') || {
|
||||
echo "[HELPER_ERROR] Failed to create JSON payload using jq." >&2
|
||||
return 1
|
||||
}
|
||||
|
||||
local response_raw response_http_code response_body
|
||||
# Capture body and HTTP status code separately
|
||||
response_raw=$(curl -s -w "\nHTTP_STATUS_CODE:%{http_code}" -X POST "$api_endpoint" \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "x-api-key: $api_key" \
|
||||
-H "anthropic-version: 2023-06-01" \
|
||||
--data "$payload")
|
||||
|
||||
# Extract status code and body
|
||||
response_http_code=$(echo "$response_raw" | grep '^HTTP_STATUS_CODE:' | sed 's/HTTP_STATUS_CODE://')
|
||||
response_body=$(echo "$response_raw" | sed '$d') # Remove last line (status code)
|
||||
response_body=$(echo "$response_raw" | sed '$d')
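# Assumed shape of $response_raw after the curl -w flag above (illustrative):
#   {"id":"msg_...","usage":{"input_tokens":123,"output_tokens":456}, ... }
#   HTTP_STATUS_CODE:200
# The grep pulls out the status line; sed '$d' drops it, leaving just the JSON body.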
|
||||
|
||||
if [ "$response_http_code" != "200" ]; then
|
||||
echo "[HELPER_ERROR] LLM API call failed with HTTP status $response_http_code." >&2
|
||||
@@ -146,17 +172,41 @@ EOF
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Pipe the raw response body directly to the Node.js parser script
|
||||
# Calculate cost of this LLM analysis call
|
||||
local input_tokens output_tokens input_cost_per_1m output_cost_per_1m calculated_llm_cost
|
||||
input_tokens=$(echo "$response_body" | jq -r '.usage.input_tokens // 0')
|
||||
output_tokens=$(echo "$response_body" | jq -r '.usage.output_tokens // 0')
|
||||
|
||||
if [ -f "$supported_models_file" ]; then
|
||||
model_cost_info=$(jq -r --arg provider "$llm_analysis_provider" --arg model_id "$llm_analysis_model_id" '
|
||||
.[$provider][] | select(.id == $model_id) | .cost_per_1m_tokens
|
||||
' "$supported_models_file")
|
||||
|
||||
if [[ -n "$model_cost_info" && "$model_cost_info" != "null" ]]; then
|
||||
input_cost_per_1m=$(echo "$model_cost_info" | jq -r '.input // 0')
|
||||
output_cost_per_1m=$(echo "$model_cost_info" | jq -r '.output // 0')
|
||||
|
||||
calculated_llm_cost=$(echo "($input_tokens / 1000000 * $input_cost_per_1m) + ($output_tokens / 1000000 * $output_cost_per_1m)" | bc -l)
|
||||
# Format to 6 decimal places
|
||||
formatted_llm_cost=$(printf "%.6f" "$calculated_llm_cost")
|
||||
echo "LLM Analysis AI Cost: $formatted_llm_cost USD" # This line will be parsed by run_e2e.sh
|
||||
else
|
||||
echo "[HELPER_WARNING] Cost data for model $llm_analysis_model_id not found in $supported_models_file. LLM analysis cost not calculated."
|
||||
fi
|
||||
else
|
||||
echo "[HELPER_WARNING] $supported_models_file not found. LLM analysis cost not calculated."
|
||||
fi
|
||||
# --- End cost calculation for this call ---
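# Worked example with illustrative numbers: input_tokens=2000, output_tokens=500,
# input_cost_per_1m=3, output_cost_per_1m=15 gives
#   (2000/1000000*3) + (500/1000000*15) = 0.006 + 0.0075 = 0.0135
# which printf "%.6f" renders as 0.013500.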
|
||||
|
||||
if echo "$response_body" | node "${project_root}/tests/e2e/parse_llm_output.cjs" "$log_file"; then
|
||||
echo "[HELPER_SUCCESS] LLM analysis parsed and printed successfully by Node.js script."
|
||||
return 0 # Success
|
||||
return 0
|
||||
else
|
||||
local node_exit_code=$?
|
||||
echo "[HELPER_ERROR] Node.js parsing script failed with exit code ${node_exit_code}."
|
||||
echo "[HELPER_ERROR] Raw API response body (first 500 chars): $(echo "$response_body" | head -c 500)"
|
||||
return 1 # Failure
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Export the function so it might be available to subshells if sourced
|
||||
export -f analyze_log_with_llm
|
||||
@@ -60,9 +60,52 @@ MAIN_ENV_FILE="$TASKMASTER_SOURCE_DIR/.env"
|
||||
# ---
|
||||
|
||||
# <<< Source the helper script >>>
|
||||
# shellcheck source=tests/e2e/e2e_helpers.sh
|
||||
source "$TASKMASTER_SOURCE_DIR/tests/e2e/e2e_helpers.sh"
|
||||
|
||||
# ==========================================
|
||||
# >>> Global Helper Functions Defined in run_e2e.sh <<<
|
||||
# --- Helper Functions (Define globally before export) ---
|
||||
_format_duration() {
|
||||
local total_seconds=$1
|
||||
local minutes=$((total_seconds / 60))
|
||||
local seconds=$((total_seconds % 60))
|
||||
printf "%dm%02ds" "$minutes" "$seconds"
|
||||
}
|
||||
|
||||
# Note: This relies on 'overall_start_time' being set globally before the function is called
|
||||
_get_elapsed_time_for_log() {
|
||||
local current_time
|
||||
current_time=$(date +%s)
|
||||
# Use overall_start_time here, as start_time_for_helpers might not be relevant globally
|
||||
local elapsed_seconds
|
||||
elapsed_seconds=$((current_time - overall_start_time))
|
||||
_format_duration "$elapsed_seconds"
|
||||
}
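# Example (illustrative): _format_duration 125 prints "2m05s", so log lines carry
# elapsed values like [2m05s] relative to overall_start_time.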
|
||||
|
||||
log_info() {
|
||||
echo "[INFO] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
}
|
||||
|
||||
log_success() {
|
||||
echo "[SUCCESS] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
}
|
||||
|
||||
log_error() {
|
||||
echo "[ERROR] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2
|
||||
}
|
||||
|
||||
log_step() {
|
||||
test_step_count=$((test_step_count + 1))
|
||||
echo ""
|
||||
echo "============================================="
|
||||
echo " STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
echo "============================================="
|
||||
}
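# Illustrative banner produced by log_step (timestamp values are hypothetical):
#   =============================================
#    STEP 3: [1m07s] 2025-05-10 14:02:33 Parsing PRD
#   =============================================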
|
||||
# ==========================================
|
||||
|
||||
# <<< Export helper functions for subshells >>>
|
||||
export -f log_info log_success log_error log_step _format_duration _get_elapsed_time_for_log
|
||||
export -f log_info log_success log_error log_step _format_duration _get_elapsed_time_for_log extract_and_sum_cost
|
||||
|
||||
# --- Argument Parsing for Analysis-Only Mode ---
|
||||
# This remains the same, as it exits early if matched
|
||||
@@ -138,6 +181,7 @@ fi
|
||||
# Note: These are mainly for step numbering within the log now, not for final summary
|
||||
test_step_count=0
|
||||
start_time_for_helpers=0 # Separate start time for helper functions inside the pipe
|
||||
total_e2e_cost="0.0" # Initialize total E2E cost
|
||||
# ---
|
||||
|
||||
# --- Log File Setup ---
|
||||
@@ -220,12 +264,16 @@ log_step() {
|
||||
fi
|
||||
|
||||
# --- Dependency Checks ---
|
||||
log_step "Checking for dependencies (jq)"
|
||||
log_step "Checking for dependencies (jq, bc)"
|
||||
if ! command -v jq &> /dev/null; then
|
||||
log_error "Dependency 'jq' is not installed or not found in PATH. Please install jq (e.g., 'brew install jq' or 'sudo apt-get install jq')."
|
||||
exit 1
|
||||
fi
|
||||
log_success "Dependency 'jq' found."
|
||||
if ! command -v bc &> /dev/null; then
|
||||
log_error "Dependency 'bc' not installed (for cost calculation). Please install bc (e.g., 'brew install bc' or 'sudo apt-get install bc')."
|
||||
exit 1
|
||||
fi
|
||||
log_success "Dependencies 'jq' and 'bc' found."
|
||||
|
||||
# --- Test Setup (Output to tee) ---
|
||||
log_step "Setting up test environment"
|
||||
@@ -292,30 +340,43 @@ log_step() {
|
||||
log_success "Project initialized."
|
||||
|
||||
log_step "Parsing PRD"
|
||||
task-master parse-prd ./prd.txt --force
|
||||
if [ ! -s "tasks/tasks.json" ]; then
|
||||
log_error "Parsing PRD failed: tasks/tasks.json not found or is empty."
|
||||
cmd_output_prd=$(task-master parse-prd ./prd.txt --force 2>&1)
|
||||
exit_status_prd=$?
|
||||
echo "$cmd_output_prd"
|
||||
extract_and_sum_cost "$cmd_output_prd"
|
||||
if [ $exit_status_prd -ne 0 ] || [ ! -s "tasks/tasks.json" ]; then
|
||||
log_error "Parsing PRD failed: tasks/tasks.json not found or is empty. Exit status: $exit_status_prd"
|
||||
exit 1
|
||||
else
|
||||
log_success "PRD parsed successfully."
|
||||
fi
|
||||
log_success "PRD parsed successfully."
|
||||
|
||||
log_step "Expanding Task 1 (to ensure subtask 1.1 exists)"
|
||||
# Add --research flag if needed and API keys support it
|
||||
task-master analyze-complexity --research --output complexity_results.json
|
||||
if [ ! -f "complexity_results.json" ]; then
|
||||
log_error "Complexity analysis failed: complexity_results.json not found."
|
||||
cmd_output_analyze=$(task-master analyze-complexity --research --output complexity_results.json 2>&1)
|
||||
exit_status_analyze=$?
|
||||
echo "$cmd_output_analyze"
|
||||
extract_and_sum_cost "$cmd_output_analyze"
|
||||
if [ $exit_status_analyze -ne 0 ] || [ ! -f "complexity_results.json" ]; then
|
||||
log_error "Complexity analysis failed: complexity_results.json not found. Exit status: $exit_status_analyze"
|
||||
exit 1
|
||||
else
|
||||
log_success "Complexity analysis saved to complexity_results.json"
|
||||
fi
|
||||
log_success "Complexity analysis saved to complexity_results.json"
|
||||
|
||||
log_step "Generating complexity report"
|
||||
task-master complexity-report --file complexity_results.json > complexity_report_formatted.log
|
||||
log_success "Formatted complexity report saved to complexity_report_formatted.log"
|
||||
|
||||
log_step "Expanding Task 1 (assuming it exists)"
|
||||
# Add --research flag if needed and API keys support it
|
||||
task-master expand --id=1 # Add --research?
|
||||
log_success "Attempted to expand Task 1."
|
||||
cmd_output_expand1=$(task-master expand --id=1 2>&1)
|
||||
exit_status_expand1=$?
|
||||
echo "$cmd_output_expand1"
|
||||
extract_and_sum_cost "$cmd_output_expand1"
|
||||
if [ $exit_status_expand1 -ne 0 ]; then
|
||||
log_error "Expanding Task 1 failed. Exit status: $exit_status_expand1"
|
||||
else
|
||||
log_success "Attempted to expand Task 1."
|
||||
fi
|
||||
|
||||
log_step "Setting status for Subtask 1.1 (assuming it exists)"
|
||||
task-master set-status --id=1.1 --status=done
|
||||
@@ -359,10 +420,11 @@ log_step() {
|
||||
|
||||
if [ -x "$verification_script_path" ]; then
|
||||
log_info "--- Executing Fallback Verification Script: $verification_script_path ---"
|
||||
# Execute the script directly, allowing output to flow to tee
|
||||
# Pass the current directory (the test run dir) as the argument
|
||||
"$verification_script_path" "$(pwd)"
|
||||
verification_exit_code=$? # Capture exit code immediately
|
||||
verification_output=$("$verification_script_path" "$(pwd)" 2>&1)
|
||||
verification_exit_code=$?
|
||||
echo "$verification_output"
|
||||
extract_and_sum_cost "$verification_output"
|
||||
|
||||
log_info "--- Finished Fallback Verification Script Execution (Exit Code: $verification_exit_code) ---"
|
||||
|
||||
# Log success/failure based on captured exit code
|
||||
@@ -370,13 +432,9 @@ log_step() {
|
||||
log_success "Fallback verification script reported success."
|
||||
else
|
||||
log_error "Fallback verification script reported FAILURE (Exit Code: $verification_exit_code)."
|
||||
# Decide whether to exit the main script or just log the error
|
||||
# exit 1 # Uncomment to make verification failure fatal
|
||||
fi
|
||||
else
|
||||
log_error "Fallback verification script not found or not executable at $verification_script_path. Skipping verification."
|
||||
# Decide whether to exit or continue
|
||||
# exit 1
|
||||
fi
|
||||
else
|
||||
log_info "Skipping Fallback Verification test as requested by flag."
|
||||
@@ -393,7 +451,7 @@ log_step() {
|
||||
declare -a models=(
|
||||
"claude-3-7-sonnet-20250219"
|
||||
"gpt-4o"
|
||||
"gemini-2.5-pro-exp-03-25"
|
||||
"gemini-2.5-pro-preview-05-06"
|
||||
"sonar-pro" # Note: This is research-only, add-task might fail if not using research model
|
||||
"grok-3"
|
||||
"anthropic/claude-3.7-sonnet" # OpenRouter uses Claude 3.7
|
||||
@@ -435,9 +493,9 @@ log_step() {
|
||||
|
||||
# 3. Check for success and extract task ID
|
||||
new_task_id=""
|
||||
if [ $add_task_exit_code -eq 0 ] && echo "$add_task_cmd_output" | grep -q "✓ Added new task #"; then
|
||||
# Attempt to extract the ID (adjust grep/sed/awk as needed based on actual output format)
|
||||
new_task_id=$(echo "$add_task_cmd_output" | grep "✓ Added new task #" | sed 's/.*✓ Added new task #\([0-9.]\+\).*/\1/')
|
||||
extract_and_sum_cost "$add_task_cmd_output"
|
||||
if [ $add_task_exit_code -eq 0 ] && (echo "$add_task_cmd_output" | grep -q "✓ Added new task #" || echo "$add_task_cmd_output" | grep -q "✅ New task created successfully:" || echo "$add_task_cmd_output" | grep -q "Task [0-9]\+ Created Successfully"); then
|
||||
new_task_id=$(echo "$add_task_cmd_output" | grep -o -E "(Task |#)[0-9.]+" | grep -o -E "[0-9.]+" | head -n 1)
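# Illustrative (assumed CLI output format): a success line such as
#   "✅ New task created successfully: Task 12"
# yields "Task 12" from the first grep and "12" from the second, so new_task_id=12.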
|
||||
if [ -n "$new_task_id" ]; then
|
||||
log_success "Add-task succeeded for $provider. New task ID: $new_task_id"
|
||||
echo "Provider $provider add-task SUCCESS (ID: $new_task_id)" >> provider_add_task_summary.log
|
||||
@@ -522,8 +580,6 @@ log_step() {
|
||||
log_success "Validation correctly identified non-existent dependency 999."
|
||||
else
|
||||
log_error "Validation DID NOT report non-existent dependency 999 as expected. Check validate_deps_non_existent.log"
|
||||
# Consider exiting here if this check fails, as it indicates a validation logic problem
|
||||
# exit 1
|
||||
fi
|
||||
|
||||
log_step "Fixing dependencies (should remove 1 -> 999)"
|
||||
@@ -534,7 +590,6 @@ log_step() {
|
||||
task-master validate-dependencies > validate_deps_after_fix_non_existent.log 2>&1 || true # Allow potential failure
|
||||
if grep -q "Non-existent dependency ID: 999" validate_deps_after_fix_non_existent.log; then
|
||||
log_error "Validation STILL reports non-existent dependency 999 after fix. Check logs."
|
||||
# exit 1
|
||||
else
|
||||
log_success "Validation shows non-existent dependency 999 was removed."
|
||||
fi
|
||||
@@ -553,7 +608,6 @@ log_step() {
|
||||
log_success "Validation correctly identified circular dependency between 4 and 5."
|
||||
else
|
||||
log_error "Validation DID NOT report circular dependency 4<->5 as expected. Check validate_deps_circular.log"
|
||||
# exit 1
|
||||
fi
|
||||
|
||||
log_step "Fixing dependencies (should remove one side of 4 <-> 5)"
|
||||
@@ -564,7 +618,6 @@ log_step() {
|
||||
task-master validate-dependencies > validate_deps_after_fix_circular.log 2>&1 || true # Allow potential failure
|
||||
if grep -q -E "Circular dependency detected involving task IDs: (4, 5|5, 4)" validate_deps_after_fix_circular.log; then
|
||||
log_error "Validation STILL reports circular dependency 4<->5 after fix. Check logs."
|
||||
# exit 1
|
||||
else
|
||||
log_success "Validation shows circular dependency 4<->5 was resolved."
|
||||
fi
|
||||
@@ -582,25 +635,60 @@ log_step() {
|
||||
log_success "Added Task $manual_task_id manually."
|
||||
|
||||
log_step "Adding Task $ai_task_id (AI)"
|
||||
task-master add-task --prompt="Implement basic UI styling using CSS variables for colors and spacing" --priority=medium --dependencies=1 # Depends on frontend setup
|
||||
log_success "Added Task $ai_task_id via AI prompt."
|
||||
cmd_output_add_ai=$(task-master add-task --prompt="Implement basic UI styling using CSS variables for colors and spacing" --priority=medium --dependencies=1 2>&1)
|
||||
exit_status_add_ai=$?
|
||||
echo "$cmd_output_add_ai"
|
||||
extract_and_sum_cost "$cmd_output_add_ai"
|
||||
if [ $exit_status_add_ai -ne 0 ]; then
|
||||
log_error "Adding AI Task $ai_task_id failed. Exit status: $exit_status_add_ai"
|
||||
else
|
||||
log_success "Added Task $ai_task_id via AI prompt."
|
||||
fi
|
||||
|
||||
|
||||
log_step "Updating Task 3 (update-task AI)"
|
||||
task-master update-task --id=3 --prompt="Update backend server setup: Ensure CORS is configured to allow requests from the frontend origin."
|
||||
log_success "Attempted update for Task 3."
|
||||
cmd_output_update_task3=$(task-master update-task --id=3 --prompt="Update backend server setup: Ensure CORS is configured to allow requests from the frontend origin." 2>&1)
|
||||
exit_status_update_task3=$?
|
||||
echo "$cmd_output_update_task3"
|
||||
extract_and_sum_cost "$cmd_output_update_task3"
|
||||
if [ $exit_status_update_task3 -ne 0 ]; then
|
||||
log_error "Updating Task 3 failed. Exit status: $exit_status_update_task3"
|
||||
else
|
||||
log_success "Attempted update for Task 3."
|
||||
fi
|
||||
|
||||
log_step "Updating Tasks from Task 5 (update AI)"
|
||||
task-master update --from=5 --prompt="Refactor the backend storage module to use a simple JSON file (storage.json) instead of an in-memory object for persistence. Update relevant tasks."
|
||||
log_success "Attempted update from Task 5 onwards."
|
||||
cmd_output_update_from5=$(task-master update --from=5 --prompt="Refactor the backend storage module to use a simple JSON file (storage.json) instead of an in-memory object for persistence. Update relevant tasks." 2>&1)
|
||||
exit_status_update_from5=$?
|
||||
echo "$cmd_output_update_from5"
|
||||
extract_and_sum_cost "$cmd_output_update_from5"
|
||||
if [ $exit_status_update_from5 -ne 0 ]; then
|
||||
log_error "Updating from Task 5 failed. Exit status: $exit_status_update_from5"
|
||||
else
|
||||
log_success "Attempted update from Task 5 onwards."
|
||||
fi
|
||||
|
||||
log_step "Expanding Task 8 (AI)"
|
||||
task-master expand --id=8 # Expand task 8: Frontend logic
|
||||
log_success "Attempted to expand Task 8."
|
||||
cmd_output_expand8=$(task-master expand --id=8 2>&1)
|
||||
exit_status_expand8=$?
|
||||
echo "$cmd_output_expand8"
|
||||
extract_and_sum_cost "$cmd_output_expand8"
|
||||
if [ $exit_status_expand8 -ne 0 ]; then
|
||||
log_error "Expanding Task 8 failed. Exit status: $exit_status_expand8"
|
||||
else
|
||||
log_success "Attempted to expand Task 8."
|
||||
fi
|
||||
|
||||
log_step "Updating Subtask 8.1 (update-subtask AI)"
|
||||
task-master update-subtask --id=8.1 --prompt="Implementation note: Remember to handle potential API errors and display a user-friendly message."
|
||||
log_success "Attempted update for Subtask 8.1."
|
||||
cmd_output_update_subtask81=$(task-master update-subtask --id=8.1 --prompt="Implementation note: Remember to handle potential API errors and display a user-friendly message." 2>&1)
|
||||
exit_status_update_subtask81=$?
|
||||
echo "$cmd_output_update_subtask81"
|
||||
extract_and_sum_cost "$cmd_output_update_subtask81"
|
||||
if [ $exit_status_update_subtask81 -ne 0 ]; then
|
||||
log_error "Updating Subtask 8.1 failed. Exit status: $exit_status_update_subtask81"
|
||||
else
|
||||
log_success "Attempted update for Subtask 8.1."
|
||||
fi
|
||||
|
||||
# Add a couple more subtasks for multi-remove test
|
||||
log_step 'Adding subtasks to Task 2 (for multi-remove test)'
|
||||
@@ -693,9 +781,16 @@ log_step() {
|
||||
|
||||
# === AI Commands (Re-test some after changes) ===
|
||||
log_step "Analyzing complexity (AI with Research - Final Check)"
|
||||
task-master analyze-complexity --research --output complexity_results_final.json
|
||||
if [ ! -f "complexity_results_final.json" ]; then log_error "Final Complexity analysis failed."; exit 1; fi
|
||||
log_success "Final Complexity analysis saved."
|
||||
cmd_output_analyze_final=$(task-master analyze-complexity --research --output complexity_results_final.json 2>&1)
|
||||
exit_status_analyze_final=$?
|
||||
echo "$cmd_output_analyze_final"
|
||||
extract_and_sum_cost "$cmd_output_analyze_final"
|
||||
if [ $exit_status_analyze_final -ne 0 ] || [ ! -f "complexity_results_final.json" ]; then
|
||||
log_error "Final Complexity analysis failed. Exit status: $exit_status_analyze_final. File found: $(test -f complexity_results_final.json && echo true || echo false)"
|
||||
exit 1 # Critical for subsequent report step
|
||||
else
|
||||
log_success "Final Complexity analysis command executed and file created."
|
||||
fi
|
||||
|
||||
log_step "Generating complexity report (Non-AI - Final Check)"
|
||||
task-master complexity-report --file complexity_results_final.json > complexity_report_formatted_final.log
|
||||
@@ -775,4 +870,8 @@ else
|
||||
echo "[ERROR] [$formatted_duration_for_error] $(date +"%Y-%m-%d %H:%M:%S") Test run directory $TEST_RUN_DIR not found. Cannot perform LLM analysis." >&2
|
||||
fi
|
||||
|
||||
# Final cost formatting
|
||||
formatted_total_e2e_cost=$(printf "%.6f" "$total_e2e_cost")
|
||||
echo "Total E2E AI Cost: $formatted_total_e2e_cost USD"
|
||||
|
||||
exit $EXIT_CODE
|
||||
@@ -18,6 +18,25 @@ set -o pipefail
|
||||
|
||||
# --- Embedded Helper Functions ---
|
||||
# Copied from e2e_helpers.sh to make this script standalone
|
||||
# OR source it if preferred and path is reliable
|
||||
|
||||
# <<< Determine SCRIPT_DIR and PROJECT_ROOT_DIR early >>>
|
||||
SCRIPT_DIR_FV="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
PROJECT_ROOT_DIR_FV="$( cd "$SCRIPT_DIR_FV/../.." &> /dev/null && pwd )" # Assumes script is in tests/e2e/
|
||||
|
||||
# --- Try to Source e2e_helpers.sh ---
|
||||
E2E_HELPERS_PATH_FV="${PROJECT_ROOT_DIR_FV}/tests/e2e/e2e_helpers.sh"
|
||||
if [ -f "$E2E_HELPERS_PATH_FV" ]; then
|
||||
# shellcheck source=tests/e2e/e2e_helpers.sh
|
||||
source "$E2E_HELPERS_PATH_FV"
|
||||
echo "[INFO FV] Sourced e2e_helpers.sh successfully."
|
||||
else
|
||||
echo "[ERROR FV] e2e_helpers.sh not found at $E2E_HELPERS_PATH_FV. Cost extraction will fail."
|
||||
# Define a placeholder if not found, so the script doesn't break immediately,
|
||||
# but cost extraction will effectively be a no-op.
|
||||
extract_and_sum_cost() { echo "$2"; } # Returns current total, effectively adding 0
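# In this script the helper is called as:
#   total_fallback_cost=$(extract_and_sum_cost "$output" "$total_fallback_cost")
# so this placeholder simply echoes the running total back unchanged.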
|
||||
fi
|
||||
|
||||
|
||||
_format_duration() {
|
||||
local total_seconds=$1
|
||||
@@ -27,127 +46,112 @@ _format_duration() {
|
||||
}
|
||||
|
||||
_get_elapsed_time_for_log() {
|
||||
# Needs overall_start_time defined in the main script body
|
||||
local current_time=$(date +%s)
|
||||
local elapsed_seconds=$((current_time - overall_start_time))
|
||||
local current_time
|
||||
current_time=$(date +%s)
|
||||
local elapsed_seconds
|
||||
elapsed_seconds=$((current_time - overall_start_time)) # Needs overall_start_time
|
||||
_format_duration "$elapsed_seconds"
|
||||
}
|
||||
|
||||
log_info() {
|
||||
echo "[INFO] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
echo "[INFO FV] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
}
|
||||
|
||||
log_success() {
|
||||
echo "[SUCCESS] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
echo "[SUCCESS FV] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
}
|
||||
|
||||
log_error() {
|
||||
echo "[ERROR] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2
|
||||
echo "[ERROR FV] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2
|
||||
}
|
||||
|
||||
log_step() {
|
||||
# Needs test_step_count defined and incremented in the main script body
|
||||
test_step_count=$((test_step_count + 1))
|
||||
test_step_count=$((test_step_count + 1)) # Needs test_step_count
|
||||
echo ""
|
||||
echo "============================================="
|
||||
echo " STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
echo " FV STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
echo "============================================="
|
||||
}
|
||||
|
||||
# --- Signal Handling ---
|
||||
# Global variable to hold child PID
|
||||
child_pid=0
|
||||
# Use a persistent log file name
|
||||
PROGRESS_LOG_FILE="fallback_verification_progress.log"
|
||||
PROGRESS_LOG_FILE="fallback_verification_progress.log" # Stays in run dir
|
||||
|
||||
cleanup() {
|
||||
echo "" # Newline after ^C
|
||||
echo ""
|
||||
log_error "Interrupt received. Cleaning up any running child process..."
|
||||
if [ "$child_pid" -ne 0 ]; then
|
||||
log_info "Killing child process (PID: $child_pid) and its group..."
|
||||
kill -TERM -- "-$child_pid" 2>/dev/null || kill -KILL -- "-$child_pid" 2>/dev/null
|
||||
child_pid=0
|
||||
fi
|
||||
# DO NOT delete the progress log file on interrupt
|
||||
log_info "Progress saved in: $PROGRESS_LOG_FILE"
|
||||
exit 130 # Exit with code indicating interrupt
|
||||
# Print current total cost on interrupt
|
||||
if [[ -n "${total_fallback_cost+x}" && "$total_fallback_cost" != "0.0" ]]; then # Check if var is set and not initial
|
||||
log_info "Current Total Fallback AI Cost at interruption: $total_fallback_cost USD"
|
||||
fi
|
||||
exit 130
|
||||
}
|
||||
|
||||
# Trap SIGINT (Ctrl+C) and SIGTERM
|
||||
trap cleanup INT TERM
|
||||
|
||||
# --- Configuration ---
|
||||
# Determine the project root relative to this script's location
|
||||
# Use a robust method to find the script's own directory
|
||||
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
# Assumes this script is in tests/e2e/
|
||||
PROJECT_ROOT_DIR="$( cd "$SCRIPT_DIR/../.." &> /dev/null && pwd )"
|
||||
SUPPORTED_MODELS_FILE="$PROJECT_ROOT_DIR/scripts/modules/supported-models.json"
|
||||
BASE_RUNS_DIR="$PROJECT_ROOT_DIR/tests/e2e/_runs"
|
||||
# SCRIPT_DIR and PROJECT_ROOT_DIR already defined above
|
||||
SUPPORTED_MODELS_FILE="$PROJECT_ROOT_DIR_FV/scripts/modules/supported-models.json"
|
||||
BASE_RUNS_DIR="$PROJECT_ROOT_DIR_FV/tests/e2e/_runs"
|
||||
|
||||
# --- Determine Target Run Directory ---
|
||||
TARGET_RUN_DIR=""
|
||||
if [ "$#" -ge 1 ] && [ -n "$1" ]; then
|
||||
# Use provided argument if it exists
|
||||
TARGET_RUN_DIR="$1"
|
||||
# Make path absolute if it's relative
|
||||
if [[ "$TARGET_RUN_DIR" != /* ]]; then
|
||||
TARGET_RUN_DIR="$(pwd)/$TARGET_RUN_DIR"
|
||||
fi
|
||||
echo "[INFO] Using provided target run directory: $TARGET_RUN_DIR"
|
||||
echo "[INFO FV] Using provided target run directory: $TARGET_RUN_DIR"
|
||||
else
|
||||
# Find the latest run directory
|
||||
echo "[INFO] No run directory provided, finding latest in $BASE_RUNS_DIR..."
|
||||
echo "[INFO FV] No run directory provided, finding latest in $BASE_RUNS_DIR..."
|
||||
TARGET_RUN_DIR=$(ls -td "$BASE_RUNS_DIR"/run_* 2>/dev/null | head -n 1)
|
||||
if [ -z "$TARGET_RUN_DIR" ]; then
|
||||
echo "[ERROR] No run directories found matching 'run_*' in $BASE_RUNS_DIR. Cannot proceed." >&2
|
||||
echo "[ERROR FV] No run directories found matching 'run_*' in $BASE_RUNS_DIR. Cannot proceed." >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "[INFO] Found latest run directory: $TARGET_RUN_DIR"
|
||||
echo "[INFO FV] Found latest run directory: $TARGET_RUN_DIR"
|
||||
fi
|
||||
|
||||
# Validate the target directory
|
||||
if [ ! -d "$TARGET_RUN_DIR" ]; then
|
||||
echo "[ERROR] Target run directory not found or is not a directory: $TARGET_RUN_DIR" >&2
|
||||
echo "[ERROR FV] Target run directory not found or is not a directory: $TARGET_RUN_DIR" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# --- Change to Target Directory ---
|
||||
echo "[INFO] Changing working directory to: $TARGET_RUN_DIR"
|
||||
echo "[INFO FV] Changing working directory to: $TARGET_RUN_DIR"
|
||||
if ! cd "$TARGET_RUN_DIR"; then
|
||||
echo "[ERROR] Failed to cd into target directory: $TARGET_RUN_DIR" >&2
|
||||
echo "[ERROR FV] Failed to cd into target directory: $TARGET_RUN_DIR" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "[INFO] Now operating inside: $(pwd)"
|
||||
echo "[INFO FV] Now operating inside: $(pwd)"
|
||||
|
||||
overall_start_time=$(date +%s) # Initialize for logging helpers
|
||||
test_step_count=0 # Initialize for logging helpers
|
||||
total_fallback_cost="0.0" # Initialize total cost for this script
|
||||
|
||||
# --- Now we are inside the target run directory ---
|
||||
overall_start_time=$(date +%s)
|
||||
test_step_count=0
|
||||
log_info "Starting fallback verification script execution in $(pwd)"
|
||||
log_info "Progress will be logged to: $(pwd)/$PROGRESS_LOG_FILE"
|
||||
|
||||
# --- Dependency Checks ---
|
||||
log_step "Checking for dependencies (jq) in verification script"
|
||||
if ! command -v jq &> /dev/null; then
|
||||
log_error "Dependency 'jq' is not installed or not found in PATH."
|
||||
exit 1
|
||||
fi
|
||||
log_success "Dependency 'jq' found."
|
||||
log_step "Checking for dependencies (jq, bc) in verification script"
|
||||
if ! command -v jq &> /dev/null; then log_error "Dependency 'jq' not installed."; exit 1; fi
|
||||
if ! command -v bc &> /dev/null; then log_error "Dependency 'bc' not installed (for cost calculation)."; exit 1; fi
|
||||
log_success "Dependencies 'jq' and 'bc' found."
|
||||
|
||||
|
||||
# --- Verification Logic ---
|
||||
log_step "Starting/Resuming Fallback Model (generateObjectService) Verification"
|
||||
# Ensure progress log exists, create if not
|
||||
touch "$PROGRESS_LOG_FILE"
|
||||
|
||||
# Ensure the supported models file exists (using absolute path)
|
||||
if [ ! -f "$SUPPORTED_MODELS_FILE" ]; then
|
||||
log_error "supported-models.json not found at absolute path: $SUPPORTED_MODELS_FILE."
|
||||
log_error "supported-models.json not found at: $SUPPORTED_MODELS_FILE."
|
||||
exit 1
|
||||
fi
|
||||
log_info "Using supported models file: $SUPPORTED_MODELS_FILE"
|
||||
|
||||
# Ensure subtask 1.1 exists (basic check, main script should guarantee)
|
||||
# Check for tasks.json in the current directory (which is now the run dir)
|
||||
if [ ! -f "tasks/tasks.json" ]; then
|
||||
log_error "tasks/tasks.json not found in current directory ($(pwd)). Was this run directory properly initialized?"
|
||||
exit 1
|
||||
@@ -158,78 +162,90 @@ if ! jq -e '.tasks[] | select(.id == 1) | .subtasks[] | select(.id == 1)' tasks/
|
||||
fi
|
||||
log_info "Subtask 1.1 found in $(pwd)/tasks/tasks.json, proceeding with verification."
|
||||
|
||||
# Read providers and models using jq
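# Illustrative output of the jq filter below (assumed supported-models.json shape):
#   {"provider":"anthropic","id":"claude-3-7-sonnet-20250219"}
# One compact object per fallback-capable model, split back into $provider/$model_id inside the loop.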
|
||||
jq -c 'to_entries[] | .key as $provider | .value[] | select(.allowed_roles[]? == "fallback") | {provider: $provider, id: .id}' "$SUPPORTED_MODELS_FILE" | while IFS= read -r model_info; do
|
||||
provider=$(echo "$model_info" | jq -r '.provider')
|
||||
model_id=$(echo "$model_info" | jq -r '.id')
|
||||
flag="" # Default flag
|
||||
flag=""
|
||||
|
||||
# Check if already tested
|
||||
# Use grep -Fq for fixed string and quiet mode
|
||||
if grep -Fq "${provider},${model_id}," "$PROGRESS_LOG_FILE"; then
|
||||
log_info "--- Skipping: $provider / $model_id (already tested, result in $PROGRESS_LOG_FILE) ---"
|
||||
# Still need to sum up its cost if it was successful before
|
||||
previous_test_output=$(grep -F "${provider},${model_id}," "$PROGRESS_LOG_FILE" | head -n 1)
|
||||
# Assuming the output file for successful test exists and contains cost
|
||||
prev_output_file="update_subtask_raw_output_${provider}_${model_id//\//_}.log"
|
||||
if [[ "$previous_test_output" == *",SUCCESS"* && -f "$prev_output_file" ]]; then
|
||||
# shellcheck disable=SC2154 # overall_start_time is set
|
||||
log_info "Summing cost from previous successful test of $provider / $model_id from $prev_output_file"
|
||||
# shellcheck disable=SC2154 # total_fallback_cost is set
|
||||
total_fallback_cost=$(extract_and_sum_cost "$(cat "$prev_output_file")" "$total_fallback_cost")
|
||||
log_info "Cumulative fallback AI cost after previous $provider / $model_id: $total_fallback_cost USD"
|
||||
fi
|
||||
continue
|
||||
fi
|
||||
|
||||
log_info "--- Verifying: $provider / $model_id ---"
|
||||
|
||||
# Determine provider flag
|
||||
if [ "$provider" == "openrouter" ]; then
|
||||
flag="--openrouter"
|
||||
elif [ "$provider" == "ollama" ]; then
|
||||
flag="--ollama"
|
||||
fi
|
||||
if [ "$provider" == "openrouter" ]; then flag="--openrouter"; fi
|
||||
if [ "$provider" == "ollama" ]; then flag="--ollama"; fi
|
||||
|
||||
# 1. Set the main model
|
||||
if ! command -v task-master &> /dev/null; then
|
||||
log_error "task-master command not found."
|
||||
echo "[INSTRUCTION] Please run 'npm link task-master-ai' in the project root first."
|
||||
echo "[INSTRUCTION FV] Please run 'npm link task-master-ai' in the project root first."
|
||||
exit 1
|
||||
fi
|
||||
log_info "Setting main model to $model_id ${flag:+using flag $flag}..."
|
||||
set_model_cmd="task-master models --set-main \"$model_id\" $flag"
|
||||
model_set_status="SUCCESS"
|
||||
if ! eval $set_model_cmd > /dev/null 2>&1; then
|
||||
if ! eval "$set_model_cmd" > /dev/null 2>&1; then
|
||||
log_error "Failed to set main model for $provider / $model_id. Skipping test."
|
||||
echo "$provider,$model_id,SET_MODEL_FAILED" >> "$PROGRESS_LOG_FILE"
|
||||
continue # Skip the actual test if setting fails
|
||||
continue
|
||||
fi
|
||||
log_info "Set main model ok."
|
||||
|
||||
# 2. Run update-subtask
|
||||
log_info "Running update-subtask --id=1.1 --prompt='Test generateObjectService' (timeout 120s)"
|
||||
update_subtask_output_file="update_subtask_raw_output_${provider}_${model_id//\//_}.log"
|
||||
|
||||
timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." > "$update_subtask_output_file" 2>&1 &
|
||||
|
||||
# Capture output to a variable AND a file
|
||||
update_subtask_command_output=""
|
||||
timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." 2>&1 | tee "$update_subtask_output_file" &
|
||||
# Store the command output in a variable simultaneously
|
||||
# update_subtask_command_output=$(timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." 2>&1)
|
||||
# The above direct capture won't work well with tee and backgrounding. Instead, read the file after command completion.
|
||||
child_pid=$!
|
||||
wait "$child_pid"
|
||||
update_subtask_exit_code=$?
|
||||
child_pid=0
|
||||
|
||||
# 3. Check result and log persistently
|
||||
# Read output from file for cost extraction
|
||||
if [ -f "$update_subtask_output_file" ]; then
|
||||
update_subtask_command_output=$(cat "$update_subtask_output_file")
|
||||
else
|
||||
update_subtask_command_output="" # Ensure it's defined
|
||||
fi
|
||||
|
||||
result_status=""
|
||||
if [ $update_subtask_exit_code -eq 0 ] && grep -q "Successfully updated subtask #1.1" "$update_subtask_output_file"; then
|
||||
if [ $update_subtask_exit_code -eq 0 ] && echo "$update_subtask_command_output" | grep -q "Successfully updated subtask #1.1"; then
|
||||
log_success "update-subtask succeeded for $provider / $model_id (Verified Output)."
|
||||
result_status="SUCCESS"
|
||||
# Extract and sum cost if successful
|
||||
# shellcheck disable=SC2154 # total_fallback_cost is set
|
||||
total_fallback_cost=$(extract_and_sum_cost "$update_subtask_command_output" "$total_fallback_cost")
|
||||
log_info "Cumulative fallback AI cost after $provider / $model_id: $total_fallback_cost USD"
|
||||
elif [ $update_subtask_exit_code -eq 124 ]; then
|
||||
log_error "update-subtask TIMED OUT for $provider / $model_id. Check $update_subtask_output_file."
|
||||
result_status="FAILED_TIMEOUT"
|
||||
elif [ $update_subtask_exit_code -eq 130 ] || [ $update_subtask_exit_code -eq 143 ]; then
|
||||
log_error "update-subtask INTERRUPTED for $provider / $model_id."
|
||||
result_status="INTERRUPTED" # Record interruption
|
||||
# Don't exit the loop, allow script to finish or be interrupted again
|
||||
result_status="INTERRUPTED"
|
||||
else
|
||||
log_error "update-subtask FAILED for $provider / $model_id (Exit Code: $update_subtask_exit_code). Check $update_subtask_output_file."
|
||||
result_status="FAILED"
|
||||
fi
|
||||
|
||||
# Append result to the persistent log file
|
||||
echo "$provider,$model_id,$result_status" >> "$PROGRESS_LOG_FILE"
|
||||
|
||||
done # End of fallback verification loop
|
||||
done
|
||||
|
||||
# --- Generate Final Verification Report to STDOUT ---
|
||||
# Report reads from the persistent PROGRESS_LOG_FILE
|
||||
echo ""
|
||||
echo "--- Fallback Model Verification Report (via $0) ---"
|
||||
echo "Executed inside run directory: $(pwd)"
|
||||
@@ -254,17 +270,13 @@ echo ""
|
||||
echo "Models INTERRUPTED during test (Inconclusive - Rerun):"
|
||||
awk -F',' '$3 == "INTERRUPTED" { print "- " $1 " / " $2 }' "$PROGRESS_LOG_FILE" | sort
|
||||
echo ""
|
||||
# Print the total cost for this script's operations
|
||||
formatted_total_fallback_cost=$(printf "%.6f" "$total_fallback_cost")
|
||||
echo "Total Fallback AI Cost (this script run): $formatted_total_fallback_cost USD" # This line will be parsed
|
||||
echo "-------------------------------------------------------"
|
||||
echo ""
|
||||
|
||||
# Don't clean up the progress log
|
||||
# if [ -f "$PROGRESS_LOG_FILE" ]; then
|
||||
# rm "$PROGRESS_LOG_FILE"
|
||||
# fi
|
||||
|
||||
log_info "Finished Fallback Model (generateObjectService) Verification Script"
|
||||
|
||||
# Remove trap before exiting normally
|
||||
trap - INT TERM
|
||||
|
||||
exit 0 # Exit successfully after printing the report
|
||||
exit 0
|
||||
|
||||
@@ -3,9 +3,8 @@
|
||||
*/
|
||||
|
||||
import { jest } from '@jest/globals';
|
||||
import path from 'path';
|
||||
import path, { dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { dirname } from 'path';
|
||||
|
||||
// Get the current module's directory
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
@@ -27,6 +26,7 @@ const mockReadJSON = jest.fn();
|
||||
const mockWriteJSON = jest.fn();
|
||||
const mockEnableSilentMode = jest.fn();
|
||||
const mockDisableSilentMode = jest.fn();
|
||||
const mockReadComplexityReport = jest.fn().mockReturnValue(null);
|
||||
|
||||
const mockGetAnthropicClient = jest.fn().mockReturnValue({});
|
||||
const mockGetConfiguredAnthropicClient = jest.fn().mockReturnValue({});
|
||||
@@ -130,6 +130,7 @@ jest.mock('../../../scripts/modules/utils.js', () => ({
|
||||
writeJSON: mockWriteJSON,
|
||||
enableSilentMode: mockEnableSilentMode,
|
||||
disableSilentMode: mockDisableSilentMode,
|
||||
readComplexityReport: mockReadComplexityReport,
|
||||
CONFIG: {
|
||||
model: 'claude-3-7-sonnet-20250219',
|
||||
maxTokens: 64000,
|
||||
@@ -160,15 +161,6 @@ jest.mock('../../../scripts/modules/task-manager.js', () => ({
|
||||
}));
|
||||
|
||||
// Import dependencies after mocks are set up
|
||||
import fs from 'fs';
|
||||
import {
|
||||
readJSON,
|
||||
writeJSON,
|
||||
enableSilentMode,
|
||||
disableSilentMode
|
||||
} from '../../../scripts/modules/utils.js';
|
||||
import { expandTask } from '../../../scripts/modules/task-manager.js';
|
||||
import { findTasksJsonPath } from '../../../mcp-server/src/core/utils/path-utils.js';
|
||||
import { sampleTasks } from '../../fixtures/sample-tasks.js';
|
||||
|
||||
// Mock logger
|
||||
@@ -220,6 +212,37 @@ describe('MCP Server Direct Functions', () => {
|
||||
});
|
||||
|
||||
describe('listTasksDirect', () => {
|
||||
// Sample complexity report for testing
|
||||
const mockComplexityReport = {
|
||||
meta: {
|
||||
generatedAt: '2025-03-24T20:01:35.986Z',
|
||||
tasksAnalyzed: 3,
|
||||
thresholdScore: 5,
|
||||
projectName: 'Test Project',
|
||||
usedResearch: false
|
||||
},
|
||||
complexityAnalysis: [
|
||||
{
|
||||
taskId: 1,
|
||||
taskTitle: 'Initialize Project',
|
||||
complexityScore: 3,
|
||||
recommendedSubtasks: 2
|
||||
},
|
||||
{
|
||||
taskId: 2,
|
||||
taskTitle: 'Create Core Functionality',
|
||||
complexityScore: 8,
|
||||
recommendedSubtasks: 5
|
||||
},
|
||||
{
|
||||
taskId: 3,
|
||||
taskTitle: 'Implement UI Components',
|
||||
complexityScore: 6,
|
||||
recommendedSubtasks: 4
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
// Test wrapper function that doesn't rely on the actual implementation
|
||||
async function testListTasks(args, mockLogger) {
|
||||
// File not found case
|
||||
@@ -235,21 +258,35 @@ describe('MCP Server Direct Functions', () => {
|
||||
};
|
||||
}
|
||||
|
||||
// Check for complexity report
|
||||
const complexityReport = mockReadComplexityReport();
|
||||
let tasksData = [...sampleTasks.tasks];
|
||||
|
||||
// Add complexity scores if report exists
|
||||
if (complexityReport && complexityReport.complexityAnalysis) {
|
||||
tasksData = tasksData.map((task) => {
|
||||
const analysis = complexityReport.complexityAnalysis.find(
|
||||
(a) => a.taskId === task.id
|
||||
);
|
||||
if (analysis) {
|
||||
return { ...task, complexityScore: analysis.complexityScore };
|
||||
}
|
||||
return task;
|
||||
});
|
||||
}
|
||||
|
||||
// Success case
|
||||
if (!args.status && !args.withSubtasks) {
|
||||
return {
|
||||
success: true,
|
||||
data: {
|
||||
tasks: sampleTasks.tasks,
|
||||
tasks: tasksData,
|
||||
stats: {
|
||||
total: sampleTasks.tasks.length,
|
||||
completed: sampleTasks.tasks.filter((t) => t.status === 'done')
|
||||
total: tasksData.length,
|
||||
completed: tasksData.filter((t) => t.status === 'done').length,
|
||||
inProgress: tasksData.filter((t) => t.status === 'in-progress')
|
||||
.length,
|
||||
inProgress: sampleTasks.tasks.filter(
|
||||
(t) => t.status === 'in-progress'
|
||||
).length,
|
||||
pending: sampleTasks.tasks.filter((t) => t.status === 'pending')
|
||||
.length
|
||||
pending: tasksData.filter((t) => t.status === 'pending').length
|
||||
}
|
||||
},
|
||||
fromCache: false
|
||||
@@ -258,16 +295,14 @@ describe('MCP Server Direct Functions', () => {
|
||||
|
||||
// Status filter case
|
||||
if (args.status) {
|
||||
const filteredTasks = sampleTasks.tasks.filter(
|
||||
(t) => t.status === args.status
|
||||
);
|
||||
const filteredTasks = tasksData.filter((t) => t.status === args.status);
|
||||
return {
|
||||
success: true,
|
||||
data: {
|
||||
tasks: filteredTasks,
|
||||
filter: args.status,
|
||||
stats: {
|
||||
total: sampleTasks.tasks.length,
|
||||
total: tasksData.length,
|
||||
filtered: filteredTasks.length
|
||||
}
|
||||
},
|
||||
@@ -280,10 +315,10 @@ describe('MCP Server Direct Functions', () => {
|
||||
return {
|
||||
success: true,
|
||||
data: {
|
||||
tasks: sampleTasks.tasks,
|
||||
tasks: tasksData,
|
||||
includeSubtasks: true,
|
||||
stats: {
|
||||
total: sampleTasks.tasks.length
|
||||
total: tasksData.length
|
||||
}
|
||||
},
|
||||
fromCache: false
|
||||
@@ -370,6 +405,29 @@ describe('MCP Server Direct Functions', () => {
|
||||
expect(result.error.code).toBe('FILE_NOT_FOUND_ERROR');
|
||||
expect(mockLogger.error).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test('should include complexity scores when complexity report exists', async () => {
|
||||
// Arrange
|
||||
mockReadComplexityReport.mockReturnValueOnce(mockComplexityReport);
|
||||
const args = {
|
||||
projectRoot: testProjectRoot,
|
||||
file: testTasksPath,
|
||||
withSubtasks: true
|
||||
};
|
||||
|
||||
// Act
|
||||
const result = await testListTasks(args, mockLogger);
|
||||
// Assert
|
||||
expect(result.success).toBe(true);
|
||||
|
||||
// Check that tasks have complexity scores from the report
|
||||
mockComplexityReport.complexityAnalysis.forEach((analysis) => {
|
||||
const task = result.data.tasks.find((t) => t.id === analysis.taskId);
|
||||
if (task) {
|
||||
expect(task.complexityScore).toBe(analysis.complexityScore);
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('expandTaskDirect', () => {
|
||||
|
||||
@@ -9,7 +9,7 @@ process.env.MODEL = 'sonar-pro';
|
||||
process.env.MAX_TOKENS = '64000';
|
||||
process.env.TEMPERATURE = '0.2';
|
||||
process.env.DEBUG = 'false';
|
||||
process.env.LOG_LEVEL = 'error'; // Set to error to reduce noise in tests
|
||||
process.env.TASKMASTER_LOG_LEVEL = 'error'; // Set to error to reduce noise in tests
|
||||
process.env.DEFAULT_SUBTASKS = '5';
|
||||
process.env.DEFAULT_PRIORITY = 'medium';
|
||||
process.env.PROJECT_NAME = 'Test Project';
|
||||
|
||||
@@ -8,6 +8,31 @@ const mockGetResearchModelId = jest.fn();
|
||||
const mockGetFallbackProvider = jest.fn();
|
||||
const mockGetFallbackModelId = jest.fn();
|
||||
const mockGetParametersForRole = jest.fn();
|
||||
const mockGetUserId = jest.fn();
|
||||
const mockGetDebugFlag = jest.fn();
|
||||
|
||||
// --- Mock MODEL_MAP Data ---
|
||||
// Provide a simplified structure sufficient for cost calculation tests
|
||||
const mockModelMap = {
|
||||
anthropic: [
|
||||
{
|
||||
id: 'test-main-model',
|
||||
cost_per_1m_tokens: { input: 3, output: 15, currency: 'USD' }
|
||||
},
|
||||
{
|
||||
id: 'test-fallback-model',
|
||||
cost_per_1m_tokens: { input: 3, output: 15, currency: 'USD' }
|
||||
}
|
||||
],
|
||||
perplexity: [
|
||||
{
|
||||
id: 'test-research-model',
|
||||
cost_per_1m_tokens: { input: 1, output: 1, currency: 'USD' }
|
||||
}
|
||||
]
|
||||
// Add other providers/models if needed for specific tests
|
||||
};
|
||||
const mockGetBaseUrlForRole = jest.fn();
|
||||
|
||||
jest.unstable_mockModule('../../scripts/modules/config-manager.js', () => ({
|
||||
getMainProvider: mockGetMainProvider,
|
||||
@@ -16,7 +41,11 @@ jest.unstable_mockModule('../../scripts/modules/config-manager.js', () => ({
|
||||
getResearchModelId: mockGetResearchModelId,
|
||||
getFallbackProvider: mockGetFallbackProvider,
|
||||
getFallbackModelId: mockGetFallbackModelId,
|
||||
getParametersForRole: mockGetParametersForRole
|
||||
getParametersForRole: mockGetParametersForRole,
|
||||
getUserId: mockGetUserId,
|
||||
getDebugFlag: mockGetDebugFlag,
|
||||
MODEL_MAP: mockModelMap,
|
||||
getBaseUrlForRole: mockGetBaseUrlForRole
|
||||
}));
|
||||
|
||||
// Mock AI Provider Modules
|
||||
@@ -44,10 +73,15 @@ jest.unstable_mockModule('../../src/ai-providers/perplexity.js', () => ({
|
||||
const mockLog = jest.fn();
|
||||
const mockResolveEnvVariable = jest.fn();
|
||||
const mockFindProjectRoot = jest.fn();
|
||||
const mockIsSilentMode = jest.fn();
|
||||
const mockLogAiUsage = jest.fn();
|
||||
|
||||
jest.unstable_mockModule('../../scripts/modules/utils.js', () => ({
|
||||
log: mockLog,
|
||||
resolveEnvVariable: mockResolveEnvVariable,
|
||||
findProjectRoot: mockFindProjectRoot
|
||||
findProjectRoot: mockFindProjectRoot,
|
||||
isSilentMode: mockIsSilentMode,
|
||||
logAiUsage: mockLogAiUsage
|
||||
}));
|
||||
|
||||
// Import the module to test (AFTER mocks)
|
||||
@@ -83,11 +117,16 @@ describe('Unified AI Services', () => {
|
||||
|
||||
// Set a default behavior for the new mock
|
||||
mockFindProjectRoot.mockReturnValue(fakeProjectRoot);
|
||||
mockGetDebugFlag.mockReturnValue(false);
|
||||
mockGetUserId.mockReturnValue('test-user-id'); // Add default mock for getUserId
|
||||
});
|
||||
|
||||
describe('generateTextService', () => {
|
||||
test('should use main provider/model and succeed', async () => {
|
||||
mockGenerateAnthropicText.mockResolvedValue('Main provider response');
|
||||
mockGenerateAnthropicText.mockResolvedValue({
|
||||
text: 'Main provider response',
|
||||
usage: { inputTokens: 10, outputTokens: 20, totalTokens: 30 }
|
||||
});
|
||||
|
||||
const params = {
|
||||
role: 'main',
|
||||
@@ -97,7 +136,8 @@ describe('Unified AI Services', () => {
|
||||
};
|
||||
const result = await generateTextService(params);
|
||||
|
||||
expect(result).toBe('Main provider response');
|
||||
expect(result.mainResult).toBe('Main provider response');
|
||||
expect(result).toHaveProperty('telemetryData');
|
||||
expect(mockGetMainProvider).toHaveBeenCalledWith(fakeProjectRoot);
|
||||
expect(mockGetMainModelId).toHaveBeenCalledWith(fakeProjectRoot);
|
||||
expect(mockGetParametersForRole).toHaveBeenCalledWith(
|
||||
@@ -127,7 +167,10 @@ describe('Unified AI Services', () => {
|
||||
const mainError = new Error('Main provider failed');
|
||||
mockGenerateAnthropicText
|
||||
.mockRejectedValueOnce(mainError)
|
||||
.mockResolvedValueOnce('Fallback provider response');
|
||||
.mockResolvedValueOnce({
|
||||
text: 'Fallback provider response',
|
||||
usage: { inputTokens: 15, outputTokens: 25, totalTokens: 40 }
|
||||
});
|
||||
|
||||
const explicitRoot = '/explicit/test/root';
|
||||
const params = {
|
||||
@@ -137,7 +180,8 @@ describe('Unified AI Services', () => {
|
||||
};
|
||||
const result = await generateTextService(params);
|
||||
|
||||
expect(result).toBe('Fallback provider response');
|
||||
expect(result.mainResult).toBe('Fallback provider response');
|
||||
expect(result).toHaveProperty('telemetryData');
|
||||
expect(mockGetMainProvider).toHaveBeenCalledWith(explicitRoot);
|
||||
expect(mockGetFallbackProvider).toHaveBeenCalledWith(explicitRoot);
|
||||
expect(mockGetParametersForRole).toHaveBeenCalledWith(
|
||||
@@ -173,14 +217,16 @@ describe('Unified AI Services', () => {
|
||||
mockGenerateAnthropicText
|
||||
.mockRejectedValueOnce(mainError)
|
||||
.mockRejectedValueOnce(fallbackError);
|
||||
mockGeneratePerplexityText.mockResolvedValue(
|
||||
'Research provider response'
|
||||
);
|
||||
mockGeneratePerplexityText.mockResolvedValue({
|
||||
text: 'Research provider response',
|
||||
usage: { inputTokens: 20, outputTokens: 30, totalTokens: 50 }
|
||||
});
|
||||
|
||||
const params = { role: 'main', prompt: 'Research fallback test' };
|
||||
const result = await generateTextService(params);
|
||||
|
||||
expect(result).toBe('Research provider response');
|
||||
expect(result.mainResult).toBe('Research provider response');
|
||||
expect(result).toHaveProperty('telemetryData');
|
||||
expect(mockGetMainProvider).toHaveBeenCalledWith(fakeProjectRoot);
|
||||
expect(mockGetFallbackProvider).toHaveBeenCalledWith(fakeProjectRoot);
|
||||
expect(mockGetResearchProvider).toHaveBeenCalledWith(fakeProjectRoot);
|
||||
@@ -247,22 +293,32 @@ describe('Unified AI Services', () => {
|
||||
const retryableError = new Error('Rate limit');
|
||||
mockGenerateAnthropicText
|
||||
.mockRejectedValueOnce(retryableError) // Fails once
|
||||
.mockResolvedValue('Success after retry'); // Succeeds on retry
|
||||
.mockResolvedValueOnce({
|
||||
// Succeeds on retry
|
||||
text: 'Success after retry',
|
||||
usage: { inputTokens: 5, outputTokens: 10, totalTokens: 15 }
|
||||
});
|
||||
|
||||
const params = { role: 'main', prompt: 'Retry success test' };
|
||||
const result = await generateTextService(params);
|
||||
|
||||
expect(result).toBe('Success after retry');
|
||||
expect(result.mainResult).toBe('Success after retry');
|
||||
expect(result).toHaveProperty('telemetryData');
|
||||
expect(mockGenerateAnthropicText).toHaveBeenCalledTimes(2); // Initial + 1 retry
|
||||
expect(mockLog).toHaveBeenCalledWith(
|
||||
'info',
|
||||
expect.stringContaining('Retryable error detected. Retrying')
|
||||
expect.stringContaining(
|
||||
'Something went wrong on the provider side. Retrying'
|
||||
)
|
||||
);
|
||||
});
|
||||
|
||||
test('should use default project root or handle null if findProjectRoot returns null', async () => {
|
||||
mockFindProjectRoot.mockReturnValue(null); // Simulate not finding root
|
||||
mockGenerateAnthropicText.mockResolvedValue('Response with no root');
|
||||
mockGenerateAnthropicText.mockResolvedValue({
|
||||
text: 'Response with no root',
|
||||
usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 }
|
||||
});
|
||||
|
||||
const params = { role: 'main', prompt: 'No root test' }; // No explicit root passed
|
||||
await generateTextService(params);
|
||||
|
||||
@@ -2,8 +2,9 @@
|
||||
* Task finder tests
|
||||
*/
|
||||
|
||||
// Import after mocks are set up - No mocks needed for readComplexityReport anymore
|
||||
import { findTaskById } from '../../scripts/modules/utils.js';
|
||||
import { sampleTasks, emptySampleTasks } from '../fixtures/sample-tasks.js';
|
||||
import { emptySampleTasks, sampleTasks } from '../fixtures/sample-tasks.js';
|
||||
|
||||
describe('Task Finder', () => {
|
||||
describe('findTaskById function', () => {
|
||||
@@ -55,5 +56,62 @@ describe('Task Finder', () => {
|
||||
expect(result.task).toBeNull();
|
||||
expect(result.originalSubtaskCount).toBeNull();
|
||||
});
|
||||
test('should work correctly when no complexity report is provided', () => {
|
||||
// Pass null as the complexity report
|
||||
const result = findTaskById(sampleTasks.tasks, 2, null);
|
||||
|
||||
expect(result.task).toBeDefined();
|
||||
expect(result.task.id).toBe(2);
|
||||
expect(result.task.complexityScore).toBeUndefined();
|
||||
});
|
||||
test('should work correctly when task has no complexity data in the provided report', () => {
|
||||
// Define a complexity report that doesn't include task 2
|
||||
const complexityReport = {
|
||||
complexityAnalysis: [{ taskId: 999, complexityScore: 5 }]
|
||||
};
|
||||
|
||||
const result = findTaskById(sampleTasks.tasks, 2, complexityReport);
|
||||
|
||||
expect(result.task).toBeDefined();
|
||||
expect(result.task.id).toBe(2);
|
||||
expect(result.task.complexityScore).toBeUndefined();
|
||||
});
|
||||
|
||||
test('should include complexity score when report is provided', () => {
|
||||
// Define the complexity report for this test
|
||||
const complexityReport = {
|
||||
meta: {
|
||||
generatedAt: '2023-01-01T00:00:00.000Z',
|
||||
tasksAnalyzed: 3,
|
||||
thresholdScore: 5
|
||||
},
|
||||
complexityAnalysis: [
|
||||
{
|
||||
taskId: 1,
|
||||
taskTitle: 'Initialize Project',
|
||||
complexityScore: 3,
|
||||
recommendedSubtasks: 2
|
||||
},
|
||||
{
|
||||
taskId: 2,
|
||||
taskTitle: 'Create Core Functionality',
|
||||
complexityScore: 8,
|
||||
recommendedSubtasks: 5
|
||||
},
|
||||
{
|
||||
taskId: 3,
|
||||
taskTitle: 'Implement UI Components',
|
||||
complexityScore: 6,
|
||||
recommendedSubtasks: 4
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
const result = findTaskById(sampleTasks.tasks, 2, complexityReport);
|
||||
|
||||
expect(result.task).toBeDefined();
|
||||
expect(result.task.id).toBe(2);
|
||||
expect(result.task.complexityScore).toBe(8);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -199,6 +199,12 @@ const testSetTaskStatus = (tasksData, taskIdInput, newStatus) => {
|
||||
|
||||
// Simplified version of updateSingleTaskStatus for testing
|
||||
const testUpdateSingleTaskStatus = (tasksData, taskIdInput, newStatus) => {
|
||||
if (!isValidTaskStatus(newStatus)) {
|
||||
throw new Error(
|
||||
`Error: Invalid status value: ${newStatus}. Use one of: ${TASK_STATUS_OPTIONS.join(', ')}`
|
||||
);
|
||||
}
|
||||
|
||||
// Check if it's a subtask (e.g., "1.2")
|
||||
if (taskIdInput.includes('.')) {
|
||||
const [parentId, subtaskId] = taskIdInput
|
||||
@@ -329,6 +335,10 @@ const testAddTask = (
|
||||
import * as taskManager from '../../scripts/modules/task-manager.js';
|
||||
import { sampleClaudeResponse } from '../fixtures/sample-claude-response.js';
|
||||
import { sampleTasks, emptySampleTasks } from '../fixtures/sample-tasks.js';
|
||||
import {
|
||||
isValidTaskStatus,
|
||||
TASK_STATUS_OPTIONS
|
||||
} from '../../src/constants/task-status.js';
|
||||
|
||||
// Destructure the required functions for convenience
|
||||
const { findNextTask, generateTaskFiles, clearSubtasks, updateTaskById } =
|
||||
@@ -1165,6 +1175,16 @@ describe('Task Manager Module', () => {
|
||||
expect(testTasksData.tasks[1].status).toBe('done');
|
||||
});
|
||||
|
||||
test('should throw error for invalid status', async () => {
|
||||
// Arrange
|
||||
const testTasksData = JSON.parse(JSON.stringify(sampleTasks));
|
||||
|
||||
// Assert
|
||||
expect(() =>
|
||||
testUpdateSingleTaskStatus(testTasksData, '2', 'Done')
|
||||
).toThrow(/Error: Invalid status value: Done./);
|
||||
});
|
||||
|
||||
test('should update subtask status', async () => {
|
||||
// Arrange
|
||||
const testTasksData = JSON.parse(JSON.stringify(sampleTasks));
|
||||
|
||||