Merge branch 'next' of https://github.com/eyaltoledano/claude-task-master into joedanz/flexible-brand-rules
# Conflicts:
#	scripts/modules/commands.js
#	scripts/modules/ui.js
@@ -5,6 +5,42 @@
|
||||
# It requires curl and jq to be installed.
|
||||
# It expects the project root path to be passed as the second argument.
|
||||
|
||||
# --- New Function: extract_and_sum_cost ---
|
||||
# Takes a string containing command output.
|
||||
# Extracts costs (lines with "Est. Cost: $X.YYYYYY" or similar from telemetry output)
|
||||
# from the output, sums them, and adds them to the GLOBAL total_e2e_cost variable.
|
||||
extract_and_sum_cost() {
|
||||
local command_output="$1"
|
||||
# Ensure total_e2e_cost is treated as a number, default to 0.0 if not set or invalid
|
||||
if ! [[ "$total_e2e_cost" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
|
||||
total_e2e_cost="0.0"
|
||||
fi
|
||||
|
||||
local extracted_cost_sum="0.0"
|
||||
|
||||
# Grep for lines containing "Est. Cost: $", then extract the numeric value.
|
||||
# Example line: │ Est. Cost: $0.093549 │
|
||||
# Accumulate all costs found in the command_output
|
||||
while IFS= read -r line; do
|
||||
# Extract the numeric part after 'Est. Cost: $' and before any trailing spaces/chars
|
||||
cost_value=$(echo "$line" | grep -o -E 'Est\. Cost: \$([0-9]+\.[0-9]+)' | sed -E 's/Est\. Cost: \$//g')
|
||||
if [[ -n "$cost_value" && "$cost_value" =~ ^[0-9]+\.[0-9]+$ ]]; then
|
||||
# echo "[DEBUG] Found cost value: $cost_value in line: '$line'" # For debugging
|
||||
extracted_cost_sum=$(echo "$extracted_cost_sum + $cost_value" | bc)
|
||||
# else # For debugging
|
||||
# echo "[DEBUG] No valid cost value found or extracted in line: '$line' (extracted: '$cost_value')" # For debugging
|
||||
fi
|
||||
done < <(echo "$command_output" | grep -E 'Est\. Cost: \$')
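# Note: process substitution (< <(...)) is used instead of piping into the while
# loop so that extracted_cost_sum is updated in the current shell, not a subshell.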
|
||||
|
||||
# echo "[DEBUG] Extracted sum from this command output: $extracted_cost_sum" # For debugging
|
||||
if (( $(echo "$extracted_cost_sum > 0" | bc -l) )); then
|
||||
total_e2e_cost=$(echo "$total_e2e_cost + $extracted_cost_sum" | bc)
|
||||
# echo "[DEBUG] Updated global total_e2e_cost: $total_e2e_cost" # For debugging
|
||||
fi
|
||||
# No echo here, the function modifies a global variable.
|
||||
}
|
||||
export -f extract_and_sum_cost # Export for use in other scripts if sourced
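# Illustrative usage (hypothetical command; assumes total_e2e_cost was initialized by the caller):
#   total_e2e_cost="0.0"
#   cmd_output=$(task-master add-task --prompt="..." 2>&1)
#   extract_and_sum_cost "$cmd_output"      # folds any "Est. Cost: $X" lines into total_e2e_cost
#   echo "Running total: $total_e2e_cost USD"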
|
||||
|
||||
analyze_log_with_llm() {
|
||||
local log_file="$1"
|
||||
local project_root="$2" # Expect project root as the second argument
|
||||
@@ -15,17 +51,17 @@ analyze_log_with_llm() {
|
||||
fi
|
||||
|
||||
local env_file="${project_root}/.env" # Path to .env in project root
|
||||
local supported_models_file="${project_root}/scripts/modules/supported-models.json"
|
||||
|
||||
local provider_summary_log="provider_add_task_summary.log" # File summarizing provider test outcomes
|
||||
local api_key=""
|
||||
# !!! IMPORTANT: Replace with your actual Claude API endpoint if different !!!
|
||||
local api_endpoint="https://api.anthropic.com/v1/messages"
|
||||
# !!! IMPORTANT: Ensure this matches the variable name in your .env file !!!
|
||||
local api_key_name="ANTHROPIC_API_KEY"
|
||||
local llm_analysis_model_id="claude-3-7-sonnet-20250219" # Model used for this analysis
|
||||
local llm_analysis_provider="anthropic"
|
||||
|
||||
echo "" # Add a newline before analysis starts
|
||||
|
||||
# Check for jq and curl
|
||||
if ! command -v jq &> /dev/null; then
|
||||
echo "[HELPER_ERROR] LLM Analysis requires 'jq'. Skipping analysis." >&2
|
||||
return 1
|
||||
@@ -34,34 +70,31 @@ analyze_log_with_llm() {
|
||||
echo "[HELPER_ERROR] LLM Analysis requires 'curl'. Skipping analysis." >&2
|
||||
return 1
|
||||
fi
|
||||
if ! command -v bc &> /dev/null; then
|
||||
echo "[HELPER_ERROR] LLM Analysis requires 'bc' for cost calculation. Skipping analysis." >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Check for API Key in the PROJECT ROOT's .env file
|
||||
if [ -f "$env_file" ]; then
|
||||
# Original assignment - Reading from project root .env
|
||||
api_key=$(grep "^${api_key_name}=" "$env_file" | sed -e "s/^${api_key_name}=//" -e 's/^[[:space:]"]*//' -e 's/[[:space:]"]*$//')
|
||||
fi
|
||||
|
||||
if [ -z "$api_key" ]; then
|
||||
echo "[HELPER_ERROR] ${api_key_name} not found or empty in project root .env file ($env_file). Skipping LLM analysis." >&2 # Updated error message
|
||||
echo "[HELPER_ERROR] ${api_key_name} not found or empty in project root .env file ($env_file). Skipping LLM analysis." >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
# The log file path is passed in as an argument; it must either be absolute or
# resolve relative to the caller's working directory. For simplicity, this assumes
# an absolute path or a path relative to the initial PWD, which is what the
# calling script provides (relative to the original PWD).
|
||||
if [ ! -f "$log_file" ]; then
|
||||
echo "[HELPER_ERROR] Log file not found: $log_file (PWD: $(pwd)). Check path passed to function. Skipping LLM analysis." >&2 # Updated error
|
||||
echo "[HELPER_ERROR] Log file not found: $log_file (PWD: $(pwd)). Check path passed to function. Skipping LLM analysis." >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
local log_content
|
||||
# Read entire file, handle potential errors
|
||||
log_content=$(cat "$log_file") || {
|
||||
echo "[HELPER_ERROR] Failed to read log file: $log_file. Skipping LLM analysis." >&2
|
||||
return 1
|
||||
}
|
||||
|
||||
# Prepare the prompt using a quoted heredoc for literal interpretation
|
||||
read -r -d '' prompt_template <<'EOF'
|
||||
Analyze the following E2E test log for the task-master tool. The log contains output from various 'task-master' commands executed sequentially.
|
||||
|
||||
@@ -99,41 +132,34 @@ Here is the main log content:
|
||||
|
||||
%s
|
||||
EOF
|
||||
# Note: The final %s is a placeholder for printf later
|
||||
|
||||
local full_prompt
|
||||
# Use printf to substitute the log content into the %s placeholder
|
||||
if ! printf -v full_prompt "$prompt_template" "$log_content"; then
|
||||
echo "[HELPER_ERROR] Failed to format prompt using printf." >&2
|
||||
# It's unlikely printf itself fails, but good practice
|
||||
return 1
|
||||
fi
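# Minimal sketch of the printf -v substitution above (hypothetical values):
#   template="Log follows: %s"
#   printf -v filled "$template" "contents of the log"
#   # filled is now "Log follows: contents of the log"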
|
||||
|
||||
# Construct the JSON payload for Claude Messages API
|
||||
local payload
|
||||
payload=$(jq -n --arg prompt "$full_prompt" '{
|
||||
"model": "claude-3-haiku-20240307", # Using Haiku for faster/cheaper testing
|
||||
"max_tokens": 3072, # Increased slightly
|
||||
"model": "'"$llm_analysis_model_id"'",
|
||||
"max_tokens": 3072,
|
||||
"messages": [
|
||||
{"role": "user", "content": $prompt}
|
||||
]
|
||||
# "temperature": 0.0 # Optional: Lower temperature for more deterministic JSON output
|
||||
}') || {
|
||||
echo "[HELPER_ERROR] Failed to create JSON payload using jq." >&2
|
||||
return 1
|
||||
}
|
||||
|
||||
local response_raw response_http_code response_body
|
||||
# Capture body and HTTP status code separately
|
||||
response_raw=$(curl -s -w "\nHTTP_STATUS_CODE:%{http_code}" -X POST "$api_endpoint" \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "x-api-key: $api_key" \
|
||||
-H "anthropic-version: 2023-06-01" \
|
||||
--data "$payload")
|
||||
|
||||
# Extract status code and body
|
||||
response_http_code=$(echo "$response_raw" | grep '^HTTP_STATUS_CODE:' | sed 's/HTTP_STATUS_CODE://')
|
||||
response_body=$(echo "$response_raw" | sed '$d') # Remove last line (status code)
|
||||
response_body=$(echo "$response_raw" | sed '$d')
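# Assumed shape of $response_raw after the curl -w flag above (illustrative):
#   {"id":"msg_...","usage":{"input_tokens":123,"output_tokens":456}, ... }
#   HTTP_STATUS_CODE:200
# The grep pulls out the status line; sed '$d' drops it, leaving just the JSON body.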
|
||||
|
||||
if [ "$response_http_code" != "200" ]; then
|
||||
echo "[HELPER_ERROR] LLM API call failed with HTTP status $response_http_code." >&2
|
||||
@@ -146,17 +172,41 @@ EOF
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Pipe the raw response body directly to the Node.js parser script
|
||||
# Calculate cost of this LLM analysis call
|
||||
local input_tokens output_tokens input_cost_per_1m output_cost_per_1m calculated_llm_cost
|
||||
input_tokens=$(echo "$response_body" | jq -r '.usage.input_tokens // 0')
|
||||
output_tokens=$(echo "$response_body" | jq -r '.usage.output_tokens // 0')
|
||||
|
||||
if [ -f "$supported_models_file" ]; then
|
||||
model_cost_info=$(jq -r --arg provider "$llm_analysis_provider" --arg model_id "$llm_analysis_model_id" '
|
||||
.[$provider][] | select(.id == $model_id) | .cost_per_1m_tokens
|
||||
' "$supported_models_file")
|
||||
|
||||
if [[ -n "$model_cost_info" && "$model_cost_info" != "null" ]]; then
|
||||
input_cost_per_1m=$(echo "$model_cost_info" | jq -r '.input // 0')
|
||||
output_cost_per_1m=$(echo "$model_cost_info" | jq -r '.output // 0')
|
||||
|
||||
calculated_llm_cost=$(echo "($input_tokens / 1000000 * $input_cost_per_1m) + ($output_tokens / 1000000 * $output_cost_per_1m)" | bc -l)
|
||||
# Format to 6 decimal places
|
||||
formatted_llm_cost=$(printf "%.6f" "$calculated_llm_cost")
|
||||
echo "LLM Analysis AI Cost: $formatted_llm_cost USD" # This line will be parsed by run_e2e.sh
|
||||
else
|
||||
echo "[HELPER_WARNING] Cost data for model $llm_analysis_model_id not found in $supported_models_file. LLM analysis cost not calculated."
|
||||
fi
|
||||
else
|
||||
echo "[HELPER_WARNING] $supported_models_file not found. LLM analysis cost not calculated."
|
||||
fi
|
||||
# --- End cost calculation for this call ---
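# Worked example with illustrative numbers: input_tokens=2000, output_tokens=500,
# input_cost_per_1m=3, output_cost_per_1m=15 gives
#   (2000/1000000*3) + (500/1000000*15) = 0.006 + 0.0075 = 0.0135
# which printf "%.6f" renders as 0.013500.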
|
||||
|
||||
if echo "$response_body" | node "${project_root}/tests/e2e/parse_llm_output.cjs" "$log_file"; then
|
||||
echo "[HELPER_SUCCESS] LLM analysis parsed and printed successfully by Node.js script."
|
||||
return 0 # Success
|
||||
return 0
|
||||
else
|
||||
local node_exit_code=$?
|
||||
echo "[HELPER_ERROR] Node.js parsing script failed with exit code ${node_exit_code}."
|
||||
echo "[HELPER_ERROR] Raw API response body (first 500 chars): $(echo "$response_body" | head -c 500)"
|
||||
return 1 # Failure
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Export the function so it might be available to subshells if sourced
|
||||
export -f analyze_log_with_llm
|
||||
@@ -60,9 +60,52 @@ MAIN_ENV_FILE="$TASKMASTER_SOURCE_DIR/.env"
|
||||
# ---
|
||||
|
||||
# <<< Source the helper script >>>
|
||||
# shellcheck source=tests/e2e/e2e_helpers.sh
|
||||
source "$TASKMASTER_SOURCE_DIR/tests/e2e/e2e_helpers.sh"
|
||||
|
||||
# ==========================================
|
||||
# >>> Global Helper Functions Defined in run_e2e.sh <<<
|
||||
# --- Helper Functions (Define globally before export) ---
|
||||
_format_duration() {
|
||||
local total_seconds=$1
|
||||
local minutes=$((total_seconds / 60))
|
||||
local seconds=$((total_seconds % 60))
|
||||
printf "%dm%02ds" "$minutes" "$seconds"
|
||||
}
|
||||
|
||||
# Note: This relies on 'overall_start_time' being set globally before the function is called
|
||||
_get_elapsed_time_for_log() {
|
||||
local current_time
|
||||
current_time=$(date +%s)
|
||||
# Use overall_start_time here, as start_time_for_helpers might not be relevant globally
|
||||
local elapsed_seconds
|
||||
elapsed_seconds=$((current_time - overall_start_time))
|
||||
_format_duration "$elapsed_seconds"
|
||||
}
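# Example (illustrative): _format_duration 125 prints "2m05s", so log lines carry
# elapsed values like [2m05s] relative to overall_start_time.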
|
||||
|
||||
log_info() {
|
||||
echo "[INFO] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
}
|
||||
|
||||
log_success() {
|
||||
echo "[SUCCESS] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
}
|
||||
|
||||
log_error() {
|
||||
echo "[ERROR] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2
|
||||
}
|
||||
|
||||
log_step() {
|
||||
test_step_count=$((test_step_count + 1))
|
||||
echo ""
|
||||
echo "============================================="
|
||||
echo " STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
echo "============================================="
|
||||
}
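# Illustrative banner produced by log_step (timestamp values are hypothetical):
#   =============================================
#    STEP 3: [1m07s] 2025-05-10 14:02:33 Parsing PRD
#   =============================================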
|
||||
# ==========================================
|
||||
|
||||
# <<< Export helper functions for subshells >>>
|
||||
export -f log_info log_success log_error log_step _format_duration _get_elapsed_time_for_log
|
||||
export -f log_info log_success log_error log_step _format_duration _get_elapsed_time_for_log extract_and_sum_cost
|
||||
|
||||
# --- Argument Parsing for Analysis-Only Mode ---
|
||||
# This remains the same, as it exits early if matched
|
||||
@@ -138,6 +181,7 @@ fi
|
||||
# Note: These are mainly for step numbering within the log now, not for final summary
|
||||
test_step_count=0
|
||||
start_time_for_helpers=0 # Separate start time for helper functions inside the pipe
|
||||
total_e2e_cost="0.0" # Initialize total E2E cost
|
||||
# ---
|
||||
|
||||
# --- Log File Setup ---
|
||||
@@ -220,12 +264,16 @@ log_step() {
|
||||
fi
|
||||
|
||||
# --- Dependency Checks ---
|
||||
log_step "Checking for dependencies (jq)"
|
||||
log_step "Checking for dependencies (jq, bc)"
|
||||
if ! command -v jq &> /dev/null; then
|
||||
log_error "Dependency 'jq' is not installed or not found in PATH. Please install jq (e.g., 'brew install jq' or 'sudo apt-get install jq')."
|
||||
exit 1
|
||||
fi
|
||||
log_success "Dependency 'jq' found."
|
||||
if ! command -v bc &> /dev/null; then
|
||||
log_error "Dependency 'bc' not installed (for cost calculation). Please install bc (e.g., 'brew install bc' or 'sudo apt-get install bc')."
|
||||
exit 1
|
||||
fi
|
||||
log_success "Dependencies 'jq' and 'bc' found."
|
||||
|
||||
# --- Test Setup (Output to tee) ---
|
||||
log_step "Setting up test environment"
|
||||
@@ -292,30 +340,43 @@ log_step() {
|
||||
log_success "Project initialized."
|
||||
|
||||
log_step "Parsing PRD"
|
||||
task-master parse-prd ./prd.txt --force
|
||||
if [ ! -s "tasks/tasks.json" ]; then
|
||||
log_error "Parsing PRD failed: tasks/tasks.json not found or is empty."
|
||||
cmd_output_prd=$(task-master parse-prd ./prd.txt --force 2>&1)
|
||||
exit_status_prd=$?
|
||||
echo "$cmd_output_prd"
|
||||
extract_and_sum_cost "$cmd_output_prd"
|
||||
if [ $exit_status_prd -ne 0 ] || [ ! -s "tasks/tasks.json" ]; then
|
||||
log_error "Parsing PRD failed: tasks/tasks.json not found or is empty. Exit status: $exit_status_prd"
|
||||
exit 1
|
||||
else
|
||||
log_success "PRD parsed successfully."
|
||||
fi
|
||||
log_success "PRD parsed successfully."
|
||||
|
||||
log_step "Expanding Task 1 (to ensure subtask 1.1 exists)"
|
||||
# Add --research flag if needed and API keys support it
|
||||
task-master analyze-complexity --research --output complexity_results.json
|
||||
if [ ! -f "complexity_results.json" ]; then
|
||||
log_error "Complexity analysis failed: complexity_results.json not found."
|
||||
cmd_output_analyze=$(task-master analyze-complexity --research --output complexity_results.json 2>&1)
|
||||
exit_status_analyze=$?
|
||||
echo "$cmd_output_analyze"
|
||||
extract_and_sum_cost "$cmd_output_analyze"
|
||||
if [ $exit_status_analyze -ne 0 ] || [ ! -f "complexity_results.json" ]; then
|
||||
log_error "Complexity analysis failed: complexity_results.json not found. Exit status: $exit_status_analyze"
|
||||
exit 1
|
||||
else
|
||||
log_success "Complexity analysis saved to complexity_results.json"
|
||||
fi
|
||||
log_success "Complexity analysis saved to complexity_results.json"
|
||||
|
||||
log_step "Generating complexity report"
|
||||
task-master complexity-report --file complexity_results.json > complexity_report_formatted.log
|
||||
log_success "Formatted complexity report saved to complexity_report_formatted.log"
|
||||
|
||||
log_step "Expanding Task 1 (assuming it exists)"
|
||||
# Add --research flag if needed and API keys support it
|
||||
task-master expand --id=1 # Add --research?
|
||||
log_success "Attempted to expand Task 1."
|
||||
cmd_output_expand1=$(task-master expand --id=1 2>&1)
|
||||
exit_status_expand1=$?
|
||||
echo "$cmd_output_expand1"
|
||||
extract_and_sum_cost "$cmd_output_expand1"
|
||||
if [ $exit_status_expand1 -ne 0 ]; then
|
||||
log_error "Expanding Task 1 failed. Exit status: $exit_status_expand1"
|
||||
else
|
||||
log_success "Attempted to expand Task 1."
|
||||
fi
|
||||
|
||||
log_step "Setting status for Subtask 1.1 (assuming it exists)"
|
||||
task-master set-status --id=1.1 --status=done
|
||||
@@ -359,10 +420,11 @@ log_step() {
|
||||
|
||||
if [ -x "$verification_script_path" ]; then
|
||||
log_info "--- Executing Fallback Verification Script: $verification_script_path ---"
|
||||
# Execute the script directly, allowing output to flow to tee
|
||||
# Pass the current directory (the test run dir) as the argument
|
||||
"$verification_script_path" "$(pwd)"
|
||||
verification_exit_code=$? # Capture exit code immediately
|
||||
verification_output=$("$verification_script_path" "$(pwd)" 2>&1)
|
||||
verification_exit_code=$?
|
||||
echo "$verification_output"
|
||||
extract_and_sum_cost "$verification_output"
|
||||
|
||||
log_info "--- Finished Fallback Verification Script Execution (Exit Code: $verification_exit_code) ---"
|
||||
|
||||
# Log success/failure based on captured exit code
|
||||
@@ -370,13 +432,9 @@ log_step() {
|
||||
log_success "Fallback verification script reported success."
|
||||
else
|
||||
log_error "Fallback verification script reported FAILURE (Exit Code: $verification_exit_code)."
|
||||
# Decide whether to exit the main script or just log the error
|
||||
# exit 1 # Uncomment to make verification failure fatal
|
||||
fi
|
||||
else
|
||||
log_error "Fallback verification script not found or not executable at $verification_script_path. Skipping verification."
|
||||
# Decide whether to exit or continue
|
||||
# exit 1
|
||||
fi
|
||||
else
|
||||
log_info "Skipping Fallback Verification test as requested by flag."
|
||||
@@ -393,7 +451,7 @@ log_step() {
|
||||
declare -a models=(
|
||||
"claude-3-7-sonnet-20250219"
|
||||
"gpt-4o"
|
||||
"gemini-2.5-pro-exp-03-25"
|
||||
"gemini-2.5-pro-preview-05-06"
|
||||
"sonar-pro" # Note: This is research-only, add-task might fail if not using research model
|
||||
"grok-3"
|
||||
"anthropic/claude-3.7-sonnet" # OpenRouter uses Claude 3.7
|
||||
@@ -435,9 +493,9 @@ log_step() {
|
||||
|
||||
# 3. Check for success and extract task ID
|
||||
new_task_id=""
|
||||
if [ $add_task_exit_code -eq 0 ] && echo "$add_task_cmd_output" | grep -q "✓ Added new task #"; then
|
||||
# Attempt to extract the ID (adjust grep/sed/awk as needed based on actual output format)
|
||||
new_task_id=$(echo "$add_task_cmd_output" | grep "✓ Added new task #" | sed 's/.*✓ Added new task #\([0-9.]\+\).*/\1/')
|
||||
extract_and_sum_cost "$add_task_cmd_output"
|
||||
if [ $add_task_exit_code -eq 0 ] && (echo "$add_task_cmd_output" | grep -q "✓ Added new task #" || echo "$add_task_cmd_output" | grep -q "✅ New task created successfully:" || echo "$add_task_cmd_output" | grep -q "Task [0-9]\+ Created Successfully"); then
|
||||
new_task_id=$(echo "$add_task_cmd_output" | grep -o -E "(Task |#)[0-9.]+" | grep -o -E "[0-9.]+" | head -n 1)
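# Illustrative (assumed CLI output format): a success line such as
#   "✅ New task created successfully: Task 12"
# yields "Task 12" from the first grep and "12" from the second, so new_task_id=12.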
|
||||
if [ -n "$new_task_id" ]; then
|
||||
log_success "Add-task succeeded for $provider. New task ID: $new_task_id"
|
||||
echo "Provider $provider add-task SUCCESS (ID: $new_task_id)" >> provider_add_task_summary.log
|
||||
@@ -522,8 +580,6 @@ log_step() {
|
||||
log_success "Validation correctly identified non-existent dependency 999."
|
||||
else
|
||||
log_error "Validation DID NOT report non-existent dependency 999 as expected. Check validate_deps_non_existent.log"
|
||||
# Consider exiting here if this check fails, as it indicates a validation logic problem
|
||||
# exit 1
|
||||
fi
|
||||
|
||||
log_step "Fixing dependencies (should remove 1 -> 999)"
|
||||
@@ -534,7 +590,6 @@ log_step() {
|
||||
task-master validate-dependencies > validate_deps_after_fix_non_existent.log 2>&1 || true # Allow potential failure
|
||||
if grep -q "Non-existent dependency ID: 999" validate_deps_after_fix_non_existent.log; then
|
||||
log_error "Validation STILL reports non-existent dependency 999 after fix. Check logs."
|
||||
# exit 1
|
||||
else
|
||||
log_success "Validation shows non-existent dependency 999 was removed."
|
||||
fi
|
||||
@@ -553,7 +608,6 @@ log_step() {
|
||||
log_success "Validation correctly identified circular dependency between 4 and 5."
|
||||
else
|
||||
log_error "Validation DID NOT report circular dependency 4<->5 as expected. Check validate_deps_circular.log"
|
||||
# exit 1
|
||||
fi
|
||||
|
||||
log_step "Fixing dependencies (should remove one side of 4 <-> 5)"
|
||||
@@ -564,7 +618,6 @@ log_step() {
|
||||
task-master validate-dependencies > validate_deps_after_fix_circular.log 2>&1 || true # Allow potential failure
|
||||
if grep -q -E "Circular dependency detected involving task IDs: (4, 5|5, 4)" validate_deps_after_fix_circular.log; then
|
||||
log_error "Validation STILL reports circular dependency 4<->5 after fix. Check logs."
|
||||
# exit 1
|
||||
else
|
||||
log_success "Validation shows circular dependency 4<->5 was resolved."
|
||||
fi
|
||||
@@ -582,25 +635,60 @@ log_step() {
|
||||
log_success "Added Task $manual_task_id manually."
|
||||
|
||||
log_step "Adding Task $ai_task_id (AI)"
|
||||
task-master add-task --prompt="Implement basic UI styling using CSS variables for colors and spacing" --priority=medium --dependencies=1 # Depends on frontend setup
|
||||
log_success "Added Task $ai_task_id via AI prompt."
|
||||
cmd_output_add_ai=$(task-master add-task --prompt="Implement basic UI styling using CSS variables for colors and spacing" --priority=medium --dependencies=1 2>&1)
|
||||
exit_status_add_ai=$?
|
||||
echo "$cmd_output_add_ai"
|
||||
extract_and_sum_cost "$cmd_output_add_ai"
|
||||
if [ $exit_status_add_ai -ne 0 ]; then
|
||||
log_error "Adding AI Task $ai_task_id failed. Exit status: $exit_status_add_ai"
|
||||
else
|
||||
log_success "Added Task $ai_task_id via AI prompt."
|
||||
fi
|
||||
|
||||
|
||||
log_step "Updating Task 3 (update-task AI)"
|
||||
task-master update-task --id=3 --prompt="Update backend server setup: Ensure CORS is configured to allow requests from the frontend origin."
|
||||
log_success "Attempted update for Task 3."
|
||||
cmd_output_update_task3=$(task-master update-task --id=3 --prompt="Update backend server setup: Ensure CORS is configured to allow requests from the frontend origin." 2>&1)
|
||||
exit_status_update_task3=$?
|
||||
echo "$cmd_output_update_task3"
|
||||
extract_and_sum_cost "$cmd_output_update_task3"
|
||||
if [ $exit_status_update_task3 -ne 0 ]; then
|
||||
log_error "Updating Task 3 failed. Exit status: $exit_status_update_task3"
|
||||
else
|
||||
log_success "Attempted update for Task 3."
|
||||
fi
|
||||
|
||||
log_step "Updating Tasks from Task 5 (update AI)"
|
||||
task-master update --from=5 --prompt="Refactor the backend storage module to use a simple JSON file (storage.json) instead of an in-memory object for persistence. Update relevant tasks."
|
||||
log_success "Attempted update from Task 5 onwards."
|
||||
cmd_output_update_from5=$(task-master update --from=5 --prompt="Refactor the backend storage module to use a simple JSON file (storage.json) instead of an in-memory object for persistence. Update relevant tasks." 2>&1)
|
||||
exit_status_update_from5=$?
|
||||
echo "$cmd_output_update_from5"
|
||||
extract_and_sum_cost "$cmd_output_update_from5"
|
||||
if [ $exit_status_update_from5 -ne 0 ]; then
|
||||
log_error "Updating from Task 5 failed. Exit status: $exit_status_update_from5"
|
||||
else
|
||||
log_success "Attempted update from Task 5 onwards."
|
||||
fi
|
||||
|
||||
log_step "Expanding Task 8 (AI)"
|
||||
task-master expand --id=8 # Expand task 8: Frontend logic
|
||||
log_success "Attempted to expand Task 8."
|
||||
cmd_output_expand8=$(task-master expand --id=8 2>&1)
|
||||
exit_status_expand8=$?
|
||||
echo "$cmd_output_expand8"
|
||||
extract_and_sum_cost "$cmd_output_expand8"
|
||||
if [ $exit_status_expand8 -ne 0 ]; then
|
||||
log_error "Expanding Task 8 failed. Exit status: $exit_status_expand8"
|
||||
else
|
||||
log_success "Attempted to expand Task 8."
|
||||
fi
|
||||
|
||||
log_step "Updating Subtask 8.1 (update-subtask AI)"
|
||||
task-master update-subtask --id=8.1 --prompt="Implementation note: Remember to handle potential API errors and display a user-friendly message."
|
||||
log_success "Attempted update for Subtask 8.1."
|
||||
cmd_output_update_subtask81=$(task-master update-subtask --id=8.1 --prompt="Implementation note: Remember to handle potential API errors and display a user-friendly message." 2>&1)
|
||||
exit_status_update_subtask81=$?
|
||||
echo "$cmd_output_update_subtask81"
|
||||
extract_and_sum_cost "$cmd_output_update_subtask81"
|
||||
if [ $exit_status_update_subtask81 -ne 0 ]; then
|
||||
log_error "Updating Subtask 8.1 failed. Exit status: $exit_status_update_subtask81"
|
||||
else
|
||||
log_success "Attempted update for Subtask 8.1."
|
||||
fi
|
||||
|
||||
# Add a couple more subtasks for multi-remove test
|
||||
log_step 'Adding subtasks to Task 2 (for multi-remove test)'
|
||||
@@ -693,9 +781,16 @@ log_step() {
|
||||
|
||||
# === AI Commands (Re-test some after changes) ===
|
||||
log_step "Analyzing complexity (AI with Research - Final Check)"
|
||||
task-master analyze-complexity --research --output complexity_results_final.json
|
||||
if [ ! -f "complexity_results_final.json" ]; then log_error "Final Complexity analysis failed."; exit 1; fi
|
||||
log_success "Final Complexity analysis saved."
|
||||
cmd_output_analyze_final=$(task-master analyze-complexity --research --output complexity_results_final.json 2>&1)
|
||||
exit_status_analyze_final=$?
|
||||
echo "$cmd_output_analyze_final"
|
||||
extract_and_sum_cost "$cmd_output_analyze_final"
|
||||
if [ $exit_status_analyze_final -ne 0 ] || [ ! -f "complexity_results_final.json" ]; then
|
||||
log_error "Final Complexity analysis failed. Exit status: $exit_status_analyze_final. File found: $(test -f complexity_results_final.json && echo true || echo false)"
|
||||
exit 1 # Critical for subsequent report step
|
||||
else
|
||||
log_success "Final Complexity analysis command executed and file created."
|
||||
fi
|
||||
|
||||
log_step "Generating complexity report (Non-AI - Final Check)"
|
||||
task-master complexity-report --file complexity_results_final.json > complexity_report_formatted_final.log
|
||||
@@ -775,4 +870,8 @@ else
|
||||
echo "[ERROR] [$formatted_duration_for_error] $(date +"%Y-%m-%d %H:%M:%S") Test run directory $TEST_RUN_DIR not found. Cannot perform LLM analysis." >&2
|
||||
fi
|
||||
|
||||
# Final cost formatting
|
||||
formatted_total_e2e_cost=$(printf "%.6f" "$total_e2e_cost")
|
||||
echo "Total E2E AI Cost: $formatted_total_e2e_cost USD"
|
||||
|
||||
exit $EXIT_CODE
|
||||
@@ -18,6 +18,25 @@ set -o pipefail
|
||||
|
||||
# --- Embedded Helper Functions ---
|
||||
# Copied from e2e_helpers.sh to make this script standalone
|
||||
# OR source it if preferred and path is reliable
|
||||
|
||||
# <<< Determine SCRIPT_DIR and PROJECT_ROOT_DIR early >>>
|
||||
SCRIPT_DIR_FV="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
PROJECT_ROOT_DIR_FV="$( cd "$SCRIPT_DIR_FV/../.." &> /dev/null && pwd )" # Assumes script is in tests/e2e/
|
||||
|
||||
# --- Try to Source e2e_helpers.sh ---
|
||||
E2E_HELPERS_PATH_FV="${PROJECT_ROOT_DIR_FV}/tests/e2e/e2e_helpers.sh"
|
||||
if [ -f "$E2E_HELPERS_PATH_FV" ]; then
|
||||
# shellcheck source=tests/e2e/e2e_helpers.sh
|
||||
source "$E2E_HELPERS_PATH_FV"
|
||||
echo "[INFO FV] Sourced e2e_helpers.sh successfully."
|
||||
else
|
||||
echo "[ERROR FV] e2e_helpers.sh not found at $E2E_HELPERS_PATH_FV. Cost extraction will fail."
|
||||
# Define a placeholder if not found, so the script doesn't break immediately,
|
||||
# but cost extraction will effectively be a no-op.
|
||||
extract_and_sum_cost() { echo "$2"; } # Returns current total, effectively adding 0
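# In this script the helper is called as:
#   total_fallback_cost=$(extract_and_sum_cost "$output" "$total_fallback_cost")
# so this placeholder simply echoes the running total back unchanged.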
|
||||
fi
|
||||
|
||||
|
||||
_format_duration() {
|
||||
local total_seconds=$1
|
||||
@@ -27,127 +46,112 @@ _format_duration() {
|
||||
}
|
||||
|
||||
_get_elapsed_time_for_log() {
|
||||
# Needs overall_start_time defined in the main script body
|
||||
local current_time=$(date +%s)
|
||||
local elapsed_seconds=$((current_time - overall_start_time))
|
||||
local current_time
|
||||
current_time=$(date +%s)
|
||||
local elapsed_seconds
|
||||
elapsed_seconds=$((current_time - overall_start_time)) # Needs overall_start_time
|
||||
_format_duration "$elapsed_seconds"
|
||||
}
|
||||
|
||||
log_info() {
|
||||
echo "[INFO] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
echo "[INFO FV] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
}
|
||||
|
||||
log_success() {
|
||||
echo "[SUCCESS] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
echo "[SUCCESS FV] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
}
|
||||
|
||||
log_error() {
|
||||
echo "[ERROR] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2
|
||||
echo "[ERROR FV] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2
|
||||
}
|
||||
|
||||
log_step() {
|
||||
# Needs test_step_count defined and incremented in the main script body
|
||||
test_step_count=$((test_step_count + 1))
|
||||
test_step_count=$((test_step_count + 1)) # Needs test_step_count
|
||||
echo ""
|
||||
echo "============================================="
|
||||
echo " STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
echo " FV STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
|
||||
echo "============================================="
|
||||
}
|
||||
|
||||
# --- Signal Handling ---
|
||||
# Global variable to hold child PID
|
||||
child_pid=0
|
||||
# Use a persistent log file name
|
||||
PROGRESS_LOG_FILE="fallback_verification_progress.log"
|
||||
PROGRESS_LOG_FILE="fallback_verification_progress.log" # Stays in run dir
|
||||
|
||||
cleanup() {
|
||||
echo "" # Newline after ^C
|
||||
echo ""
|
||||
log_error "Interrupt received. Cleaning up any running child process..."
|
||||
if [ "$child_pid" -ne 0 ]; then
|
||||
log_info "Killing child process (PID: $child_pid) and its group..."
|
||||
kill -TERM -- "-$child_pid" 2>/dev/null || kill -KILL -- "-$child_pid" 2>/dev/null
|
||||
child_pid=0
|
||||
fi
|
||||
# DO NOT delete the progress log file on interrupt
|
||||
log_info "Progress saved in: $PROGRESS_LOG_FILE"
|
||||
exit 130 # Exit with code indicating interrupt
|
||||
# Print current total cost on interrupt
|
||||
if [[ -n "${total_fallback_cost+x}" && "$total_fallback_cost" != "0.0" ]]; then # Check if var is set and not initial
|
||||
log_info "Current Total Fallback AI Cost at interruption: $total_fallback_cost USD"
|
||||
fi
|
||||
exit 130
|
||||
}
|
||||
|
||||
# Trap SIGINT (Ctrl+C) and SIGTERM
|
||||
trap cleanup INT TERM
|
||||
|
||||
# --- Configuration ---
|
||||
# Determine the project root relative to this script's location
|
||||
# Use a robust method to find the script's own directory
|
||||
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
# Assumes this script is in tests/e2e/
|
||||
PROJECT_ROOT_DIR="$( cd "$SCRIPT_DIR/../.." &> /dev/null && pwd )"
|
||||
SUPPORTED_MODELS_FILE="$PROJECT_ROOT_DIR/scripts/modules/supported-models.json"
|
||||
BASE_RUNS_DIR="$PROJECT_ROOT_DIR/tests/e2e/_runs"
|
||||
# SCRIPT_DIR and PROJECT_ROOT_DIR already defined above
|
||||
SUPPORTED_MODELS_FILE="$PROJECT_ROOT_DIR_FV/scripts/modules/supported-models.json"
|
||||
BASE_RUNS_DIR="$PROJECT_ROOT_DIR_FV/tests/e2e/_runs"
|
||||
|
||||
# --- Determine Target Run Directory ---
|
||||
TARGET_RUN_DIR=""
|
||||
if [ "$#" -ge 1 ] && [ -n "$1" ]; then
|
||||
# Use provided argument if it exists
|
||||
TARGET_RUN_DIR="$1"
|
||||
# Make path absolute if it's relative
|
||||
if [[ "$TARGET_RUN_DIR" != /* ]]; then
|
||||
TARGET_RUN_DIR="$(pwd)/$TARGET_RUN_DIR"
|
||||
fi
|
||||
echo "[INFO] Using provided target run directory: $TARGET_RUN_DIR"
|
||||
echo "[INFO FV] Using provided target run directory: $TARGET_RUN_DIR"
|
||||
else
|
||||
# Find the latest run directory
|
||||
echo "[INFO] No run directory provided, finding latest in $BASE_RUNS_DIR..."
|
||||
echo "[INFO FV] No run directory provided, finding latest in $BASE_RUNS_DIR..."
|
||||
TARGET_RUN_DIR=$(ls -td "$BASE_RUNS_DIR"/run_* 2>/dev/null | head -n 1)
|
||||
if [ -z "$TARGET_RUN_DIR" ]; then
|
||||
echo "[ERROR] No run directories found matching 'run_*' in $BASE_RUNS_DIR. Cannot proceed." >&2
|
||||
echo "[ERROR FV] No run directories found matching 'run_*' in $BASE_RUNS_DIR. Cannot proceed." >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "[INFO] Found latest run directory: $TARGET_RUN_DIR"
|
||||
echo "[INFO FV] Found latest run directory: $TARGET_RUN_DIR"
|
||||
fi
|
||||
|
||||
# Validate the target directory
|
||||
if [ ! -d "$TARGET_RUN_DIR" ]; then
|
||||
echo "[ERROR] Target run directory not found or is not a directory: $TARGET_RUN_DIR" >&2
|
||||
echo "[ERROR FV] Target run directory not found or is not a directory: $TARGET_RUN_DIR" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# --- Change to Target Directory ---
|
||||
echo "[INFO] Changing working directory to: $TARGET_RUN_DIR"
|
||||
echo "[INFO FV] Changing working directory to: $TARGET_RUN_DIR"
|
||||
if ! cd "$TARGET_RUN_DIR"; then
|
||||
echo "[ERROR] Failed to cd into target directory: $TARGET_RUN_DIR" >&2
|
||||
echo "[ERROR FV] Failed to cd into target directory: $TARGET_RUN_DIR" >&2
|
||||
exit 1
|
||||
fi
|
||||
echo "[INFO] Now operating inside: $(pwd)"
|
||||
echo "[INFO FV] Now operating inside: $(pwd)"
|
||||
|
||||
overall_start_time=$(date +%s) # Initialize for logging helpers
|
||||
test_step_count=0 # Initialize for logging helpers
|
||||
total_fallback_cost="0.0" # Initialize total cost for this script
|
||||
|
||||
# --- Now we are inside the target run directory ---
|
||||
overall_start_time=$(date +%s)
|
||||
test_step_count=0
|
||||
log_info "Starting fallback verification script execution in $(pwd)"
|
||||
log_info "Progress will be logged to: $(pwd)/$PROGRESS_LOG_FILE"
|
||||
|
||||
# --- Dependency Checks ---
|
||||
log_step "Checking for dependencies (jq) in verification script"
|
||||
if ! command -v jq &> /dev/null; then
|
||||
log_error "Dependency 'jq' is not installed or not found in PATH."
|
||||
exit 1
|
||||
fi
|
||||
log_success "Dependency 'jq' found."
|
||||
log_step "Checking for dependencies (jq, bc) in verification script"
|
||||
if ! command -v jq &> /dev/null; then log_error "Dependency 'jq' not installed."; exit 1; fi
|
||||
if ! command -v bc &> /dev/null; then log_error "Dependency 'bc' not installed (for cost calculation)."; exit 1; fi
|
||||
log_success "Dependencies 'jq' and 'bc' found."
|
||||
|
||||
|
||||
# --- Verification Logic ---
|
||||
log_step "Starting/Resuming Fallback Model (generateObjectService) Verification"
|
||||
# Ensure progress log exists, create if not
|
||||
touch "$PROGRESS_LOG_FILE"
|
||||
|
||||
# Ensure the supported models file exists (using absolute path)
|
||||
if [ ! -f "$SUPPORTED_MODELS_FILE" ]; then
|
||||
log_error "supported-models.json not found at absolute path: $SUPPORTED_MODELS_FILE."
|
||||
log_error "supported-models.json not found at: $SUPPORTED_MODELS_FILE."
|
||||
exit 1
|
||||
fi
|
||||
log_info "Using supported models file: $SUPPORTED_MODELS_FILE"
|
||||
|
||||
# Ensure subtask 1.1 exists (basic check, main script should guarantee)
|
||||
# Check for tasks.json in the current directory (which is now the run dir)
|
||||
if [ ! -f "tasks/tasks.json" ]; then
|
||||
log_error "tasks/tasks.json not found in current directory ($(pwd)). Was this run directory properly initialized?"
|
||||
exit 1
|
||||
@@ -158,78 +162,90 @@ if ! jq -e '.tasks[] | select(.id == 1) | .subtasks[] | select(.id == 1)' tasks/
|
||||
fi
|
||||
log_info "Subtask 1.1 found in $(pwd)/tasks/tasks.json, proceeding with verification."
|
||||
|
||||
# Read providers and models using jq
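# Illustrative output of the jq filter below (assumed supported-models.json shape):
#   {"provider":"anthropic","id":"claude-3-7-sonnet-20250219"}
# One compact object per fallback-capable model, split back into $provider/$model_id inside the loop.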
|
||||
jq -c 'to_entries[] | .key as $provider | .value[] | select(.allowed_roles[]? == "fallback") | {provider: $provider, id: .id}' "$SUPPORTED_MODELS_FILE" | while IFS= read -r model_info; do
|
||||
provider=$(echo "$model_info" | jq -r '.provider')
|
||||
model_id=$(echo "$model_info" | jq -r '.id')
|
||||
flag="" # Default flag
|
||||
flag=""
|
||||
|
||||
# Check if already tested
|
||||
# Use grep -Fq for fixed string and quiet mode
|
||||
if grep -Fq "${provider},${model_id}," "$PROGRESS_LOG_FILE"; then
|
||||
log_info "--- Skipping: $provider / $model_id (already tested, result in $PROGRESS_LOG_FILE) ---"
|
||||
# Still need to sum up its cost if it was successful before
|
||||
previous_test_output=$(grep -F "${provider},${model_id}," "$PROGRESS_LOG_FILE" | head -n 1)
|
||||
# Assuming the output file for successful test exists and contains cost
|
||||
prev_output_file="update_subtask_raw_output_${provider}_${model_id//\//_}.log"
|
||||
if [[ "$previous_test_output" == *",SUCCESS"* && -f "$prev_output_file" ]]; then
|
||||
# shellcheck disable=SC2154 # overall_start_time is set
|
||||
log_info "Summing cost from previous successful test of $provider / $model_id from $prev_output_file"
|
||||
# shellcheck disable=SC2154 # total_fallback_cost is set
|
||||
total_fallback_cost=$(extract_and_sum_cost "$(cat "$prev_output_file")" "$total_fallback_cost")
|
||||
log_info "Cumulative fallback AI cost after previous $provider / $model_id: $total_fallback_cost USD"
|
||||
fi
|
||||
continue
|
||||
fi
|
||||
|
||||
log_info "--- Verifying: $provider / $model_id ---"
|
||||
|
||||
# Determine provider flag
|
||||
if [ "$provider" == "openrouter" ]; then
|
||||
flag="--openrouter"
|
||||
elif [ "$provider" == "ollama" ]; then
|
||||
flag="--ollama"
|
||||
fi
|
||||
if [ "$provider" == "openrouter" ]; then flag="--openrouter"; fi
|
||||
if [ "$provider" == "ollama" ]; then flag="--ollama"; fi
|
||||
|
||||
# 1. Set the main model
|
||||
if ! command -v task-master &> /dev/null; then
|
||||
log_error "task-master command not found."
|
||||
echo "[INSTRUCTION] Please run 'npm link task-master-ai' in the project root first."
|
||||
echo "[INSTRUCTION FV] Please run 'npm link task-master-ai' in the project root first."
|
||||
exit 1
|
||||
fi
|
||||
log_info "Setting main model to $model_id ${flag:+using flag $flag}..."
|
||||
set_model_cmd="task-master models --set-main \"$model_id\" $flag"
|
||||
model_set_status="SUCCESS"
|
||||
if ! eval $set_model_cmd > /dev/null 2>&1; then
|
||||
if ! eval "$set_model_cmd" > /dev/null 2>&1; then
|
||||
log_error "Failed to set main model for $provider / $model_id. Skipping test."
|
||||
echo "$provider,$model_id,SET_MODEL_FAILED" >> "$PROGRESS_LOG_FILE"
|
||||
continue # Skip the actual test if setting fails
|
||||
continue
|
||||
fi
|
||||
log_info "Set main model ok."
|
||||
|
||||
# 2. Run update-subtask
|
||||
log_info "Running update-subtask --id=1.1 --prompt='Test generateObjectService' (timeout 120s)"
|
||||
update_subtask_output_file="update_subtask_raw_output_${provider}_${model_id//\//_}.log"
|
||||
|
||||
timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." > "$update_subtask_output_file" 2>&1 &
|
||||
|
||||
# Capture output to a variable AND a file
|
||||
update_subtask_command_output=""
|
||||
timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." 2>&1 | tee "$update_subtask_output_file" &
|
||||
# Store the command output in a variable simultaneously
|
||||
# update_subtask_command_output=$(timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." 2>&1)
|
||||
# The above direct capture won't work well with tee and backgrounding. Instead, read the file after command completion.
|
||||
child_pid=$!
|
||||
wait "$child_pid"
|
||||
update_subtask_exit_code=$?
|
||||
child_pid=0
|
||||
|
||||
# 3. Check result and log persistently
|
||||
# Read output from file for cost extraction
|
||||
if [ -f "$update_subtask_output_file" ]; then
|
||||
update_subtask_command_output=$(cat "$update_subtask_output_file")
|
||||
else
|
||||
update_subtask_command_output="" # Ensure it's defined
|
||||
fi
|
||||
|
||||
result_status=""
|
||||
if [ $update_subtask_exit_code -eq 0 ] && grep -q "Successfully updated subtask #1.1" "$update_subtask_output_file"; then
|
||||
if [ $update_subtask_exit_code -eq 0 ] && echo "$update_subtask_command_output" | grep -q "Successfully updated subtask #1.1"; then
|
||||
log_success "update-subtask succeeded for $provider / $model_id (Verified Output)."
|
||||
result_status="SUCCESS"
|
||||
# Extract and sum cost if successful
|
||||
# shellcheck disable=SC2154 # total_fallback_cost is set
|
||||
total_fallback_cost=$(extract_and_sum_cost "$update_subtask_command_output" "$total_fallback_cost")
|
||||
log_info "Cumulative fallback AI cost after $provider / $model_id: $total_fallback_cost USD"
|
||||
elif [ $update_subtask_exit_code -eq 124 ]; then
|
||||
log_error "update-subtask TIMED OUT for $provider / $model_id. Check $update_subtask_output_file."
|
||||
result_status="FAILED_TIMEOUT"
|
||||
elif [ $update_subtask_exit_code -eq 130 ] || [ $update_subtask_exit_code -eq 143 ]; then
|
||||
log_error "update-subtask INTERRUPTED for $provider / $model_id."
|
||||
result_status="INTERRUPTED" # Record interruption
|
||||
# Don't exit the loop, allow script to finish or be interrupted again
|
||||
result_status="INTERRUPTED"
|
||||
else
|
||||
log_error "update-subtask FAILED for $provider / $model_id (Exit Code: $update_subtask_exit_code). Check $update_subtask_output_file."
|
||||
result_status="FAILED"
|
||||
fi
|
||||
|
||||
# Append result to the persistent log file
|
||||
echo "$provider,$model_id,$result_status" >> "$PROGRESS_LOG_FILE"
|
||||
|
||||
done # End of fallback verification loop
|
||||
done
|
||||
|
||||
# --- Generate Final Verification Report to STDOUT ---
|
||||
# Report reads from the persistent PROGRESS_LOG_FILE
|
||||
echo ""
|
||||
echo "--- Fallback Model Verification Report (via $0) ---"
|
||||
echo "Executed inside run directory: $(pwd)"
|
||||
@@ -254,17 +270,13 @@ echo ""
|
||||
echo "Models INTERRUPTED during test (Inconclusive - Rerun):"
|
||||
awk -F',' '$3 == "INTERRUPTED" { print "- " $1 " / " $2 }' "$PROGRESS_LOG_FILE" | sort
|
||||
echo ""
|
||||
# Print the total cost for this script's operations
|
||||
formatted_total_fallback_cost=$(printf "%.6f" "$total_fallback_cost")
|
||||
echo "Total Fallback AI Cost (this script run): $formatted_total_fallback_cost USD" # This line will be parsed
|
||||
echo "-------------------------------------------------------"
|
||||
echo ""
|
||||
|
||||
# Don't clean up the progress log
|
||||
# if [ -f "$PROGRESS_LOG_FILE" ]; then
|
||||
# rm "$PROGRESS_LOG_FILE"
|
||||
# fi
|
||||
|
||||
log_info "Finished Fallback Model (generateObjectService) Verification Script"
|
||||
|
||||
# Remove trap before exiting normally
|
||||
trap - INT TERM
|
||||
|
||||
exit 0 # Exit successfully after printing the report
|
||||
exit 0
|
||||
|
||||
@@ -3,9 +3,8 @@
|
||||
*/
|
||||
|
||||
import { jest } from '@jest/globals';
|
||||
import path from 'path';
|
||||
import path, { dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { dirname } from 'path';
|
||||
|
||||
// Get the current module's directory
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
@@ -27,6 +26,7 @@ const mockReadJSON = jest.fn();
|
||||
const mockWriteJSON = jest.fn();
|
||||
const mockEnableSilentMode = jest.fn();
|
||||
const mockDisableSilentMode = jest.fn();
|
||||
const mockReadComplexityReport = jest.fn().mockReturnValue(null);
|
||||
|
||||
const mockGetAnthropicClient = jest.fn().mockReturnValue({});
|
||||
const mockGetConfiguredAnthropicClient = jest.fn().mockReturnValue({});
|
||||
@@ -130,6 +130,7 @@ jest.mock('../../../scripts/modules/utils.js', () => ({
|
||||
writeJSON: mockWriteJSON,
|
||||
enableSilentMode: mockEnableSilentMode,
|
||||
disableSilentMode: mockDisableSilentMode,
|
||||
readComplexityReport: mockReadComplexityReport,
|
||||
CONFIG: {
|
||||
model: 'claude-3-7-sonnet-20250219',
|
||||
maxTokens: 64000,
|
||||
@@ -160,15 +161,6 @@ jest.mock('../../../scripts/modules/task-manager.js', () => ({
|
||||
}));
|
||||
|
||||
// Import dependencies after mocks are set up
|
||||
import fs from 'fs';
|
||||
import {
|
||||
readJSON,
|
||||
writeJSON,
|
||||
enableSilentMode,
|
||||
disableSilentMode
|
||||
} from '../../../scripts/modules/utils.js';
|
||||
import { expandTask } from '../../../scripts/modules/task-manager.js';
|
||||
import { findTasksJsonPath } from '../../../mcp-server/src/core/utils/path-utils.js';
|
||||
import { sampleTasks } from '../../fixtures/sample-tasks.js';
|
||||
|
||||
// Mock logger
|
||||
@@ -220,6 +212,37 @@ describe('MCP Server Direct Functions', () => {
|
||||
});
|
||||
|
||||
describe('listTasksDirect', () => {
|
||||
// Sample complexity report for testing
|
||||
const mockComplexityReport = {
|
||||
meta: {
|
||||
generatedAt: '2025-03-24T20:01:35.986Z',
|
||||
tasksAnalyzed: 3,
|
||||
thresholdScore: 5,
|
||||
projectName: 'Test Project',
|
||||
usedResearch: false
|
||||
},
|
||||
complexityAnalysis: [
|
||||
{
|
||||
taskId: 1,
|
||||
taskTitle: 'Initialize Project',
|
||||
complexityScore: 3,
|
||||
recommendedSubtasks: 2
|
||||
},
|
||||
{
|
||||
taskId: 2,
|
||||
taskTitle: 'Create Core Functionality',
|
||||
complexityScore: 8,
|
||||
recommendedSubtasks: 5
|
||||
},
|
||||
{
|
||||
taskId: 3,
|
||||
taskTitle: 'Implement UI Components',
|
||||
complexityScore: 6,
|
||||
recommendedSubtasks: 4
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
// Test wrapper function that doesn't rely on the actual implementation
|
||||
async function testListTasks(args, mockLogger) {
|
||||
// File not found case
|
||||
@@ -235,21 +258,35 @@ describe('MCP Server Direct Functions', () => {
|
||||
};
|
||||
}
|
||||
|
||||
// Check for complexity report
|
||||
const complexityReport = mockReadComplexityReport();
|
||||
let tasksData = [...sampleTasks.tasks];
|
||||
|
||||
// Add complexity scores if report exists
|
||||
if (complexityReport && complexityReport.complexityAnalysis) {
|
||||
tasksData = tasksData.map((task) => {
|
||||
const analysis = complexityReport.complexityAnalysis.find(
|
||||
(a) => a.taskId === task.id
|
||||
);
|
||||
if (analysis) {
|
||||
return { ...task, complexityScore: analysis.complexityScore };
|
||||
}
|
||||
return task;
|
||||
});
|
||||
}
|
||||
|
||||
// Success case
|
||||
if (!args.status && !args.withSubtasks) {
|
||||
return {
|
||||
success: true,
|
||||
data: {
|
||||
tasks: sampleTasks.tasks,
|
||||
tasks: tasksData,
|
||||
stats: {
|
||||
total: sampleTasks.tasks.length,
|
||||
completed: sampleTasks.tasks.filter((t) => t.status === 'done')
|
||||
total: tasksData.length,
|
||||
completed: tasksData.filter((t) => t.status === 'done').length,
|
||||
inProgress: tasksData.filter((t) => t.status === 'in-progress')
|
||||
.length,
|
||||
inProgress: sampleTasks.tasks.filter(
|
||||
(t) => t.status === 'in-progress'
|
||||
).length,
|
||||
pending: sampleTasks.tasks.filter((t) => t.status === 'pending')
|
||||
.length
|
||||
pending: tasksData.filter((t) => t.status === 'pending').length
|
||||
}
|
||||
},
|
||||
fromCache: false
|
||||
@@ -258,16 +295,14 @@ describe('MCP Server Direct Functions', () => {
|
||||
|
||||
// Status filter case
|
||||
if (args.status) {
|
||||
const filteredTasks = sampleTasks.tasks.filter(
|
||||
(t) => t.status === args.status
|
||||
);
|
||||
const filteredTasks = tasksData.filter((t) => t.status === args.status);
|
||||
return {
|
||||
success: true,
|
||||
data: {
|
||||
tasks: filteredTasks,
|
||||
filter: args.status,
|
||||
stats: {
|
||||
total: sampleTasks.tasks.length,
|
||||
total: tasksData.length,
|
||||
filtered: filteredTasks.length
|
||||
}
|
||||
},
|
||||
@@ -280,10 +315,10 @@ describe('MCP Server Direct Functions', () => {
|
||||
return {
|
||||
success: true,
|
||||
data: {
|
||||
tasks: sampleTasks.tasks,
|
||||
tasks: tasksData,
|
||||
includeSubtasks: true,
|
||||
stats: {
|
||||
total: sampleTasks.tasks.length
|
||||
total: tasksData.length
|
||||
}
|
||||
},
|
||||
fromCache: false
|
||||
@@ -370,6 +405,29 @@ describe('MCP Server Direct Functions', () => {
|
||||
expect(result.error.code).toBe('FILE_NOT_FOUND_ERROR');
|
||||
expect(mockLogger.error).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test('should include complexity scores when complexity report exists', async () => {
|
||||
// Arrange
|
||||
mockReadComplexityReport.mockReturnValueOnce(mockComplexityReport);
|
||||
const args = {
|
||||
projectRoot: testProjectRoot,
|
||||
file: testTasksPath,
|
||||
withSubtasks: true
|
||||
};
|
||||
|
||||
// Act
|
||||
const result = await testListTasks(args, mockLogger);
|
||||
// Assert
|
||||
expect(result.success).toBe(true);
|
||||
|
||||
// Check that tasks have complexity scores from the report
|
||||
mockComplexityReport.complexityAnalysis.forEach((analysis) => {
|
||||
const task = result.data.tasks.find((t) => t.id === analysis.taskId);
|
||||
if (task) {
|
||||
expect(task.complexityScore).toBe(analysis.complexityScore);
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('expandTaskDirect', () => {
|
||||
|
||||
@@ -9,7 +9,7 @@ process.env.MODEL = 'sonar-pro';
|
||||
process.env.MAX_TOKENS = '64000';
|
||||
process.env.TEMPERATURE = '0.2';
|
||||
process.env.DEBUG = 'false';
|
||||
process.env.LOG_LEVEL = 'error'; // Set to error to reduce noise in tests
|
||||
process.env.TASKMASTER_LOG_LEVEL = 'error'; // Set to error to reduce noise in tests
|
||||
process.env.DEFAULT_SUBTASKS = '5';
|
||||
process.env.DEFAULT_PRIORITY = 'medium';
|
||||
process.env.PROJECT_NAME = 'Test Project';
|
||||
|
||||
@@ -8,6 +8,31 @@ const mockGetResearchModelId = jest.fn();
|
||||
const mockGetFallbackProvider = jest.fn();
|
||||
const mockGetFallbackModelId = jest.fn();
|
||||
const mockGetParametersForRole = jest.fn();
|
||||
const mockGetUserId = jest.fn();
|
||||
const mockGetDebugFlag = jest.fn();
|
||||
|
||||
// --- Mock MODEL_MAP Data ---
|
||||
// Provide a simplified structure sufficient for cost calculation tests
|
||||
const mockModelMap = {
|
||||
anthropic: [
|
||||
{
|
||||
id: 'test-main-model',
|
||||
cost_per_1m_tokens: { input: 3, output: 15, currency: 'USD' }
|
||||
},
|
||||
{
|
||||
id: 'test-fallback-model',
|
||||
cost_per_1m_tokens: { input: 3, output: 15, currency: 'USD' }
|
||||
}
|
||||
],
|
||||
perplexity: [
|
||||
{
|
||||
id: 'test-research-model',
|
||||
cost_per_1m_tokens: { input: 1, output: 1, currency: 'USD' }
|
||||
}
|
||||
]
|
||||
// Add other providers/models if needed for specific tests
|
||||
};
|
||||
const mockGetBaseUrlForRole = jest.fn();
|
||||
|
||||
jest.unstable_mockModule('../../scripts/modules/config-manager.js', () => ({
|
||||
getMainProvider: mockGetMainProvider,
|
||||
@@ -16,7 +41,11 @@ jest.unstable_mockModule('../../scripts/modules/config-manager.js', () => ({
|
||||
getResearchModelId: mockGetResearchModelId,
|
||||
getFallbackProvider: mockGetFallbackProvider,
|
||||
getFallbackModelId: mockGetFallbackModelId,
|
||||
getParametersForRole: mockGetParametersForRole
|
||||
getParametersForRole: mockGetParametersForRole,
|
||||
getUserId: mockGetUserId,
|
||||
getDebugFlag: mockGetDebugFlag,
|
||||
MODEL_MAP: mockModelMap,
|
||||
getBaseUrlForRole: mockGetBaseUrlForRole
|
||||
}));
|
||||
|
||||
// Mock AI Provider Modules
|
||||
@@ -44,10 +73,15 @@ jest.unstable_mockModule('../../src/ai-providers/perplexity.js', () => ({
|
||||
const mockLog = jest.fn();
|
||||
const mockResolveEnvVariable = jest.fn();
|
||||
const mockFindProjectRoot = jest.fn();
|
||||
const mockIsSilentMode = jest.fn();
|
||||
const mockLogAiUsage = jest.fn();
|
||||
|
||||
jest.unstable_mockModule('../../scripts/modules/utils.js', () => ({
|
||||
log: mockLog,
|
||||
resolveEnvVariable: mockResolveEnvVariable,
|
||||
findProjectRoot: mockFindProjectRoot
|
||||
findProjectRoot: mockFindProjectRoot,
|
||||
isSilentMode: mockIsSilentMode,
|
||||
logAiUsage: mockLogAiUsage
|
||||
}));
|
||||
|
||||
// Import the module to test (AFTER mocks)
|
||||
@@ -83,11 +117,16 @@ describe('Unified AI Services', () => {
|
||||
|
||||
// Set a default behavior for the new mock
|
||||
mockFindProjectRoot.mockReturnValue(fakeProjectRoot);
|
||||
mockGetDebugFlag.mockReturnValue(false);
|
||||
mockGetUserId.mockReturnValue('test-user-id'); // Add default mock for getUserId
|
||||
});
|
||||
|
||||
describe('generateTextService', () => {
|
||||
test('should use main provider/model and succeed', async () => {
|
||||
mockGenerateAnthropicText.mockResolvedValue('Main provider response');
|
||||
mockGenerateAnthropicText.mockResolvedValue({
|
||||
text: 'Main provider response',
|
||||
usage: { inputTokens: 10, outputTokens: 20, totalTokens: 30 }
|
||||
});
|
||||
|
||||
const params = {
|
||||
role: 'main',
|
||||
@@ -97,7 +136,8 @@ describe('Unified AI Services', () => {
|
||||
};
|
||||
const result = await generateTextService(params);
|
||||
|
||||
expect(result).toBe('Main provider response');
|
||||
expect(result.mainResult).toBe('Main provider response');
|
||||
expect(result).toHaveProperty('telemetryData');
|
||||
expect(mockGetMainProvider).toHaveBeenCalledWith(fakeProjectRoot);
|
||||
expect(mockGetMainModelId).toHaveBeenCalledWith(fakeProjectRoot);
|
||||
expect(mockGetParametersForRole).toHaveBeenCalledWith(
|
||||
@@ -127,7 +167,10 @@ describe('Unified AI Services', () => {
|
||||
const mainError = new Error('Main provider failed');
|
||||
mockGenerateAnthropicText
|
||||
.mockRejectedValueOnce(mainError)
|
||||
.mockResolvedValueOnce('Fallback provider response');
|
||||
.mockResolvedValueOnce({
|
||||
text: 'Fallback provider response',
|
||||
usage: { inputTokens: 15, outputTokens: 25, totalTokens: 40 }
|
||||
});
|
||||
|
||||
const explicitRoot = '/explicit/test/root';
|
||||
const params = {
|
||||
@@ -137,7 +180,8 @@ describe('Unified AI Services', () => {
|
||||
};
|
||||
const result = await generateTextService(params);
|
||||
|
||||
expect(result).toBe('Fallback provider response');
|
||||
expect(result.mainResult).toBe('Fallback provider response');
|
||||
expect(result).toHaveProperty('telemetryData');
|
||||
expect(mockGetMainProvider).toHaveBeenCalledWith(explicitRoot);
|
||||
expect(mockGetFallbackProvider).toHaveBeenCalledWith(explicitRoot);
|
||||
expect(mockGetParametersForRole).toHaveBeenCalledWith(
|
||||
@@ -173,14 +217,16 @@ describe('Unified AI Services', () => {
|
||||
mockGenerateAnthropicText
|
||||
.mockRejectedValueOnce(mainError)
|
||||
.mockRejectedValueOnce(fallbackError);
|
||||
mockGeneratePerplexityText.mockResolvedValue(
|
||||
'Research provider response'
|
||||
);
|
||||
mockGeneratePerplexityText.mockResolvedValue({
|
||||
text: 'Research provider response',
|
||||
usage: { inputTokens: 20, outputTokens: 30, totalTokens: 50 }
|
||||
});
|
||||
|
||||
const params = { role: 'main', prompt: 'Research fallback test' };
|
||||
const result = await generateTextService(params);
|
||||
|
||||
expect(result).toBe('Research provider response');
|
||||
expect(result.mainResult).toBe('Research provider response');
|
||||
expect(result).toHaveProperty('telemetryData');
|
||||
expect(mockGetMainProvider).toHaveBeenCalledWith(fakeProjectRoot);
|
||||
expect(mockGetFallbackProvider).toHaveBeenCalledWith(fakeProjectRoot);
|
||||
expect(mockGetResearchProvider).toHaveBeenCalledWith(fakeProjectRoot);
|
||||
@@ -247,22 +293,32 @@ describe('Unified AI Services', () => {
|
||||
const retryableError = new Error('Rate limit');
|
||||
mockGenerateAnthropicText
|
||||
.mockRejectedValueOnce(retryableError) // Fails once
|
||||
.mockResolvedValue('Success after retry'); // Succeeds on retry
|
||||
.mockResolvedValueOnce({
|
||||
// Succeeds on retry
|
||||
text: 'Success after retry',
|
||||
usage: { inputTokens: 5, outputTokens: 10, totalTokens: 15 }
|
||||
});
|
||||
|
||||
const params = { role: 'main', prompt: 'Retry success test' };
|
||||
const result = await generateTextService(params);
|
||||
|
||||
expect(result).toBe('Success after retry');
|
||||
expect(result.mainResult).toBe('Success after retry');
|
||||
expect(result).toHaveProperty('telemetryData');
|
||||
expect(mockGenerateAnthropicText).toHaveBeenCalledTimes(2); // Initial + 1 retry
|
||||
expect(mockLog).toHaveBeenCalledWith(
|
||||
'info',
|
||||
expect.stringContaining('Retryable error detected. Retrying')
|
||||
expect.stringContaining(
|
||||
'Something went wrong on the provider side. Retrying'
|
||||
)
|
||||
);
|
||||
});
|
||||
|
||||
test('should use default project root or handle null if findProjectRoot returns null', async () => {
|
||||
mockFindProjectRoot.mockReturnValue(null); // Simulate not finding root
|
||||
mockGenerateAnthropicText.mockResolvedValue('Response with no root');
|
||||
mockGenerateAnthropicText.mockResolvedValue({
|
||||
text: 'Response with no root',
|
||||
usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 }
|
||||
});
|
||||
|
||||
const params = { role: 'main', prompt: 'No root test' }; // No explicit root passed
|
||||
await generateTextService(params);
|
||||
|
||||
@@ -2,8 +2,9 @@
|
||||
* Task finder tests
|
||||
*/
|
||||
|
||||
// Import after mocks are set up - No mocks needed for readComplexityReport anymore
|
||||
import { findTaskById } from '../../scripts/modules/utils.js';
|
||||
import { sampleTasks, emptySampleTasks } from '../fixtures/sample-tasks.js';
|
||||
import { emptySampleTasks, sampleTasks } from '../fixtures/sample-tasks.js';
|
||||
|
||||
describe('Task Finder', () => {
|
||||
describe('findTaskById function', () => {
|
||||
@@ -55,5 +56,62 @@ describe('Task Finder', () => {
|
||||
expect(result.task).toBeNull();
|
||||
expect(result.originalSubtaskCount).toBeNull();
|
||||
});
|
||||
test('should work correctly when no complexity report is provided', () => {
|
||||
// Pass null as the complexity report
|
||||
const result = findTaskById(sampleTasks.tasks, 2, null);
|
||||
|
||||
expect(result.task).toBeDefined();
|
||||
expect(result.task.id).toBe(2);
|
||||
expect(result.task.complexityScore).toBeUndefined();
|
||||
});
|
||||
test('should work correctly when task has no complexity data in the provided report', () => {
|
||||
// Define a complexity report that doesn't include task 2
|
||||
const complexityReport = {
|
||||
complexityAnalysis: [{ taskId: 999, complexityScore: 5 }]
|
||||
};
|
||||
|
||||
const result = findTaskById(sampleTasks.tasks, 2, complexityReport);
|
||||
|
||||
expect(result.task).toBeDefined();
|
||||
expect(result.task.id).toBe(2);
|
||||
expect(result.task.complexityScore).toBeUndefined();
|
||||
});
|
||||
|
||||
test('should include complexity score when report is provided', () => {
|
||||
// Define the complexity report for this test
|
||||
const complexityReport = {
|
||||
meta: {
|
||||
generatedAt: '2023-01-01T00:00:00.000Z',
|
||||
tasksAnalyzed: 3,
|
||||
thresholdScore: 5
|
||||
},
|
||||
complexityAnalysis: [
|
||||
{
|
||||
taskId: 1,
|
||||
taskTitle: 'Initialize Project',
|
||||
complexityScore: 3,
|
||||
recommendedSubtasks: 2
|
||||
},
|
||||
{
|
||||
taskId: 2,
|
||||
taskTitle: 'Create Core Functionality',
|
||||
complexityScore: 8,
|
||||
recommendedSubtasks: 5
|
||||
},
|
||||
{
|
||||
taskId: 3,
|
||||
taskTitle: 'Implement UI Components',
|
||||
complexityScore: 6,
|
||||
recommendedSubtasks: 4
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
const result = findTaskById(sampleTasks.tasks, 2, complexityReport);
|
||||
|
||||
expect(result.task).toBeDefined();
|
||||
expect(result.task.id).toBe(2);
|
||||
expect(result.task.complexityScore).toBe(8);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -199,6 +199,12 @@ const testSetTaskStatus = (tasksData, taskIdInput, newStatus) => {
|
||||
|
||||
// Simplified version of updateSingleTaskStatus for testing
|
||||
const testUpdateSingleTaskStatus = (tasksData, taskIdInput, newStatus) => {
|
||||
if (!isValidTaskStatus(newStatus)) {
|
||||
throw new Error(
|
||||
`Error: Invalid status value: ${newStatus}. Use one of: ${TASK_STATUS_OPTIONS.join(', ')}`
|
||||
);
|
||||
}
|
||||
|
||||
// Check if it's a subtask (e.g., "1.2")
|
||||
if (taskIdInput.includes('.')) {
|
||||
const [parentId, subtaskId] = taskIdInput
|
||||
@@ -329,6 +335,10 @@ const testAddTask = (
|
||||
import * as taskManager from '../../scripts/modules/task-manager.js';
|
||||
import { sampleClaudeResponse } from '../fixtures/sample-claude-response.js';
|
||||
import { sampleTasks, emptySampleTasks } from '../fixtures/sample-tasks.js';
|
||||
import {
|
||||
isValidTaskStatus,
|
||||
TASK_STATUS_OPTIONS
|
||||
} from '../../src/constants/task-status.js';
|
||||
|
||||
// Destructure the required functions for convenience
|
||||
const { findNextTask, generateTaskFiles, clearSubtasks, updateTaskById } =
|
||||
@@ -1165,6 +1175,16 @@ describe('Task Manager Module', () => {
|
||||
expect(testTasksData.tasks[1].status).toBe('done');
|
||||
});
|
||||
|
||||
test('should throw error for invalid status', async () => {
|
||||
// Arrange
|
||||
const testTasksData = JSON.parse(JSON.stringify(sampleTasks));
|
||||
|
||||
// Assert
|
||||
expect(() =>
|
||||
testUpdateSingleTaskStatus(testTasksData, '2', 'Done')
|
||||
).toThrow(/Error: Invalid status value: Done./);
|
||||
});
|
||||
|
||||
test('should update subtask status', async () => {
|
||||
// Arrange
|
||||
const testTasksData = JSON.parse(JSON.stringify(sampleTasks));
|
||||
|
||||