fix(e2e): further improve the end-to-end script to account for the changes made to each AI provider, which now responds with an object rather than the bare result.

This commit is contained in:
Eyal Toledano
2025-05-14 19:04:47 -04:00
parent ca5ec03cd8
commit da636f6681
3 changed files with 222 additions and 119 deletions

View File

@@ -5,6 +5,32 @@
 # It requires curl and jq to be installed.
 # It expects the project root path to be passed as the second argument.
+# --- New Function: extract_and_sum_cost ---
+# Takes a string containing command output and the current total cost.
+# Extracts costs (lines with "Cost: $X.YYYY USD" or "Total Cost: $X.YYYY USD")
+# from the output, sums them, and adds to the current total.
+# Returns the new total cost.
+extract_and_sum_cost() {
+    local command_output="$1"
+    local current_total_cost="$2"
+    local extracted_cost_sum="0.0"
+    # Grep for lines containing "Cost: $" or "Total Cost: $", then extract the numeric value.
+    # Handles cases like "Cost: $0.001234 USD" or "Total Cost: $0.001234 USD".
+    # Accumulate all costs found in the command_output.
+    while IFS= read -r line; do
+        # Extract the numeric part after '$' and before ' USD'
+        cost_value=$(echo "$line" | grep -o -E '(\$ ?[0-9]+\.[0-9]+)' | sed 's/\$ //g' | sed 's/\$//g')
+        if [[ -n "$cost_value" && "$cost_value" =~ ^[0-9]+\.[0-9]+$ ]]; then
+            extracted_cost_sum=$(echo "$extracted_cost_sum + $cost_value" | bc)
+        fi
+    done < <(echo "$command_output" | grep -E 'Cost: \$|Total Cost: \$')
+    new_total_cost=$(echo "$current_total_cost + $extracted_cost_sum" | bc)
+    echo "$new_total_cost"
+}
+export -f extract_and_sum_cost # Export for use in other scripts if sourced
 analyze_log_with_llm() {
     local log_file="$1"
     local project_root="$2" # Expect project root as the second argument
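A minimal usage sketch of the new helper, assuming e2e_helpers.sh has been sourced and `bc` is installed; the sample output text is hypothetical but follows the `Cost: $X.YYYY USD` format the grep targets:

    # Hypothetical command output containing two cost lines
    sample_output=$'Parsing PRD...\nCost: $0.001200 USD\nDone.\nTotal Cost: $0.003400 USD'
    total="0.0"
    total=$(extract_and_sum_cost "$sample_output" "$total")
    echo "$total"   # prints .004600 (bc omits the leading zero)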
@@ -15,17 +41,17 @@ analyze_log_with_llm() {
     fi
     local env_file="${project_root}/.env" # Path to .env in project root
+    local supported_models_file="${project_root}/scripts/modules/supported-models.json"
     local provider_summary_log="provider_add_task_summary.log" # File summarizing provider test outcomes
     local api_key=""
-    # !!! IMPORTANT: Replace with your actual Claude API endpoint if different !!!
     local api_endpoint="https://api.anthropic.com/v1/messages"
-    # !!! IMPORTANT: Ensure this matches the variable name in your .env file !!!
     local api_key_name="ANTHROPIC_API_KEY"
+    local llm_analysis_model_id="claude-3-7-sonnet-20250219" # Model used for this analysis
+    local llm_analysis_provider="anthropic"
     echo "" # Add a newline before analysis starts
-    # Check for jq and curl
     if ! command -v jq &> /dev/null; then
         echo "[HELPER_ERROR] LLM Analysis requires 'jq'. Skipping analysis." >&2
         return 1
@@ -34,34 +60,31 @@ analyze_log_with_llm() {
         echo "[HELPER_ERROR] LLM Analysis requires 'curl'. Skipping analysis." >&2
         return 1
     fi
+    if ! command -v bc &> /dev/null; then
+        echo "[HELPER_ERROR] LLM Analysis requires 'bc' for cost calculation. Skipping analysis." >&2
+        return 1
+    fi
-    # Check for API Key in the PROJECT ROOT's .env file
     if [ -f "$env_file" ]; then
-        # Original assignment - Reading from project root .env
         api_key=$(grep "^${api_key_name}=" "$env_file" | sed -e "s/^${api_key_name}=//" -e 's/^[[:space:]"]*//' -e 's/[[:space:]"]*$//')
     fi
     if [ -z "$api_key" ]; then
-        echo "[HELPER_ERROR] ${api_key_name} not found or empty in project root .env file ($env_file). Skipping LLM analysis." >&2 # Updated error message
+        echo "[HELPER_ERROR] ${api_key_name} not found or empty in project root .env file ($env_file). Skipping LLM analysis." >&2
         return 1
     fi
-    # Log file path is passed as argument, need to ensure it exists relative to where the script *calling* this function is, OR use absolute path.
-    # Assuming absolute path or path relative to the initial PWD for simplicity here.
-    # The calling script passes the correct path relative to the original PWD.
     if [ ! -f "$log_file" ]; then
-        echo "[HELPER_ERROR] Log file not found: $log_file (PWD: $(pwd)). Check path passed to function. Skipping LLM analysis." >&2 # Updated error
+        echo "[HELPER_ERROR] Log file not found: $log_file (PWD: $(pwd)). Check path passed to function. Skipping LLM analysis." >&2
         return 1
     fi
     local log_content
-    # Read entire file, handle potential errors
     log_content=$(cat "$log_file") || {
         echo "[HELPER_ERROR] Failed to read log file: $log_file. Skipping LLM analysis." >&2
         return 1
     }
-    # Prepare the prompt using a quoted heredoc for literal interpretation
     read -r -d '' prompt_template <<'EOF'
 Analyze the following E2E test log for the task-master tool. The log contains output from various 'task-master' commands executed sequentially.
@@ -99,41 +122,34 @@ Here is the main log content:
 %s
 EOF
-    # Note: The final %s is a placeholder for printf later
     local full_prompt
-    # Use printf to substitute the log content into the %s placeholder
     if ! printf -v full_prompt "$prompt_template" "$log_content"; then
         echo "[HELPER_ERROR] Failed to format prompt using printf." >&2
-        # It's unlikely printf itself fails, but good practice
         return 1
     fi
-    # Construct the JSON payload for Claude Messages API
     local payload
     payload=$(jq -n --arg prompt "$full_prompt" '{
-        "model": "claude-3-haiku-20240307", # Using Haiku for faster/cheaper testing
-        "max_tokens": 3072, # Increased slightly
+        "model": "'"$llm_analysis_model_id"'",
+        "max_tokens": 3072,
         "messages": [
             {"role": "user", "content": $prompt}
         ]
-        # "temperature": 0.0 # Optional: Lower temperature for more deterministic JSON output
     }') || {
         echo "[HELPER_ERROR] Failed to create JSON payload using jq." >&2
         return 1
     }
     local response_raw response_http_code response_body
-    # Capture body and HTTP status code separately
     response_raw=$(curl -s -w "\nHTTP_STATUS_CODE:%{http_code}" -X POST "$api_endpoint" \
         -H "Content-Type: application/json" \
         -H "x-api-key: $api_key" \
         -H "anthropic-version: 2023-06-01" \
         --data "$payload")
-    # Extract status code and body
     response_http_code=$(echo "$response_raw" | grep '^HTTP_STATUS_CODE:' | sed 's/HTTP_STATUS_CODE://')
-    response_body=$(echo "$response_raw" | sed '$d') # Remove last line (status code)
+    response_body=$(echo "$response_raw" | sed '$d')
     if [ "$response_http_code" != "200" ]; then
         echo "[HELPER_ERROR] LLM API call failed with HTTP status $response_http_code." >&2
@@ -146,17 +162,41 @@ EOF
         return 1
     fi
-    # Pipe the raw response body directly to the Node.js parser script
+    # Calculate cost of this LLM analysis call
+    local input_tokens output_tokens input_cost_per_1m output_cost_per_1m calculated_llm_cost
+    input_tokens=$(echo "$response_body" | jq -r '.usage.input_tokens // 0')
+    output_tokens=$(echo "$response_body" | jq -r '.usage.output_tokens // 0')
+    if [ -f "$supported_models_file" ]; then
+        model_cost_info=$(jq -r --arg provider "$llm_analysis_provider" --arg model_id "$llm_analysis_model_id" '
+            .[$provider][] | select(.id == $model_id) | .cost_per_1m_tokens
+        ' "$supported_models_file")
+        if [[ -n "$model_cost_info" && "$model_cost_info" != "null" ]]; then
+            input_cost_per_1m=$(echo "$model_cost_info" | jq -r '.input // 0')
+            output_cost_per_1m=$(echo "$model_cost_info" | jq -r '.output // 0')
+            calculated_llm_cost=$(echo "($input_tokens / 1000000 * $input_cost_per_1m) + ($output_tokens / 1000000 * $output_cost_per_1m)" | bc -l)
+            # Format to 6 decimal places
+            formatted_llm_cost=$(printf "%.6f" "$calculated_llm_cost")
+            echo "LLM Analysis AI Cost: $formatted_llm_cost USD" # This line will be parsed by run_e2e.sh
+        else
+            echo "[HELPER_WARNING] Cost data for model $llm_analysis_model_id not found in $supported_models_file. LLM analysis cost not calculated."
+        fi
+    else
+        echo "[HELPER_WARNING] $supported_models_file not found. LLM analysis cost not calculated."
+    fi
+    # --- End cost calculation for this call ---
     if echo "$response_body" | node "${project_root}/tests/e2e/parse_llm_output.cjs" "$log_file"; then
         echo "[HELPER_SUCCESS] LLM analysis parsed and printed successfully by Node.js script."
-        return 0 # Success
+        return 0
     else
         local node_exit_code=$?
         echo "[HELPER_ERROR] Node.js parsing script failed with exit code ${node_exit_code}."
         echo "[HELPER_ERROR] Raw API response body (first 500 chars): $(echo "$response_body" | head -c 500)"
-        return 1 # Failure
+        return 1
     fi
 }
-# Export the function so it might be available to subshells if sourced
 export -f analyze_log_with_llm
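The cost-calculation block assumes supported-models.json maps each provider name to an array of model entries carrying a `cost_per_1m_tokens` object. A self-contained sketch of that lookup and the per-call arithmetic (the file path and prices below are illustrative placeholders, not real pricing data):

    # Sample file in the shape the jq query expects
    printf '%s\n' '{"anthropic":[{"id":"claude-3-7-sonnet-20250219","cost_per_1m_tokens":{"input":3.0,"output":15.0}}]}' > /tmp/sample-models.json
    jq -r --arg provider "anthropic" --arg model_id "claude-3-7-sonnet-20250219" \
        '.[$provider][] | select(.id == $model_id) | .cost_per_1m_tokens' /tmp/sample-models.json
    # prints the cost object {"input": 3, "output": 15}
    # Cost for, say, 2000 input tokens and 500 output tokens:
    echo "(2000 / 1000000 * 3.0) + (500 / 1000000 * 15.0)" | bc -l   # -> .0135...; printf "%.6f" renders 0.013500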

View File

@@ -60,9 +60,52 @@ MAIN_ENV_FILE="$TASKMASTER_SOURCE_DIR/.env"
 # ---
 # <<< Source the helper script >>>
+# shellcheck source=tests/e2e/e2e_helpers.sh
 source "$TASKMASTER_SOURCE_DIR/tests/e2e/e2e_helpers.sh"
+# ==========================================
+# >>> Global Helper Functions Defined in run_e2e.sh <<<
+# --- Helper Functions (Define globally before export) ---
+_format_duration() {
+    local total_seconds=$1
+    local minutes=$((total_seconds / 60))
+    local seconds=$((total_seconds % 60))
+    printf "%dm%02ds" "$minutes" "$seconds"
+}
+# Note: This relies on 'overall_start_time' being set globally before the function is called
+_get_elapsed_time_for_log() {
+    local current_time
+    current_time=$(date +%s)
+    # Use overall_start_time here, as start_time_for_helpers might not be relevant globally
+    local elapsed_seconds
+    elapsed_seconds=$((current_time - overall_start_time))
+    _format_duration "$elapsed_seconds"
+}
+log_info() {
+    echo "[INFO] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
+}
+log_success() {
+    echo "[SUCCESS] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
+}
+log_error() {
+    echo "[ERROR] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2
+}
+log_step() {
+    test_step_count=$((test_step_count + 1))
+    echo ""
+    echo "============================================="
+    echo " STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
+    echo "============================================="
+}
+# ==========================================
 # <<< Export helper functions for subshells >>>
-export -f log_info log_success log_error log_step _format_duration _get_elapsed_time_for_log
+export -f log_info log_success log_error log_step _format_duration _get_elapsed_time_for_log extract_and_sum_cost
 # --- Argument Parsing for Analysis-Only Mode ---
 # This remains the same, as it exits early if matched
@@ -138,6 +181,7 @@ fi
 # Note: These are mainly for step numbering within the log now, not for final summary
 test_step_count=0
 start_time_for_helpers=0 # Separate start time for helper functions inside the pipe
+total_e2e_cost="0.0" # Initialize total E2E cost
 # ---
 # --- Log File Setup ---
@@ -220,12 +264,16 @@ log_step() {
 fi
 # --- Dependency Checks ---
-log_step "Checking for dependencies (jq)"
+log_step "Checking for dependencies (jq, bc)"
 if ! command -v jq &> /dev/null; then
     log_error "Dependency 'jq' is not installed or not found in PATH. Please install jq (e.g., 'brew install jq' or 'sudo apt-get install jq')."
     exit 1
 fi
-log_success "Dependency 'jq' found."
+if ! command -v bc &> /dev/null; then
+    log_error "Dependency 'bc' not installed (for cost calculation). Please install bc (e.g., 'brew install bc' or 'sudo apt-get install bc')."
+    exit 1
+fi
+log_success "Dependencies 'jq' and 'bc' found."
 # --- Test Setup (Output to tee) ---
 log_step "Setting up test environment"
@@ -393,7 +441,7 @@ log_step() {
 declare -a models=(
     "claude-3-7-sonnet-20250219"
     "gpt-4o"
-    "gemini-2.5-pro-exp-03-25"
+    "gemini-2.5-pro-preview-05-06"
     "sonar-pro" # Note: This is research-only, add-task might fail if not using research model
     "grok-3"
     "anthropic/claude-3.7-sonnet" # OpenRouter uses Claude 3.7
@@ -435,9 +483,8 @@ log_step() {
 # 3. Check for success and extract task ID
 new_task_id=""
-if [ $add_task_exit_code -eq 0 ] && echo "$add_task_cmd_output" | grep -q "✓ Added new task #"; then
-    # Attempt to extract the ID (adjust grep/sed/awk as needed based on actual output format)
-    new_task_id=$(echo "$add_task_cmd_output" | grep "✓ Added new task #" | sed 's/.*✓ Added new task #\([0-9.]\+\).*/\1/')
+if [ $add_task_exit_code -eq 0 ] && (echo "$add_task_cmd_output" | grep -q "✓ Added new task #" || echo "$add_task_cmd_output" | grep -q "✅ New task created successfully:" || echo "$add_task_cmd_output" | grep -q "Task [0-9]\+ Created Successfully"); then
+    new_task_id=$(echo "$add_task_cmd_output" | grep -o -E "(Task |#)[0-9.]+" | grep -o -E "[0-9.]+" | head -n 1)
     if [ -n "$new_task_id" ]; then
         log_success "Add-task succeeded for $provider. New task ID: $new_task_id"
         echo "Provider $provider add-task SUCCESS (ID: $new_task_id)" >> provider_add_task_summary.log
@@ -775,4 +822,8 @@ else
     echo "[ERROR] [$formatted_duration_for_error] $(date +"%Y-%m-%d %H:%M:%S") Test run directory $TEST_RUN_DIR not found. Cannot perform LLM analysis." >&2
 fi
+# Final cost formatting
+formatted_total_e2e_cost=$(printf "%.6f" "$total_e2e_cost")
+echo "Total E2E AI Cost: $formatted_total_e2e_cost USD"
 exit $EXIT_CODE
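run_e2e.sh now initializes `total_e2e_cost` and prints it at exit; the accumulation call sites sit outside the hunks shown here, but the intended pattern (mirroring the fallback script below) folds each command's captured output through the shared helper, roughly:

    # Hypothetical call site; add_task_cmd_output holds the captured CLI output
    total_e2e_cost=$(extract_and_sum_cost "$add_task_cmd_output" "$total_e2e_cost")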

View File

@@ -18,6 +18,25 @@ set -o pipefail
 # --- Embedded Helper Functions ---
 # Copied from e2e_helpers.sh to make this script standalone
+# OR source it if preferred and path is reliable
+# <<< Determine SCRIPT_DIR and PROJECT_ROOT_DIR early >>>
+SCRIPT_DIR_FV="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+PROJECT_ROOT_DIR_FV="$( cd "$SCRIPT_DIR_FV/../.." &> /dev/null && pwd )" # Assumes script is in tests/e2e/
+# --- Try to Source e2e_helpers.sh ---
+E2E_HELPERS_PATH_FV="${PROJECT_ROOT_DIR_FV}/tests/e2e/e2e_helpers.sh"
+if [ -f "$E2E_HELPERS_PATH_FV" ]; then
+    # shellcheck source=tests/e2e/e2e_helpers.sh
+    source "$E2E_HELPERS_PATH_FV"
+    echo "[INFO FV] Sourced e2e_helpers.sh successfully."
+else
+    echo "[ERROR FV] e2e_helpers.sh not found at $E2E_HELPERS_PATH_FV. Cost extraction will fail."
+    # Define a placeholder if not found, so the script doesn't break immediately,
+    # but cost extraction will effectively be a no-op.
+    extract_and_sum_cost() { echo "$2"; } # Returns current total, effectively adding 0
+fi
 _format_duration() {
     local total_seconds=$1
@@ -27,127 +46,112 @@ _format_duration() {
 }
 _get_elapsed_time_for_log() {
-    # Needs overall_start_time defined in the main script body
-    local current_time=$(date +%s)
-    local elapsed_seconds=$((current_time - overall_start_time))
+    local current_time
+    current_time=$(date +%s)
+    local elapsed_seconds
+    elapsed_seconds=$((current_time - overall_start_time)) # Needs overall_start_time
     _format_duration "$elapsed_seconds"
 }
 log_info() {
-    echo "[INFO] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
+    echo "[INFO FV] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
 }
 log_success() {
-    echo "[SUCCESS] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
+    echo "[SUCCESS FV] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
 }
 log_error() {
-    echo "[ERROR] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2
+    echo "[ERROR FV] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2
 }
 log_step() {
-    # Needs test_step_count defined and incremented in the main script body
-    test_step_count=$((test_step_count + 1))
+    test_step_count=$((test_step_count + 1)) # Needs test_step_count
     echo ""
     echo "============================================="
-    echo " STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
+    echo " FV STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
     echo "============================================="
 }
 # --- Signal Handling ---
-# Global variable to hold child PID
 child_pid=0
-# Use a persistent log file name
-PROGRESS_LOG_FILE="fallback_verification_progress.log"
+PROGRESS_LOG_FILE="fallback_verification_progress.log" # Stays in run dir
 cleanup() {
-    echo "" # Newline after ^C
+    echo ""
     log_error "Interrupt received. Cleaning up any running child process..."
     if [ "$child_pid" -ne 0 ]; then
         log_info "Killing child process (PID: $child_pid) and its group..."
         kill -TERM -- "-$child_pid" 2>/dev/null || kill -KILL -- "-$child_pid" 2>/dev/null
         child_pid=0
     fi
-    # DO NOT delete the progress log file on interrupt
     log_info "Progress saved in: $PROGRESS_LOG_FILE"
-    exit 130 # Exit with code indicating interrupt
+    # Print current total cost on interrupt
+    if [[ -n "${total_fallback_cost+x}" && "$total_fallback_cost" != "0.0" ]]; then # Check if var is set and not initial
+        log_info "Current Total Fallback AI Cost at interruption: $total_fallback_cost USD"
+    fi
+    exit 130
 }
-# Trap SIGINT (Ctrl+C) and SIGTERM
 trap cleanup INT TERM
 # --- Configuration ---
-# Determine the project root relative to this script's location
-# Use a robust method to find the script's own directory
-SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
-# Assumes this script is in tests/e2e/
-PROJECT_ROOT_DIR="$( cd "$SCRIPT_DIR/../.." &> /dev/null && pwd )"
-SUPPORTED_MODELS_FILE="$PROJECT_ROOT_DIR/scripts/modules/supported-models.json"
-BASE_RUNS_DIR="$PROJECT_ROOT_DIR/tests/e2e/_runs"
+# SCRIPT_DIR and PROJECT_ROOT_DIR already defined above
+SUPPORTED_MODELS_FILE="$PROJECT_ROOT_DIR_FV/scripts/modules/supported-models.json"
+BASE_RUNS_DIR="$PROJECT_ROOT_DIR_FV/tests/e2e/_runs"
 # --- Determine Target Run Directory ---
 TARGET_RUN_DIR=""
 if [ "$#" -ge 1 ] && [ -n "$1" ]; then
-    # Use provided argument if it exists
     TARGET_RUN_DIR="$1"
-    # Make path absolute if it's relative
     if [[ "$TARGET_RUN_DIR" != /* ]]; then
         TARGET_RUN_DIR="$(pwd)/$TARGET_RUN_DIR"
     fi
-    echo "[INFO] Using provided target run directory: $TARGET_RUN_DIR"
+    echo "[INFO FV] Using provided target run directory: $TARGET_RUN_DIR"
 else
-    # Find the latest run directory
-    echo "[INFO] No run directory provided, finding latest in $BASE_RUNS_DIR..."
+    echo "[INFO FV] No run directory provided, finding latest in $BASE_RUNS_DIR..."
     TARGET_RUN_DIR=$(ls -td "$BASE_RUNS_DIR"/run_* 2>/dev/null | head -n 1)
     if [ -z "$TARGET_RUN_DIR" ]; then
-        echo "[ERROR] No run directories found matching 'run_*' in $BASE_RUNS_DIR. Cannot proceed." >&2
+        echo "[ERROR FV] No run directories found matching 'run_*' in $BASE_RUNS_DIR. Cannot proceed." >&2
         exit 1
     fi
-    echo "[INFO] Found latest run directory: $TARGET_RUN_DIR"
+    echo "[INFO FV] Found latest run directory: $TARGET_RUN_DIR"
 fi
-# Validate the target directory
 if [ ! -d "$TARGET_RUN_DIR" ]; then
-    echo "[ERROR] Target run directory not found or is not a directory: $TARGET_RUN_DIR" >&2
+    echo "[ERROR FV] Target run directory not found or is not a directory: $TARGET_RUN_DIR" >&2
     exit 1
 fi
-# --- Change to Target Directory ---
-echo "[INFO] Changing working directory to: $TARGET_RUN_DIR"
+echo "[INFO FV] Changing working directory to: $TARGET_RUN_DIR"
 if ! cd "$TARGET_RUN_DIR"; then
-    echo "[ERROR] Failed to cd into target directory: $TARGET_RUN_DIR" >&2
+    echo "[ERROR FV] Failed to cd into target directory: $TARGET_RUN_DIR" >&2
     exit 1
 fi
-echo "[INFO] Now operating inside: $(pwd)"
-# --- Now we are inside the target run directory ---
-overall_start_time=$(date +%s)
-test_step_count=0
+echo "[INFO FV] Now operating inside: $(pwd)"
+overall_start_time=$(date +%s) # Initialize for logging helpers
+test_step_count=0 # Initialize for logging helpers
+total_fallback_cost="0.0" # Initialize total cost for this script
 log_info "Starting fallback verification script execution in $(pwd)"
 log_info "Progress will be logged to: $(pwd)/$PROGRESS_LOG_FILE"
-# --- Dependency Checks ---
-log_step "Checking for dependencies (jq) in verification script"
-if ! command -v jq &> /dev/null; then
-    log_error "Dependency 'jq' is not installed or not found in PATH."
-    exit 1
-fi
-log_success "Dependency 'jq' found."
-# --- Verification Logic ---
+log_step "Checking for dependencies (jq, bc) in verification script"
+if ! command -v jq &> /dev/null; then log_error "Dependency 'jq' not installed."; exit 1; fi
+if ! command -v bc &> /dev/null; then log_error "Dependency 'bc' not installed (for cost calculation)."; exit 1; fi
+log_success "Dependencies 'jq' and 'bc' found."
 log_step "Starting/Resuming Fallback Model (generateObjectService) Verification"
-# Ensure progress log exists, create if not
 touch "$PROGRESS_LOG_FILE"
-# Ensure the supported models file exists (using absolute path)
 if [ ! -f "$SUPPORTED_MODELS_FILE" ]; then
-    log_error "supported-models.json not found at absolute path: $SUPPORTED_MODELS_FILE."
+    log_error "supported-models.json not found at: $SUPPORTED_MODELS_FILE."
     exit 1
 fi
 log_info "Using supported models file: $SUPPORTED_MODELS_FILE"
-# Ensure subtask 1.1 exists (basic check, main script should guarantee)
-# Check for tasks.json in the current directory (which is now the run dir)
 if [ ! -f "tasks/tasks.json" ]; then
     log_error "tasks/tasks.json not found in current directory ($(pwd)). Was this run directory properly initialized?"
     exit 1
@@ -158,78 +162,90 @@ if ! jq -e '.tasks[] | select(.id == 1) | .subtasks[] | select(.id == 1)' tasks/
 fi
 log_info "Subtask 1.1 found in $(pwd)/tasks/tasks.json, proceeding with verification."
-# Read providers and models using jq
 jq -c 'to_entries[] | .key as $provider | .value[] | select(.allowed_roles[]? == "fallback") | {provider: $provider, id: .id}' "$SUPPORTED_MODELS_FILE" | while IFS= read -r model_info; do
     provider=$(echo "$model_info" | jq -r '.provider')
     model_id=$(echo "$model_info" | jq -r '.id')
-    flag="" # Default flag
-    # Check if already tested
-    # Use grep -Fq for fixed string and quiet mode
+    flag=""
     if grep -Fq "${provider},${model_id}," "$PROGRESS_LOG_FILE"; then
         log_info "--- Skipping: $provider / $model_id (already tested, result in $PROGRESS_LOG_FILE) ---"
+        # Still need to sum up its cost if it was successful before
+        previous_test_output=$(grep -F "${provider},${model_id}," "$PROGRESS_LOG_FILE" | head -n 1)
+        # Assuming the output file for successful test exists and contains cost
+        prev_output_file="update_subtask_raw_output_${provider}_${model_id//\//_}.log"
+        if [[ "$previous_test_output" == *",SUCCESS"* && -f "$prev_output_file" ]]; then
+            # shellcheck disable=SC2154 # overall_start_time is set
+            log_info "Summing cost from previous successful test of $provider / $model_id from $prev_output_file"
+            # shellcheck disable=SC2154 # total_fallback_cost is set
+            total_fallback_cost=$(extract_and_sum_cost "$(cat "$prev_output_file")" "$total_fallback_cost")
+            log_info "Cumulative fallback AI cost after previous $provider / $model_id: $total_fallback_cost USD"
+        fi
        continue
     fi
     log_info "--- Verifying: $provider / $model_id ---"
-    # Determine provider flag
-    if [ "$provider" == "openrouter" ]; then
-        flag="--openrouter"
-    elif [ "$provider" == "ollama" ]; then
-        flag="--ollama"
-    fi
-    # 1. Set the main model
+    if [ "$provider" == "openrouter" ]; then flag="--openrouter"; fi
+    if [ "$provider" == "ollama" ]; then flag="--ollama"; fi
     if ! command -v task-master &> /dev/null; then
         log_error "task-master command not found."
-        echo "[INSTRUCTION] Please run 'npm link task-master-ai' in the project root first."
+        echo "[INSTRUCTION FV] Please run 'npm link task-master-ai' in the project root first."
         exit 1
     fi
     log_info "Setting main model to $model_id ${flag:+using flag $flag}..."
     set_model_cmd="task-master models --set-main \"$model_id\" $flag"
-    model_set_status="SUCCESS"
-    if ! eval $set_model_cmd > /dev/null 2>&1; then
+    if ! eval "$set_model_cmd" > /dev/null 2>&1; then
         log_error "Failed to set main model for $provider / $model_id. Skipping test."
         echo "$provider,$model_id,SET_MODEL_FAILED" >> "$PROGRESS_LOG_FILE"
-        continue # Skip the actual test if setting fails
+        continue
     fi
     log_info "Set main model ok."
-    # 2. Run update-subtask
     log_info "Running update-subtask --id=1.1 --prompt='Test generateObjectService' (timeout 120s)"
     update_subtask_output_file="update_subtask_raw_output_${provider}_${model_id//\//_}.log"
-    timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." > "$update_subtask_output_file" 2>&1 &
+    # Capture output to a variable AND a file
+    update_subtask_command_output=""
+    timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." 2>&1 | tee "$update_subtask_output_file" &
+    # Store the command output in a variable simultaneously
+    # update_subtask_command_output=$(timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." 2>&1)
+    # The above direct capture won't work well with tee and backgrounding. Instead, read the file after command completion.
     child_pid=$!
     wait "$child_pid"
     update_subtask_exit_code=$?
     child_pid=0
-    # 3. Check result and log persistently
+    # Read output from file for cost extraction
+    if [ -f "$update_subtask_output_file" ]; then
+        update_subtask_command_output=$(cat "$update_subtask_output_file")
+    else
+        update_subtask_command_output="" # Ensure it's defined
+    fi
     result_status=""
-    if [ $update_subtask_exit_code -eq 0 ] && grep -q "Successfully updated subtask #1.1" "$update_subtask_output_file"; then
+    if [ $update_subtask_exit_code -eq 0 ] && echo "$update_subtask_command_output" | grep -q "Successfully updated subtask #1.1"; then
         log_success "update-subtask succeeded for $provider / $model_id (Verified Output)."
         result_status="SUCCESS"
+        # Extract and sum cost if successful
+        # shellcheck disable=SC2154 # total_fallback_cost is set
+        total_fallback_cost=$(extract_and_sum_cost "$update_subtask_command_output" "$total_fallback_cost")
+        log_info "Cumulative fallback AI cost after $provider / $model_id: $total_fallback_cost USD"
     elif [ $update_subtask_exit_code -eq 124 ]; then
         log_error "update-subtask TIMED OUT for $provider / $model_id. Check $update_subtask_output_file."
         result_status="FAILED_TIMEOUT"
     elif [ $update_subtask_exit_code -eq 130 ] || [ $update_subtask_exit_code -eq 143 ]; then
         log_error "update-subtask INTERRUPTED for $provider / $model_id."
-        result_status="INTERRUPTED" # Record interruption
-        # Don't exit the loop, allow script to finish or be interrupted again
+        result_status="INTERRUPTED"
     else
         log_error "update-subtask FAILED for $provider / $model_id (Exit Code: $update_subtask_exit_code). Check $update_subtask_output_file."
         result_status="FAILED"
     fi
-    # Append result to the persistent log file
     echo "$provider,$model_id,$result_status" >> "$PROGRESS_LOG_FILE"
-done # End of fallback verification loop
+done
-# --- Generate Final Verification Report to STDOUT ---
-# Report reads from the persistent PROGRESS_LOG_FILE
 echo ""
 echo "--- Fallback Model Verification Report (via $0) ---"
 echo "Executed inside run directory: $(pwd)"
@@ -254,17 +270,13 @@ echo ""
 echo "Models INTERRUPTED during test (Inconclusive - Rerun):"
 awk -F',' '$3 == "INTERRUPTED" { print "- " $1 " / " $2 }' "$PROGRESS_LOG_FILE" | sort
 echo ""
+# Print the total cost for this script's operations
+formatted_total_fallback_cost=$(printf "%.6f" "$total_fallback_cost")
+echo "Total Fallback AI Cost (this script run): $formatted_total_fallback_cost USD" # This line will be parsed
 echo "-------------------------------------------------------"
 echo ""
-# Don't clean up the progress log
-# if [ -f "$PROGRESS_LOG_FILE" ]; then
-#     rm "$PROGRESS_LOG_FILE"
-# fi
 log_info "Finished Fallback Model (generateObjectService) Verification Script"
-# Remove trap before exiting normally
 trap - INT TERM
-exit 0 # Exit successfully after printing the report
+exit 0
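The commented-out line in the verification loop records why a direct `$(...)` capture was rejected: it cannot feed `tee` and run in the background at the same time. The pattern the script settles on, background the piped command, `wait` on it, then read the file, can be sketched in isolation (the command below is a stand-in, not the real task-master invocation, and assumes e2e_helpers.sh is sourced):

    out_file="cmd_output.log"
    # Stand-in for the long-running CLI call:
    timeout 120s bash -c 'echo "Cost: \$0.000100 USD"' 2>&1 | tee "$out_file" &
    child_pid=$!        # PID of the background pipeline's last process (tee)
    wait "$child_pid"
    cmd_exit=$?
    cmd_output=$(cat "$out_file")   # read the file once the job is done
    total=$(extract_and_sum_cost "$cmd_output" "0.0")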