diff --git a/tests/e2e/e2e_helpers.sh b/tests/e2e/e2e_helpers.sh
index 8d3c6f25..9c1a47ca 100644
--- a/tests/e2e/e2e_helpers.sh
+++ b/tests/e2e/e2e_helpers.sh
@@ -5,6 +5,32 @@
 # It requires curl and jq to be installed.
 # It expects the project root path to be passed as the second argument.
 
+# --- New Function: extract_and_sum_cost ---
+# Takes a string containing command output and the current total cost.
+# Extracts costs (lines with "Cost: $X.YYYY USD" or "Total Cost: $X.YYYY USD")
+# from the output, sums them, and adds them to the current total.
+# Echoes the new total cost.
+extract_and_sum_cost() {
+    local command_output="$1"
+    local current_total_cost="$2"
+    local extracted_cost_sum="0.0"
+
+    # Grep for lines containing "Cost: $" or "Total Cost: $", then extract the numeric value.
+    # Handles cases like "Cost: $0.001234 USD" or "Total Cost: $0.001234 USD".
+    # Accumulates all costs found in the command_output.
+    while IFS= read -r line; do
+        # Extract the numeric part after '$' and before ' USD'
+        cost_value=$(echo "$line" | grep -o -E '(\$ ?[0-9]+\.[0-9]+)' | sed 's/\$ //g' | sed 's/\$//g')
+        if [[ -n "$cost_value" && "$cost_value" =~ ^[0-9]+\.[0-9]+$ ]]; then
+            extracted_cost_sum=$(echo "$extracted_cost_sum + $cost_value" | bc)
+        fi
+    done < <(echo "$command_output" | grep -E 'Cost: \$|Total Cost: \$')
+
+    new_total_cost=$(echo "$current_total_cost + $extracted_cost_sum" | bc)
+    echo "$new_total_cost"
+}
+export -f extract_and_sum_cost # Export for use in other scripts if sourced
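+
+# Usage sketch (illustrative values, not real provider output):
+#   sample_output=$'noise\nCost: $0.001234 USD\nTotal Cost: $0.002000 USD'
+#   total="0.10"
+#   total=$(extract_and_sum_cost "$sample_output" "$total")
+#   echo "$total"   # -> .103234 (bc omits the leading zero)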
+
 analyze_log_with_llm() {
     local log_file="$1"
     local project_root="$2" # Expect project root as the second argument
@@ -15,17 +41,17 @@ analyze_log_with_llm() {
     fi
 
     local env_file="${project_root}/.env" # Path to .env in project root
+    local supported_models_file="${project_root}/scripts/modules/supported-models.json"
     local provider_summary_log="provider_add_task_summary.log" # File summarizing provider test outcomes
 
     local api_key=""
-    # !!! IMPORTANT: Replace with your actual Claude API endpoint if different !!!
     local api_endpoint="https://api.anthropic.com/v1/messages"
-    # !!! IMPORTANT: Ensure this matches the variable name in your .env file !!!
     local api_key_name="ANTHROPIC_API_KEY"
+    local llm_analysis_model_id="claude-3-7-sonnet-20250219" # Model used for this analysis
+    local llm_analysis_provider="anthropic"
 
     echo "" # Add a newline before analysis starts
-    # Check for jq and curl
     if ! command -v jq &> /dev/null; then
         echo "[HELPER_ERROR] LLM Analysis requires 'jq'. Skipping analysis." >&2
         return 1
@@ -34,34 +60,31 @@ analyze_log_with_llm() {
         echo "[HELPER_ERROR] LLM Analysis requires 'curl'. Skipping analysis." >&2
         return 1
     fi
+    if ! command -v bc &> /dev/null; then
+        echo "[HELPER_ERROR] LLM Analysis requires 'bc' for cost calculation. Skipping analysis." >&2
+        return 1
+    fi
 
-    # Check for API Key in the PROJECT ROOT's .env file
     if [ -f "$env_file" ]; then
-        # Original assignment - Reading from project root .env
         api_key=$(grep "^${api_key_name}=" "$env_file" | sed -e "s/^${api_key_name}=//" -e 's/^[[:space:]"]*//' -e 's/[[:space:]"]*$//')
     fi
 
     if [ -z "$api_key" ]; then
-        echo "[HELPER_ERROR] ${api_key_name} not found or empty in project root .env file ($env_file). Skipping LLM analysis." >&2 # Updated error message
+        echo "[HELPER_ERROR] ${api_key_name} not found or empty in project root .env file ($env_file). Skipping LLM analysis." >&2
         return 1
     fi
 
-    # Log file path is passed as argument, need to ensure it exists relative to where the script *calling* this function is, OR use absolute path.
-    # Assuming absolute path or path relative to the initial PWD for simplicity here.
-    # The calling script passes the correct path relative to the original PWD.
     if [ ! -f "$log_file" ]; then
-        echo "[HELPER_ERROR] Log file not found: $log_file (PWD: $(pwd)). Check path passed to function. Skipping LLM analysis." >&2 # Updated error
+        echo "[HELPER_ERROR] Log file not found: $log_file (PWD: $(pwd)). Check path passed to function. Skipping LLM analysis." >&2
         return 1
     fi
 
     local log_content
-    # Read entire file, handle potential errors
     log_content=$(cat "$log_file") || {
         echo "[HELPER_ERROR] Failed to read log file: $log_file. Skipping LLM analysis." >&2
         return 1
     }
 
-    # Prepare the prompt using a quoted heredoc for literal interpretation
     read -r -d '' prompt_template <<'EOF'
 Analyze the following E2E test log for the task-master tool. The log contains output from various 'task-master' commands executed sequentially.
@@ -99,41 +122,34 @@ Here is the main log content:
 %s
 EOF
-# Note: The final %s is a placeholder for printf later
 
     local full_prompt
-    # Use printf to substitute the log content into the %s placeholder
     if ! printf -v full_prompt "$prompt_template" "$log_content"; then
         echo "[HELPER_ERROR] Failed to format prompt using printf." >&2
-        # It's unlikely printf itself fails, but good practice
         return 1
     fi
 
-    # Construct the JSON payload for Claude Messages API
     local payload
     payload=$(jq -n --arg prompt "$full_prompt" '{
-        "model": "claude-3-haiku-20240307", # Using Haiku for faster/cheaper testing
-        "max_tokens": 3072, # Increased slightly
+        "model": "'"$llm_analysis_model_id"'",
+        "max_tokens": 3072,
         "messages": [
            {"role": "user", "content": $prompt}
        ]
-        # "temperature": 0.0 # Optional: Lower temperature for more deterministic JSON output
    }') || {
        echo "[HELPER_ERROR] Failed to create JSON payload using jq." >&2
        return 1
    }
 
     local response_raw response_http_code response_body
-    # Capture body and HTTP status code separately
     response_raw=$(curl -s -w "\nHTTP_STATUS_CODE:%{http_code}" -X POST "$api_endpoint" \
       -H "Content-Type: application/json" \
       -H "x-api-key: $api_key" \
       -H "anthropic-version: 2023-06-01" \
       --data "$payload")
 
-    # Extract status code and body
     response_http_code=$(echo "$response_raw" | grep '^HTTP_STATUS_CODE:' | sed 's/HTTP_STATUS_CODE://')
-    response_body=$(echo "$response_raw" | sed '$d') # Remove last line (status code)
+    response_body=$(echo "$response_raw" | sed '$d')
 
     if [ "$response_http_code" != "200" ]; then
         echo "[HELPER_ERROR] LLM API call failed with HTTP status $response_http_code." >&2
@@ -146,17 +162,41 @@ EOF
         return 1
     fi
 
-    # Pipe the raw response body directly to the Node.js parser script
+    # Calculate cost of this LLM analysis call
+    local input_tokens output_tokens input_cost_per_1m output_cost_per_1m calculated_llm_cost
+    input_tokens=$(echo "$response_body" | jq -r '.usage.input_tokens // 0')
+    output_tokens=$(echo "$response_body" | jq -r '.usage.output_tokens // 0')
+
+    if [ -f "$supported_models_file" ]; then
+        model_cost_info=$(jq -r --arg provider "$llm_analysis_provider" --arg model_id "$llm_analysis_model_id" '
+            .[$provider][] | select(.id == $model_id) | .cost_per_1m_tokens
+        ' "$supported_models_file")
+
+        if [[ -n "$model_cost_info" && "$model_cost_info" != "null" ]]; then
+            input_cost_per_1m=$(echo "$model_cost_info" | jq -r '.input // 0')
+            output_cost_per_1m=$(echo "$model_cost_info" | jq -r '.output // 0')
+
+            calculated_llm_cost=$(echo "($input_tokens / 1000000 * $input_cost_per_1m) + ($output_tokens / 1000000 * $output_cost_per_1m)" | bc -l)
+            # Format to 6 decimal places
+            formatted_llm_cost=$(printf "%.6f" "$calculated_llm_cost")
+            echo "LLM Analysis AI Cost: $formatted_llm_cost USD" # This line will be parsed by run_e2e.sh
+        else
+            echo "[HELPER_WARNING] Cost data for model $llm_analysis_model_id not found in $supported_models_file. LLM analysis cost not calculated."
+        fi
+    else
+        echo "[HELPER_WARNING] $supported_models_file not found. LLM analysis cost not calculated."
+    fi
+    # --- End cost calculation for this call ---
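+
+    # Worked example with illustrative per-1M-token rates (not the values in
+    # supported-models.json): input_tokens=2000 at $3.00/1M plus output_tokens=500
+    # at $15.00/1M gives (2000/1000000*3.0) + (500/1000000*15.0) = 0.0135,
+    # printed as "0.013500" by the %.6f format.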
+
     if echo "$response_body" | node "${project_root}/tests/e2e/parse_llm_output.cjs" "$log_file"; then
         echo "[HELPER_SUCCESS] LLM analysis parsed and printed successfully by Node.js script."
-        return 0 # Success
+        return 0
     else
         local node_exit_code=$?
         echo "[HELPER_ERROR] Node.js parsing script failed with exit code ${node_exit_code}."
         echo "[HELPER_ERROR] Raw API response body (first 500 chars): $(echo "$response_body" | head -c 500)"
-        return 1 # Failure
+        return 1
     fi
 }
 
-# Export the function so it might be available to subshells if sourced
 export -f analyze_log_with_llm
\ No newline at end of file
diff --git a/tests/e2e/run_e2e.sh b/tests/e2e/run_e2e.sh
index 0ff47fae..61045f6e 100755
--- a/tests/e2e/run_e2e.sh
+++ b/tests/e2e/run_e2e.sh
@@ -60,9 +60,52 @@ MAIN_ENV_FILE="$TASKMASTER_SOURCE_DIR/.env"
 # ---
 
 # <<< Source the helper script >>>
+# shellcheck source=tests/e2e/e2e_helpers.sh
 source "$TASKMASTER_SOURCE_DIR/tests/e2e/e2e_helpers.sh"
+
+# ==========================================
+# >>> Global Helper Functions (defined in run_e2e.sh before export) <<<
+_format_duration() {
+    local total_seconds=$1
+    local minutes=$((total_seconds / 60))
+    local seconds=$((total_seconds % 60))
+    printf "%dm%02ds" "$minutes" "$seconds"
+}
+
+# Note: this relies on 'overall_start_time' being set globally before the function is called
+_get_elapsed_time_for_log() {
+    local current_time
+    current_time=$(date +%s)
+    # Use overall_start_time here, as start_time_for_helpers might not be relevant globally
+    local elapsed_seconds
+    elapsed_seconds=$((current_time - overall_start_time))
+    _format_duration "$elapsed_seconds"
+}
+
+log_info() {
+    echo "[INFO] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
+}
+
+log_success() {
+    echo "[SUCCESS] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
+}
+
+log_error() {
+    echo "[ERROR] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2
+}
+
+log_step() {
+    test_step_count=$((test_step_count + 1))
+    echo ""
+    echo "============================================="
+    echo " STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
+    echo "============================================="
+}
+# ==========================================
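+
+# Example: "_format_duration 125" prints "2m05s" (125 / 60 = 2, 125 % 60 = 5,
+# zero-padded by %02d).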
echo "[HELPER_ERROR] Raw API response body (first 500 chars): $(echo "$response_body" | head -c 500)" - return 1 # Failure + return 1 fi } -# Export the function so it might be available to subshells if sourced export -f analyze_log_with_llm \ No newline at end of file diff --git a/tests/e2e/run_e2e.sh b/tests/e2e/run_e2e.sh index 0ff47fae..61045f6e 100755 --- a/tests/e2e/run_e2e.sh +++ b/tests/e2e/run_e2e.sh @@ -60,9 +60,52 @@ MAIN_ENV_FILE="$TASKMASTER_SOURCE_DIR/.env" # --- # <<< Source the helper script >>> +# shellcheck source=tests/e2e/e2e_helpers.sh source "$TASKMASTER_SOURCE_DIR/tests/e2e/e2e_helpers.sh" + +# ========================================== +# >>> Global Helper Functions Defined in run_e2e.sh <<< +# --- Helper Functions (Define globally before export) --- +_format_duration() { + local total_seconds=$1 + local minutes=$((total_seconds / 60)) + local seconds=$((total_seconds % 60)) + printf "%dm%02ds" "$minutes" "$seconds" +} + +# Note: This relies on 'overall_start_time' being set globally before the function is called +_get_elapsed_time_for_log() { + local current_time + current_time=$(date +%s) + # Use overall_start_time here, as start_time_for_helpers might not be relevant globally + local elapsed_seconds + elapsed_seconds=$((current_time - overall_start_time)) + _format_duration "$elapsed_seconds" +} + +log_info() { + echo "[INFO] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" +} + +log_success() { + echo "[SUCCESS] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" +} + +log_error() { + echo "[ERROR] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2 +} + +log_step() { + test_step_count=$((test_step_count + 1)) + echo "" + echo "=============================================" + echo " STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" + echo "=============================================" +} +# ========================================== + # <<< Export helper functions for subshells >>> -export -f log_info log_success log_error log_step _format_duration _get_elapsed_time_for_log +export -f log_info log_success log_error log_step _format_duration _get_elapsed_time_for_log extract_and_sum_cost # --- Argument Parsing for Analysis-Only Mode --- # This remains the same, as it exits early if matched @@ -138,6 +181,7 @@ fi # Note: These are mainly for step numbering within the log now, not for final summary test_step_count=0 start_time_for_helpers=0 # Separate start time for helper functions inside the pipe +total_e2e_cost="0.0" # Initialize total E2E cost # --- # --- Log File Setup --- @@ -220,12 +264,16 @@ log_step() { fi # --- Dependency Checks --- - log_step "Checking for dependencies (jq)" + log_step "Checking for dependencies (jq, bc)" if ! command -v jq &> /dev/null; then log_error "Dependency 'jq' is not installed or not found in PATH. Please install jq (e.g., 'brew install jq' or 'sudo apt-get install jq')." exit 1 fi - log_success "Dependency 'jq' found." + if ! command -v bc &> /dev/null; then + log_error "Dependency 'bc' not installed (for cost calculation). Please install bc (e.g., 'brew install bc' or 'sudo apt-get install bc')." + exit 1 + fi + log_success "Dependencies 'jq' and 'bc' found." 
         if [ -n "$new_task_id" ]; then
             log_success "Add-task succeeded for $provider. New task ID: $new_task_id"
             echo "Provider $provider add-task SUCCESS (ID: $new_task_id)" >> provider_add_task_summary.log
@@ -775,4 +822,8 @@ else
     echo "[ERROR] [$formatted_duration_for_error] $(date +"%Y-%m-%d %H:%M:%S") Test run directory $TEST_RUN_DIR not found. Cannot perform LLM analysis." >&2
 fi
 
+# Final cost formatting
+formatted_total_e2e_cost=$(printf "%.6f" "$total_e2e_cost")
+echo "Total E2E AI Cost: $formatted_total_e2e_cost USD"
+
 exit $EXIT_CODE
\ No newline at end of file
diff --git a/tests/e2e/run_fallback_verification.sh b/tests/e2e/run_fallback_verification.sh
index 9546b2e6..7782296a 100755
--- a/tests/e2e/run_fallback_verification.sh
+++ b/tests/e2e/run_fallback_verification.sh
@@ -18,6 +18,25 @@ set -o pipefail
 
 # --- Embedded Helper Functions ---
 # Copied from e2e_helpers.sh to make this script standalone
+# OR source it if preferred and the path is reliable
+
+# <<< Determine SCRIPT_DIR and PROJECT_ROOT_DIR early >>>
+SCRIPT_DIR_FV="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+PROJECT_ROOT_DIR_FV="$( cd "$SCRIPT_DIR_FV/../.." &> /dev/null && pwd )" # Assumes script is in tests/e2e/
+
+# --- Try to Source e2e_helpers.sh ---
+E2E_HELPERS_PATH_FV="${PROJECT_ROOT_DIR_FV}/tests/e2e/e2e_helpers.sh"
+if [ -f "$E2E_HELPERS_PATH_FV" ]; then
+    # shellcheck source=tests/e2e/e2e_helpers.sh
+    source "$E2E_HELPERS_PATH_FV"
+    echo "[INFO FV] Sourced e2e_helpers.sh successfully."
+else
+    echo "[ERROR FV] e2e_helpers.sh not found at $E2E_HELPERS_PATH_FV. Cost extraction will fail."
+    # Define a no-op placeholder so the script doesn't break immediately;
+    # it echoes the current total unchanged, effectively adding 0.
+    extract_and_sum_cost() { echo "$2"; }
+fi
+
 
 _format_duration() {
     local total_seconds=$1
@@ -27,127 +46,112 @@
 }
 
 _get_elapsed_time_for_log() {
-    # Needs overall_start_time defined in the main script body
-    local current_time=$(date +%s)
-    local elapsed_seconds=$((current_time - overall_start_time))
+    local current_time
+    current_time=$(date +%s)
+    local elapsed_seconds
+    elapsed_seconds=$((current_time - overall_start_time)) # Needs overall_start_time
     _format_duration "$elapsed_seconds"
 }
 
 log_info() {
-    echo "[INFO] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
+    echo "[INFO FV] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
 }
 
 log_success() {
-    echo "[SUCCESS] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
+    echo "[SUCCESS FV] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
 }
 
 log_error() {
-    echo "[ERROR] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2
+    echo "[ERROR FV] [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1" >&2
 }
 
 log_step() {
-    # Needs test_step_count defined and incremented in the main script body
-    test_step_count=$((test_step_count + 1))
+    test_step_count=$((test_step_count + 1)) # Needs test_step_count
     echo ""
     echo "============================================="
-    echo " STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
+    echo " FV STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
     echo "============================================="
 }
 
 # --- Signal Handling ---
-# Global variable to hold child PID
 child_pid=0
-# Use a persistent log file name
-PROGRESS_LOG_FILE="fallback_verification_progress.log"
+PROGRESS_LOG_FILE="fallback_verification_progress.log" # Stays in run dir
 
 cleanup() {
-    echo "" # Newline after ^C
+    echo ""
     log_error "Interrupt received. Cleaning up any running child process..."
     if [ "$child_pid" -ne 0 ]; then
         log_info "Killing child process (PID: $child_pid) and its group..."
         kill -TERM -- "-$child_pid" 2>/dev/null || kill -KILL -- "-$child_pid" 2>/dev/null
         child_pid=0
     fi
-    # DO NOT delete the progress log file on interrupt
     log_info "Progress saved in: $PROGRESS_LOG_FILE"
-    exit 130 # Exit with code indicating interrupt
+    # Print current total cost on interrupt
+    if [[ -n "${total_fallback_cost+x}" && "$total_fallback_cost" != "0.0" ]]; then # Check if var is set and not initial
+        log_info "Current Total Fallback AI Cost at interruption: $total_fallback_cost USD"
+    fi
+    exit 130
 }
-# Trap SIGINT (Ctrl+C) and SIGTERM
 trap cleanup INT TERM
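+
+# Note: "${total_fallback_cost+x}" in cleanup expands to "x" only if the variable
+# has been set (even to an empty string), so the handler reports a cost only after
+# initialization; "kill -- -PID" signals the whole process group, not one process.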
 
 # --- Configuration ---
-# Determine the project root relative to this script's location
-# Use a robust method to find the script's own directory
-SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
-# Assumes this script is in tests/e2e/
-PROJECT_ROOT_DIR="$( cd "$SCRIPT_DIR/../.." &> /dev/null && pwd )"
-SUPPORTED_MODELS_FILE="$PROJECT_ROOT_DIR/scripts/modules/supported-models.json"
-BASE_RUNS_DIR="$PROJECT_ROOT_DIR/tests/e2e/_runs"
+# SCRIPT_DIR and PROJECT_ROOT_DIR already defined above
+SUPPORTED_MODELS_FILE="$PROJECT_ROOT_DIR_FV/scripts/modules/supported-models.json"
+BASE_RUNS_DIR="$PROJECT_ROOT_DIR_FV/tests/e2e/_runs"
 
 # --- Determine Target Run Directory ---
 TARGET_RUN_DIR=""
 if [ "$#" -ge 1 ] && [ -n "$1" ]; then
-    # Use provided argument if it exists
     TARGET_RUN_DIR="$1"
-    # Make path absolute if it's relative
     if [[ "$TARGET_RUN_DIR" != /* ]]; then
         TARGET_RUN_DIR="$(pwd)/$TARGET_RUN_DIR"
     fi
-    echo "[INFO] Using provided target run directory: $TARGET_RUN_DIR"
+    echo "[INFO FV] Using provided target run directory: $TARGET_RUN_DIR"
 else
-    # Find the latest run directory
-    echo "[INFO] No run directory provided, finding latest in $BASE_RUNS_DIR..."
+    echo "[INFO FV] No run directory provided, finding latest in $BASE_RUNS_DIR..."
     TARGET_RUN_DIR=$(ls -td "$BASE_RUNS_DIR"/run_* 2>/dev/null | head -n 1)
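+    # "ls -td .../run_*" lists matching directories newest-first by modification
+    # time, so "head -n 1" picks the most recent run.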
&> /dev/null && pwd )" -SUPPORTED_MODELS_FILE="$PROJECT_ROOT_DIR/scripts/modules/supported-models.json" -BASE_RUNS_DIR="$PROJECT_ROOT_DIR/tests/e2e/_runs" +# SCRIPT_DIR and PROJECT_ROOT_DIR already defined above +SUPPORTED_MODELS_FILE="$PROJECT_ROOT_DIR_FV/scripts/modules/supported-models.json" +BASE_RUNS_DIR="$PROJECT_ROOT_DIR_FV/tests/e2e/_runs" # --- Determine Target Run Directory --- TARGET_RUN_DIR="" if [ "$#" -ge 1 ] && [ -n "$1" ]; then - # Use provided argument if it exists TARGET_RUN_DIR="$1" - # Make path absolute if it's relative if [[ "$TARGET_RUN_DIR" != /* ]]; then TARGET_RUN_DIR="$(pwd)/$TARGET_RUN_DIR" fi - echo "[INFO] Using provided target run directory: $TARGET_RUN_DIR" + echo "[INFO FV] Using provided target run directory: $TARGET_RUN_DIR" else - # Find the latest run directory - echo "[INFO] No run directory provided, finding latest in $BASE_RUNS_DIR..." + echo "[INFO FV] No run directory provided, finding latest in $BASE_RUNS_DIR..." TARGET_RUN_DIR=$(ls -td "$BASE_RUNS_DIR"/run_* 2>/dev/null | head -n 1) if [ -z "$TARGET_RUN_DIR" ]; then - echo "[ERROR] No run directories found matching 'run_*' in $BASE_RUNS_DIR. Cannot proceed." >&2 + echo "[ERROR FV] No run directories found matching 'run_*' in $BASE_RUNS_DIR. Cannot proceed." >&2 exit 1 fi - echo "[INFO] Found latest run directory: $TARGET_RUN_DIR" + echo "[INFO FV] Found latest run directory: $TARGET_RUN_DIR" fi -# Validate the target directory if [ ! -d "$TARGET_RUN_DIR" ]; then - echo "[ERROR] Target run directory not found or is not a directory: $TARGET_RUN_DIR" >&2 + echo "[ERROR FV] Target run directory not found or is not a directory: $TARGET_RUN_DIR" >&2 exit 1 fi -# --- Change to Target Directory --- -echo "[INFO] Changing working directory to: $TARGET_RUN_DIR" +echo "[INFO FV] Changing working directory to: $TARGET_RUN_DIR" if ! cd "$TARGET_RUN_DIR"; then - echo "[ERROR] Failed to cd into target directory: $TARGET_RUN_DIR" >&2 + echo "[ERROR FV] Failed to cd into target directory: $TARGET_RUN_DIR" >&2 exit 1 fi -echo "[INFO] Now operating inside: $(pwd)" +echo "[INFO FV] Now operating inside: $(pwd)" + +overall_start_time=$(date +%s) # Initialize for logging helpers +test_step_count=0 # Initialize for logging helpers +total_fallback_cost="0.0" # Initialize total cost for this script -# --- Now we are inside the target run directory --- -overall_start_time=$(date +%s) -test_step_count=0 log_info "Starting fallback verification script execution in $(pwd)" log_info "Progress will be logged to: $(pwd)/$PROGRESS_LOG_FILE" -# --- Dependency Checks --- -log_step "Checking for dependencies (jq) in verification script" -if ! command -v jq &> /dev/null; then - log_error "Dependency 'jq' is not installed or not found in PATH." - exit 1 -fi -log_success "Dependency 'jq' found." +log_step "Checking for dependencies (jq, bc) in verification script" +if ! command -v jq &> /dev/null; then log_error "Dependency 'jq' not installed."; exit 1; fi +if ! command -v bc &> /dev/null; then log_error "Dependency 'bc' not installed (for cost calculation)."; exit 1; fi +log_success "Dependencies 'jq' and 'bc' found." + -# --- Verification Logic --- log_step "Starting/Resuming Fallback Model (generateObjectService) Verification" -# Ensure progress log exists, create if not touch "$PROGRESS_LOG_FILE" -# Ensure the supported models file exists (using absolute path) if [ ! -f "$SUPPORTED_MODELS_FILE" ]; then - log_error "supported-models.json not found at absolute path: $SUPPORTED_MODELS_FILE." 
+ log_error "supported-models.json not found at: $SUPPORTED_MODELS_FILE." exit 1 fi log_info "Using supported models file: $SUPPORTED_MODELS_FILE" -# Ensure subtask 1.1 exists (basic check, main script should guarantee) -# Check for tasks.json in the current directory (which is now the run dir) if [ ! -f "tasks/tasks.json" ]; then log_error "tasks/tasks.json not found in current directory ($(pwd)). Was this run directory properly initialized?" exit 1 @@ -158,78 +162,90 @@ if ! jq -e '.tasks[] | select(.id == 1) | .subtasks[] | select(.id == 1)' tasks/ fi log_info "Subtask 1.1 found in $(pwd)/tasks/tasks.json, proceeding with verification." -# Read providers and models using jq jq -c 'to_entries[] | .key as $provider | .value[] | select(.allowed_roles[]? == "fallback") | {provider: $provider, id: .id}' "$SUPPORTED_MODELS_FILE" | while IFS= read -r model_info; do provider=$(echo "$model_info" | jq -r '.provider') model_id=$(echo "$model_info" | jq -r '.id') - flag="" # Default flag + flag="" - # Check if already tested - # Use grep -Fq for fixed string and quiet mode if grep -Fq "${provider},${model_id}," "$PROGRESS_LOG_FILE"; then log_info "--- Skipping: $provider / $model_id (already tested, result in $PROGRESS_LOG_FILE) ---" + # Still need to sum up its cost if it was successful before + previous_test_output=$(grep -F "${provider},${model_id}," "$PROGRESS_LOG_FILE" | head -n 1) + # Assuming the output file for successful test exists and contains cost + prev_output_file="update_subtask_raw_output_${provider}_${model_id//\//_}.log" + if [[ "$previous_test_output" == *",SUCCESS"* && -f "$prev_output_file" ]]; then + # shellcheck disable=SC2154 # overall_start_time is set + log_info "Summing cost from previous successful test of $provider / $model_id from $prev_output_file" + # shellcheck disable=SC2154 # total_fallback_cost is set + total_fallback_cost=$(extract_and_sum_cost "$(cat "$prev_output_file")" "$total_fallback_cost") + log_info "Cumulative fallback AI cost after previous $provider / $model_id: $total_fallback_cost USD" + fi continue fi log_info "--- Verifying: $provider / $model_id ---" - # Determine provider flag - if [ "$provider" == "openrouter" ]; then - flag="--openrouter" - elif [ "$provider" == "ollama" ]; then - flag="--ollama" - fi + if [ "$provider" == "openrouter" ]; then flag="--openrouter"; fi + if [ "$provider" == "ollama" ]; then flag="--ollama"; fi - # 1. Set the main model if ! command -v task-master &> /dev/null; then log_error "task-master command not found." - echo "[INSTRUCTION] Please run 'npm link task-master-ai' in the project root first." + echo "[INSTRUCTION FV] Please run 'npm link task-master-ai' in the project root first." exit 1 fi log_info "Setting main model to $model_id ${flag:+using flag $flag}..." set_model_cmd="task-master models --set-main \"$model_id\" $flag" - model_set_status="SUCCESS" - if ! eval $set_model_cmd > /dev/null 2>&1; then + if ! eval "$set_model_cmd" > /dev/null 2>&1; then log_error "Failed to set main model for $provider / $model_id. Skipping test." echo "$provider,$model_id,SET_MODEL_FAILED" >> "$PROGRESS_LOG_FILE" - continue # Skip the actual test if setting fails + continue fi log_info "Set main model ok." - # 2. 
-
-    timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." > "$update_subtask_output_file" 2>&1 &
+
+    # Capture output to a file via tee; direct command substitution does not
+    # combine well with tee and backgrounding, so the file is read back after
+    # the command completes.
+    update_subtask_command_output=""
+    timeout 120s task-master update-subtask --id=1.1 --prompt="Simple test prompt to verify generateObjectService call." 2>&1 | tee "$update_subtask_output_file" &
     child_pid=$!
     wait "$child_pid"
     update_subtask_exit_code=$?
     child_pid=0
 
-    # 3. Check result and log persistently
+    # Read output from the file for cost extraction
+    if [ -f "$update_subtask_output_file" ]; then
+        update_subtask_command_output=$(cat "$update_subtask_output_file")
+    else
+        update_subtask_command_output="" # Ensure it's defined
+    fi
+
     result_status=""
-    if [ $update_subtask_exit_code -eq 0 ] && grep -q "Successfully updated subtask #1.1" "$update_subtask_output_file"; then
+    if [ $update_subtask_exit_code -eq 0 ] && echo "$update_subtask_command_output" | grep -q "Successfully updated subtask #1.1"; then
         log_success "update-subtask succeeded for $provider / $model_id (Verified Output)."
         result_status="SUCCESS"
+        # Extract and sum cost if successful
+        total_fallback_cost=$(extract_and_sum_cost "$update_subtask_command_output" "$total_fallback_cost")
+        log_info "Cumulative fallback AI cost after $provider / $model_id: $total_fallback_cost USD"
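+        # Exit-code map: 124 is GNU timeout's "timed out" status; 130 and 143 are
+        # 128+SIGINT and 128+SIGTERM respectively.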
result_status="FAILED" fi - # Append result to the persistent log file echo "$provider,$model_id,$result_status" >> "$PROGRESS_LOG_FILE" -done # End of fallback verification loop +done -# --- Generate Final Verification Report to STDOUT --- -# Report reads from the persistent PROGRESS_LOG_FILE echo "" echo "--- Fallback Model Verification Report (via $0) ---" echo "Executed inside run directory: $(pwd)" @@ -254,17 +270,13 @@ echo "" echo "Models INTERRUPTED during test (Inconclusive - Rerun):" awk -F',' '$3 == "INTERRUPTED" { print "- " $1 " / " $2 }' "$PROGRESS_LOG_FILE" | sort echo "" +# Print the total cost for this script's operations +formatted_total_fallback_cost=$(printf "%.6f" "$total_fallback_cost") +echo "Total Fallback AI Cost (this script run): $formatted_total_fallback_cost USD" # This line will be parsed echo "-------------------------------------------------------" echo "" -# Don't clean up the progress log -# if [ -f "$PROGRESS_LOG_FILE" ]; then -# rm "$PROGRESS_LOG_FILE" -# fi - log_info "Finished Fallback Model (generateObjectService) Verification Script" -# Remove trap before exiting normally trap - INT TERM - -exit 0 # Exit successfully after printing the report +exit 0