fix: add shlex fallback parser and heredoc warning

- Add _extract_primary_command() fallback when shlex.split() fails on complex nested quotes (e.g., docker exec with PHP)

- Returns primary command instead of empty list, allowing valid commands to proceed

- Add heredoc warning to coding prompt - sandbox blocks /tmp access for here documents

- All 162 security tests pass
This commit is contained in:
cabana8471
2026-01-29 08:04:01 +01:00
parent a12e4aa3b8
commit d47028d97a
2 changed files with 58 additions and 10 deletions

View File

@@ -202,17 +202,23 @@ Use the feature_mark_passing tool with feature_id=42
### STEP 7: COMMIT YOUR PROGRESS ### STEP 7: COMMIT YOUR PROGRESS
Make a descriptive git commit: Make a descriptive git commit.
**Git Commit Rules:**
- ALWAYS use simple `-m` flag for commit messages
- NEVER use heredocs (`cat <<EOF` or `<<'EOF'`) - they fail in sandbox mode with "can't create temp file for here document: operation not permitted"
- For multi-line messages, use multiple `-m` flags:
```bash ```bash
git add . git add .
git commit -m "Implement [feature name] - verified end-to-end git commit -m "Implement [feature name] - verified end-to-end" -m "- Added [specific changes]" -m "- Tested with browser automation" -m "- Marked feature #X as passing"
```
- Added [specific changes] Or use a single descriptive message:
- Tested with browser automation
- Marked feature #X as passing ```bash
- Screenshots in verification/ directory git add .
" git commit -m "feat: implement [feature name] with browser verification"
``` ```
### STEP 8: UPDATE PROGRESS NOTES ### STEP 8: UPDATE PROGRESS NOTES

View File

@@ -140,6 +140,45 @@ def split_command_segments(command_string: str) -> list[str]:
return result return result
def _extract_primary_command(segment: str) -> str | None:
"""
Fallback command extraction when shlex fails.
Extracts the first word that looks like a command, handling cases
like complex docker exec commands with nested quotes.
Args:
segment: The command segment to parse
Returns:
The primary command name, or None if extraction fails
"""
# Remove leading whitespace
segment = segment.lstrip()
if not segment:
return None
# Skip env var assignments at start (VAR=value cmd)
words = segment.split()
while words and "=" in words[0] and not words[0].startswith("="):
words = words[1:]
if not words:
return None
# Extract first token (the command)
first_word = words[0]
# Match valid command characters (alphanumeric, dots, underscores, hyphens, slashes)
match = re.match(r"^([a-zA-Z0-9_./-]+)", first_word)
if match:
cmd = match.group(1)
return os.path.basename(cmd)
return None
def extract_commands(command_string: str) -> list[str]: def extract_commands(command_string: str) -> list[str]:
""" """
Extract command names from a shell command string. Extract command names from a shell command string.
@@ -156,7 +195,7 @@ def extract_commands(command_string: str) -> list[str]:
commands = [] commands = []
# shlex doesn't treat ; as a separator, so we need to pre-process # shlex doesn't treat ; as a separator, so we need to pre-process
import re # (re is already imported at module level)
# Split on semicolons that aren't inside quotes (simple heuristic) # Split on semicolons that aren't inside quotes (simple heuristic)
# This handles common cases like "echo hello; ls" # This handles common cases like "echo hello; ls"
@@ -171,8 +210,11 @@ def extract_commands(command_string: str) -> list[str]:
tokens = shlex.split(segment) tokens = shlex.split(segment)
except ValueError: except ValueError:
# Malformed command (unclosed quotes, etc.) # Malformed command (unclosed quotes, etc.)
# Return empty to trigger block (fail-safe) # Try fallback extraction instead of blocking entirely
return [] fallback_cmd = _extract_primary_command(segment)
if fallback_cmd:
commands.append(fallback_cmd)
continue
if not tokens: if not tokens:
continue continue