fix: add shlex fallback parser and heredoc warning

- Add _extract_primary_command() fallback when shlex.split() fails on complex nested quotes (e.g., docker exec with PHP)
- Returns primary command instead of empty list, allowing valid commands to proceed
- Add heredoc warning to coding prompt - sandbox blocks /tmp access for here documents
- All 162 security tests pass
2026-01-30 06:12:06 +00:00 · 2026-01-29 08:04:01 +01:00
parent a12e4aa3b8
commit d47028d97a
2 changed files with 58 additions and 10 deletions
--- a/.claude/templates/coding_prompt.template.md
+++ b/.claude/templates/coding_prompt.template.md
@@ -202,17 +202,23 @@ Use the feature_mark_passing tool with feature_id=42
 ### STEP 7: COMMIT YOUR PROGRESS
-Make a descriptive git commit:
+Make a descriptive git commit.
 **Git Commit Rules:**
 - ALWAYS use simple `-m` flag for commit messages
 - NEVER use heredocs (`cat <<EOF` or `<<'EOF'`) - they fail in sandbox mode with "can't create temp file for here document: operation not permitted"
 - For multi-line messages, use multiple `-m` flags:
 ```bash
 git add .
-git commit -m "Implement [feature name] - verified end-to-end
+git commit -m "Implement [feature name] - verified end-to-end" -m "- Added [specific changes]" -m "- Tested with browser automation" -m "- Marked feature #X as passing"
 ```
- Added [specific changes]
+Or use a single descriptive message:
- Tested with browser automation
+
- Marked feature #X as passing
+```bash
- Screenshots in verification/ directory
+git add .
-"
+git commit -m "feat: implement [feature name] with browser verification"
 ```
 ### STEP 8: UPDATE PROGRESS NOTES
--- a/security.py
+++ b/security.py
@@ -140,6 +140,45 @@ def split_command_segments(command_string: str) -> list[str]:
    return result
 def _extract_primary_command(segment: str) -> str | None:
    """
    Fallback command extraction when shlex fails.
    Extracts the first word that looks like a command, handling cases
    like complex docker exec commands with nested quotes.
    Args:
        segment: The command segment to parse
    Returns:
        The primary command name, or None if extraction fails
    """
    # Remove leading whitespace
    segment = segment.lstrip()
    if not segment:
        return None
    # Skip env var assignments at start (VAR=value cmd)
    words = segment.split()
    while words and "=" in words[0] and not words[0].startswith("="):
        words = words[1:]
    if not words:
        return None
    # Extract first token (the command)
    first_word = words[0]
    # Match valid command characters (alphanumeric, dots, underscores, hyphens, slashes)
    match = re.match(r"^([a-zA-Z0-9_./-]+)", first_word)
    if match:
        cmd = match.group(1)
        return os.path.basename(cmd)
    return None
 def extract_commands(command_string: str) -> list[str]:
    """
    Extract command names from a shell command string.
@@ -156,7 +195,7 @@ def extract_commands(command_string: str) -> list[str]:
    commands = []
    # shlex doesn't treat ; as a separator, so we need to pre-process
-    import re
+    # (re is already imported at module level)
    # Split on semicolons that aren't inside quotes (simple heuristic)
    # This handles common cases like "echo hello; ls"
@@ -171,8 +210,11 @@ def extract_commands(command_string: str) -> list[str]:
            tokens = shlex.split(segment)
        except ValueError:
            # Malformed command (unclosed quotes, etc.)
-            # Return empty to trigger block (fail-safe)
+            # Try fallback extraction instead of blocking entirely
-            return []
+            fallback_cmd = _extract_primary_command(segment)
            if fallback_cmd:
                commands.append(fallback_cmd)
            continue
        if not tokens:
            continue