feat(security): add audit logging for shlex fallback parser

- Add debug logging when shlex fallback extraction is used, capturing
  both successful extractions and failures for security auditing
- Add a test case for a docker command with nested quotes, which triggers
  the fallback parser
- Remove the redundant comment about the `re` import (`re` is already
  imported at module level)

Follow-up to the PR #127 code review. These changes:
- Enable tracking of malformed command patterns in production logs
  (messages are emitted at DEBUG level, so that level must be enabled)
- Verify that the fallback parser handles the exact docker exec case reported

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Auto
2026-01-29 09:19:48 +02:00
parent 77b91caa85
commit ce6da81a34
2 changed files with 16 additions and 1 deletions

View File

@@ -6,6 +6,7 @@ Pre-tool-use hooks that validate bash commands for security.
Uses an allowlist approach - only explicitly permitted commands can run. Uses an allowlist approach - only explicitly permitted commands can run.
""" """
import logging
import os import os
import re import re
import shlex import shlex
@@ -14,6 +15,9 @@ from typing import Optional
import yaml import yaml
# Logger for security-related events (fallback parsing, validation failures, etc.)
logger = logging.getLogger(__name__)
# Regex pattern for valid pkill process names (no regex metacharacters allowed) # Regex pattern for valid pkill process names (no regex metacharacters allowed)
# Matches alphanumeric names with dots, underscores, and hyphens # Matches alphanumeric names with dots, underscores, and hyphens
VALID_PROCESS_NAME_PATTERN = re.compile(r"^[A-Za-z0-9._-]+$") VALID_PROCESS_NAME_PATTERN = re.compile(r"^[A-Za-z0-9._-]+$")
@@ -195,7 +199,6 @@ def extract_commands(command_string: str) -> list[str]:
commands = [] commands = []
# shlex doesn't treat ; as a separator, so we need to pre-process # shlex doesn't treat ; as a separator, so we need to pre-process
# (re is already imported at module level)
# Split on semicolons that aren't inside quotes (simple heuristic) # Split on semicolons that aren't inside quotes (simple heuristic)
# This handles common cases like "echo hello; ls" # This handles common cases like "echo hello; ls"
@@ -213,7 +216,17 @@ def extract_commands(command_string: str) -> list[str]:
# Try fallback extraction instead of blocking entirely # Try fallback extraction instead of blocking entirely
fallback_cmd = _extract_primary_command(segment) fallback_cmd = _extract_primary_command(segment)
if fallback_cmd: if fallback_cmd:
logger.debug(
"shlex fallback used: segment=%r -> command=%r",
segment,
fallback_cmd,
)
commands.append(fallback_cmd) commands.append(fallback_cmd)
else:
logger.debug(
"shlex fallback failed: segment=%r (no command extracted)",
segment,
)
continue continue
if not tokens: if not tokens:

View File

@@ -107,6 +107,8 @@ def test_extract_commands():
("/usr/bin/node script.js", ["node"]), ("/usr/bin/node script.js", ["node"]),
("VAR=value ls", ["ls"]), ("VAR=value ls", ["ls"]),
("git status || git init", ["git", "git"]), ("git status || git init", ["git", "git"]),
# Fallback parser test: complex nested quotes that break shlex
('docker exec container php -r "echo \\"test\\";"', ["docker"]),
] ]
for cmd, expected in test_cases: for cmd, expected in test_cases: