Files
autocoder/test_security_integration.py
Marian Paul f1b48be10e feat: increase command limit to 100 and add optimization guide
Changes:
- Increase command limit from 50 to 100 per project
- Add examples/OPTIMIZE_CONFIG.md with optimization strategies
- Update all documentation references (50 → 100)
- Update tests for new limit

Rationale:
- 50 was too restrictive for projects with many tools (Flutter, etc.)
- Users were unknowingly exceeding limit by listing subcommands
- 100 provides headroom while maintaining security
- New guide teaches wildcard optimization (flutter* vs listing each subcommand)

UI feedback idea: Show command count and optimization suggestions
(tracked for Phase 3 or future enhancement)
2026-01-22 13:29:33 +01:00

412 lines
13 KiB
Python

#!/usr/bin/env python3
"""
Security Integration Tests
===========================
Integration tests that spin up real agent instances and verify
bash command security policies are enforced correctly.
These tests actually run the agent (not just unit tests), so they:
- Create real temporary projects
- Configure real YAML files
- Execute the agent with test prompts
- Parse agent output to verify behavior
Run with: python test_security_integration.py
"""
import asyncio
import os
import sys
import tempfile
from pathlib import Path
from security import bash_security_hook
def test_blocked_command_via_hook():
"""Test that hardcoded blocked commands are rejected by the security hook."""
print("\n" + "=" * 70)
print("TEST 1: Hardcoded blocked command (sudo)")
print("=" * 70)
with tempfile.TemporaryDirectory() as tmpdir:
project_dir = Path(tmpdir)
# Create minimal project structure
autocoder_dir = project_dir / ".autocoder"
autocoder_dir.mkdir()
(autocoder_dir / "allowed_commands.yaml").write_text(
"version: 1\ncommands: []"
)
# Try to run sudo (should be blocked)
input_data = {
"tool_name": "Bash",
"tool_input": {"command": "sudo apt install nginx"},
}
context = {"project_dir": str(project_dir)}
result = asyncio.run(bash_security_hook(input_data, context=context))
if result.get("decision") == "block":
print("✅ PASS: sudo was blocked")
print(f" Reason: {result.get('reason', 'N/A')[:80]}...")
return True
else:
print("❌ FAIL: sudo should have been blocked")
print(f" Got: {result}")
return False
def test_allowed_command_via_hook():
"""Test that default allowed commands work."""
print("\n" + "=" * 70)
print("TEST 2: Default allowed command (ls)")
print("=" * 70)
with tempfile.TemporaryDirectory() as tmpdir:
project_dir = Path(tmpdir)
# Create minimal project structure
autocoder_dir = project_dir / ".autocoder"
autocoder_dir.mkdir()
(autocoder_dir / "allowed_commands.yaml").write_text(
"version: 1\ncommands: []"
)
# Try to run ls (should be allowed - in default allowlist)
input_data = {"tool_name": "Bash", "tool_input": {"command": "ls -la"}}
context = {"project_dir": str(project_dir)}
result = asyncio.run(bash_security_hook(input_data, context=context))
if result.get("decision") != "block":
print("✅ PASS: ls was allowed (default allowlist)")
return True
else:
print("❌ FAIL: ls should have been allowed")
print(f" Reason: {result.get('reason', 'N/A')}")
return False
def test_non_allowed_command_via_hook():
"""Test that commands not in any allowlist are blocked."""
print("\n" + "=" * 70)
print("TEST 3: Non-allowed command (wget)")
print("=" * 70)
with tempfile.TemporaryDirectory() as tmpdir:
project_dir = Path(tmpdir)
# Create minimal project structure
autocoder_dir = project_dir / ".autocoder"
autocoder_dir.mkdir()
(autocoder_dir / "allowed_commands.yaml").write_text(
"version: 1\ncommands: []"
)
# Try to run wget (not in default allowlist)
input_data = {
"tool_name": "Bash",
"tool_input": {"command": "wget https://example.com"},
}
context = {"project_dir": str(project_dir)}
result = asyncio.run(bash_security_hook(input_data, context=context))
if result.get("decision") == "block":
print("✅ PASS: wget was blocked (not in allowlist)")
print(f" Reason: {result.get('reason', 'N/A')[:80]}...")
return True
else:
print("❌ FAIL: wget should have been blocked")
return False
def test_project_config_allows_command():
"""Test that adding a command to project config allows it."""
print("\n" + "=" * 70)
print("TEST 4: Project config allows command (swift)")
print("=" * 70)
with tempfile.TemporaryDirectory() as tmpdir:
project_dir = Path(tmpdir)
# Create project config with swift allowed
autocoder_dir = project_dir / ".autocoder"
autocoder_dir.mkdir()
(autocoder_dir / "allowed_commands.yaml").write_text("""version: 1
commands:
- name: swift
description: Swift compiler
- name: xcodebuild
description: Xcode build system
""")
# Try to run swift (should be allowed via project config)
input_data = {"tool_name": "Bash", "tool_input": {"command": "swift --version"}}
context = {"project_dir": str(project_dir)}
result = asyncio.run(bash_security_hook(input_data, context=context))
if result.get("decision") != "block":
print("✅ PASS: swift was allowed (project config)")
return True
else:
print("❌ FAIL: swift should have been allowed")
print(f" Reason: {result.get('reason', 'N/A')}")
return False
def test_pattern_matching():
"""Test that wildcard patterns work correctly."""
print("\n" + "=" * 70)
print("TEST 5: Pattern matching (swift*)")
print("=" * 70)
with tempfile.TemporaryDirectory() as tmpdir:
project_dir = Path(tmpdir)
# Create project config with swift* pattern
autocoder_dir = project_dir / ".autocoder"
autocoder_dir.mkdir()
(autocoder_dir / "allowed_commands.yaml").write_text("""version: 1
commands:
- name: swift*
description: All Swift tools
""")
# Try to run swiftlint (should match swift* pattern)
input_data = {"tool_name": "Bash", "tool_input": {"command": "swiftlint"}}
context = {"project_dir": str(project_dir)}
result = asyncio.run(bash_security_hook(input_data, context=context))
if result.get("decision") != "block":
print("✅ PASS: swiftlint matched swift* pattern")
return True
else:
print("❌ FAIL: swiftlint should have matched swift*")
print(f" Reason: {result.get('reason', 'N/A')}")
return False
def test_org_blocklist_enforcement():
"""Test that org-level blocked commands cannot be overridden."""
print("\n" + "=" * 70)
print("TEST 6: Org blocklist enforcement (terraform)")
print("=" * 70)
with tempfile.TemporaryDirectory() as tmphome:
with tempfile.TemporaryDirectory() as tmpproject:
# Setup fake home directory with org config
original_home = os.environ.get("HOME")
os.environ["HOME"] = tmphome
org_dir = Path(tmphome) / ".autocoder"
org_dir.mkdir()
(org_dir / "config.yaml").write_text("""version: 1
allowed_commands: []
blocked_commands:
- terraform
- kubectl
""")
project_dir = Path(tmpproject)
autocoder_dir = project_dir / ".autocoder"
autocoder_dir.mkdir()
# Try to allow terraform in project config (should fail - org blocked)
(autocoder_dir / "allowed_commands.yaml").write_text("""version: 1
commands:
- name: terraform
description: Infrastructure as code
""")
# Try to run terraform (should be blocked by org config)
input_data = {
"tool_name": "Bash",
"tool_input": {"command": "terraform apply"},
}
context = {"project_dir": str(project_dir)}
result = asyncio.run(bash_security_hook(input_data, context=context))
# Restore HOME
if original_home:
os.environ["HOME"] = original_home
else:
del os.environ["HOME"]
if result.get("decision") == "block":
print("✅ PASS: terraform blocked by org config (cannot override)")
print(f" Reason: {result.get('reason', 'N/A')[:80]}...")
return True
else:
print("❌ FAIL: terraform should have been blocked by org config")
return False
def test_org_allowlist_inheritance():
"""Test that org-level allowed commands are available to projects."""
print("\n" + "=" * 70)
print("TEST 7: Org allowlist inheritance (jq)")
print("=" * 70)
with tempfile.TemporaryDirectory() as tmphome:
with tempfile.TemporaryDirectory() as tmpproject:
# Setup fake home directory with org config
original_home = os.environ.get("HOME")
os.environ["HOME"] = tmphome
org_dir = Path(tmphome) / ".autocoder"
org_dir.mkdir()
(org_dir / "config.yaml").write_text("""version: 1
allowed_commands:
- name: jq
description: JSON processor
blocked_commands: []
""")
project_dir = Path(tmpproject)
autocoder_dir = project_dir / ".autocoder"
autocoder_dir.mkdir()
(autocoder_dir / "allowed_commands.yaml").write_text(
"version: 1\ncommands: []"
)
# Try to run jq (should be allowed via org config)
input_data = {"tool_name": "Bash", "tool_input": {"command": "jq '.data'"}}
context = {"project_dir": str(project_dir)}
result = asyncio.run(bash_security_hook(input_data, context=context))
# Restore HOME
if original_home:
os.environ["HOME"] = original_home
else:
del os.environ["HOME"]
if result.get("decision") != "block":
print("✅ PASS: jq allowed via org config")
return True
else:
print("❌ FAIL: jq should have been allowed via org config")
print(f" Reason: {result.get('reason', 'N/A')}")
return False
def test_invalid_yaml_ignored():
"""Test that invalid YAML config is safely ignored."""
print("\n" + "=" * 70)
print("TEST 8: Invalid YAML safely ignored")
print("=" * 70)
with tempfile.TemporaryDirectory() as tmpdir:
project_dir = Path(tmpdir)
# Create invalid YAML
autocoder_dir = project_dir / ".autocoder"
autocoder_dir.mkdir()
(autocoder_dir / "allowed_commands.yaml").write_text("invalid: yaml: content:")
# Try to run ls (should still work - falls back to defaults)
input_data = {"tool_name": "Bash", "tool_input": {"command": "ls"}}
context = {"project_dir": str(project_dir)}
result = asyncio.run(bash_security_hook(input_data, context=context))
if result.get("decision") != "block":
print("✅ PASS: Invalid YAML ignored, defaults still work")
return True
else:
print("❌ FAIL: Should fall back to defaults when YAML is invalid")
print(f" Reason: {result.get('reason', 'N/A')}")
return False
def test_100_command_limit():
"""Test that configs with >100 commands are rejected."""
print("\n" + "=" * 70)
print("TEST 9: 100 command limit enforced")
print("=" * 70)
with tempfile.TemporaryDirectory() as tmpdir:
project_dir = Path(tmpdir)
# Create config with 101 commands
autocoder_dir = project_dir / ".autocoder"
autocoder_dir.mkdir()
commands = [
f" - name: cmd{i}\n description: Command {i}" for i in range(101)
]
(autocoder_dir / "allowed_commands.yaml").write_text(
"version: 1\ncommands:\n" + "\n".join(commands)
)
# Try to run cmd0 (should be blocked - config is invalid)
input_data = {"tool_name": "Bash", "tool_input": {"command": "cmd0"}}
context = {"project_dir": str(project_dir)}
result = asyncio.run(bash_security_hook(input_data, context=context))
if result.get("decision") == "block":
print("✅ PASS: Config with >100 commands rejected")
return True
else:
print("❌ FAIL: Config with >100 commands should be rejected")
return False
def main():
print("=" * 70)
print(" SECURITY INTEGRATION TESTS")
print("=" * 70)
print("\nThese tests verify bash command security policies using real hooks.")
print("They test the actual security.py implementation, not just unit tests.\n")
tests = [
test_blocked_command_via_hook,
test_allowed_command_via_hook,
test_non_allowed_command_via_hook,
test_project_config_allows_command,
test_pattern_matching,
test_org_blocklist_enforcement,
test_org_allowlist_inheritance,
test_invalid_yaml_ignored,
test_100_command_limit,
]
passed = 0
failed = 0
for test in tests:
try:
if test():
passed += 1
else:
failed += 1
except Exception as e:
print(f"❌ FAIL: Test raised exception: {e}")
import traceback
traceback.print_exc()
failed += 1
print("\n" + "=" * 70)
print(f" RESULTS: {passed} passed, {failed} failed")
print("=" * 70)
if failed == 0:
print("\n✅ ALL INTEGRATION TESTS PASSED")
return 0
else:
print(f"\n{failed} INTEGRATION TEST(S) FAILED")
return 1
if __name__ == "__main__":
sys.exit(main())