feat(cli): polite deep merge for settings.json and support JSONC (#1874)

* feat(cli): polite deep merge for settings.json with json5 and safe atomic write

* fix(cli): prevent temp fd leak and align merge-policy docs
This commit is contained in:
Hamilton Snow
2026-03-17 22:51:13 +08:00
committed by GitHub
parent d2ecf6560d
commit c12b8c1763
4 changed files with 303 additions and 27 deletions

View File

@@ -35,6 +35,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- feat(cli): polite deep merge for VSCode settings.json with JSONC support via `json5` and zero-data-loss fallbacks
- feat(presets): Pluggable preset system with preset catalog and template resolver
- Preset manifest (`preset.yml`) with validation for artifact, command, and script types
- `PresetManifest`, `PresetRegistry`, `PresetManager`, `PresetCatalog`, `PresetResolver` classes in `src/specify_cli/presets.py`

View File

@@ -14,6 +14,7 @@ dependencies = [
"pyyaml>=6.0",
"packaging>=23.0",
"pathspec>=0.12.0",
"json5>=0.13.0",
]
[project.scripts]

View File

@@ -7,6 +7,7 @@
# "platformdirs",
# "readchar",
# "httpx",
# "json5",
# ]
# ///
"""
@@ -32,6 +33,8 @@ import tempfile
import shutil
import shlex
import json
import json5
import stat
import yaml
from pathlib import Path
from typing import Any, Optional, Tuple
@@ -654,37 +657,82 @@ def init_git_repo(project_path: Path, quiet: bool = False) -> Tuple[bool, Option
os.chdir(original_cwd)
def handle_vscode_settings(sub_item, dest_file, rel_path, verbose=False, tracker=None) -> None:
"""Handle merging or copying of .vscode/settings.json files."""
"""Handle merging or copying of .vscode/settings.json files.
Note: when merge produces changes, rewritten output is normalized JSON and
existing JSONC comments/trailing commas are not preserved.
"""
def log(message, color="green"):
if verbose and not tracker:
console.print(f"[{color}]{message}[/] {rel_path}")
def atomic_write_json(target_file: Path, payload: dict[str, Any]) -> None:
"""Atomically write JSON while preserving existing mode bits when possible."""
temp_path: Optional[Path] = None
try:
with tempfile.NamedTemporaryFile(
mode='w',
encoding='utf-8',
dir=target_file.parent,
prefix=f"{target_file.name}.",
suffix=".tmp",
delete=False,
) as f:
temp_path = Path(f.name)
json.dump(payload, f, indent=4)
f.write('\n')
if target_file.exists():
try:
existing_stat = target_file.stat()
os.chmod(temp_path, stat.S_IMODE(existing_stat.st_mode))
if hasattr(os, "chown"):
try:
os.chown(temp_path, existing_stat.st_uid, existing_stat.st_gid)
except PermissionError:
# Best-effort owner/group preservation without requiring elevated privileges.
pass
except OSError:
# Best-effort metadata preservation; data safety is prioritized.
pass
os.replace(temp_path, target_file)
except Exception:
if temp_path and temp_path.exists():
temp_path.unlink()
raise
try:
with open(sub_item, 'r', encoding='utf-8') as f:
new_settings = json.load(f)
# json5 natively supports comments and trailing commas (JSONC)
new_settings = json5.load(f)
if dest_file.exists():
merged = merge_json_files(dest_file, new_settings, verbose=verbose and not tracker)
with open(dest_file, 'w', encoding='utf-8') as f:
json.dump(merged, f, indent=4)
f.write('\n')
log("Merged:", "green")
if merged is not None:
atomic_write_json(dest_file, merged)
log("Merged:", "green")
log("Note: comments/trailing commas are normalized when rewritten", "yellow")
else:
log("Skipped merge (preserved existing settings)", "yellow")
else:
shutil.copy2(sub_item, dest_file)
log("Copied (no existing settings.json):", "blue")
except Exception as e:
log(f"Warning: Could not merge, copying instead: {e}", "yellow")
shutil.copy2(sub_item, dest_file)
log(f"Warning: Could not merge settings: {e}", "yellow")
if not dest_file.exists():
shutil.copy2(sub_item, dest_file)
def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = False) -> dict:
def merge_json_files(existing_path: Path, new_content: Any, verbose: bool = False) -> Optional[dict[str, Any]]:
"""Merge new JSON content into existing JSON file.
Performs a deep merge where:
Performs a polite deep merge where:
- New keys are added
- Existing keys are preserved unless overwritten by new content
- Nested dictionaries are merged recursively
- Lists and other values are replaced (not merged)
- Existing keys are preserved (not overwritten) unless both values are dictionaries
- Nested dictionaries are merged recursively only when both sides are dictionaries
- Lists and other values are preserved from base if they exist
Args:
existing_path: Path to existing JSON file
@@ -692,28 +740,64 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal
verbose: Whether to print merge details
Returns:
Merged JSON content as dict
Merged JSON content as dict, or None if the existing file should be left untouched.
"""
try:
with open(existing_path, 'r', encoding='utf-8') as f:
existing_content = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
# If file doesn't exist or is invalid, just use new content
# Load existing content first to have a safe fallback
existing_content = None
exists = existing_path.exists()
if exists:
try:
with open(existing_path, 'r', encoding='utf-8') as f:
# Handle comments (JSONC) natively with json5
# Note: json5 handles BOM automatically
existing_content = json5.load(f)
except FileNotFoundError:
# Handle race condition where file is deleted after exists() check
exists = False
except Exception as e:
if verbose:
console.print(f"[yellow]Warning: Could not read or parse existing JSON in {existing_path.name} ({e}).[/yellow]")
# Skip merge to preserve existing file if unparseable or inaccessible (e.g. PermissionError)
return None
# Validate template content
if not isinstance(new_content, dict):
if verbose:
console.print(f"[yellow]Warning: Template content for {existing_path.name} is not a dictionary. Preserving existing settings.[/yellow]")
return None
if not exists:
return new_content
def deep_merge(base: dict, update: dict) -> dict:
"""Recursively merge update dict into base dict."""
# If existing content parsed but is not a dict, skip merge to avoid data loss
if not isinstance(existing_content, dict):
if verbose:
console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object. Skipping merge to avoid data loss.[/yellow]")
return None
def deep_merge_polite(base: dict[str, Any], update: dict[str, Any]) -> dict[str, Any]:
"""Recursively merge update dict into base dict, preserving base values."""
result = base.copy()
for key, value in update.items():
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
# Recursively merge nested dictionaries
result[key] = deep_merge(result[key], value)
else:
# Add new key or replace existing value
if key not in result:
# Add new key
result[key] = value
elif isinstance(result[key], dict) and isinstance(value, dict):
# Recursively merge nested dictionaries
result[key] = deep_merge_polite(result[key], value)
else:
# Key already exists and values are not both dicts; preserve existing value.
# This ensures user settings aren't overwritten by template defaults.
pass
return result
merged = deep_merge(existing_content, new_content)
merged = deep_merge_polite(existing_content, new_content)
# Detect if anything actually changed. If not, return None so the caller
# can skip rewriting the file (preserving user's comments/formatting).
if merged == existing_content:
return None
if verbose:
console.print(f"[cyan]Merged JSON file:[/cyan] {existing_path.name}")

190
tests/test_merge.py Normal file
View File

@@ -0,0 +1,190 @@
import stat
from specify_cli import merge_json_files
from specify_cli import handle_vscode_settings
# --- Dimension 2: Polite Deep Merge Strategy ---
def test_merge_json_files_type_mismatch_preservation(tmp_path):
"""If user has a string but template wants a dict, PRESERVE user's string."""
existing_file = tmp_path / "settings.json"
# User might have overridden a setting with a simple string or different type
existing_file.write_text('{"chat.editor.fontFamily": "CustomFont"}')
# Template might expect a dict for the same key (hypothetically)
new_settings = {
"chat.editor.fontFamily": {"font": "TemplateFont"}
}
merged = merge_json_files(existing_file, new_settings)
# Result is None because user settings were preserved and nothing else changed
assert merged is None
def test_merge_json_files_deep_nesting(tmp_path):
"""Verify deep recursive merging of new keys."""
existing_file = tmp_path / "settings.json"
existing_file.write_text("""
{
"a": {
"b": {
"c": 1
}
}
}
""")
new_settings = {
"a": {
"b": {
"d": 2 # New nested key
},
"e": 3 # New mid-level key
}
}
merged = merge_json_files(existing_file, new_settings)
assert merged["a"]["b"]["c"] == 1
assert merged["a"]["b"]["d"] == 2
assert merged["a"]["e"] == 3
def test_merge_json_files_empty_existing(tmp_path):
"""Merging into an empty/new file."""
existing_file = tmp_path / "empty.json"
existing_file.write_text("{}")
new_settings = {"a": 1}
merged = merge_json_files(existing_file, new_settings)
assert merged == {"a": 1}
# --- Dimension 3: Real-world Simulation ---
def test_merge_vscode_realistic_scenario(tmp_path):
"""A realistic VSCode settings.json with many existing preferences, comments, and trailing commas."""
existing_file = tmp_path / "vscode_settings.json"
existing_file.write_text("""
{
"editor.fontSize": 12,
"editor.formatOnSave": true, /* block comment */
"files.exclude": {
"**/.git": true,
"**/node_modules": true,
},
"chat.promptFilesRecommendations": {
"existing.tool": true,
} // User comment
}
""")
template_settings = {
"chat.promptFilesRecommendations": {
"speckit.specify": True,
"speckit.plan": True
},
"chat.tools.terminal.autoApprove": {
".specify/scripts/bash/": True
}
}
merged = merge_json_files(existing_file, template_settings)
# Check preservation
assert merged["editor.fontSize"] == 12
assert merged["files.exclude"]["**/.git"] is True
assert merged["chat.promptFilesRecommendations"]["existing.tool"] is True
# Check additions
assert merged["chat.promptFilesRecommendations"]["speckit.specify"] is True
assert merged["chat.tools.terminal.autoApprove"][".specify/scripts/bash/"] is True
# --- Dimension 4: Error Handling & Robustness ---
def test_merge_json_files_with_bom(tmp_path):
"""Test files with UTF-8 BOM (sometimes created on Windows)."""
existing_file = tmp_path / "bom.json"
content = '{"a": 1}'
# Prepend UTF-8 BOM
existing_file.write_bytes(b'\xef\xbb\xbf' + content.encode('utf-8'))
new_settings = {"b": 2}
merged = merge_json_files(existing_file, new_settings)
assert merged == {"a": 1, "b": 2}
def test_merge_json_files_not_a_dictionary_template(tmp_path):
"""If for some reason new_content is not a dict, PRESERVE existing settings by returning None."""
existing_file = tmp_path / "ok.json"
existing_file.write_text('{"a": 1}')
# Secure fallback: return None to skip writing and avoid clobbering
assert merge_json_files(existing_file, ["not", "a", "dict"]) is None
def test_merge_json_files_unparseable_existing(tmp_path):
"""If the existing file is unparseable JSON, return None to avoid overwriting it."""
bad_file = tmp_path / "bad.json"
bad_file.write_text('{"a": 1, missing_value}') # Invalid JSON
assert merge_json_files(bad_file, {"b": 2}) is None
def test_merge_json_files_list_preservation(tmp_path):
"""Verify that existing list values are preserved and NOT merged or overwritten."""
existing_file = tmp_path / "list.json"
existing_file.write_text('{"my.list": ["user_item"]}')
template_settings = {
"my.list": ["template_item"]
}
merged = merge_json_files(existing_file, template_settings)
# The polite merge policy says: keep existing values if they exist and aren't both dicts.
# Since nothing changed, it returns None.
assert merged is None
def test_merge_json_files_no_changes(tmp_path):
"""If the merge doesn't introduce any new keys or changes, return None to skip rewrite."""
existing_file = tmp_path / "no_change.json"
existing_file.write_text('{"a": 1, "b": {"c": 2}}')
template_settings = {
"a": 1, # Already exists
"b": {"c": 2} # Already exists nested
}
# Should return None because result == existing
assert merge_json_files(existing_file, template_settings) is None
def test_merge_json_files_type_mismatch_no_op(tmp_path):
"""If a key exists with different type and we preserve it, it might still result in no change."""
existing_file = tmp_path / "mismatch_no_op.json"
existing_file.write_text('{"a": "user_string"}')
template_settings = {
"a": {"key": "template_dict"} # Mismatch, will be ignored
}
# Should return None because we preserved the user's string and nothing else changed
assert merge_json_files(existing_file, template_settings) is None
def test_handle_vscode_settings_preserves_mode_on_atomic_write(tmp_path):
"""Atomic rewrite should preserve existing file mode bits."""
vscode_dir = tmp_path / ".vscode"
vscode_dir.mkdir()
dest_file = vscode_dir / "settings.json"
template_file = tmp_path / "template_settings.json"
dest_file.write_text('{"a": 1}\n', encoding="utf-8")
dest_file.chmod(0o640)
before_mode = stat.S_IMODE(dest_file.stat().st_mode)
template_file.write_text('{"b": 2}\n', encoding="utf-8")
handle_vscode_settings(
template_file,
dest_file,
"settings.json",
verbose=False,
tracker=None,
)
after_mode = stat.S_IMODE(dest_file.stat().st_mode)
assert after_mode == before_mode