feat(cli): polite deep merge for settings.json and support JSONC (#1874)

* feat(cli): polite deep merge for settings.json with json5 and safe atomic write

* fix(cli): prevent temp fd leak and align merge-policy docs
This commit is contained in:
Hamilton Snow
2026-03-17 22:51:13 +08:00
committed by GitHub
parent d2ecf6560d
commit c12b8c1763
4 changed files with 303 additions and 27 deletions

View File

@@ -35,6 +35,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added ### Added
- feat(cli): polite deep merge for VSCode settings.json with JSONC support via `json5` and zero-data-loss fallbacks
- feat(presets): Pluggable preset system with preset catalog and template resolver - feat(presets): Pluggable preset system with preset catalog and template resolver
- Preset manifest (`preset.yml`) with validation for artifact, command, and script types - Preset manifest (`preset.yml`) with validation for artifact, command, and script types
- `PresetManifest`, `PresetRegistry`, `PresetManager`, `PresetCatalog`, `PresetResolver` classes in `src/specify_cli/presets.py` - `PresetManifest`, `PresetRegistry`, `PresetManager`, `PresetCatalog`, `PresetResolver` classes in `src/specify_cli/presets.py`

View File

@@ -14,6 +14,7 @@ dependencies = [
"pyyaml>=6.0", "pyyaml>=6.0",
"packaging>=23.0", "packaging>=23.0",
"pathspec>=0.12.0", "pathspec>=0.12.0",
"json5>=0.13.0",
] ]
[project.scripts] [project.scripts]

View File

@@ -7,6 +7,7 @@
# "platformdirs", # "platformdirs",
# "readchar", # "readchar",
# "httpx", # "httpx",
# "json5",
# ] # ]
# /// # ///
""" """
@@ -32,6 +33,8 @@ import tempfile
import shutil import shutil
import shlex import shlex
import json import json
import json5
import stat
import yaml import yaml
from pathlib import Path from pathlib import Path
from typing import Any, Optional, Tuple from typing import Any, Optional, Tuple
@@ -654,37 +657,82 @@ def init_git_repo(project_path: Path, quiet: bool = False) -> Tuple[bool, Option
os.chdir(original_cwd) os.chdir(original_cwd)
def handle_vscode_settings(sub_item, dest_file, rel_path, verbose=False, tracker=None) -> None: def handle_vscode_settings(sub_item, dest_file, rel_path, verbose=False, tracker=None) -> None:
"""Handle merging or copying of .vscode/settings.json files.""" """Handle merging or copying of .vscode/settings.json files.
Note: when merge produces changes, rewritten output is normalized JSON and
existing JSONC comments/trailing commas are not preserved.
"""
def log(message, color="green"): def log(message, color="green"):
if verbose and not tracker: if verbose and not tracker:
console.print(f"[{color}]{message}[/] {rel_path}") console.print(f"[{color}]{message}[/] {rel_path}")
def atomic_write_json(target_file: Path, payload: dict[str, Any]) -> None:
"""Atomically write JSON while preserving existing mode bits when possible."""
temp_path: Optional[Path] = None
try:
with tempfile.NamedTemporaryFile(
mode='w',
encoding='utf-8',
dir=target_file.parent,
prefix=f"{target_file.name}.",
suffix=".tmp",
delete=False,
) as f:
temp_path = Path(f.name)
json.dump(payload, f, indent=4)
f.write('\n')
if target_file.exists():
try:
existing_stat = target_file.stat()
os.chmod(temp_path, stat.S_IMODE(existing_stat.st_mode))
if hasattr(os, "chown"):
try:
os.chown(temp_path, existing_stat.st_uid, existing_stat.st_gid)
except PermissionError:
# Best-effort owner/group preservation without requiring elevated privileges.
pass
except OSError:
# Best-effort metadata preservation; data safety is prioritized.
pass
os.replace(temp_path, target_file)
except Exception:
if temp_path and temp_path.exists():
temp_path.unlink()
raise
try: try:
with open(sub_item, 'r', encoding='utf-8') as f: with open(sub_item, 'r', encoding='utf-8') as f:
new_settings = json.load(f) # json5 natively supports comments and trailing commas (JSONC)
new_settings = json5.load(f)
if dest_file.exists(): if dest_file.exists():
merged = merge_json_files(dest_file, new_settings, verbose=verbose and not tracker) merged = merge_json_files(dest_file, new_settings, verbose=verbose and not tracker)
with open(dest_file, 'w', encoding='utf-8') as f: if merged is not None:
json.dump(merged, f, indent=4) atomic_write_json(dest_file, merged)
f.write('\n') log("Merged:", "green")
log("Merged:", "green") log("Note: comments/trailing commas are normalized when rewritten", "yellow")
else:
log("Skipped merge (preserved existing settings)", "yellow")
else: else:
shutil.copy2(sub_item, dest_file) shutil.copy2(sub_item, dest_file)
log("Copied (no existing settings.json):", "blue") log("Copied (no existing settings.json):", "blue")
except Exception as e: except Exception as e:
log(f"Warning: Could not merge, copying instead: {e}", "yellow") log(f"Warning: Could not merge settings: {e}", "yellow")
shutil.copy2(sub_item, dest_file) if not dest_file.exists():
shutil.copy2(sub_item, dest_file)
def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = False) -> dict:
def merge_json_files(existing_path: Path, new_content: Any, verbose: bool = False) -> Optional[dict[str, Any]]:
"""Merge new JSON content into existing JSON file. """Merge new JSON content into existing JSON file.
Performs a deep merge where: Performs a polite deep merge where:
- New keys are added - New keys are added
- Existing keys are preserved unless overwritten by new content - Existing keys are preserved (not overwritten) unless both values are dictionaries
- Nested dictionaries are merged recursively - Nested dictionaries are merged recursively only when both sides are dictionaries
- Lists and other values are replaced (not merged) - Lists and other values are preserved from base if they exist
Args: Args:
existing_path: Path to existing JSON file existing_path: Path to existing JSON file
@@ -692,28 +740,64 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal
verbose: Whether to print merge details verbose: Whether to print merge details
Returns: Returns:
Merged JSON content as dict Merged JSON content as dict, or None if the existing file should be left untouched.
""" """
try: # Load existing content first to have a safe fallback
with open(existing_path, 'r', encoding='utf-8') as f: existing_content = None
existing_content = json.load(f) exists = existing_path.exists()
except (FileNotFoundError, json.JSONDecodeError):
# If file doesn't exist or is invalid, just use new content if exists:
try:
with open(existing_path, 'r', encoding='utf-8') as f:
# Handle comments (JSONC) natively with json5
# Note: json5 handles BOM automatically
existing_content = json5.load(f)
except FileNotFoundError:
# Handle race condition where file is deleted after exists() check
exists = False
except Exception as e:
if verbose:
console.print(f"[yellow]Warning: Could not read or parse existing JSON in {existing_path.name} ({e}).[/yellow]")
# Skip merge to preserve existing file if unparseable or inaccessible (e.g. PermissionError)
return None
# Validate template content
if not isinstance(new_content, dict):
if verbose:
console.print(f"[yellow]Warning: Template content for {existing_path.name} is not a dictionary. Preserving existing settings.[/yellow]")
return None
if not exists:
return new_content return new_content
def deep_merge(base: dict, update: dict) -> dict: # If existing content parsed but is not a dict, skip merge to avoid data loss
"""Recursively merge update dict into base dict.""" if not isinstance(existing_content, dict):
if verbose:
console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object. Skipping merge to avoid data loss.[/yellow]")
return None
def deep_merge_polite(base: dict[str, Any], update: dict[str, Any]) -> dict[str, Any]:
"""Recursively merge update dict into base dict, preserving base values."""
result = base.copy() result = base.copy()
for key, value in update.items(): for key, value in update.items():
if key in result and isinstance(result[key], dict) and isinstance(value, dict): if key not in result:
# Recursively merge nested dictionaries # Add new key
result[key] = deep_merge(result[key], value)
else:
# Add new key or replace existing value
result[key] = value result[key] = value
elif isinstance(result[key], dict) and isinstance(value, dict):
# Recursively merge nested dictionaries
result[key] = deep_merge_polite(result[key], value)
else:
# Key already exists and values are not both dicts; preserve existing value.
# This ensures user settings aren't overwritten by template defaults.
pass
return result return result
merged = deep_merge(existing_content, new_content) merged = deep_merge_polite(existing_content, new_content)
# Detect if anything actually changed. If not, return None so the caller
# can skip rewriting the file (preserving user's comments/formatting).
if merged == existing_content:
return None
if verbose: if verbose:
console.print(f"[cyan]Merged JSON file:[/cyan] {existing_path.name}") console.print(f"[cyan]Merged JSON file:[/cyan] {existing_path.name}")

190
tests/test_merge.py Normal file
View File

@@ -0,0 +1,190 @@
import stat
from specify_cli import merge_json_files
from specify_cli import handle_vscode_settings
# --- Dimension 2: Polite Deep Merge Strategy ---
def test_merge_json_files_type_mismatch_preservation(tmp_path):
"""If user has a string but template wants a dict, PRESERVE user's string."""
existing_file = tmp_path / "settings.json"
# User might have overridden a setting with a simple string or different type
existing_file.write_text('{"chat.editor.fontFamily": "CustomFont"}')
# Template might expect a dict for the same key (hypothetically)
new_settings = {
"chat.editor.fontFamily": {"font": "TemplateFont"}
}
merged = merge_json_files(existing_file, new_settings)
# Result is None because user settings were preserved and nothing else changed
assert merged is None
def test_merge_json_files_deep_nesting(tmp_path):
"""Verify deep recursive merging of new keys."""
existing_file = tmp_path / "settings.json"
existing_file.write_text("""
{
"a": {
"b": {
"c": 1
}
}
}
""")
new_settings = {
"a": {
"b": {
"d": 2 # New nested key
},
"e": 3 # New mid-level key
}
}
merged = merge_json_files(existing_file, new_settings)
assert merged["a"]["b"]["c"] == 1
assert merged["a"]["b"]["d"] == 2
assert merged["a"]["e"] == 3
def test_merge_json_files_empty_existing(tmp_path):
"""Merging into an empty/new file."""
existing_file = tmp_path / "empty.json"
existing_file.write_text("{}")
new_settings = {"a": 1}
merged = merge_json_files(existing_file, new_settings)
assert merged == {"a": 1}
# --- Dimension 3: Real-world Simulation ---
def test_merge_vscode_realistic_scenario(tmp_path):
"""A realistic VSCode settings.json with many existing preferences, comments, and trailing commas."""
existing_file = tmp_path / "vscode_settings.json"
existing_file.write_text("""
{
"editor.fontSize": 12,
"editor.formatOnSave": true, /* block comment */
"files.exclude": {
"**/.git": true,
"**/node_modules": true,
},
"chat.promptFilesRecommendations": {
"existing.tool": true,
} // User comment
}
""")
template_settings = {
"chat.promptFilesRecommendations": {
"speckit.specify": True,
"speckit.plan": True
},
"chat.tools.terminal.autoApprove": {
".specify/scripts/bash/": True
}
}
merged = merge_json_files(existing_file, template_settings)
# Check preservation
assert merged["editor.fontSize"] == 12
assert merged["files.exclude"]["**/.git"] is True
assert merged["chat.promptFilesRecommendations"]["existing.tool"] is True
# Check additions
assert merged["chat.promptFilesRecommendations"]["speckit.specify"] is True
assert merged["chat.tools.terminal.autoApprove"][".specify/scripts/bash/"] is True
# --- Dimension 4: Error Handling & Robustness ---
def test_merge_json_files_with_bom(tmp_path):
"""Test files with UTF-8 BOM (sometimes created on Windows)."""
existing_file = tmp_path / "bom.json"
content = '{"a": 1}'
# Prepend UTF-8 BOM
existing_file.write_bytes(b'\xef\xbb\xbf' + content.encode('utf-8'))
new_settings = {"b": 2}
merged = merge_json_files(existing_file, new_settings)
assert merged == {"a": 1, "b": 2}
def test_merge_json_files_not_a_dictionary_template(tmp_path):
"""If for some reason new_content is not a dict, PRESERVE existing settings by returning None."""
existing_file = tmp_path / "ok.json"
existing_file.write_text('{"a": 1}')
# Secure fallback: return None to skip writing and avoid clobbering
assert merge_json_files(existing_file, ["not", "a", "dict"]) is None
def test_merge_json_files_unparseable_existing(tmp_path):
"""If the existing file is unparseable JSON, return None to avoid overwriting it."""
bad_file = tmp_path / "bad.json"
bad_file.write_text('{"a": 1, missing_value}') # Invalid JSON
assert merge_json_files(bad_file, {"b": 2}) is None
def test_merge_json_files_list_preservation(tmp_path):
"""Verify that existing list values are preserved and NOT merged or overwritten."""
existing_file = tmp_path / "list.json"
existing_file.write_text('{"my.list": ["user_item"]}')
template_settings = {
"my.list": ["template_item"]
}
merged = merge_json_files(existing_file, template_settings)
# The polite merge policy says: keep existing values if they exist and aren't both dicts.
# Since nothing changed, it returns None.
assert merged is None
def test_merge_json_files_no_changes(tmp_path):
"""If the merge doesn't introduce any new keys or changes, return None to skip rewrite."""
existing_file = tmp_path / "no_change.json"
existing_file.write_text('{"a": 1, "b": {"c": 2}}')
template_settings = {
"a": 1, # Already exists
"b": {"c": 2} # Already exists nested
}
# Should return None because result == existing
assert merge_json_files(existing_file, template_settings) is None
def test_merge_json_files_type_mismatch_no_op(tmp_path):
"""If a key exists with different type and we preserve it, it might still result in no change."""
existing_file = tmp_path / "mismatch_no_op.json"
existing_file.write_text('{"a": "user_string"}')
template_settings = {
"a": {"key": "template_dict"} # Mismatch, will be ignored
}
# Should return None because we preserved the user's string and nothing else changed
assert merge_json_files(existing_file, template_settings) is None
def test_handle_vscode_settings_preserves_mode_on_atomic_write(tmp_path):
"""Atomic rewrite should preserve existing file mode bits."""
vscode_dir = tmp_path / ".vscode"
vscode_dir.mkdir()
dest_file = vscode_dir / "settings.json"
template_file = tmp_path / "template_settings.json"
dest_file.write_text('{"a": 1}\n', encoding="utf-8")
dest_file.chmod(0o640)
before_mode = stat.S_IMODE(dest_file.stat().st_mode)
template_file.write_text('{"b": 2}\n', encoding="utf-8")
handle_vscode_settings(
template_file,
dest_file,
"settings.json",
verbose=False,
tracker=None,
)
after_mode = stat.S_IMODE(dest_file.stat().st_mode)
assert after_mode == before_mode