/**
 * Git diff generation utilities
 */

import { createLogger } from '@automaker/utils';
import { secureFs } from '@automaker/platform';
import path from 'path';
import { exec } from 'child_process';
import { promisify } from 'util';
import { BINARY_EXTENSIONS, type FileStatus, type MergeStateInfo } from './types.js';
import { isGitRepo, parseGitStatus, detectMergeState, detectMergeCommit } from './status.js';

const execAsync = promisify(exec);
const logger = createLogger('GitUtils');

// Max file size for generating synthetic diffs (1MB)
const MAX_SYNTHETIC_DIFF_SIZE = 1024 * 1024;

// Output buffer limit shared by every git invocation in this module (10MB)
const GIT_MAX_BUFFER = 10 * 1024 * 1024;

// Upper bound on synthetic diffs generated at the same time, so that large
// sets of untracked files do not open an unbounded number of files at once
const SYNTHETIC_DIFF_CONCURRENCY = 16;

// Matches a "diff --git a/<old> b/<new>" header at the start of a line;
// capture group 2 is the path on the "b/" side
const DIFF_HEADER_PATTERN = /^diff --git a\/(.*?) b\/(.*)/m;

// Directory names that are never descended into when listing files
const SKIPPED_DIRECTORIES: ReadonlySet<string> = new Set([
  'node_modules',
  '.git',
  '.automaker',
  'dist',
  'build',
  '.next',
  '.nuxt',
  '__pycache__',
  '.cache',
  'coverage',
  '.venv',
  'venv',
  'target',
  'vendor',
  '.gradle',
  'out',
  'tmp',
  '.tmp',
]);

/**
 * Check if a file is likely binary based on extension
 */
function isBinaryFile(filePath: string): boolean {
  const ext = path.extname(filePath).toLowerCase();
  return BINARY_EXTENSIONS.has(ext);
}

/**
 * Build the first three header lines shared by every synthetic "new file" diff.
 */
function newFileHeader(relativePath: string, mode: string): string[] {
  return [
    `diff --git a/${relativePath} b/${relativePath}`,
    `new file mode ${mode}`,
    'index 0000000..0000000',
  ];
}

/**
 * Create a synthetic diff for a new file with the given content lines.
 * Used for placeholder entries (empty directory, oversized or unreadable file).
 *
 * @param relativePath - Path shown on both sides of the diff header
 * @param mode - File mode written to the "new file mode" line
 * @param contentLines - Lines emitted as additions (each prefixed with "+")
 * @returns The diff text, terminated by a newline
 */
function createNewFileDiff(relativePath: string, mode: string, contentLines: string[]): string {
  const lineCount = contentLines.length;
  const addedLines = contentLines.map((line) => `+${line}`).join('\n');
  const hunkRange = lineCount === 1 ? '1' : `1,${lineCount}`;

  return [
    ...newFileHeader(relativePath, mode),
    '--- /dev/null',
    `+++ b/${relativePath}`,
    `@@ -0,0 +${hunkRange} @@`,
    addedLines,
    '', // trailing newline
  ].join('\n');
}

/**
 * Apply an async mapper to every item with at most `limit` calls in flight.
 * Results are returned in the same order as `items`.
 */
async function mapWithConcurrencyLimit<T, R>(
  items: readonly T[],
  limit: number,
  mapper: (item: T) => Promise<R>
): Promise<R[]> {
  const results = new Array<R>(items.length);
  // All workers pull from the same iterator, so each item is processed exactly once
  const queue = items.entries();

  const worker = async (): Promise<void> => {
    for (const [index, item] of queue) {
      results[index] = await mapper(item);
    }
  };

  const workerCount = Math.max(1, Math.min(limit, items.length));
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}

/**
 * Generate a synthetic unified diff for an untracked (new) file
 * This is needed because `git diff HEAD` doesn't include untracked files
 *
 * If the path is a directory, this will recursively generate diffs for all files inside
 *
 * @param basePath - Directory that `relativePath` is resolved against
 * @param relativePath - Path of the file or directory; a trailing "/" is ignored
 * @returns The diff text. Any failure is logged and replaced by a placeholder diff,
 *          so errors from stat/read are not propagated to the caller.
 */
export async function generateSyntheticDiffForNewFile(
  basePath: string,
  relativePath: string
): Promise<string> {
  // Remove trailing slash if present (git status reports directories with trailing /)
  const cleanPath = relativePath.endsWith('/') ? relativePath.slice(0, -1) : relativePath;
  const fullPath = path.join(basePath, cleanPath);

  try {
    // Get file stats to check size and type
    const stats = await secureFs.stat(fullPath);

    // Check if it's a directory first (before binary check)
    // This handles edge cases like directories named "images.png/"
    if (stats.isDirectory()) {
      const filesInDir = await listAllFilesInDirectory(basePath, cleanPath);
      if (filesInDir.length === 0) {
        // Empty directory
        return createNewFileDiff(cleanPath, '040000', ['[Empty directory]']);
      }

      // Generate diffs for all files in the directory sequentially
      // Using sequential processing to avoid exhausting file descriptors on large directories
      const diffs: string[] = [];
      for (const filePath of filesInDir) {
        diffs.push(await generateSyntheticDiffForNewFile(basePath, filePath));
      }
      return diffs.join('');
    }

    // Check if it's a binary file (after directory check to handle dirs with binary extensions)
    if (isBinaryFile(cleanPath)) {
      return [...newFileHeader(cleanPath, '100644'), `Binary file ${cleanPath} added`, ''].join(
        '\n'
      );
    }

    const fileSize = Number(stats.size);
    if (fileSize > MAX_SYNTHETIC_DIFF_SIZE) {
      const sizeKB = Math.round(fileSize / 1024);
      return createNewFileDiff(cleanPath, '100644', [`[File too large to display: ${sizeKB}KB]`]);
    }

    // Read file content
    const content = (await secureFs.readFile(fullPath, 'utf-8')) as string;
    const hasTrailingNewline = content.endsWith('\n');
    const lines = content.split('\n');

    // Remove trailing empty line if the file ends with newline
    if (lines.length > 0 && lines[lines.length - 1] === '') {
      lines.pop();
    }

    // An empty file has no lines to add: emit the header only (as git does for an
    // empty new file) instead of an invalid "+1,0" hunk followed by a blank line
    if (lines.length === 0) {
      return [...newFileHeader(cleanPath, '100644'), ''].join('\n');
    }

    // Generate diff format
    const diffLines = [
      ...newFileHeader(cleanPath, '100644'),
      '--- /dev/null',
      `+++ b/${cleanPath}`,
      `@@ -0,0 +1,${lines.length} @@`,
      ...lines.map((line) => `+${line}`),
    ];

    // Add "No newline at end of file" indicator if needed
    if (!hasTrailingNewline) {
      diffLines.push('\\ No newline at end of file');
    }

    return diffLines.join('\n') + '\n';
  } catch (error) {
    // Log the error for debugging
    logger.error(`Failed to generate synthetic diff for ${fullPath}:`, error);
    // Return a placeholder diff
    return createNewFileDiff(cleanPath, '100644', ['[Unable to read file content]']);
  }
}

/**
 * Generate synthetic diffs for all untracked files and combine with existing diff
 *
 * @param basePath - Directory the file paths are relative to
 * @param existingDiff - Diff text the synthetic diffs are appended to
 * @param files - Status entries; only those with status "?" are processed
 * @returns `existingDiff` followed by one synthetic diff per untracked entry, in input order
 */
export async function appendUntrackedFileDiffs(
  basePath: string,
  existingDiff: string,
  files: Array<{ status: string; path: string }>
): Promise<string> {
  // Find untracked files (status "?")
  const untrackedFiles = files.filter((f) => f.status === '?');

  if (untrackedFiles.length === 0) {
    return existingDiff;
  }

  // Generate synthetic diffs for each untracked file (bounded concurrency)
  const syntheticDiffs = await mapWithConcurrencyLimit(
    untrackedFiles,
    SYNTHETIC_DIFF_CONCURRENCY,
    (f) => generateSyntheticDiffForNewFile(basePath, f.path)
  );

  // Combine existing diff with synthetic diffs
  return existingDiff + syntheticDiffs.join('');
}

/**
 * List all files in a directory recursively (for non-git repositories)
 * Excludes hidden files/folders and common build artifacts
 *
 * @param basePath - Root directory of the listing
 * @param relativePath - Sub-directory (relative to `basePath`) to list; defaults to the root
 * @returns Paths relative to `basePath`, joined with "/". A directory that cannot be
 *          read is logged and contributes no entries.
 */
export async function listAllFilesInDirectory(
  basePath: string,
  relativePath: string = ''
): Promise<string[]> {
  const files: string[] = [];
  const fullPath = path.join(basePath, relativePath);

  try {
    const entries = await secureFs.readdir(fullPath, { withFileTypes: true });

    for (const entry of entries) {
      // Skip hidden files/folders (except we want to allow some)
      if (entry.name.startsWith('.') && entry.name !== '.env') {
        continue;
      }

      const entryRelPath = relativePath ? `${relativePath}/${entry.name}` : entry.name;

      if (entry.isDirectory()) {
        if (!SKIPPED_DIRECTORIES.has(entry.name)) {
          const subFiles = await listAllFilesInDirectory(basePath, entryRelPath);
          files.push(...subFiles);
        }
      } else if (entry.isFile()) {
        files.push(entryRelPath);
      }
    }
  } catch (error) {
    // Log the error to help diagnose file system issues
    logger.error(`Error reading directory ${fullPath}:`, error);
  }

  return files;
}

/**
 * Generate diffs for all files in a non-git directory
 * Treats all files as "new" files
 *
 * @param basePath - Directory to scan
 * @returns The concatenated synthetic diffs and one "?" status entry per listed file
 */
export async function generateDiffsForNonGitDirectory(
  basePath: string
): Promise<{ diff: string; files: FileStatus[] }> {
  const allFiles = await listAllFilesInDirectory(basePath);

  const files: FileStatus[] = allFiles.map((filePath) => ({
    status: '?',
    path: filePath,
    statusText: 'New',
  }));

  // Generate synthetic diffs for all files (bounded concurrency)
  const syntheticDiffs = await mapWithConcurrencyLimit(files, SYNTHETIC_DIFF_CONCURRENCY, (f) =>
    generateSyntheticDiffForNewFile(basePath, f.path)
  );

  return {
    diff: syntheticDiffs.join(''),
    files,
  };
}

/**
 * Flag every path in `mergeAffectedPaths` as merge-affected inside `files`.
 * Existing entries are updated in place; paths without an entry get a new
 * "Merged" entry appended. Calling it again with the same paths adds nothing new.
 */
function markMergeAffectedFiles(files: FileStatus[], mergeAffectedPaths: readonly string[]): void {
  const fileByPath = new Map<string, FileStatus>(files.map((f) => [f.path, f]));

  for (const filePath of mergeAffectedPaths) {
    const existing = fileByPath.get(filePath);
    if (existing) {
      // Mark existing file as also merge-affected
      existing.isMergeAffected = true;
      existing.mergeType = 'merged';
      continue;
    }

    const mergedFile: FileStatus = {
      status: 'M',
      path: filePath,
      statusText: 'Merged',
      indexStatus: ' ',
      workTreeStatus: ' ',
      isMergeAffected: true,
      mergeType: 'merged',
    };
    files.push(mergedFile);
    fileByPath.set(filePath, mergedFile);
  }
}

/**
 * Collect the "b/" side path of every "diff --git" header line in `diff`.
 */
function collectDiffTargetPaths(diff: string): Set<string> {
  const paths = new Set<string>();
  for (const line of diff.split('\n')) {
    if (!line.startsWith('diff --git')) {
      continue;
    }
    const targetPath = DIFF_HEADER_PATTERN.exec(line)?.[2];
    if (targetPath !== undefined) {
      paths.add(targetPath);
    }
  }
  return paths;
}

/**
 * Prepend to `workingTreeDiff` the per-file sections of `mergeDiff` whose target
 * path is not already covered by `workingTreeDiff`.
 */
function prependMergeOnlyDiffs(mergeDiff: string, workingTreeDiff: string): string {
  if (!mergeDiff.trim()) {
    return workingTreeDiff;
  }

  const coveredPaths = collectDiffTargetPaths(workingTreeDiff);

  // Split only at headers that begin a line, so the text "diff --git" occurring
  // inside file content never starts a new section
  const mergeOnlyDiffs = mergeDiff.split(/^(?=diff --git )/m).filter((fileDiff) => {
    if (!fileDiff.trim()) {
      return false;
    }
    const targetPath = DIFF_HEADER_PATTERN.exec(fileDiff)?.[2];
    return targetPath !== undefined && !coveredPaths.has(targetPath);
  });

  return mergeOnlyDiffs.join('') + workingTreeDiff;
}

/**
 * Get git repository diffs for a given path
 * Handles both git repos and non-git directories.
 * Also detects merge state and annotates files accordingly.
 *
 * @param repoPath - Directory to inspect
 * @returns The combined diff text, the file status list, whether any files changed,
 *          and `mergeState` when a merge is in progress or HEAD is a merge commit
 *          with affected files.
 *          Failures of `git diff HEAD` / `git status --porcelain` reject the promise;
 *          a failure of the merge-commit diff is logged and otherwise ignored.
 */
export async function getGitRepositoryDiffs(repoPath: string): Promise<{
  diff: string;
  files: FileStatus[];
  hasChanges: boolean;
  mergeState?: MergeStateInfo;
}> {
  // Check if it's a git repository
  const isRepo = await isGitRepo(repoPath);

  if (!isRepo) {
    // Not a git repo - list all files and treat them as new
    const result = await generateDiffsForNonGitDirectory(repoPath);
    return {
      diff: result.diff,
      files: result.files,
      hasChanges: result.files.length > 0,
    };
  }

  // Get git diff and status
  const { stdout: diff } = await execAsync('git diff HEAD', {
    cwd: repoPath,
    maxBuffer: GIT_MAX_BUFFER,
  });
  const { stdout: status } = await execAsync('git status --porcelain', {
    cwd: repoPath,
    maxBuffer: GIT_MAX_BUFFER,
  });

  const files = parseGitStatus(status);

  // Generate synthetic diffs for untracked (new) files
  let combinedDiff = await appendUntrackedFileDiffs(repoPath, diff, files);

  // Detect merge state (in-progress merge/rebase/cherry-pick)
  const mergeState = await detectMergeState(repoPath);

  // If no in-progress merge, check if HEAD is a completed merge commit
  // and include merge commit changes in the diff and file list
  if (!mergeState.isMerging) {
    const mergeCommitInfo = await detectMergeCommit(repoPath);

    if (mergeCommitInfo.isMergeCommit && mergeCommitInfo.mergeAffectedFiles.length > 0) {
      // Keep files[] consistent with mergeAffectedFiles whether or not the merge
      // diff below can be produced (avoids duplicates with working tree changes)
      markMergeAffectedFiles(files, mergeCommitInfo.mergeAffectedFiles);

      try {
        // Get the diff of the merge commit relative to first parent
        const { stdout: mergeDiff } = await execAsync('git diff HEAD~1 HEAD', {
          cwd: repoPath,
          maxBuffer: GIT_MAX_BUFFER,
        });

        // Only include merge diff entries for files NOT already in working tree diff
        combinedDiff = prependMergeOnlyDiffs(mergeDiff, combinedDiff);
      } catch (mergeError) {
        // Best-effort: log and continue without merge diff
        logger.error('Failed to get merge commit diff:', mergeError);
      }

      // Return with merge commit info in the mergeState
      return {
        diff: combinedDiff,
        files,
        hasChanges: files.length > 0,
        mergeState: {
          isMerging: false,
          mergeOperationType: 'merge',
          isCleanMerge: true,
          mergeAffectedFiles: mergeCommitInfo.mergeAffectedFiles,
          conflictFiles: [],
          isMergeCommit: true,
        },
      };
    }
  }

  return {
    diff: combinedDiff,
    files,
    hasChanges: files.length > 0,
    ...(mergeState.isMerging ? { mergeState } : {}),
  };
}