Flatten venv & many other bins dir fix (#408)
* Added .venv to the flattener's ignore list * Added more file patterns to ignore --------- Co-authored-by: Lior Assouline <Lior.Assouline@harmonicinc.com>
This commit is contained in:
@@ -45,6 +45,20 @@ async function discoverFiles(rootDir) {
|
|||||||
'.env.*',
|
'.env.*',
|
||||||
'*.env',
|
'*.env',
|
||||||
'.config',
|
'.config',
|
||||||
|
'.venv/**',
|
||||||
|
'*/.venv/**',
|
||||||
|
'**/.venv/**',
|
||||||
|
'.venv',
|
||||||
|
'venv/**',
|
||||||
|
'*/venv/**',
|
||||||
|
'**/venv/**',
|
||||||
|
'venv',
|
||||||
|
'env/**',
|
||||||
|
'*/env/**',
|
||||||
|
'**/env/**',
|
||||||
|
'virtualenv/**',
|
||||||
|
'*/virtualenv/**',
|
||||||
|
'**/virtualenv/**',
|
||||||
|
|
||||||
// Logs
|
// Logs
|
||||||
'logs/**',
|
'logs/**',
|
||||||
@@ -113,6 +127,9 @@ async function discoverFiles(rootDir) {
|
|||||||
'*.so',
|
'*.so',
|
||||||
'*.dll',
|
'*.dll',
|
||||||
'*.exe',
|
'*.exe',
|
||||||
|
'lib64/**',
|
||||||
|
'**/.venv/lib64/**',
|
||||||
|
'**/venv/lib64/**',
|
||||||
|
|
||||||
// Documentation build
|
// Documentation build
|
||||||
'_site/**',
|
'_site/**',
|
||||||
@@ -129,13 +146,30 @@ async function discoverFiles(rootDir) {
|
|||||||
...commonIgnorePatterns
|
...commonIgnorePatterns
|
||||||
];
|
];
|
||||||
|
|
||||||
|
// Add specific patterns for commonly ignored directories and files
|
||||||
|
const additionalGlobIgnores = [
|
||||||
|
// Virtual environments
|
||||||
|
'**/.venv/**', '**/venv/**', '**/.virtualenv/**', '**/virtualenv/**',
|
||||||
|
// Node modules
|
||||||
|
'**/node_modules/**',
|
||||||
|
// Python cache
|
||||||
|
'**/__pycache__/**', '**/*.pyc', '**/*.pyo', '**/*.pyd',
|
||||||
|
// Binary and media files
|
||||||
|
'**/*.jpg', '**/*.jpeg', '**/*.png', '**/*.gif', '**/*.bmp', '**/*.ico', '**/*.svg',
|
||||||
|
'**/*.pdf', '**/*.doc', '**/*.docx', '**/*.xls', '**/*.xlsx', '**/*.ppt', '**/*.pptx',
|
||||||
|
'**/*.zip', '**/*.tar', '**/*.gz', '**/*.rar', '**/*.7z',
|
||||||
|
'**/*.exe', '**/*.dll', '**/*.so', '**/*.dylib',
|
||||||
|
'**/*.mp3', '**/*.mp4', '**/*.avi', '**/*.mov', '**/*.wav',
|
||||||
|
'**/*.ttf', '**/*.otf', '**/*.woff', '**/*.woff2'
|
||||||
|
];
|
||||||
|
|
||||||
// Use glob to recursively find all files, excluding common ignore patterns
|
// Use glob to recursively find all files, excluding common ignore patterns
|
||||||
const files = await glob('**/*', {
|
const files = await glob('**/*', {
|
||||||
cwd: rootDir,
|
cwd: rootDir,
|
||||||
nodir: true, // Only files, not directories
|
nodir: true, // Only files, not directories
|
||||||
dot: true, // Include hidden files
|
dot: true, // Include hidden files
|
||||||
follow: false, // Don't follow symbolic links
|
follow: false, // Don't follow symbolic links
|
||||||
ignore: combinedIgnores
|
ignore: [...combinedIgnores, ...additionalGlobIgnores]
|
||||||
});
|
});
|
||||||
|
|
||||||
return files.map(file => path.resolve(rootDir, file));
|
return files.map(file => path.resolve(rootDir, file));
|
||||||
@@ -181,7 +215,13 @@ async function parseGitignore(gitignorePath) {
|
|||||||
*/
|
*/
|
||||||
async function isBinaryFile(filePath) {
|
async function isBinaryFile(filePath) {
|
||||||
try {
|
try {
|
||||||
// First check by file extension
|
// First check if the path is a directory
|
||||||
|
const stats = await fs.stat(filePath);
|
||||||
|
if (stats.isDirectory()) {
|
||||||
|
throw new Error(`EISDIR: illegal operation on a directory`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check by file extension
|
||||||
const binaryExtensions = [
|
const binaryExtensions = [
|
||||||
'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.ico', '.svg',
|
'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.ico', '.svg',
|
||||||
'.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
|
'.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
|
||||||
@@ -198,7 +238,6 @@ async function isBinaryFile(filePath) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// For files without clear extensions, try to read a small sample
|
// For files without clear extensions, try to read a small sample
|
||||||
const stats = await fs.stat(filePath);
|
|
||||||
if (stats.size === 0) {
|
if (stats.size === 0) {
|
||||||
return false; // Empty files are considered text
|
return false; // Empty files are considered text
|
||||||
}
|
}
|
||||||
@@ -446,16 +485,46 @@ async function filterFiles(files, rootDir) {
|
|||||||
const gitignorePath = path.join(rootDir, '.gitignore');
|
const gitignorePath = path.join(rootDir, '.gitignore');
|
||||||
const ignorePatterns = await parseGitignore(gitignorePath);
|
const ignorePatterns = await parseGitignore(gitignorePath);
|
||||||
|
|
||||||
if (ignorePatterns.length === 0) {
|
// Add explicit patterns for common directories and files to ignore
|
||||||
return files;
|
const additionalPatterns = [
|
||||||
}
|
// Virtual environments
|
||||||
|
'**/.venv/**', '**/venv/**', '**/env/**', '**/virtualenv/**',
|
||||||
|
'.venv/**', 'venv/**', 'env/**', 'virtualenv/**',
|
||||||
|
'.venv', 'venv', 'env', 'virtualenv',
|
||||||
|
|
||||||
|
// Node modules
|
||||||
|
'**/node_modules/**',
|
||||||
|
'node_modules/**',
|
||||||
|
'node_modules',
|
||||||
|
|
||||||
|
// Python cache
|
||||||
|
'**/__pycache__/**',
|
||||||
|
'__pycache__/**',
|
||||||
|
'__pycache__',
|
||||||
|
'**/*.pyc',
|
||||||
|
'**/*.pyo',
|
||||||
|
'**/*.pyd',
|
||||||
|
|
||||||
|
// Binary and media files
|
||||||
|
'**/*.jpg', '**/*.jpeg', '**/*.png', '**/*.gif', '**/*.bmp', '**/*.ico', '**/*.svg',
|
||||||
|
'**/*.pdf', '**/*.doc', '**/*.docx', '**/*.xls', '**/*.xlsx', '**/*.ppt', '**/*.pptx',
|
||||||
|
'**/*.zip', '**/*.tar', '**/*.gz', '**/*.rar', '**/*.7z',
|
||||||
|
'**/*.exe', '**/*.dll', '**/*.so', '**/*.dylib',
|
||||||
|
'**/*.mp3', '**/*.mp4', '**/*.avi', '**/*.mov', '**/*.wav',
|
||||||
|
'**/*.ttf', '**/*.otf', '**/*.woff', '**/*.woff2'
|
||||||
|
];
|
||||||
|
|
||||||
|
const allIgnorePatterns = [
|
||||||
|
...ignorePatterns,
|
||||||
|
...additionalPatterns
|
||||||
|
];
|
||||||
|
|
||||||
// Convert absolute paths to relative for pattern matching
|
// Convert absolute paths to relative for pattern matching
|
||||||
const relativeFiles = files.map(file => path.relative(rootDir, file));
|
const relativeFiles = files.map(file => path.relative(rootDir, file));
|
||||||
|
|
||||||
// Separate positive and negative patterns
|
// Separate positive and negative patterns
|
||||||
const positivePatterns = ignorePatterns.filter(p => !p.startsWith('!'));
|
const positivePatterns = allIgnorePatterns.filter(p => !p.startsWith('!'));
|
||||||
const negativePatterns = ignorePatterns.filter(p => p.startsWith('!')).map(p => p.slice(1));
|
const negativePatterns = allIgnorePatterns.filter(p => p.startsWith('!')).map(p => p.slice(1));
|
||||||
|
|
||||||
// Filter out files that match ignore patterns
|
// Filter out files that match ignore patterns
|
||||||
const filteredRelative = [];
|
const filteredRelative = [];
|
||||||
@@ -463,22 +532,38 @@ async function filterFiles(files, rootDir) {
|
|||||||
for (const file of relativeFiles) {
|
for (const file of relativeFiles) {
|
||||||
let shouldIgnore = false;
|
let shouldIgnore = false;
|
||||||
|
|
||||||
// First check positive patterns (ignore these files)
|
// First, explicit check for commonly ignored directories and file types
|
||||||
for (const pattern of positivePatterns) {
|
if (
|
||||||
if (minimatch(file, pattern)) {
|
// Check for virtual environments
|
||||||
shouldIgnore = true;
|
file.includes('/.venv/') || file.includes('/venv/') ||
|
||||||
break;
|
file.startsWith('.venv/') || file.startsWith('venv/') ||
|
||||||
}
|
// Check for node_modules
|
||||||
}
|
file.includes('/node_modules/') || file.startsWith('node_modules/') ||
|
||||||
|
// Check for Python cache
|
||||||
// Then check negative patterns (don't ignore these files even if they match positive patterns)
|
file.includes('/__pycache__/') || file.startsWith('__pycache__/') ||
|
||||||
if (shouldIgnore) {
|
file.endsWith('.pyc') || file.endsWith('.pyo') || file.endsWith('.pyd') ||
|
||||||
for (const pattern of negativePatterns) {
|
// Check for common binary file extensions
|
||||||
if (minimatch(file, pattern)) {
|
/\.(jpg|jpeg|png|gif|bmp|ico|svg|pdf|doc|docx|xls|xlsx|ppt|pptx|zip|tar|gz|rar|7z|exe|dll|so|dylib|mp3|mp4|avi|mov|wav|ttf|otf|woff|woff2)$/i.test(file)
|
||||||
shouldIgnore = false;
|
) {
|
||||||
|
shouldIgnore = true;
|
||||||
|
} else {
|
||||||
|
// Check against other patterns
|
||||||
|
for (const pattern of positivePatterns) {
|
||||||
|
if (minimatch(file, pattern, { dot: true })) {
|
||||||
|
shouldIgnore = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Then check negative patterns (don't ignore these files even if they match positive patterns)
|
||||||
|
if (shouldIgnore) {
|
||||||
|
for (const pattern of negativePatterns) {
|
||||||
|
if (minimatch(file, pattern, { dot: true })) {
|
||||||
|
shouldIgnore = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!shouldIgnore) {
|
if (!shouldIgnore) {
|
||||||
@@ -521,6 +606,14 @@ program
|
|||||||
const filteredFiles = await filterFiles(files, inputDir);
|
const filteredFiles = await filterFiles(files, inputDir);
|
||||||
discoverySpinner.succeed(`📁 Found ${filteredFiles.length} files to include`);
|
discoverySpinner.succeed(`📁 Found ${filteredFiles.length} files to include`);
|
||||||
|
|
||||||
|
// Debug aid: write filteredFiles to temp-filtered-files.txt to investigate unneeded files ending up in the XML
|
||||||
|
// const tempFilePath = path.join(process.cwd(), 'temp-filtered-files.txt');
|
||||||
|
// await fs.writeFile(
|
||||||
|
// tempFilePath,
|
||||||
|
// filteredFiles.map(file => `${file}\n${path.relative(inputDir, file)}\n---\n`).join('\n')
|
||||||
|
// );
|
||||||
|
// console.log(`📄 Filtered files written to: ${tempFilePath}`);
|
||||||
|
|
||||||
// Process files with progress tracking
|
// Process files with progress tracking
|
||||||
console.log('Reading file contents');
|
console.log('Reading file contents');
|
||||||
const processingSpinner = ora('📄 Processing files...').start();
|
const processingSpinner = ora('📄 Processing files...').start();
|
||||||
|
|||||||
Reference in New Issue
Block a user