feat: improve URL accessibility checks and download handling

- Enhanced the URL accessibility check function to handle multiple redirect types and provide detailed feedback on accessibility status, including content type validation.
- Updated the download function to follow redirects correctly and ensure proper error handling, improving the reliability of downloading source archives from GitHub.
- Adjusted the main function to utilize the final URLs after redirects for downloading, ensuring accurate resource retrieval.
This commit is contained in:
Cody Seibert
2025-12-13 01:03:26 -05:00
committed by Kacper
parent 8709b5d34b
commit f71533ab17

View File

@@ -68,58 +68,131 @@ function findArtifacts(dir, pattern) {
async function checkUrlAccessible(url, maxRetries = 10, initialDelay = 1000) { async function checkUrlAccessible(url, maxRetries = 10, initialDelay = 1000) {
for (let attempt = 0; attempt < maxRetries; attempt++) { for (let attempt = 0; attempt < maxRetries; attempt++) {
try { try {
const isAccessible = await new Promise((resolve, reject) => { const result = await new Promise((resolve, reject) => {
const request = https.get(url, { timeout: 10000 }, (response) => { const request = https.get(url, { timeout: 10000 }, (response) => {
// Follow redirects immediately (no retry needed for redirects) const statusCode = response.statusCode;
if (response.statusCode === 302 || response.statusCode === 301) {
response.destroy(); // Follow redirects
if (
statusCode === 302 ||
statusCode === 301 ||
statusCode === 307 ||
statusCode === 308
) {
const redirectUrl = response.headers.location; const redirectUrl = response.headers.location;
// Recursively check the redirect URL, but only once (no retries) response.destroy();
if (!redirectUrl) {
resolve({
accessible: false,
statusCode,
error: "Redirect without location header",
});
return;
}
// Follow the redirect URL
return https return https
.get(redirectUrl, { timeout: 10000 }, (redirectResponse) => { .get(redirectUrl, { timeout: 10000 }, (redirectResponse) => {
const redirectStatus = redirectResponse.statusCode;
const contentType =
redirectResponse.headers["content-type"] || "";
// Check if it's actually a file (zip/tar.gz) and not HTML
const isFile =
contentType.includes("application/zip") ||
contentType.includes("application/gzip") ||
contentType.includes("application/x-gzip") ||
contentType.includes("application/x-tar") ||
redirectUrl.includes(".zip") ||
redirectUrl.includes(".tar.gz");
const isGood = const isGood =
redirectResponse.statusCode >= 200 && redirectStatus >= 200 && redirectStatus < 300 && isFile;
redirectResponse.statusCode < 300;
redirectResponse.destroy(); redirectResponse.destroy();
resolve(isGood); resolve({
accessible: isGood,
statusCode: redirectStatus,
finalUrl: redirectUrl,
contentType,
});
})
.on("error", (error) => {
resolve({
accessible: false,
statusCode,
error: error.message,
});
}) })
.on("error", () => resolve(false))
.on("timeout", function () { .on("timeout", function () {
this.destroy(); this.destroy();
resolve(false); resolve({
accessible: false,
statusCode,
error: "Timeout following redirect",
});
}); });
} }
// Check if status is good (200-299 range)
const isGood = // Check if status is good (200-299 range) and it's actually a file
response.statusCode >= 200 && response.statusCode < 300; const contentType = response.headers["content-type"] || "";
const isFile =
contentType.includes("application/zip") ||
contentType.includes("application/gzip") ||
contentType.includes("application/x-gzip") ||
contentType.includes("application/x-tar") ||
url.includes(".zip") ||
url.includes(".tar.gz");
const isGood = statusCode >= 200 && statusCode < 300 && isFile;
response.destroy(); response.destroy();
resolve(isGood); resolve({ accessible: isGood, statusCode, contentType });
}); });
request.on("error", (error) => { request.on("error", (error) => {
resolve(false); resolve({
accessible: false,
statusCode: null,
error: error.message,
});
}); });
request.on("timeout", () => { request.on("timeout", () => {
request.destroy(); request.destroy();
resolve(false); resolve({
accessible: false,
statusCode: null,
error: "Request timeout",
});
}); });
}); });
if (isAccessible) { if (result.accessible) {
if (attempt > 0) { if (attempt > 0) {
console.log(`URL ${url} is now accessible after ${attempt} retries`); console.log(
`✓ URL ${url} is now accessible after ${attempt} retries (status: ${result.statusCode})`
);
} else {
console.log(
`✓ URL ${url} is accessible (status: ${result.statusCode})`
);
} }
return true; return result.finalUrl || url; // Return the final URL (after redirects) if available
} else {
const errorMsg = result.error ? ` - ${result.error}` : "";
const statusMsg = result.statusCode
? ` (status: ${result.statusCode})`
: "";
const contentTypeMsg = result.contentType
? ` [content-type: ${result.contentType}]`
: "";
console.log(
`✗ URL ${url} not accessible${statusMsg}${contentTypeMsg}${errorMsg}`
);
} }
} catch (error) { } catch (error) {
// Continue to retry console.log(`✗ URL ${url} check failed: ${error.message}`);
} }
if (attempt < maxRetries - 1) { if (attempt < maxRetries - 1) {
const delay = initialDelay * Math.pow(2, attempt); const delay = initialDelay * Math.pow(2, attempt);
console.log( console.log(
`URL ${url} not accessible yet (attempt ${ ` Retrying in ${delay}ms... (attempt ${attempt + 1}/${maxRetries})`
attempt + 1
}/${maxRetries}), retrying in ${delay}ms...`
); );
await new Promise((resolve) => setTimeout(resolve, delay)); await new Promise((resolve) => setTimeout(resolve, delay));
} }
@@ -130,31 +203,59 @@ async function checkUrlAccessible(url, maxRetries = 10, initialDelay = 1000) {
/**
 * Download a URL to a local file, following HTTP redirects.
 *
 * Redirect responses (301/302/307/308) are followed up to `maxRedirects`
 * hops; relative `Location` headers are resolved against the URL that
 * produced them. Any final status other than 200 is treated as a failure.
 * If the transfer fails after the output file was created, the partial file
 * is removed so callers never pick up a truncated archive.
 *
 * @param {string} url - Absolute https URL to fetch.
 * @param {string} outputPath - Destination file path (overwritten if present).
 * @param {number} [maxRedirects=5] - Maximum number of redirect hops to follow.
 * @returns {Promise<void>} Resolves once the response body has been written.
 * @throws {RangeError} If `maxRedirects` is not a non-negative integer.
 */
async function downloadFromGitHub(url, outputPath, maxRedirects = 5) {
  // Validate before touching the network so a bad budget fails fast.
  if (!Number.isInteger(maxRedirects) || maxRedirects < 0) {
    throw new RangeError(
      `maxRedirects must be a non-negative integer, got ${maxRedirects}`
    );
  }

  return new Promise((resolve, reject) => {
    const request = https.get(url, { timeout: 30000 }, (response) => {
      const statusCode = response.statusCode;

      // Follow redirects (all redirect types)
      if ([301, 302, 307, 308].includes(statusCode)) {
        const redirectUrl = response.headers.location;
        // The redirect body is irrelevant; release the socket right away.
        response.destroy();

        if (!redirectUrl) {
          reject(new Error(`Redirect without location header for ${url}`));
          return;
        }
        if (maxRedirects === 0) {
          // Guards against redirect loops that would otherwise recurse forever.
          reject(new Error(`Too many redirects while downloading ${url}`));
          return;
        }

        // `new URL(location, base)` handles absolute and relative locations
        // alike; a throw here would escape the callback, so convert it into
        // a rejection instead.
        let finalRedirectUrl;
        try {
          finalRedirectUrl = new URL(redirectUrl, url).href;
        } catch (error) {
          reject(
            new Error(`Invalid redirect location "${redirectUrl}" for ${url}`, {
              cause: error,
            })
          );
          return;
        }

        console.log(`  Following redirect: ${finalRedirectUrl}`);
        downloadFromGitHub(finalRedirectUrl, outputPath, maxRedirects - 1).then(
          resolve,
          reject
        );
        return;
      }

      if (statusCode !== 200) {
        response.destroy();
        reject(
          new Error(
            `Failed to download ${url}: ${statusCode} ${response.statusMessage}`
          )
        );
        return;
      }

      const fileStream = fs.createWriteStream(outputPath);

      // Shared failure path: stop both streams and drop the partial file.
      // The unlink result is ignored on purpose; the original error is what
      // the caller needs to see.
      const fail = (error) => {
        response.destroy();
        fileStream.destroy();
        fs.unlink(outputPath, () => reject(error));
      };

      // A connection dropped mid-body surfaces on the response, not the
      // request; without this handler the promise could stay pending.
      response.on("error", fail);
      fileStream.on("error", fail);
      fileStream.on("finish", () => {
        fileStream.close();
        resolve();
      });

      response.pipe(fileStream);
    });

    request.on("error", reject);
    request.on("timeout", () => {
      // The `timeout` option only emits this event; the request must be
      // torn down explicitly.
      request.destroy();
      reject(new Error(`Request timeout for ${url}`));
    });
  });
}
@@ -179,12 +280,13 @@ async function main() {
console.log(` TAR.GZ: ${githubTarGzUrl}`); console.log(` TAR.GZ: ${githubTarGzUrl}`);
// Wait for archives to be accessible with exponential backoff // Wait for archives to be accessible with exponential backoff
await checkUrlAccessible(githubZipUrl); // This returns the final URL after following redirects
await checkUrlAccessible(githubTarGzUrl); const finalZipUrl = await checkUrlAccessible(githubZipUrl);
const finalTarGzUrl = await checkUrlAccessible(githubTarGzUrl);
console.log(`Downloading source archives from GitHub...`); console.log(`Downloading source archives from GitHub...`);
await downloadFromGitHub(githubZipUrl, sourceZipPath); await downloadFromGitHub(finalZipUrl, sourceZipPath);
await downloadFromGitHub(githubTarGzUrl, sourceTarGzPath); await downloadFromGitHub(finalTarGzUrl, sourceTarGzPath);
console.log(`Downloaded source archives successfully`); console.log(`Downloaded source archives successfully`);