From e551695d17a416341987871989e741f341bee49e Mon Sep 17 00:00:00 2001
From: Yanlong Wang
Date: Sat, 8 Mar 2025 18:43:10 +0800
Subject: [PATCH] fix: fail early on special cookie redirects

---
 src/services/curl.ts | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/services/curl.ts b/src/services/curl.ts
index acfa0c3..4de2f0d 100644
--- a/src/services/curl.ts
+++ b/src/services/curl.ts
@@ -294,7 +294,8 @@ export class CurlControl extends AsyncService {
     }
 
     async urlToFile(urlToCrawl: URL, crawlOpts?: CURLScrappingOptions) {
-        let leftRedirection = 10;
+        let leftRedirection = 6;
+        let cookieRedirects = 0;
         let opts = { ...crawlOpts };
         let nextHopUrl = urlToCrawl;
         const fakeHeaderInfos: HeaderInfo[] = [];
@@ -312,10 +313,16 @@ export class CurlControl extends AsyncService {
                     if (parsed.length) {
                         opts.cookies = [...(opts.cookies || []), ...parsed];
                     }
+                    if (!location) {
+                        cookieRedirects += 1;
+                    }
                 }
 
                 if (!location && !setCookieHeader) {
-                    throw new AssertionFailureError(`Failed to access ${urlToCrawl}: Bad redirection from ${nextHopUrl}`);
+                    throw new ServiceBadAttemptError(`Failed to access ${urlToCrawl}: Bad redirection from ${nextHopUrl}`);
+                }
+                if (!location && cookieRedirects > 1) {
+                    throw new ServiceBadAttemptError(`Failed to access ${urlToCrawl}: Browser required to solve complex cookie preconditions.`);
                 }
 
                 nextHopUrl = new URL(location || '', nextHopUrl);
@@ -331,7 +338,7 @@ export class CurlControl extends AsyncService {
             };
         } while (leftRedirection > 0);
 
-        throw new AssertionFailureError(`Failed to access ${urlToCrawl}: Too many redirections.`);
+        throw new ServiceBadAttemptError(`Failed to access ${urlToCrawl}: Too many redirections.`);
     }
 
     async sideLoad(targetUrl: URL, crawlOpts?: CURLScrappingOptions) {