feat(sitemap): re-enable fallback to tlsclient

This commit is contained in:
Móricz Gergő 2025-01-23 08:43:13 +01:00
parent 72198123cb
commit 3761eb17a7

View File

@ -51,45 +51,42 @@ export async function getLinksFromSitemap(
) { ) {
content = fetchResponse.document.rawHtml!; content = fetchResponse.document.rawHtml!;
} else { } else {
// logger.debug( logger.debug(
// "Failed to scrape sitemap via fetch, falling back to TLSClient...", "Failed to scrape sitemap via fetch, falling back to TLSClient...",
// { {
// error: fetchResponse.success error: fetchResponse.success
// ? fetchResponse.document ? fetchResponse.document
// : fetchResponse.error, : fetchResponse.error,
// }, },
// ); );
// const tlsResponse = await scrapeURL( const tlsResponse = await scrapeURL(
// "sitemap", "sitemap",
// sitemapUrl, sitemapUrl,
// scrapeOptions.parse({ formats: ["rawHtml"] }), scrapeOptions.parse({ formats: ["rawHtml"] }),
// { forceEngine: "fire-engine;tlsclient", v0DisableJsDom: true }, { forceEngine: "fire-engine;tlsclient", v0DisableJsDom: true },
// ); );
// if ( if (
// tlsResponse.success && tlsResponse.success &&
// tlsResponse.document.metadata.statusCode >= 200 && tlsResponse.document.metadata.statusCode >= 200 &&
// tlsResponse.document.metadata.statusCode < 300 tlsResponse.document.metadata.statusCode < 300
// ) { ) {
// content = tlsResponse.document.rawHtml!; content = tlsResponse.document.rawHtml!;
// } else { } else {
logger.error( logger.error(
`Request failed for ${sitemapUrl}, ran out of engines!`, `Request failed for ${sitemapUrl}, ran out of engines!`,
{ {
method: "getLinksFromSitemap", method: "getLinksFromSitemap",
mode, mode,
sitemapUrl, sitemapUrl,
// error: tlsResponse.success error: tlsResponse.success
// ? tlsResponse.document ? tlsResponse.document
// : tlsResponse.error, : tlsResponse.error,
error: fetchResponse.success
? fetchResponse.document
: fetchResponse.error,
}, },
); );
return 0; return 0;
// } }
} }
} else { } else {
const fetchResponse = await scrapeURL( const fetchResponse = await scrapeURL(