From faf58dfca74c21f0f17d4862c9bda0ba36a34a16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=B3ricz=20Gerg=C5=91?= Date: Fri, 17 Jan 2025 12:41:00 +0100 Subject: [PATCH] fix(removeUnwantedElements): post-includeTags excludeTags Fixes #700 --- apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts b/apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts index 92df68f4..57015557 100644 --- a/apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts +++ b/apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts @@ -54,7 +54,7 @@ export const htmlTransform = ( url: string, scrapeOptions: ScrapeOptions, ) => { - const soup = load(html); + let soup = load(html); // remove unwanted elements if ( @@ -68,7 +68,8 @@ export const htmlTransform = ( newRoot.append(soup(element).clone()); }); }); - return newRoot.html() ?? ""; + + soup = load(newRoot.html() ?? ""); } soup("script, style, noscript, meta, head").remove();