From 367af9512fa47d9bd41fd9822b81031517b809c0 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 31 Oct 2024 10:53:47 -0300 Subject: [PATCH] added iframe links to extractLinksFromHTML --- apps/api/src/scraper/WebScraper/crawler.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index 72a49fd8..1ae166b4 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -333,6 +333,16 @@ export class WebCrawler { } }); + // Extract links from iframes with inline src + $("iframe").each((_, element) => { + const src = $(element).attr("src"); + if (src && src.startsWith("data:text/html")) { + const iframeHtml = decodeURIComponent(src.split(",")[1]); + const iframeLinks = this.extractLinksFromHTML(iframeHtml, url); + links = links.concat(iframeLinks); + } + }); + return links; }