From 0b8047c7a0c451246ef768120bd8e10971730ba9 Mon Sep 17 00:00:00 2001 From: Gergo Moricz Date: Thu, 18 Jul 2024 19:13:43 +0200 Subject: [PATCH] fix(WebScraper): infinite regex leading to fly.io instance hangs --- apps/api/src/scraper/WebScraper/utils/replacePaths.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/utils/replacePaths.ts b/apps/api/src/scraper/WebScraper/utils/replacePaths.ts index 788916cd..25b43f0a 100644 --- a/apps/api/src/scraper/WebScraper/utils/replacePaths.ts +++ b/apps/api/src/scraper/WebScraper/utils/replacePaths.ts @@ -6,13 +6,13 @@ export const replacePathsWithAbsolutePaths = (documents: Document[]): Document[] const baseUrl = new URL(document.metadata.sourceURL).origin; const paths = document.content.match( - /(!?\[.*?\])\(((?:[^()]+|\((?:[^()]+|\([^()]*\))*\))*)\)|href="([^"]+)"/g + /!?\[.*?\]\(.*?\)|href=".+?"/g ) || []; paths.forEach((path: string) => { try { const isImage = path.startsWith("!"); - let matchedUrl = path.match(/\(([^)]+)\)/) || path.match(/href="([^"]+)"/); + let matchedUrl = path.match(/\((.*?)\)/) || path.match(/href="([^"]+)"/); let url = matchedUrl[1]; if (!url.startsWith("data:") && !url.startsWith("http")) { @@ -50,11 +50,11 @@ export const replaceImgPathsWithAbsolutePaths = (documents: Document[]): Documen const baseUrl = new URL(document.metadata.sourceURL).origin; const images = document.content.match( - /!\[.*?\]\(((?:[^()]+|\((?:[^()]+|\([^()]*\))*\))*)\)/g + /!\[.*?\]\(.*?\)/g ) || []; images.forEach((image: string) => { - let imageUrl = image.match(/\(([^)]+)\)/)[1]; + let imageUrl = image.match(/\((.*?)\)/)[1]; let altText = image.match(/\[(.*?)\]/)[1]; if (!imageUrl.startsWith("data:image")) {