From 3d53f4e213436b62e3905cfdc80f80a49f108cb4 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 23 Aug 2024 13:56:05 -0300 Subject: [PATCH 1/2] Nick: unblocking pin --- .../api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts | 1 - .../scraper/WebScraper/utils/__tests__/socialBlockList.test.ts | 1 - apps/api/src/scraper/WebScraper/utils/blocklist.ts | 1 - 3 files changed, 3 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts b/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts index 42525257..77411b00 100644 --- a/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts +++ b/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts @@ -8,7 +8,6 @@ describe('Blocklist Functionality', () => { 'https://twitter.com/home', 'https://instagram.com/explore', 'https://linkedin.com/in/johndoe', - 'https://pinterest.com/pin/create', 'https://snapchat.com/add/johndoe', 'https://tiktok.com/@johndoe', 'https://reddit.com/r/funny', diff --git a/apps/api/src/scraper/WebScraper/utils/__tests__/socialBlockList.test.ts b/apps/api/src/scraper/WebScraper/utils/__tests__/socialBlockList.test.ts index c09cc5b3..3d98fedf 100644 --- a/apps/api/src/scraper/WebScraper/utils/__tests__/socialBlockList.test.ts +++ b/apps/api/src/scraper/WebScraper/utils/__tests__/socialBlockList.test.ts @@ -8,7 +8,6 @@ describe('isUrlBlocked', () => { 'https://twitter.com/someuser', 'https://instagram.com/someuser', 'https://www.linkedin.com/in/someuser', - 'https://pinterest.com/someuser', 'https://snapchat.com/someuser', 'https://tiktok.com/@someuser', 'https://reddit.com/r/somesubreddit', diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts index 7f1602e1..7b1ee19c 100644 --- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts +++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts @@ -6,7 +6,6 @@ const socialMediaBlocklist = [ 'twitter.com', 'instagram.com', 'linkedin.com', - 'pinterest.com', 'snapchat.com', 'tiktok.com', 'reddit.com', From 2ab0dd2e150da1020fb6d30b7aa99118f4d884c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Fri, 23 Aug 2024 19:20:17 +0200 Subject: [PATCH 2/2] fix(scrape): add further llm extraction catch --- apps/api/src/controllers/scrape.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index 880cbbfa..0b4df13c 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -1,4 +1,4 @@ -import { ExtractorOptions, PageOptions } from './../lib/entities'; + import { ExtractorOptions, PageOptions } from './../lib/entities'; import { Request, Response } from "express"; import { billTeam, checkTeamCredits } from "../services/billing/credit_billing"; import { authenticateUser } from "./auth"; @@ -78,7 +78,7 @@ export async function scrapeHelper( error: "Request timed out", returnCode: 408, } - } else if (typeof e === "string" && (e.includes("Error generating completions: ") || e.includes("Invalid schema for function"))) { + } else if (typeof e === "string" && (e.includes("Error generating completions: ") || e.includes("Invalid schema for function") || e.includes("LLM extraction did not match the extraction schema you provided."))) { return { success: false, error: e,