From 749d89a551388fea98976cc245b2e9f8a8cd315f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 22 May 2025 02:46:55 +0200 Subject: [PATCH] feat(api/v1/extract): ignoreInvalidURLs (#1585) --- apps/api/src/controllers/v1/extract.ts | 7 ++++++- apps/api/src/controllers/v1/types.ts | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 59f6f8f2..4ffaddfe 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -62,7 +62,9 @@ export async function extractController( const originalRequest = { ...req.body }; req.body = extractRequestSchema.parse(req.body); - if (req.body.urls?.some((url: string) => isUrlBlocked(url, req.acuc?.flags ?? null))) { + const invalidURLs: string[] = req.body.urls?.filter((url: string) => isUrlBlocked(url, req.acuc?.flags ?? null)) ?? []; + + if (invalidURLs.length > 0 && !req.body.ignoreInvalidURLs) { if (!res.headersSent) { return res.status(403).json({ success: false, @@ -144,5 +146,8 @@ export async function extractController( success: true, id: extractId, urlTrace: [], + ...(invalidURLs.length > 0 && req.body.ignoreInvalidURLs ? { + invalidURLs, + } : {}), }); } diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index f4a3134d..020cd563 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -478,6 +478,7 @@ export const extractV1Options = z .optional(), agent: agentOptionsExtract.optional(), __experimental_showCostTracking: z.boolean().default(false), + ignoreInvalidURLs: z.boolean().default(false), }) .strict(strictMessage) .refine((obj) => obj.urls || obj.prompt, {