Add `appendToId` to the v1 batch-scrape endpoint.

batchScrapeRequestSchema gains an optional UUID-string field `appendToId`.
When it is present, batchScrapeController uses it as the job id instead of
generating one with uuidv4(), skips logCrawl() and saveCrawl(), and loads the
StoredCrawl through getCrawl(appendToId). When it is absent, a fresh id is
generated and logCrawl()/saveCrawl() run as before.

NOTE(review): the getCrawl() result is cast with `as StoredCrawl` and is not
checked in the lines shown here; confirm what getCrawl returns for an unknown id.
NOTE(review): nothing in these hunks compares the loaded crawl against
req.auth.team_id; confirm ownership is enforced elsewhere.
NOTE(review): indentation inside the hunks was restored assuming two-space
indentation; verify against the target files before applying.

diff --git a/apps/api/src/controllers/v1/batch-scrape.ts b/apps/api/src/controllers/v1/batch-scrape.ts
index 3830b1fe..dd8ab2fb 100644
--- a/apps/api/src/controllers/v1/batch-scrape.ts
+++ b/apps/api/src/controllers/v1/batch-scrape.ts
@@ -8,6 +8,7 @@ import {
 } from "./types";
 import {
   addCrawlJobs,
+  getCrawl,
   lockURLs,
   saveCrawl,
   StoredCrawl,
@@ -24,9 +25,11 @@ export async function batchScrapeController(
 ) {
   req.body = batchScrapeRequestSchema.parse(req.body);
 
-  const id = uuidv4();
+  const id = req.body.appendToId ?? uuidv4();
 
-  await logCrawl(id, req.auth.team_id);
+  if (!req.body.appendToId) {
+    await logCrawl(id, req.auth.team_id);
+  }
 
   let { remainingCredits } = req.account!;
   const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
@@ -34,7 +37,7 @@ export async function batchScrapeController(
     remainingCredits = Infinity;
   }
 
-  const sc: StoredCrawl = {
+  const sc: StoredCrawl = req.body.appendToId ? await getCrawl(req.body.appendToId) as StoredCrawl : {
     crawlerOptions: null,
     scrapeOptions: req.body,
     internalOptions: {},
@@ -43,7 +46,9 @@ export async function batchScrapeController(
     plan: req.auth.plan,
   };
 
-  await saveCrawl(id, sc);
+  if (!req.body.appendToId) {
+    await saveCrawl(id, sc);
+  }
 
   let jobPriority = 20;
 
diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts
index da8847a8..f1596f5e 100644
--- a/apps/api/src/controllers/v1/types.ts
+++ b/apps/api/src/controllers/v1/types.ts
@@ -210,6 +210,7 @@ export const batchScrapeRequestSchema = scrapeOptions.extend({
   urls: url.array(),
   origin: z.string().optional().default("api"),
   webhook: webhookSchema.optional(),
+  appendToId: z.string().uuid().optional(),
 }).strict(strictMessage).refine(
   (obj) => {
     const hasExtractFormat = obj.formats?.includes("extract");