feat(v1/batch/scrape): appendToId

This commit is contained in:
Gergő Móricz 2024-12-04 23:35:29 +01:00
parent 7bde034020
commit 41d859203f
2 changed files with 10 additions and 4 deletions

View File

@@ -8,6 +8,7 @@ import {
} from "./types"; } from "./types";
import { import {
addCrawlJobs, addCrawlJobs,
getCrawl,
lockURLs, lockURLs,
saveCrawl, saveCrawl,
StoredCrawl, StoredCrawl,
@@ -24,9 +25,11 @@ export async function batchScrapeController(
) { ) {
req.body = batchScrapeRequestSchema.parse(req.body); req.body = batchScrapeRequestSchema.parse(req.body);
const id = uuidv4(); const id = req.body.appendToId ?? uuidv4();
await logCrawl(id, req.auth.team_id); if (!req.body.appendToId) {
await logCrawl(id, req.auth.team_id);
}
let { remainingCredits } = req.account!; let { remainingCredits } = req.account!;
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true'; const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
@@ -34,7 +37,7 @@ export async function batchScrapeController(
remainingCredits = Infinity; remainingCredits = Infinity;
} }
const sc: StoredCrawl = { const sc: StoredCrawl = req.body.appendToId ? await getCrawl(req.body.appendToId) as StoredCrawl : {
crawlerOptions: null, crawlerOptions: null,
scrapeOptions: req.body, scrapeOptions: req.body,
internalOptions: {}, internalOptions: {},
@@ -43,7 +46,9 @@ export async function batchScrapeController(
plan: req.auth.plan, plan: req.auth.plan,
}; };
await saveCrawl(id, sc); if (!req.body.appendToId) {
await saveCrawl(id, sc);
}
let jobPriority = 20; let jobPriority = 20;

View File

@@ -210,6 +210,7 @@ export const batchScrapeRequestSchema = scrapeOptions.extend({
urls: url.array(), urls: url.array(),
origin: z.string().optional().default("api"), origin: z.string().optional().default("api"),
webhook: webhookSchema.optional(), webhook: webhookSchema.optional(),
appendToId: z.string().uuid().optional(),
}).strict(strictMessage).refine( }).strict(strictMessage).refine(
(obj) => { (obj) => {
const hasExtractFormat = obj.formats?.includes("extract"); const hasExtractFormat = obj.formats?.includes("extract");