feat(js-sdk/batch/scrape): add ignoreInvalidURLs option

This commit is contained in:
Gergő Móricz 2024-12-14 01:16:09 +01:00
parent 4b5014d7fe
commit 9cc6576571

View File

@ -183,6 +183,7 @@ export interface BatchScrapeResponse {
url?: string; url?: string;
success: true; success: true;
error?: string; error?: string;
invalidURLs?: string[];
} }
/** /**
@ -576,9 +577,10 @@ export default class FirecrawlApp {
pollInterval: number = 2, pollInterval: number = 2,
idempotencyKey?: string, idempotencyKey?: string,
webhook?: CrawlParams["webhook"], webhook?: CrawlParams["webhook"],
ignoreInvalidURLs?: boolean,
): Promise<BatchScrapeStatusResponse | ErrorResponse> { ): Promise<BatchScrapeStatusResponse | ErrorResponse> {
const headers = this.prepareHeaders(idempotencyKey); const headers = this.prepareHeaders(idempotencyKey);
let jsonData: any = { urls, ...params }; let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params };
if (jsonData?.extract?.schema) { if (jsonData?.extract?.schema) {
let schema = jsonData.extract.schema; let schema = jsonData.extract.schema;
@ -621,10 +623,12 @@ export default class FirecrawlApp {
async asyncBatchScrapeUrls( async asyncBatchScrapeUrls(
urls: string[], urls: string[],
params?: ScrapeParams, params?: ScrapeParams,
idempotencyKey?: string idempotencyKey?: string,
webhook?: CrawlParams["webhook"],
ignoreInvalidURLs?: boolean,
): Promise<BatchScrapeResponse | ErrorResponse> { ): Promise<BatchScrapeResponse | ErrorResponse> {
const headers = this.prepareHeaders(idempotencyKey); const headers = this.prepareHeaders(idempotencyKey);
let jsonData: any = { urls, ...(params ?? {}) }; let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...(params ?? {}) };
try { try {
const response: AxiosResponse = await this.postRequest( const response: AxiosResponse = await this.postRequest(
this.apiUrl + `/v1/batch/scrape`, this.apiUrl + `/v1/batch/scrape`,
@ -657,8 +661,10 @@ export default class FirecrawlApp {
urls: string[], urls: string[],
params?: ScrapeParams, params?: ScrapeParams,
idempotencyKey?: string, idempotencyKey?: string,
webhook?: CrawlParams["webhook"],
ignoreInvalidURLs?: boolean,
) { ) {
const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey); const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
if (crawl.success && crawl.id) { if (crawl.success && crawl.id) {
const id = crawl.id; const id = crawl.id;