mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 05:25:57 +08:00
Merge branch 'main' into pr/1003
This commit is contained in:
commit
f7cfbba651
@ -111,6 +111,20 @@ curl -X POST http://localhost:3002/v1/crawl \
|
|||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Alternative: Using Docker Compose
|
||||||
|
|
||||||
|
For a simpler setup, you can use Docker Compose to run all services:
|
||||||
|
|
||||||
|
1. Prerequisites: Make sure you have Docker and Docker Compose installed
|
||||||
|
2. Copy the `.env.example` file to `.env` in the `/apps/api/` directory and configure as needed
|
||||||
|
3. From the root directory, run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up
|
||||||
|
```
|
||||||
|
|
||||||
|
This will start Redis, the API server, and workers automatically in the correct configuration.
|
||||||
|
|
||||||
## Tests:
|
## Tests:
|
||||||
|
|
||||||
The best way to do this is run the test with `npm run test:local-no-auth` if you'd like to run the tests without authentication.
|
The best way to do this is run the test with `npm run test:local-no-auth` if you'd like to run the tests without authentication.
|
||||||
|
2
apps/api/.gitignore
vendored
2
apps/api/.gitignore
vendored
@ -9,3 +9,5 @@ dump.rdb
|
|||||||
|
|
||||||
.rdb
|
.rdb
|
||||||
.sentryclirc
|
.sentryclirc
|
||||||
|
|
||||||
|
.env.*
|
@ -3,7 +3,7 @@ import * as Sentry from "@sentry/node";
|
|||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
|
|
||||||
import { robustFetch } from "../../lib/fetch";
|
import { robustFetch } from "../../lib/fetch";
|
||||||
import { ActionError, EngineError, SiteError } from "../../error";
|
import { ActionError, EngineError, SiteError, UnsupportedFileError } from "../../error";
|
||||||
|
|
||||||
const successSchema = z.object({
|
const successSchema = z.object({
|
||||||
jobId: z.string(),
|
jobId: z.string(),
|
||||||
@ -35,6 +35,12 @@ const successSchema = z.object({
|
|||||||
})
|
})
|
||||||
.array()
|
.array()
|
||||||
.optional(),
|
.optional(),
|
||||||
|
|
||||||
|
// chrome-cdp only -- file download handler
|
||||||
|
file: z.object({
|
||||||
|
name: z.string(),
|
||||||
|
content: z.string(),
|
||||||
|
}).optional().or(z.null()),
|
||||||
});
|
});
|
||||||
|
|
||||||
export type FireEngineCheckStatusSuccess = z.infer<typeof successSchema>;
|
export type FireEngineCheckStatusSuccess = z.infer<typeof successSchema>;
|
||||||
@ -111,6 +117,11 @@ export async function fireEngineCheckStatus(
|
|||||||
status.error.includes("Chrome error: ")
|
status.error.includes("Chrome error: ")
|
||||||
) {
|
) {
|
||||||
throw new SiteError(status.error.split("Chrome error: ")[1]);
|
throw new SiteError(status.error.split("Chrome error: ")[1]);
|
||||||
|
} else if (
|
||||||
|
typeof status.error === "string" &&
|
||||||
|
status.error.includes("File size exceeds")
|
||||||
|
) {
|
||||||
|
throw new UnsupportedFileError("File size exceeds " + status.error.split("File size exceeds ")[1]);
|
||||||
} else if (
|
} else if (
|
||||||
typeof status.error === "string" &&
|
typeof status.error === "string" &&
|
||||||
// TODO: improve this later
|
// TODO: improve this later
|
||||||
|
@ -13,10 +13,11 @@ import {
|
|||||||
FireEngineCheckStatusSuccess,
|
FireEngineCheckStatusSuccess,
|
||||||
StillProcessingError,
|
StillProcessingError,
|
||||||
} from "./checkStatus";
|
} from "./checkStatus";
|
||||||
import { ActionError, EngineError, SiteError, TimeoutError } from "../../error";
|
import { ActionError, EngineError, SiteError, TimeoutError, UnsupportedFileError } from "../../error";
|
||||||
import * as Sentry from "@sentry/node";
|
import * as Sentry from "@sentry/node";
|
||||||
import { Action } from "../../../../lib/entities";
|
import { Action } from "../../../../lib/entities";
|
||||||
import { specialtyScrapeCheck } from "../utils/specialtyHandler";
|
import { specialtyScrapeCheck } from "../utils/specialtyHandler";
|
||||||
|
import { fireEngineDelete } from "./delete";
|
||||||
|
|
||||||
// This function does not take `Meta` on purpose. It may not access any
|
// This function does not take `Meta` on purpose. It may not access any
|
||||||
// meta values to construct the request -- that must be done by the
|
// meta values to construct the request -- that must be done by the
|
||||||
@ -44,6 +45,13 @@ async function performFireEngineScrape<
|
|||||||
while (status === undefined) {
|
while (status === undefined) {
|
||||||
if (errors.length >= errorLimit) {
|
if (errors.length >= errorLimit) {
|
||||||
logger.error("Error limit hit.", { errors });
|
logger.error("Error limit hit.", { errors });
|
||||||
|
fireEngineDelete(
|
||||||
|
logger.child({
|
||||||
|
method: "performFireEngineScrape/fireEngineDelete",
|
||||||
|
afterErrors: errors,
|
||||||
|
}),
|
||||||
|
scrape.jobId,
|
||||||
|
);
|
||||||
throw new Error("Error limit hit. See e.cause.errors for errors.", {
|
throw new Error("Error limit hit. See e.cause.errors for errors.", {
|
||||||
cause: { errors },
|
cause: { errors },
|
||||||
});
|
});
|
||||||
@ -71,8 +79,16 @@ async function performFireEngineScrape<
|
|||||||
} else if (
|
} else if (
|
||||||
error instanceof EngineError ||
|
error instanceof EngineError ||
|
||||||
error instanceof SiteError ||
|
error instanceof SiteError ||
|
||||||
error instanceof ActionError
|
error instanceof ActionError ||
|
||||||
|
error instanceof UnsupportedFileError
|
||||||
) {
|
) {
|
||||||
|
fireEngineDelete(
|
||||||
|
logger.child({
|
||||||
|
method: "performFireEngineScrape/fireEngineDelete",
|
||||||
|
afterError: error,
|
||||||
|
}),
|
||||||
|
scrape.jobId,
|
||||||
|
);
|
||||||
logger.debug("Fire-engine scrape job failed.", {
|
logger.debug("Fire-engine scrape job failed.", {
|
||||||
error,
|
error,
|
||||||
jobId: scrape.jobId,
|
jobId: scrape.jobId,
|
||||||
@ -91,6 +107,26 @@ async function performFireEngineScrape<
|
|||||||
await new Promise((resolve) => setTimeout(resolve, 250));
|
await new Promise((resolve) => setTimeout(resolve, 250));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
specialtyScrapeCheck(
|
||||||
|
logger.child({
|
||||||
|
method: "performFireEngineScrape/specialtyScrapeCheck",
|
||||||
|
}),
|
||||||
|
status.responseHeaders,
|
||||||
|
);
|
||||||
|
|
||||||
|
if (status.file) {
|
||||||
|
const content = status.file.content;
|
||||||
|
delete status.file;
|
||||||
|
status.content = Buffer.from(content, "base64").toString("utf8"); // TODO: handle other encodings via Content-Type tag
|
||||||
|
}
|
||||||
|
|
||||||
|
fireEngineDelete(
|
||||||
|
logger.child({
|
||||||
|
method: "performFireEngineScrape/fireEngineDelete",
|
||||||
|
}),
|
||||||
|
scrape.jobId,
|
||||||
|
);
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -160,13 +196,6 @@ export async function scrapeURLWithFireEngineChromeCDP(
|
|||||||
timeout,
|
timeout,
|
||||||
);
|
);
|
||||||
|
|
||||||
specialtyScrapeCheck(
|
|
||||||
meta.logger.child({
|
|
||||||
method: "scrapeURLWithFireEngineChromeCDP/specialtyScrapeCheck",
|
|
||||||
}),
|
|
||||||
response.responseHeaders,
|
|
||||||
);
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
meta.options.formats.includes("screenshot") ||
|
meta.options.formats.includes("screenshot") ||
|
||||||
meta.options.formats.includes("screenshot@fullPage")
|
meta.options.formats.includes("screenshot@fullPage")
|
||||||
@ -241,13 +270,6 @@ export async function scrapeURLWithFireEnginePlaywright(
|
|||||||
timeout,
|
timeout,
|
||||||
);
|
);
|
||||||
|
|
||||||
specialtyScrapeCheck(
|
|
||||||
meta.logger.child({
|
|
||||||
method: "scrapeURLWithFireEnginePlaywright/specialtyScrapeCheck",
|
|
||||||
}),
|
|
||||||
response.responseHeaders,
|
|
||||||
);
|
|
||||||
|
|
||||||
if (!response.url) {
|
if (!response.url) {
|
||||||
meta.logger.warn("Fire-engine did not return the response's URL", {
|
meta.logger.warn("Fire-engine did not return the response's URL", {
|
||||||
response,
|
response,
|
||||||
@ -301,13 +323,6 @@ export async function scrapeURLWithFireEngineTLSClient(
|
|||||||
timeout,
|
timeout,
|
||||||
);
|
);
|
||||||
|
|
||||||
specialtyScrapeCheck(
|
|
||||||
meta.logger.child({
|
|
||||||
method: "scrapeURLWithFireEngineTLSClient/specialtyScrapeCheck",
|
|
||||||
}),
|
|
||||||
response.responseHeaders,
|
|
||||||
);
|
|
||||||
|
|
||||||
if (!response.url) {
|
if (!response.url) {
|
||||||
meta.logger.warn("Fire-engine did not return the response's URL", {
|
meta.logger.warn("Fire-engine did not return the response's URL", {
|
||||||
response,
|
response,
|
||||||
|
@ -32,6 +32,9 @@ async function scrapePDFWithLlamaParse(
|
|||||||
tempFilePath,
|
tempFilePath,
|
||||||
) as unknown as ReadableStream<Uint8Array>;
|
) as unknown as ReadableStream<Uint8Array>;
|
||||||
},
|
},
|
||||||
|
bytes() {
|
||||||
|
throw Error("Unimplemented in mock Blob: bytes");
|
||||||
|
},
|
||||||
arrayBuffer() {
|
arrayBuffer() {
|
||||||
throw Error("Unimplemented in mock Blob: arrayBuffer");
|
throw Error("Unimplemented in mock Blob: arrayBuffer");
|
||||||
},
|
},
|
||||||
|
@ -64,3 +64,11 @@ export class ActionError extends Error {
|
|||||||
this.code = code;
|
this.code = code;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export class UnsupportedFileError extends Error {
|
||||||
|
public reason: string;
|
||||||
|
constructor(reason: string) {
|
||||||
|
super("Scrape resulted in unsupported file: " + reason);
|
||||||
|
this.reason = reason;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -19,6 +19,7 @@ import {
|
|||||||
RemoveFeatureError,
|
RemoveFeatureError,
|
||||||
SiteError,
|
SiteError,
|
||||||
TimeoutError,
|
TimeoutError,
|
||||||
|
UnsupportedFileError,
|
||||||
} from "./error";
|
} from "./error";
|
||||||
import { executeTransformers } from "./transformers";
|
import { executeTransformers } from "./transformers";
|
||||||
import { LLMRefusalError } from "./transformers/llmExtract";
|
import { LLMRefusalError } from "./transformers/llmExtract";
|
||||||
@ -292,6 +293,8 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
|
|||||||
throw error;
|
throw error;
|
||||||
} else if (error instanceof ActionError) {
|
} else if (error instanceof ActionError) {
|
||||||
throw error;
|
throw error;
|
||||||
|
} else if (error instanceof UnsupportedFileError) {
|
||||||
|
throw error;
|
||||||
} else {
|
} else {
|
||||||
Sentry.captureException(error);
|
Sentry.captureException(error);
|
||||||
meta.logger.info(
|
meta.logger.info(
|
||||||
@ -414,6 +417,8 @@ export async function scrapeURL(
|
|||||||
meta.logger.warn("scrapeURL: Site failed to load in browser", { error });
|
meta.logger.warn("scrapeURL: Site failed to load in browser", { error });
|
||||||
} else if (error instanceof ActionError) {
|
} else if (error instanceof ActionError) {
|
||||||
meta.logger.warn("scrapeURL: Action(s) failed to complete", { error });
|
meta.logger.warn("scrapeURL: Action(s) failed to complete", { error });
|
||||||
|
} else if (error instanceof UnsupportedFileError) {
|
||||||
|
meta.logger.warn("scrapeURL: Tried to scrape unsupported file", { error });
|
||||||
} else {
|
} else {
|
||||||
Sentry.captureException(error);
|
Sentry.captureException(error);
|
||||||
meta.logger.error("scrapeURL: Unexpected error happened", { error });
|
meta.logger.error("scrapeURL: Unexpected error happened", { error });
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "1.9.8",
|
"version": "1.10.1",
|
||||||
"description": "JavaScript SDK for Firecrawl API",
|
"description": "JavaScript SDK for Firecrawl API",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
import { describe, test, expect, jest } from '@jest/globals';
|
import { describe, expect, jest, test } from '@jest/globals';
|
||||||
import axios from 'axios';
|
|
||||||
import FirecrawlApp from '../index';
|
|
||||||
|
|
||||||
import { readFile } from 'fs/promises';
|
import FirecrawlApp from '../index';
|
||||||
|
import axios from 'axios';
|
||||||
import { join } from 'path';
|
import { join } from 'path';
|
||||||
|
import { readFile } from 'fs/promises';
|
||||||
|
|
||||||
// Mock jest and set the type
|
// Mock jest and set the type
|
||||||
jest.mock('axios');
|
jest.mock('axios');
|
||||||
@ -14,13 +14,22 @@ async function loadFixture(name: string): Promise<string> {
|
|||||||
return await readFile(join(__dirname, 'fixtures', `${name}.json`), 'utf-8')
|
return await readFile(join(__dirname, 'fixtures', `${name}.json`), 'utf-8')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
|
||||||
|
|
||||||
describe('the firecrawl JS SDK', () => {
|
describe('the firecrawl JS SDK', () => {
|
||||||
|
|
||||||
test('Should require an API key to instantiate FirecrawlApp', async () => {
|
test('Should require an API key only for cloud service', async () => {
|
||||||
const fn = () => {
|
if (API_URL.includes('api.firecrawl.dev')) {
|
||||||
new FirecrawlApp({ apiKey: undefined });
|
// Should throw for cloud service
|
||||||
};
|
expect(() => {
|
||||||
expect(fn).toThrow('No API key provided');
|
new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
|
||||||
|
}).toThrow('No API key provided');
|
||||||
|
} else {
|
||||||
|
// Should not throw for self-hosted
|
||||||
|
expect(() => {
|
||||||
|
new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
|
||||||
|
}).not.toThrow();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
test('Should return scraped data from a /scrape API call', async () => {
|
test('Should return scraped data from a /scrape API call', async () => {
|
||||||
|
@ -9,15 +9,28 @@ const TEST_API_KEY = process.env.TEST_API_KEY;
|
|||||||
const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
|
const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
|
||||||
|
|
||||||
describe('FirecrawlApp E2E Tests', () => {
|
describe('FirecrawlApp E2E Tests', () => {
|
||||||
test.concurrent('should throw error for no API key', async () => {
|
test.concurrent('should throw error for no API key only for cloud service', async () => {
|
||||||
|
if (API_URL.includes('api.firecrawl.dev')) {
|
||||||
|
// Should throw for cloud service
|
||||||
expect(() => {
|
expect(() => {
|
||||||
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
||||||
}).toThrow("No API key provided");
|
}).toThrow("No API key provided");
|
||||||
|
} else {
|
||||||
|
// Should not throw for self-hosted
|
||||||
|
expect(() => {
|
||||||
|
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
||||||
|
}).not.toThrow();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
||||||
|
if (API_URL.includes('api.firecrawl.dev')) {
|
||||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 404");
|
||||||
|
} else {
|
||||||
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
|
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
|
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
|
||||||
@ -155,8 +168,13 @@ describe('FirecrawlApp E2E Tests', () => {
|
|||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
||||||
|
if (API_URL.includes('api.firecrawl.dev')) {
|
||||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
|
||||||
|
} else {
|
||||||
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
|
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
||||||
@ -331,8 +349,13 @@ describe('FirecrawlApp E2E Tests', () => {
|
|||||||
}, 60000); // 60 seconds timeout
|
}, 60000); // 60 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should throw error for invalid API key on map', async () => {
|
test.concurrent('should throw error for invalid API key on map', async () => {
|
||||||
|
if (API_URL.includes('api.firecrawl.dev')) {
|
||||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
|
||||||
|
} else {
|
||||||
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||||
|
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
test.concurrent('should throw error for blocklisted URL on map', async () => {
|
test.concurrent('should throw error for blocklisted URL on map', async () => {
|
||||||
@ -349,8 +372,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|||||||
}, 30000); // 30 seconds timeout
|
}, 30000); // 30 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should return successful response for valid map', async () => {
|
test.concurrent('should return successful response for valid map', async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
|
||||||
const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
|
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
|
|
||||||
expect(response.links?.length).toBeGreaterThan(0);
|
expect(response.links?.length).toBeGreaterThan(0);
|
||||||
|
@ -290,17 +290,23 @@ export default class FirecrawlApp {
|
|||||||
public apiKey: string;
|
public apiKey: string;
|
||||||
public apiUrl: string;
|
public apiUrl: string;
|
||||||
|
|
||||||
|
private isCloudService(url: string): boolean {
|
||||||
|
return url.includes('api.firecrawl.dev');
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes a new instance of the FirecrawlApp class.
|
* Initializes a new instance of the FirecrawlApp class.
|
||||||
* @param config - Configuration options for the FirecrawlApp instance.
|
* @param config - Configuration options for the FirecrawlApp instance.
|
||||||
*/
|
*/
|
||||||
constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
|
constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
|
||||||
if (typeof apiKey !== "string") {
|
const baseUrl = apiUrl || "https://api.firecrawl.dev";
|
||||||
|
|
||||||
|
if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
|
||||||
throw new FirecrawlError("No API key provided", 401);
|
throw new FirecrawlError("No API key provided", 401);
|
||||||
}
|
}
|
||||||
|
|
||||||
this.apiKey = apiKey;
|
this.apiKey = apiKey || '';
|
||||||
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
this.apiUrl = baseUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -464,7 +470,7 @@ export default class FirecrawlApp {
|
|||||||
let statusData = response.data
|
let statusData = response.data
|
||||||
if ("data" in statusData) {
|
if ("data" in statusData) {
|
||||||
let data = statusData.data;
|
let data = statusData.data;
|
||||||
while ('next' in statusData) {
|
while (typeof statusData === 'object' && 'next' in statusData) {
|
||||||
statusData = (await this.getRequest(statusData.next, headers)).data;
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
||||||
data = data.concat(statusData.data);
|
data = data.concat(statusData.data);
|
||||||
}
|
}
|
||||||
@ -698,7 +704,7 @@ export default class FirecrawlApp {
|
|||||||
let statusData = response.data
|
let statusData = response.data
|
||||||
if ("data" in statusData) {
|
if ("data" in statusData) {
|
||||||
let data = statusData.data;
|
let data = statusData.data;
|
||||||
while ('next' in statusData) {
|
while (typeof statusData === 'object' && 'next' in statusData) {
|
||||||
statusData = (await this.getRequest(statusData.next, headers)).data;
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
||||||
data = data.concat(statusData.data);
|
data = data.concat(statusData.data);
|
||||||
}
|
}
|
||||||
@ -857,6 +863,7 @@ export default class FirecrawlApp {
|
|||||||
headers: AxiosRequestHeaders,
|
headers: AxiosRequestHeaders,
|
||||||
checkInterval: number
|
checkInterval: number
|
||||||
): Promise<CrawlStatusResponse | ErrorResponse> {
|
): Promise<CrawlStatusResponse | ErrorResponse> {
|
||||||
|
try {
|
||||||
while (true) {
|
while (true) {
|
||||||
let statusResponse: AxiosResponse = await this.getRequest(
|
let statusResponse: AxiosResponse = await this.getRequest(
|
||||||
`${this.apiUrl}/v1/crawl/${id}`,
|
`${this.apiUrl}/v1/crawl/${id}`,
|
||||||
@ -867,7 +874,7 @@ export default class FirecrawlApp {
|
|||||||
if (statusData.status === "completed") {
|
if (statusData.status === "completed") {
|
||||||
if ("data" in statusData) {
|
if ("data" in statusData) {
|
||||||
let data = statusData.data;
|
let data = statusData.data;
|
||||||
while ('next' in statusData) {
|
while (typeof statusData === 'object' && 'next' in statusData) {
|
||||||
statusResponse = await this.getRequest(statusData.next, headers);
|
statusResponse = await this.getRequest(statusData.next, headers);
|
||||||
statusData = statusResponse.data;
|
statusData = statusResponse.data;
|
||||||
data = data.concat(statusData.data);
|
data = data.concat(statusData.data);
|
||||||
@ -894,6 +901,9 @@ export default class FirecrawlApp {
|
|||||||
this.handleError(statusResponse, "check crawl status");
|
this.handleError(statusResponse, "check crawl status");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
throw new FirecrawlError(error, 500);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
x
Reference in New Issue
Block a user