mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 15:49:02 +08:00
Nick:
This commit is contained in:
parent
8be75accb8
commit
70bff7f8fb
@ -1,9 +1,16 @@
|
|||||||
|
import { Logger } from "./logger";
|
||||||
|
|
||||||
export function performCosineSimilarity(links: string[], searchQuery: string) {
|
export function performCosineSimilarity(links: string[], searchQuery: string) {
|
||||||
|
try {
|
||||||
// Function to calculate cosine similarity
|
// Function to calculate cosine similarity
|
||||||
const cosineSimilarity = (vec1: number[], vec2: number[]): number => {
|
const cosineSimilarity = (vec1: number[], vec2: number[]): number => {
|
||||||
const dotProduct = vec1.reduce((sum, val, i) => sum + val * vec2[i], 0);
|
const dotProduct = vec1.reduce((sum, val, i) => sum + val * vec2[i], 0);
|
||||||
const magnitude1 = Math.sqrt(vec1.reduce((sum, val) => sum + val * val, 0));
|
const magnitude1 = Math.sqrt(
|
||||||
const magnitude2 = Math.sqrt(vec2.reduce((sum, val) => sum + val * val, 0));
|
vec1.reduce((sum, val) => sum + val * val, 0)
|
||||||
|
);
|
||||||
|
const magnitude2 = Math.sqrt(
|
||||||
|
vec2.reduce((sum, val) => sum + val * val, 0)
|
||||||
|
);
|
||||||
if (magnitude1 === 0 || magnitude2 === 0) return 0;
|
if (magnitude1 === 0 || magnitude2 === 0) return 0;
|
||||||
return dotProduct / (magnitude1 * magnitude2);
|
return dotProduct / (magnitude1 * magnitude2);
|
||||||
};
|
};
|
||||||
@ -32,4 +39,8 @@ export function performCosineSimilarity(links: string[], searchQuery: string) {
|
|||||||
|
|
||||||
links = a.map((item) => item.link);
|
links = a.map((item) => item.link);
|
||||||
return links;
|
return links;
|
||||||
|
} catch (error) {
|
||||||
|
Logger.error(`Error performing cosine similarity: ${error}`);
|
||||||
|
return links;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,13 +1,4 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
||||||
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
||||||
return new (P || (P = Promise))(function (resolve, reject) {
|
|
||||||
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
||||||
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
||||||
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
||||||
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
||||||
});
|
|
||||||
};
|
|
||||||
var __importDefault = (this && this.__importDefault) || function (mod) {
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
||||||
return (mod && mod.__esModule) ? mod : { "default": mod };
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
||||||
};
|
};
|
||||||
@ -17,47 +8,59 @@ const zod_1 = require("zod");
|
|||||||
const zod_to_json_schema_1 = require("zod-to-json-schema");
|
const zod_to_json_schema_1 = require("zod-to-json-schema");
|
||||||
/**
|
/**
|
||||||
* Main class for interacting with the Firecrawl API.
|
* Main class for interacting with the Firecrawl API.
|
||||||
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
||||||
*/
|
*/
|
||||||
class FirecrawlApp {
|
class FirecrawlApp {
|
||||||
/**
|
/**
|
||||||
* Initializes a new instance of the FirecrawlApp class.
|
* Initializes a new instance of the FirecrawlApp class.
|
||||||
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
* @param config - Configuration options for the FirecrawlApp instance.
|
||||||
*/
|
*/
|
||||||
constructor({ apiKey = null, apiUrl = null }) {
|
constructor({ apiKey = null, apiUrl = null, version = "v1" }) {
|
||||||
this.apiKey = apiKey || "";
|
this.apiKey = apiKey || "";
|
||||||
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
||||||
|
this.version = version;
|
||||||
if (!this.apiKey) {
|
if (!this.apiKey) {
|
||||||
throw new Error("No API key provided");
|
throw new Error("No API key provided");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Scrapes a URL using the Firecrawl API.
|
* Scrapes a URL using the Firecrawl API.
|
||||||
* @param {string} url - The URL to scrape.
|
* @param url - The URL to scrape.
|
||||||
* @param {Params | null} params - Additional parameters for the scrape request.
|
* @param params - Additional parameters for the scrape request.
|
||||||
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
* @returns The response from the scrape operation.
|
||||||
*/
|
*/
|
||||||
scrapeUrl(url_1) {
|
async scrapeUrl(url, params) {
|
||||||
return __awaiter(this, arguments, void 0, function* (url, params = null) {
|
|
||||||
var _a;
|
|
||||||
const headers = {
|
const headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
Authorization: `Bearer ${this.apiKey}`,
|
Authorization: `Bearer ${this.apiKey}`,
|
||||||
};
|
};
|
||||||
let jsonData = Object.assign({ url }, params);
|
let jsonData = { url, ...params };
|
||||||
if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) {
|
if (jsonData?.extractorOptions?.extractionSchema) {
|
||||||
let schema = params.extractorOptions.extractionSchema;
|
let schema = jsonData.extractorOptions.extractionSchema;
|
||||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||||
if (schema instanceof zod_1.z.ZodSchema) {
|
if (schema instanceof zod_1.z.ZodSchema) {
|
||||||
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
||||||
}
|
}
|
||||||
jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
|
jsonData = {
|
||||||
|
...jsonData,
|
||||||
|
extractorOptions: {
|
||||||
|
...jsonData.extractorOptions,
|
||||||
|
extractionSchema: schema,
|
||||||
|
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
||||||
|
},
|
||||||
|
};
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const response = yield axios_1.default.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
|
const response = await axios_1.default.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const responseData = response.data;
|
const responseData = response.data;
|
||||||
if (responseData.success) {
|
if (responseData.success) {
|
||||||
return responseData;
|
return (this.version === 'v0' ? responseData : {
|
||||||
|
success: true,
|
||||||
|
warning: responseData.warning,
|
||||||
|
error: responseData.error,
|
||||||
|
...responseData.data
|
||||||
|
});
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
||||||
@ -71,26 +74,27 @@ class FirecrawlApp {
|
|||||||
throw new Error(error.message);
|
throw new Error(error.message);
|
||||||
}
|
}
|
||||||
return { success: false, error: "Internal server error." };
|
return { success: false, error: "Internal server error." };
|
||||||
});
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Searches for a query using the Firecrawl API.
|
* Searches for a query using the Firecrawl API.
|
||||||
* @param {string} query - The query to search for.
|
* @param query - The query to search for.
|
||||||
* @param {Params | null} params - Additional parameters for the search request.
|
* @param params - Additional parameters for the search request.
|
||||||
* @returns {Promise<SearchResponse>} The response from the search operation.
|
* @returns The response from the search operation.
|
||||||
*/
|
*/
|
||||||
search(query_1) {
|
async search(query, params) {
|
||||||
return __awaiter(this, arguments, void 0, function* (query, params = null) {
|
if (this.version === "v1") {
|
||||||
|
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
||||||
|
}
|
||||||
const headers = {
|
const headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
Authorization: `Bearer ${this.apiKey}`,
|
Authorization: `Bearer ${this.apiKey}`,
|
||||||
};
|
};
|
||||||
let jsonData = { query };
|
let jsonData = { query };
|
||||||
if (params) {
|
if (params) {
|
||||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
jsonData = { ...jsonData, ...params };
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const response = yield axios_1.default.post(this.apiUrl + "/v0/search", jsonData, { headers });
|
const response = await axios_1.default.post(this.apiUrl + "/v0/search", jsonData, { headers });
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const responseData = response.data;
|
const responseData = response.data;
|
||||||
if (responseData.success) {
|
if (responseData.success) {
|
||||||
@ -108,33 +112,43 @@ class FirecrawlApp {
|
|||||||
throw new Error(error.message);
|
throw new Error(error.message);
|
||||||
}
|
}
|
||||||
return { success: false, error: "Internal server error." };
|
return { success: false, error: "Internal server error." };
|
||||||
});
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Initiates a crawl job for a URL using the Firecrawl API.
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
||||||
* @param {string} url - The URL to crawl.
|
* @param url - The URL to crawl.
|
||||||
* @param {Params | null} params - Additional parameters for the crawl request.
|
* @param params - Additional parameters for the crawl request.
|
||||||
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
* @param waitUntilDone - Whether to wait for the crawl job to complete.
|
||||||
* @param {number} pollInterval - Time in seconds for job status checks.
|
* @param pollInterval - Time in seconds for job status checks.
|
||||||
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
* @param idempotencyKey - Optional idempotency key for the request.
|
||||||
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
* @returns The response from the crawl operation.
|
||||||
*/
|
*/
|
||||||
crawlUrl(url_1) {
|
async crawlUrl(url, params, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
||||||
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
|
||||||
const headers = this.prepareHeaders(idempotencyKey);
|
const headers = this.prepareHeaders(idempotencyKey);
|
||||||
let jsonData = { url };
|
let jsonData = { url, ...params };
|
||||||
if (params) {
|
|
||||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
|
||||||
}
|
|
||||||
try {
|
try {
|
||||||
const response = yield this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
|
const response = await this.postRequest(this.apiUrl + `/${this.version}/crawl`, jsonData, headers);
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const jobId = response.data.jobId;
|
const id = this.version === 'v0' ? response.data.jobId : response.data.id;
|
||||||
|
let checkUrl = undefined;
|
||||||
if (waitUntilDone) {
|
if (waitUntilDone) {
|
||||||
return this.monitorJobStatus(jobId, headers, pollInterval);
|
if (this.version === 'v1') {
|
||||||
|
checkUrl = response.data.url;
|
||||||
|
}
|
||||||
|
return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return { success: true, jobId };
|
if (this.version === 'v0') {
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
jobId: id
|
||||||
|
};
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
id: id
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -142,23 +156,31 @@ class FirecrawlApp {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (error) {
|
catch (error) {
|
||||||
console.log(error);
|
if (error.response?.data?.error) {
|
||||||
|
throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
|
||||||
|
}
|
||||||
|
else {
|
||||||
throw new Error(error.message);
|
throw new Error(error.message);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return { success: false, error: "Internal server error." };
|
return { success: false, error: "Internal server error." };
|
||||||
});
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Checks the status of a crawl job using the Firecrawl API.
|
* Checks the status of a crawl job using the Firecrawl API.
|
||||||
* @param {string} jobId - The job ID of the crawl operation.
|
* @param id - The ID of the crawl operation.
|
||||||
* @returns {Promise<JobStatusResponse>} The response containing the job status.
|
* @returns The response containing the job status.
|
||||||
*/
|
*/
|
||||||
checkCrawlStatus(jobId) {
|
async checkCrawlStatus(id) {
|
||||||
return __awaiter(this, void 0, void 0, function* () {
|
if (!id) {
|
||||||
|
throw new Error("No crawl ID provided");
|
||||||
|
}
|
||||||
const headers = this.prepareHeaders();
|
const headers = this.prepareHeaders();
|
||||||
try {
|
try {
|
||||||
const response = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
const response = await this.getRequest(this.version === 'v1' ?
|
||||||
|
`${this.apiUrl}/${this.version}/crawl/${id}` :
|
||||||
|
`${this.apiUrl}/${this.version}/crawl/status/${id}`, headers);
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
|
if (this.version === 'v0') {
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
status: response.data.status,
|
status: response.data.status,
|
||||||
@ -172,6 +194,20 @@ class FirecrawlApp {
|
|||||||
: undefined,
|
: undefined,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
status: response.data.status,
|
||||||
|
total: response.data.total,
|
||||||
|
completed: response.data.completed,
|
||||||
|
creditsUsed: response.data.creditsUsed,
|
||||||
|
expiresAt: new Date(response.data.expiresAt),
|
||||||
|
next: response.data.next,
|
||||||
|
data: response.data.data,
|
||||||
|
error: response.data.error
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
this.handleError(response, "check crawl status");
|
this.handleError(response, "check crawl status");
|
||||||
}
|
}
|
||||||
@ -179,7 +215,8 @@ class FirecrawlApp {
|
|||||||
catch (error) {
|
catch (error) {
|
||||||
throw new Error(error.message);
|
throw new Error(error.message);
|
||||||
}
|
}
|
||||||
return {
|
return this.version === 'v0' ?
|
||||||
|
{
|
||||||
success: false,
|
success: false,
|
||||||
status: "unknown",
|
status: "unknown",
|
||||||
current: 0,
|
current: 0,
|
||||||
@ -187,61 +224,94 @@ class FirecrawlApp {
|
|||||||
current_step: "",
|
current_step: "",
|
||||||
total: 0,
|
total: 0,
|
||||||
error: "Internal server error.",
|
error: "Internal server error.",
|
||||||
|
} :
|
||||||
|
{
|
||||||
|
success: false,
|
||||||
|
error: "Internal server error.",
|
||||||
};
|
};
|
||||||
});
|
}
|
||||||
|
async mapUrl(url, params) {
|
||||||
|
if (this.version == 'v0') {
|
||||||
|
throw new Error("Map is not supported in v0");
|
||||||
|
}
|
||||||
|
const headers = this.prepareHeaders();
|
||||||
|
let jsonData = { url, ...params };
|
||||||
|
try {
|
||||||
|
const response = await this.postRequest(this.apiUrl + `/${this.version}/map`, jsonData, headers);
|
||||||
|
if (response.status === 200) {
|
||||||
|
return response.data;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
this.handleError(response, "map");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (error) {
|
||||||
|
throw new Error(error.message);
|
||||||
|
}
|
||||||
|
return { success: false, error: "Internal server error." };
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Prepares the headers for an API request.
|
* Prepares the headers for an API request.
|
||||||
* @returns {AxiosRequestHeaders} The prepared headers.
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
||||||
|
* @returns The prepared headers.
|
||||||
*/
|
*/
|
||||||
prepareHeaders(idempotencyKey) {
|
prepareHeaders(idempotencyKey) {
|
||||||
return Object.assign({ "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }, (idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}));
|
return {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
Authorization: `Bearer ${this.apiKey}`,
|
||||||
|
...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Sends a POST request to the specified URL.
|
* Sends a POST request to the specified URL.
|
||||||
* @param {string} url - The URL to send the request to.
|
* @param url - The URL to send the request to.
|
||||||
* @param {Params} data - The data to send in the request.
|
* @param data - The data to send in the request.
|
||||||
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
* @param headers - The headers for the request.
|
||||||
* @returns {Promise<AxiosResponse>} The response from the POST request.
|
* @returns The response from the POST request.
|
||||||
*/
|
*/
|
||||||
postRequest(url, data, headers) {
|
postRequest(url, data, headers) {
|
||||||
return axios_1.default.post(url, data, { headers });
|
return axios_1.default.post(url, data, { headers });
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Sends a GET request to the specified URL.
|
* Sends a GET request to the specified URL.
|
||||||
* @param {string} url - The URL to send the request to.
|
* @param url - The URL to send the request to.
|
||||||
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
* @param headers - The headers for the request.
|
||||||
* @returns {Promise<AxiosResponse>} The response from the GET request.
|
* @returns The response from the GET request.
|
||||||
*/
|
*/
|
||||||
getRequest(url, headers) {
|
getRequest(url, headers) {
|
||||||
return axios_1.default.get(url, { headers });
|
return axios_1.default.get(url, { headers });
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Monitors the status of a crawl job until completion or failure.
|
* Monitors the status of a crawl job until completion or failure.
|
||||||
* @param {string} jobId - The job ID of the crawl operation.
|
* @param id - The ID of the crawl operation.
|
||||||
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
* @param headers - The headers for the request.
|
||||||
* @param {number} timeout - Timeout in seconds for job status checks.
|
* @param checkInterval - Interval in seconds for job status checks.
|
||||||
* @returns {Promise<any>} The final job status or data.
|
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
||||||
|
* @returns The final job status or data.
|
||||||
*/
|
*/
|
||||||
monitorJobStatus(jobId, headers, checkInterval) {
|
async monitorJobStatus(id, headers, checkInterval, checkUrl) {
|
||||||
return __awaiter(this, void 0, void 0, function* () {
|
let apiUrl = '';
|
||||||
while (true) {
|
while (true) {
|
||||||
const statusResponse = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
if (this.version === 'v1') {
|
||||||
|
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
|
||||||
|
}
|
||||||
|
else if (this.version === 'v0') {
|
||||||
|
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
|
||||||
|
}
|
||||||
|
const statusResponse = await this.getRequest(apiUrl, headers);
|
||||||
if (statusResponse.status === 200) {
|
if (statusResponse.status === 200) {
|
||||||
const statusData = statusResponse.data;
|
const statusData = statusResponse.data;
|
||||||
if (statusData.status === "completed") {
|
if (statusData.status === "completed") {
|
||||||
if ("data" in statusData) {
|
if ("data" in statusData) {
|
||||||
return statusData.data;
|
return this.version === 'v0' ? statusData.data : statusData;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new Error("Crawl job completed but no data was returned");
|
throw new Error("Crawl job completed but no data was returned");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
|
else if (["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)) {
|
||||||
if (checkInterval < 2) {
|
checkInterval = Math.max(checkInterval, 2);
|
||||||
checkInterval = 2;
|
await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000));
|
||||||
}
|
|
||||||
yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
||||||
@ -251,7 +321,6 @@ class FirecrawlApp {
|
|||||||
this.handleError(statusResponse, "check crawl status");
|
this.handleError(statusResponse, "check crawl status");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Handles errors from API responses.
|
* Handles errors from API responses.
|
||||||
|
@ -1,58 +1,61 @@
|
|||||||
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
||||||
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
||||||
return new (P || (P = Promise))(function (resolve, reject) {
|
|
||||||
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
||||||
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
||||||
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
||||||
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
||||||
});
|
|
||||||
};
|
|
||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||||
/**
|
/**
|
||||||
* Main class for interacting with the Firecrawl API.
|
* Main class for interacting with the Firecrawl API.
|
||||||
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
||||||
*/
|
*/
|
||||||
export default class FirecrawlApp {
|
export default class FirecrawlApp {
|
||||||
/**
|
/**
|
||||||
* Initializes a new instance of the FirecrawlApp class.
|
* Initializes a new instance of the FirecrawlApp class.
|
||||||
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
* @param config - Configuration options for the FirecrawlApp instance.
|
||||||
*/
|
*/
|
||||||
constructor({ apiKey = null, apiUrl = null }) {
|
constructor({ apiKey = null, apiUrl = null, version = "v1" }) {
|
||||||
this.apiKey = apiKey || "";
|
this.apiKey = apiKey || "";
|
||||||
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
||||||
|
this.version = version;
|
||||||
if (!this.apiKey) {
|
if (!this.apiKey) {
|
||||||
throw new Error("No API key provided");
|
throw new Error("No API key provided");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Scrapes a URL using the Firecrawl API.
|
* Scrapes a URL using the Firecrawl API.
|
||||||
* @param {string} url - The URL to scrape.
|
* @param url - The URL to scrape.
|
||||||
* @param {Params | null} params - Additional parameters for the scrape request.
|
* @param params - Additional parameters for the scrape request.
|
||||||
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
* @returns The response from the scrape operation.
|
||||||
*/
|
*/
|
||||||
scrapeUrl(url_1) {
|
async scrapeUrl(url, params) {
|
||||||
return __awaiter(this, arguments, void 0, function* (url, params = null) {
|
|
||||||
var _a;
|
|
||||||
const headers = {
|
const headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
Authorization: `Bearer ${this.apiKey}`,
|
Authorization: `Bearer ${this.apiKey}`,
|
||||||
};
|
};
|
||||||
let jsonData = Object.assign({ url }, params);
|
let jsonData = { url, ...params };
|
||||||
if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) {
|
if (jsonData?.extractorOptions?.extractionSchema) {
|
||||||
let schema = params.extractorOptions.extractionSchema;
|
let schema = jsonData.extractorOptions.extractionSchema;
|
||||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||||
if (schema instanceof z.ZodSchema) {
|
if (schema instanceof z.ZodSchema) {
|
||||||
schema = zodToJsonSchema(schema);
|
schema = zodToJsonSchema(schema);
|
||||||
}
|
}
|
||||||
jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
|
jsonData = {
|
||||||
|
...jsonData,
|
||||||
|
extractorOptions: {
|
||||||
|
...jsonData.extractorOptions,
|
||||||
|
extractionSchema: schema,
|
||||||
|
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
||||||
|
},
|
||||||
|
};
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const response = yield axios.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
|
const response = await axios.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const responseData = response.data;
|
const responseData = response.data;
|
||||||
if (responseData.success) {
|
if (responseData.success) {
|
||||||
return responseData;
|
return (this.version === 'v0' ? responseData : {
|
||||||
|
success: true,
|
||||||
|
warning: responseData.warning,
|
||||||
|
error: responseData.error,
|
||||||
|
...responseData.data
|
||||||
|
});
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
||||||
@ -66,26 +69,27 @@ export default class FirecrawlApp {
|
|||||||
throw new Error(error.message);
|
throw new Error(error.message);
|
||||||
}
|
}
|
||||||
return { success: false, error: "Internal server error." };
|
return { success: false, error: "Internal server error." };
|
||||||
});
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Searches for a query using the Firecrawl API.
|
* Searches for a query using the Firecrawl API.
|
||||||
* @param {string} query - The query to search for.
|
* @param query - The query to search for.
|
||||||
* @param {Params | null} params - Additional parameters for the search request.
|
* @param params - Additional parameters for the search request.
|
||||||
* @returns {Promise<SearchResponse>} The response from the search operation.
|
* @returns The response from the search operation.
|
||||||
*/
|
*/
|
||||||
search(query_1) {
|
async search(query, params) {
|
||||||
return __awaiter(this, arguments, void 0, function* (query, params = null) {
|
if (this.version === "v1") {
|
||||||
|
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
||||||
|
}
|
||||||
const headers = {
|
const headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
Authorization: `Bearer ${this.apiKey}`,
|
Authorization: `Bearer ${this.apiKey}`,
|
||||||
};
|
};
|
||||||
let jsonData = { query };
|
let jsonData = { query };
|
||||||
if (params) {
|
if (params) {
|
||||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
jsonData = { ...jsonData, ...params };
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const response = yield axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
|
const response = await axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const responseData = response.data;
|
const responseData = response.data;
|
||||||
if (responseData.success) {
|
if (responseData.success) {
|
||||||
@ -103,33 +107,43 @@ export default class FirecrawlApp {
|
|||||||
throw new Error(error.message);
|
throw new Error(error.message);
|
||||||
}
|
}
|
||||||
return { success: false, error: "Internal server error." };
|
return { success: false, error: "Internal server error." };
|
||||||
});
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Initiates a crawl job for a URL using the Firecrawl API.
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
||||||
* @param {string} url - The URL to crawl.
|
* @param url - The URL to crawl.
|
||||||
* @param {Params | null} params - Additional parameters for the crawl request.
|
* @param params - Additional parameters for the crawl request.
|
||||||
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
* @param waitUntilDone - Whether to wait for the crawl job to complete.
|
||||||
* @param {number} pollInterval - Time in seconds for job status checks.
|
* @param pollInterval - Time in seconds for job status checks.
|
||||||
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
* @param idempotencyKey - Optional idempotency key for the request.
|
||||||
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
* @returns The response from the crawl operation.
|
||||||
*/
|
*/
|
||||||
crawlUrl(url_1) {
|
async crawlUrl(url, params, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
||||||
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
|
||||||
const headers = this.prepareHeaders(idempotencyKey);
|
const headers = this.prepareHeaders(idempotencyKey);
|
||||||
let jsonData = { url };
|
let jsonData = { url, ...params };
|
||||||
if (params) {
|
|
||||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
|
||||||
}
|
|
||||||
try {
|
try {
|
||||||
const response = yield this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
|
const response = await this.postRequest(this.apiUrl + `/${this.version}/crawl`, jsonData, headers);
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const jobId = response.data.jobId;
|
const id = this.version === 'v0' ? response.data.jobId : response.data.id;
|
||||||
|
let checkUrl = undefined;
|
||||||
if (waitUntilDone) {
|
if (waitUntilDone) {
|
||||||
return this.monitorJobStatus(jobId, headers, pollInterval);
|
if (this.version === 'v1') {
|
||||||
|
checkUrl = response.data.url;
|
||||||
|
}
|
||||||
|
return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return { success: true, jobId };
|
if (this.version === 'v0') {
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
jobId: id
|
||||||
|
};
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
id: id
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -137,23 +151,31 @@ export default class FirecrawlApp {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (error) {
|
catch (error) {
|
||||||
console.log(error);
|
if (error.response?.data?.error) {
|
||||||
|
throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
|
||||||
|
}
|
||||||
|
else {
|
||||||
throw new Error(error.message);
|
throw new Error(error.message);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return { success: false, error: "Internal server error." };
|
return { success: false, error: "Internal server error." };
|
||||||
});
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Checks the status of a crawl job using the Firecrawl API.
|
* Checks the status of a crawl job using the Firecrawl API.
|
||||||
* @param {string} jobId - The job ID of the crawl operation.
|
* @param id - The ID of the crawl operation.
|
||||||
* @returns {Promise<JobStatusResponse>} The response containing the job status.
|
* @returns The response containing the job status.
|
||||||
*/
|
*/
|
||||||
checkCrawlStatus(jobId) {
|
async checkCrawlStatus(id) {
|
||||||
return __awaiter(this, void 0, void 0, function* () {
|
if (!id) {
|
||||||
|
throw new Error("No crawl ID provided");
|
||||||
|
}
|
||||||
const headers = this.prepareHeaders();
|
const headers = this.prepareHeaders();
|
||||||
try {
|
try {
|
||||||
const response = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
const response = await this.getRequest(this.version === 'v1' ?
|
||||||
|
`${this.apiUrl}/${this.version}/crawl/${id}` :
|
||||||
|
`${this.apiUrl}/${this.version}/crawl/status/${id}`, headers);
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
|
if (this.version === 'v0') {
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
status: response.data.status,
|
status: response.data.status,
|
||||||
@ -167,6 +189,20 @@ export default class FirecrawlApp {
|
|||||||
: undefined,
|
: undefined,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
status: response.data.status,
|
||||||
|
total: response.data.total,
|
||||||
|
completed: response.data.completed,
|
||||||
|
creditsUsed: response.data.creditsUsed,
|
||||||
|
expiresAt: new Date(response.data.expiresAt),
|
||||||
|
next: response.data.next,
|
||||||
|
data: response.data.data,
|
||||||
|
error: response.data.error
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
this.handleError(response, "check crawl status");
|
this.handleError(response, "check crawl status");
|
||||||
}
|
}
|
||||||
@ -174,7 +210,8 @@ export default class FirecrawlApp {
|
|||||||
catch (error) {
|
catch (error) {
|
||||||
throw new Error(error.message);
|
throw new Error(error.message);
|
||||||
}
|
}
|
||||||
return {
|
return this.version === 'v0' ?
|
||||||
|
{
|
||||||
success: false,
|
success: false,
|
||||||
status: "unknown",
|
status: "unknown",
|
||||||
current: 0,
|
current: 0,
|
||||||
@ -182,61 +219,94 @@ export default class FirecrawlApp {
|
|||||||
current_step: "",
|
current_step: "",
|
||||||
total: 0,
|
total: 0,
|
||||||
error: "Internal server error.",
|
error: "Internal server error.",
|
||||||
|
} :
|
||||||
|
{
|
||||||
|
success: false,
|
||||||
|
error: "Internal server error.",
|
||||||
};
|
};
|
||||||
});
|
}
|
||||||
|
async mapUrl(url, params) {
|
||||||
|
if (this.version == 'v0') {
|
||||||
|
throw new Error("Map is not supported in v0");
|
||||||
|
}
|
||||||
|
const headers = this.prepareHeaders();
|
||||||
|
let jsonData = { url, ...params };
|
||||||
|
try {
|
||||||
|
const response = await this.postRequest(this.apiUrl + `/${this.version}/map`, jsonData, headers);
|
||||||
|
if (response.status === 200) {
|
||||||
|
return response.data;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
this.handleError(response, "map");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (error) {
|
||||||
|
throw new Error(error.message);
|
||||||
|
}
|
||||||
|
return { success: false, error: "Internal server error." };
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Prepares the headers for an API request.
|
* Prepares the headers for an API request.
|
||||||
* @returns {AxiosRequestHeaders} The prepared headers.
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
||||||
|
* @returns The prepared headers.
|
||||||
*/
|
*/
|
||||||
prepareHeaders(idempotencyKey) {
|
prepareHeaders(idempotencyKey) {
|
||||||
return Object.assign({ "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }, (idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}));
|
return {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
Authorization: `Bearer ${this.apiKey}`,
|
||||||
|
...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Sends a POST request to the specified URL.
|
* Sends a POST request to the specified URL.
|
||||||
* @param {string} url - The URL to send the request to.
|
* @param url - The URL to send the request to.
|
||||||
* @param {Params} data - The data to send in the request.
|
* @param data - The data to send in the request.
|
||||||
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
* @param headers - The headers for the request.
|
||||||
* @returns {Promise<AxiosResponse>} The response from the POST request.
|
* @returns The response from the POST request.
|
||||||
*/
|
*/
|
||||||
postRequest(url, data, headers) {
|
postRequest(url, data, headers) {
|
||||||
return axios.post(url, data, { headers });
|
return axios.post(url, data, { headers });
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Sends a GET request to the specified URL.
|
* Sends a GET request to the specified URL.
|
||||||
* @param {string} url - The URL to send the request to.
|
* @param url - The URL to send the request to.
|
||||||
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
* @param headers - The headers for the request.
|
||||||
* @returns {Promise<AxiosResponse>} The response from the GET request.
|
* @returns The response from the GET request.
|
||||||
*/
|
*/
|
||||||
getRequest(url, headers) {
|
getRequest(url, headers) {
|
||||||
return axios.get(url, { headers });
|
return axios.get(url, { headers });
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Monitors the status of a crawl job until completion or failure.
|
* Monitors the status of a crawl job until completion or failure.
|
||||||
* @param {string} jobId - The job ID of the crawl operation.
|
* @param id - The ID of the crawl operation.
|
||||||
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
* @param headers - The headers for the request.
|
||||||
* @param {number} timeout - Timeout in seconds for job status checks.
|
* @param checkInterval - Interval in seconds for job status checks.
|
||||||
* @returns {Promise<any>} The final job status or data.
|
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
||||||
|
* @returns The final job status or data.
|
||||||
*/
|
*/
|
||||||
monitorJobStatus(jobId, headers, checkInterval) {
|
async monitorJobStatus(id, headers, checkInterval, checkUrl) {
|
||||||
return __awaiter(this, void 0, void 0, function* () {
|
let apiUrl = '';
|
||||||
while (true) {
|
while (true) {
|
||||||
const statusResponse = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
if (this.version === 'v1') {
|
||||||
|
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
|
||||||
|
}
|
||||||
|
else if (this.version === 'v0') {
|
||||||
|
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
|
||||||
|
}
|
||||||
|
const statusResponse = await this.getRequest(apiUrl, headers);
|
||||||
if (statusResponse.status === 200) {
|
if (statusResponse.status === 200) {
|
||||||
const statusData = statusResponse.data;
|
const statusData = statusResponse.data;
|
||||||
if (statusData.status === "completed") {
|
if (statusData.status === "completed") {
|
||||||
if ("data" in statusData) {
|
if ("data" in statusData) {
|
||||||
return statusData.data;
|
return this.version === 'v0' ? statusData.data : statusData;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new Error("Crawl job completed but no data was returned");
|
throw new Error("Crawl job completed but no data was returned");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
|
else if (["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)) {
|
||||||
if (checkInterval < 2) {
|
checkInterval = Math.max(checkInterval, 2);
|
||||||
checkInterval = 2;
|
await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000));
|
||||||
}
|
|
||||||
yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
||||||
@ -246,7 +316,6 @@ export default class FirecrawlApp {
|
|||||||
this.handleError(statusResponse, "check crawl status");
|
this.handleError(statusResponse, "check crawl status");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Handles errors from API responses.
|
* Handles errors from API responses.
|
||||||
|
289
apps/js-sdk/firecrawl/types/index.d.ts
vendored
289
apps/js-sdk/firecrawl/types/index.d.ts
vendored
@ -2,13 +2,18 @@ import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
|||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
/**
|
/**
|
||||||
* Configuration interface for FirecrawlApp.
|
* Configuration interface for FirecrawlApp.
|
||||||
|
* @param apiKey - Optional API key for authentication.
|
||||||
|
* @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
|
||||||
|
* @param version - API version, either 'v0' or 'v1'.
|
||||||
*/
|
*/
|
||||||
export interface FirecrawlAppConfig {
|
export interface FirecrawlAppConfig {
|
||||||
apiKey?: string | null;
|
apiKey?: string | null;
|
||||||
apiUrl?: string | null;
|
apiUrl?: string | null;
|
||||||
|
version?: "v0" | "v1";
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Metadata for a Firecrawl document.
|
* Metadata for a Firecrawl document.
|
||||||
|
* Includes various optional properties for document metadata.
|
||||||
*/
|
*/
|
||||||
export interface FirecrawlDocumentMetadata {
|
export interface FirecrawlDocumentMetadata {
|
||||||
title?: string;
|
title?: string;
|
||||||
@ -41,14 +46,37 @@ export interface FirecrawlDocumentMetadata {
|
|||||||
articleTag?: string;
|
articleTag?: string;
|
||||||
articleSection?: string;
|
articleSection?: string;
|
||||||
sourceURL?: string;
|
sourceURL?: string;
|
||||||
|
statusCode?: number;
|
||||||
|
error?: string;
|
||||||
|
[key: string]: any;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Metadata for a Firecrawl document on v0.
|
||||||
|
* Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
|
||||||
|
*/
|
||||||
|
export interface FirecrawlDocumentMetadataV0 {
|
||||||
pageStatusCode?: number;
|
pageStatusCode?: number;
|
||||||
pageError?: string;
|
pageError?: string;
|
||||||
[key: string]: any;
|
[key: string]: any;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Document interface for Firecrawl.
|
* Document interface for Firecrawl.
|
||||||
|
* Represents a document retrieved or processed by Firecrawl.
|
||||||
*/
|
*/
|
||||||
export interface FirecrawlDocument {
|
export interface FirecrawlDocument {
|
||||||
|
url?: string;
|
||||||
|
markdown?: string;
|
||||||
|
html?: string;
|
||||||
|
rawHtml?: string;
|
||||||
|
links?: string[];
|
||||||
|
screenshot?: string;
|
||||||
|
metadata: FirecrawlDocumentMetadata;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Document interface for Firecrawl on v0.
|
||||||
|
* Represents a document specifically for API version v0 with additional properties.
|
||||||
|
*/
|
||||||
|
export interface FirecrawlDocumentV0 {
|
||||||
id?: string;
|
id?: string;
|
||||||
url?: string;
|
url?: string;
|
||||||
content: string;
|
content: string;
|
||||||
@ -58,132 +86,283 @@ export interface FirecrawlDocument {
|
|||||||
createdAt?: Date;
|
createdAt?: Date;
|
||||||
updatedAt?: Date;
|
updatedAt?: Date;
|
||||||
type?: string;
|
type?: string;
|
||||||
metadata: FirecrawlDocumentMetadata;
|
metadata: FirecrawlDocumentMetadataV0;
|
||||||
childrenLinks?: string[];
|
childrenLinks?: string[];
|
||||||
provider?: string;
|
provider?: string;
|
||||||
warning?: string;
|
warning?: string;
|
||||||
index?: number;
|
index?: number;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Response interface for scraping operations.
|
* Parameters for scraping operations.
|
||||||
|
* Defines the options and configurations available for scraping web content.
|
||||||
*/
|
*/
|
||||||
export interface ScrapeResponse {
|
export interface ScrapeParams {
|
||||||
|
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
|
||||||
|
headers?: Record<string, string>;
|
||||||
|
includeTags?: string[];
|
||||||
|
excludeTags?: string[];
|
||||||
|
onlyMainContent?: boolean;
|
||||||
|
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
|
||||||
|
waitFor?: number;
|
||||||
|
timeout?: number;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Parameters for scraping operations on v0.
|
||||||
|
* Includes page and extractor options specific to API version v0.
|
||||||
|
*/
|
||||||
|
export interface ScrapeParamsV0 {
|
||||||
|
pageOptions?: {
|
||||||
|
headers?: Record<string, string>;
|
||||||
|
includeHtml?: boolean;
|
||||||
|
includeRawHtml?: boolean;
|
||||||
|
onlyIncludeTags?: string[];
|
||||||
|
onlyMainContent?: boolean;
|
||||||
|
removeTags?: string[];
|
||||||
|
replaceAllPathsWithAbsolutePaths?: boolean;
|
||||||
|
screenshot?: boolean;
|
||||||
|
fullPageScreenshot?: boolean;
|
||||||
|
waitFor?: number;
|
||||||
|
};
|
||||||
|
extractorOptions?: {
|
||||||
|
mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
|
||||||
|
extractionPrompt?: string;
|
||||||
|
extractionSchema?: Record<string, any> | z.ZodSchema | any;
|
||||||
|
};
|
||||||
|
timeout?: number;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Response interface for scraping operations.
|
||||||
|
* Defines the structure of the response received after a scraping operation.
|
||||||
|
*/
|
||||||
|
export interface ScrapeResponse extends FirecrawlDocument {
|
||||||
success: boolean;
|
success: boolean;
|
||||||
data?: FirecrawlDocument;
|
warning?: string;
|
||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Response interface for searching operations.
|
* Response interface for scraping operations on v0.
|
||||||
|
* Similar to ScrapeResponse but tailored for responses from API version v0.
|
||||||
*/
|
*/
|
||||||
export interface SearchResponse {
|
export interface ScrapeResponseV0 {
|
||||||
success: boolean;
|
success: boolean;
|
||||||
data?: FirecrawlDocument[];
|
data?: FirecrawlDocumentV0;
|
||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Parameters for crawling operations.
|
||||||
|
* Includes options for both scraping and mapping during a crawl.
|
||||||
|
*/
|
||||||
|
export interface CrawlParams {
|
||||||
|
scrapeOptions?: ScrapeParams;
|
||||||
|
crawlerOptions?: {
|
||||||
|
includePaths?: string[];
|
||||||
|
excludePaths?: string[];
|
||||||
|
maxDepth?: number;
|
||||||
|
limit?: number;
|
||||||
|
allowBackwardLinks?: boolean;
|
||||||
|
allowExternalLinks?: boolean;
|
||||||
|
ignoreSitemap?: boolean;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Parameters for crawling operations on v0.
|
||||||
|
* Tailored for API version v0, includes specific options for crawling.
|
||||||
|
*/
|
||||||
|
export interface CrawlParamsV0 {
|
||||||
|
crawlerOptions?: {
|
||||||
|
includes?: string[];
|
||||||
|
excludes?: string[];
|
||||||
|
generateImgAltText?: boolean;
|
||||||
|
returnOnlyUrls?: boolean;
|
||||||
|
maxDepth?: number;
|
||||||
|
mode?: "default" | "fast";
|
||||||
|
ignoreSitemap?: boolean;
|
||||||
|
limit?: number;
|
||||||
|
allowBackwardCrawling?: boolean;
|
||||||
|
allowExternalContentLinks?: boolean;
|
||||||
|
};
|
||||||
|
pageOptions?: {
|
||||||
|
headers?: Record<string, string>;
|
||||||
|
includeHtml?: boolean;
|
||||||
|
includeRawHtml?: boolean;
|
||||||
|
onlyIncludeTags?: string[];
|
||||||
|
onlyMainContent?: boolean;
|
||||||
|
removeTags?: string[];
|
||||||
|
replaceAllPathsWithAbsolutePaths?: boolean;
|
||||||
|
screenshot?: boolean;
|
||||||
|
fullPageScreenshot?: boolean;
|
||||||
|
waitFor?: number;
|
||||||
|
};
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Response interface for crawling operations.
|
* Response interface for crawling operations.
|
||||||
|
* Defines the structure of the response received after initiating a crawl.
|
||||||
*/
|
*/
|
||||||
export interface CrawlResponse {
|
export interface CrawlResponse {
|
||||||
|
id?: string;
|
||||||
|
url?: string;
|
||||||
success: boolean;
|
success: boolean;
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Response interface for crawling operations on v0.
|
||||||
|
* Similar to CrawlResponse but tailored for responses from API version v0.
|
||||||
|
*/
|
||||||
|
export interface CrawlResponseV0 {
|
||||||
jobId?: string;
|
jobId?: string;
|
||||||
data?: FirecrawlDocument[];
|
success: boolean;
|
||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Response interface for job status checks.
|
* Response interface for job status checks.
|
||||||
|
* Provides detailed status of a crawl job including progress and results.
|
||||||
*/
|
*/
|
||||||
export interface JobStatusResponse {
|
export interface CrawlStatusResponse {
|
||||||
|
success: boolean;
|
||||||
|
total: number;
|
||||||
|
completed: number;
|
||||||
|
creditsUsed: number;
|
||||||
|
expiresAt: Date;
|
||||||
|
status: "scraping" | "completed" | "failed";
|
||||||
|
next: string;
|
||||||
|
data?: FirecrawlDocument[];
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Response interface for job status checks on v0.
|
||||||
|
* Tailored for API version v0, provides status and partial data of a crawl job.
|
||||||
|
*/
|
||||||
|
export interface CrawlStatusResponseV0 {
|
||||||
success: boolean;
|
success: boolean;
|
||||||
status: string;
|
status: string;
|
||||||
current?: number;
|
current?: number;
|
||||||
current_url?: string;
|
current_url?: string;
|
||||||
current_step?: string;
|
current_step?: string;
|
||||||
total?: number;
|
total?: number;
|
||||||
jobId?: string;
|
data?: FirecrawlDocumentV0[];
|
||||||
data?: FirecrawlDocument[];
|
partial_data?: FirecrawlDocumentV0[];
|
||||||
partial_data?: FirecrawlDocument[];
|
|
||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Generic parameter interface.
|
* Parameters for mapping operations.
|
||||||
|
* Defines options for mapping URLs during a crawl.
|
||||||
*/
|
*/
|
||||||
export interface Params {
|
export interface MapParams {
|
||||||
[key: string]: any;
|
includePaths?: string[];
|
||||||
extractorOptions?: {
|
excludePaths?: string[];
|
||||||
extractionSchema: z.ZodSchema | any;
|
maxDepth?: number;
|
||||||
mode?: "llm-extraction";
|
limit?: number;
|
||||||
extractionPrompt?: string;
|
allowBackwardLinks?: boolean;
|
||||||
|
allowExternalLinks?: boolean;
|
||||||
|
ignoreSitemap?: boolean;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Response interface for mapping operations.
|
||||||
|
* Defines the structure of the response received after a mapping operation.
|
||||||
|
*/
|
||||||
|
export interface MapResponse {
|
||||||
|
success: boolean;
|
||||||
|
links?: string[];
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Parameters for searching operations on v0.
|
||||||
|
* Tailored for API version v0, includes specific options for searching content.
|
||||||
|
*/
|
||||||
|
export interface SearchParamsV0 {
|
||||||
|
pageOptions?: {
|
||||||
|
onlyMainContent?: boolean;
|
||||||
|
fetchPageContent?: boolean;
|
||||||
|
includeHtml?: boolean;
|
||||||
|
includeRawHtml?: boolean;
|
||||||
|
};
|
||||||
|
searchOptions?: {
|
||||||
|
limit?: number;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Main class for interacting with the Firecrawl API.
|
* Response interface for searching operations on v0.
|
||||||
|
* Defines the structure of the response received after a search operation on v0.
|
||||||
*/
|
*/
|
||||||
export default class FirecrawlApp {
|
export interface SearchResponseV0 {
|
||||||
|
success: boolean;
|
||||||
|
data?: FirecrawlDocumentV0[];
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Main class for interacting with the Firecrawl API.
|
||||||
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
||||||
|
*/
|
||||||
|
export default class FirecrawlApp<T extends "v0" | "v1"> {
|
||||||
private apiKey;
|
private apiKey;
|
||||||
private apiUrl;
|
private apiUrl;
|
||||||
|
version: T;
|
||||||
/**
|
/**
|
||||||
* Initializes a new instance of the FirecrawlApp class.
|
* Initializes a new instance of the FirecrawlApp class.
|
||||||
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
* @param config - Configuration options for the FirecrawlApp instance.
|
||||||
*/
|
*/
|
||||||
constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
|
constructor({ apiKey, apiUrl, version }: FirecrawlAppConfig);
|
||||||
/**
|
/**
|
||||||
* Scrapes a URL using the Firecrawl API.
|
* Scrapes a URL using the Firecrawl API.
|
||||||
* @param {string} url - The URL to scrape.
|
* @param url - The URL to scrape.
|
||||||
* @param {Params | null} params - Additional parameters for the scrape request.
|
* @param params - Additional parameters for the scrape request.
|
||||||
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
* @returns The response from the scrape operation.
|
||||||
*/
|
*/
|
||||||
scrapeUrl(url: string, params?: Params | null): Promise<ScrapeResponse>;
|
scrapeUrl(url: string, params?: ScrapeParams | ScrapeParamsV0): Promise<this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse>;
|
||||||
/**
|
/**
|
||||||
* Searches for a query using the Firecrawl API.
|
* Searches for a query using the Firecrawl API.
|
||||||
* @param {string} query - The query to search for.
|
* @param query - The query to search for.
|
||||||
* @param {Params | null} params - Additional parameters for the search request.
|
* @param params - Additional parameters for the search request.
|
||||||
* @returns {Promise<SearchResponse>} The response from the search operation.
|
* @returns The response from the search operation.
|
||||||
*/
|
*/
|
||||||
search(query: string, params?: Params | null): Promise<SearchResponse>;
|
search(query: string, params?: SearchParamsV0): Promise<SearchResponseV0>;
|
||||||
/**
|
/**
|
||||||
* Initiates a crawl job for a URL using the Firecrawl API.
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
||||||
* @param {string} url - The URL to crawl.
|
* @param url - The URL to crawl.
|
||||||
* @param {Params | null} params - Additional parameters for the crawl request.
|
* @param params - Additional parameters for the crawl request.
|
||||||
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
* @param waitUntilDone - Whether to wait for the crawl job to complete.
|
||||||
* @param {number} pollInterval - Time in seconds for job status checks.
|
* @param pollInterval - Time in seconds for job status checks.
|
||||||
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
* @param idempotencyKey - Optional idempotency key for the request.
|
||||||
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
* @returns The response from the crawl operation.
|
||||||
*/
|
*/
|
||||||
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, pollInterval?: number, idempotencyKey?: string): Promise<CrawlResponse | any>;
|
crawlUrl(url: string, params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams, waitUntilDone?: boolean, pollInterval?: number, idempotencyKey?: string): Promise<this['version'] extends 'v0' ? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlResponse | CrawlStatusResponse>;
|
||||||
/**
|
/**
|
||||||
* Checks the status of a crawl job using the Firecrawl API.
|
* Checks the status of a crawl job using the Firecrawl API.
|
||||||
* @param {string} jobId - The job ID of the crawl operation.
|
* @param id - The ID of the crawl operation.
|
||||||
* @returns {Promise<JobStatusResponse>} The response containing the job status.
|
* @returns The response containing the job status.
|
||||||
*/
|
*/
|
||||||
checkCrawlStatus(jobId: string): Promise<JobStatusResponse>;
|
checkCrawlStatus(id?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse>;
|
||||||
|
mapUrl(url: string, params?: MapParams): Promise<MapResponse>;
|
||||||
/**
|
/**
|
||||||
* Prepares the headers for an API request.
|
* Prepares the headers for an API request.
|
||||||
* @returns {AxiosRequestHeaders} The prepared headers.
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
||||||
|
* @returns The prepared headers.
|
||||||
*/
|
*/
|
||||||
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
|
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
|
||||||
/**
|
/**
|
||||||
* Sends a POST request to the specified URL.
|
* Sends a POST request to the specified URL.
|
||||||
* @param {string} url - The URL to send the request to.
|
* @param url - The URL to send the request to.
|
||||||
* @param {Params} data - The data to send in the request.
|
* @param data - The data to send in the request.
|
||||||
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
* @param headers - The headers for the request.
|
||||||
* @returns {Promise<AxiosResponse>} The response from the POST request.
|
* @returns The response from the POST request.
|
||||||
*/
|
*/
|
||||||
postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
|
postRequest(url: string, data: any, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
|
||||||
/**
|
/**
|
||||||
* Sends a GET request to the specified URL.
|
* Sends a GET request to the specified URL.
|
||||||
* @param {string} url - The URL to send the request to.
|
* @param url - The URL to send the request to.
|
||||||
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
* @param headers - The headers for the request.
|
||||||
* @returns {Promise<AxiosResponse>} The response from the GET request.
|
* @returns The response from the GET request.
|
||||||
*/
|
*/
|
||||||
getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
|
getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
|
||||||
/**
|
/**
|
||||||
* Monitors the status of a crawl job until completion or failure.
|
* Monitors the status of a crawl job until completion or failure.
|
||||||
* @param {string} jobId - The job ID of the crawl operation.
|
* @param id - The ID of the crawl operation.
|
||||||
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
* @param headers - The headers for the request.
|
||||||
* @param {number} timeout - Timeout in seconds for job status checks.
|
* @param checkInterval - Interval in seconds for job status checks.
|
||||||
* @returns {Promise<any>} The final job status or data.
|
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
||||||
|
* @returns The final job status or data.
|
||||||
*/
|
*/
|
||||||
monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<any>;
|
monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number, checkUrl?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse>;
|
||||||
/**
|
/**
|
||||||
* Handles errors from API responses.
|
* Handles errors from API responses.
|
||||||
* @param {AxiosResponse} response - The response from the API.
|
* @param {AxiosResponse} response - The response from the API.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user