mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-05 13:00:37 +08:00
Merge pull request #611 from MonsterDeveloper/fix-attw
build(js-sdk): simplify build process
This commit is contained in:
commit
c92065ea24
2
.gitignore
vendored
2
.gitignore
vendored
@ -21,3 +21,5 @@ apps/playwright-service-ts/package-lock.json
|
||||
|
||||
*.pyc
|
||||
.rdb
|
||||
|
||||
apps/js-sdk/firecrawl/dist
|
@ -1,347 +0,0 @@
|
||||
"use strict";
|
||||
var __importDefault = (this && this.__importDefault) || function (mod) {
|
||||
return (mod && mod.__esModule) ? mod : { "default": mod };
|
||||
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.CrawlWatcher = void 0;
|
||||
const axios_1 = __importDefault(require("axios"));
|
||||
const zod_to_json_schema_1 = require("zod-to-json-schema");
|
||||
const isows_1 = require("isows");
|
||||
const typescript_event_target_1 = require("typescript-event-target");
|
||||
/**
|
||||
* Main class for interacting with the Firecrawl API.
|
||||
* Provides methods for scraping, searching, crawling, and mapping web content.
|
||||
*/
|
||||
/**
 * Main class for interacting with the Firecrawl API.
 * Provides methods for scraping, crawling, and mapping web content
 * (search is declared but rejected for the v1 API).
 */
class FirecrawlApp {
    /**
     * Initializes a new instance of the FirecrawlApp class.
     * @param config - Configuration options. Both `apiKey` and `apiUrl` are
     *   optional; the whole object may be omitted. A missing key becomes ""
     *   and a missing URL becomes "https://api.firecrawl.dev".
     */
    constructor({ apiKey = null, apiUrl = null } = {}) {
        this.apiKey = apiKey || "";
        this.apiUrl = apiUrl || "https://api.firecrawl.dev";
    }
    /**
     * Scrapes a URL using the Firecrawl API.
     * @param url - The URL to scrape.
     * @param params - Additional parameters for the scrape request. When
     *   `params.extract.schema` is set, it is passed through zodToJsonSchema
     *   before being sent.
     * @returns On success, `{ success: true, warning, error, ...data }`.
     * @throws Error when the request fails or the API reports `success: false`.
     */
    async scrapeUrl(url, params) {
        const headers = this.prepareHeaders();
        let jsonData = { url, ...params };
        if (jsonData?.extract?.schema) {
            let schema = jsonData.extract.schema;
            // Try parsing the schema as a Zod schema
            try {
                schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
            }
            catch (error) {
                // Deliberate best-effort: if the conversion throws, the schema
                // is sent to the API exactly as the caller supplied it.
            }
            jsonData = {
                ...jsonData,
                extract: {
                    ...jsonData.extract,
                    schema: schema,
                },
            };
        }
        try {
            const response = await this.postRequest(this.apiUrl + `/v1/scrape`, jsonData, headers);
            if (response.status === 200) {
                const responseData = response.data;
                if (responseData.success) {
                    return {
                        success: true,
                        warning: responseData.warning,
                        error: responseData.error,
                        ...responseData.data
                    };
                }
                throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
            }
            this.handleError(response, "scrape URL");
        }
        catch (error) {
            throw this.buildRequestError(error);
        }
        // Only reachable if handleError is overridden so that it does not throw.
        return { success: false, error: "Internal server error." };
    }
    /**
     * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
     * @param query - The search query string (unused).
     * @param params - Additional parameters for the search (unused).
     * @returns Never resolves; always rejects with an error advising to use version 0 of the API.
     */
    async search(query, params) {
        throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
    }
    /**
     * Initiates a crawl job for a URL and polls it until it finishes.
     * @param url - The URL to crawl.
     * @param params - Additional parameters for the crawl request.
     * @param pollInterval - Time in seconds between job status checks.
     * @param idempotencyKey - Optional idempotency key for the request.
     * @returns The final status payload returned by monitorJobStatus.
     * @throws Error when the request fails or the job does not complete.
     */
    async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
        const headers = this.prepareHeaders(idempotencyKey);
        const jsonData = { url, ...params };
        try {
            const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
            if (response.status === 200) {
                const id = response.data.id;
                return this.monitorJobStatus(id, headers, pollInterval);
            }
            this.handleError(response, "start crawl job");
        }
        catch (error) {
            throw this.buildRequestError(error);
        }
        // Only reachable if handleError is overridden so that it does not throw.
        return { success: false, error: "Internal server error." };
    }
    /**
     * Starts a crawl job without waiting for it to finish.
     * @param url - The URL to crawl.
     * @param params - Additional parameters for the crawl request.
     * @param idempotencyKey - Optional idempotency key for the request.
     * @returns The response body of the crawl request, unmodified.
     * @throws Error when the request fails.
     */
    async asyncCrawlUrl(url, params, idempotencyKey) {
        const headers = this.prepareHeaders(idempotencyKey);
        const jsonData = { url, ...params };
        try {
            const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
            if (response.status === 200) {
                return response.data;
            }
            this.handleError(response, "start crawl job");
        }
        catch (error) {
            throw this.buildRequestError(error);
        }
        // Only reachable if handleError is overridden so that it does not throw.
        return { success: false, error: "Internal server error." };
    }
    /**
     * Checks the status of a crawl job using the Firecrawl API.
     * @param id - The ID of the crawl operation.
     * @returns The job status fields copied from the response, with
     *   `expiresAt` converted to a Date.
     * @throws Error when no ID is given or the request fails.
     */
    async checkCrawlStatus(id) {
        if (!id) {
            throw new Error("No crawl ID provided");
        }
        const headers = this.prepareHeaders();
        try {
            const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
            if (response.status === 200) {
                return ({
                    success: true,
                    status: response.data.status,
                    total: response.data.total,
                    completed: response.data.completed,
                    creditsUsed: response.data.creditsUsed,
                    expiresAt: new Date(response.data.expiresAt),
                    next: response.data.next,
                    data: response.data.data,
                    error: response.data.error
                });
            }
            this.handleError(response, "check crawl status");
        }
        catch (error) {
            throw this.buildRequestError(error);
        }
        // Only reachable if handleError is overridden so that it does not throw.
        return { success: false, error: "Internal server error." };
    }
    /**
     * Starts a crawl job and returns a CrawlWatcher attached to it.
     * @param url - The URL to crawl.
     * @param params - Additional parameters for the crawl request.
     * @param idempotencyKey - Optional idempotency key for the request.
     * @returns A CrawlWatcher for the new job.
     * @throws Error when the start response lacks `success` or `id`.
     */
    async crawlUrlAndWatch(url, params, idempotencyKey) {
        const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
        if (crawl.success && crawl.id) {
            const id = crawl.id;
            return new CrawlWatcher(id, this);
        }
        throw new Error("Crawl job failed to start");
    }
    /**
     * Requests a map for a URL using the Firecrawl API.
     * @param url - The URL to map.
     * @param params - Additional parameters for the map request.
     * @returns The response body of the map request, unmodified.
     * @throws Error when the request fails.
     */
    async mapUrl(url, params) {
        const headers = this.prepareHeaders();
        const jsonData = { url, ...params };
        try {
            const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers);
            if (response.status === 200) {
                return response.data;
            }
            this.handleError(response, "map");
        }
        catch (error) {
            throw this.buildRequestError(error);
        }
        // Only reachable if handleError is overridden so that it does not throw.
        return { success: false, error: "Internal server error." };
    }
    /**
     * Prepares the headers for an API request.
     * @param idempotencyKey - Optional key to ensure idempotency.
     * @returns The prepared headers; "x-idempotency-key" is present only when a key is given.
     */
    prepareHeaders(idempotencyKey) {
        return {
            "Content-Type": "application/json",
            Authorization: `Bearer ${this.apiKey}`,
            ...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
        };
    }
    /**
     * Sends a POST request to the specified URL.
     * @param url - The URL to send the request to.
     * @param data - The data to send in the request.
     * @param headers - The headers for the request.
     * @returns The response from the POST request.
     */
    postRequest(url, data, headers) {
        return axios_1.default.post(url, data, { headers });
    }
    /**
     * Sends a GET request to the specified URL.
     * @param url - The URL to send the request to.
     * @param headers - The headers for the request.
     * @returns The response from the GET request.
     */
    getRequest(url, headers) {
        return axios_1.default.get(url, { headers });
    }
    /**
     * Monitors the status of a crawl job until completion or failure.
     * @param id - The ID of the crawl operation.
     * @param headers - The headers for the request.
     * @param checkInterval - Interval in seconds for job status checks;
     *   values below 2 (or non-numeric values) are treated as 2.
     * @returns The status payload of the completed job.
     * @throws Error when the job completes without data or reaches any
     *   status other than the in-progress ones listed below.
     */
    async monitorJobStatus(id, headers, checkInterval) {
        // Loop-invariant: clamp once instead of on every iteration.
        const waitSeconds = Number.isFinite(checkInterval) ? Math.max(checkInterval, 2) : 2;
        const inProgress = ["active", "paused", "pending", "queued", "scraping"];
        while (true) {
            const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
            if (statusResponse.status !== 200) {
                this.handleError(statusResponse, "check crawl status");
                continue;
            }
            const statusData = statusResponse.data;
            if (statusData.status === "completed") {
                if ("data" in statusData) {
                    return statusData;
                }
                throw new Error("Crawl job completed but no data was returned");
            }
            if (!inProgress.includes(statusData.status)) {
                throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
            }
            await new Promise((resolve) => setTimeout(resolve, waitSeconds * 1000));
        }
    }
    /**
     * Handles errors from API responses. Always throws.
     * @param {AxiosResponse} response - The response from the API.
     * @param {string} action - The action being performed when the error occurred.
     */
    handleError(response, action) {
        if ([402, 408, 409, 500].includes(response.status)) {
            // response.data may be absent; fall back to a generic message.
            const errorMessage = response.data?.error || "Unknown error occurred";
            throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
        }
        throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
    }
    /**
     * Internal helper: converts a caught failure into the Error to rethrow.
     * When the failure carries `response.data.error`, the status code, the
     * server error and any `details` are included in the message; otherwise
     * only the original message is kept.
     * @param error - The value caught from a failed request.
     * @returns A new Error describing the failure.
     */
    buildRequestError(error) {
        const response = error?.response;
        if (response?.data?.error) {
            return new Error(`Request failed with status code ${response.status}. Error: ${response.data.error} ${response.data.details ? ` - ${JSON.stringify(response.data.details)}` : ''}`);
        }
        return new Error(error?.message);
    }
}
|
||||
exports.default = FirecrawlApp;
|
||||
/**
 * Watches a crawl job over a WebSocket and re-emits its progress as typed
 * events: "document" for each document, "done" on completion and "error" on
 * failure.
 */
class CrawlWatcher extends typescript_event_target_1.TypedEventTarget {
    /**
     * Opens the WebSocket for the given crawl job and wires its handlers.
     * @param id - The ID of the crawl job to watch.
     * @param app - The FirecrawlApp whose `apiUrl` and `apiKey` are used for the connection.
     */
    constructor(id, app) {
        super();
        this.ws = new isows_1.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
        this.status = "scraping";
        this.data = [];
        // Dispatches one typed event carrying `detail`.
        const emit = (type, detail) => {
            this.dispatchTypedEvent(type, new CustomEvent(type, { detail }));
        };
        const messageHandler = (msg) => {
            // JSON.parse can legitimately yield null or a primitive; ignore those.
            if (!msg || typeof msg !== "object") {
                return;
            }
            if (msg.type === "done") {
                this.status = "completed";
                emit("done", {
                    status: this.status,
                    data: this.data,
                });
            }
            else if (msg.type === "error") {
                this.status = "failed";
                emit("error", {
                    status: this.status,
                    data: this.data,
                    error: msg.error,
                });
            }
            else if (msg.type === "catchup") {
                this.status = msg.data.status;
                const incoming = msg.data.data ?? [];
                // Emit only the documents that arrived with this message so a
                // repeated catchup does not re-announce earlier documents.
                for (const doc of incoming) {
                    this.data.push(doc);
                    emit("document", doc);
                }
            }
            else if (msg.type === "document") {
                emit("document", msg.data);
            }
        };
        this.ws.onmessage = (ev) => {
            if (typeof ev.data !== "string") {
                this.ws.close();
                return;
            }
            try {
                messageHandler(JSON.parse(ev.data));
            }
            catch (error) {
                // A malformed frame must not surface as an uncaught exception
                // from the socket callback; report it and keep listening.
                console.error("Error on message", error);
            }
        };
        this.ws.onclose = (ev) => {
            // The close reason is empty for ordinary closes (including a local
            // close()); only a non-empty reason is parsed as a message.
            if (!ev.reason) {
                return;
            }
            try {
                messageHandler(JSON.parse(ev.reason));
            }
            catch (error) {
                console.error("Error on close", error);
            }
        };
        this.ws.onerror = (_) => {
            this.status = "failed";
            emit("error", {
                status: this.status,
                data: this.data,
                error: "WebSocket error",
            });
        };
    }
    /** Closes the underlying WebSocket. */
    close() {
        this.ws.close();
    }
}
|
||||
exports.CrawlWatcher = CrawlWatcher;
|
@ -1 +0,0 @@
|
||||
{"type": "commonjs"}
|
@ -1,339 +0,0 @@
|
||||
import axios from "axios";
|
||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||
import { WebSocket } from "isows";
|
||||
import { TypedEventTarget } from "typescript-event-target";
|
||||
/**
|
||||
* Main class for interacting with the Firecrawl API.
|
||||
* Provides methods for scraping, searching, crawling, and mapping web content.
|
||||
*/
|
||||
/**
 * Main class for interacting with the Firecrawl API.
 * Provides methods for scraping, crawling, and mapping web content
 * (search is declared but rejected for the v1 API).
 */
export default class FirecrawlApp {
    /**
     * Initializes a new instance of the FirecrawlApp class.
     * @param config - Configuration options. Both `apiKey` and `apiUrl` are
     *   optional; the whole object may be omitted. A missing key becomes ""
     *   and a missing URL becomes "https://api.firecrawl.dev".
     */
    constructor({ apiKey = null, apiUrl = null } = {}) {
        this.apiKey = apiKey || "";
        this.apiUrl = apiUrl || "https://api.firecrawl.dev";
    }
    /**
     * Scrapes a URL using the Firecrawl API.
     * @param url - The URL to scrape.
     * @param params - Additional parameters for the scrape request. When
     *   `params.extract.schema` is set, it is passed through zodToJsonSchema
     *   before being sent.
     * @returns On success, `{ success: true, warning, error, ...data }`.
     * @throws Error when the request fails or the API reports `success: false`.
     */
    async scrapeUrl(url, params) {
        const headers = this.prepareHeaders();
        let jsonData = { url, ...params };
        if (jsonData?.extract?.schema) {
            let schema = jsonData.extract.schema;
            // Try parsing the schema as a Zod schema
            try {
                schema = zodToJsonSchema(schema);
            }
            catch (error) {
                // Deliberate best-effort: if the conversion throws, the schema
                // is sent to the API exactly as the caller supplied it.
            }
            jsonData = {
                ...jsonData,
                extract: {
                    ...jsonData.extract,
                    schema: schema,
                },
            };
        }
        try {
            const response = await this.postRequest(this.apiUrl + `/v1/scrape`, jsonData, headers);
            if (response.status === 200) {
                const responseData = response.data;
                if (responseData.success) {
                    return {
                        success: true,
                        warning: responseData.warning,
                        error: responseData.error,
                        ...responseData.data
                    };
                }
                throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
            }
            this.handleError(response, "scrape URL");
        }
        catch (error) {
            throw this.buildRequestError(error);
        }
        // Only reachable if handleError is overridden so that it does not throw.
        return { success: false, error: "Internal server error." };
    }
    /**
     * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
     * @param query - The search query string (unused).
     * @param params - Additional parameters for the search (unused).
     * @returns Never resolves; always rejects with an error advising to use version 0 of the API.
     */
    async search(query, params) {
        throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
    }
    /**
     * Initiates a crawl job for a URL and polls it until it finishes.
     * @param url - The URL to crawl.
     * @param params - Additional parameters for the crawl request.
     * @param pollInterval - Time in seconds between job status checks.
     * @param idempotencyKey - Optional idempotency key for the request.
     * @returns The final status payload returned by monitorJobStatus.
     * @throws Error when the request fails or the job does not complete.
     */
    async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
        const headers = this.prepareHeaders(idempotencyKey);
        const jsonData = { url, ...params };
        try {
            const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
            if (response.status === 200) {
                const id = response.data.id;
                return this.monitorJobStatus(id, headers, pollInterval);
            }
            this.handleError(response, "start crawl job");
        }
        catch (error) {
            throw this.buildRequestError(error);
        }
        // Only reachable if handleError is overridden so that it does not throw.
        return { success: false, error: "Internal server error." };
    }
    /**
     * Starts a crawl job without waiting for it to finish.
     * @param url - The URL to crawl.
     * @param params - Additional parameters for the crawl request.
     * @param idempotencyKey - Optional idempotency key for the request.
     * @returns The response body of the crawl request, unmodified.
     * @throws Error when the request fails.
     */
    async asyncCrawlUrl(url, params, idempotencyKey) {
        const headers = this.prepareHeaders(idempotencyKey);
        const jsonData = { url, ...params };
        try {
            const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
            if (response.status === 200) {
                return response.data;
            }
            this.handleError(response, "start crawl job");
        }
        catch (error) {
            throw this.buildRequestError(error);
        }
        // Only reachable if handleError is overridden so that it does not throw.
        return { success: false, error: "Internal server error." };
    }
    /**
     * Checks the status of a crawl job using the Firecrawl API.
     * @param id - The ID of the crawl operation.
     * @returns The job status fields copied from the response, with
     *   `expiresAt` converted to a Date.
     * @throws Error when no ID is given or the request fails.
     */
    async checkCrawlStatus(id) {
        if (!id) {
            throw new Error("No crawl ID provided");
        }
        const headers = this.prepareHeaders();
        try {
            const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
            if (response.status === 200) {
                return ({
                    success: true,
                    status: response.data.status,
                    total: response.data.total,
                    completed: response.data.completed,
                    creditsUsed: response.data.creditsUsed,
                    expiresAt: new Date(response.data.expiresAt),
                    next: response.data.next,
                    data: response.data.data,
                    error: response.data.error
                });
            }
            this.handleError(response, "check crawl status");
        }
        catch (error) {
            throw this.buildRequestError(error);
        }
        // Only reachable if handleError is overridden so that it does not throw.
        return { success: false, error: "Internal server error." };
    }
    /**
     * Starts a crawl job and returns a CrawlWatcher attached to it.
     * @param url - The URL to crawl.
     * @param params - Additional parameters for the crawl request.
     * @param idempotencyKey - Optional idempotency key for the request.
     * @returns A CrawlWatcher for the new job.
     * @throws Error when the start response lacks `success` or `id`.
     */
    async crawlUrlAndWatch(url, params, idempotencyKey) {
        const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
        if (crawl.success && crawl.id) {
            const id = crawl.id;
            return new CrawlWatcher(id, this);
        }
        throw new Error("Crawl job failed to start");
    }
    /**
     * Requests a map for a URL using the Firecrawl API.
     * @param url - The URL to map.
     * @param params - Additional parameters for the map request.
     * @returns The response body of the map request, unmodified.
     * @throws Error when the request fails.
     */
    async mapUrl(url, params) {
        const headers = this.prepareHeaders();
        const jsonData = { url, ...params };
        try {
            const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers);
            if (response.status === 200) {
                return response.data;
            }
            this.handleError(response, "map");
        }
        catch (error) {
            throw this.buildRequestError(error);
        }
        // Only reachable if handleError is overridden so that it does not throw.
        return { success: false, error: "Internal server error." };
    }
    /**
     * Prepares the headers for an API request.
     * @param idempotencyKey - Optional key to ensure idempotency.
     * @returns The prepared headers; "x-idempotency-key" is present only when a key is given.
     */
    prepareHeaders(idempotencyKey) {
        return {
            "Content-Type": "application/json",
            Authorization: `Bearer ${this.apiKey}`,
            ...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
        };
    }
    /**
     * Sends a POST request to the specified URL.
     * @param url - The URL to send the request to.
     * @param data - The data to send in the request.
     * @param headers - The headers for the request.
     * @returns The response from the POST request.
     */
    postRequest(url, data, headers) {
        return axios.post(url, data, { headers });
    }
    /**
     * Sends a GET request to the specified URL.
     * @param url - The URL to send the request to.
     * @param headers - The headers for the request.
     * @returns The response from the GET request.
     */
    getRequest(url, headers) {
        return axios.get(url, { headers });
    }
    /**
     * Monitors the status of a crawl job until completion or failure.
     * @param id - The ID of the crawl operation.
     * @param headers - The headers for the request.
     * @param checkInterval - Interval in seconds for job status checks;
     *   values below 2 (or non-numeric values) are treated as 2.
     * @returns The status payload of the completed job.
     * @throws Error when the job completes without data or reaches any
     *   status other than the in-progress ones listed below.
     */
    async monitorJobStatus(id, headers, checkInterval) {
        // Loop-invariant: clamp once instead of on every iteration.
        const waitSeconds = Number.isFinite(checkInterval) ? Math.max(checkInterval, 2) : 2;
        const inProgress = ["active", "paused", "pending", "queued", "scraping"];
        while (true) {
            const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
            if (statusResponse.status !== 200) {
                this.handleError(statusResponse, "check crawl status");
                continue;
            }
            const statusData = statusResponse.data;
            if (statusData.status === "completed") {
                if ("data" in statusData) {
                    return statusData;
                }
                throw new Error("Crawl job completed but no data was returned");
            }
            if (!inProgress.includes(statusData.status)) {
                throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
            }
            await new Promise((resolve) => setTimeout(resolve, waitSeconds * 1000));
        }
    }
    /**
     * Handles errors from API responses. Always throws.
     * @param {AxiosResponse} response - The response from the API.
     * @param {string} action - The action being performed when the error occurred.
     */
    handleError(response, action) {
        if ([402, 408, 409, 500].includes(response.status)) {
            // response.data may be absent; fall back to a generic message.
            const errorMessage = response.data?.error || "Unknown error occurred";
            throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
        }
        throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
    }
    /**
     * Internal helper: converts a caught failure into the Error to rethrow.
     * When the failure carries `response.data.error`, the status code, the
     * server error and any `details` are included in the message; otherwise
     * only the original message is kept.
     * @param error - The value caught from a failed request.
     * @returns A new Error describing the failure.
     */
    buildRequestError(error) {
        const response = error?.response;
        if (response?.data?.error) {
            return new Error(`Request failed with status code ${response.status}. Error: ${response.data.error} ${response.data.details ? ` - ${JSON.stringify(response.data.details)}` : ''}`);
        }
        return new Error(error?.message);
    }
}
|
||||
/**
 * Watches a crawl job over a WebSocket and re-emits its progress as typed
 * events: "document" for each document, "done" on completion and "error" on
 * failure.
 */
export class CrawlWatcher extends TypedEventTarget {
    /**
     * Opens the WebSocket for the given crawl job and wires its handlers.
     * @param id - The ID of the crawl job to watch.
     * @param app - The FirecrawlApp whose `apiUrl` and `apiKey` are used for the connection.
     */
    constructor(id, app) {
        super();
        this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
        this.status = "scraping";
        this.data = [];
        // Dispatches one typed event carrying `detail`.
        const emit = (type, detail) => {
            this.dispatchTypedEvent(type, new CustomEvent(type, { detail }));
        };
        const messageHandler = (msg) => {
            // JSON.parse can legitimately yield null or a primitive; ignore those.
            if (!msg || typeof msg !== "object") {
                return;
            }
            if (msg.type === "done") {
                this.status = "completed";
                emit("done", {
                    status: this.status,
                    data: this.data,
                });
            }
            else if (msg.type === "error") {
                this.status = "failed";
                emit("error", {
                    status: this.status,
                    data: this.data,
                    error: msg.error,
                });
            }
            else if (msg.type === "catchup") {
                this.status = msg.data.status;
                const incoming = msg.data.data ?? [];
                // Emit only the documents that arrived with this message so a
                // repeated catchup does not re-announce earlier documents.
                for (const doc of incoming) {
                    this.data.push(doc);
                    emit("document", doc);
                }
            }
            else if (msg.type === "document") {
                emit("document", msg.data);
            }
        };
        this.ws.onmessage = (ev) => {
            if (typeof ev.data !== "string") {
                this.ws.close();
                return;
            }
            try {
                messageHandler(JSON.parse(ev.data));
            }
            catch (error) {
                // A malformed frame must not surface as an uncaught exception
                // from the socket callback; report it and keep listening.
                console.error("Error on message", error);
            }
        };
        this.ws.onclose = (ev) => {
            // The close reason is empty for ordinary closes (including a local
            // close()); only a non-empty reason is parsed as a message.
            if (!ev.reason) {
                return;
            }
            try {
                messageHandler(JSON.parse(ev.reason));
            }
            catch (error) {
                console.error("Error on close", error);
            }
        };
        this.ws.onerror = (_) => {
            this.status = "failed";
            emit("error", {
                status: this.status,
                data: this.data,
                error: "WebSocket error",
            });
        };
    }
    /** Closes the underlying WebSocket. */
    close() {
        this.ws.close();
    }
}
|
@ -1 +0,0 @@
|
||||
{"type": "module"}
|
1545
apps/js-sdk/firecrawl/package-lock.json
generated
1545
apps/js-sdk/firecrawl/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -2,21 +2,18 @@
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "1.2.3",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "build/cjs/index.js",
|
||||
"types": "types/index.d.ts",
|
||||
"type": "module",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"exports": {
|
||||
"require": {
|
||||
"types": "./types/index.d.ts",
|
||||
"default": "./build/cjs/index.js"
|
||||
},
|
||||
"import": {
|
||||
"types": "./types/index.d.ts",
|
||||
"default": "./build/esm/index.js"
|
||||
"./package.json": "./package.json",
|
||||
".": {
|
||||
"import": "./dist/index.js",
|
||||
"default": "./dist/index.cjs"
|
||||
}
|
||||
},
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"build": "tsc --module commonjs --moduleResolution node10 --outDir build/cjs/ && echo '{\"type\": \"commonjs\"}' > build/cjs/package.json && npx tsc --module NodeNext --moduleResolution NodeNext --outDir build/esm/ && echo '{\"type\": \"module\"}' > build/esm/package.json",
|
||||
"build": "tsup",
|
||||
"build-and-publish": "npm run build && npm publish --access public",
|
||||
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
||||
"test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
|
||||
@ -50,6 +47,7 @@
|
||||
"@types/uuid": "^9.0.8",
|
||||
"jest": "^29.7.0",
|
||||
"ts-jest": "^29.2.2",
|
||||
"tsup": "^8.2.4",
|
||||
"typescript": "^5.4.5"
|
||||
},
|
||||
"keywords": [
|
||||
|
@ -1,4 +1,4 @@
|
||||
import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
|
||||
import axios, { type AxiosResponse, type AxiosRequestHeaders } from "axios";
|
||||
import type { ZodSchema } from "zod";
|
||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||
import { WebSocket } from "isows";
|
||||
|
@ -1,110 +1,24 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
/* Visit https://aka.ms/tsconfig to read more about this file */
|
||||
// See https://www.totaltypescript.com/tsconfig-cheat-sheet
|
||||
/* Base Options: */
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"target": "es2022",
|
||||
"allowJs": true,
|
||||
"resolveJsonModule": true,
|
||||
"moduleDetection": "force",
|
||||
"isolatedModules": true,
|
||||
"verbatimModuleSyntax": true,
|
||||
|
||||
/* Projects */
|
||||
// "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
|
||||
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
|
||||
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
|
||||
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
|
||||
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
|
||||
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
|
||||
/* Strictness */
|
||||
"strict": true,
|
||||
"noUncheckedIndexedAccess": true,
|
||||
"noImplicitOverride": true,
|
||||
|
||||
/* Language and Environment */
|
||||
"target": "es2020", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
|
||||
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
|
||||
// "jsx": "preserve", /* Specify what JSX code is generated. */
|
||||
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
|
||||
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
|
||||
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
|
||||
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
|
||||
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
|
||||
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
|
||||
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
|
||||
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
|
||||
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
|
||||
|
||||
/* Modules */
|
||||
"module": "commonjs", /* Specify what module code is generated. */
|
||||
"rootDir": "./src", /* Specify the root folder within your source files. */
|
||||
"moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */
|
||||
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
|
||||
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
|
||||
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
|
||||
// "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
|
||||
// "types": [], /* Specify type package names to be included without being referenced in a source file. */
|
||||
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
|
||||
// "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
|
||||
// "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
|
||||
// "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */
|
||||
// "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
|
||||
// "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
|
||||
// "resolveJsonModule": true, /* Enable importing .json files. */
|
||||
// "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
|
||||
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
|
||||
|
||||
/* JavaScript Support */
|
||||
// "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
|
||||
// "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
|
||||
// "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
|
||||
|
||||
/* Emit */
|
||||
"declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
|
||||
// "declarationMap": true, /* Create sourcemaps for d.ts files. */
|
||||
// "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
|
||||
// "sourceMap": true, /* Create source map files for emitted JavaScript files. */
|
||||
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
|
||||
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
|
||||
"outDir": "./build", /* Specify an output folder for all emitted files. */
|
||||
// "removeComments": true, /* Disable emitting comments. */
|
||||
// "noEmit": true, /* Disable emitting files from a compilation. */
|
||||
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
|
||||
// "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */
|
||||
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
|
||||
// "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
|
||||
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
|
||||
// "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
|
||||
// "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
|
||||
// "newLine": "crlf", /* Set the newline character for emitting files. */
|
||||
// "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
|
||||
// "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
|
||||
// "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
|
||||
// "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
|
||||
"declarationDir": "./types", /* Specify the output directory for generated declaration files. */
|
||||
// "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */
|
||||
|
||||
/* Interop Constraints */
|
||||
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
|
||||
// "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
|
||||
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
|
||||
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
|
||||
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
|
||||
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
|
||||
|
||||
/* Type Checking */
|
||||
"strict": true, /* Enable all strict type-checking options. */
|
||||
// "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
|
||||
// "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
|
||||
// "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
|
||||
// "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
|
||||
// "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
|
||||
// "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
|
||||
// "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
|
||||
// "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
|
||||
// "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
|
||||
// "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
|
||||
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
|
||||
// "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
|
||||
// "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
|
||||
// "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
|
||||
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
|
||||
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
|
||||
// "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
|
||||
// "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
|
||||
|
||||
/* Completeness */
|
||||
// "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
|
||||
"skipLibCheck": true /* Skip type checking all .d.ts files. */
|
||||
/* If NOT transpiling with TypeScript: */
|
||||
"module": "NodeNext",
|
||||
"noEmit": true,
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist", "**/__tests__/*"]
|
||||
|
9
apps/js-sdk/firecrawl/tsup.config.ts
Normal file
9
apps/js-sdk/firecrawl/tsup.config.ts
Normal file
@ -0,0 +1,9 @@
|
||||
import { defineConfig } from "tsup";
|
||||
|
||||
// tsup build configuration for the SDK: bundles src/index.ts into dist/ in
// both CommonJS and ESM formats and emits type declarations alongside.
export default defineConfig({
  // `entry` is the supported option name; `entryPoints` is a deprecated alias.
  entry: ["src/index.ts"],
  format: ["cjs", "esm"],
  dts: true,
  outDir: "dist",
  clean: true,
});
|
260
apps/js-sdk/firecrawl/types/index.d.ts
vendored
260
apps/js-sdk/firecrawl/types/index.d.ts
vendored
@ -1,260 +0,0 @@
|
||||
import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
||||
import { z } from "zod";
|
||||
import { TypedEventTarget } from "typescript-event-target";
|
||||
/**
 * Configuration interface for FirecrawlApp.
 *
 * Both fields are optional and may be passed as `null`.
 */
export interface FirecrawlAppConfig {
  /** Optional API key for authentication. */
  apiKey?: string | null;
  /** Optional base URL of the API; defaults to 'https://api.firecrawl.dev'. */
  apiUrl?: string | null;
}
|
||||
/**
 * Metadata for a Firecrawl document.
 *
 * Every named property is optional; the index signature additionally allows
 * arbitrary extra keys of any type.
 */
export interface FirecrawlDocumentMetadata {
  title?: string;
  description?: string;
  language?: string;
  keywords?: string;
  robots?: string;
  ogTitle?: string;
  ogDescription?: string;
  ogUrl?: string;
  ogImage?: string;
  ogAudio?: string;
  ogDeterminer?: string;
  ogLocale?: string;
  ogLocaleAlternate?: string[];
  ogSiteName?: string;
  ogVideo?: string;
  dctermsCreated?: string;
  dcDateCreated?: string;
  dcDate?: string;
  dctermsType?: string;
  dcType?: string;
  dctermsAudience?: string;
  dctermsSubject?: string;
  dcSubject?: string;
  dcDescription?: string;
  dctermsKeywords?: string;
  modifiedTime?: string;
  publishedTime?: string;
  articleTag?: string;
  articleSection?: string;
  sourceURL?: string;
  statusCode?: number;
  error?: string;
  [key: string]: any;
}
|
||||
/**
 * Document interface for Firecrawl.
 * Represents a document retrieved or processed by Firecrawl.
 *
 * All fields are optional.
 */
export interface FirecrawlDocument {
  url?: string;
  markdown?: string;
  html?: string;
  rawHtml?: string;
  links?: string[];
  extract?: Record<any, any>;
  screenshot?: string;
  metadata?: FirecrawlDocumentMetadata;
}
|
||||
/**
 * Parameters for scraping operations.
 * Defines the options and configurations available for scraping web content.
 */
export interface ScrapeParams {
  /**
   * Output formats to request.
   *
   * NOTE(review): "full@scrennshot" is a misspelling kept only so existing
   * callers still type-check; "screenshot@fullPage" is believed to be the
   * value the API expects for a full-page screenshot — confirm against the
   * API reference.
   */
  formats: (
    | "markdown"
    | "html"
    | "rawHtml"
    | "content"
    | "links"
    | "screenshot"
    | "screenshot@fullPage"
    | "extract"
    | "full@scrennshot"
  )[];
  headers?: Record<string, string>;
  includeTags?: string[];
  excludeTags?: string[];
  onlyMainContent?: boolean;
  extract?: {
    prompt?: string;
    /** Either a Zod schema or any other schema value. */
    schema?: z.ZodSchema | any;
    systemPrompt?: string;
  };
  waitFor?: number;
  timeout?: number;
}
|
||||
/**
 * Response interface for scraping operations.
 * Defines the structure of the response received after a scraping operation.
 *
 * Extends FirecrawlDocument, so the document fields sit directly on the
 * response next to the `success` flag.
 */
export interface ScrapeResponse extends FirecrawlDocument {
  success: true;
  warning?: string;
  error?: string;
}
|
||||
/**
 * Parameters for crawling operations.
 * Includes options for both scraping and mapping during a crawl.
 */
export interface CrawlParams {
  includePaths?: string[];
  excludePaths?: string[];
  maxDepth?: number;
  limit?: number;
  allowBackwardLinks?: boolean;
  allowExternalLinks?: boolean;
  ignoreSitemap?: boolean;
  /** Scrape parameters nested inside the crawl request. */
  scrapeOptions?: ScrapeParams;
  webhook?: string;
}
|
||||
/**
 * Response interface for crawling operations.
 * Defines the structure of the response received after initiating a crawl.
 */
export interface CrawlResponse {
  id?: string;
  url?: string;
  success: true;
  error?: string;
}
|
||||
/**
 * Response interface for job status checks.
 * Provides detailed status of a crawl job including progress and results.
 */
export interface CrawlStatusResponse {
  success: true;
  total: number;
  completed: number;
  creditsUsed: number;
  expiresAt: Date;
  status: "scraping" | "completed" | "failed";
  next: string;
  data?: FirecrawlDocument[];
  error?: string;
}
|
||||
/**
 * Parameters for mapping operations.
 * Defines options for mapping URLs during a crawl.
 *
 * All fields are optional.
 */
export interface MapParams {
  search?: string;
  ignoreSitemap?: boolean;
  includeSubdomains?: boolean;
  limit?: number;
}
|
||||
/**
 * Response interface for mapping operations.
 * Defines the structure of the response received after a mapping operation.
 */
export interface MapResponse {
  success: true;
  links?: string[];
  error?: string;
}
|
||||
/**
 * Error response interface.
 * Defines the structure of the response received when an error occurs.
 *
 * `success` is the literal `false`, whereas the success-shaped responses
 * declare `success: true`, so the flag discriminates the two in a union.
 */
export interface ErrorResponse {
  success: false;
  error: string;
}
|
||||
/**
 * Main class for interacting with the Firecrawl API.
 * Provides methods for scraping, searching, crawling, and mapping web content.
 */
export default class FirecrawlApp {
  apiKey: string;
  apiUrl: string;
  /**
   * Initializes a new instance of the FirecrawlApp class.
   * @param config - Configuration options for the FirecrawlApp instance.
   */
  constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
  /**
   * Scrapes a URL using the Firecrawl API.
   * @param url - The URL to scrape.
   * @param params - Additional parameters for the scrape request.
   * @returns The response from the scrape operation.
   */
  scrapeUrl(url: string, params?: ScrapeParams): Promise<ScrapeResponse | ErrorResponse>;
  /**
   * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
   * @param query - The search query string.
   * @param params - Additional parameters for the search.
   * @returns Throws an error advising to use version 0 of the API.
   */
  search(query: string, params?: any): Promise<any>;
  /**
   * Initiates a crawl job for a URL using the Firecrawl API.
   * @param url - The URL to crawl.
   * @param params - Additional parameters for the crawl request.
   * @param pollInterval - Time in seconds for job status checks.
   * @param idempotencyKey - Optional idempotency key for the request.
   * @returns The response from the crawl operation.
   */
  crawlUrl(url: string, params?: CrawlParams, pollInterval?: number, idempotencyKey?: string): Promise<CrawlStatusResponse | ErrorResponse>;
  /**
   * Initiates a crawl job for a URL and resolves with the initiation
   * response (a CrawlResponse) rather than a job status.
   * @param url - The URL to crawl.
   * @param params - Additional parameters for the crawl request.
   * @param idempotencyKey - Optional idempotency key for the request.
   * @returns The response received after initiating the crawl.
   */
  asyncCrawlUrl(url: string, params?: CrawlParams, idempotencyKey?: string): Promise<CrawlResponse | ErrorResponse>;
  /**
   * Checks the status of a crawl job using the Firecrawl API.
   * @param id - The ID of the crawl operation.
   * @returns The response containing the job status.
   */
  checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse>;
  /**
   * Initiates a crawl for a URL and resolves with a CrawlWatcher for it.
   * @param url - The URL to crawl.
   * @param params - Additional parameters for the crawl request.
   * @param idempotencyKey - Optional idempotency key for the request.
   * @returns A CrawlWatcher for the crawl.
   */
  crawlUrlAndWatch(url: string, params?: CrawlParams, idempotencyKey?: string): Promise<CrawlWatcher>;
  /**
   * Maps a URL using the Firecrawl API.
   * @param url - The URL to map.
   * @param params - Additional parameters for the map request.
   * @returns The response from the map operation.
   */
  mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse>;
  /**
   * Prepares the headers for an API request.
   * @param idempotencyKey - Optional key to ensure idempotency.
   * @returns The prepared headers.
   */
  prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
  /**
   * Sends a POST request to the specified URL.
   * @param url - The URL to send the request to.
   * @param data - The data to send in the request.
   * @param headers - The headers for the request.
   * @returns The response from the POST request.
   */
  postRequest(url: string, data: any, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
  /**
   * Sends a GET request to the specified URL.
   * @param url - The URL to send the request to.
   * @param headers - The headers for the request.
   * @returns The response from the GET request.
   */
  getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse>;
  /**
   * Monitors the status of a crawl job until completion or failure.
   * @param id - The ID of the crawl operation.
   * @param headers - The headers for the request.
   * @param checkInterval - Interval in seconds for job status checks.
   * @returns The final job status or data.
   */
  monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<CrawlStatusResponse>;
  /**
   * Handles errors from API responses.
   * @param response - The response from the API.
   * @param action - The action being performed when the error occurred.
   */
  handleError(response: AxiosResponse, action: string): void;
}
|
||||
/**
 * Event map for CrawlWatcher: each key is an event name and each value is
 * the CustomEvent type whose `detail` carries that event's payload.
 */
interface CrawlWatcherEvents {
  /** Carries a single document. */
  document: CustomEvent<FirecrawlDocument>;
  /** Carries the crawl status together with the documents. */
  done: CustomEvent<{
    status: CrawlStatusResponse["status"];
    data: FirecrawlDocument[];
  }>;
  /** Carries the crawl status, the documents, and an error message. */
  error: CustomEvent<{
    status: CrawlStatusResponse["status"];
    data: FirecrawlDocument[];
    error: string;
  }>;
}
|
||||
/**
 * Typed event target for a crawl, dispatching the events declared in
 * CrawlWatcherEvents (`document`, `done`, `error`).
 */
export declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
  private ws;
  /** Documents held by the watcher. */
  data: FirecrawlDocument[];
  /** One of the CrawlStatusResponse status values. */
  status: CrawlStatusResponse["status"];
  /**
   * @param id - The ID of the crawl to watch.
   * @param app - The FirecrawlApp instance the watcher is created for.
   */
  constructor(id: string, app: FirecrawlApp);
  /** Closes the watcher. */
  close(): void;
}
|
||||
export {};
|
Loading…
x
Reference in New Issue
Block a user