mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-13 02:19:02 +08:00
Nick:
This commit is contained in:
parent
71dab56e36
commit
282962e36f
@ -3,9 +3,12 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|||||||
return (mod && mod.__esModule) ? mod : { "default": mod };
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
||||||
};
|
};
|
||||||
Object.defineProperty(exports, "__esModule", { value: true });
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
|
exports.CrawlWatcher = void 0;
|
||||||
const axios_1 = __importDefault(require("axios"));
|
const axios_1 = __importDefault(require("axios"));
|
||||||
const zod_1 = require("zod");
|
const zod_1 = require("zod");
|
||||||
const zod_to_json_schema_1 = require("zod-to-json-schema");
|
const zod_to_json_schema_1 = require("zod-to-json-schema");
|
||||||
|
const isows_1 = require("isows");
|
||||||
|
const typescript_event_target_1 = require("typescript-event-target");
|
||||||
/**
|
/**
|
||||||
* Main class for interacting with the Firecrawl API.
|
* Main class for interacting with the Firecrawl API.
|
||||||
* Provides methods for scraping, searching, crawling, and mapping web content.
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
||||||
@ -15,13 +18,9 @@ class FirecrawlApp {
|
|||||||
* Initializes a new instance of the FirecrawlApp class.
|
* Initializes a new instance of the FirecrawlApp class.
|
||||||
* @param config - Configuration options for the FirecrawlApp instance.
|
* @param config - Configuration options for the FirecrawlApp instance.
|
||||||
*/
|
*/
|
||||||
constructor({ apiKey = null, apiUrl = null, version = "v1" }) {
|
constructor({ apiKey = null, apiUrl = null }) {
|
||||||
this.apiKey = apiKey || "";
|
this.apiKey = apiKey || "";
|
||||||
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
||||||
this.version = version;
|
|
||||||
if (!this.apiKey) {
|
|
||||||
throw new Error("No API key provided");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Scrapes a URL using the Firecrawl API.
|
* Scrapes a URL using the Firecrawl API.
|
||||||
@ -51,16 +50,16 @@ class FirecrawlApp {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const response = await axios_1.default.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
|
const response = await axios_1.default.post(this.apiUrl + `/v1/scrape`, jsonData, { headers });
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const responseData = response.data;
|
const responseData = response.data;
|
||||||
if (responseData.success) {
|
if (responseData.success) {
|
||||||
return (this.version === 'v0' ? responseData : {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
warning: responseData.warning,
|
warning: responseData.warning,
|
||||||
error: responseData.error,
|
error: responseData.error,
|
||||||
...responseData.data
|
...responseData.data
|
||||||
});
|
};
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
||||||
@ -76,80 +75,52 @@ class FirecrawlApp {
|
|||||||
return { success: false, error: "Internal server error." };
|
return { success: false, error: "Internal server error." };
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Searches for a query using the Firecrawl API.
|
* This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
|
||||||
* @param query - The query to search for.
|
* @param query - The search query string.
|
||||||
* @param params - Additional parameters for the search request.
|
* @param params - Additional parameters for the search.
|
||||||
* @returns The response from the search operation.
|
* @returns Throws an error advising to use version 0 of the API.
|
||||||
*/
|
*/
|
||||||
async search(query, params) {
|
async search(query, params) {
|
||||||
if (this.version === "v1") {
|
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
||||||
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
|
||||||
}
|
|
||||||
const headers = {
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
Authorization: `Bearer ${this.apiKey}`,
|
|
||||||
};
|
|
||||||
let jsonData = { query };
|
|
||||||
if (params) {
|
|
||||||
jsonData = { ...jsonData, ...params };
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
const response = await axios_1.default.post(this.apiUrl + "/v0/search", jsonData, { headers });
|
|
||||||
if (response.status === 200) {
|
|
||||||
const responseData = response.data;
|
|
||||||
if (responseData.success) {
|
|
||||||
return responseData;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
this.handleError(response, "search");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (error) {
|
|
||||||
throw new Error(error.message);
|
|
||||||
}
|
|
||||||
return { success: false, error: "Internal server error." };
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Initiates a crawl job for a URL using the Firecrawl API.
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
||||||
* @param url - The URL to crawl.
|
* @param url - The URL to crawl.
|
||||||
* @param params - Additional parameters for the crawl request.
|
* @param params - Additional parameters for the crawl request.
|
||||||
* @param waitUntilDone - Whether to wait for the crawl job to complete.
|
|
||||||
* @param pollInterval - Time in seconds for job status checks.
|
* @param pollInterval - Time in seconds for job status checks.
|
||||||
* @param idempotencyKey - Optional idempotency key for the request.
|
* @param idempotencyKey - Optional idempotency key for the request.
|
||||||
* @returns The response from the crawl operation.
|
* @returns The response from the crawl operation.
|
||||||
*/
|
*/
|
||||||
async crawlUrl(url, params, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
|
||||||
const headers = this.prepareHeaders(idempotencyKey);
|
const headers = this.prepareHeaders(idempotencyKey);
|
||||||
let jsonData = { url, ...params };
|
let jsonData = { url, ...params };
|
||||||
try {
|
try {
|
||||||
const response = await this.postRequest(this.apiUrl + `/${this.version}/crawl`, jsonData, headers);
|
const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const id = this.version === 'v0' ? response.data.jobId : response.data.id;
|
const id = response.data.id;
|
||||||
let checkUrl = undefined;
|
return this.monitorJobStatus(id, headers, pollInterval);
|
||||||
if (waitUntilDone) {
|
}
|
||||||
if (this.version === 'v1') {
|
else {
|
||||||
checkUrl = response.data.url;
|
this.handleError(response, "start crawl job");
|
||||||
}
|
}
|
||||||
return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
|
}
|
||||||
}
|
catch (error) {
|
||||||
else {
|
if (error.response?.data?.error) {
|
||||||
if (this.version === 'v0') {
|
throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
|
||||||
return {
|
}
|
||||||
success: true,
|
else {
|
||||||
jobId: id
|
throw new Error(error.message);
|
||||||
};
|
}
|
||||||
}
|
}
|
||||||
else {
|
return { success: false, error: "Internal server error." };
|
||||||
return {
|
}
|
||||||
success: true,
|
async asyncCrawlUrl(url, params, idempotencyKey) {
|
||||||
id: id
|
const headers = this.prepareHeaders(idempotencyKey);
|
||||||
};
|
let jsonData = { url, ...params };
|
||||||
}
|
try {
|
||||||
}
|
const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
|
||||||
|
if (response.status === 200) {
|
||||||
|
return response.data;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
this.handleError(response, "start crawl job");
|
this.handleError(response, "start crawl job");
|
||||||
@ -176,37 +147,19 @@ class FirecrawlApp {
|
|||||||
}
|
}
|
||||||
const headers = this.prepareHeaders();
|
const headers = this.prepareHeaders();
|
||||||
try {
|
try {
|
||||||
const response = await this.getRequest(this.version === 'v1' ?
|
const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
|
||||||
`${this.apiUrl}/${this.version}/crawl/${id}` :
|
|
||||||
`${this.apiUrl}/${this.version}/crawl/status/${id}`, headers);
|
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
if (this.version === 'v0') {
|
return ({
|
||||||
return {
|
success: true,
|
||||||
success: true,
|
status: response.data.status,
|
||||||
status: response.data.status,
|
total: response.data.total,
|
||||||
current: response.data.current,
|
completed: response.data.completed,
|
||||||
current_url: response.data.current_url,
|
creditsUsed: response.data.creditsUsed,
|
||||||
current_step: response.data.current_step,
|
expiresAt: new Date(response.data.expiresAt),
|
||||||
total: response.data.total,
|
next: response.data.next,
|
||||||
data: response.data.data,
|
data: response.data.data,
|
||||||
partial_data: !response.data.data
|
error: response.data.error
|
||||||
? response.data.partial_data
|
});
|
||||||
: undefined,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return {
|
|
||||||
success: true,
|
|
||||||
status: response.data.status,
|
|
||||||
total: response.data.total,
|
|
||||||
completed: response.data.completed,
|
|
||||||
creditsUsed: response.data.creditsUsed,
|
|
||||||
expiresAt: new Date(response.data.expiresAt),
|
|
||||||
next: response.data.next,
|
|
||||||
data: response.data.data,
|
|
||||||
error: response.data.error
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
this.handleError(response, "check crawl status");
|
this.handleError(response, "check crawl status");
|
||||||
@ -215,29 +168,21 @@ class FirecrawlApp {
|
|||||||
catch (error) {
|
catch (error) {
|
||||||
throw new Error(error.message);
|
throw new Error(error.message);
|
||||||
}
|
}
|
||||||
return this.version === 'v0' ?
|
return { success: false, error: "Internal server error." };
|
||||||
{
|
}
|
||||||
success: false,
|
async crawlUrlAndWatch(url, params, idempotencyKey) {
|
||||||
status: "unknown",
|
const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
|
||||||
current: 0,
|
if (crawl.success && crawl.id) {
|
||||||
current_url: "",
|
const id = crawl.id;
|
||||||
current_step: "",
|
return new CrawlWatcher(id, this);
|
||||||
total: 0,
|
}
|
||||||
error: "Internal server error.",
|
throw new Error("Crawl job failed to start");
|
||||||
} :
|
|
||||||
{
|
|
||||||
success: false,
|
|
||||||
error: "Internal server error.",
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
async mapUrl(url, params) {
|
async mapUrl(url, params) {
|
||||||
if (this.version == 'v0') {
|
|
||||||
throw new Error("Map is not supported in v0");
|
|
||||||
}
|
|
||||||
const headers = this.prepareHeaders();
|
const headers = this.prepareHeaders();
|
||||||
let jsonData = { url, ...params };
|
let jsonData = { url, ...params };
|
||||||
try {
|
try {
|
||||||
const response = await this.postRequest(this.apiUrl + `/${this.version}/map`, jsonData, headers);
|
const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers);
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
return response.data;
|
return response.data;
|
||||||
}
|
}
|
||||||
@ -289,21 +234,14 @@ class FirecrawlApp {
|
|||||||
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
||||||
* @returns The final job status or data.
|
* @returns The final job status or data.
|
||||||
*/
|
*/
|
||||||
async monitorJobStatus(id, headers, checkInterval, checkUrl) {
|
async monitorJobStatus(id, headers, checkInterval) {
|
||||||
let apiUrl = '';
|
|
||||||
while (true) {
|
while (true) {
|
||||||
if (this.version === 'v1') {
|
const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
|
||||||
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
|
|
||||||
}
|
|
||||||
else if (this.version === 'v0') {
|
|
||||||
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
|
|
||||||
}
|
|
||||||
const statusResponse = await this.getRequest(apiUrl, headers);
|
|
||||||
if (statusResponse.status === 200) {
|
if (statusResponse.status === 200) {
|
||||||
const statusData = statusResponse.data;
|
const statusData = statusResponse.data;
|
||||||
if (statusData.status === "completed") {
|
if (statusData.status === "completed") {
|
||||||
if ("data" in statusData) {
|
if ("data" in statusData) {
|
||||||
return this.version === 'v0' ? statusData.data : statusData;
|
return statusData;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new Error("Crawl job completed but no data was returned");
|
throw new Error("Crawl job completed but no data was returned");
|
||||||
@ -338,3 +276,72 @@ class FirecrawlApp {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
exports.default = FirecrawlApp;
|
exports.default = FirecrawlApp;
|
||||||
|
class CrawlWatcher extends typescript_event_target_1.TypedEventTarget {
|
||||||
|
constructor(id, app) {
|
||||||
|
super();
|
||||||
|
this.ws = new isows_1.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
|
||||||
|
this.status = "scraping";
|
||||||
|
this.data = [];
|
||||||
|
const messageHandler = (msg) => {
|
||||||
|
if (msg.type === "done") {
|
||||||
|
this.status = "completed";
|
||||||
|
this.dispatchTypedEvent("done", new CustomEvent("done", {
|
||||||
|
detail: {
|
||||||
|
status: this.status,
|
||||||
|
data: this.data,
|
||||||
|
},
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
else if (msg.type === "error") {
|
||||||
|
this.status = "failed";
|
||||||
|
this.dispatchTypedEvent("error", new CustomEvent("error", {
|
||||||
|
detail: {
|
||||||
|
status: this.status,
|
||||||
|
data: this.data,
|
||||||
|
error: msg.error,
|
||||||
|
},
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
else if (msg.type === "catchup") {
|
||||||
|
this.status = msg.data.status;
|
||||||
|
this.data.push(...(msg.data.data ?? []));
|
||||||
|
for (const doc of this.data) {
|
||||||
|
this.dispatchTypedEvent("document", new CustomEvent("document", {
|
||||||
|
detail: doc,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (msg.type === "document") {
|
||||||
|
this.dispatchTypedEvent("document", new CustomEvent("document", {
|
||||||
|
detail: msg.data,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
this.ws.onmessage = ((ev) => {
|
||||||
|
if (typeof ev.data !== "string") {
|
||||||
|
this.ws.close();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const msg = JSON.parse(ev.data);
|
||||||
|
messageHandler(msg);
|
||||||
|
}).bind(this);
|
||||||
|
this.ws.onclose = ((ev) => {
|
||||||
|
const msg = JSON.parse(ev.reason);
|
||||||
|
messageHandler(msg);
|
||||||
|
}).bind(this);
|
||||||
|
this.ws.onerror = ((_) => {
|
||||||
|
this.status = "failed";
|
||||||
|
this.dispatchTypedEvent("error", new CustomEvent("error", {
|
||||||
|
detail: {
|
||||||
|
status: this.status,
|
||||||
|
data: this.data,
|
||||||
|
error: "WebSocket error",
|
||||||
|
},
|
||||||
|
}));
|
||||||
|
}).bind(this);
|
||||||
|
}
|
||||||
|
close() {
|
||||||
|
this.ws.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exports.CrawlWatcher = CrawlWatcher;
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||||
|
import { WebSocket } from "isows";
|
||||||
|
import { TypedEventTarget } from "typescript-event-target";
|
||||||
/**
|
/**
|
||||||
* Main class for interacting with the Firecrawl API.
|
* Main class for interacting with the Firecrawl API.
|
||||||
* Provides methods for scraping, searching, crawling, and mapping web content.
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
||||||
@ -10,13 +12,9 @@ export default class FirecrawlApp {
|
|||||||
* Initializes a new instance of the FirecrawlApp class.
|
* Initializes a new instance of the FirecrawlApp class.
|
||||||
* @param config - Configuration options for the FirecrawlApp instance.
|
* @param config - Configuration options for the FirecrawlApp instance.
|
||||||
*/
|
*/
|
||||||
constructor({ apiKey = null, apiUrl = null, version = "v1" }) {
|
constructor({ apiKey = null, apiUrl = null }) {
|
||||||
this.apiKey = apiKey || "";
|
this.apiKey = apiKey || "";
|
||||||
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
||||||
this.version = version;
|
|
||||||
if (!this.apiKey) {
|
|
||||||
throw new Error("No API key provided");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Scrapes a URL using the Firecrawl API.
|
* Scrapes a URL using the Firecrawl API.
|
||||||
@ -46,16 +44,16 @@ export default class FirecrawlApp {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const response = await axios.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
|
const response = await axios.post(this.apiUrl + `/v1/scrape`, jsonData, { headers });
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const responseData = response.data;
|
const responseData = response.data;
|
||||||
if (responseData.success) {
|
if (responseData.success) {
|
||||||
return (this.version === 'v0' ? responseData : {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
warning: responseData.warning,
|
warning: responseData.warning,
|
||||||
error: responseData.error,
|
error: responseData.error,
|
||||||
...responseData.data
|
...responseData.data
|
||||||
});
|
};
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
||||||
@ -71,80 +69,52 @@ export default class FirecrawlApp {
|
|||||||
return { success: false, error: "Internal server error." };
|
return { success: false, error: "Internal server error." };
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Searches for a query using the Firecrawl API.
|
* This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
|
||||||
* @param query - The query to search for.
|
* @param query - The search query string.
|
||||||
* @param params - Additional parameters for the search request.
|
* @param params - Additional parameters for the search.
|
||||||
* @returns The response from the search operation.
|
* @returns Throws an error advising to use version 0 of the API.
|
||||||
*/
|
*/
|
||||||
async search(query, params) {
|
async search(query, params) {
|
||||||
if (this.version === "v1") {
|
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
||||||
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
|
||||||
}
|
|
||||||
const headers = {
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
Authorization: `Bearer ${this.apiKey}`,
|
|
||||||
};
|
|
||||||
let jsonData = { query };
|
|
||||||
if (params) {
|
|
||||||
jsonData = { ...jsonData, ...params };
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
const response = await axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
|
|
||||||
if (response.status === 200) {
|
|
||||||
const responseData = response.data;
|
|
||||||
if (responseData.success) {
|
|
||||||
return responseData;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
this.handleError(response, "search");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (error) {
|
|
||||||
throw new Error(error.message);
|
|
||||||
}
|
|
||||||
return { success: false, error: "Internal server error." };
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Initiates a crawl job for a URL using the Firecrawl API.
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
||||||
* @param url - The URL to crawl.
|
* @param url - The URL to crawl.
|
||||||
* @param params - Additional parameters for the crawl request.
|
* @param params - Additional parameters for the crawl request.
|
||||||
* @param waitUntilDone - Whether to wait for the crawl job to complete.
|
|
||||||
* @param pollInterval - Time in seconds for job status checks.
|
* @param pollInterval - Time in seconds for job status checks.
|
||||||
* @param idempotencyKey - Optional idempotency key for the request.
|
* @param idempotencyKey - Optional idempotency key for the request.
|
||||||
* @returns The response from the crawl operation.
|
* @returns The response from the crawl operation.
|
||||||
*/
|
*/
|
||||||
async crawlUrl(url, params, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
|
||||||
const headers = this.prepareHeaders(idempotencyKey);
|
const headers = this.prepareHeaders(idempotencyKey);
|
||||||
let jsonData = { url, ...params };
|
let jsonData = { url, ...params };
|
||||||
try {
|
try {
|
||||||
const response = await this.postRequest(this.apiUrl + `/${this.version}/crawl`, jsonData, headers);
|
const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const id = this.version === 'v0' ? response.data.jobId : response.data.id;
|
const id = response.data.id;
|
||||||
let checkUrl = undefined;
|
return this.monitorJobStatus(id, headers, pollInterval);
|
||||||
if (waitUntilDone) {
|
}
|
||||||
if (this.version === 'v1') {
|
else {
|
||||||
checkUrl = response.data.url;
|
this.handleError(response, "start crawl job");
|
||||||
}
|
}
|
||||||
return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
|
}
|
||||||
}
|
catch (error) {
|
||||||
else {
|
if (error.response?.data?.error) {
|
||||||
if (this.version === 'v0') {
|
throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
|
||||||
return {
|
}
|
||||||
success: true,
|
else {
|
||||||
jobId: id
|
throw new Error(error.message);
|
||||||
};
|
}
|
||||||
}
|
}
|
||||||
else {
|
return { success: false, error: "Internal server error." };
|
||||||
return {
|
}
|
||||||
success: true,
|
async asyncCrawlUrl(url, params, idempotencyKey) {
|
||||||
id: id
|
const headers = this.prepareHeaders(idempotencyKey);
|
||||||
};
|
let jsonData = { url, ...params };
|
||||||
}
|
try {
|
||||||
}
|
const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
|
||||||
|
if (response.status === 200) {
|
||||||
|
return response.data;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
this.handleError(response, "start crawl job");
|
this.handleError(response, "start crawl job");
|
||||||
@ -171,37 +141,19 @@ export default class FirecrawlApp {
|
|||||||
}
|
}
|
||||||
const headers = this.prepareHeaders();
|
const headers = this.prepareHeaders();
|
||||||
try {
|
try {
|
||||||
const response = await this.getRequest(this.version === 'v1' ?
|
const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
|
||||||
`${this.apiUrl}/${this.version}/crawl/${id}` :
|
|
||||||
`${this.apiUrl}/${this.version}/crawl/status/${id}`, headers);
|
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
if (this.version === 'v0') {
|
return ({
|
||||||
return {
|
success: true,
|
||||||
success: true,
|
status: response.data.status,
|
||||||
status: response.data.status,
|
total: response.data.total,
|
||||||
current: response.data.current,
|
completed: response.data.completed,
|
||||||
current_url: response.data.current_url,
|
creditsUsed: response.data.creditsUsed,
|
||||||
current_step: response.data.current_step,
|
expiresAt: new Date(response.data.expiresAt),
|
||||||
total: response.data.total,
|
next: response.data.next,
|
||||||
data: response.data.data,
|
data: response.data.data,
|
||||||
partial_data: !response.data.data
|
error: response.data.error
|
||||||
? response.data.partial_data
|
});
|
||||||
: undefined,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return {
|
|
||||||
success: true,
|
|
||||||
status: response.data.status,
|
|
||||||
total: response.data.total,
|
|
||||||
completed: response.data.completed,
|
|
||||||
creditsUsed: response.data.creditsUsed,
|
|
||||||
expiresAt: new Date(response.data.expiresAt),
|
|
||||||
next: response.data.next,
|
|
||||||
data: response.data.data,
|
|
||||||
error: response.data.error
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
this.handleError(response, "check crawl status");
|
this.handleError(response, "check crawl status");
|
||||||
@ -210,29 +162,21 @@ export default class FirecrawlApp {
|
|||||||
catch (error) {
|
catch (error) {
|
||||||
throw new Error(error.message);
|
throw new Error(error.message);
|
||||||
}
|
}
|
||||||
return this.version === 'v0' ?
|
return { success: false, error: "Internal server error." };
|
||||||
{
|
}
|
||||||
success: false,
|
async crawlUrlAndWatch(url, params, idempotencyKey) {
|
||||||
status: "unknown",
|
const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
|
||||||
current: 0,
|
if (crawl.success && crawl.id) {
|
||||||
current_url: "",
|
const id = crawl.id;
|
||||||
current_step: "",
|
return new CrawlWatcher(id, this);
|
||||||
total: 0,
|
}
|
||||||
error: "Internal server error.",
|
throw new Error("Crawl job failed to start");
|
||||||
} :
|
|
||||||
{
|
|
||||||
success: false,
|
|
||||||
error: "Internal server error.",
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
async mapUrl(url, params) {
|
async mapUrl(url, params) {
|
||||||
if (this.version == 'v0') {
|
|
||||||
throw new Error("Map is not supported in v0");
|
|
||||||
}
|
|
||||||
const headers = this.prepareHeaders();
|
const headers = this.prepareHeaders();
|
||||||
let jsonData = { url, ...params };
|
let jsonData = { url, ...params };
|
||||||
try {
|
try {
|
||||||
const response = await this.postRequest(this.apiUrl + `/${this.version}/map`, jsonData, headers);
|
const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers);
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
return response.data;
|
return response.data;
|
||||||
}
|
}
|
||||||
@ -284,21 +228,14 @@ export default class FirecrawlApp {
|
|||||||
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
||||||
* @returns The final job status or data.
|
* @returns The final job status or data.
|
||||||
*/
|
*/
|
||||||
async monitorJobStatus(id, headers, checkInterval, checkUrl) {
|
async monitorJobStatus(id, headers, checkInterval) {
|
||||||
let apiUrl = '';
|
|
||||||
while (true) {
|
while (true) {
|
||||||
if (this.version === 'v1') {
|
const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
|
||||||
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
|
|
||||||
}
|
|
||||||
else if (this.version === 'v0') {
|
|
||||||
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
|
|
||||||
}
|
|
||||||
const statusResponse = await this.getRequest(apiUrl, headers);
|
|
||||||
if (statusResponse.status === 200) {
|
if (statusResponse.status === 200) {
|
||||||
const statusData = statusResponse.data;
|
const statusData = statusResponse.data;
|
||||||
if (statusData.status === "completed") {
|
if (statusData.status === "completed") {
|
||||||
if ("data" in statusData) {
|
if ("data" in statusData) {
|
||||||
return this.version === 'v0' ? statusData.data : statusData;
|
return statusData;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new Error("Crawl job completed but no data was returned");
|
throw new Error("Crawl job completed but no data was returned");
|
||||||
@ -332,3 +269,71 @@ export default class FirecrawlApp {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
export class CrawlWatcher extends TypedEventTarget {
|
||||||
|
constructor(id, app) {
|
||||||
|
super();
|
||||||
|
this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
|
||||||
|
this.status = "scraping";
|
||||||
|
this.data = [];
|
||||||
|
const messageHandler = (msg) => {
|
||||||
|
if (msg.type === "done") {
|
||||||
|
this.status = "completed";
|
||||||
|
this.dispatchTypedEvent("done", new CustomEvent("done", {
|
||||||
|
detail: {
|
||||||
|
status: this.status,
|
||||||
|
data: this.data,
|
||||||
|
},
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
else if (msg.type === "error") {
|
||||||
|
this.status = "failed";
|
||||||
|
this.dispatchTypedEvent("error", new CustomEvent("error", {
|
||||||
|
detail: {
|
||||||
|
status: this.status,
|
||||||
|
data: this.data,
|
||||||
|
error: msg.error,
|
||||||
|
},
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
else if (msg.type === "catchup") {
|
||||||
|
this.status = msg.data.status;
|
||||||
|
this.data.push(...(msg.data.data ?? []));
|
||||||
|
for (const doc of this.data) {
|
||||||
|
this.dispatchTypedEvent("document", new CustomEvent("document", {
|
||||||
|
detail: doc,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (msg.type === "document") {
|
||||||
|
this.dispatchTypedEvent("document", new CustomEvent("document", {
|
||||||
|
detail: msg.data,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
this.ws.onmessage = ((ev) => {
|
||||||
|
if (typeof ev.data !== "string") {
|
||||||
|
this.ws.close();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const msg = JSON.parse(ev.data);
|
||||||
|
messageHandler(msg);
|
||||||
|
}).bind(this);
|
||||||
|
this.ws.onclose = ((ev) => {
|
||||||
|
const msg = JSON.parse(ev.reason);
|
||||||
|
messageHandler(msg);
|
||||||
|
}).bind(this);
|
||||||
|
this.ws.onerror = ((_) => {
|
||||||
|
this.status = "failed";
|
||||||
|
this.dispatchTypedEvent("error", new CustomEvent("error", {
|
||||||
|
detail: {
|
||||||
|
status: this.status,
|
||||||
|
data: this.data,
|
||||||
|
error: "WebSocket error",
|
||||||
|
},
|
||||||
|
}));
|
||||||
|
}).bind(this);
|
||||||
|
}
|
||||||
|
close() {
|
||||||
|
this.ws.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
4
apps/js-sdk/firecrawl/package-lock.json
generated
4
apps/js-sdk/firecrawl/package-lock.json
generated
@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "1.0.3",
|
"version": "1.1.0",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "1.0.3",
|
"version": "1.1.0",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^1.6.8",
|
"axios": "^1.6.8",
|
||||||
|
211
apps/js-sdk/firecrawl/types/index.d.ts
vendored
211
apps/js-sdk/firecrawl/types/index.d.ts
vendored
@ -1,15 +1,13 @@
|
|||||||
import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
||||||
import { z } from "zod";
|
import { TypedEventTarget } from "typescript-event-target";
|
||||||
/**
|
/**
|
||||||
* Configuration interface for FirecrawlApp.
|
* Configuration interface for FirecrawlApp.
|
||||||
* @param apiKey - Optional API key for authentication.
|
* @param apiKey - Optional API key for authentication.
|
||||||
* @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
|
* @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
|
||||||
* @param version - API version, either 'v0' or 'v1'.
|
|
||||||
*/
|
*/
|
||||||
export interface FirecrawlAppConfig {
|
export interface FirecrawlAppConfig {
|
||||||
apiKey?: string | null;
|
apiKey?: string | null;
|
||||||
apiUrl?: string | null;
|
apiUrl?: string | null;
|
||||||
version?: "v0" | "v1";
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Metadata for a Firecrawl document.
|
* Metadata for a Firecrawl document.
|
||||||
@ -50,15 +48,6 @@ export interface FirecrawlDocumentMetadata {
|
|||||||
error?: string;
|
error?: string;
|
||||||
[key: string]: any;
|
[key: string]: any;
|
||||||
}
|
}
|
||||||
/**
|
|
||||||
* Metadata for a Firecrawl document on v0.
|
|
||||||
* Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
|
|
||||||
*/
|
|
||||||
export interface FirecrawlDocumentMetadataV0 {
|
|
||||||
pageStatusCode?: number;
|
|
||||||
pageError?: string;
|
|
||||||
[key: string]: any;
|
|
||||||
}
|
|
||||||
/**
|
/**
|
||||||
* Document interface for Firecrawl.
|
* Document interface for Firecrawl.
|
||||||
* Represents a document retrieved or processed by Firecrawl.
|
* Represents a document retrieved or processed by Firecrawl.
|
||||||
@ -70,84 +59,30 @@ export interface FirecrawlDocument {
|
|||||||
rawHtml?: string;
|
rawHtml?: string;
|
||||||
links?: string[];
|
links?: string[];
|
||||||
screenshot?: string;
|
screenshot?: string;
|
||||||
metadata: FirecrawlDocumentMetadata;
|
metadata?: FirecrawlDocumentMetadata;
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Document interface for Firecrawl on v0.
|
|
||||||
* Represents a document specifically for API version v0 with additional properties.
|
|
||||||
*/
|
|
||||||
export interface FirecrawlDocumentV0 {
|
|
||||||
id?: string;
|
|
||||||
url?: string;
|
|
||||||
content: string;
|
|
||||||
markdown?: string;
|
|
||||||
html?: string;
|
|
||||||
llm_extraction?: Record<string, any>;
|
|
||||||
createdAt?: Date;
|
|
||||||
updatedAt?: Date;
|
|
||||||
type?: string;
|
|
||||||
metadata: FirecrawlDocumentMetadataV0;
|
|
||||||
childrenLinks?: string[];
|
|
||||||
provider?: string;
|
|
||||||
warning?: string;
|
|
||||||
index?: number;
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Parameters for scraping operations.
|
* Parameters for scraping operations.
|
||||||
* Defines the options and configurations available for scraping web content.
|
* Defines the options and configurations available for scraping web content.
|
||||||
*/
|
*/
|
||||||
export interface ScrapeParams {
|
export interface ScrapeParams {
|
||||||
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
|
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "full@scrennshot")[];
|
||||||
headers?: Record<string, string>;
|
headers?: Record<string, string>;
|
||||||
includeTags?: string[];
|
includeTags?: string[];
|
||||||
excludeTags?: string[];
|
excludeTags?: string[];
|
||||||
onlyMainContent?: boolean;
|
onlyMainContent?: boolean;
|
||||||
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
|
|
||||||
waitFor?: number;
|
waitFor?: number;
|
||||||
timeout?: number;
|
timeout?: number;
|
||||||
}
|
}
|
||||||
/**
|
|
||||||
* Parameters for scraping operations on v0.
|
|
||||||
* Includes page and extractor options specific to API version v0.
|
|
||||||
*/
|
|
||||||
export interface ScrapeParamsV0 {
|
|
||||||
pageOptions?: {
|
|
||||||
headers?: Record<string, string>;
|
|
||||||
includeHtml?: boolean;
|
|
||||||
includeRawHtml?: boolean;
|
|
||||||
onlyIncludeTags?: string[];
|
|
||||||
onlyMainContent?: boolean;
|
|
||||||
removeTags?: string[];
|
|
||||||
replaceAllPathsWithAbsolutePaths?: boolean;
|
|
||||||
screenshot?: boolean;
|
|
||||||
fullPageScreenshot?: boolean;
|
|
||||||
waitFor?: number;
|
|
||||||
};
|
|
||||||
extractorOptions?: {
|
|
||||||
mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
|
|
||||||
extractionPrompt?: string;
|
|
||||||
extractionSchema?: Record<string, any> | z.ZodSchema | any;
|
|
||||||
};
|
|
||||||
timeout?: number;
|
|
||||||
}
|
|
||||||
/**
|
/**
|
||||||
* Response interface for scraping operations.
|
* Response interface for scraping operations.
|
||||||
* Defines the structure of the response received after a scraping operation.
|
* Defines the structure of the response received after a scraping operation.
|
||||||
*/
|
*/
|
||||||
export interface ScrapeResponse extends FirecrawlDocument {
|
export interface ScrapeResponse extends FirecrawlDocument {
|
||||||
success: boolean;
|
success: true;
|
||||||
warning?: string;
|
warning?: string;
|
||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
/**
|
|
||||||
* Response interface for scraping operations on v0.
|
|
||||||
* Similar to ScrapeResponse but tailored for responses from API version v0.
|
|
||||||
*/
|
|
||||||
export interface ScrapeResponseV0 {
|
|
||||||
success: boolean;
|
|
||||||
data?: FirecrawlDocumentV0;
|
|
||||||
error?: string;
|
|
||||||
}
|
|
||||||
/**
|
/**
|
||||||
* Parameters for crawling operations.
|
* Parameters for crawling operations.
|
||||||
* Includes options for both scraping and mapping during a crawl.
|
* Includes options for both scraping and mapping during a crawl.
|
||||||
@ -162,36 +97,6 @@ export interface CrawlParams {
|
|||||||
ignoreSitemap?: boolean;
|
ignoreSitemap?: boolean;
|
||||||
scrapeOptions?: ScrapeParams;
|
scrapeOptions?: ScrapeParams;
|
||||||
}
|
}
|
||||||
/**
|
|
||||||
* Parameters for crawling operations on v0.
|
|
||||||
* Tailored for API version v0, includes specific options for crawling.
|
|
||||||
*/
|
|
||||||
export interface CrawlParamsV0 {
|
|
||||||
crawlerOptions?: {
|
|
||||||
includes?: string[];
|
|
||||||
excludes?: string[];
|
|
||||||
generateImgAltText?: boolean;
|
|
||||||
returnOnlyUrls?: boolean;
|
|
||||||
maxDepth?: number;
|
|
||||||
mode?: "default" | "fast";
|
|
||||||
ignoreSitemap?: boolean;
|
|
||||||
limit?: number;
|
|
||||||
allowBackwardCrawling?: boolean;
|
|
||||||
allowExternalContentLinks?: boolean;
|
|
||||||
};
|
|
||||||
pageOptions?: {
|
|
||||||
headers?: Record<string, string>;
|
|
||||||
includeHtml?: boolean;
|
|
||||||
includeRawHtml?: boolean;
|
|
||||||
onlyIncludeTags?: string[];
|
|
||||||
onlyMainContent?: boolean;
|
|
||||||
removeTags?: string[];
|
|
||||||
replaceAllPathsWithAbsolutePaths?: boolean;
|
|
||||||
screenshot?: boolean;
|
|
||||||
fullPageScreenshot?: boolean;
|
|
||||||
waitFor?: number;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
/**
|
/**
|
||||||
* Response interface for crawling operations.
|
* Response interface for crawling operations.
|
||||||
* Defines the structure of the response received after initiating a crawl.
|
* Defines the structure of the response received after initiating a crawl.
|
||||||
@ -199,16 +104,7 @@ export interface CrawlParamsV0 {
|
|||||||
export interface CrawlResponse {
|
export interface CrawlResponse {
|
||||||
id?: string;
|
id?: string;
|
||||||
url?: string;
|
url?: string;
|
||||||
success: boolean;
|
success: true;
|
||||||
error?: string;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Response interface for crawling operations on v0.
|
|
||||||
* Similar to CrawlResponse but tailored for responses from API version v0.
|
|
||||||
*/
|
|
||||||
export interface CrawlResponseV0 {
|
|
||||||
jobId?: string;
|
|
||||||
success: boolean;
|
|
||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@ -216,7 +112,7 @@ export interface CrawlResponseV0 {
|
|||||||
* Provides detailed status of a crawl job including progress and results.
|
* Provides detailed status of a crawl job including progress and results.
|
||||||
*/
|
*/
|
||||||
export interface CrawlStatusResponse {
|
export interface CrawlStatusResponse {
|
||||||
success: boolean;
|
success: true;
|
||||||
total: number;
|
total: number;
|
||||||
completed: number;
|
completed: number;
|
||||||
creditsUsed: number;
|
creditsUsed: number;
|
||||||
@ -226,21 +122,6 @@ export interface CrawlStatusResponse {
|
|||||||
data?: FirecrawlDocument[];
|
data?: FirecrawlDocument[];
|
||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
/**
|
|
||||||
* Response interface for job status checks on v0.
|
|
||||||
* Tailored for API version v0, provides status and partial data of a crawl job.
|
|
||||||
*/
|
|
||||||
export interface CrawlStatusResponseV0 {
|
|
||||||
success: boolean;
|
|
||||||
status: string;
|
|
||||||
current?: number;
|
|
||||||
current_url?: string;
|
|
||||||
current_step?: string;
|
|
||||||
total?: number;
|
|
||||||
data?: FirecrawlDocumentV0[];
|
|
||||||
partial_data?: FirecrawlDocumentV0[];
|
|
||||||
error?: string;
|
|
||||||
}
|
|
||||||
/**
|
/**
|
||||||
* Parameters for mapping operations.
|
* Parameters for mapping operations.
|
||||||
* Defines options for mapping URLs during a crawl.
|
* Defines options for mapping URLs during a crawl.
|
||||||
@ -256,78 +137,62 @@ export interface MapParams {
|
|||||||
* Defines the structure of the response received after a mapping operation.
|
* Defines the structure of the response received after a mapping operation.
|
||||||
*/
|
*/
|
||||||
export interface MapResponse {
|
export interface MapResponse {
|
||||||
success: boolean;
|
success: true;
|
||||||
links?: string[];
|
links?: string[];
|
||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Parameters for searching operations on v0.
|
* Error response interface.
|
||||||
* Tailored for API version v0, includes specific options for searching content.
|
* Defines the structure of the response received when an error occurs.
|
||||||
*/
|
*/
|
||||||
export interface SearchParamsV0 {
|
export interface ErrorResponse {
|
||||||
pageOptions?: {
|
success: false;
|
||||||
onlyMainContent?: boolean;
|
error: string;
|
||||||
fetchPageContent?: boolean;
|
|
||||||
includeHtml?: boolean;
|
|
||||||
includeRawHtml?: boolean;
|
|
||||||
};
|
|
||||||
searchOptions?: {
|
|
||||||
limit?: number;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Response interface for searching operations on v0.
|
|
||||||
* Defines the structure of the response received after a search operation on v0.
|
|
||||||
*/
|
|
||||||
export interface SearchResponseV0 {
|
|
||||||
success: boolean;
|
|
||||||
data?: FirecrawlDocumentV0[];
|
|
||||||
error?: string;
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Main class for interacting with the Firecrawl API.
|
* Main class for interacting with the Firecrawl API.
|
||||||
* Provides methods for scraping, searching, crawling, and mapping web content.
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
||||||
*/
|
*/
|
||||||
export default class FirecrawlApp<T extends "v0" | "v1"> {
|
export default class FirecrawlApp {
|
||||||
private apiKey;
|
apiKey: string;
|
||||||
private apiUrl;
|
apiUrl: string;
|
||||||
version: T;
|
|
||||||
/**
|
/**
|
||||||
* Initializes a new instance of the FirecrawlApp class.
|
* Initializes a new instance of the FirecrawlApp class.
|
||||||
* @param config - Configuration options for the FirecrawlApp instance.
|
* @param config - Configuration options for the FirecrawlApp instance.
|
||||||
*/
|
*/
|
||||||
constructor({ apiKey, apiUrl, version }: FirecrawlAppConfig);
|
constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
|
||||||
/**
|
/**
|
||||||
* Scrapes a URL using the Firecrawl API.
|
* Scrapes a URL using the Firecrawl API.
|
||||||
* @param url - The URL to scrape.
|
* @param url - The URL to scrape.
|
||||||
* @param params - Additional parameters for the scrape request.
|
* @param params - Additional parameters for the scrape request.
|
||||||
* @returns The response from the scrape operation.
|
* @returns The response from the scrape operation.
|
||||||
*/
|
*/
|
||||||
scrapeUrl(url: string, params?: ScrapeParams | ScrapeParamsV0): Promise<this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse>;
|
scrapeUrl(url: string, params?: ScrapeParams): Promise<ScrapeResponse | ErrorResponse>;
|
||||||
/**
|
/**
|
||||||
* Searches for a query using the Firecrawl API.
|
* This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
|
||||||
* @param query - The query to search for.
|
* @param query - The search query string.
|
||||||
* @param params - Additional parameters for the search request.
|
* @param params - Additional parameters for the search.
|
||||||
* @returns The response from the search operation.
|
* @returns Throws an error advising to use version 0 of the API.
|
||||||
*/
|
*/
|
||||||
search(query: string, params?: SearchParamsV0): Promise<SearchResponseV0>;
|
search(query: string, params?: any): Promise<any>;
|
||||||
/**
|
/**
|
||||||
* Initiates a crawl job for a URL using the Firecrawl API.
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
||||||
* @param url - The URL to crawl.
|
* @param url - The URL to crawl.
|
||||||
* @param params - Additional parameters for the crawl request.
|
* @param params - Additional parameters for the crawl request.
|
||||||
* @param waitUntilDone - Whether to wait for the crawl job to complete.
|
|
||||||
* @param pollInterval - Time in seconds for job status checks.
|
* @param pollInterval - Time in seconds for job status checks.
|
||||||
* @param idempotencyKey - Optional idempotency key for the request.
|
* @param idempotencyKey - Optional idempotency key for the request.
|
||||||
* @returns The response from the crawl operation.
|
* @returns The response from the crawl operation.
|
||||||
*/
|
*/
|
||||||
crawlUrl(url: string, params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams, waitUntilDone?: boolean, pollInterval?: number, idempotencyKey?: string): Promise<this['version'] extends 'v0' ? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlResponse | CrawlStatusResponse>;
|
crawlUrl(url: string, params?: CrawlParams, pollInterval?: number, idempotencyKey?: string): Promise<CrawlStatusResponse | ErrorResponse>;
|
||||||
|
asyncCrawlUrl(url: string, params?: CrawlParams, idempotencyKey?: string): Promise<CrawlResponse | ErrorResponse>;
|
||||||
/**
|
/**
|
||||||
* Checks the status of a crawl job using the Firecrawl API.
|
* Checks the status of a crawl job using the Firecrawl API.
|
||||||
* @param id - The ID of the crawl operation.
|
* @param id - The ID of the crawl operation.
|
||||||
* @returns The response containing the job status.
|
* @returns The response containing the job status.
|
||||||
*/
|
*/
|
||||||
checkCrawlStatus(id?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse>;
|
checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse>;
|
||||||
mapUrl(url: string, params?: MapParams): Promise<MapResponse>;
|
crawlUrlAndWatch(url: string, params?: CrawlParams, idempotencyKey?: string): Promise<CrawlWatcher>;
|
||||||
|
mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse>;
|
||||||
/**
|
/**
|
||||||
* Prepares the headers for an API request.
|
* Prepares the headers for an API request.
|
||||||
* @param idempotencyKey - Optional key to ensure idempotency.
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
||||||
@ -357,7 +222,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|||||||
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
||||||
* @returns The final job status or data.
|
* @returns The final job status or data.
|
||||||
*/
|
*/
|
||||||
monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number, checkUrl?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse>;
|
monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<CrawlStatusResponse>;
|
||||||
/**
|
/**
|
||||||
* Handles errors from API responses.
|
* Handles errors from API responses.
|
||||||
* @param {AxiosResponse} response - The response from the API.
|
* @param {AxiosResponse} response - The response from the API.
|
||||||
@ -365,3 +230,23 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|||||||
*/
|
*/
|
||||||
handleError(response: AxiosResponse, action: string): void;
|
handleError(response: AxiosResponse, action: string): void;
|
||||||
}
|
}
|
||||||
|
interface CrawlWatcherEvents {
|
||||||
|
document: CustomEvent<FirecrawlDocument>;
|
||||||
|
done: CustomEvent<{
|
||||||
|
status: CrawlStatusResponse["status"];
|
||||||
|
data: FirecrawlDocument[];
|
||||||
|
}>;
|
||||||
|
error: CustomEvent<{
|
||||||
|
status: CrawlStatusResponse["status"];
|
||||||
|
data: FirecrawlDocument[];
|
||||||
|
error: string;
|
||||||
|
}>;
|
||||||
|
}
|
||||||
|
export declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
||||||
|
private ws;
|
||||||
|
data: FirecrawlDocument[];
|
||||||
|
status: CrawlStatusResponse["status"];
|
||||||
|
constructor(id: string, app: FirecrawlApp);
|
||||||
|
close(): void;
|
||||||
|
}
|
||||||
|
export {};
|
||||||
|
@ -13,7 +13,7 @@ import os
|
|||||||
|
|
||||||
from .firecrawl import FirecrawlApp
|
from .firecrawl import FirecrawlApp
|
||||||
|
|
||||||
__version__ = "1.0.1"
|
__version__ = "1.1.1"
|
||||||
|
|
||||||
# Define the logger for the Firecrawl project
|
# Define the logger for the Firecrawl project
|
||||||
logger: logging.Logger = logging.getLogger("firecrawl")
|
logger: logging.Logger = logging.getLogger("firecrawl")
|
||||||
|
@ -10,6 +10,10 @@ readme = {file="README.md", content-type = "text/markdown"}
|
|||||||
requires-python = ">=3.8"
|
requires-python = ">=3.8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"requests",
|
"requests",
|
||||||
|
"python-dotenv",
|
||||||
|
"websockets",
|
||||||
|
"asyncio",
|
||||||
|
"nest-asyncio"
|
||||||
]
|
]
|
||||||
authors = [{name = "Mendable.ai",email = "nick@mendable.ai"}]
|
authors = [{name = "Mendable.ai",email = "nick@mendable.ai"}]
|
||||||
maintainers = [{name = "Mendable.ai",email = "nick@mendable.ai"}]
|
maintainers = [{name = "Mendable.ai",email = "nick@mendable.ai"}]
|
||||||
|
@ -30,6 +30,9 @@ setup(
|
|||||||
'requests',
|
'requests',
|
||||||
'pytest',
|
'pytest',
|
||||||
'python-dotenv',
|
'python-dotenv',
|
||||||
|
'websockets',
|
||||||
|
'asyncio',
|
||||||
|
'nest-asyncio'
|
||||||
],
|
],
|
||||||
python_requires=">=3.8",
|
python_requires=">=3.8",
|
||||||
classifiers=[
|
classifiers=[
|
||||||
|
Loading…
x
Reference in New Issue
Block a user