This commit is contained in:
Nicolas 2024-08-30 15:29:41 -03:00
parent 71dab56e36
commit 282962e36f
7 changed files with 320 additions and 416 deletions

View File

@ -3,9 +3,12 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod }; return (mod && mod.__esModule) ? mod : { "default": mod };
}; };
Object.defineProperty(exports, "__esModule", { value: true }); Object.defineProperty(exports, "__esModule", { value: true });
exports.CrawlWatcher = void 0;
const axios_1 = __importDefault(require("axios")); const axios_1 = __importDefault(require("axios"));
const zod_1 = require("zod"); const zod_1 = require("zod");
const zod_to_json_schema_1 = require("zod-to-json-schema"); const zod_to_json_schema_1 = require("zod-to-json-schema");
const isows_1 = require("isows");
const typescript_event_target_1 = require("typescript-event-target");
/** /**
* Main class for interacting with the Firecrawl API. * Main class for interacting with the Firecrawl API.
* Provides methods for scraping, searching, crawling, and mapping web content. * Provides methods for scraping, searching, crawling, and mapping web content.
@ -15,13 +18,9 @@ class FirecrawlApp {
* Initializes a new instance of the FirecrawlApp class. * Initializes a new instance of the FirecrawlApp class.
* @param config - Configuration options for the FirecrawlApp instance. * @param config - Configuration options for the FirecrawlApp instance.
*/ */
constructor({ apiKey = null, apiUrl = null, version = "v1" }) { constructor({ apiKey = null, apiUrl = null }) {
this.apiKey = apiKey || ""; this.apiKey = apiKey || "";
this.apiUrl = apiUrl || "https://api.firecrawl.dev"; this.apiUrl = apiUrl || "https://api.firecrawl.dev";
this.version = version;
if (!this.apiKey) {
throw new Error("No API key provided");
}
} }
/** /**
* Scrapes a URL using the Firecrawl API. * Scrapes a URL using the Firecrawl API.
@ -51,16 +50,16 @@ class FirecrawlApp {
}; };
} }
try { try {
const response = await axios_1.default.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers }); const response = await axios_1.default.post(this.apiUrl + `/v1/scrape`, jsonData, { headers });
if (response.status === 200) { if (response.status === 200) {
const responseData = response.data; const responseData = response.data;
if (responseData.success) { if (responseData.success) {
return (this.version === 'v0' ? responseData : { return {
success: true, success: true,
warning: responseData.warning, warning: responseData.warning,
error: responseData.error, error: responseData.error,
...responseData.data ...responseData.data
}); };
} }
else { else {
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
@ -76,80 +75,52 @@ class FirecrawlApp {
return { success: false, error: "Internal server error." }; return { success: false, error: "Internal server error." };
} }
/** /**
* Searches for a query using the Firecrawl API. * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
* @param query - The query to search for. * @param query - The search query string.
* @param params - Additional parameters for the search request. * @param params - Additional parameters for the search.
* @returns The response from the search operation. * @returns Throws an error advising to use version 0 of the API.
*/ */
async search(query, params) { async search(query, params) {
if (this.version === "v1") { throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
}
const headers = {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
};
let jsonData = { query };
if (params) {
jsonData = { ...jsonData, ...params };
}
try {
const response = await axios_1.default.post(this.apiUrl + "/v0/search", jsonData, { headers });
if (response.status === 200) {
const responseData = response.data;
if (responseData.success) {
return responseData;
}
else {
throw new Error(`Failed to search. Error: ${responseData.error}`);
}
}
else {
this.handleError(response, "search");
}
}
catch (error) {
throw new Error(error.message);
}
return { success: false, error: "Internal server error." };
} }
/** /**
* Initiates a crawl job for a URL using the Firecrawl API. * Initiates a crawl job for a URL using the Firecrawl API.
* @param url - The URL to crawl. * @param url - The URL to crawl.
* @param params - Additional parameters for the crawl request. * @param params - Additional parameters for the crawl request.
* @param waitUntilDone - Whether to wait for the crawl job to complete.
* @param pollInterval - Time in seconds for job status checks. * @param pollInterval - Time in seconds for job status checks.
* @param idempotencyKey - Optional idempotency key for the request. * @param idempotencyKey - Optional idempotency key for the request.
* @returns The response from the crawl operation. * @returns The response from the crawl operation.
*/ */
async crawlUrl(url, params, waitUntilDone = true, pollInterval = 2, idempotencyKey) { async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
const headers = this.prepareHeaders(idempotencyKey); const headers = this.prepareHeaders(idempotencyKey);
let jsonData = { url, ...params }; let jsonData = { url, ...params };
try { try {
const response = await this.postRequest(this.apiUrl + `/${this.version}/crawl`, jsonData, headers); const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
if (response.status === 200) { if (response.status === 200) {
const id = this.version === 'v0' ? response.data.jobId : response.data.id; const id = response.data.id;
let checkUrl = undefined; return this.monitorJobStatus(id, headers, pollInterval);
if (waitUntilDone) { }
if (this.version === 'v1') { else {
checkUrl = response.data.url; this.handleError(response, "start crawl job");
} }
return this.monitorJobStatus(id, headers, pollInterval, checkUrl); }
} catch (error) {
else { if (error.response?.data?.error) {
if (this.version === 'v0') { throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
return { }
success: true, else {
jobId: id throw new Error(error.message);
}; }
} }
else { return { success: false, error: "Internal server error." };
return { }
success: true, async asyncCrawlUrl(url, params, idempotencyKey) {
id: id const headers = this.prepareHeaders(idempotencyKey);
}; let jsonData = { url, ...params };
} try {
} const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
if (response.status === 200) {
return response.data;
} }
else { else {
this.handleError(response, "start crawl job"); this.handleError(response, "start crawl job");
@ -176,37 +147,19 @@ class FirecrawlApp {
} }
const headers = this.prepareHeaders(); const headers = this.prepareHeaders();
try { try {
const response = await this.getRequest(this.version === 'v1' ? const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
`${this.apiUrl}/${this.version}/crawl/${id}` :
`${this.apiUrl}/${this.version}/crawl/status/${id}`, headers);
if (response.status === 200) { if (response.status === 200) {
if (this.version === 'v0') { return ({
return { success: true,
success: true, status: response.data.status,
status: response.data.status, total: response.data.total,
current: response.data.current, completed: response.data.completed,
current_url: response.data.current_url, creditsUsed: response.data.creditsUsed,
current_step: response.data.current_step, expiresAt: new Date(response.data.expiresAt),
total: response.data.total, next: response.data.next,
data: response.data.data, data: response.data.data,
partial_data: !response.data.data error: response.data.error
? response.data.partial_data });
: undefined,
};
}
else {
return {
success: true,
status: response.data.status,
total: response.data.total,
completed: response.data.completed,
creditsUsed: response.data.creditsUsed,
expiresAt: new Date(response.data.expiresAt),
next: response.data.next,
data: response.data.data,
error: response.data.error
};
}
} }
else { else {
this.handleError(response, "check crawl status"); this.handleError(response, "check crawl status");
@ -215,29 +168,21 @@ class FirecrawlApp {
catch (error) { catch (error) {
throw new Error(error.message); throw new Error(error.message);
} }
return this.version === 'v0' ? return { success: false, error: "Internal server error." };
{ }
success: false, async crawlUrlAndWatch(url, params, idempotencyKey) {
status: "unknown", const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
current: 0, if (crawl.success && crawl.id) {
current_url: "", const id = crawl.id;
current_step: "", return new CrawlWatcher(id, this);
total: 0, }
error: "Internal server error.", throw new Error("Crawl job failed to start");
} :
{
success: false,
error: "Internal server error.",
};
} }
async mapUrl(url, params) { async mapUrl(url, params) {
if (this.version == 'v0') {
throw new Error("Map is not supported in v0");
}
const headers = this.prepareHeaders(); const headers = this.prepareHeaders();
let jsonData = { url, ...params }; let jsonData = { url, ...params };
try { try {
const response = await this.postRequest(this.apiUrl + `/${this.version}/map`, jsonData, headers); const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers);
if (response.status === 200) { if (response.status === 200) {
return response.data; return response.data;
} }
@ -289,21 +234,14 @@ class FirecrawlApp {
* @param checkUrl - Optional URL to check the status (used for v1 API) * @param checkUrl - Optional URL to check the status (used for v1 API)
* @returns The final job status or data. * @returns The final job status or data.
*/ */
async monitorJobStatus(id, headers, checkInterval, checkUrl) { async monitorJobStatus(id, headers, checkInterval) {
let apiUrl = '';
while (true) { while (true) {
if (this.version === 'v1') { const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
}
else if (this.version === 'v0') {
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
}
const statusResponse = await this.getRequest(apiUrl, headers);
if (statusResponse.status === 200) { if (statusResponse.status === 200) {
const statusData = statusResponse.data; const statusData = statusResponse.data;
if (statusData.status === "completed") { if (statusData.status === "completed") {
if ("data" in statusData) { if ("data" in statusData) {
return this.version === 'v0' ? statusData.data : statusData; return statusData;
} }
else { else {
throw new Error("Crawl job completed but no data was returned"); throw new Error("Crawl job completed but no data was returned");
@ -338,3 +276,72 @@ class FirecrawlApp {
} }
} }
exports.default = FirecrawlApp; exports.default = FirecrawlApp;
/**
 * Watches a crawl job over a WebSocket and re-emits server messages as typed events.
 *
 * Events dispatched on this object:
 *  - "document": once per document delivered by a "catchup" or "document" message
 *    (detail is the document itself).
 *  - "done": when a "done" message arrives (detail: `{ status, data }`).
 *  - "error": when an "error" message arrives or the socket reports an error
 *    (detail: `{ status, data, error }`).
 *
 * Instance state:
 *  - `status`: starts as "scraping"; becomes "completed", "failed", or whatever a
 *    "catchup" message reports.
 *  - `data`: documents accumulated from "catchup" messages.
 *    NOTE(review): documents arriving via individual "document" messages are dispatched
 *    but not appended to `data`, so the "done" detail may be incomplete — confirm intent.
 */
class CrawlWatcher extends typescript_event_target_1.TypedEventTarget {
    /**
     * Opens the socket for the given crawl job and wires up its handlers.
     * @param id - Identifier of the crawl job to watch.
     * @param app - Object exposing `apiUrl` and `apiKey`; `apiKey` is passed as the
     *              second argument of the WebSocket constructor.
     */
    constructor(id, app) {
        super();
        // WebSocket endpoints use the ws:// / wss:// schemes, while apiUrl is an
        // http(s) URL. Map `http` -> `ws` and `https` -> `wss`; URLs that already
        // use a ws scheme are left untouched.
        const wsBaseUrl = app.apiUrl.replace(/^http/, "ws");
        this.ws = new isows_1.WebSocket(`${wsBaseUrl}/v1/crawl/${id}`, app.apiKey);
        this.status = "scraping";
        this.data = [];
        // Translates one decoded server message into state updates and events.
        const messageHandler = (msg) => {
            if (msg.type === "done") {
                this.status = "completed";
                this.dispatchTypedEvent("done", new CustomEvent("done", {
                    detail: {
                        status: this.status,
                        data: this.data,
                    },
                }));
            }
            else if (msg.type === "error") {
                this.status = "failed";
                this.dispatchTypedEvent("error", new CustomEvent("error", {
                    detail: {
                        status: this.status,
                        data: this.data,
                        error: msg.error,
                    },
                }));
            }
            else if (msg.type === "catchup") {
                // A catchup carries the job's current status plus the documents so far.
                this.status = msg.data.status;
                this.data.push(...(msg.data.data ?? []));
                for (const doc of this.data) {
                    this.dispatchTypedEvent("document", new CustomEvent("document", {
                        detail: doc,
                    }));
                }
            }
            else if (msg.type === "document") {
                this.dispatchTypedEvent("document", new CustomEvent("document", {
                    detail: msg.data,
                }));
            }
        };
        this.ws.onmessage = (ev) => {
            // Only text frames are understood; anything else ends the session.
            if (typeof ev.data !== "string") {
                this.ws.close();
                return;
            }
            const msg = JSON.parse(ev.data);
            messageHandler(msg);
        };
        this.ws.onclose = (ev) => {
            // The close reason is only meaningful when it carries a JSON message.
            // An ordinary close (including one triggered by close()) has an empty
            // reason, and JSON.parse("") would throw inside the socket callback.
            if (!ev.reason) {
                return;
            }
            let msg;
            try {
                msg = JSON.parse(ev.reason);
            }
            catch (_parseError) {
                // Non-JSON reason text is not a protocol message; nothing to dispatch.
                return;
            }
            if (msg !== null && typeof msg === "object") {
                messageHandler(msg);
            }
        };
        this.ws.onerror = (_) => {
            this.status = "failed";
            this.dispatchTypedEvent("error", new CustomEvent("error", {
                detail: {
                    status: this.status,
                    data: this.data,
                    error: "WebSocket error",
                },
            }));
        };
    }
    /**
     * Closes the underlying WebSocket.
     */
    close() {
        this.ws.close();
    }
}
exports.CrawlWatcher = CrawlWatcher;

View File

@ -1,6 +1,8 @@
import axios from "axios"; import axios from "axios";
import { z } from "zod"; import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema"; import { zodToJsonSchema } from "zod-to-json-schema";
import { WebSocket } from "isows";
import { TypedEventTarget } from "typescript-event-target";
/** /**
* Main class for interacting with the Firecrawl API. * Main class for interacting with the Firecrawl API.
* Provides methods for scraping, searching, crawling, and mapping web content. * Provides methods for scraping, searching, crawling, and mapping web content.
@ -10,13 +12,9 @@ export default class FirecrawlApp {
* Initializes a new instance of the FirecrawlApp class. * Initializes a new instance of the FirecrawlApp class.
* @param config - Configuration options for the FirecrawlApp instance. * @param config - Configuration options for the FirecrawlApp instance.
*/ */
constructor({ apiKey = null, apiUrl = null, version = "v1" }) { constructor({ apiKey = null, apiUrl = null }) {
this.apiKey = apiKey || ""; this.apiKey = apiKey || "";
this.apiUrl = apiUrl || "https://api.firecrawl.dev"; this.apiUrl = apiUrl || "https://api.firecrawl.dev";
this.version = version;
if (!this.apiKey) {
throw new Error("No API key provided");
}
} }
/** /**
* Scrapes a URL using the Firecrawl API. * Scrapes a URL using the Firecrawl API.
@ -46,16 +44,16 @@ export default class FirecrawlApp {
}; };
} }
try { try {
const response = await axios.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers }); const response = await axios.post(this.apiUrl + `/v1/scrape`, jsonData, { headers });
if (response.status === 200) { if (response.status === 200) {
const responseData = response.data; const responseData = response.data;
if (responseData.success) { if (responseData.success) {
return (this.version === 'v0' ? responseData : { return {
success: true, success: true,
warning: responseData.warning, warning: responseData.warning,
error: responseData.error, error: responseData.error,
...responseData.data ...responseData.data
}); };
} }
else { else {
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
@ -71,80 +69,52 @@ export default class FirecrawlApp {
return { success: false, error: "Internal server error." }; return { success: false, error: "Internal server error." };
} }
/** /**
* Searches for a query using the Firecrawl API. * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
* @param query - The query to search for. * @param query - The search query string.
* @param params - Additional parameters for the search request. * @param params - Additional parameters for the search.
* @returns The response from the search operation. * @returns Throws an error advising to use version 0 of the API.
*/ */
async search(query, params) { async search(query, params) {
if (this.version === "v1") { throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
}
const headers = {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
};
let jsonData = { query };
if (params) {
jsonData = { ...jsonData, ...params };
}
try {
const response = await axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
if (response.status === 200) {
const responseData = response.data;
if (responseData.success) {
return responseData;
}
else {
throw new Error(`Failed to search. Error: ${responseData.error}`);
}
}
else {
this.handleError(response, "search");
}
}
catch (error) {
throw new Error(error.message);
}
return { success: false, error: "Internal server error." };
} }
/** /**
* Initiates a crawl job for a URL using the Firecrawl API. * Initiates a crawl job for a URL using the Firecrawl API.
* @param url - The URL to crawl. * @param url - The URL to crawl.
* @param params - Additional parameters for the crawl request. * @param params - Additional parameters for the crawl request.
* @param waitUntilDone - Whether to wait for the crawl job to complete.
* @param pollInterval - Time in seconds for job status checks. * @param pollInterval - Time in seconds for job status checks.
* @param idempotencyKey - Optional idempotency key for the request. * @param idempotencyKey - Optional idempotency key for the request.
* @returns The response from the crawl operation. * @returns The response from the crawl operation.
*/ */
async crawlUrl(url, params, waitUntilDone = true, pollInterval = 2, idempotencyKey) { async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
const headers = this.prepareHeaders(idempotencyKey); const headers = this.prepareHeaders(idempotencyKey);
let jsonData = { url, ...params }; let jsonData = { url, ...params };
try { try {
const response = await this.postRequest(this.apiUrl + `/${this.version}/crawl`, jsonData, headers); const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
if (response.status === 200) { if (response.status === 200) {
const id = this.version === 'v0' ? response.data.jobId : response.data.id; const id = response.data.id;
let checkUrl = undefined; return this.monitorJobStatus(id, headers, pollInterval);
if (waitUntilDone) { }
if (this.version === 'v1') { else {
checkUrl = response.data.url; this.handleError(response, "start crawl job");
} }
return this.monitorJobStatus(id, headers, pollInterval, checkUrl); }
} catch (error) {
else { if (error.response?.data?.error) {
if (this.version === 'v0') { throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
return { }
success: true, else {
jobId: id throw new Error(error.message);
}; }
} }
else { return { success: false, error: "Internal server error." };
return { }
success: true, async asyncCrawlUrl(url, params, idempotencyKey) {
id: id const headers = this.prepareHeaders(idempotencyKey);
}; let jsonData = { url, ...params };
} try {
} const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
if (response.status === 200) {
return response.data;
} }
else { else {
this.handleError(response, "start crawl job"); this.handleError(response, "start crawl job");
@ -171,37 +141,19 @@ export default class FirecrawlApp {
} }
const headers = this.prepareHeaders(); const headers = this.prepareHeaders();
try { try {
const response = await this.getRequest(this.version === 'v1' ? const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
`${this.apiUrl}/${this.version}/crawl/${id}` :
`${this.apiUrl}/${this.version}/crawl/status/${id}`, headers);
if (response.status === 200) { if (response.status === 200) {
if (this.version === 'v0') { return ({
return { success: true,
success: true, status: response.data.status,
status: response.data.status, total: response.data.total,
current: response.data.current, completed: response.data.completed,
current_url: response.data.current_url, creditsUsed: response.data.creditsUsed,
current_step: response.data.current_step, expiresAt: new Date(response.data.expiresAt),
total: response.data.total, next: response.data.next,
data: response.data.data, data: response.data.data,
partial_data: !response.data.data error: response.data.error
? response.data.partial_data });
: undefined,
};
}
else {
return {
success: true,
status: response.data.status,
total: response.data.total,
completed: response.data.completed,
creditsUsed: response.data.creditsUsed,
expiresAt: new Date(response.data.expiresAt),
next: response.data.next,
data: response.data.data,
error: response.data.error
};
}
} }
else { else {
this.handleError(response, "check crawl status"); this.handleError(response, "check crawl status");
@ -210,29 +162,21 @@ export default class FirecrawlApp {
catch (error) { catch (error) {
throw new Error(error.message); throw new Error(error.message);
} }
return this.version === 'v0' ? return { success: false, error: "Internal server error." };
{ }
success: false, async crawlUrlAndWatch(url, params, idempotencyKey) {
status: "unknown", const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
current: 0, if (crawl.success && crawl.id) {
current_url: "", const id = crawl.id;
current_step: "", return new CrawlWatcher(id, this);
total: 0, }
error: "Internal server error.", throw new Error("Crawl job failed to start");
} :
{
success: false,
error: "Internal server error.",
};
} }
async mapUrl(url, params) { async mapUrl(url, params) {
if (this.version == 'v0') {
throw new Error("Map is not supported in v0");
}
const headers = this.prepareHeaders(); const headers = this.prepareHeaders();
let jsonData = { url, ...params }; let jsonData = { url, ...params };
try { try {
const response = await this.postRequest(this.apiUrl + `/${this.version}/map`, jsonData, headers); const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers);
if (response.status === 200) { if (response.status === 200) {
return response.data; return response.data;
} }
@ -284,21 +228,14 @@ export default class FirecrawlApp {
* @param checkUrl - Optional URL to check the status (used for v1 API) * @param checkUrl - Optional URL to check the status (used for v1 API)
* @returns The final job status or data. * @returns The final job status or data.
*/ */
async monitorJobStatus(id, headers, checkInterval, checkUrl) { async monitorJobStatus(id, headers, checkInterval) {
let apiUrl = '';
while (true) { while (true) {
if (this.version === 'v1') { const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
}
else if (this.version === 'v0') {
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
}
const statusResponse = await this.getRequest(apiUrl, headers);
if (statusResponse.status === 200) { if (statusResponse.status === 200) {
const statusData = statusResponse.data; const statusData = statusResponse.data;
if (statusData.status === "completed") { if (statusData.status === "completed") {
if ("data" in statusData) { if ("data" in statusData) {
return this.version === 'v0' ? statusData.data : statusData; return statusData;
} }
else { else {
throw new Error("Crawl job completed but no data was returned"); throw new Error("Crawl job completed but no data was returned");
@ -332,3 +269,71 @@ export default class FirecrawlApp {
} }
} }
} }
/**
 * Watches a crawl job over a WebSocket and re-emits server messages as typed events.
 *
 * Events dispatched on this object:
 *  - "document": once per document delivered by a "catchup" or "document" message
 *    (detail is the document itself).
 *  - "done": when a "done" message arrives (detail: `{ status, data }`).
 *  - "error": when an "error" message arrives or the socket reports an error
 *    (detail: `{ status, data, error }`).
 *
 * Instance state:
 *  - `status`: starts as "scraping"; becomes "completed", "failed", or whatever a
 *    "catchup" message reports.
 *  - `data`: documents accumulated from "catchup" messages.
 *    NOTE(review): documents arriving via individual "document" messages are dispatched
 *    but not appended to `data`, so the "done" detail may be incomplete — confirm intent.
 */
export class CrawlWatcher extends TypedEventTarget {
    /**
     * Opens the socket for the given crawl job and wires up its handlers.
     * @param id - Identifier of the crawl job to watch.
     * @param app - Object exposing `apiUrl` and `apiKey`; `apiKey` is passed as the
     *              second argument of the WebSocket constructor.
     */
    constructor(id, app) {
        super();
        // WebSocket endpoints use the ws:// / wss:// schemes, while apiUrl is an
        // http(s) URL. Map `http` -> `ws` and `https` -> `wss`; URLs that already
        // use a ws scheme are left untouched.
        const wsBaseUrl = app.apiUrl.replace(/^http/, "ws");
        this.ws = new WebSocket(`${wsBaseUrl}/v1/crawl/${id}`, app.apiKey);
        this.status = "scraping";
        this.data = [];
        // Translates one decoded server message into state updates and events.
        const messageHandler = (msg) => {
            if (msg.type === "done") {
                this.status = "completed";
                this.dispatchTypedEvent("done", new CustomEvent("done", {
                    detail: {
                        status: this.status,
                        data: this.data,
                    },
                }));
            }
            else if (msg.type === "error") {
                this.status = "failed";
                this.dispatchTypedEvent("error", new CustomEvent("error", {
                    detail: {
                        status: this.status,
                        data: this.data,
                        error: msg.error,
                    },
                }));
            }
            else if (msg.type === "catchup") {
                // A catchup carries the job's current status plus the documents so far.
                this.status = msg.data.status;
                this.data.push(...(msg.data.data ?? []));
                for (const doc of this.data) {
                    this.dispatchTypedEvent("document", new CustomEvent("document", {
                        detail: doc,
                    }));
                }
            }
            else if (msg.type === "document") {
                this.dispatchTypedEvent("document", new CustomEvent("document", {
                    detail: msg.data,
                }));
            }
        };
        this.ws.onmessage = (ev) => {
            // Only text frames are understood; anything else ends the session.
            if (typeof ev.data !== "string") {
                this.ws.close();
                return;
            }
            const msg = JSON.parse(ev.data);
            messageHandler(msg);
        };
        this.ws.onclose = (ev) => {
            // The close reason is only meaningful when it carries a JSON message.
            // An ordinary close (including one triggered by close()) has an empty
            // reason, and JSON.parse("") would throw inside the socket callback.
            if (!ev.reason) {
                return;
            }
            let msg;
            try {
                msg = JSON.parse(ev.reason);
            }
            catch (_parseError) {
                // Non-JSON reason text is not a protocol message; nothing to dispatch.
                return;
            }
            if (msg !== null && typeof msg === "object") {
                messageHandler(msg);
            }
        };
        this.ws.onerror = (_) => {
            this.status = "failed";
            this.dispatchTypedEvent("error", new CustomEvent("error", {
                detail: {
                    status: this.status,
                    data: this.data,
                    error: "WebSocket error",
                },
            }));
        };
    }
    /**
     * Closes the underlying WebSocket.
     */
    close() {
        this.ws.close();
    }
}

View File

@ -1,12 +1,12 @@
{ {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "1.0.3", "version": "1.1.0",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "1.0.3", "version": "1.1.0",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"axios": "^1.6.8", "axios": "^1.6.8",

View File

@ -1,15 +1,13 @@
import { AxiosResponse, AxiosRequestHeaders } from "axios"; import { AxiosResponse, AxiosRequestHeaders } from "axios";
import { z } from "zod"; import { TypedEventTarget } from "typescript-event-target";
/** /**
* Configuration interface for FirecrawlApp. * Configuration interface for FirecrawlApp.
* @param apiKey - Optional API key for authentication. * @param apiKey - Optional API key for authentication.
* @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'. * @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
* @param version - API version, either 'v0' or 'v1'.
*/ */
export interface FirecrawlAppConfig { export interface FirecrawlAppConfig {
apiKey?: string | null; apiKey?: string | null;
apiUrl?: string | null; apiUrl?: string | null;
version?: "v0" | "v1";
} }
/** /**
* Metadata for a Firecrawl document. * Metadata for a Firecrawl document.
@ -50,15 +48,6 @@ export interface FirecrawlDocumentMetadata {
error?: string; error?: string;
[key: string]: any; [key: string]: any;
} }
/**
* Metadata for a Firecrawl document on v0.
* Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
*/
export interface FirecrawlDocumentMetadataV0 {
pageStatusCode?: number;
pageError?: string;
[key: string]: any;
}
/** /**
* Document interface for Firecrawl. * Document interface for Firecrawl.
* Represents a document retrieved or processed by Firecrawl. * Represents a document retrieved or processed by Firecrawl.
@ -70,84 +59,30 @@ export interface FirecrawlDocument {
rawHtml?: string; rawHtml?: string;
links?: string[]; links?: string[];
screenshot?: string; screenshot?: string;
metadata: FirecrawlDocumentMetadata; metadata?: FirecrawlDocumentMetadata;
}
/**
* Document interface for Firecrawl on v0.
* Represents a document specifically for API version v0 with additional properties.
*/
export interface FirecrawlDocumentV0 {
id?: string;
url?: string;
content: string;
markdown?: string;
html?: string;
llm_extraction?: Record<string, any>;
createdAt?: Date;
updatedAt?: Date;
type?: string;
metadata: FirecrawlDocumentMetadataV0;
childrenLinks?: string[];
provider?: string;
warning?: string;
index?: number;
} }
/** /**
* Parameters for scraping operations. * Parameters for scraping operations.
* Defines the options and configurations available for scraping web content. * Defines the options and configurations available for scraping web content.
*/ */
export interface ScrapeParams { export interface ScrapeParams {
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[]; formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "full@scrennshot")[];
headers?: Record<string, string>; headers?: Record<string, string>;
includeTags?: string[]; includeTags?: string[];
excludeTags?: string[]; excludeTags?: string[];
onlyMainContent?: boolean; onlyMainContent?: boolean;
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
waitFor?: number; waitFor?: number;
timeout?: number; timeout?: number;
} }
/**
* Parameters for scraping operations on v0.
* Includes page and extractor options specific to API version v0.
*/
export interface ScrapeParamsV0 {
pageOptions?: {
headers?: Record<string, string>;
includeHtml?: boolean;
includeRawHtml?: boolean;
onlyIncludeTags?: string[];
onlyMainContent?: boolean;
removeTags?: string[];
replaceAllPathsWithAbsolutePaths?: boolean;
screenshot?: boolean;
fullPageScreenshot?: boolean;
waitFor?: number;
};
extractorOptions?: {
mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
extractionPrompt?: string;
extractionSchema?: Record<string, any> | z.ZodSchema | any;
};
timeout?: number;
}
/** /**
* Response interface for scraping operations. * Response interface for scraping operations.
* Defines the structure of the response received after a scraping operation. * Defines the structure of the response received after a scraping operation.
*/ */
export interface ScrapeResponse extends FirecrawlDocument { export interface ScrapeResponse extends FirecrawlDocument {
success: boolean; success: true;
warning?: string; warning?: string;
error?: string; error?: string;
} }
/**
* Response interface for scraping operations on v0.
* Similar to ScrapeResponse but tailored for responses from API version v0.
*/
export interface ScrapeResponseV0 {
success: boolean;
data?: FirecrawlDocumentV0;
error?: string;
}
/** /**
* Parameters for crawling operations. * Parameters for crawling operations.
* Includes options for both scraping and mapping during a crawl. * Includes options for both scraping and mapping during a crawl.
@ -162,36 +97,6 @@ export interface CrawlParams {
ignoreSitemap?: boolean; ignoreSitemap?: boolean;
scrapeOptions?: ScrapeParams; scrapeOptions?: ScrapeParams;
} }
/**
* Parameters for crawling operations on v0.
* Tailored for API version v0, includes specific options for crawling.
*/
export interface CrawlParamsV0 {
crawlerOptions?: {
includes?: string[];
excludes?: string[];
generateImgAltText?: boolean;
returnOnlyUrls?: boolean;
maxDepth?: number;
mode?: "default" | "fast";
ignoreSitemap?: boolean;
limit?: number;
allowBackwardCrawling?: boolean;
allowExternalContentLinks?: boolean;
};
pageOptions?: {
headers?: Record<string, string>;
includeHtml?: boolean;
includeRawHtml?: boolean;
onlyIncludeTags?: string[];
onlyMainContent?: boolean;
removeTags?: string[];
replaceAllPathsWithAbsolutePaths?: boolean;
screenshot?: boolean;
fullPageScreenshot?: boolean;
waitFor?: number;
};
}
/** /**
* Response interface for crawling operations. * Response interface for crawling operations.
* Defines the structure of the response received after initiating a crawl. * Defines the structure of the response received after initiating a crawl.
@ -199,16 +104,7 @@ export interface CrawlParamsV0 {
export interface CrawlResponse { export interface CrawlResponse {
id?: string; id?: string;
url?: string; url?: string;
success: boolean; success: true;
error?: string;
}
/**
* Response interface for crawling operations on v0.
* Similar to CrawlResponse but tailored for responses from API version v0.
*/
export interface CrawlResponseV0 {
jobId?: string;
success: boolean;
error?: string; error?: string;
} }
/** /**
@ -216,7 +112,7 @@ export interface CrawlResponseV0 {
* Provides detailed status of a crawl job including progress and results. * Provides detailed status of a crawl job including progress and results.
*/ */
export interface CrawlStatusResponse { export interface CrawlStatusResponse {
success: boolean; success: true;
total: number; total: number;
completed: number; completed: number;
creditsUsed: number; creditsUsed: number;
@ -226,21 +122,6 @@ export interface CrawlStatusResponse {
data?: FirecrawlDocument[]; data?: FirecrawlDocument[];
error?: string; error?: string;
} }
/**
* Response interface for job status checks on v0.
* Tailored for API version v0, provides status and partial data of a crawl job.
*/
export interface CrawlStatusResponseV0 {
success: boolean;
status: string;
current?: number;
current_url?: string;
current_step?: string;
total?: number;
data?: FirecrawlDocumentV0[];
partial_data?: FirecrawlDocumentV0[];
error?: string;
}
/** /**
* Parameters for mapping operations. * Parameters for mapping operations.
* Defines options for mapping URLs during a crawl. * Defines options for mapping URLs during a crawl.
@ -256,78 +137,62 @@ export interface MapParams {
* Defines the structure of the response received after a mapping operation. * Defines the structure of the response received after a mapping operation.
*/ */
export interface MapResponse { export interface MapResponse {
success: boolean; success: true;
links?: string[]; links?: string[];
error?: string; error?: string;
} }
/** /**
* Parameters for searching operations on v0. * Error response interface.
* Tailored for API version v0, includes specific options for searching content. * Defines the structure of the response received when an error occurs.
*/ */
export interface SearchParamsV0 { export interface ErrorResponse {
pageOptions?: { success: false;
onlyMainContent?: boolean; error: string;
fetchPageContent?: boolean;
includeHtml?: boolean;
includeRawHtml?: boolean;
};
searchOptions?: {
limit?: number;
};
}
/**
* Response interface for searching operations on v0.
* Defines the structure of the response received after a search operation on v0.
*/
export interface SearchResponseV0 {
success: boolean;
data?: FirecrawlDocumentV0[];
error?: string;
} }
/** /**
* Main class for interacting with the Firecrawl API. * Main class for interacting with the Firecrawl API.
* Provides methods for scraping, searching, crawling, and mapping web content. * Provides methods for scraping, searching, crawling, and mapping web content.
*/ */
export default class FirecrawlApp<T extends "v0" | "v1"> { export default class FirecrawlApp {
private apiKey; apiKey: string;
private apiUrl; apiUrl: string;
version: T;
/** /**
* Initializes a new instance of the FirecrawlApp class. * Initializes a new instance of the FirecrawlApp class.
* @param config - Configuration options for the FirecrawlApp instance. * @param config - Configuration options for the FirecrawlApp instance.
*/ */
constructor({ apiKey, apiUrl, version }: FirecrawlAppConfig); constructor({ apiKey, apiUrl }: FirecrawlAppConfig);
/** /**
* Scrapes a URL using the Firecrawl API. * Scrapes a URL using the Firecrawl API.
* @param url - The URL to scrape. * @param url - The URL to scrape.
* @param params - Additional parameters for the scrape request. * @param params - Additional parameters for the scrape request.
* @returns The response from the scrape operation. * @returns The response from the scrape operation.
*/ */
scrapeUrl(url: string, params?: ScrapeParams | ScrapeParamsV0): Promise<this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse>; scrapeUrl(url: string, params?: ScrapeParams): Promise<ScrapeResponse | ErrorResponse>;
/** /**
* Searches for a query using the Firecrawl API. * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
* @param query - The query to search for. * @param query - The search query string.
* @param params - Additional parameters for the search request. * @param params - Additional parameters for the search.
* @returns The response from the search operation. * @returns Throws an error advising to use version 0 of the API.
*/ */
search(query: string, params?: SearchParamsV0): Promise<SearchResponseV0>; search(query: string, params?: any): Promise<any>;
/** /**
* Initiates a crawl job for a URL using the Firecrawl API. * Initiates a crawl job for a URL using the Firecrawl API.
* @param url - The URL to crawl. * @param url - The URL to crawl.
* @param params - Additional parameters for the crawl request. * @param params - Additional parameters for the crawl request.
* @param waitUntilDone - Whether to wait for the crawl job to complete.
* @param pollInterval - Time in seconds for job status checks. * @param pollInterval - Time in seconds for job status checks.
* @param idempotencyKey - Optional idempotency key for the request. * @param idempotencyKey - Optional idempotency key for the request.
* @returns The response from the crawl operation. * @returns The response from the crawl operation.
*/ */
crawlUrl(url: string, params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams, waitUntilDone?: boolean, pollInterval?: number, idempotencyKey?: string): Promise<this['version'] extends 'v0' ? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlResponse | CrawlStatusResponse>; crawlUrl(url: string, params?: CrawlParams, pollInterval?: number, idempotencyKey?: string): Promise<CrawlStatusResponse | ErrorResponse>;
asyncCrawlUrl(url: string, params?: CrawlParams, idempotencyKey?: string): Promise<CrawlResponse | ErrorResponse>;
/** /**
* Checks the status of a crawl job using the Firecrawl API. * Checks the status of a crawl job using the Firecrawl API.
* @param id - The ID of the crawl operation. * @param id - The ID of the crawl operation.
* @returns The response containing the job status. * @returns The response containing the job status.
*/ */
checkCrawlStatus(id?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse>; checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse>;
mapUrl(url: string, params?: MapParams): Promise<MapResponse>; crawlUrlAndWatch(url: string, params?: CrawlParams, idempotencyKey?: string): Promise<CrawlWatcher>;
mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse>;
/** /**
* Prepares the headers for an API request. * Prepares the headers for an API request.
* @param idempotencyKey - Optional key to ensure idempotency. * @param idempotencyKey - Optional key to ensure idempotency.
@ -357,7 +222,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
* @param checkUrl - Optional URL to check the status (used for v1 API) * @param checkUrl - Optional URL to check the status (used for v1 API)
* @returns The final job status or data. * @returns The final job status or data.
*/ */
monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number, checkUrl?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse>; monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<CrawlStatusResponse>;
/** /**
* Handles errors from API responses. * Handles errors from API responses.
* @param {AxiosResponse} response - The response from the API. * @param {AxiosResponse} response - The response from the API.
@ -365,3 +230,23 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
*/ */
handleError(response: AxiosResponse, action: string): void; handleError(response: AxiosResponse, action: string): void;
} }
interface CrawlWatcherEvents {
document: CustomEvent<FirecrawlDocument>;
done: CustomEvent<{
status: CrawlStatusResponse["status"];
data: FirecrawlDocument[];
}>;
error: CustomEvent<{
status: CrawlStatusResponse["status"];
data: FirecrawlDocument[];
error: string;
}>;
}
export declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
private ws;
data: FirecrawlDocument[];
status: CrawlStatusResponse["status"];
constructor(id: string, app: FirecrawlApp);
close(): void;
}
export {};

View File

@ -13,7 +13,7 @@ import os
from .firecrawl import FirecrawlApp from .firecrawl import FirecrawlApp
__version__ = "1.0.1" __version__ = "1.1.1"
# Define the logger for the Firecrawl project # Define the logger for the Firecrawl project
logger: logging.Logger = logging.getLogger("firecrawl") logger: logging.Logger = logging.getLogger("firecrawl")

View File

@ -10,6 +10,10 @@ readme = {file="README.md", content-type = "text/markdown"}
requires-python = ">=3.8" requires-python = ">=3.8"
dependencies = [ dependencies = [
"requests", "requests",
"python-dotenv",
"websockets",
"asyncio",
"nest-asyncio"
] ]
authors = [{name = "Mendable.ai",email = "nick@mendable.ai"}] authors = [{name = "Mendable.ai",email = "nick@mendable.ai"}]
maintainers = [{name = "Mendable.ai",email = "nick@mendable.ai"}] maintainers = [{name = "Mendable.ai",email = "nick@mendable.ai"}]

View File

@ -30,6 +30,9 @@ setup(
'requests', 'requests',
'pytest', 'pytest',
'python-dotenv', 'python-dotenv',
'websockets',
'asyncio',
'nest-asyncio'
], ],
python_requires=">=3.8", python_requires=">=3.8",
classifiers=[ classifiers=[