From f294d3922cde4f66a717d3e6b7e1664660b31b19 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 6 Aug 2024 18:44:45 -0400 Subject: [PATCH] Nick: revert --- apps/js-sdk/firecrawl/build/cjs/index.js | 271 +++++++++++++++++++ apps/js-sdk/firecrawl/build/cjs/package.json | 1 + apps/js-sdk/firecrawl/build/esm/index.js | 265 ++++++++++++++++++ apps/js-sdk/firecrawl/build/esm/package.json | 1 + apps/js-sdk/firecrawl/build/index.js | 14 +- apps/js-sdk/firecrawl/package.json | 2 +- apps/js-sdk/firecrawl/types/index.d.ts | 22 +- 7 files changed, 559 insertions(+), 17 deletions(-) create mode 100644 apps/js-sdk/firecrawl/build/cjs/index.js create mode 100644 apps/js-sdk/firecrawl/build/cjs/package.json create mode 100644 apps/js-sdk/firecrawl/build/esm/index.js create mode 100644 apps/js-sdk/firecrawl/build/esm/package.json diff --git a/apps/js-sdk/firecrawl/build/cjs/index.js b/apps/js-sdk/firecrawl/build/cjs/index.js new file mode 100644 index 00000000..da340cae --- /dev/null +++ b/apps/js-sdk/firecrawl/build/cjs/index.js @@ -0,0 +1,271 @@ +"use strict"; +var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { + function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } + return new (P || (P = Promise))(function (resolve, reject) { + function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } + function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } + function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } + step((generator = generator.apply(thisArg, _arguments || [])).next()); + }); +}; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +const axios_1 = __importDefault(require("axios")); +const zod_1 = require("zod"); +const zod_to_json_schema_1 = require("zod-to-json-schema"); +/** + * Main class for interacting with the Firecrawl API. + */ +class FirecrawlApp { + /** + * Initializes a new instance of the FirecrawlApp class. + * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. + */ + constructor({ apiKey = null, apiUrl = null }) { + this.apiKey = apiKey || ""; + this.apiUrl = apiUrl || "https://api.firecrawl.dev"; + if (!this.apiKey) { + throw new Error("No API key provided"); + } + } + /** + * Scrapes a URL using the Firecrawl API. + * @param {string} url - The URL to scrape. + * @param {Params | null} params - Additional parameters for the scrape request. + * @returns {Promise} The response from the scrape operation. + */ + scrapeUrl(url, params = null) { + var _a; + return __awaiter(this, void 0, void 0, function* () { + const headers = { + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, + }; + let jsonData = Object.assign({ url }, params); + if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) { + let schema = params.extractorOptions.extractionSchema; + // Check if schema is an instance of ZodSchema to correctly identify Zod schemas + if (schema instanceof zod_1.z.ZodSchema) { + schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema); + } + jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) }); + } + try { + const response = yield axios_1.default.post(this.apiUrl + "/v0/scrape", jsonData, { headers }); + if (response.status === 200) { + const responseData = response.data; + if (responseData.success) { + return responseData; + } + else { + throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); + } + } + else { + this.handleError(response, "scrape URL"); + } + } + catch (error) { + throw new Error(error.message); + } + return { success: false, error: "Internal server error." }; + }); + } + /** + * Searches for a query using the Firecrawl API. + * @param {string} query - The query to search for. + * @param {Params | null} params - Additional parameters for the search request. + * @returns {Promise} The response from the search operation. + */ + search(query, params = null) { + return __awaiter(this, void 0, void 0, function* () { + const headers = { + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, + }; + let jsonData = { query }; + if (params) { + jsonData = Object.assign(Object.assign({}, jsonData), params); + } + try { + const response = yield axios_1.default.post(this.apiUrl + "/v0/search", jsonData, { headers }); + if (response.status === 200) { + const responseData = response.data; + if (responseData.success) { + return responseData; + } + else { + throw new Error(`Failed to search. Error: ${responseData.error}`); + } + } + else { + this.handleError(response, "search"); + } + } + catch (error) { + throw new Error(error.message); + } + return { success: false, error: "Internal server error." }; + }); + } + /** + * Initiates a crawl job for a URL using the Firecrawl API. + * @param {string} url - The URL to crawl. + * @param {Params | null} params - Additional parameters for the crawl request. + * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. + * @param {number} pollInterval - Time in seconds for job status checks. + * @param {string} idempotencyKey - Optional idempotency key for the request. + * @returns {Promise} The response from the crawl operation. + */ + crawlUrl(url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) { + return __awaiter(this, void 0, void 0, function* () { + const headers = this.prepareHeaders(idempotencyKey); + let jsonData = { url }; + if (params) { + jsonData = Object.assign(Object.assign({}, jsonData), params); + } + try { + const response = yield this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers); + if (response.status === 200) { + const jobId = response.data.jobId; + if (waitUntilDone) { + return this.monitorJobStatus(jobId, headers, pollInterval); + } + else { + return { success: true, jobId }; + } + } + else { + this.handleError(response, "start crawl job"); + } + } + catch (error) { + console.log(error); + throw new Error(error.message); + } + return { success: false, error: "Internal server error." }; + }); + } + /** + * Checks the status of a crawl job using the Firecrawl API. + * @param {string} jobId - The job ID of the crawl operation. + * @returns {Promise} The response containing the job status. + */ + checkCrawlStatus(jobId) { + return __awaiter(this, void 0, void 0, function* () { + const headers = this.prepareHeaders(); + try { + const response = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers); + if (response.status === 200) { + return { + success: true, + status: response.data.status, + current: response.data.current, + current_url: response.data.current_url, + current_step: response.data.current_step, + total: response.data.total, + data: response.data.data, + partial_data: !response.data.data + ? response.data.partial_data + : undefined, + }; + } + else { + this.handleError(response, "check crawl status"); + } + } + catch (error) { + throw new Error(error.message); + } + return { + success: false, + status: "unknown", + current: 0, + current_url: "", + current_step: "", + total: 0, + error: "Internal server error.", + }; + }); + } + /** + * Prepares the headers for an API request. + * @returns {AxiosRequestHeaders} The prepared headers. + */ + prepareHeaders(idempotencyKey) { + return Object.assign({ "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }, (idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {})); + } + /** + * Sends a POST request to the specified URL. + * @param {string} url - The URL to send the request to. + * @param {Params} data - The data to send in the request. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @returns {Promise} The response from the POST request. + */ + postRequest(url, data, headers) { + return axios_1.default.post(url, data, { headers }); + } + /** + * Sends a GET request to the specified URL. + * @param {string} url - The URL to send the request to. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @returns {Promise} The response from the GET request. + */ + getRequest(url, headers) { + return axios_1.default.get(url, { headers }); + } + /** + * Monitors the status of a crawl job until completion or failure. + * @param {string} jobId - The job ID of the crawl operation. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @param {number} timeout - Timeout in seconds for job status checks. + * @returns {Promise} The final job status or data. + */ + monitorJobStatus(jobId, headers, checkInterval) { + return __awaiter(this, void 0, void 0, function* () { + while (true) { + const statusResponse = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers); + if (statusResponse.status === 200) { + const statusData = statusResponse.data; + if (statusData.status === "completed") { + if ("data" in statusData) { + return statusData.data; + } + else { + throw new Error("Crawl job completed but no data was returned"); + } + } + else if (["active", "paused", "pending", "queued"].includes(statusData.status)) { + if (checkInterval < 2) { + checkInterval = 2; + } + yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again + } + else { + throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`); + } + } + else { + this.handleError(statusResponse, "check crawl status"); + } + } + }); + } + /** + * Handles errors from API responses. + * @param {AxiosResponse} response - The response from the API. + * @param {string} action - The action being performed when the error occurred. + */ + handleError(response, action) { + if ([402, 408, 409, 500].includes(response.status)) { + const errorMessage = response.data.error || "Unknown error occurred"; + throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`); + } + else { + throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`); + } + } +} +exports.default = FirecrawlApp; diff --git a/apps/js-sdk/firecrawl/build/cjs/package.json b/apps/js-sdk/firecrawl/build/cjs/package.json new file mode 100644 index 00000000..b731bd61 --- /dev/null +++ b/apps/js-sdk/firecrawl/build/cjs/package.json @@ -0,0 +1 @@ +{"type": "commonjs"} diff --git a/apps/js-sdk/firecrawl/build/esm/index.js b/apps/js-sdk/firecrawl/build/esm/index.js new file mode 100644 index 00000000..ef79f180 --- /dev/null +++ b/apps/js-sdk/firecrawl/build/esm/index.js @@ -0,0 +1,265 @@ +var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { + function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } + return new (P || (P = Promise))(function (resolve, reject) { + function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } + function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } + function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } + step((generator = generator.apply(thisArg, _arguments || [])).next()); + }); +}; +import axios from "axios"; +import { z } from "zod"; +import { zodToJsonSchema } from "zod-to-json-schema"; +/** + * Main class for interacting with the Firecrawl API. + */ +export default class FirecrawlApp { + /** + * Initializes a new instance of the FirecrawlApp class. + * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. + */ + constructor({ apiKey = null, apiUrl = null }) { + this.apiKey = apiKey || ""; + this.apiUrl = apiUrl || "https://api.firecrawl.dev"; + if (!this.apiKey) { + throw new Error("No API key provided"); + } + } + /** + * Scrapes a URL using the Firecrawl API. + * @param {string} url - The URL to scrape. + * @param {Params | null} params - Additional parameters for the scrape request. + * @returns {Promise} The response from the scrape operation. + */ + scrapeUrl(url, params = null) { + var _a; + return __awaiter(this, void 0, void 0, function* () { + const headers = { + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, + }; + let jsonData = Object.assign({ url }, params); + if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) { + let schema = params.extractorOptions.extractionSchema; + // Check if schema is an instance of ZodSchema to correctly identify Zod schemas + if (schema instanceof z.ZodSchema) { + schema = zodToJsonSchema(schema); + } + jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) }); + } + try { + const response = yield axios.post(this.apiUrl + "/v0/scrape", jsonData, { headers }); + if (response.status === 200) { + const responseData = response.data; + if (responseData.success) { + return responseData; + } + else { + throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); + } + } + else { + this.handleError(response, "scrape URL"); + } + } + catch (error) { + throw new Error(error.message); + } + return { success: false, error: "Internal server error." }; + }); + } + /** + * Searches for a query using the Firecrawl API. + * @param {string} query - The query to search for. + * @param {Params | null} params - Additional parameters for the search request. + * @returns {Promise} The response from the search operation. + */ + search(query, params = null) { + return __awaiter(this, void 0, void 0, function* () { + const headers = { + "Content-Type": "application/json", + Authorization: `Bearer ${this.apiKey}`, + }; + let jsonData = { query }; + if (params) { + jsonData = Object.assign(Object.assign({}, jsonData), params); + } + try { + const response = yield axios.post(this.apiUrl + "/v0/search", jsonData, { headers }); + if (response.status === 200) { + const responseData = response.data; + if (responseData.success) { + return responseData; + } + else { + throw new Error(`Failed to search. Error: ${responseData.error}`); + } + } + else { + this.handleError(response, "search"); + } + } + catch (error) { + throw new Error(error.message); + } + return { success: false, error: "Internal server error." }; + }); + } + /** + * Initiates a crawl job for a URL using the Firecrawl API. + * @param {string} url - The URL to crawl. + * @param {Params | null} params - Additional parameters for the crawl request. + * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. + * @param {number} pollInterval - Time in seconds for job status checks. + * @param {string} idempotencyKey - Optional idempotency key for the request. + * @returns {Promise} The response from the crawl operation. + */ + crawlUrl(url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) { + return __awaiter(this, void 0, void 0, function* () { + const headers = this.prepareHeaders(idempotencyKey); + let jsonData = { url }; + if (params) { + jsonData = Object.assign(Object.assign({}, jsonData), params); + } + try { + const response = yield this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers); + if (response.status === 200) { + const jobId = response.data.jobId; + if (waitUntilDone) { + return this.monitorJobStatus(jobId, headers, pollInterval); + } + else { + return { success: true, jobId }; + } + } + else { + this.handleError(response, "start crawl job"); + } + } + catch (error) { + console.log(error); + throw new Error(error.message); + } + return { success: false, error: "Internal server error." }; + }); + } + /** + * Checks the status of a crawl job using the Firecrawl API. + * @param {string} jobId - The job ID of the crawl operation. + * @returns {Promise} The response containing the job status. + */ + checkCrawlStatus(jobId) { + return __awaiter(this, void 0, void 0, function* () { + const headers = this.prepareHeaders(); + try { + const response = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers); + if (response.status === 200) { + return { + success: true, + status: response.data.status, + current: response.data.current, + current_url: response.data.current_url, + current_step: response.data.current_step, + total: response.data.total, + data: response.data.data, + partial_data: !response.data.data + ? response.data.partial_data + : undefined, + }; + } + else { + this.handleError(response, "check crawl status"); + } + } + catch (error) { + throw new Error(error.message); + } + return { + success: false, + status: "unknown", + current: 0, + current_url: "", + current_step: "", + total: 0, + error: "Internal server error.", + }; + }); + } + /** + * Prepares the headers for an API request. + * @returns {AxiosRequestHeaders} The prepared headers. + */ + prepareHeaders(idempotencyKey) { + return Object.assign({ "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }, (idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {})); + } + /** + * Sends a POST request to the specified URL. + * @param {string} url - The URL to send the request to. + * @param {Params} data - The data to send in the request. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @returns {Promise} The response from the POST request. + */ + postRequest(url, data, headers) { + return axios.post(url, data, { headers }); + } + /** + * Sends a GET request to the specified URL. + * @param {string} url - The URL to send the request to. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @returns {Promise} The response from the GET request. + */ + getRequest(url, headers) { + return axios.get(url, { headers }); + } + /** + * Monitors the status of a crawl job until completion or failure. + * @param {string} jobId - The job ID of the crawl operation. + * @param {AxiosRequestHeaders} headers - The headers for the request. + * @param {number} timeout - Timeout in seconds for job status checks. + * @returns {Promise} The final job status or data. + */ + monitorJobStatus(jobId, headers, checkInterval) { + return __awaiter(this, void 0, void 0, function* () { + while (true) { + const statusResponse = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers); + if (statusResponse.status === 200) { + const statusData = statusResponse.data; + if (statusData.status === "completed") { + if ("data" in statusData) { + return statusData.data; + } + else { + throw new Error("Crawl job completed but no data was returned"); + } + } + else if (["active", "paused", "pending", "queued"].includes(statusData.status)) { + if (checkInterval < 2) { + checkInterval = 2; + } + yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again + } + else { + throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`); + } + } + else { + this.handleError(statusResponse, "check crawl status"); + } + } + }); + } + /** + * Handles errors from API responses. + * @param {AxiosResponse} response - The response from the API. + * @param {string} action - The action being performed when the error occurred. + */ + handleError(response, action) { + if ([402, 408, 409, 500].includes(response.status)) { + const errorMessage = response.data.error || "Unknown error occurred"; + throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`); + } + else { + throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`); + } + } +} diff --git a/apps/js-sdk/firecrawl/build/esm/package.json b/apps/js-sdk/firecrawl/build/esm/package.json new file mode 100644 index 00000000..6990891f --- /dev/null +++ b/apps/js-sdk/firecrawl/build/esm/package.json @@ -0,0 +1 @@ +{"type": "module"} diff --git a/apps/js-sdk/firecrawl/build/index.js b/apps/js-sdk/firecrawl/build/index.js index 99de5e2b..ef79f180 100644 --- a/apps/js-sdk/firecrawl/build/index.js +++ b/apps/js-sdk/firecrawl/build/index.js @@ -31,9 +31,9 @@ export default class FirecrawlApp { * @param {Params | null} params - Additional parameters for the scrape request. * @returns {Promise} The response from the scrape operation. */ - scrapeUrl(url_1) { - return __awaiter(this, arguments, void 0, function* (url, params = null) { - var _a; + scrapeUrl(url, params = null) { + var _a; + return __awaiter(this, void 0, void 0, function* () { const headers = { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, @@ -74,8 +74,8 @@ export default class FirecrawlApp { * @param {Params | null} params - Additional parameters for the search request. * @returns {Promise} The response from the search operation. */ - search(query_1) { - return __awaiter(this, arguments, void 0, function* (query, params = null) { + search(query, params = null) { + return __awaiter(this, void 0, void 0, function* () { const headers = { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, @@ -114,8 +114,8 @@ export default class FirecrawlApp { * @param {string} idempotencyKey - Optional idempotency key for the request. * @returns {Promise} The response from the crawl operation. */ - crawlUrl(url_1) { - return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) { + crawlUrl(url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) { + return __awaiter(this, void 0, void 0, function* () { const headers = this.prepareHeaders(idempotencyKey); let jsonData = { url }; if (params) { diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 71d2362e..e6a398e4 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.29", + "version": "0.0.34", "description": "JavaScript SDK for Firecrawl API", "main": "build/index.js", "types": "types/index.d.ts", diff --git a/apps/js-sdk/firecrawl/types/index.d.ts b/apps/js-sdk/firecrawl/types/index.d.ts index 91a58043..bd6cfc20 100644 --- a/apps/js-sdk/firecrawl/types/index.d.ts +++ b/apps/js-sdk/firecrawl/types/index.d.ts @@ -73,16 +73,16 @@ export interface ScrapeResponse { error?: string; } /** -* Response interface for searching operations. -*/ + * Response interface for searching operations. + */ export interface SearchResponse { success: boolean; data?: FirecrawlDocument[]; error?: string; } /** -* Response interface for crawling operations. -*/ + * Response interface for crawling operations. + */ export interface CrawlResponse { success: boolean; jobId?: string; @@ -90,24 +90,28 @@ export interface CrawlResponse { error?: string; } /** -* Response interface for job status checks. -*/ + * Response interface for job status checks. + */ export interface JobStatusResponse { success: boolean; status: string; + current?: number; + current_url?: string; + current_step?: string; + total?: number; jobId?: string; data?: FirecrawlDocument[]; partial_data?: FirecrawlDocument[]; error?: string; } /** - * Generic parameter interface. - */ + * Generic parameter interface. + */ export interface Params { [key: string]: any; extractorOptions?: { extractionSchema: z.ZodSchema | any; - mode?: "llm-extraction" | "llm-extraction-from-raw-html"; + mode?: "llm-extraction"; extractionPrompt?: string; }; }