typescript fixes

commit cf9d77d889
parent c16437e933
@@ -26,7 +26,7 @@ export async function crawlController(req: Request, res: Response) {
 // limit: number
 // allowBackwardLinks: boolean >> TODO: CHANGE THIS NAME???
 // allowExternalLinks: boolean
-// ignoreSitemap: number
+// ignoreSitemap: boolean
 // }
 // scrapeOptions: Exclude<Scrape, "url">
 // }
@@ -36,12 +36,13 @@ export async function scrapeController(req: Request, res: Response) {
 // headers: {
 //   "x-key": "test"
 // },
-// formats: ["markdown", "html", "rawHtml", "content", "linksOnPage", "screenshot", "fullPageScreenshot"],
+// formats: ["markdown", "html", "rawHtml", "content", "links", "screenshot"],
 // includeTags: ["test"],
 // excludeTags: ["test"],
 // onlyMainContent: false,
 // timeout: 30000,
 // waitFor: number
+// screenshotMode: "desktop" | "full-desktop" | "mobile" | "full-mobile";
 // }

 try {
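For orientation, the commented plan above implies a v1 scrape request body along these lines. This is a hedged sketch assembled only from the comment block; the controller's real validation schema is not part of this diff, and the URL is a placeholder.

// Sketch of a v1 /scrape request body implied by the comments above.
const scrapeBody = {
  url: "https://example.com", // placeholder
  formats: ["markdown", "html", "rawHtml", "content", "links", "screenshot"],
  headers: { "x-key": "test" },
  includeTags: ["test"],
  excludeTags: ["test"],
  onlyMainContent: false,
  timeout: 30000,
  waitFor: 1000,
  screenshotMode: "desktop", // "desktop" | "full-desktop" | "mobile" | "full-mobile"
};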
apps/js-sdk/example.js
@@ -1,16 +1,16 @@
 import { v4 as uuidv4 } from 'uuid';
-import FirecrawlApp from '@mendable/firecrawl-js';
 import { z } from "zod";
+import FirecrawlApp from './firecrawl/src/index'; //'@mendable/firecrawl-js';

 const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});

 // Scrape a website:
 const scrapeResult = await app.scrapeUrl('firecrawl.dev');
-console.log(scrapeResult.data.content)
+if (scrapeResult.data) {
+  console.log(scrapeResult.data.markdown)
+}

 // Crawl a website:
 const idempotencyKey = uuidv4(); // optional
-const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false, 2, idempotencyKey);
+const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
 console.log(crawlResult)

 const jobId = await crawlResult['jobId'];
@@ -19,67 +19,15 @@ console.log(jobId);

 let job;
 while (true) {
   job = await app.checkCrawlStatus(jobId);
-  if (job.status == 'completed') {
+  if (job.status === 'completed') {
     break;
   }
   await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
 }

-console.log(job.data[0].content);
-
-// Search for a query:
-const query = 'what is mendable?'
-const searchResult = await app.search(query)
-console.log(searchResult)
-
-// LLM Extraction:
-// Define schema to extract contents into using zod schema
-const zodSchema = z.object({
-  top: z
-    .array(
-      z.object({
-        title: z.string(),
-        points: z.number(),
-        by: z.string(),
-        commentsURL: z.string(),
-      })
-    )
-    .length(5)
-    .describe("Top 5 stories on Hacker News"),
-});
-
-let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
-  extractorOptions: { extractionSchema: zodSchema },
-});
-
-console.log(llmExtractionResult.data.llm_extraction);
-
-// Define schema to extract contents into using json schema
-const jsonSchema = {
-  "type": "object",
-  "properties": {
-    "top": {
-      "type": "array",
-      "items": {
-        "type": "object",
-        "properties": {
-          "title": {"type": "string"},
-          "points": {"type": "number"},
-          "by": {"type": "string"},
-          "commentsURL": {"type": "string"}
-        },
-        "required": ["title", "points", "by", "commentsURL"]
-      },
-      "minItems": 5,
-      "maxItems": 5,
-      "description": "Top 5 stories on Hacker News"
-    }
-  },
-  "required": ["top"]
-}
+if (job.data) {
+  console.log(job.data[0].markdown);
+}

-llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
-  extractorOptions: { extractionSchema: jsonSchema },
-});
-
-console.log(llmExtractionResult.data.llm_extraction);
+const mapResult = await app.map('https://firecrawl.dev');
+console.log(mapResult)
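The shortened crawlUrl call above drops the pollInterval and idempotencyKey arguments, but both remain in the v1 signature (see the index.ts diff below); a sketch of passing them explicitly:

// Sketch: crawlUrl still accepts pollInterval and idempotencyKey as later arguments.
const result = await app.crawlUrl(
  'mendable.ai',
  { crawlerOptions: { excludes: ['blog/*'], limit: 5 } },
  false,          // waitUntilDone
  2,              // pollInterval, in seconds
  idempotencyKey  // optional, e.g. a uuid
);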
apps/js-sdk/example.ts
@@ -1,5 +1,5 @@
-import FirecrawlApp, { JobStatusResponse } from './firecrawl/src/index' //'@mendable/firecrawl-js';
 import { z } from "zod";
+import FirecrawlApp from './firecrawl/src/index' //'@mendable/firecrawl-js';
+import { CrawlStatusResponse } from './firecrawl/src/index';

 const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});

@@ -7,7 +7,7 @@ const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
 const scrapeResult = await app.scrapeUrl('firecrawl.dev');

 if (scrapeResult.data) {
-  console.log(scrapeResult.data.content)
+  console.log(scrapeResult.data.markdown)
 }

 // Crawl a website:
@@ -17,9 +17,9 @@ console.log(crawlResult)
 const jobId: string = await crawlResult['jobId'];
 console.log(jobId);

-let job: JobStatusResponse;
+let job: CrawlStatusResponse;
 while (true) {
-  job = await app.checkCrawlStatus(jobId);
+  job = await app.checkCrawlStatus(jobId) as CrawlStatusResponse;
   if (job.status === 'completed') {
     break;
   }
@@ -27,66 +27,8 @@ while (true) {
 }

 if (job.data) {
-  console.log(job.data[0].content);
+  console.log(job.data[0].markdown);
 }

-// Search for a query:
-const query = 'what is mendable?'
-const searchResult = await app.search(query)
-
-// LLM Extraction:
-// Define schema to extract contents into using zod schema
-const zodSchema = z.object({
-  top: z
-    .array(
-      z.object({
-        title: z.string(),
-        points: z.number(),
-        by: z.string(),
-        commentsURL: z.string(),
-      })
-    )
-    .length(5)
-    .describe("Top 5 stories on Hacker News"),
-});
-
-let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
-  extractorOptions: { extractionSchema: zodSchema },
-});
-
-if (llmExtractionResult.data) {
-  console.log(llmExtractionResult.data.llm_extraction);
-}
-
-// Define schema to extract contents into using json schema
-const jsonSchema = {
-  "type": "object",
-  "properties": {
-    "top": {
-      "type": "array",
-      "items": {
-        "type": "object",
-        "properties": {
-          "title": {"type": "string"},
-          "points": {"type": "number"},
-          "by": {"type": "string"},
-          "commentsURL": {"type": "string"}
-        },
-        "required": ["title", "points", "by", "commentsURL"]
-      },
-      "minItems": 5,
-      "maxItems": 5,
-      "description": "Top 5 stories on Hacker News"
-    }
-  },
-  "required": ["top"]
-}
-
-llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
-  extractorOptions: { extractionSchema: jsonSchema },
-});
-
-if (llmExtractionResult.data) {
-  console.log(llmExtractionResult.data.llm_extraction);
-}
+const mapResult = await app.map('https://firecrawl.dev');
+console.log(mapResult)
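The explicit `as CrawlStatusResponse` cast is needed because checkCrawlStatus is declared to return CrawlStatusResponse | CrawlStatusResponseV0 (see the SDK diff below). A cast-free alternative is a user-side type guard; a sketch, assuming totalCount appears only on the v1 shape:

// Sketch of a narrowing guard; not part of the SDK itself.
function isV1Status(
  r: CrawlStatusResponse | CrawlStatusResponseV0
): r is CrawlStatusResponse {
  return (r as CrawlStatusResponse).totalCount !== undefined;
}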
apps/js-sdk/exampleV0.js (new file, 85 lines)
@@ -0,0 +1,85 @@
import { v4 as uuidv4 } from 'uuid';
import FirecrawlApp from '@mendable/firecrawl-js';
import { z } from "zod";

const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});

// Scrape a website:
const scrapeResult = await app.scrapeUrl('firecrawl.dev');
console.log(scrapeResult.data.content)

// Crawl a website:
const idempotencyKey = uuidv4(); // optional
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false, 2, idempotencyKey);
console.log(crawlResult)

const jobId = await crawlResult['jobId'];
console.log(jobId);

let job;
while (true) {
  job = await app.checkCrawlStatus(jobId);
  if (job.status == 'completed') {
    break;
  }
  await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
}

console.log(job.data[0].content);

// Search for a query:
const query = 'what is mendable?'
const searchResult = await app.search(query)
console.log(searchResult)

// LLM Extraction:
// Define schema to extract contents into using zod schema
const zodSchema = z.object({
  top: z
    .array(
      z.object({
        title: z.string(),
        points: z.number(),
        by: z.string(),
        commentsURL: z.string(),
      })
    )
    .length(5)
    .describe("Top 5 stories on Hacker News"),
});

let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
  extractorOptions: { extractionSchema: zodSchema },
});

console.log(llmExtractionResult.data.llm_extraction);

// Define schema to extract contents into using json schema
const jsonSchema = {
  "type": "object",
  "properties": {
    "top": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "title": {"type": "string"},
          "points": {"type": "number"},
          "by": {"type": "string"},
          "commentsURL": {"type": "string"}
        },
        "required": ["title", "points", "by", "commentsURL"]
      },
      "minItems": 5,
      "maxItems": 5,
      "description": "Top 5 stories on Hacker News"
    }
  },
  "required": ["top"]
}

llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
  extractorOptions: { extractionSchema: jsonSchema },
});

console.log(llmExtractionResult.data.llm_extraction);
apps/js-sdk/exampleV0.ts (new file, 95 lines)
@@ -0,0 +1,95 @@
import FirecrawlApp, { ScrapeResponseV0, CrawlStatusResponseV0, SearchResponseV0 } from './firecrawl/src/index' //'@mendable/firecrawl-js';
import { z } from "zod";

const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY", version: "v0"});

// Scrape a website:
const scrapeResult = await app.scrapeUrl('firecrawl.dev') as ScrapeResponseV0;

if (scrapeResult.data) {
  console.log(scrapeResult.data.content)
}

// Crawl a website:
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
console.log(crawlResult)

const jobId: string = await crawlResult['jobId'];
console.log(jobId);

let job: CrawlStatusResponseV0;
while (true) {
  job = await app.checkCrawlStatus(jobId) as CrawlStatusResponseV0;
  if (job.status === 'completed') {
    break;
  }
  await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
}

if (job.data) {
  console.log(job.data[0].content);
}

// Search for a query:
const query = 'what is mendable?'
const searchResult = await app.search(query) as SearchResponseV0;
if (searchResult.data) {
  console.log(searchResult.data[0].content)
}

// LLM Extraction:
// Define schema to extract contents into using zod schema
const zodSchema = z.object({
  top: z
    .array(
      z.object({
        title: z.string(),
        points: z.number(),
        by: z.string(),
        commentsURL: z.string(),
      })
    )
    .length(5)
    .describe("Top 5 stories on Hacker News"),
});

let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
  extractorOptions: { extractionSchema: zodSchema },
});

if (llmExtractionResult.data) {
  console.log(llmExtractionResult.data[0].llm_extraction);
}

// Define schema to extract contents into using json schema
const jsonSchema = {
  "type": "object",
  "properties": {
    "top": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "title": {"type": "string"},
          "points": {"type": "number"},
          "by": {"type": "string"},
          "commentsURL": {"type": "string"}
        },
        "required": ["title", "points", "by", "commentsURL"]
      },
      "minItems": 5,
      "maxItems": 5,
      "description": "Top 5 stories on Hacker News"
    }
  },
  "required": ["top"]
}

llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
  extractorOptions: { extractionSchema: jsonSchema },
});

if (llmExtractionResult.data) {
  console.log(llmExtractionResult.data[0].llm_extraction);
}
apps/js-sdk/firecrawl/src/index.ts
@@ -1,8 +1,12 @@
 import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
 import { z } from "zod";
 import { zodToJsonSchema } from "zod-to-json-schema";

 /**
  * Configuration interface for FirecrawlApp.
+ * @param apiKey - Optional API key for authentication.
+ * @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
+ * @param version - API version, either 'v0' or 'v1'.
  */
 export interface FirecrawlAppConfig {
   apiKey?: string | null;
@@ -12,6 +16,7 @@ export interface FirecrawlAppConfig {

 /**
  * Metadata for a Firecrawl document.
+ * Includes various optional properties for document metadata.
  */
 export interface FirecrawlDocumentMetadata {
   title?: string;
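Taken together with the example files above, the config selects between API generations at construction time; a usage sketch:

// Sketch: version defaults to "v1"; pass "v0" to target the legacy endpoints.
const v1App = new FirecrawlApp({ apiKey: "fc-YOUR_API_KEY" });
const v0App = new FirecrawlApp({ apiKey: "fc-YOUR_API_KEY", version: "v0" });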
@@ -46,43 +51,15 @@ export interface FirecrawlDocumentMetadata {
   sourceURL?: string;
   statusCode?: number;
   error?: string;
-  [key: string]: any;
+  [key: string]: any; // Allows for additional metadata properties not explicitly defined.
 }

 /**
  * Metadata for a Firecrawl document on v0.
  * Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
  */
 export interface FirecrawlDocumentMetadataV0 {
   title?: string;
   description?: string;
   language?: string;
   keywords?: string;
   robots?: string;
   ogTitle?: string;
   ogDescription?: string;
   ogUrl?: string;
   ogImage?: string;
   ogAudio?: string;
   ogDeterminer?: string;
   ogLocale?: string;
   ogLocaleAlternate?: string[];
   ogSiteName?: string;
   ogVideo?: string;
   dctermsCreated?: string;
   dcDateCreated?: string;
   dcDate?: string;
   dctermsType?: string;
   dcType?: string;
   dctermsAudience?: string;
   dctermsSubject?: string;
   dcSubject?: string;
   dcDescription?: string;
   dctermsKeywords?: string;
   modifiedTime?: string;
   publishedTime?: string;
   articleTag?: string;
   articleSection?: string;
   sourceURL?: string;
   // Similar properties as FirecrawlDocumentMetadata with additional v0 specific adjustments
   pageStatusCode?: number;
   pageError?: string;
   [key: string]: any;
@@ -90,6 +67,7 @@ export interface FirecrawlDocumentMetadataV0 {

 /**
  * Document interface for Firecrawl.
+ * Represents a document retrieved or processed by Firecrawl.
  */
 export interface FirecrawlDocument {
   url?: string;
@@ -103,6 +81,7 @@ export interface FirecrawlDocument {

 /**
  * Document interface for Firecrawl on v0.
+ * Represents a document specifically for API version v0 with additional properties.
  */
 export interface FirecrawlDocumentV0 {
   id?: string;
@@ -121,8 +100,49 @@ export interface FirecrawlDocumentV0 {
   index?: number;
 }

+/**
+ * Parameters for scraping operations.
+ * Defines the options and configurations available for scraping web content.
+ */
+export interface ScrapeParams {
+  formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
+  headers?: Record<string, string>;
+  includeTags?: string[];
+  excludeTags?: string[];
+  onlyMainContent?: boolean;
+  screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
+  waitFor?: number;
+  timeout?: number;
+}
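A v1 scrape call exercising the new ScrapeParams shape; a sketch, with the cast mirroring the one used in example.ts since scrapeUrl returns a v0/v1 union:

const res = await app.scrapeUrl("https://firecrawl.dev", {
  formats: ["markdown", "links"], // formats is the only required field
  onlyMainContent: true,
  timeout: 30000,
}) as ScrapeResponse;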
+/**
+ * Parameters for scraping operations on v0.
+ * Includes page and extractor options specific to API version v0.
+ */
+export interface ScrapeParamsV0 {
+  pageOptions?: {
+    headers?: Record<string, string>;
+    includeHtml?: boolean;
+    includeRawHtml?: boolean;
+    onlyIncludeTags?: string[];
+    onlyMainContent?: boolean;
+    removeTags?: string[];
+    replaceAllPathsWithAbsolutePaths?: boolean;
+    screenshot?: boolean;
+    fullPageScreenshot?: boolean;
+    waitFor?: number;
+  };
+  extractorOptions?: {
+    mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
+    extractionPrompt?: string;
+    extractionSchema: Record<string, any> | z.ZodSchema | any;
+  };
+  timeout?: number;
+}
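The v0 counterpart nests options one level deeper; a sketch of the same scrape against a v0 client (v0App from the earlier sketch):

const resV0 = await v0App.scrapeUrl("https://firecrawl.dev", {
  pageOptions: { onlyMainContent: true, includeHtml: true },
}) as ScrapeResponseV0;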
 /**
  * Response interface for scraping operations.
+ * Defines the structure of the response received after a scraping operation.
  */
 export interface ScrapeResponse {
   success: boolean;
@@ -133,6 +153,7 @@ export interface ScrapeResponse {

 /**
  * Response interface for scraping operations on v0.
+ * Similar to ScrapeResponse but tailored for responses from API version v0.
  */
 export interface ScrapeResponseV0 {
   success: boolean;
@@ -141,38 +162,71 @@ export interface ScrapeResponseV0 {
 }

 /**
- * Response interface for searching operations.
+ * Parameters for crawling operations.
+ * Includes options for both scraping and mapping during a crawl.
  */
-export interface SearchResponseV0 {
-  success: boolean;
-  data?: FirecrawlDocument[];
-  error?: string;
+export interface CrawlParams {
+  scrapeOptions?: ScrapeParams;
+  crawlerOptions?: MapParams;
 }
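CrawlParams composes the two v1 option groups; a sketch of a crawl that limits discovery and scrapes markdown only (crawlerOptions reuses MapParams, defined further down in this diff):

const crawl = await app.crawlUrl("https://firecrawl.dev", {
  crawlerOptions: { limit: 5, excludePaths: ["blog/*"] },
  scrapeOptions: { formats: ["markdown"] },
});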
+/**
+ * Parameters for crawling operations on v0.
+ * Tailored for API version v0, includes specific options for crawling.
+ */
+export interface CrawlParamsV0 {
+  crawlerOptions?: {
+    includes?: string[];
+    excludes?: string[];
+    generateImgAltText?: boolean;
+    returnOnlyUrls?: boolean;
+    maxDepth?: number;
+    mode?: "default" | "fast";
+    ignoreSitemap?: boolean;
+    limit?: number;
+    allowBackwardCrawling?: boolean;
+    allowExternalContentLinks?: boolean;
+  };
+  pageOptions?: {
+    headers?: Record<string, string>;
+    includeHtml?: boolean;
+    includeRawHtml?: boolean;
+    onlyIncludeTags?: string[];
+    onlyMainContent?: boolean;
+    removeTags?: string[];
+    replaceAllPathsWithAbsolutePaths?: boolean;
+    screenshot?: boolean;
+    fullPageScreenshot?: boolean;
+    waitFor?: number;
+  };
+}
 /**
  * Response interface for crawling operations.
+ * Defines the structure of the response received after initiating a crawl.
  */
 export interface CrawlResponse {
-  success: boolean;
-  jobId?: string;
-  data?: FirecrawlDocument[];
+  url?: string;
+  success: boolean;
   error?: string;
 }

 /**
  * Response interface for crawling operations on v0.
+ * Similar to CrawlResponse but tailored for responses from API version v0.
  */
 export interface CrawlResponseV0 {
-  success: boolean;
   jobId?: string;
   data?: FirecrawlDocument[];
+  success: boolean;
   error?: string;
 }
 /**
  * Response interface for job status checks.
+ * Provides detailed status of a crawl job including progress and results.
  */
-export interface JobStatusResponse {
+export interface CrawlStatusResponse {
   success: boolean;
   totalCount: number;
   creditsUsed: number;
@@ -185,8 +239,9 @@ export interface JobStatusResponse {

 /**
  * Response interface for job status checks on v0.
+ * Tailored for API version v0, provides status and partial data of a crawl job.
  */
-export interface JobStatusResponseV0 {
+export interface CrawlStatusResponseV0 {
   success: boolean;
   status: string;
   current?: number;
@@ -199,18 +254,58 @@ export interface JobStatusResponseV0 {
 }
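The rename from JobStatusResponse to CrawlStatusResponse carries different fields per version: v1 reports totalCount and creditsUsed, while v0 keeps current/current_step/total and may expose partial_data mid-crawl. A v0 progress sketch, reusing v0App and jobId from the earlier sketches and assuming the v0 progress fields are optional:

const statusV0 = await v0App.checkCrawlStatus(jobId) as CrawlStatusResponseV0;
if (statusV0.status !== 'completed') {
  console.log(`${statusV0.current ?? 0}/${statusV0.total ?? 0}`, statusV0.current_step);
}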
 /**
- * Generic parameter interface.
+ * Parameters for mapping operations.
+ * Defines options for mapping URLs during a crawl.
  */
-export interface Params {
-  [key: string]: any;
-  extractorOptions?: {
-    extractionSchema: z.ZodSchema | any;
-    mode?: "llm-extraction";
-    extractionPrompt?: string;
-  };
-}
+export interface MapParams {
+  includePaths?: string[]
+  excludePaths?: string[]
+  maxDepth?: number
+  limit?: number
+  allowBackwardLinks?: boolean
+  allowExternalLinks?: boolean
+  ignoreSitemap?: boolean
+}

+/**
+ * Response interface for mapping operations.
+ * Defines the structure of the response received after a mapping operation.
+ */
+export interface MapResponse {
+  success: boolean;
+  data?: string[];
+  error?: string;
+}
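The new map endpoint pairs MapParams with MapResponse; a usage sketch:

const mapped = await app.map("https://firecrawl.dev", { ignoreSitemap: true, limit: 100 });
if (mapped.success && mapped.data) {
  console.log(mapped.data.length, "urls discovered"); // data?: string[]
}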
+/**
+ * Parameters for searching operations on v0.
+ * Tailored for API version v0, includes specific options for searching content.
+ */
+export interface SearchParamsV0 {
+  pageOptions?: {
+    onlyMainContent?: boolean;
+    fetchPageContent?: boolean;
+    includeHtml?: boolean;
+    includeRawHtml?: boolean;
+  };
+  searchOptions?: {
+    limit?: number;
+  };
+}

+/**
+ * Response interface for searching operations on v0.
+ * Defines the structure of the response received after a search operation on v0.
+ */
+export interface SearchResponseV0 {
+  success: boolean;
+  data?: FirecrawlDocumentV0[];
+  error?: string;
+}
 /**
  * Main class for interacting with the Firecrawl API.
+ * Provides methods for scraping, searching, crawling, and mapping web content.
  */
 export default class FirecrawlApp {
   private apiKey: string;
@@ -219,7 +314,7 @@ export default class FirecrawlApp {

   /**
    * Initializes a new instance of the FirecrawlApp class.
-   * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
+   * @param config - Configuration options for the FirecrawlApp instance.
    */
   constructor({ apiKey = null, apiUrl = null, version = "v1" }: FirecrawlAppConfig) {
     this.apiKey = apiKey || "";
@@ -232,26 +327,21 @@ export default class FirecrawlApp {

   /**
    * Scrapes a URL using the Firecrawl API.
-   * @param {string} url - The URL to scrape.
-   * @param {Params | null} params - Additional parameters for the scrape request.
-   * @returns {Promise<ScrapeResponse | ScrapeResponseV0>} The response from the scrape operation.
+   * @param url - The URL to scrape.
+   * @param params - Additional parameters for the scrape request.
+   * @returns The response from the scrape operation.
    */
   async scrapeUrl(
     url: string,
-    params: Params | null = null,
-    version: "v0" | "v1" = "v1"
+    params?: ScrapeParams | ScrapeParamsV0
   ): Promise<ScrapeResponse | ScrapeResponseV0> {
-    if (version) {
-      this.version = version;
-    }
-
     const headers: AxiosRequestHeaders = {
       "Content-Type": "application/json",
       Authorization: `Bearer ${this.apiKey}`,
     } as AxiosRequestHeaders;
-    let jsonData: Params = { url, ...params };
-    if (params?.extractorOptions?.extractionSchema) {
-      let schema = params.extractorOptions.extractionSchema;
+    let jsonData: any = { url, ...params };
+    if (jsonData?.extractorOptions?.extractionSchema) {
+      let schema = jsonData.extractorOptions.extractionSchema;
       // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
       if (schema instanceof z.ZodSchema) {
         schema = zodToJsonSchema(schema);
@@ -259,9 +349,9 @@ export default class FirecrawlApp {
       jsonData = {
         ...jsonData,
         extractorOptions: {
-          ...params.extractorOptions,
+          ...jsonData.extractorOptions,
           extractionSchema: schema,
-          mode: params.extractorOptions.mode || "llm-extraction",
+          mode: jsonData.extractorOptions.mode || "llm-extraction",
         },
       };
     }
@@ -274,7 +364,11 @@ export default class FirecrawlApp {
     if (response.status === 200) {
       const responseData = response.data;
       if (responseData.success) {
-        return responseData;
+        if (this.version == 'v0') {
+          return responseData as ScrapeResponseV0;
+        } else {
+          return responseData as ScrapeResponse;
+        }
       } else {
         throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
       }
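Note how scrapeUrl normalizes schemas before posting: a zod schema is converted to JSON Schema, which is why the example files can pass either zodSchema or jsonSchema interchangeably. The conversion is equivalent to this sketch:

import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";

const schema = z.object({ title: z.string() });
// Zod schemas are detected with instanceof and converted; plain objects pass through.
const body = schema instanceof z.ZodSchema ? zodToJsonSchema(schema) : schema;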
@@ -289,13 +383,13 @@ export default class FirecrawlApp {

   /**
    * Searches for a query using the Firecrawl API.
-   * @param {string} query - The query to search for.
-   * @param {Params | null} params - Additional parameters for the search request.
-   * @returns {Promise<SearchResponse>} The response from the search operation.
+   * @param query - The query to search for.
+   * @param params - Additional parameters for the search request.
+   * @returns The response from the search operation.
    */
   async search(
     query: string,
-    params: Params | null = null
+    params?: SearchParamsV0
   ): Promise<SearchResponseV0> {
     if (this.version === "v1") {
       throw new Error("Search is not supported in v1");
@@ -305,7 +399,7 @@ export default class FirecrawlApp {
       "Content-Type": "application/json",
       Authorization: `Bearer ${this.apiKey}`,
     } as AxiosRequestHeaders;
-    let jsonData: Params = { query };
+    let jsonData: any = { query };
     if (params) {
       jsonData = { ...jsonData, ...params };
     }
@@ -333,30 +427,22 @@ export default class FirecrawlApp {

   /**
    * Initiates a crawl job for a URL using the Firecrawl API.
-   * @param {string} url - The URL to crawl.
-   * @param {Params | null} params - Additional parameters for the crawl request.
-   * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
-   * @param {number} pollInterval - Time in seconds for job status checks.
-   * @param {string} idempotencyKey - Optional idempotency key for the request.
-   * @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
+   * @param url - The URL to crawl.
+   * @param params - Additional parameters for the crawl request.
+   * @param waitUntilDone - Whether to wait for the crawl job to complete.
+   * @param pollInterval - Time in seconds for job status checks.
+   * @param idempotencyKey - Optional idempotency key for the request.
+   * @returns The response from the crawl operation.
    */
   async crawlUrl(
     url: string,
-    params: Params | null = null,
+    params?: CrawlParams | CrawlParamsV0,
     waitUntilDone: boolean = true,
     pollInterval: number = 2,
-    idempotencyKey?: string,
-    version: "v0" | "v1" = "v1"
-  ): Promise<CrawlResponse | CrawlResponseV0 | JobStatusResponse | JobStatusResponseV0> {
-    if (version) {
-      this.version = version;
-    }
-
+    idempotencyKey?: string
+  ): Promise<CrawlResponse | CrawlResponseV0 | CrawlStatusResponse | CrawlStatusResponseV0> {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData: Params = { url };
-    if (params) {
-      jsonData = { ...jsonData, ...params };
-    }
+    let jsonData: any = { url, ...params };
     try {
       const response: AxiosResponse = await this.postRequest(
         this.apiUrl + `/${this.version}/crawl`,
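With waitUntilDone at its default of true, crawlUrl hands off to monitorJobStatus and resolves with the finished job rather than a job id, which is why the return type includes the status shapes; a sketch:

const done = await app.crawlUrl(
  "https://firecrawl.dev",
  { crawlerOptions: { limit: 5 } },
  true, // waitUntilDone: poll until the crawl finishes
  2     // pollInterval, seconds
) as CrawlStatusResponse;
console.log(done.status, done.data?.length);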
@@ -382,10 +468,10 @@ export default class FirecrawlApp {

   /**
    * Checks the status of a crawl job using the Firecrawl API.
-   * @param {string} jobId - The job ID of the crawl operation.
-   * @returns {Promise<JobStatusResponse | JobStatusResponseV0>} The response containing the job status.
+   * @param jobId - The job ID of the crawl operation.
+   * @returns The response containing the job status.
    */
-  async checkCrawlStatus(jobId: string): Promise<JobStatusResponse | JobStatusResponseV0> {
+  async checkCrawlStatus(jobId: string): Promise<CrawlStatusResponse | CrawlStatusResponseV0> {
     const headers: AxiosRequestHeaders = this.prepareHeaders();
     try {
       const response: AxiosResponse = await this.getRequest(
@@ -395,38 +481,80 @@ export default class FirecrawlApp {
         headers
       );
       if (response.status === 200) {
-        return {
-          success: true,
-          status: response.data.status,
-          current: response.data.current,
-          current_url: response.data.current_url,
-          current_step: response.data.current_step,
-          total: response.data.total,
-          data: response.data.data,
-          partial_data: !response.data.data
-            ? response.data.partial_data
-            : undefined,
-        };
+        if (this.version == 'v0') {
+          return {
+            success: true,
+            status: response.data.status,
+            current: response.data.current,
+            current_url: response.data.current_url,
+            current_step: response.data.current_step,
+            total: response.data.total,
+            data: response.data.data,
+            partial_data: !response.data.data
+              ? response.data.partial_data
+              : undefined,
+          } as CrawlStatusResponseV0;
+        } else if (this.version == 'v1') {
+          return {
+            success: true,
+            status: response.data.status,
+            data: response.data.data,
+            error: response.data.error,
+          } as CrawlStatusResponse;
+        }
       } else {
         this.handleError(response, "check crawl status");
       }
     } catch (error: any) {
       throw new Error(error.message);
     }
-    return {
-      success: false,
-      status: "unknown",
-      current: 0,
-      current_url: "",
-      current_step: "",
-      total: 0,
-      error: "Internal server error.",
-    };
+    if (this.version == 'v0') {
+      return {
+        success: false,
+        status: "unknown",
+        current: 0,
+        current_url: "",
+        current_step: "",
+        total: 0,
+        error: "Internal server error.",
+      } as CrawlStatusResponseV0;
+    } else {
+      return {
+        success: false,
+        error: "Internal server error.",
+      } as CrawlStatusResponse;
+    }
   }
+  async map(url: string, params?: MapParams): Promise<MapResponse> {
+    if (this.version == 'v0') {
+      throw new Error("Map is not supported in v0");
+    }
+    const headers = this.prepareHeaders();
+    let jsonData: { url: string } & MapParams = { url, ...params };
+
+    try {
+      const response: AxiosResponse = await this.postRequest(
+        this.apiUrl + `/${this.version}/map`,
+        jsonData,
+        headers
+      );
+      if (response.status === 200) {
+        return response.data as MapResponse;
+      } else {
+        this.handleError(response, "map");
+      }
+    } catch (error: any) {
+      throw new Error(error.message);
+    }
+    return { success: false, error: "Internal server error." } as MapResponse;
+  }
   /**
    * Prepares the headers for an API request.
-   * @returns {AxiosRequestHeaders} The prepared headers.
+   * @param idempotencyKey - Optional key to ensure idempotency.
+   * @returns The prepared headers.
    */
   prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
     return {
@@ -438,14 +566,14 @@ export default class FirecrawlApp {

   /**
    * Sends a POST request to the specified URL.
-   * @param {string} url - The URL to send the request to.
-   * @param {Params} data - The data to send in the request.
-   * @param {AxiosRequestHeaders} headers - The headers for the request.
-   * @returns {Promise<AxiosResponse>} The response from the POST request.
+   * @param url - The URL to send the request to.
+   * @param data - The data to send in the request.
+   * @param headers - The headers for the request.
+   * @returns The response from the POST request.
    */
   postRequest(
     url: string,
-    data: Params,
+    data: any,
     headers: AxiosRequestHeaders
   ): Promise<AxiosResponse> {
     return axios.post(url, data, { headers });
@@ -453,9 +581,9 @@ export default class FirecrawlApp {

   /**
    * Sends a GET request to the specified URL.
-   * @param {string} url - The URL to send the request to.
-   * @param {AxiosRequestHeaders} headers - The headers for the request.
-   * @returns {Promise<AxiosResponse>} The response from the GET request.
+   * @param url - The URL to send the request to.
+   * @param headers - The headers for the request.
+   * @returns The response from the GET request.
    */
   getRequest(
     url: string,
@@ -466,10 +594,10 @@ export default class FirecrawlApp {

   /**
    * Monitors the status of a crawl job until completion or failure.
-   * @param {string} jobId - The job ID of the crawl operation.
-   * @param {AxiosRequestHeaders} headers - The headers for the request.
-   * @param {number} timeout - Timeout in seconds for job status checks.
-   * @returns {Promise<any>} The final job status or data.
+   * @param jobId - The job ID of the crawl operation.
+   * @param headers - The headers for the request.
+   * @param checkInterval - Interval in seconds for job status checks.
+   * @returns The final job status or data.
    */
   async monitorJobStatus(
     jobId: string,
tsconfig.json
@@ -11,7 +11,7 @@
     // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */

     /* Language and Environment */
-    "target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
+    "target": "es2020", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
     // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
     // "jsx": "preserve", /* Specify what JSX code is generated. */
     // "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
@@ -25,9 +25,9 @@
     // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */

     /* Modules */
-    "module": "NodeNext", /* Specify what module code is generated. */
+    "module": "commonjs", /* Specify what module code is generated. */
     "rootDir": "./src", /* Specify the root folder within your source files. */
-    "moduleResolution": "nodenext", /* Specify how TypeScript looks up a file from a given module specifier. */
+    "moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */
     // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
     // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
     // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */