Nick: LLM extract support on Node SDK

This commit is contained in:
Nicolas 2024-08-30 13:43:19 -03:00
parent 45e33563eb
commit a2881e9288
4 changed files with 69 additions and 24 deletions

View File

@ -35,7 +35,7 @@ class FirecrawlApp {
Authorization: `Bearer ${this.apiKey}`, Authorization: `Bearer ${this.apiKey}`,
}; };
let jsonData = { url, ...params }; let jsonData = { url, ...params };
if (jsonData?.extractorOptions?.extractionSchema) { if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
let schema = jsonData.extractorOptions.extractionSchema; let schema = jsonData.extractorOptions.extractionSchema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof zod_1.z.ZodSchema) { if (schema instanceof zod_1.z.ZodSchema) {
@ -50,6 +50,20 @@ class FirecrawlApp {
}, },
}; };
} }
else if (this.version === 'v1' && jsonData?.extract?.schema) {
let schema = jsonData.extract.schema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof zod_1.z.ZodSchema) {
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
}
jsonData = {
...jsonData,
extract: {
...jsonData.extract,
schema: schema,
},
};
}
try { try {
const response = await axios_1.default.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers }); const response = await axios_1.default.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
if (response.status === 200) { if (response.status === 200) {

View File

@ -30,7 +30,7 @@ export default class FirecrawlApp {
Authorization: `Bearer ${this.apiKey}`, Authorization: `Bearer ${this.apiKey}`,
}; };
let jsonData = { url, ...params }; let jsonData = { url, ...params };
if (jsonData?.extractorOptions?.extractionSchema) { if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
let schema = jsonData.extractorOptions.extractionSchema; let schema = jsonData.extractorOptions.extractionSchema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof z.ZodSchema) { if (schema instanceof z.ZodSchema) {
@ -45,6 +45,20 @@ export default class FirecrawlApp {
}, },
}; };
} }
else if (this.version === 'v1' && jsonData?.extract?.schema) {
let schema = jsonData.extract.schema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof z.ZodSchema) {
schema = zodToJsonSchema(schema);
}
jsonData = {
...jsonData,
extract: {
...jsonData.extract,
schema: schema,
},
};
}
try { try {
const response = await axios.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers }); const response = await axios.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
if (response.status === 200) { if (response.status === 200) {

View File

@ -106,7 +106,7 @@ export interface FirecrawlDocumentV0 {
* Defines the options and configurations available for scraping web content. * Defines the options and configurations available for scraping web content.
*/ */
export interface ScrapeParams { export interface ScrapeParams {
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[]; formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract")[];
headers?: Record<string, string>; headers?: Record<string, string>;
includeTags?: string[]; includeTags?: string[];
excludeTags?: string[]; excludeTags?: string[];
@ -114,6 +114,11 @@ export interface ScrapeParams {
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile"; screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
waitFor?: number; waitFor?: number;
timeout?: number; timeout?: number;
extract?: {
prompt?: string;
schema?: z.ZodSchema | any;
systemPrompt?: string;
};
} }
/** /**
@ -345,13 +350,12 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
Authorization: `Bearer ${this.apiKey}`, Authorization: `Bearer ${this.apiKey}`,
} as AxiosRequestHeaders; } as AxiosRequestHeaders;
let jsonData: any = { url, ...params }; let jsonData: any = { url, ...params };
if (jsonData?.extractorOptions?.extractionSchema || jsonData?.extract?.schema) { if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
let schema = jsonData.extractorOptions?.extractionSchema || jsonData.extract?.schema; let schema = jsonData.extractorOptions.extractionSchema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof z.ZodSchema) { if (schema instanceof z.ZodSchema || schema instanceof z.ZodObject) {
schema = zodToJsonSchema(schema); schema = zodToJsonSchema(schema);
} }
if(this.version === 'v0') {
jsonData = { jsonData = {
...jsonData, ...jsonData,
extractorOptions: { extractorOptions: {
@ -360,7 +364,15 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
mode: jsonData.extractorOptions.mode || "llm-extraction", mode: jsonData.extractorOptions.mode || "llm-extraction",
}, },
}; };
} else { } else if (this.version === 'v1' && jsonData?.extract?.schema) {
let schema = jsonData.extract.schema;
// Attempt to convert the schema with zodToJsonSchema; if the conversion throws, the schema is left unchanged
try {
schema = zodToJsonSchema(schema);
} catch (error) {
}
jsonData = { jsonData = {
...jsonData, ...jsonData,
extract: { extract: {
@ -369,7 +381,6 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
}, },
}; };
} }
}
try { try {
const response: AxiosResponse = await axios.post( const response: AxiosResponse = await axios.post(
this.apiUrl + `/${this.version}/scrape`, this.apiUrl + `/${this.version}/scrape`,

View File

@ -69,6 +69,7 @@ export interface FirecrawlDocument {
html?: string; html?: string;
rawHtml?: string; rawHtml?: string;
links?: string[]; links?: string[];
extract?: Record<any, any>;
screenshot?: string; screenshot?: string;
metadata: FirecrawlDocumentMetadata; metadata: FirecrawlDocumentMetadata;
} }
@ -97,7 +98,7 @@ export interface FirecrawlDocumentV0 {
* Defines the options and configurations available for scraping web content. * Defines the options and configurations available for scraping web content.
*/ */
export interface ScrapeParams { export interface ScrapeParams {
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[]; formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract")[];
headers?: Record<string, string>; headers?: Record<string, string>;
includeTags?: string[]; includeTags?: string[];
excludeTags?: string[]; excludeTags?: string[];
@ -105,6 +106,11 @@ export interface ScrapeParams {
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile"; screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
waitFor?: number; waitFor?: number;
timeout?: number; timeout?: number;
extract?: {
prompt?: string;
schema?: z.ZodSchema | any;
systemPrompt?: string;
};
} }
/** /**
* Parameters for scraping operations on v0. * Parameters for scraping operations on v0.