mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-13 20:45:57 +08:00
Nick: llm extract support on node sdk
This commit is contained in:
parent
45e33563eb
commit
a2881e9288
@ -35,7 +35,7 @@ class FirecrawlApp {
|
|||||||
Authorization: `Bearer ${this.apiKey}`,
|
Authorization: `Bearer ${this.apiKey}`,
|
||||||
};
|
};
|
||||||
let jsonData = { url, ...params };
|
let jsonData = { url, ...params };
|
||||||
if (jsonData?.extractorOptions?.extractionSchema) {
|
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
|
||||||
let schema = jsonData.extractorOptions.extractionSchema;
|
let schema = jsonData.extractorOptions.extractionSchema;
|
||||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||||
if (schema instanceof zod_1.z.ZodSchema) {
|
if (schema instanceof zod_1.z.ZodSchema) {
|
||||||
@ -50,6 +50,20 @@ class FirecrawlApp {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
else if (this.version === 'v1' && jsonData?.extract?.schema) {
|
||||||
|
let schema = jsonData.extract.schema;
|
||||||
|
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||||
|
if (schema instanceof zod_1.z.ZodSchema) {
|
||||||
|
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
||||||
|
}
|
||||||
|
jsonData = {
|
||||||
|
...jsonData,
|
||||||
|
extract: {
|
||||||
|
...jsonData.extract,
|
||||||
|
schema: schema,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
const response = await axios_1.default.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
|
const response = await axios_1.default.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
|
@ -30,7 +30,7 @@ export default class FirecrawlApp {
|
|||||||
Authorization: `Bearer ${this.apiKey}`,
|
Authorization: `Bearer ${this.apiKey}`,
|
||||||
};
|
};
|
||||||
let jsonData = { url, ...params };
|
let jsonData = { url, ...params };
|
||||||
if (jsonData?.extractorOptions?.extractionSchema) {
|
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
|
||||||
let schema = jsonData.extractorOptions.extractionSchema;
|
let schema = jsonData.extractorOptions.extractionSchema;
|
||||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||||
if (schema instanceof z.ZodSchema) {
|
if (schema instanceof z.ZodSchema) {
|
||||||
@ -45,6 +45,20 @@ export default class FirecrawlApp {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
else if (this.version === 'v1' && jsonData?.extract?.schema) {
|
||||||
|
let schema = jsonData.extract.schema;
|
||||||
|
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||||
|
if (schema instanceof z.ZodSchema) {
|
||||||
|
schema = zodToJsonSchema(schema);
|
||||||
|
}
|
||||||
|
jsonData = {
|
||||||
|
...jsonData,
|
||||||
|
extract: {
|
||||||
|
...jsonData.extract,
|
||||||
|
schema: schema,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
const response = await axios.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
|
const response = await axios.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
|
@ -106,7 +106,7 @@ export interface FirecrawlDocumentV0 {
|
|||||||
* Defines the options and configurations available for scraping web content.
|
* Defines the options and configurations available for scraping web content.
|
||||||
*/
|
*/
|
||||||
export interface ScrapeParams {
|
export interface ScrapeParams {
|
||||||
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
|
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract")[];
|
||||||
headers?: Record<string, string>;
|
headers?: Record<string, string>;
|
||||||
includeTags?: string[];
|
includeTags?: string[];
|
||||||
excludeTags?: string[];
|
excludeTags?: string[];
|
||||||
@ -114,6 +114,11 @@ export interface ScrapeParams {
|
|||||||
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
|
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
|
||||||
waitFor?: number;
|
waitFor?: number;
|
||||||
timeout?: number;
|
timeout?: number;
|
||||||
|
extract?: {
|
||||||
|
prompt?: string;
|
||||||
|
schema?: z.ZodSchema | any;
|
||||||
|
systemPrompt?: string;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -345,13 +350,12 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|||||||
Authorization: `Bearer ${this.apiKey}`,
|
Authorization: `Bearer ${this.apiKey}`,
|
||||||
} as AxiosRequestHeaders;
|
} as AxiosRequestHeaders;
|
||||||
let jsonData: any = { url, ...params };
|
let jsonData: any = { url, ...params };
|
||||||
if (jsonData?.extractorOptions?.extractionSchema || jsonData?.extract?.schema) {
|
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
|
||||||
let schema = jsonData.extractorOptions?.extractionSchema || jsonData.extract?.schema;
|
let schema = jsonData.extractorOptions.extractionSchema;
|
||||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||||
if (schema instanceof z.ZodSchema) {
|
if (schema instanceof z.ZodSchema || schema instanceof z.ZodObject) {
|
||||||
schema = zodToJsonSchema(schema);
|
schema = zodToJsonSchema(schema);
|
||||||
}
|
}
|
||||||
if(this.version === 'v0') {
|
|
||||||
jsonData = {
|
jsonData = {
|
||||||
...jsonData,
|
...jsonData,
|
||||||
extractorOptions: {
|
extractorOptions: {
|
||||||
@ -360,7 +364,15 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|||||||
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
} else {
|
} else if (this.version === 'v1' && jsonData?.extract?.schema) {
|
||||||
|
let schema = jsonData.extract.schema;
|
||||||
|
|
||||||
|
// Try parsing the schema as a Zod schema
|
||||||
|
try {
|
||||||
|
schema = zodToJsonSchema(schema);
|
||||||
|
} catch (error) {
|
||||||
|
|
||||||
|
}
|
||||||
jsonData = {
|
jsonData = {
|
||||||
...jsonData,
|
...jsonData,
|
||||||
extract: {
|
extract: {
|
||||||
@ -369,7 +381,6 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
|
||||||
try {
|
try {
|
||||||
const response: AxiosResponse = await axios.post(
|
const response: AxiosResponse = await axios.post(
|
||||||
this.apiUrl + `/${this.version}/scrape`,
|
this.apiUrl + `/${this.version}/scrape`,
|
||||||
|
8
apps/js-sdk/firecrawl/types/index.d.ts
vendored
8
apps/js-sdk/firecrawl/types/index.d.ts
vendored
@ -69,6 +69,7 @@ export interface FirecrawlDocument {
|
|||||||
html?: string;
|
html?: string;
|
||||||
rawHtml?: string;
|
rawHtml?: string;
|
||||||
links?: string[];
|
links?: string[];
|
||||||
|
extract?: Record<any, any>;
|
||||||
screenshot?: string;
|
screenshot?: string;
|
||||||
metadata: FirecrawlDocumentMetadata;
|
metadata: FirecrawlDocumentMetadata;
|
||||||
}
|
}
|
||||||
@ -97,7 +98,7 @@ export interface FirecrawlDocumentV0 {
|
|||||||
* Defines the options and configurations available for scraping web content.
|
* Defines the options and configurations available for scraping web content.
|
||||||
*/
|
*/
|
||||||
export interface ScrapeParams {
|
export interface ScrapeParams {
|
||||||
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
|
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract")[];
|
||||||
headers?: Record<string, string>;
|
headers?: Record<string, string>;
|
||||||
includeTags?: string[];
|
includeTags?: string[];
|
||||||
excludeTags?: string[];
|
excludeTags?: string[];
|
||||||
@ -105,6 +106,11 @@ export interface ScrapeParams {
|
|||||||
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
|
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
|
||||||
waitFor?: number;
|
waitFor?: number;
|
||||||
timeout?: number;
|
timeout?: number;
|
||||||
|
extract?: {
|
||||||
|
prompt?: string;
|
||||||
|
schema?: z.ZodSchema | any;
|
||||||
|
systemPrompt?: string;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Parameters for scraping operations on v0.
|
* Parameters for scraping operations on v0.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user