mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-13 04:59:09 +08:00
Nick: 1.2 - v1 llm extract
This commit is contained in:
parent
055177cf0b
commit
af5cc5f16b
@ -44,7 +44,7 @@ const strictMessage = "Unrecognized key in body -- please review the v1 API docu
|
||||
export const extractOptions = z.object({
|
||||
mode: z.enum(["llm"]).default("llm"),
|
||||
schema: z.any().optional(),
|
||||
systemPrompt: z.string().default("Based on the information on the page, extract the information from the schema."),
|
||||
systemPrompt: z.string().default("Based on the information on the page, extract all the information from the schema. Try to extract all the fields even those that might not be marked as required."),
|
||||
prompt: z.string().optional()
|
||||
}).strict(strictMessage);
|
||||
|
||||
|
@ -5,7 +5,6 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.CrawlWatcher = void 0;
|
||||
const axios_1 = __importDefault(require("axios"));
|
||||
const zod_1 = require("zod");
|
||||
const zod_to_json_schema_1 = require("zod-to-json-schema");
|
||||
const isows_1 = require("isows");
|
||||
const typescript_event_target_1 = require("typescript-event-target");
|
||||
@ -34,26 +33,13 @@ class FirecrawlApp {
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
};
|
||||
let jsonData = { url, ...params };
|
||||
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
|
||||
let schema = jsonData.extractorOptions.extractionSchema;
|
||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||
if (schema instanceof zod_1.z.ZodSchema) {
|
||||
if (jsonData?.extract?.schema) {
|
||||
let schema = jsonData.extract.schema;
|
||||
// Try parsing the schema as a Zod schema
|
||||
try {
|
||||
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
||||
}
|
||||
jsonData = {
|
||||
...jsonData,
|
||||
extractorOptions: {
|
||||
...jsonData.extractorOptions,
|
||||
extractionSchema: schema,
|
||||
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
||||
},
|
||||
};
|
||||
}
|
||||
else if (this.version === 'v1' && jsonData?.extract?.schema) {
|
||||
let schema = jsonData.extract.schema;
|
||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||
if (schema instanceof zod_1.z.ZodSchema) {
|
||||
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
||||
catch (error) {
|
||||
}
|
||||
jsonData = {
|
||||
...jsonData,
|
||||
|
@ -1,5 +1,4 @@
|
||||
import axios from "axios";
|
||||
import { z } from "zod";
|
||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||
import { WebSocket } from "isows";
|
||||
import { TypedEventTarget } from "typescript-event-target";
|
||||
@ -28,26 +27,13 @@ export default class FirecrawlApp {
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
};
|
||||
let jsonData = { url, ...params };
|
||||
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
|
||||
let schema = jsonData.extractorOptions.extractionSchema;
|
||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||
if (schema instanceof z.ZodSchema) {
|
||||
if (jsonData?.extract?.schema) {
|
||||
let schema = jsonData.extract.schema;
|
||||
// Try parsing the schema as a Zod schema
|
||||
try {
|
||||
schema = zodToJsonSchema(schema);
|
||||
}
|
||||
jsonData = {
|
||||
...jsonData,
|
||||
extractorOptions: {
|
||||
...jsonData.extractorOptions,
|
||||
extractionSchema: schema,
|
||||
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
||||
},
|
||||
};
|
||||
}
|
||||
else if (this.version === 'v1' && jsonData?.extract?.schema) {
|
||||
let schema = jsonData.extract.schema;
|
||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||
if (schema instanceof z.ZodSchema) {
|
||||
schema = zodToJsonSchema(schema);
|
||||
catch (error) {
|
||||
}
|
||||
jsonData = {
|
||||
...jsonData,
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "1.1.0",
|
||||
"version": "1.2.0",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "build/cjs/index.js",
|
||||
"types": "types/index.d.ts",
|
||||
|
6
apps/js-sdk/firecrawl/types/index.d.ts
vendored
6
apps/js-sdk/firecrawl/types/index.d.ts
vendored
@ -1,6 +1,6 @@
|
||||
import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
||||
import { TypedEventTarget } from "typescript-event-target";
|
||||
import { z } from "zod";
|
||||
import { TypedEventTarget } from "typescript-event-target";
|
||||
/**
|
||||
* Configuration interface for FirecrawlApp.
|
||||
* @param apiKey - Optional API key for authentication.
|
||||
@ -73,13 +73,13 @@ export interface ScrapeParams {
|
||||
includeTags?: string[];
|
||||
excludeTags?: string[];
|
||||
onlyMainContent?: boolean;
|
||||
waitFor?: number;
|
||||
timeout?: number;
|
||||
extract?: {
|
||||
prompt?: string;
|
||||
schema?: z.ZodSchema | any;
|
||||
systemPrompt?: string;
|
||||
};
|
||||
waitFor?: number;
|
||||
timeout?: number;
|
||||
}
|
||||
/**
|
||||
* Response interface for scraping operations.
|
||||
|
@ -13,7 +13,7 @@ import os
|
||||
|
||||
from .firecrawl import FirecrawlApp
|
||||
|
||||
__version__ = "1.1.1"
|
||||
__version__ = "1.2.1"
|
||||
|
||||
# Define the logger for the Firecrawl project
|
||||
logger: logging.Logger = logging.getLogger("firecrawl")
|
||||
|
@ -59,31 +59,17 @@ class FirecrawlApp:
|
||||
|
||||
# If there are additional params, process them
|
||||
if params:
|
||||
if self.version == 'v0':
|
||||
# Handle extractorOptions (for v0 compatibility)
|
||||
extractor_options = params.get('extractorOptions', {})
|
||||
if extractor_options:
|
||||
if 'extractionSchema' in extractor_options and hasattr(extractor_options['extractionSchema'], 'schema'):
|
||||
extractor_options['extractionSchema'] = extractor_options['extractionSchema'].schema()
|
||||
extractor_options['mode'] = extractor_options.get('mode', 'llm-extraction')
|
||||
scrape_params['extractorOptions'] = extractor_options
|
||||
# Handle extract (for v1)
|
||||
extract = params.get('extract', {})
|
||||
if extract:
|
||||
if 'schema' in extract and hasattr(extract['schema'], 'schema'):
|
||||
extract['schema'] = extract['schema'].schema()
|
||||
scrape_params['extract'] = extract
|
||||
|
||||
# Include any other params directly at the top level of scrape_params
|
||||
for key, value in params.items():
|
||||
if key not in ['extractorOptions']:
|
||||
scrape_params[key] = value
|
||||
elif self.version == 'v1':
|
||||
# Handle extract (for v1)
|
||||
extract = params.get('extract', {})
|
||||
if extract:
|
||||
if 'schema' in extract and hasattr(extract['schema'], 'schema'):
|
||||
extract['schema'] = extract['schema'].schema()
|
||||
scrape_params['extract'] = extract
|
||||
|
||||
# Include any other params directly at the top level of scrape_params
|
||||
for key, value in params.items():
|
||||
if key not in ['extract']:
|
||||
scrape_params[key] = value
|
||||
# Include any other params directly at the top level of scrape_params
|
||||
for key, value in params.items():
|
||||
if key not in ['extract']:
|
||||
scrape_params[key] = value
|
||||
|
||||
endpoint = f'/v1/scrape'
|
||||
# Make the POST request with the prepared headers and JSON data
|
||||
|
Loading…
x
Reference in New Issue
Block a user