mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 03:06:00 +08:00
Nick: 1.2 - v1 llm extract
This commit is contained in:
parent
055177cf0b
commit
af5cc5f16b
@ -44,7 +44,7 @@ const strictMessage = "Unrecognized key in body -- please review the v1 API docu
|
|||||||
export const extractOptions = z.object({
|
export const extractOptions = z.object({
|
||||||
mode: z.enum(["llm"]).default("llm"),
|
mode: z.enum(["llm"]).default("llm"),
|
||||||
schema: z.any().optional(),
|
schema: z.any().optional(),
|
||||||
systemPrompt: z.string().default("Based on the information on the page, extract the information from the schema."),
|
systemPrompt: z.string().default("Based on the information on the page, extract all the information from the schema. Try to extract all the fields even those that might not be marked as required."),
|
||||||
prompt: z.string().optional()
|
prompt: z.string().optional()
|
||||||
}).strict(strictMessage);
|
}).strict(strictMessage);
|
||||||
|
|
||||||
|
@ -5,7 +5,6 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|||||||
Object.defineProperty(exports, "__esModule", { value: true });
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||||||
exports.CrawlWatcher = void 0;
|
exports.CrawlWatcher = void 0;
|
||||||
const axios_1 = __importDefault(require("axios"));
|
const axios_1 = __importDefault(require("axios"));
|
||||||
const zod_1 = require("zod");
|
|
||||||
const zod_to_json_schema_1 = require("zod-to-json-schema");
|
const zod_to_json_schema_1 = require("zod-to-json-schema");
|
||||||
const isows_1 = require("isows");
|
const isows_1 = require("isows");
|
||||||
const typescript_event_target_1 = require("typescript-event-target");
|
const typescript_event_target_1 = require("typescript-event-target");
|
||||||
@ -34,26 +33,13 @@ class FirecrawlApp {
|
|||||||
Authorization: `Bearer ${this.apiKey}`,
|
Authorization: `Bearer ${this.apiKey}`,
|
||||||
};
|
};
|
||||||
let jsonData = { url, ...params };
|
let jsonData = { url, ...params };
|
||||||
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
|
if (jsonData?.extract?.schema) {
|
||||||
let schema = jsonData.extractorOptions.extractionSchema;
|
let schema = jsonData.extract.schema;
|
||||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
// Try parsing the schema as a Zod schema
|
||||||
if (schema instanceof zod_1.z.ZodSchema) {
|
try {
|
||||||
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
||||||
}
|
}
|
||||||
jsonData = {
|
catch (error) {
|
||||||
...jsonData,
|
|
||||||
extractorOptions: {
|
|
||||||
...jsonData.extractorOptions,
|
|
||||||
extractionSchema: schema,
|
|
||||||
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
|
||||||
},
|
|
||||||
};
|
|
||||||
}
|
|
||||||
else if (this.version === 'v1' && jsonData?.extract?.schema) {
|
|
||||||
let schema = jsonData.extract.schema;
|
|
||||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
|
||||||
if (schema instanceof zod_1.z.ZodSchema) {
|
|
||||||
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
|
||||||
}
|
}
|
||||||
jsonData = {
|
jsonData = {
|
||||||
...jsonData,
|
...jsonData,
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
import { z } from "zod";
|
|
||||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||||
import { WebSocket } from "isows";
|
import { WebSocket } from "isows";
|
||||||
import { TypedEventTarget } from "typescript-event-target";
|
import { TypedEventTarget } from "typescript-event-target";
|
||||||
@ -28,26 +27,13 @@ export default class FirecrawlApp {
|
|||||||
Authorization: `Bearer ${this.apiKey}`,
|
Authorization: `Bearer ${this.apiKey}`,
|
||||||
};
|
};
|
||||||
let jsonData = { url, ...params };
|
let jsonData = { url, ...params };
|
||||||
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
|
if (jsonData?.extract?.schema) {
|
||||||
let schema = jsonData.extractorOptions.extractionSchema;
|
let schema = jsonData.extract.schema;
|
||||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
// Try parsing the schema as a Zod schema
|
||||||
if (schema instanceof z.ZodSchema) {
|
try {
|
||||||
schema = zodToJsonSchema(schema);
|
schema = zodToJsonSchema(schema);
|
||||||
}
|
}
|
||||||
jsonData = {
|
catch (error) {
|
||||||
...jsonData,
|
|
||||||
extractorOptions: {
|
|
||||||
...jsonData.extractorOptions,
|
|
||||||
extractionSchema: schema,
|
|
||||||
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
|
||||||
},
|
|
||||||
};
|
|
||||||
}
|
|
||||||
else if (this.version === 'v1' && jsonData?.extract?.schema) {
|
|
||||||
let schema = jsonData.extract.schema;
|
|
||||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
|
||||||
if (schema instanceof z.ZodSchema) {
|
|
||||||
schema = zodToJsonSchema(schema);
|
|
||||||
}
|
}
|
||||||
jsonData = {
|
jsonData = {
|
||||||
...jsonData,
|
...jsonData,
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "1.1.0",
|
"version": "1.2.0",
|
||||||
"description": "JavaScript SDK for Firecrawl API",
|
"description": "JavaScript SDK for Firecrawl API",
|
||||||
"main": "build/cjs/index.js",
|
"main": "build/cjs/index.js",
|
||||||
"types": "types/index.d.ts",
|
"types": "types/index.d.ts",
|
||||||
|
6
apps/js-sdk/firecrawl/types/index.d.ts
vendored
6
apps/js-sdk/firecrawl/types/index.d.ts
vendored
@ -1,6 +1,6 @@
|
|||||||
import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
||||||
import { TypedEventTarget } from "typescript-event-target";
|
|
||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
|
import { TypedEventTarget } from "typescript-event-target";
|
||||||
/**
|
/**
|
||||||
* Configuration interface for FirecrawlApp.
|
* Configuration interface for FirecrawlApp.
|
||||||
* @param apiKey - Optional API key for authentication.
|
* @param apiKey - Optional API key for authentication.
|
||||||
@ -73,13 +73,13 @@ export interface ScrapeParams {
|
|||||||
includeTags?: string[];
|
includeTags?: string[];
|
||||||
excludeTags?: string[];
|
excludeTags?: string[];
|
||||||
onlyMainContent?: boolean;
|
onlyMainContent?: boolean;
|
||||||
waitFor?: number;
|
|
||||||
timeout?: number;
|
|
||||||
extract?: {
|
extract?: {
|
||||||
prompt?: string;
|
prompt?: string;
|
||||||
schema?: z.ZodSchema | any;
|
schema?: z.ZodSchema | any;
|
||||||
systemPrompt?: string;
|
systemPrompt?: string;
|
||||||
};
|
};
|
||||||
|
waitFor?: number;
|
||||||
|
timeout?: number;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Response interface for scraping operations.
|
* Response interface for scraping operations.
|
||||||
|
@ -13,7 +13,7 @@ import os
|
|||||||
|
|
||||||
from .firecrawl import FirecrawlApp
|
from .firecrawl import FirecrawlApp
|
||||||
|
|
||||||
__version__ = "1.1.1"
|
__version__ = "1.2.1"
|
||||||
|
|
||||||
# Define the logger for the Firecrawl project
|
# Define the logger for the Firecrawl project
|
||||||
logger: logging.Logger = logging.getLogger("firecrawl")
|
logger: logging.Logger = logging.getLogger("firecrawl")
|
||||||
|
@ -59,31 +59,17 @@ class FirecrawlApp:
|
|||||||
|
|
||||||
# If there are additional params, process them
|
# If there are additional params, process them
|
||||||
if params:
|
if params:
|
||||||
if self.version == 'v0':
|
# Handle extract (for v1)
|
||||||
# Handle extractorOptions (for v0 compatibility)
|
extract = params.get('extract', {})
|
||||||
extractor_options = params.get('extractorOptions', {})
|
if extract:
|
||||||
if extractor_options:
|
if 'schema' in extract and hasattr(extract['schema'], 'schema'):
|
||||||
if 'extractionSchema' in extractor_options and hasattr(extractor_options['extractionSchema'], 'schema'):
|
extract['schema'] = extract['schema'].schema()
|
||||||
extractor_options['extractionSchema'] = extractor_options['extractionSchema'].schema()
|
scrape_params['extract'] = extract
|
||||||
extractor_options['mode'] = extractor_options.get('mode', 'llm-extraction')
|
|
||||||
scrape_params['extractorOptions'] = extractor_options
|
|
||||||
|
|
||||||
# Include any other params directly at the top level of scrape_params
|
# Include any other params directly at the top level of scrape_params
|
||||||
for key, value in params.items():
|
for key, value in params.items():
|
||||||
if key not in ['extractorOptions']:
|
if key not in ['extract']:
|
||||||
scrape_params[key] = value
|
scrape_params[key] = value
|
||||||
elif self.version == 'v1':
|
|
||||||
# Handle extract (for v1)
|
|
||||||
extract = params.get('extract', {})
|
|
||||||
if extract:
|
|
||||||
if 'schema' in extract and hasattr(extract['schema'], 'schema'):
|
|
||||||
extract['schema'] = extract['schema'].schema()
|
|
||||||
scrape_params['extract'] = extract
|
|
||||||
|
|
||||||
# Include any other params directly at the top level of scrape_params
|
|
||||||
for key, value in params.items():
|
|
||||||
if key not in ['extract']:
|
|
||||||
scrape_params[key] = value
|
|
||||||
|
|
||||||
endpoint = f'/v1/scrape'
|
endpoint = f'/v1/scrape'
|
||||||
# Make the POST request with the prepared headers and JSON data
|
# Make the POST request with the prepared headers and JSON data
|
||||||
|
Loading…
x
Reference in New Issue
Block a user