Nick: 1.2 - v1 llm extract

This commit is contained in:
Nicolas 2024-08-31 13:25:48 -03:00
parent 055177cf0b
commit af5cc5f16b
7 changed files with 26 additions and 68 deletions

View File

@ -44,7 +44,7 @@ const strictMessage = "Unrecognized key in body -- please review the v1 API docu
/**
 * Zod schema for the `extract` options object.
 *
 * - `mode`: only "llm" is accepted; defaults to "llm" when omitted.
 * - `schema`: optional; any value is accepted at this level.
 * - `systemPrompt`: string; falls back to the default text when omitted.
 * - `prompt`: optional string.
 *
 * `.strict(strictMessage)` rejects keys not listed here and reports `strictMessage`.
 */
export const extractOptions = z.object({
mode: z.enum(["llm"]).default("llm"),
schema: z.any().optional(),
systemPrompt: z.string().default("Based on the information on the page, extract the information from the schema."),
// NOTE(review): `systemPrompt` appears twice here; this looks like the old and new lines of a diff hunk shown together. In an object literal the later entry is the one that takes effect.
systemPrompt: z.string().default("Based on the information on the page, extract all the information from the schema. Try to extract all the fields even those that might not be marked as required."),
prompt: z.string().optional()
}).strict(strictMessage);

View File

@ -5,7 +5,6 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
Object.defineProperty(exports, "__esModule", { value: true });
exports.CrawlWatcher = void 0;
const axios_1 = __importDefault(require("axios"));
const zod_1 = require("zod");
const zod_to_json_schema_1 = require("zod-to-json-schema");
const isows_1 = require("isows");
const typescript_event_target_1 = require("typescript-event-target");
@ -34,26 +33,13 @@ class FirecrawlApp {
Authorization: `Bearer ${this.apiKey}`,
};
let jsonData = { url, ...params };
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
let schema = jsonData.extractorOptions.extractionSchema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof zod_1.z.ZodSchema) {
if (jsonData?.extract?.schema) {
let schema = jsonData.extract.schema;
// Try parsing the schema as a Zod schema
try {
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
}
jsonData = {
...jsonData,
extractorOptions: {
...jsonData.extractorOptions,
extractionSchema: schema,
mode: jsonData.extractorOptions.mode || "llm-extraction",
},
};
}
else if (this.version === 'v1' && jsonData?.extract?.schema) {
let schema = jsonData.extract.schema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof zod_1.z.ZodSchema) {
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
catch (error) {
}
jsonData = {
...jsonData,

View File

@ -1,5 +1,4 @@
import axios from "axios";
import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";
import { WebSocket } from "isows";
import { TypedEventTarget } from "typescript-event-target";
@ -28,26 +27,13 @@ export default class FirecrawlApp {
Authorization: `Bearer ${this.apiKey}`,
};
let jsonData = { url, ...params };
if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) {
let schema = jsonData.extractorOptions.extractionSchema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof z.ZodSchema) {
if (jsonData?.extract?.schema) {
let schema = jsonData.extract.schema;
// Try parsing the schema as a Zod schema
try {
schema = zodToJsonSchema(schema);
}
jsonData = {
...jsonData,
extractorOptions: {
...jsonData.extractorOptions,
extractionSchema: schema,
mode: jsonData.extractorOptions.mode || "llm-extraction",
},
};
}
else if (this.version === 'v1' && jsonData?.extract?.schema) {
let schema = jsonData.extract.schema;
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
if (schema instanceof z.ZodSchema) {
schema = zodToJsonSchema(schema);
catch (error) {
}
jsonData = {
...jsonData,

View File

@ -1,6 +1,6 @@
{
"name": "@mendable/firecrawl-js",
"version": "1.1.0",
"version": "1.2.0",
"description": "JavaScript SDK for Firecrawl API",
"main": "build/cjs/index.js",
"types": "types/index.d.ts",

View File

@ -1,6 +1,6 @@
import { AxiosResponse, AxiosRequestHeaders } from "axios";
import { TypedEventTarget } from "typescript-event-target";
import { z } from "zod";
import { TypedEventTarget } from "typescript-event-target";
/**
* Configuration interface for FirecrawlApp.
* @param apiKey - Optional API key for authentication.
@ -73,13 +73,13 @@ export interface ScrapeParams {
includeTags?: string[];
excludeTags?: string[];
onlyMainContent?: boolean;
waitFor?: number;
timeout?: number;
extract?: {
prompt?: string;
schema?: z.ZodSchema | any;
systemPrompt?: string;
};
waitFor?: number;
timeout?: number;
}
/**
* Response interface for scraping operations.

View File

@ -13,7 +13,7 @@ import os
from .firecrawl import FirecrawlApp
__version__ = "1.1.1"
__version__ = "1.2.1"
# Define the logger for the Firecrawl project
logger: logging.Logger = logging.getLogger("firecrawl")

View File

@ -59,31 +59,17 @@ class FirecrawlApp:
# If there are additional params, process them
if params:
if self.version == 'v0':
# Handle extractorOptions (for v0 compatibility)
extractor_options = params.get('extractorOptions', {})
if extractor_options:
if 'extractionSchema' in extractor_options and hasattr(extractor_options['extractionSchema'], 'schema'):
extractor_options['extractionSchema'] = extractor_options['extractionSchema'].schema()
extractor_options['mode'] = extractor_options.get('mode', 'llm-extraction')
scrape_params['extractorOptions'] = extractor_options
# Handle extract (for v1)
extract = params.get('extract', {})
if extract:
if 'schema' in extract and hasattr(extract['schema'], 'schema'):
extract['schema'] = extract['schema'].schema()
scrape_params['extract'] = extract
# Include any other params directly at the top level of scrape_params
for key, value in params.items():
if key not in ['extractorOptions']:
scrape_params[key] = value
elif self.version == 'v1':
# Handle extract (for v1)
extract = params.get('extract', {})
if extract:
if 'schema' in extract and hasattr(extract['schema'], 'schema'):
extract['schema'] = extract['schema'].schema()
scrape_params['extract'] = extract
# Include any other params directly at the top level of scrape_params
for key, value in params.items():
if key not in ['extract']:
scrape_params[key] = value
# Include any other params directly at the top level of scrape_params
for key, value in params.items():
if key not in ['extract']:
scrape_params[key] = value
endpoint = f'/v1/scrape'
# Make the POST request with the prepared headers and JSON data