diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index f7f7dc75..4393cd29 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -44,7 +44,7 @@ const strictMessage = "Unrecognized key in body -- please review the v1 API docu export const extractOptions = z.object({ mode: z.enum(["llm"]).default("llm"), schema: z.any().optional(), - systemPrompt: z.string().default("Based on the information on the page, extract the information from the schema."), + systemPrompt: z.string().default("Based on the information on the page, extract all the information from the schema. Try to extract all the fields even those that might not be marked as required."), prompt: z.string().optional() }).strict(strictMessage); diff --git a/apps/js-sdk/firecrawl/build/cjs/index.js b/apps/js-sdk/firecrawl/build/cjs/index.js index 31bb2715..2908b09d 100644 --- a/apps/js-sdk/firecrawl/build/cjs/index.js +++ b/apps/js-sdk/firecrawl/build/cjs/index.js @@ -5,7 +5,6 @@ var __importDefault = (this && this.__importDefault) || function (mod) { Object.defineProperty(exports, "__esModule", { value: true }); exports.CrawlWatcher = void 0; const axios_1 = __importDefault(require("axios")); -const zod_1 = require("zod"); const zod_to_json_schema_1 = require("zod-to-json-schema"); const isows_1 = require("isows"); const typescript_event_target_1 = require("typescript-event-target"); @@ -34,26 +33,13 @@ class FirecrawlApp { Authorization: `Bearer ${this.apiKey}`, }; let jsonData = { url, ...params }; - if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) { - let schema = jsonData.extractorOptions.extractionSchema; - // Check if schema is an instance of ZodSchema to correctly identify Zod schemas - if (schema instanceof zod_1.z.ZodSchema) { + if (jsonData?.extract?.schema) { + let schema = jsonData.extract.schema; + // Try parsing the schema as a Zod schema + try { schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema); } - jsonData = { - ...jsonData, - extractorOptions: { - ...jsonData.extractorOptions, - extractionSchema: schema, - mode: jsonData.extractorOptions.mode || "llm-extraction", - }, - }; - } - else if (this.version === 'v1' && jsonData?.extract?.schema) { - let schema = jsonData.extract.schema; - // Check if schema is an instance of ZodSchema to correctly identify Zod schemas - if (schema instanceof zod_1.z.ZodSchema) { - schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema); + catch (error) { } jsonData = { ...jsonData, diff --git a/apps/js-sdk/firecrawl/build/esm/index.js b/apps/js-sdk/firecrawl/build/esm/index.js index b846e44b..4245cc37 100644 --- a/apps/js-sdk/firecrawl/build/esm/index.js +++ b/apps/js-sdk/firecrawl/build/esm/index.js @@ -1,5 +1,4 @@ import axios from "axios"; -import { z } from "zod"; import { zodToJsonSchema } from "zod-to-json-schema"; import { WebSocket } from "isows"; import { TypedEventTarget } from "typescript-event-target"; @@ -28,26 +27,13 @@ export default class FirecrawlApp { Authorization: `Bearer ${this.apiKey}`, }; let jsonData = { url, ...params }; - if (this.version === 'v0' && jsonData?.extractorOptions?.extractionSchema) { - let schema = jsonData.extractorOptions.extractionSchema; - // Check if schema is an instance of ZodSchema to correctly identify Zod schemas - if (schema instanceof z.ZodSchema) { + if (jsonData?.extract?.schema) { + let schema = jsonData.extract.schema; + // Try parsing the schema as a Zod schema + try { schema = zodToJsonSchema(schema); } - jsonData = { - ...jsonData, - extractorOptions: { - ...jsonData.extractorOptions, - extractionSchema: schema, - mode: jsonData.extractorOptions.mode || "llm-extraction", - }, - }; - } - else if (this.version === 'v1' && jsonData?.extract?.schema) { - let schema = jsonData.extract.schema; - // Check if schema is an instance of ZodSchema to correctly identify Zod schemas - if (schema instanceof z.ZodSchema) { - schema = zodToJsonSchema(schema); + catch (error) { } jsonData = { ...jsonData, diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index a9e36a24..002e10d2 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "1.1.0", + "version": "1.2.0", "description": "JavaScript SDK for Firecrawl API", "main": "build/cjs/index.js", "types": "types/index.d.ts", diff --git a/apps/js-sdk/firecrawl/types/index.d.ts b/apps/js-sdk/firecrawl/types/index.d.ts index 8997fccd..8b620f85 100644 --- a/apps/js-sdk/firecrawl/types/index.d.ts +++ b/apps/js-sdk/firecrawl/types/index.d.ts @@ -1,6 +1,6 @@ import { AxiosResponse, AxiosRequestHeaders } from "axios"; -import { TypedEventTarget } from "typescript-event-target"; import { z } from "zod"; +import { TypedEventTarget } from "typescript-event-target"; /** * Configuration interface for FirecrawlApp. * @param apiKey - Optional API key for authentication. @@ -73,13 +73,13 @@ export interface ScrapeParams { includeTags?: string[]; excludeTags?: string[]; onlyMainContent?: boolean; - waitFor?: number; - timeout?: number; extract?: { prompt?: string; schema?: z.ZodSchema | any; systemPrompt?: string; }; + waitFor?: number; + timeout?: number; } /** * Response interface for scraping operations. diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py index 13df20d9..4b3807be 100644 --- a/apps/python-sdk/firecrawl/__init__.py +++ b/apps/python-sdk/firecrawl/__init__.py @@ -13,7 +13,7 @@ import os from .firecrawl import FirecrawlApp -__version__ = "1.1.1" +__version__ = "1.2.1" # Define the logger for the Firecrawl project logger: logging.Logger = logging.getLogger("firecrawl") diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 7a7ff5f5..75245e8d 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -59,31 +59,17 @@ class FirecrawlApp: # If there are additional params, process them if params: - if self.version == 'v0': - # Handle extractorOptions (for v0 compatibility) - extractor_options = params.get('extractorOptions', {}) - if extractor_options: - if 'extractionSchema' in extractor_options and hasattr(extractor_options['extractionSchema'], 'schema'): - extractor_options['extractionSchema'] = extractor_options['extractionSchema'].schema() - extractor_options['mode'] = extractor_options.get('mode', 'llm-extraction') - scrape_params['extractorOptions'] = extractor_options + # Handle extract (for v1) + extract = params.get('extract', {}) + if extract: + if 'schema' in extract and hasattr(extract['schema'], 'schema'): + extract['schema'] = extract['schema'].schema() + scrape_params['extract'] = extract - # Include any other params directly at the top level of scrape_params - for key, value in params.items(): - if key not in ['extractorOptions']: - scrape_params[key] = value - elif self.version == 'v1': - # Handle extract (for v1) - extract = params.get('extract', {}) - if extract: - if 'schema' in extract and hasattr(extract['schema'], 'schema'): - extract['schema'] = extract['schema'].schema() - scrape_params['extract'] = extract - - # Include any other params directly at the top level of scrape_params - for key, value in params.items(): - if key not in ['extract']: - scrape_params[key] = value + # Include any other params directly at the top level of scrape_params + for key, value in params.items(): + if key not in ['extract']: + scrape_params[key] = value endpoint = f'/v1/scrape' # Make the POST request with the prepared headers and JSON data