From 27cc3dba309e34a30e2768b0c7f01a8f7685927f Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 31 Dec 2024 12:53:07 -0300 Subject: [PATCH 1/3] Nick: rm vscode settings --- .gitignore | 3 +++ .vscode/settings.json | 5 ----- 2 files changed, 3 insertions(+), 5 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index fc527490..537c4cad 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,6 @@ apps/js-sdk/firecrawl/dist /examples/haiku_web_crawler/firecrawl_env /examples/sonnet_web_crawler/firecrawl_env /examples/internal_link_assitant/firecrawl_env + + +.vscode \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 9d2a5d8e..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "rust-analyzer.linkedProjects": [ - "apps/rust-sdk/Cargo.toml" - ] -} \ No newline at end of file From 07f4b714af564fdde38a6fcc231e41708ecce73c Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 31 Dec 2024 15:23:02 -0300 Subject: [PATCH 2/3] Update removeUnwantedElements.ts --- apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts b/apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts index 3afbabd5..3536211d 100644 --- a/apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts +++ b/apps/api/src/scraper/scrapeURL/lib/removeUnwantedElements.ts @@ -39,9 +39,6 @@ const excludeNonMainTags = [ "#nav", ".breadcrumbs", "#breadcrumbs", - "#search-form", - ".search", - "#search", ".share", "#share", ".widget", From c822e34d37fb6454d580f0eb15c5522739792a93 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 2 Jan 2025 14:03:23 -0300 Subject: [PATCH 3/3] Nick: fixed extract schema --- apps/python-sdk/firecrawl/__init__.py | 2 +- apps/python-sdk/firecrawl/firecrawl.py | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py index 352305a4..c860967a 100644 --- a/apps/python-sdk/firecrawl/__init__.py +++ b/apps/python-sdk/firecrawl/__init__.py @@ -13,7 +13,7 @@ import os from .firecrawl import FirecrawlApp # noqa -__version__ = "1.7.0" +__version__ = "1.7.1" # Define the logger for the Firecrawl project logger: logging.Logger = logging.getLogger("firecrawl") diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 0181db90..33d43b99 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -472,20 +472,24 @@ class FirecrawlApp: if not params or not params.get('prompt'): raise ValueError("Prompt is required") - if not params.get('schema'): - raise ValueError("Schema is required for extraction") + schema = params.get('schema') + if schema: + if hasattr(schema, 'model_json_schema'): + # Convert Pydantic model to JSON schema + schema = schema.model_json_schema() + # Otherwise assume it's already a JSON schema dict jsonData = {'urls': urls, **params} - jsonSchema = params['schema'].schema() if hasattr(params['schema'], 'schema') else None + request_data = { + **jsonData, + 'allowExternalLinks': params.get('allow_external_links', False), + 'schema': schema + } try: response = self._post_request( f'{self.api_url}/v1/extract', - { - **jsonData, - 'allowExternalLinks': params.get('allow_external_links', False), - 'schema': jsonSchema - }, + request_data, headers ) if response.status_code == 200: