Merge branch 'main' into nsc/semantic-index-extract

This commit is contained in:
Nicolas 2025-01-02 15:26:40 -03:00
commit c9d91af86f
5 changed files with 16 additions and 18 deletions

4
.gitignore vendored
View File

@@ -35,4 +35,6 @@ apps/js-sdk/firecrawl/dist
/examples/internal_link_assitant/firecrawl_env
/apps/api/logs/*
/apps/api/debug/*
/apps/api/debug/*
.vscode

View File

@@ -1,5 +0,0 @@
{
"rust-analyzer.linkedProjects": [
"apps/rust-sdk/Cargo.toml"
]
}

View File

@@ -39,9 +39,6 @@ const excludeNonMainTags = [
"#nav",
".breadcrumbs",
"#breadcrumbs",
"#search-form",
".search",
"#search",
".share",
"#share",
".widget",

View File

@@ -13,7 +13,7 @@ import os
from .firecrawl import FirecrawlApp # noqa
__version__ = "1.7.0"
__version__ = "1.7.1"
# Define the logger for the Firecrawl project
logger: logging.Logger = logging.getLogger("firecrawl")

View File

@@ -472,20 +472,24 @@ class FirecrawlApp:
if not params or not params.get('prompt'):
raise ValueError("Prompt is required")
if not params.get('schema'):
raise ValueError("Schema is required for extraction")
schema = params.get('schema')
if schema:
if hasattr(schema, 'model_json_schema'):
# Convert Pydantic model to JSON schema
schema = schema.model_json_schema()
# Otherwise assume it's already a JSON schema dict
jsonData = {'urls': urls, **params}
jsonSchema = params['schema'].schema() if hasattr(params['schema'], 'schema') else None
request_data = {
**jsonData,
'allowExternalLinks': params.get('allow_external_links', False),
'schema': schema
}
try:
response = self._post_request(
f'{self.api_url}/v1/extract',
{
**jsonData,
'allowExternalLinks': params.get('allow_external_links', False),
'schema': jsonSchema
},
request_data,
headers
)
if response.status_code == 200: