Merge branch 'main' into nsc/semantic-index-extract

This commit is contained in:
Nicolas 2025-01-02 15:26:40 -03:00
commit c9d91af86f
5 changed files with 16 additions and 18 deletions

2
.gitignore vendored
View File

@@ -36,3 +36,5 @@ apps/js-sdk/firecrawl/dist
/apps/api/logs/* /apps/api/logs/*
/apps/api/debug/* /apps/api/debug/*
.vscode

View File

@@ -1,5 +0,0 @@
{
"rust-analyzer.linkedProjects": [
"apps/rust-sdk/Cargo.toml"
]
}

View File

@@ -39,9 +39,6 @@ const excludeNonMainTags = [
"#nav", "#nav",
".breadcrumbs", ".breadcrumbs",
"#breadcrumbs", "#breadcrumbs",
"#search-form",
".search",
"#search",
".share", ".share",
"#share", "#share",
".widget", ".widget",

View File

@@ -13,7 +13,7 @@ import os
from .firecrawl import FirecrawlApp # noqa from .firecrawl import FirecrawlApp # noqa
__version__ = "1.7.0" __version__ = "1.7.1"
# Define the logger for the Firecrawl project # Define the logger for the Firecrawl project
logger: logging.Logger = logging.getLogger("firecrawl") logger: logging.Logger = logging.getLogger("firecrawl")

View File

@@ -472,20 +472,24 @@ class FirecrawlApp:
if not params or not params.get('prompt'): if not params or not params.get('prompt'):
raise ValueError("Prompt is required") raise ValueError("Prompt is required")
if not params.get('schema'): schema = params.get('schema')
raise ValueError("Schema is required for extraction") if schema:
if hasattr(schema, 'model_json_schema'):
# Convert Pydantic model to JSON schema
schema = schema.model_json_schema()
# Otherwise assume it's already a JSON schema dict
jsonData = {'urls': urls, **params} jsonData = {'urls': urls, **params}
jsonSchema = params['schema'].schema() if hasattr(params['schema'], 'schema') else None request_data = {
**jsonData,
'allowExternalLinks': params.get('allow_external_links', False),
'schema': schema
}
try: try:
response = self._post_request( response = self._post_request(
f'{self.api_url}/v1/extract', f'{self.api_url}/v1/extract',
{ request_data,
**jsonData,
'allowExternalLinks': params.get('allow_external_links', False),
'schema': jsonSchema
},
headers headers
) )
if response.status_code == 200: if response.status_code == 200: