Fix sdk/schemas (#1507)
* sdk-fix/schema-check
* version bump
* schema validation for extract and jsonOptions parameters
* Update firecrawl.py

---------

Co-authored-by: Nicolas <nicolascamara29@gmail.com>
This commit is contained in:
parent a0a1675829
commit 317fa43f9e
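In practice, the fix means `extract` and `jsonOptions` now accept either a plain JSON-schema dict or a Pydantic model class in their `schema` field, and either form is normalized to a dict before the request is sent. A minimal sketch of the intended call pattern, assuming the v2 keyword-argument style of `scrape_url` (the model, URL, and prompt below are illustrative, not from this commit):

    from pydantic import BaseModel
    from firecrawl import FirecrawlApp, JsonConfig

    class Article(BaseModel):
        # Hypothetical extraction target, not part of the SDK
        title: str
        author: str

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")

    # Passing the model class directly should now work: the SDK converts it
    # to a JSON-schema dict via the new _ensure_schema_dict helper instead
    # of assuming the value has a .schema attribute.
    result = app.scrape_url(
        "https://example.com",
        formats=["json"],
        json_options=JsonConfig(prompt="Extract the article metadata", schema=Article),
    )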
apps/python-sdk/firecrawl/__init__.py

@@ -13,7 +13,7 @@ import os
 from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
 
-__version__ = "2.5.0"
+__version__ = "2.5.1"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
@@ -29,7 +29,7 @@ warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
 warnings.filterwarnings("ignore", message="Field name \"json\" in \"ChangeTrackingData\" shadows an attribute in parent \"BaseModel\"")
 warnings.filterwarnings("ignore", message="Field name \"schema\" in \"JsonConfig\" shadows an attribute in parent \"BaseModel\"")
 warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractParams\" shadows an attribute in parent \"BaseModel\"")
+warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ChangeTrackingOptions\" shadows an attribute in parent \"BaseModel\"")
 
 def get_version():
     try:
apps/python-sdk/firecrawl/firecrawl.py

@@ -529,14 +529,16 @@ class FirecrawlApp:
             scrape_params['blockAds'] = block_ads
         if proxy:
             scrape_params['proxy'] = proxy
-        if extract:
-            if hasattr(extract.schema, 'schema'):
-                extract.schema = extract.schema.schema()
-            scrape_params['extract'] = extract.dict(exclude_none=True)
-        if json_options:
-            if hasattr(json_options.schema, 'schema'):
-                json_options.schema = json_options.schema.schema()
-            scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+        if extract is not None:
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+        if json_options is not None:
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if change_tracking_options:
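The same rewrite recurs throughout the file. The old code mutated `extract.schema` in place and assumed the value had a `.schema()` method, which broke when callers passed a plain dict or an already-converted schema; the new code funnels the whole options object through `_ensure_schema_dict` first and serializes it afterwards. A standalone sketch of that normalization, using illustrative names outside the SDK:

    from typing import Any

    def normalize_options(options: Any, ensure_schema_dict) -> dict:
        # Accept a plain dict or a Pydantic model instance and return a
        # JSON-serializable dict whose "schema" value is itself a dict.
        options = ensure_schema_dict(options)
        if isinstance(options, dict) and "schema" in options:
            options["schema"] = ensure_schema_dict(options["schema"])
        return options if isinstance(options, dict) else options.dict(exclude_none=True)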
@@ -544,6 +546,11 @@ class FirecrawlApp:
 
         scrape_params.update(kwargs)
 
+        if 'extract' in scrape_params and scrape_params['extract'] and 'schema' in scrape_params['extract']:
+            scrape_params['extract']['schema'] = self._ensure_schema_dict(scrape_params['extract']['schema'])
+        if 'jsonOptions' in scrape_params and scrape_params['jsonOptions'] and 'schema' in scrape_params['jsonOptions']:
+            scrape_params['jsonOptions']['schema'] = self._ensure_schema_dict(scrape_params['jsonOptions']['schema'])
+
         # Make request
         response = requests.post(
             f'{self.api_url}/v1/scrape',
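The guard added here runs after `scrape_params.update(kwargs)`, so a schema that arrives through raw keyword arguments, bypassing the typed `extract`/`json_options` parameters, is normalized as well. A hypothetical call reusing the `Article` model from the sketch above, assuming `scrape_url` forwards unknown kwargs into the request body:

    app.scrape_url(
        "https://example.com",
        jsonOptions={"prompt": "Extract the title", "schema": Article},
    )
    # Without the post-update guard, the class Article would be left in the
    # JSON body unserialized and the request would fail.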
@@ -1252,13 +1259,15 @@ class FirecrawlApp:
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            if hasattr(extract.schema, 'schema'):
-                extract.schema = extract.schema.schema()
-            scrape_params['extract'] = extract.dict(exclude_none=True)
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            if hasattr(json_options.schema, 'schema'):
-                json_options.schema = json_options.schema.schema()
-            scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -1273,6 +1282,11 @@ class FirecrawlApp:
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers)
@@ -1378,13 +1392,15 @@ class FirecrawlApp:
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            if hasattr(extract.schema, 'schema'):
-                extract.schema = extract.schema.schema()
-            scrape_params['extract'] = extract.dict(exclude_none=True)
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            if hasattr(json_options.schema, 'schema'):
-                json_options.schema = json_options.schema.schema()
-            scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -1399,6 +1415,11 @@ class FirecrawlApp:
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers)
@@ -1499,13 +1520,15 @@ class FirecrawlApp:
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            if hasattr(extract.schema, 'schema'):
-                extract.schema = extract.schema.schema()
-            scrape_params['extract'] = extract.dict(exclude_none=True)
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            if hasattr(json_options.schema, 'schema'):
-                json_options.schema = json_options.schema.schema()
-            scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -1520,6 +1543,11 @@ class FirecrawlApp:
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers)
@@ -1606,7 +1634,7 @@ class FirecrawlApp:
             id (str): The ID of the crawl job.
 
         Returns:
-            CrawlErrorsResponse: A response containing:
+            CrawlErrorsResponse containing:
             * errors (List[Dict[str, str]]): List of errors with fields:
                 * id (str): Error ID
                 * timestamp (str): When the error occurred
@@ -1669,10 +1697,7 @@ class FirecrawlApp:
             raise ValueError("Either urls or prompt is required")
 
         if schema:
-            if hasattr(schema, 'model_json_schema'):
-                # Convert Pydantic model to JSON schema
-                schema = schema.model_json_schema()
-            # Otherwise assume it's already a JSON schema dict
+            schema = self._ensure_schema_dict(schema)
 
         request_data = {
             'urls': urls or [],
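With this change the `extract` path reduces four lines of Pydantic special-casing to a single call: a Pydantic v2 class (via `model_json_schema()`), a v1 class (via `schema()`), or a hand-written dict all come out of `_ensure_schema_dict` as a plain dict. A hedged usage sketch, reusing the `app` instance from above (URL and prompt illustrative):

    class PageSummary(BaseModel):
        summary: str

    res = app.extract(
        urls=["https://example.com"],
        prompt="Summarize the page in one sentence",
        schema=PageSummary,  # a class is accepted and converted before the request
    )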
@@ -1801,10 +1826,7 @@ class FirecrawlApp:
 
         schema = schema
         if schema:
-            if hasattr(schema, 'model_json_schema'):
-                # Convert Pydantic model to JSON schema
-                schema = schema.model_json_schema()
-            # Otherwise assume it's already a JSON schema dict
+            schema = self._ensure_schema_dict(schema)
 
         request_data = {
             'urls': urls,
@@ -2467,6 +2489,24 @@ class FirecrawlApp:
         # Additional type validation can be added here if needed
         # For now, we rely on Pydantic models for detailed type validation
 
+    def _ensure_schema_dict(self, schema):
+        """
+        Utility to ensure a schema is a dict, not a Pydantic model class. Recursively checks dicts and lists.
+        """
+        if schema is None:
+            return schema
+        if isinstance(schema, type):
+            # Pydantic v1/v2 model class
+            if hasattr(schema, 'model_json_schema'):
+                return schema.model_json_schema()
+            elif hasattr(schema, 'schema'):
+                return schema.schema()
+        if isinstance(schema, dict):
+            return {k: self._ensure_schema_dict(v) for k, v in schema.items()}
+        if isinstance(schema, (list, tuple)):
+            return [self._ensure_schema_dict(v) for v in schema]
+        return schema
+
 class CrawlWatcher:
     """
     A class to watch and handle crawl job events via WebSocket connection.
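Because `_ensure_schema_dict` recurses into dicts and lists, a model class nested anywhere inside a schema-bearing structure is converted too, while plain values pass through untouched. A quick illustration of the expected behavior, assuming Pydantic v2 and reusing the `app` instance from above:

    from pydantic import BaseModel

    class Person(BaseModel):
        name: str
        age: int

    assert isinstance(app._ensure_schema_dict(Person), dict)           # class -> JSON schema
    assert app._ensure_schema_dict({"schema": Person})["schema"]["type"] == "object"
    assert app._ensure_schema_dict([1, "a", None]) == [1, "a", None]   # plain values unchanged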
@@ -2873,19 +2913,24 @@ class AsyncFirecrawlApp(FirecrawlApp):
             scrape_params['blockAds'] = block_ads
         if proxy:
             scrape_params['proxy'] = proxy
-        if extract:
-            extract_dict = extract.dict(exclude_none=True)
-            if 'schema' in extract_dict and hasattr(extract.schema, 'schema'):
-                extract_dict['schema'] = extract.schema.schema()  # Ensure pydantic model schema is converted
-            scrape_params['extract'] = extract_dict
-        if json_options:
-            json_options_dict = json_options.dict(exclude_none=True)
-            if 'schema' in json_options_dict and hasattr(json_options.schema, 'schema'):
-                json_options_dict['schema'] = json_options.schema.schema()  # Ensure pydantic model schema is converted
-            scrape_params['jsonOptions'] = json_options_dict
+        if extract is not None:
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+        if json_options is not None:
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
 
+        if 'extract' in scrape_params and scrape_params['extract'] and 'schema' in scrape_params['extract']:
+            scrape_params['extract']['schema'] = self._ensure_schema_dict(scrape_params['extract']['schema'])
+        if 'jsonOptions' in scrape_params and scrape_params['jsonOptions'] and 'schema' in scrape_params['jsonOptions']:
+            scrape_params['jsonOptions']['schema'] = self._ensure_schema_dict(scrape_params['jsonOptions']['schema'])
+
         # Make async request
         endpoint = f'/v1/scrape'
         response = await self._async_post_request(
@@ -2996,13 +3041,15 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            if hasattr(extract.schema, 'schema'):
-                extract.schema = extract.schema.schema()
-            scrape_params['extract'] = extract.dict(exclude_none=True)
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            if hasattr(json_options.schema, 'schema'):
-                json_options.schema = json_options.schema.schema()
-            scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -3017,6 +3064,11 @@ class AsyncFirecrawlApp(FirecrawlApp):
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = await self._async_post_request(
@@ -3127,13 +3179,15 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            if hasattr(extract.schema, 'schema'):
-                extract.schema = extract.schema.schema()
-            scrape_params['extract'] = extract.dict(exclude_none=True)
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            if hasattr(json_options.schema, 'schema'):
-                json_options.schema = json_options.schema.schema()
-            scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -3148,6 +3202,11 @@ class AsyncFirecrawlApp(FirecrawlApp):
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = await self._async_post_request(
@@ -3605,10 +3664,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
             raise ValueError("Either urls or prompt is required")
 
         if schema:
-            if hasattr(schema, 'model_json_schema'):
-                # Convert Pydantic model to JSON schema
-                schema = schema.model_json_schema()
-            # Otherwise assume it's already a JSON schema dict
+            schema = self._ensure_schema_dict(schema)
 
         request_data = {
             'urls': urls or [],
@@ -3862,8 +3918,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
             raise ValueError("Either urls or prompt is required")
 
         if schema:
-            if hasattr(schema, 'model_json_schema'):
-                schema = schema.model_json_schema()
+            schema = self._ensure_schema_dict(schema)
 
         request_data = ExtractResponse(
             urls=urls or [],