[fix/sdk] kwargs params (#1490)

* fix sdk kwargs params
* version
* Update __init__.py

Co-authored-by: Nicolas <nicolascamara29@gmail.com>

parent 1a02ef56e6
commit e10d4c7b0c
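
For context on the hunks below: this release removes the legacy params argument from search() (and the params: MapParams argument from map_url()) in favor of plain keyword arguments that the SDK validates before any request is sent. A minimal usage sketch against v2.2.0, with an illustrative query and a placeholder API key:

    from firecrawl import FirecrawlApp

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

    # Search options are now passed as keyword arguments instead of a
    # params dict / SearchParams object; extras are screened by the new
    # _validate_kwargs helper before the request body is built.
    results = app.search(
        "firecrawl web scraping",
        limit=5,
        tbs="qdr:w",  # in the allowed "search" kwargs set
    )
    print(results)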
apps/python-sdk/firecrawl/__init__.py

@@ -13,7 +13,7 @@ import os
 from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions  # noqa

-__version__ = "2.1.2"
+__version__ = "2.2.0"

 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
apps/python-sdk/firecrawl/firecrawl.py

@@ -570,7 +570,6 @@ class FirecrawlApp:
         location: Optional[str] = None,
         timeout: Optional[int] = None,
         scrape_options: Optional[ScrapeOptions] = None,
-        params: Optional[Union[Dict[str, Any], SearchParams]] = None,
         **kwargs) -> SearchResponse:
         """
         Search for content using Firecrawl.
@@ -585,7 +584,6 @@ class FirecrawlApp:
             location (Optional[str]): Geo-targeting
             timeout (Optional[int]): Request timeout in milliseconds
             scrape_options (Optional[ScrapeOptions]): Result scraping configuration
-            params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters
             **kwargs: Additional keyword arguments for future compatibility

         Returns:
@@ -598,13 +596,11 @@ class FirecrawlApp:
         Raises:
             Exception: If search fails or response cannot be parsed
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "search")
+
         # Build search parameters
         search_params = {}
-        if params:
-            if isinstance(params, dict):
-                search_params.update(params)
-            else:
-                search_params.update(params.dict(exclude_none=True))

         # Add individual parameters
         if limit is not None:
@@ -705,6 +701,9 @@ class FirecrawlApp:
         Raises:
             Exception: If crawl fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "crawl_url")
+
         crawl_params = {}

         # Add individual parameters
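
The same pre-flight validation now guards crawl_url in the sync client. A hedged sketch of the call pattern (URL and limits are illustrative):

    from firecrawl import FirecrawlApp

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

    # Explicit arguments and any extra kwargs are merged into crawl_params;
    # kwargs are checked against the allowed "crawl_url" set first.
    status = app.crawl_url(
        "https://example.com",
        limit=10,
        max_depth=2,  # listed in the allowed crawl_url parameters
    )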
@@ -808,6 +807,9 @@ class FirecrawlApp:
         Raises:
             Exception: If crawl initiation fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "async_crawl_url")
+
         crawl_params = {}

         # Add individual parameters
@@ -1076,7 +1078,7 @@ class FirecrawlApp:
         sitemap_only: Optional[bool] = None,
         limit: Optional[int] = None,
         timeout: Optional[int] = None,
-        params: Optional[MapParams] = None) -> MapResponse:
+        **kwargs) -> MapResponse:
         """
         Map and discover links from a URL.

@@ -1088,7 +1090,7 @@ class FirecrawlApp:
             sitemap_only (Optional[bool]): Only use sitemap.xml
             limit (Optional[int]): Maximum URLs to return
             timeout (Optional[int]): Request timeout in milliseconds
-            params (Optional[MapParams]): Additional mapping parameters
+            **kwargs: Additional parameters to pass to the API

         Returns:
             MapResponse: Response containing:
@@ -1099,10 +1101,11 @@ class FirecrawlApp:
         Raises:
             Exception: If mapping fails or response cannot be parsed
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "map_url")
+
         # Build map parameters
         map_params = {}
-        if params:
-            map_params.update(params.dict(exclude_none=True))

         # Add individual parameters
         if search is not None:
@@ -1118,6 +1121,9 @@ class FirecrawlApp:
         if timeout is not None:
             map_params['timeout'] = timeout

+        # Add any additional kwargs
+        map_params.update(kwargs)
+
         # Create final params object
         final_params = MapParams(**map_params)
         params_dict = final_params.dict(exclude_none=True)
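
Since map_url no longer takes a MapParams object, extra options now arrive through **kwargs and are folded into map_params exactly as the hunk above shows. A minimal sketch (URL illustrative):

    from firecrawl import FirecrawlApp

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

    # sitemap_only and limit are explicit parameters; other supported options
    # can ride in as kwargs and are validated against the "map_url" set
    # before MapParams(**map_params) is constructed.
    links = app.map_url("https://example.com", sitemap_only=True, limit=100)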
@@ -1205,6 +1211,9 @@ class FirecrawlApp:
         Raises:
             Exception: If batch scrape fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "batch_scrape_urls")
+
         scrape_params = {}

         # Add individual parameters
@@ -1328,6 +1337,9 @@ class FirecrawlApp:
         Raises:
             Exception: If job initiation fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "async_batch_scrape_urls")
+
         scrape_params = {}

         # Add individual parameters
@@ -1446,6 +1458,9 @@ class FirecrawlApp:
         Raises:
             Exception: If batch scrape job fails to start
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "batch_scrape_urls_and_watch")
+
         scrape_params = {}

         # Add individual parameters
@@ -2394,6 +2409,56 @@ class FirecrawlApp:

         return {'success': False, 'error': 'Internal server error'}

+    def _validate_kwargs(self, kwargs: Dict[str, Any], method_name: str) -> None:
+        """
+        Validate additional keyword arguments before they are passed to the API.
+        This provides early validation before the Pydantic model validation.
+
+        Args:
+            kwargs (Dict[str, Any]): Additional keyword arguments to validate
+            method_name (str): Name of the method these kwargs are for
+
+        Raises:
+            ValueError: If kwargs contain invalid or unsupported parameters
+        """
+        if not kwargs:
+            return
+
+        # Known parameter mappings for each method
+        method_params = {
+            "scrape_url": {"formats", "include_tags", "exclude_tags", "only_main_content", "wait_for",
+                           "timeout", "location", "mobile", "skip_tls_verification", "remove_base64_images",
+                           "block_ads", "proxy", "extract", "json_options", "actions"},
+            "search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options"},
+            "crawl_url": {"include_paths", "exclude_paths", "max_depth", "max_discovery_depth", "limit",
+                          "allow_backward_links", "allow_external_links", "ignore_sitemap", "scrape_options",
+                          "webhook", "deduplicate_similar_urls", "ignore_query_parameters", "regex_on_full_url"},
+            "map_url": {"search", "ignore_sitemap", "include_subdomains", "sitemap_only", "limit", "timeout"},
+            "batch_scrape_urls": {"formats", "headers", "include_tags", "exclude_tags", "only_main_content",
+                                  "wait_for", "timeout", "location", "mobile", "skip_tls_verification",
+                                  "remove_base64_images", "block_ads", "proxy", "extract", "json_options",
+                                  "actions", "agent"},
+            "async_batch_scrape_urls": {"formats", "headers", "include_tags", "exclude_tags", "only_main_content",
+                                        "wait_for", "timeout", "location", "mobile", "skip_tls_verification",
+                                        "remove_base64_images", "block_ads", "proxy", "extract", "json_options",
+                                        "actions", "agent"},
+            "batch_scrape_urls_and_watch": {"formats", "headers", "include_tags", "exclude_tags", "only_main_content",
+                                            "wait_for", "timeout", "location", "mobile", "skip_tls_verification",
+                                            "remove_base64_images", "block_ads", "proxy", "extract", "json_options",
+                                            "actions", "agent"}
+        }
+
+        # Get allowed parameters for this method
+        allowed_params = method_params.get(method_name, set())
+
+        # Check for unknown parameters
+        unknown_params = set(kwargs.keys()) - allowed_params
+        if unknown_params:
+            raise ValueError(f"Unsupported parameter(s) for {method_name}: {', '.join(unknown_params)}. Please refer to the API documentation for the correct parameters.")
+
+        # Additional type validation can be added here if needed
+        # For now, we rely on Pydantic models for detailed type validation
+
 class CrawlWatcher:
     """
     A class to watch and handle crawl job events via WebSocket connection.
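
The practical effect of _validate_kwargs is that a mistyped or unsupported option now fails fast with a ValueError instead of being silently dropped or bounced by the server. A sketch of the failure mode, with a deliberately wrong parameter name:

    from firecrawl import FirecrawlApp

    app = FirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key

    try:
        # "max_results" is not in the allowed set for search, so
        # _validate_kwargs raises before any HTTP request is made.
        app.search("firecrawl", max_results=5)
    except ValueError as err:
        print(err)  # Unsupported parameter(s) for search: max_results. ...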
@@ -2710,7 +2775,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def scrape_url(
         self,
         url: str,
-        formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json"]]] = None,
+        *,
+        formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None,
         include_tags: Optional[List[str]] = None,
         exclude_tags: Optional[List[str]] = None,
         only_main_content: Optional[bool] = None,
@@ -2724,9 +2790,10 @@ class AsyncFirecrawlApp(FirecrawlApp):
         proxy: Optional[Literal["basic", "stealth"]] = None,
         extract: Optional[JsonConfig] = None,
         json_options: Optional[JsonConfig] = None,
-        actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None) -> ScrapeResponse[Any]:
+        actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
+        **kwargs) -> ScrapeResponse[Any]:
         """
-        Scrape and extract content from a URL asynchronously.
+        Scrape a single URL asynchronously.

         Args:
             url (str): Target URL to scrape
@@ -2745,17 +2812,26 @@ class AsyncFirecrawlApp(FirecrawlApp):
             extract (Optional[JsonConfig]): Content extraction settings
             json_options (Optional[JsonConfig]): JSON extraction settings
             actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
+            **kwargs: Additional parameters to pass to the API

         Returns:
             ScrapeResponse with:
-            * Requested content formats
-            * Page metadata
-            * Extraction results
-            * Success/error status
+            * success - Whether scrape was successful
+            * markdown - Markdown content if requested
+            * html - HTML content if requested
+            * rawHtml - Raw HTML content if requested
+            * links - Extracted links if requested
+            * screenshot - Screenshot if requested
+            * extract - Extracted data if requested
+            * json - JSON data if requested
+            * error - Error message if scrape failed

         Raises:
             Exception: If scraping fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "scrape_url")
+
         headers = self._prepare_headers()

         # Build scrape parameters
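
Because the async scrape_url signature now inserts a bare *, everything after url must be passed by keyword, and positional format lists raise a TypeError. A hedged async sketch (AsyncFirecrawlApp is defined in firecrawl.firecrawl; the URL and key are placeholders):

    import asyncio

    from firecrawl.firecrawl import AsyncFirecrawlApp

    async def main():
        app = AsyncFirecrawlApp(api_key="fc-YOUR-API-KEY")  # placeholder key
        # formats is keyword-only now; "changeTracking" is newly accepted.
        page = await app.scrape_url(
            "https://example.com",
            formats=["markdown", "changeTracking"],
        )
        print(page.markdown)

    asyncio.run(main())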
@@ -2879,6 +2955,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
         Raises:
             Exception: If batch scrape fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "batch_scrape_urls")
+
         scrape_params = {}

         # Add individual parameters
@@ -3007,6 +3086,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
         Raises:
             Exception: If job initiation fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "async_batch_scrape_urls")
+
         scrape_params = {}

         # Add individual parameters
@@ -3126,6 +3208,9 @@ class AsyncFirecrawlApp(FirecrawlApp):
         Raises:
             Exception: If crawl fails
         """
+        # Validate any additional kwargs
+        self._validate_kwargs(kwargs, "crawl_url")
+
         crawl_params = {}

         # Add individual parameters