diff --git a/apps/python-sdk/example_async.py b/apps/python-sdk/example_async.py index c554d695..b4b76abf 100644 --- a/apps/python-sdk/example_async.py +++ b/apps/python-sdk/example_async.py @@ -60,7 +60,7 @@ async def example_llm_extraction(): class TopArticlesSchema(BaseModel): top: List[ArticleSchema] = Field(..., description="Top 5 stories") - extract_config = ExtractConfig(schema=TopArticlesSchema.model_json_schema()) + extract_config = JsonConfig(schema=TopArticlesSchema.model_json_schema()) llm_extraction_result = await app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config) diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py index eea9ba54..b641b7cd 100644 --- a/apps/python-sdk/firecrawl/__init__.py +++ b/apps/python-sdk/firecrawl/__init__.py @@ -11,9 +11,9 @@ For more information visit https://github.com/firecrawl/ import logging import os -from .firecrawl import FirecrawlApp, ExtractConfig # noqa +from .firecrawl import FirecrawlApp, JsonConfig # noqa -__version__ = "2.0.1" +__version__ = "2.0.2" # Define the logger for the Firecrawl project logger: logging.Logger = logging.getLogger("firecrawl") diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index fb35bb78..39323044 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -27,7 +27,7 @@ from pydantic import Field # Suppress Pydantic warnings about attribute shadowing warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"") warnings.filterwarnings("ignore", message="Field name \"json\" in \"ChangeTrackingData\" shadows an attribute in parent \"BaseModel\"") -warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractConfig\" shadows an attribute in parent \"BaseModel\"") +warnings.filterwarnings("ignore", message="Field name \"schema\" in \"JsonConfig\" shadows an attribute in parent \"BaseModel\"") warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractParams\" shadows an attribute in parent \"BaseModel\"") @@ -186,7 +186,7 @@ class ExtractAgent(pydantic.BaseModel): """Configuration for the agent in extract operations.""" model: Literal["FIRE-1"] = "FIRE-1" -class ExtractConfig(pydantic.BaseModel): +class JsonConfig(pydantic.BaseModel): """Configuration for extraction.""" prompt: Optional[str] = None schema: Optional[Any] = None @@ -195,8 +195,8 @@ class ExtractConfig(pydantic.BaseModel): class ScrapeParams(CommonOptions): """Parameters for scraping operations.""" - extract: Optional[ExtractConfig] = None - jsonOptions: Optional[ExtractConfig] = None + extract: Optional[JsonConfig] = None + jsonOptions: Optional[JsonConfig] = None actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None agent: Optional[AgentOptions] = None @@ -454,8 +454,8 @@ class FirecrawlApp: remove_base64_images: Optional[bool] = None, block_ads: Optional[bool] = None, proxy: Optional[Literal["basic", "stealth"]] = None, - extract: Optional[ExtractConfig] = None, - json_options: Optional[ExtractConfig] = None, + extract: Optional[JsonConfig] = None, + json_options: Optional[JsonConfig] = None, actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None, **kwargs) -> ScrapeResponse[Any]: """ @@ -475,8 +475,8 @@ class FirecrawlApp: remove_base64_images (Optional[bool]): Remove base64 images block_ads (Optional[bool]): Block ads proxy (Optional[Literal["basic", "stealth"]]): Proxy type (basic/stealth) - extract (Optional[ExtractConfig]): Content extraction settings - json_options (Optional[ExtractConfig]): JSON extraction settings + extract (Optional[JsonConfig]): Content extraction settings + json_options (Optional[JsonConfig]): JSON extraction settings actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform @@ -1161,8 +1161,8 @@ class FirecrawlApp: remove_base64_images: Optional[bool] = None, block_ads: Optional[bool] = None, proxy: Optional[Literal["basic", "stealth"]] = None, - extract: Optional[ExtractConfig] = None, - json_options: Optional[ExtractConfig] = None, + extract: Optional[JsonConfig] = None, + json_options: Optional[JsonConfig] = None, actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None, agent: Optional[AgentOptions] = None, poll_interval: Optional[int] = 2, @@ -1187,8 +1187,8 @@ class FirecrawlApp: remove_base64_images (Optional[bool]): Remove base64 encoded images block_ads (Optional[bool]): Block advertisements proxy (Optional[Literal]): Proxy type to use - extract (Optional[ExtractConfig]): Content extraction config - json_options (Optional[ExtractConfig]): JSON extraction config + extract (Optional[JsonConfig]): Content extraction config + json_options (Optional[JsonConfig]): JSON extraction config actions (Optional[List[Union]]): Actions to perform agent (Optional[AgentOptions]): Agent configuration poll_interval (Optional[int]): Seconds between status checks (default: 2) @@ -1285,8 +1285,8 @@ class FirecrawlApp: remove_base64_images: Optional[bool] = None, block_ads: Optional[bool] = None, proxy: Optional[Literal["basic", "stealth"]] = None, - extract: Optional[ExtractConfig] = None, - json_options: Optional[ExtractConfig] = None, + extract: Optional[JsonConfig] = None, + json_options: Optional[JsonConfig] = None, actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None, agent: Optional[AgentOptions] = None, idempotency_key: Optional[str] = None, @@ -1310,8 +1310,8 @@ class FirecrawlApp: remove_base64_images (Optional[bool]): Remove base64 encoded images block_ads (Optional[bool]): Block advertisements proxy (Optional[Literal]): Proxy type to use - extract (Optional[ExtractConfig]): Content extraction config - json_options (Optional[ExtractConfig]): JSON extraction config + extract (Optional[JsonConfig]): Content extraction config + json_options (Optional[JsonConfig]): JSON extraction config actions (Optional[List[Union]]): Actions to perform agent (Optional[AgentOptions]): Agent configuration idempotency_key (Optional[str]): Unique key to prevent duplicate requests @@ -1407,8 +1407,8 @@ class FirecrawlApp: remove_base64_images: Optional[bool] = None, block_ads: Optional[bool] = None, proxy: Optional[Literal["basic", "stealth"]] = None, - extract: Optional[ExtractConfig] = None, - json_options: Optional[ExtractConfig] = None, + extract: Optional[JsonConfig] = None, + json_options: Optional[JsonConfig] = None, actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None, agent: Optional[AgentOptions] = None, idempotency_key: Optional[str] = None, @@ -1432,8 +1432,8 @@ class FirecrawlApp: remove_base64_images (Optional[bool]): Remove base64 encoded images block_ads (Optional[bool]): Block advertisements proxy (Optional[Literal]): Proxy type to use - extract (Optional[ExtractConfig]): Content extraction config - json_options (Optional[ExtractConfig]): JSON extraction config + extract (Optional[JsonConfig]): Content extraction config + json_options (Optional[JsonConfig]): JSON extraction config actions (Optional[List[Union]]): Actions to perform agent (Optional[AgentOptions]): Agent configuration idempotency_key (Optional[str]): Unique key to prevent duplicate requests @@ -2706,8 +2706,8 @@ class AsyncFirecrawlApp(FirecrawlApp): remove_base64_images: Optional[bool] = None, block_ads: Optional[bool] = None, proxy: Optional[Literal["basic", "stealth"]] = None, - extract: Optional[ExtractConfig] = None, - json_options: Optional[ExtractConfig] = None, + extract: Optional[JsonConfig] = None, + json_options: Optional[JsonConfig] = None, actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None) -> ScrapeResponse[Any]: """ Scrape and extract content from a URL asynchronously. @@ -2726,8 +2726,8 @@ class AsyncFirecrawlApp(FirecrawlApp): remove_base64_images (Optional[bool]): Remove base64 images block_ads (Optional[bool]): Block ads proxy (Optional[Literal["basic", "stealth"]]): Proxy type (basic/stealth) - extract (Optional[ExtractConfig]): Content extraction settings - json_options (Optional[ExtractConfig]): JSON extraction settings + extract (Optional[JsonConfig]): Content extraction settings + json_options (Optional[JsonConfig]): JSON extraction settings actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform Returns: @@ -2820,8 +2820,8 @@ class AsyncFirecrawlApp(FirecrawlApp): remove_base64_images: Optional[bool] = None, block_ads: Optional[bool] = None, proxy: Optional[Literal["basic", "stealth"]] = None, - extract: Optional[ExtractConfig] = None, - json_options: Optional[ExtractConfig] = None, + extract: Optional[JsonConfig] = None, + json_options: Optional[JsonConfig] = None, actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None, agent: Optional[AgentOptions] = None, poll_interval: Optional[int] = 2, @@ -2846,8 +2846,8 @@ class AsyncFirecrawlApp(FirecrawlApp): remove_base64_images (Optional[bool]): Remove base64 encoded images block_ads (Optional[bool]): Block advertisements proxy (Optional[Literal]): Proxy type to use - extract (Optional[ExtractConfig]): Content extraction config - json_options (Optional[ExtractConfig]): JSON extraction config + extract (Optional[JsonConfig]): Content extraction config + json_options (Optional[JsonConfig]): JSON extraction config actions (Optional[List[Union]]): Actions to perform agent (Optional[AgentOptions]): Agent configuration poll_interval (Optional[int]): Seconds between status checks (default: 2) @@ -2949,8 +2949,8 @@ class AsyncFirecrawlApp(FirecrawlApp): remove_base64_images: Optional[bool] = None, block_ads: Optional[bool] = None, proxy: Optional[Literal["basic", "stealth"]] = None, - extract: Optional[ExtractConfig] = None, - json_options: Optional[ExtractConfig] = None, + extract: Optional[JsonConfig] = None, + json_options: Optional[JsonConfig] = None, actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None, agent: Optional[AgentOptions] = None, idempotency_key: Optional[str] = None, @@ -2974,8 +2974,8 @@ class AsyncFirecrawlApp(FirecrawlApp): remove_base64_images (Optional[bool]): Remove base64 encoded images block_ads (Optional[bool]): Block advertisements proxy (Optional[Literal]): Proxy type to use - extract (Optional[ExtractConfig]): Content extraction config - json_options (Optional[ExtractConfig]): JSON extraction config + extract (Optional[JsonConfig]): Content extraction config + json_options (Optional[JsonConfig]): JSON extraction config actions (Optional[List[Union]]): Actions to perform agent (Optional[AgentOptions]): Agent configuration idempotency_key (Optional[str]): Unique key to prevent duplicate requests