mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-08 01:49:04 +08:00
Nick: json config instead of extract config
This commit is contained in:
parent
9d0baec589
commit
a74b2dc59f
@ -60,7 +60,7 @@ async def example_llm_extraction():
|
||||
class TopArticlesSchema(BaseModel):
|
||||
top: List[ArticleSchema] = Field(..., description="Top 5 stories")
|
||||
|
||||
extract_config = ExtractConfig(schema=TopArticlesSchema.model_json_schema())
|
||||
extract_config = JsonConfig(schema=TopArticlesSchema.model_json_schema())
|
||||
|
||||
llm_extraction_result = await app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config)
|
||||
|
||||
|
@ -11,9 +11,9 @@ For more information visit https://github.com/firecrawl/
|
||||
import logging
|
||||
import os
|
||||
|
||||
from .firecrawl import FirecrawlApp, ExtractConfig # noqa
|
||||
from .firecrawl import FirecrawlApp, JsonConfig # noqa
|
||||
|
||||
__version__ = "2.0.1"
|
||||
__version__ = "2.0.2"
|
||||
|
||||
# Define the logger for the Firecrawl project
|
||||
logger: logging.Logger = logging.getLogger("firecrawl")
|
||||
|
@ -27,7 +27,7 @@ from pydantic import Field
|
||||
# Suppress Pydantic warnings about attribute shadowing
|
||||
warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
|
||||
warnings.filterwarnings("ignore", message="Field name \"json\" in \"ChangeTrackingData\" shadows an attribute in parent \"BaseModel\"")
|
||||
warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractConfig\" shadows an attribute in parent \"BaseModel\"")
|
||||
warnings.filterwarnings("ignore", message="Field name \"schema\" in \"JsonConfig\" shadows an attribute in parent \"BaseModel\"")
|
||||
warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractParams\" shadows an attribute in parent \"BaseModel\"")
|
||||
|
||||
|
||||
@ -186,7 +186,7 @@ class ExtractAgent(pydantic.BaseModel):
|
||||
"""Configuration for the agent in extract operations."""
|
||||
model: Literal["FIRE-1"] = "FIRE-1"
|
||||
|
||||
class ExtractConfig(pydantic.BaseModel):
|
||||
class JsonConfig(pydantic.BaseModel):
|
||||
"""Configuration for extraction."""
|
||||
prompt: Optional[str] = None
|
||||
schema: Optional[Any] = None
|
||||
@ -195,8 +195,8 @@ class ExtractConfig(pydantic.BaseModel):
|
||||
|
||||
class ScrapeParams(CommonOptions):
|
||||
"""Parameters for scraping operations."""
|
||||
extract: Optional[ExtractConfig] = None
|
||||
jsonOptions: Optional[ExtractConfig] = None
|
||||
extract: Optional[JsonConfig] = None
|
||||
jsonOptions: Optional[JsonConfig] = None
|
||||
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None
|
||||
agent: Optional[AgentOptions] = None
|
||||
|
||||
@ -454,8 +454,8 @@ class FirecrawlApp:
|
||||
remove_base64_images: Optional[bool] = None,
|
||||
block_ads: Optional[bool] = None,
|
||||
proxy: Optional[Literal["basic", "stealth"]] = None,
|
||||
extract: Optional[ExtractConfig] = None,
|
||||
json_options: Optional[ExtractConfig] = None,
|
||||
extract: Optional[JsonConfig] = None,
|
||||
json_options: Optional[JsonConfig] = None,
|
||||
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
||||
**kwargs) -> ScrapeResponse[Any]:
|
||||
"""
|
||||
@ -475,8 +475,8 @@ class FirecrawlApp:
|
||||
remove_base64_images (Optional[bool]): Remove base64 images
|
||||
block_ads (Optional[bool]): Block ads
|
||||
proxy (Optional[Literal["basic", "stealth"]]): Proxy type (basic/stealth)
|
||||
extract (Optional[ExtractConfig]): Content extraction settings
|
||||
json_options (Optional[ExtractConfig]): JSON extraction settings
|
||||
extract (Optional[JsonConfig]): Content extraction settings
|
||||
json_options (Optional[JsonConfig]): JSON extraction settings
|
||||
actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
|
||||
|
||||
|
||||
@ -1161,8 +1161,8 @@ class FirecrawlApp:
|
||||
remove_base64_images: Optional[bool] = None,
|
||||
block_ads: Optional[bool] = None,
|
||||
proxy: Optional[Literal["basic", "stealth"]] = None,
|
||||
extract: Optional[ExtractConfig] = None,
|
||||
json_options: Optional[ExtractConfig] = None,
|
||||
extract: Optional[JsonConfig] = None,
|
||||
json_options: Optional[JsonConfig] = None,
|
||||
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
||||
agent: Optional[AgentOptions] = None,
|
||||
poll_interval: Optional[int] = 2,
|
||||
@ -1187,8 +1187,8 @@ class FirecrawlApp:
|
||||
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
||||
block_ads (Optional[bool]): Block advertisements
|
||||
proxy (Optional[Literal]): Proxy type to use
|
||||
extract (Optional[ExtractConfig]): Content extraction config
|
||||
json_options (Optional[ExtractConfig]): JSON extraction config
|
||||
extract (Optional[JsonConfig]): Content extraction config
|
||||
json_options (Optional[JsonConfig]): JSON extraction config
|
||||
actions (Optional[List[Union]]): Actions to perform
|
||||
agent (Optional[AgentOptions]): Agent configuration
|
||||
poll_interval (Optional[int]): Seconds between status checks (default: 2)
|
||||
@ -1285,8 +1285,8 @@ class FirecrawlApp:
|
||||
remove_base64_images: Optional[bool] = None,
|
||||
block_ads: Optional[bool] = None,
|
||||
proxy: Optional[Literal["basic", "stealth"]] = None,
|
||||
extract: Optional[ExtractConfig] = None,
|
||||
json_options: Optional[ExtractConfig] = None,
|
||||
extract: Optional[JsonConfig] = None,
|
||||
json_options: Optional[JsonConfig] = None,
|
||||
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
||||
agent: Optional[AgentOptions] = None,
|
||||
idempotency_key: Optional[str] = None,
|
||||
@ -1310,8 +1310,8 @@ class FirecrawlApp:
|
||||
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
||||
block_ads (Optional[bool]): Block advertisements
|
||||
proxy (Optional[Literal]): Proxy type to use
|
||||
extract (Optional[ExtractConfig]): Content extraction config
|
||||
json_options (Optional[ExtractConfig]): JSON extraction config
|
||||
extract (Optional[JsonConfig]): Content extraction config
|
||||
json_options (Optional[JsonConfig]): JSON extraction config
|
||||
actions (Optional[List[Union]]): Actions to perform
|
||||
agent (Optional[AgentOptions]): Agent configuration
|
||||
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
||||
@ -1407,8 +1407,8 @@ class FirecrawlApp:
|
||||
remove_base64_images: Optional[bool] = None,
|
||||
block_ads: Optional[bool] = None,
|
||||
proxy: Optional[Literal["basic", "stealth"]] = None,
|
||||
extract: Optional[ExtractConfig] = None,
|
||||
json_options: Optional[ExtractConfig] = None,
|
||||
extract: Optional[JsonConfig] = None,
|
||||
json_options: Optional[JsonConfig] = None,
|
||||
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
||||
agent: Optional[AgentOptions] = None,
|
||||
idempotency_key: Optional[str] = None,
|
||||
@ -1432,8 +1432,8 @@ class FirecrawlApp:
|
||||
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
||||
block_ads (Optional[bool]): Block advertisements
|
||||
proxy (Optional[Literal]): Proxy type to use
|
||||
extract (Optional[ExtractConfig]): Content extraction config
|
||||
json_options (Optional[ExtractConfig]): JSON extraction config
|
||||
extract (Optional[JsonConfig]): Content extraction config
|
||||
json_options (Optional[JsonConfig]): JSON extraction config
|
||||
actions (Optional[List[Union]]): Actions to perform
|
||||
agent (Optional[AgentOptions]): Agent configuration
|
||||
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
||||
@ -2706,8 +2706,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
remove_base64_images: Optional[bool] = None,
|
||||
block_ads: Optional[bool] = None,
|
||||
proxy: Optional[Literal["basic", "stealth"]] = None,
|
||||
extract: Optional[ExtractConfig] = None,
|
||||
json_options: Optional[ExtractConfig] = None,
|
||||
extract: Optional[JsonConfig] = None,
|
||||
json_options: Optional[JsonConfig] = None,
|
||||
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None) -> ScrapeResponse[Any]:
|
||||
"""
|
||||
Scrape and extract content from a URL asynchronously.
|
||||
@ -2726,8 +2726,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
remove_base64_images (Optional[bool]): Remove base64 images
|
||||
block_ads (Optional[bool]): Block ads
|
||||
proxy (Optional[Literal["basic", "stealth"]]): Proxy type (basic/stealth)
|
||||
extract (Optional[ExtractConfig]): Content extraction settings
|
||||
json_options (Optional[ExtractConfig]): JSON extraction settings
|
||||
extract (Optional[JsonConfig]): Content extraction settings
|
||||
json_options (Optional[JsonConfig]): JSON extraction settings
|
||||
actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
|
||||
|
||||
Returns:
|
||||
@ -2820,8 +2820,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
remove_base64_images: Optional[bool] = None,
|
||||
block_ads: Optional[bool] = None,
|
||||
proxy: Optional[Literal["basic", "stealth"]] = None,
|
||||
extract: Optional[ExtractConfig] = None,
|
||||
json_options: Optional[ExtractConfig] = None,
|
||||
extract: Optional[JsonConfig] = None,
|
||||
json_options: Optional[JsonConfig] = None,
|
||||
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
||||
agent: Optional[AgentOptions] = None,
|
||||
poll_interval: Optional[int] = 2,
|
||||
@ -2846,8 +2846,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
||||
block_ads (Optional[bool]): Block advertisements
|
||||
proxy (Optional[Literal]): Proxy type to use
|
||||
extract (Optional[ExtractConfig]): Content extraction config
|
||||
json_options (Optional[ExtractConfig]): JSON extraction config
|
||||
extract (Optional[JsonConfig]): Content extraction config
|
||||
json_options (Optional[JsonConfig]): JSON extraction config
|
||||
actions (Optional[List[Union]]): Actions to perform
|
||||
agent (Optional[AgentOptions]): Agent configuration
|
||||
poll_interval (Optional[int]): Seconds between status checks (default: 2)
|
||||
@ -2949,8 +2949,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
remove_base64_images: Optional[bool] = None,
|
||||
block_ads: Optional[bool] = None,
|
||||
proxy: Optional[Literal["basic", "stealth"]] = None,
|
||||
extract: Optional[ExtractConfig] = None,
|
||||
json_options: Optional[ExtractConfig] = None,
|
||||
extract: Optional[JsonConfig] = None,
|
||||
json_options: Optional[JsonConfig] = None,
|
||||
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
||||
agent: Optional[AgentOptions] = None,
|
||||
idempotency_key: Optional[str] = None,
|
||||
@ -2974,8 +2974,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
||||
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
||||
block_ads (Optional[bool]): Block advertisements
|
||||
proxy (Optional[Literal]): Proxy type to use
|
||||
extract (Optional[ExtractConfig]): Content extraction config
|
||||
json_options (Optional[ExtractConfig]): JSON extraction config
|
||||
extract (Optional[JsonConfig]): Content extraction config
|
||||
json_options (Optional[JsonConfig]): JSON extraction config
|
||||
actions (Optional[List[Union]]): Actions to perform
|
||||
agent (Optional[AgentOptions]): Agent configuration
|
||||
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
||||
|
Loading…
x
Reference in New Issue
Block a user