mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-06-04 11:24:40 +08:00

Nick: (#1506)

commit a0a1675829
parent 8053a7cedd
@@ -11,9 +11,9 @@ For more information visit https://github.com/firecrawl/
 import logging
 import os
 
-from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions # noqa
+from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
 
-__version__ = "2.4.3"
+__version__ = "2.5.0"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
@@ -135,6 +135,12 @@ class WebhookConfig(pydantic.BaseModel):
     metadata: Optional[Dict[str, str]] = None
     events: Optional[List[Literal["completed", "failed", "page", "started"]]] = None
 
+class ChangeTrackingOptions(pydantic.BaseModel):
+    """Configuration for change tracking."""
+    modes: Optional[List[Literal["git-diff", "json"]]] = None
+    schema: Optional[Any] = None
+    prompt: Optional[str] = None
+
 class ScrapeOptions(pydantic.BaseModel):
     """Parameters for scraping operations."""
     formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None
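As an aside, a minimal sketch of how the new model could be constructed once this version is installed (not part of the commit; the schema and prompt values are illustrative placeholders, and they presumably only matter for the "json" mode):

from firecrawl import ChangeTrackingOptions

# All three fields are optional; "git-diff" and "json" are the supported modes.
tracking = ChangeTrackingOptions(
    modes=["git-diff", "json"],
    schema={"type": "object", "properties": {"price": {"type": "number"}}},
    prompt="Track changes to the product price",
)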
@@ -150,6 +156,7 @@ class ScrapeOptions(pydantic.BaseModel):
     removeBase64Images: Optional[bool] = None
     blockAds: Optional[bool] = None
     proxy: Optional[Literal["basic", "stealth"]] = None
+    changeTrackingOptions: Optional[ChangeTrackingOptions] = None
 
 class WaitAction(pydantic.BaseModel):
     """Wait action to perform during scraping."""
@@ -454,6 +461,7 @@ class FirecrawlApp:
         extract: Optional[JsonConfig] = None,
         json_options: Optional[JsonConfig] = None,
         actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
+        change_tracking_options: Optional[ChangeTrackingOptions] = None,
         **kwargs) -> ScrapeResponse[Any]:
         """
         Scrape and extract content from a URL.
@@ -475,6 +483,7 @@ class FirecrawlApp:
             extract (Optional[JsonConfig]): Content extraction settings
             json_options (Optional[JsonConfig]): JSON extraction settings
             actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
+            change_tracking_options (Optional[ChangeTrackingOptions]): Change tracking settings
 
 
         Returns:
@@ -530,6 +539,9 @@ class FirecrawlApp:
             scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
         if actions:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
+        if change_tracking_options:
+            scrape_params['changeTrackingOptions'] = change_tracking_options.dict(exclude_none=True)
+
         scrape_params.update(kwargs)
 
         # Make request
@@ -2424,7 +2436,7 @@ class FirecrawlApp:
         method_params = {
             "scrape_url": {"formats", "include_tags", "exclude_tags", "only_main_content", "wait_for",
                            "timeout", "location", "mobile", "skip_tls_verification", "remove_base64_images",
-                           "block_ads", "proxy", "extract", "json_options", "actions"},
+                           "block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options"},
             "search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options"},
             "crawl_url": {"include_paths", "exclude_paths", "max_depth", "max_discovery_depth", "limit",
                           "allow_backward_links", "allow_external_links", "ignore_sitemap", "scrape_options",
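Putting the pieces together, a minimal usage sketch under the signatures added in this commit (the API key and target URL are placeholders; the example assumes version 2.5.0 of the SDK):

from firecrawl import FirecrawlApp, ChangeTrackingOptions

# Placeholder credentials and URL; change tracking is requested via the
# "changeTracking" format plus the new change_tracking_options parameter.
app = FirecrawlApp(api_key="fc-YOUR-API-KEY")
result = app.scrape_url(
    "https://example.com",
    formats=["markdown", "changeTracking"],
    change_tracking_options=ChangeTrackingOptions(modes=["git-diff"]),
)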