diff --git a/apps/python-sdk/example.py b/apps/python-sdk/example.py
index ae4258f7..705d2e0c 100644
--- a/apps/python-sdk/example.py
+++ b/apps/python-sdk/example.py
@@ -1,53 +1,45 @@
-import time
-import nest_asyncio
-import uuid
-from firecrawl.firecrawl import FirecrawlApp
+from firecrawl.firecrawl import ExtractConfig, FirecrawlApp
 from pydantic import BaseModel, Field
 from typing import List
+import time
+app = FirecrawlApp(api_url="https://api.firecrawl.dev")
-app = FirecrawlApp(api_key="fc-")
-
-# Scrape a website:
-scrape_result = app.scrape_url('firecrawl.dev')
-print(scrape_result['markdown'])
+# # Scrape a website:
+scrape_result = app.scrape_url('example.com', formats=["markdown", "html"])
+print(scrape_result.markdown)
-# Test batch scrape
+# # Test batch scrape
 urls = ['https://example.com', 'https://docs.firecrawl.dev']
-batch_scrape_params = {
-    'formats': ['markdown', 'html'],
-}
-
 # Synchronous batch scrape
-batch_result = app.batch_scrape_urls(urls, batch_scrape_params)
+batch_result = app.batch_scrape_urls(urls, formats=["markdown", "html"])
 print("Synchronous Batch Scrape Result:")
-print(batch_result['data'][0]['markdown'])
+print(batch_result.data[0].markdown)
-# Asynchronous batch scrape
-async_batch_result = app.async_batch_scrape_urls(urls, batch_scrape_params)
+# # Asynchronous batch scrape
+async_batch_result = app.async_batch_scrape_urls(urls, formats=["markdown", "html"])
 print("\nAsynchronous Batch Scrape Result:")
 print(async_batch_result)
 # Crawl a website:
-idempotency_key = str(uuid.uuid4()) # optional idempotency key
-crawl_result = app.crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, 2, idempotency_key)
-print(crawl_result)
+crawl_result = app.crawl_url('firecrawl.dev', exclude_paths=['blog/*'])
+print(crawl_result.data[0].markdown)
-# Asynchronous Crawl a website:
-async_result = app.async_crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, "")
+# # Asynchronous Crawl a website:
+async_result = app.async_crawl_url('firecrawl.dev', exclude_paths=['blog/*'])
 print(async_result)
-crawl_status = app.check_crawl_status(async_result['id'])
+crawl_status = app.check_crawl_status(async_result.id)
 print(crawl_status)
 attempts = 15
-while attempts > 0 and crawl_status['status'] != 'completed':
+while attempts > 0 and crawl_status.status != 'completed':
     print(crawl_status)
-    crawl_status = app.check_crawl_status(async_result['id'])
+    crawl_status = app.check_crawl_status(async_result.id)
     attempts -= 1
     time.sleep(1)
-crawl_status = app.check_crawl_status(async_result['id'])
+crawl_status = app.check_crawl_status(async_result.id)
 print(crawl_status)
 # LLM Extraction:
@@ -61,14 +53,11 @@ class ArticleSchema(BaseModel):
 class TopArticlesSchema(BaseModel):
     top: List[ArticleSchema] = Field(..., description="Top 5 stories")
-llm_extraction_result = app.scrape_url('https://news.ycombinator.com', {
-    'formats': ['extract'],
-    'extract': {
-        'schema': TopArticlesSchema.model_json_schema()
-    }
-})
+extract_config = ExtractConfig(schema=TopArticlesSchema.model_json_schema())
-print(llm_extraction_result['extract'])
+llm_extraction_result = app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config)
+
+print(llm_extraction_result.extract)
 # # Define schema to extract contents into using json schema
 json_schema = {
@@ -94,24 +83,16 @@ json_schema = {
     "required": ["top"]
 }
-app2 = FirecrawlApp(api_key="fc-", version="v0")
+extract_config = ExtractConfig(extractionSchema=json_schema, mode="llm-extraction", pageOptions={"onlyMainContent": True})
+llm_extraction_result = app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config)
-
-llm_extraction_result = app2.scrape_url('https://news.ycombinator.com', {
-    'extractorOptions': {
-        'extractionSchema': json_schema,
-        'mode': 'llm-extraction'
-    },
-    'pageOptions':{
-        'onlyMainContent': True
-    }
-})
+print(llm_extraction_result.extract)
 # print(llm_extraction_result['llm_extraction'])
 # Map a website:
-map_result = app.map_url('https://firecrawl.dev', { 'search': 'blog' })
+map_result = app.map_url('https://firecrawl.dev', search="blog")
 print(map_result)
 # Extract URLs:
@@ -124,14 +105,12 @@ class ExtractSchema(BaseModel):
 extract_schema = ExtractSchema.schema()
 # Perform the extraction
-extract_result = app.extract(['https://firecrawl.dev'], {
-    'prompt': "Extract the title, description, and links from the website",
-    'schema': extract_schema
-})
+extract_result = app.extract(['https://firecrawl.dev'], prompt="Extract the title, description, and links from the website", schema=extract_schema)
 print(extract_result)
 # Crawl a website with WebSockets:
 # inside an async function...
+import nest_asyncio
 nest_asyncio.apply()
 # Define event handlers
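For quick reference, a minimal sketch of how the keyword-argument calls introduced in example.py above would be used. This is only an illustrative snippet, assuming the v2 SDK from this change is installed and that credentials are configured out of band (the examples construct FirecrawlApp without an api_key); the URLs and parameter names are taken from the example code above.

# Minimal usage sketch of the keyword-argument API shown in example.py above.
from firecrawl.firecrawl import FirecrawlApp

app = FirecrawlApp(api_url="https://api.firecrawl.dev")

# Scrape one page, requesting both markdown and HTML output.
scrape_result = app.scrape_url('example.com', formats=["markdown", "html"])
print(scrape_result.markdown)

# Crawl a site while excluding blog paths; results are objects, so fields are
# read as attributes (crawl_result.data[0].markdown) rather than dict keys.
crawl_result = app.crawl_url('firecrawl.dev', exclude_paths=['blog/*'])
print(crawl_result.data[0].markdown)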
diff --git a/apps/python-sdk/example_async.py b/apps/python-sdk/example_async.py
index d5251515..c554d695 100644
--- a/apps/python-sdk/example_async.py
+++ b/apps/python-sdk/example_async.py
@@ -6,51 +6,47 @@
 from firecrawl.firecrawl import AsyncFirecrawlApp
 from pydantic import BaseModel, Field
 from typing import List
-app = AsyncFirecrawlApp(api_key="fc-")
+app = AsyncFirecrawlApp(api_url="https://api.firecrawl.dev")
 async def example_scrape():
     # Scrape a website:
-    scrape_result = await app.scrape_url('firecrawl.dev')
-    print(scrape_result['markdown'])
+    scrape_result = await app.scrape_url('example.com', formats=["markdown", "html"])
+    print(scrape_result.markdown)
 async def example_batch_scrape():
     # Batch scrape
     urls = ['https://example.com', 'https://docs.firecrawl.dev']
-    batch_scrape_params = {
-        'formats': ['markdown', 'html'],
-    }
     # Synchronous batch scrape
-    batch_result = await app.batch_scrape_urls(urls, batch_scrape_params)
+    batch_result = await app.batch_scrape_urls(urls, formats=["markdown", "html"])
     print("Synchronous Batch Scrape Result:")
-    print(batch_result['data'][0]['markdown'])
+    print(batch_result.data[0].markdown)
     # Asynchronous batch scrape
-    async_batch_result = await app.async_batch_scrape_urls(urls, batch_scrape_params)
+    async_batch_result = await app.async_batch_scrape_urls(urls, formats=["markdown", "html"])
     print("\nAsynchronous Batch Scrape Result:")
     print(async_batch_result)
 async def example_crawl():
     # Crawl a website:
-    idempotency_key = str(uuid.uuid4()) # optional idempotency key
-    crawl_result = await app.crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, 2, idempotency_key)
-    print(crawl_result)
+    crawl_result = await app.crawl_url('firecrawl.dev', exclude_paths=['blog/*'])
+    print(crawl_result.data[0].markdown)
     # Asynchronous Crawl a website:
-    async_result = await app.async_crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, "")
+    async_result = await app.async_crawl_url('firecrawl.dev', exclude_paths=['blog/*'])
     print(async_result)
-    crawl_status = await app.check_crawl_status(async_result['id'])
+    crawl_status = await app.check_crawl_status(async_result.id)
     print(crawl_status)
     attempts = 15
-    while attempts > 0 and crawl_status['status'] != 'completed':
+    while attempts > 0 and crawl_status.status != 'completed':
         print(crawl_status)
-        crawl_status = await app.check_crawl_status(async_result['id'])
+        crawl_status = await app.check_crawl_status(async_result.id)
         attempts -= 1
         await asyncio.sleep(1)  # Use async sleep instead of time.sleep
-    crawl_status = await app.check_crawl_status(async_result['id'])
+    crawl_status = await app.check_crawl_status(async_result.id)
     print(crawl_status)
 async def example_llm_extraction():
@@ -64,18 +60,15 @@ async def example_llm_extraction():
     class TopArticlesSchema(BaseModel):
         top: List[ArticleSchema] = Field(..., description="Top 5 stories")
-    llm_extraction_result = await app.scrape_url('https://news.ycombinator.com', {
-        'formats': ['extract'],
-        'extract': {
-            'schema': TopArticlesSchema.model_json_schema()
-        }
-    })
+    extract_config = ExtractConfig(schema=TopArticlesSchema.model_json_schema())
-    print(llm_extraction_result['extract'])
+    llm_extraction_result = await app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config)
+
+    print(llm_extraction_result.extract)
 async def example_map_and_extract():
     # Map a website:
-    map_result = await app.map_url('https://firecrawl.dev', { 'search': 'blog' })
+    map_result = await app.map_url('https://firecrawl.dev', search="blog")
     print(map_result)
     # Extract URLs:
@@ -88,10 +81,7 @@ async def example_map_and_extract():
     extract_schema = ExtractSchema.schema()
     # Perform the extraction
-    extract_result = await app.extract(['https://firecrawl.dev'], {
-        'prompt': "Extract the title, description, and links from the website",
-        'schema': extract_schema
-    })
+    extract_result = await app.extract(['https://firecrawl.dev'], prompt="Extract the title, description, and links from the website", schema=extract_schema)
     print(extract_result)
 # Define event handlers for websocket
diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py
index c30ba0fb..10431768 100644
--- a/apps/python-sdk/firecrawl/__init__.py
+++ b/apps/python-sdk/firecrawl/__init__.py
@@ -13,7 +13,7 @@ import os
 from .firecrawl import FirecrawlApp # noqa
-__version__ = "1.17.0"
+__version__ = "2.0.0"
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py
index d622a7ce..896b89ab 100644
--- a/apps/python-sdk/firecrawl/firecrawl.py
+++ b/apps/python-sdk/firecrawl/firecrawl.py
@@ -3648,12 +3648,12 @@ class AsyncFirecrawlApp(FirecrawlApp):
             job_id (str): The ID of the extraction job
         Returns:
-            ExtractResponse containing:
-            * success (bool): Whether extraction completed successfully
-            * data (Any): Extracted structured data
-            * error (str, optional): Error message if extraction failed
-            * warning (str, optional): Warning message if any
-            * sources (List[str], optional): Source URLs if requested
+            ExtractResponse[Any] with:
+            * success (bool): Whether request succeeded
+            * data (Optional[Any]): Extracted data matching schema
+            * error (Optional[str]): Error message if any
+            * warning (Optional[str]): Warning message if any
+            * sources (Optional[List[str]]): Source URLs if requested
         Raises:
             ValueError: If status check fails
@@ -3669,54 +3669,67 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def async_extract(
             self,
-            urls: List[str],
-            params: Optional[ExtractParams] = None,
+            urls: Optional[List[str]] = None,
+            *,
+            prompt: Optional[str] = None,
+            schema: Optional[Any] = None,
+            system_prompt: Optional[str] = None,
+            allow_external_links: Optional[bool] = False,
+            enable_web_search: Optional[bool] = False,
+            show_sources: Optional[bool] = False,
+            agent: Optional[Dict[str, Any]] = None,
             idempotency_key: Optional[str] = None) -> ExtractResponse[Any]:
         """
         Initiate an asynchronous extraction job without waiting for completion.
         Args:
-            urls (List[str]): URLs to extract information from
-            params (Optional[ExtractParams]): See ExtractParams model:
-                Extraction Config:
-                * prompt - Custom extraction prompt
-                * schema - JSON schema/Pydantic model
-                * systemPrompt - System context
-
-                Behavior Options:
-                * allowExternalLinks - Follow external links
-                * enableWebSearch - Enable web search
-                * includeSubdomains - Include subdomains
-                * showSources - Include source URLs
-
-                Scraping Options:
-                * scrapeOptions - Page scraping config
+            urls (Optional[List[str]]): URLs to extract from
+            prompt (Optional[str]): Custom extraction prompt
+            schema (Optional[Any]): JSON schema/Pydantic model
+            system_prompt (Optional[str]): System context
+            allow_external_links (Optional[bool]): Follow external links
+            enable_web_search (Optional[bool]): Enable web search
+            show_sources (Optional[bool]): Include source URLs
+            agent (Optional[Dict[str, Any]]): Agent configuration
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests
         Returns:
-            ExtractResponse containing:
-            * success (bool): Whether job started successfully
-            * id (str): Unique identifier for the job
-            * error (str, optional): Error message if start failed
+            ExtractResponse[Any] with:
+            * success (bool): Whether request succeeded
+            * data (Optional[Any]): Extracted data matching schema
+            * error (Optional[str]): Error message if any
         Raises:
-            ValueError: If job initiation fails
+            ValueError: If job initiation fails
         """
         headers = self._prepare_headers(idempotency_key)
-
-        schema = params.get('schema') if params else None
+
+        if not prompt and not schema:
+            raise ValueError("Either prompt or schema is required")
+
+        if not urls and not prompt:
+            raise ValueError("Either urls or prompt is required")
+
         if schema:
             if hasattr(schema, 'model_json_schema'):
                 schema = schema.model_json_schema()
-        jsonData = {'urls': urls, **(params or {})}
         request_data = {
-            **jsonData,
-            'allowExternalLinks': params.get('allow_external_links', False) if params else False,
+            'urls': urls or [],
+            'allowExternalLinks': allow_external_links,
+            'enableWebSearch': enable_web_search,
+            'showSources': show_sources,
             'schema': schema,
             'origin': f'python-sdk@{version}'
         }
+        if prompt:
+            request_data['prompt'] = prompt
+        if system_prompt:
+            request_data['systemPrompt'] = system_prompt
+        if agent:
+            request_data['agent'] = agent
+
         try:
             return await self._async_post_request(
                 f'{self.api_url}/v1/extract',
@@ -3729,16 +3742,18 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def generate_llms_text(
             self,
             url: str,
-            params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextStatusResponse:
+            *,
+            max_urls: Optional[int] = None,
+            show_full_text: Optional[bool] = None,
+            experimental_stream: Optional[bool] = None) -> GenerateLLMsTextStatusResponse:
         """
         Generate LLMs.txt for a given URL and monitor until completion.
         Args:
             url (str): Target URL to generate LLMs.txt from
-            params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): See GenerateLLMsTextParams model:
-                Generation Options:
-                * maxUrls - Maximum URLs to process (default: 10)
-                * showFullText - Include full text in output (default: False)
+            max_urls (Optional[int]): Maximum URLs to process (default: 10)
+            show_full_text (Optional[bool]): Include full text in output (default: False)
+            experimental_stream (Optional[bool]): Enable experimental streaming
         Returns:
             GenerateLLMsTextStatusResponse containing:
@@ -3753,15 +3768,15 @@ class AsyncFirecrawlApp(FirecrawlApp):
         Raises:
             Exception: If generation fails
         """
-        if params is None:
-            params = {}
+        params = {}
+        if max_urls is not None:
+            params['maxUrls'] = max_urls
+        if show_full_text is not None:
+            params['showFullText'] = show_full_text
+        if experimental_stream is not None:
+            params['__experimental_stream'] = experimental_stream
-        if isinstance(params, dict):
-            generation_params = GenerateLLMsTextParams(**params)
-        else:
-            generation_params = params
-
-        response = await self.async_generate_llms_text(url, generation_params)
+        response = await self.async_generate_llms_text(url, params)
         if not response.get('success') or 'id' not in response:
             return response
@@ -3783,36 +3798,38 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def async_generate_llms_text(
             self,
             url: str,
-            params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextResponse:
+            *,
+            max_urls: Optional[int] = None,
+            show_full_text: Optional[bool] = None,
+            experimental_stream: Optional[bool] = None) -> GenerateLLMsTextResponse:
         """
         Initiate an asynchronous LLMs.txt generation job without waiting for completion.
         Args:
-            url (str): Target URL to generate LLMs.txt from
-            params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): See GenerateLLMsTextParams model:
-                Generation Options:
-                * maxUrls - Maximum URLs to process (default: 10)
-                * showFullText - Include full text in output (default: False)
+            url (str): Target URL to generate LLMs.txt from
+            max_urls (Optional[int]): Maximum URLs to process (default: 10)
+            show_full_text (Optional[bool]): Include full text in output (default: False)
+            experimental_stream (Optional[bool]): Enable experimental streaming
         Returns:
-            GenerateLLMsTextResponse containing:
-            * success (bool): Whether job started successfully
-            * id (str): Unique identifier for the job
-            * error (str, optional): Error message if start failed
+            GenerateLLMsTextResponse containing:
+            * success (bool): Whether job started successfully
+            * id (str): Unique identifier for the job
+            * error (str, optional): Error message if start failed
         Raises:
-            ValueError: If job initiation fails
+            ValueError: If job initiation fails
         """
-        if params is None:
-            params = {}
-
-        if isinstance(params, dict):
-            generation_params = GenerateLLMsTextParams(**params)
-        else:
-            generation_params = params
+        params = {}
+        if max_urls is not None:
+            params['maxUrls'] = max_urls
+        if show_full_text is not None:
+            params['showFullText'] = show_full_text
+        if experimental_stream is not None:
+            params['__experimental_stream'] = experimental_stream
         headers = self._prepare_headers()
-        json_data = {'url': url, **generation_params.dict(exclude_none=True)}
+        json_data = {'url': url, **params}
         json_data['origin'] = f"python-sdk@{version}"
         try:
@@ -3856,52 +3873,57 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def deep_research(
             self,
             query: str,
-            params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None,
+            *,
+            max_depth: Optional[int] = None,
+            time_limit: Optional[int] = None,
+            max_urls: Optional[int] = None,
+            analysis_prompt: Optional[str] = None,
+            system_prompt: Optional[str] = None,
+            __experimental_stream_steps: Optional[bool] = None,
             on_activity: Optional[Callable[[Dict[str, Any]], None]] = None,
             on_source: Optional[Callable[[Dict[str, Any]], None]] = None) -> DeepResearchStatusResponse:
         """
-        Initiates a deep research operation on a given query and polls until completion, providing real-time updates via callbacks.
+        Initiates a deep research operation on a given query and polls until completion.
         Args:
-            query: Research query or topic to investigate
-
-            params: See DeepResearchParams model:
-                Research Settings:
-                * maxDepth - Maximum research depth (default: 7)
-                * timeLimit - Time limit in seconds (default: 270)
-                * maxUrls - Maximum URLs to process (default: 20)
-
-                Callbacks:
-                * on_activity - Progress callback receiving:
-                  {type, status, message, timestamp, depth}
-                * on_source - Source discovery callback receiving:
-                  {url, title, description}
+            query (str): Research query or topic to investigate
+            max_depth (Optional[int]): Maximum depth of research exploration
+            time_limit (Optional[int]): Time limit in seconds for research
+            max_urls (Optional[int]): Maximum number of URLs to process
+            analysis_prompt (Optional[str]): Custom prompt for analysis
+            system_prompt (Optional[str]): Custom system prompt
+            __experimental_stream_steps (Optional[bool]): Enable experimental streaming
+            on_activity (Optional[Callable]): Progress callback receiving {type, status, message, timestamp, depth}
+            on_source (Optional[Callable]): Source discovery callback receiving {url, title, description}
         Returns:
-            DeepResearchResponse containing:
-
-            Status:
-            * success - Whether research completed successfully
-            * status - Current state (processing/completed/failed)
-            * error - Error message if failed
-
-            Results:
-            * id - Unique identifier for the research job
-            * data - Research findings and analysis
-            * sources - List of discovered sources
-            * activities - Research progress log
-            * summaries - Generated research summaries
+            DeepResearchStatusResponse containing:
+            * success (bool): Whether research completed successfully
+            * status (str): Current state (processing/completed/failed)
+            * error (Optional[str]): Error message if failed
+            * id (str): Unique identifier for the research job
+            * data (Any): Research findings and analysis
+            * sources (List[Dict]): List of discovered sources
+            * activities (List[Dict]): Research progress log
+            * summaries (List[str]): Generated research summaries
         Raises:
-            Exception: If research fails
+            Exception: If research fails
         """
-        if params is None:
-            params = {}
-
-        if isinstance(params, dict):
-            research_params = DeepResearchParams(**params)
-        else:
-            research_params = params
+        research_params = {}
+        if max_depth is not None:
+            research_params['maxDepth'] = max_depth
+        if time_limit is not None:
+            research_params['timeLimit'] = time_limit
+        if max_urls is not None:
+            research_params['maxUrls'] = max_urls
+        if analysis_prompt is not None:
+            research_params['analysisPrompt'] = analysis_prompt
+        if system_prompt is not None:
+            research_params['systemPrompt'] = system_prompt
+        if __experimental_stream_steps is not None:
+            research_params['__experimental_streamSteps'] = __experimental_stream_steps
+        research_params = DeepResearchParams(**research_params)
         response = await self.async_deep_research(query, research_params)
         if not response.get('success') or 'id' not in response:
@@ -3940,38 +3962,54 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def async_deep_research(
             self,
             query: str,
-            params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None) -> DeepResearchResponse:
+            *,
+            max_depth: Optional[int] = None,
+            time_limit: Optional[int] = None,
+            max_urls: Optional[int] = None,
+            analysis_prompt: Optional[str] = None,
+            system_prompt: Optional[str] = None,
+            __experimental_stream_steps: Optional[bool] = None) -> Dict[str, Any]:
         """
-        Initiate an asynchronous deep research job without waiting for completion.
+        Initiates an asynchronous deep research operation.
         Args:
             query (str): Research query or topic to investigate
-            params (Optional[Union[Dict[str, Any], DeepResearchParams]]): See DeepResearchParams model:
-                Research Settings:
-                * maxDepth - Maximum research depth (default: 7)
-                * timeLimit - Time limit in seconds (default: 270)
-                * maxUrls - Maximum URLs to process (default: 20)
+            max_depth (Optional[int]): Maximum depth of research exploration
+            time_limit (Optional[int]): Time limit in seconds for research
+            max_urls (Optional[int]): Maximum number of URLs to process
+            analysis_prompt (Optional[str]): Custom prompt for analysis
+            system_prompt (Optional[str]): Custom system prompt
+            __experimental_stream_steps (Optional[bool]): Enable experimental streaming
         Returns:
-            DeepResearchResponse containing:
-            * success (bool): Whether job started successfully
-            * id (str): Unique identifier for the job
-            * error (str, optional): Error message if start failed
+            Dict[str, Any]: A response containing:
+            * success (bool): Whether the research initiation was successful
+            * id (str): The unique identifier for the research job
+            * error (str, optional): Error message if initiation failed
         Raises:
-            ValueError: If job initiation fails
+            Exception: If the research initiation fails.
         """
-        if params is None:
-            params = {}
-
-        if isinstance(params, dict):
-            research_params = DeepResearchParams(**params)
-        else:
-            research_params = params
+        research_params = {}
+        if max_depth is not None:
+            research_params['maxDepth'] = max_depth
+        if time_limit is not None:
+            research_params['timeLimit'] = time_limit
+        if max_urls is not None:
+            research_params['maxUrls'] = max_urls
+        if analysis_prompt is not None:
+            research_params['analysisPrompt'] = analysis_prompt
+        if system_prompt is not None:
+            research_params['systemPrompt'] = system_prompt
+        if __experimental_stream_steps is not None:
+            research_params['__experimental_streamSteps'] = __experimental_stream_steps
+        research_params = DeepResearchParams(**research_params)
         headers = self._prepare_headers()
+        json_data = {'query': query, **research_params.dict(exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"
+
         try:
             return await self._async_post_request(
                 f'{self.api_url}/v1/deep-research',
@@ -3983,26 +4021,28 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def check_deep_research_status(self, id: str) -> DeepResearchStatusResponse:
         """
-        Check the status of an asynchronous deep research job.
+        Check the status of a deep research operation.
         Args:
-            id (str): The ID of the research job
+            id (str): The ID of the deep research operation.
         Returns:
-            DeepResearchStatusResponse containing:
-            * success (bool): Whether research completed successfully
-            * status (str): Current state (processing/completed/failed)
-            * data (Dict[str, Any], optional): Research findings and analysis
-            * error (str, optional): Error message if failed
-            * expiresAt (str): When the research data expires
-            * currentDepth (int): Current research depth
-            * maxDepth (int): Maximum research depth
-            * activities (List[Dict[str, Any]]): Research progress log
-            * sources (List[Dict[str, Any]]): Discovered sources
-            * summaries (List[str]): Generated research summaries
+            DeepResearchResponse containing:
+
+            Status:
+            * success - Whether research completed successfully
+            * status - Current state (processing/completed/failed)
+            * error - Error message if failed
+
+            Results:
+            * id - Unique identifier for the research job
+            * data - Research findings and analysis
+            * sources - List of discovered sources
+            * activities - Research progress log
+            * summaries - Generated research summaries
         Raises:
-            ValueError: If status check fails
+            Exception: If the status check fails.
         """
         headers = self._prepare_headers()
         try:
@@ -4016,52 +4056,80 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def search(
             self,
             query: str,
-            params: Optional[Union[Dict[str, Any], SearchParams]] = None) -> SearchResponse:
+            *,
+            limit: Optional[int] = None,
+            tbs: Optional[str] = None,
+            filter: Optional[str] = None,
+            lang: Optional[str] = None,
+            country: Optional[str] = None,
+            location: Optional[str] = None,
+            timeout: Optional[int] = None,
+            scrape_options: Optional[CommonOptions] = None,
+            params: Optional[Union[Dict[str, Any], SearchParams]] = None,
+            **kwargs) -> SearchResponse:
         """
         Asynchronously search for content using Firecrawl.
         Args:
-            query (str): Search query string
-            params (Optional[Union[Dict[str, Any], SearchParams]]): See SearchParams model:
-                Search Options:
-                * limit - Max results (default: 5)
-                * tbs - Time filter (e.g. "qdr:d")
-                * filter - Custom result filter
-
-                Localization:
-                * lang - Language code (default: "en")
-                * country - Country code (default: "us")
-                * location - Geo-targeting
-
-                Request Options:
-                * timeout - Request timeout (ms)
-                * scrapeOptions - Result scraping config
+            query (str): Search query string
+            limit (Optional[int]): Max results (default: 5)
+            tbs (Optional[str]): Time filter (e.g. "qdr:d")
+            filter (Optional[str]): Custom result filter
+            lang (Optional[str]): Language code (default: "en")
+            country (Optional[str]): Country code (default: "us")
+            location (Optional[str]): Geo-targeting
+            timeout (Optional[int]): Request timeout in milliseconds
+            scrape_options (Optional[CommonOptions]): Result scraping configuration
+            params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters
+            **kwargs: Additional keyword arguments for future compatibility
         Returns:
-            SearchResponse containing:
-            * success (bool): Whether search completed successfully
-            * data (List[FirecrawlDocument]): Search results
-            * warning (str, optional): Warning message if any
-            * error (str, optional): Error message if search failed
+            SearchResponse: Response containing:
+            * success (bool): Whether request succeeded
+            * data (List[FirecrawlDocument]): Search results
+            * warning (Optional[str]): Warning message if any
+            * error (Optional[str]): Error message if any
         Raises:
-            Exception: If search fails
+            Exception: If search fails or response cannot be parsed
         """
-        if params is None:
-            params = {}
+        # Build search parameters
+        search_params = {}
+        if params:
+            if isinstance(params, dict):
+                search_params.update(params)
+            else:
+                search_params.update(params.dict(exclude_none=True))
-        if isinstance(params, dict):
-            search_params = SearchParams(query=query, **params)
-        else:
-            search_params = params
-            search_params.query = query
+        # Add individual parameters
+        if limit is not None:
+            search_params['limit'] = limit
+        if tbs is not None:
+            search_params['tbs'] = tbs
+        if filter is not None:
+            search_params['filter'] = filter
+        if lang is not None:
+            search_params['lang'] = lang
+        if country is not None:
+            search_params['country'] = country
+        if location is not None:
+            search_params['location'] = location
+        if timeout is not None:
+            search_params['timeout'] = timeout
+        if scrape_options is not None:
+            search_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+
+        # Add any additional kwargs
+        search_params.update(kwargs)
-        search_params_dict = search_params.dict(exclude_none=True)
-        search_params_dict['origin'] = f"python-sdk@{version}"
+        # Create final params object
+        final_params = SearchParams(query=query, **search_params)
+        params_dict = final_params.dict(exclude_none=True)
+        params_dict['origin'] = f"python-sdk@{version}"
         return await self._async_post_request(
             f"{self.api_url}/v1/search",
-            search_params_dict,
+            params_dict,
             {"Authorization": f"Bearer {self.api_key}"}
         )