Merge branch 'sdk-improv/async' of https://github.com/mendableai/firecrawl into sdk-improv/async

rafaelmmiller 2025-04-18 01:21:29 -07:00
commit 55c04d615e
4 changed files with 283 additions and 246 deletions

View File

@@ -1,53 +1,45 @@
-import time
-import nest_asyncio
-import uuid
-from firecrawl.firecrawl import FirecrawlApp
+from firecrawl.firecrawl import ExtractConfig, FirecrawlApp
 from pydantic import BaseModel, Field
 from typing import List
+import time

-app = FirecrawlApp(api_key="fc-")
+app = FirecrawlApp(api_url="https://api.firecrawl.dev")

 # Scrape a website:
-scrape_result = app.scrape_url('firecrawl.dev')
-print(scrape_result['markdown'])
+scrape_result = app.scrape_url('example.com', formats=["markdown", "html"])
+print(scrape_result.markdown)

 # Test batch scrape
 urls = ['https://example.com', 'https://docs.firecrawl.dev']
-batch_scrape_params = {
-    'formats': ['markdown', 'html'],
-}

 # Synchronous batch scrape
-batch_result = app.batch_scrape_urls(urls, batch_scrape_params)
+batch_result = app.batch_scrape_urls(urls, formats=["markdown", "html"])
 print("Synchronous Batch Scrape Result:")
-print(batch_result['data'][0]['markdown'])
+print(batch_result.data[0].markdown)

 # Asynchronous batch scrape
-async_batch_result = app.async_batch_scrape_urls(urls, batch_scrape_params)
+async_batch_result = app.async_batch_scrape_urls(urls, formats=["markdown", "html"])
 print("\nAsynchronous Batch Scrape Result:")
 print(async_batch_result)

 # Crawl a website:
-idempotency_key = str(uuid.uuid4()) # optional idempotency key
-crawl_result = app.crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, 2, idempotency_key)
-print(crawl_result)
+crawl_result = app.crawl_url('firecrawl.dev', exclude_paths=['blog/*'])
+print(crawl_result.data[0].markdown)

 # Asynchronous Crawl a website:
-async_result = app.async_crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, "")
+async_result = app.async_crawl_url('firecrawl.dev', exclude_paths=['blog/*'])
 print(async_result)

-crawl_status = app.check_crawl_status(async_result['id'])
+crawl_status = app.check_crawl_status(async_result.id)
 print(crawl_status)

 attempts = 15
-while attempts > 0 and crawl_status['status'] != 'completed':
+while attempts > 0 and crawl_status.status != 'completed':
     print(crawl_status)
-    crawl_status = app.check_crawl_status(async_result['id'])
+    crawl_status = app.check_crawl_status(async_result.id)
     attempts -= 1
     time.sleep(1)

-crawl_status = app.check_crawl_status(async_result['id'])
+crawl_status = app.check_crawl_status(async_result.id)
 print(crawl_status)

 # LLM Extraction:
@@ -61,14 +53,11 @@ class ArticleSchema(BaseModel):
 class TopArticlesSchema(BaseModel):
     top: List[ArticleSchema] = Field(..., description="Top 5 stories")

-llm_extraction_result = app.scrape_url('https://news.ycombinator.com', {
-    'formats': ['extract'],
-    'extract': {
-        'schema': TopArticlesSchema.model_json_schema()
-    }
-})
-print(llm_extraction_result['extract'])
+extract_config = ExtractConfig(schema=TopArticlesSchema.model_json_schema())
+
+llm_extraction_result = app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config)
+print(llm_extraction_result.extract)

 # # Define schema to extract contents into using json schema
 json_schema = {
@@ -94,24 +83,16 @@ json_schema = {
     "required": ["top"]
 }

-app2 = FirecrawlApp(api_key="fc-", version="v0")
-
-llm_extraction_result = app2.scrape_url('https://news.ycombinator.com', {
-    'extractorOptions': {
-        'extractionSchema': json_schema,
-        'mode': 'llm-extraction'
-    },
-    'pageOptions':{
-        'onlyMainContent': True
-    }
-})
+extract_config = ExtractConfig(extractionSchema=json_schema, mode="llm-extraction", pageOptions={"onlyMainContent": True})
+
+llm_extraction_result = app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config)
+print(llm_extraction_result.extract)

 # print(llm_extraction_result['llm_extraction'])

 # Map a website:
-map_result = app.map_url('https://firecrawl.dev', { 'search': 'blog' })
+map_result = app.map_url('https://firecrawl.dev', search="blog")
 print(map_result)

 # Extract URLs:
@@ -124,14 +105,12 @@ class ExtractSchema(BaseModel):
 extract_schema = ExtractSchema.schema()

 # Perform the extraction
-extract_result = app.extract(['https://firecrawl.dev'], {
-    'prompt': "Extract the title, description, and links from the website",
-    'schema': extract_schema
-})
+extract_result = app.extract(['https://firecrawl.dev'], prompt="Extract the title, description, and links from the website", schema=extract_schema)
 print(extract_result)

 # Crawl a website with WebSockets:
 # inside an async function...
+import nest_asyncio
 nest_asyncio.apply()

 # Define event handlers
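
For readers skimming the example diff above, a minimal before/after sketch of the pattern this branch adopts: options move from a params dict to keyword arguments, and responses are read as attributes instead of dict keys. The API key below is a placeholder, not part of the commit.

```python
from firecrawl.firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

# Old style (pre-2.0): options as a dict, dict-style results
# result = app.scrape_url('https://example.com', {'formats': ['markdown', 'html']})
# print(result['markdown'])

# New style (this branch): keyword arguments, attribute-style results
result = app.scrape_url('https://example.com', formats=["markdown", "html"])
print(result.markdown)
```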

View File

@@ -6,51 +6,47 @@ from firecrawl.firecrawl import AsyncFirecrawlApp
 from pydantic import BaseModel, Field
 from typing import List

-app = AsyncFirecrawlApp(api_key="fc-")
+app = AsyncFirecrawlApp(api_url="https://api.firecrawl.dev")

 async def example_scrape():
     # Scrape a website:
-    scrape_result = await app.scrape_url('firecrawl.dev')
-    print(scrape_result['markdown'])
+    scrape_result = await app.scrape_url('example.com', formats=["markdown", "html"])
+    print(scrape_result.markdown)

 async def example_batch_scrape():
     # Batch scrape
     urls = ['https://example.com', 'https://docs.firecrawl.dev']
-    batch_scrape_params = {
-        'formats': ['markdown', 'html'],
-    }

     # Synchronous batch scrape
-    batch_result = await app.batch_scrape_urls(urls, batch_scrape_params)
+    batch_result = await app.batch_scrape_urls(urls, formats=["markdown", "html"])
     print("Synchronous Batch Scrape Result:")
-    print(batch_result['data'][0]['markdown'])
+    print(batch_result.data[0].markdown)

     # Asynchronous batch scrape
-    async_batch_result = await app.async_batch_scrape_urls(urls, batch_scrape_params)
+    async_batch_result = await app.async_batch_scrape_urls(urls, formats=["markdown", "html"])
     print("\nAsynchronous Batch Scrape Result:")
     print(async_batch_result)

 async def example_crawl():
     # Crawl a website:
-    idempotency_key = str(uuid.uuid4()) # optional idempotency key
-    crawl_result = await app.crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, 2, idempotency_key)
-    print(crawl_result)
+    crawl_result = await app.crawl_url('firecrawl.dev', exclude_paths=['blog/*'])
+    print(crawl_result.data[0].markdown)

     # Asynchronous Crawl a website:
-    async_result = await app.async_crawl_url('firecrawl.dev', {'excludePaths': ['blog/*']}, "")
+    async_result = await app.async_crawl_url('firecrawl.dev', exclude_paths=['blog/*'])
     print(async_result)

-    crawl_status = await app.check_crawl_status(async_result['id'])
+    crawl_status = await app.check_crawl_status(async_result.id)
     print(crawl_status)

     attempts = 15
-    while attempts > 0 and crawl_status['status'] != 'completed':
+    while attempts > 0 and crawl_status.status != 'completed':
         print(crawl_status)
-        crawl_status = await app.check_crawl_status(async_result['id'])
+        crawl_status = await app.check_crawl_status(async_result.id)
         attempts -= 1
         await asyncio.sleep(1)  # Use async sleep instead of time.sleep

-    crawl_status = await app.check_crawl_status(async_result['id'])
+    crawl_status = await app.check_crawl_status(async_result.id)
     print(crawl_status)

 async def example_llm_extraction():
@@ -64,18 +60,15 @@ async def example_llm_extraction():
     class TopArticlesSchema(BaseModel):
         top: List[ArticleSchema] = Field(..., description="Top 5 stories")

-    llm_extraction_result = await app.scrape_url('https://news.ycombinator.com', {
-        'formats': ['extract'],
-        'extract': {
-            'schema': TopArticlesSchema.model_json_schema()
-        }
-    })
-    print(llm_extraction_result['extract'])
+    extract_config = ExtractConfig(schema=TopArticlesSchema.model_json_schema())
+
+    llm_extraction_result = await app.scrape_url('https://news.ycombinator.com', formats=["extract"], extract=extract_config)
+    print(llm_extraction_result.extract)

 async def example_map_and_extract():
     # Map a website:
-    map_result = await app.map_url('https://firecrawl.dev', { 'search': 'blog' })
+    map_result = await app.map_url('https://firecrawl.dev', search="blog")
     print(map_result)

     # Extract URLs:
@@ -88,10 +81,7 @@ async def example_map_and_extract():
     extract_schema = ExtractSchema.schema()

     # Perform the extraction
-    extract_result = await app.extract(['https://firecrawl.dev'], {
-        'prompt': "Extract the title, description, and links from the website",
-        'schema': extract_schema
-    })
+    extract_result = await app.extract(['https://firecrawl.dev'], prompt="Extract the title, description, and links from the website", schema=extract_schema)
     print(extract_result)

 # Define event handlers for websocket
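
A minimal sketch of driving the async examples above; it assumes the `example_*` coroutines from this file are defined or imported, and uses only the standard-library event loop.

```python
import asyncio

async def main():
    await example_scrape()
    await example_batch_scrape()
    await example_crawl()
    await example_llm_extraction()
    await example_map_and_extract()

if __name__ == "__main__":
    asyncio.run(main())
```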

View File

@@ -13,7 +13,7 @@ import os
 from .firecrawl import FirecrawlApp  # noqa

-__version__ = "1.17.0"
+__version__ = "2.0.0"

 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
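
Since the version string is bumped to 2.0.0 here, a quick sketch for confirming which SDK version is installed at runtime (assuming the `firecrawl` module name shown in this file):

```python
import firecrawl

print(firecrawl.__version__)  # expected to print "2.0.0" after this change
```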

View File

@@ -3648,12 +3648,12 @@ class AsyncFirecrawlApp(FirecrawlApp):
             job_id (str): The ID of the extraction job

         Returns:
-            ExtractResponse containing:
-            * success (bool): Whether extraction completed successfully
-            * data (Any): Extracted structured data
-            * error (str, optional): Error message if extraction failed
-            * warning (str, optional): Warning message if any
-            * sources (List[str], optional): Source URLs if requested
+            ExtractResponse[Any] with:
+            * success (bool): Whether request succeeded
+            * data (Optional[Any]): Extracted data matching schema
+            * error (Optional[str]): Error message if any
+            * warning (Optional[str]): Warning message if any
+            * sources (Optional[List[str]]): Source URLs if requested

         Raises:
             ValueError: If status check fails
@@ -3669,54 +3669,67 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def async_extract(
             self,
-            urls: List[str],
-            params: Optional[ExtractParams] = None,
+            urls: Optional[List[str]] = None,
+            *,
+            prompt: Optional[str] = None,
+            schema: Optional[Any] = None,
+            system_prompt: Optional[str] = None,
+            allow_external_links: Optional[bool] = False,
+            enable_web_search: Optional[bool] = False,
+            show_sources: Optional[bool] = False,
+            agent: Optional[Dict[str, Any]] = None,
             idempotency_key: Optional[str] = None) -> ExtractResponse[Any]:
         """
         Initiate an asynchronous extraction job without waiting for completion.

         Args:
-            urls (List[str]): URLs to extract information from
-            params (Optional[ExtractParams]): See ExtractParams model:
-                Extraction Config:
-                * prompt - Custom extraction prompt
-                * schema - JSON schema/Pydantic model
-                * systemPrompt - System context
-
-                Behavior Options:
-                * allowExternalLinks - Follow external links
-                * enableWebSearch - Enable web search
-                * includeSubdomains - Include subdomains
-                * showSources - Include source URLs
-
-                Scraping Options:
-                * scrapeOptions - Page scraping config
+            urls (Optional[List[str]]): URLs to extract from
+            prompt (Optional[str]): Custom extraction prompt
+            schema (Optional[Any]): JSON schema/Pydantic model
+            system_prompt (Optional[str]): System context
+            allow_external_links (Optional[bool]): Follow external links
+            enable_web_search (Optional[bool]): Enable web search
+            show_sources (Optional[bool]): Include source URLs
+            agent (Optional[Dict[str, Any]]): Agent configuration
             idempotency_key (Optional[str]): Unique key to prevent duplicate requests

         Returns:
-            ExtractResponse containing:
-            * success (bool): Whether job started successfully
-            * id (str): Unique identifier for the job
-            * error (str, optional): Error message if start failed
+            ExtractResponse[Any] with:
+            * success (bool): Whether request succeeded
+            * data (Optional[Any]): Extracted data matching schema
+            * error (Optional[str]): Error message if any

         Raises:
             ValueError: If job initiation fails
         """
         headers = self._prepare_headers(idempotency_key)

-        schema = params.get('schema') if params else None
+        if not prompt and not schema:
+            raise ValueError("Either prompt or schema is required")
+
+        if not urls and not prompt:
+            raise ValueError("Either urls or prompt is required")
+
         if schema:
             if hasattr(schema, 'model_json_schema'):
                 schema = schema.model_json_schema()

-        jsonData = {'urls': urls, **(params or {})}
         request_data = {
-            **jsonData,
-            'allowExternalLinks': params.get('allow_external_links', False) if params else False,
+            'urls': urls or [],
+            'allowExternalLinks': allow_external_links,
+            'enableWebSearch': enable_web_search,
+            'showSources': show_sources,
             'schema': schema,
             'origin': f'python-sdk@{version}'
         }

+        if prompt:
+            request_data['prompt'] = prompt
+        if system_prompt:
+            request_data['systemPrompt'] = system_prompt
+        if agent:
+            request_data['agent'] = agent
+
         try:
             return await self._async_post_request(
                 f'{self.api_url}/v1/extract',
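
A usage sketch for the reworked keyword-only `async_extract` signature; the Pydantic schema and API key below are illustrative placeholders, not part of the commit.

```python
import asyncio
from pydantic import BaseModel
from firecrawl.firecrawl import AsyncFirecrawlApp

class PageInfo(BaseModel):
    # Hypothetical schema used only for illustration
    title: str
    description: str

app = AsyncFirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

async def start_extract_job():
    # Pydantic models are accepted; the method converts them via model_json_schema()
    job = await app.async_extract(
        urls=['https://firecrawl.dev'],
        prompt="Extract the page title and description",
        schema=PageInfo,
        show_sources=True,
    )
    print(job)

asyncio.run(start_extract_job())
```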
@@ -3729,16 +3742,18 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def generate_llms_text(
             self,
             url: str,
-            params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextStatusResponse:
+            *,
+            max_urls: Optional[int] = None,
+            show_full_text: Optional[bool] = None,
+            experimental_stream: Optional[bool] = None) -> GenerateLLMsTextStatusResponse:
         """
         Generate LLMs.txt for a given URL and monitor until completion.

         Args:
             url (str): Target URL to generate LLMs.txt from
-            params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): See GenerateLLMsTextParams model:
-                Generation Options:
-                * maxUrls - Maximum URLs to process (default: 10)
-                * showFullText - Include full text in output (default: False)
+            max_urls (Optional[int]): Maximum URLs to process (default: 10)
+            show_full_text (Optional[bool]): Include full text in output (default: False)
+            experimental_stream (Optional[bool]): Enable experimental streaming

         Returns:
             GenerateLLMsTextStatusResponse containing:
@@ -3753,15 +3768,15 @@ class AsyncFirecrawlApp(FirecrawlApp):
         Raises:
             Exception: If generation fails
         """
-        if params is None:
-            params = {}
+        params = {}
+        if max_urls is not None:
+            params['maxUrls'] = max_urls
+        if show_full_text is not None:
+            params['showFullText'] = show_full_text
+        if experimental_stream is not None:
+            params['__experimental_stream'] = experimental_stream

-        if isinstance(params, dict):
-            generation_params = GenerateLLMsTextParams(**params)
-        else:
-            generation_params = params
-
-        response = await self.async_generate_llms_text(url, generation_params)
+        response = await self.async_generate_llms_text(url, params)

         if not response.get('success') or 'id' not in response:
             return response
@@ -3783,36 +3798,38 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def async_generate_llms_text(
             self,
             url: str,
-            params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextResponse:
+            *,
+            max_urls: Optional[int] = None,
+            show_full_text: Optional[bool] = None,
+            experimental_stream: Optional[bool] = None) -> GenerateLLMsTextResponse:
         """
         Initiate an asynchronous LLMs.txt generation job without waiting for completion.

         Args:
             url (str): Target URL to generate LLMs.txt from
-            params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): See GenerateLLMsTextParams model:
-                Generation Options:
-                * maxUrls - Maximum URLs to process (default: 10)
-                * showFullText - Include full text in output (default: False)
+            max_urls (Optional[int]): Maximum URLs to process (default: 10)
+            show_full_text (Optional[bool]): Include full text in output (default: False)
+            experimental_stream (Optional[bool]): Enable experimental streaming

         Returns:
             GenerateLLMsTextResponse containing:
             * success (bool): Whether job started successfully
             * id (str): Unique identifier for the job
             * error (str, optional): Error message if start failed

         Raises:
             ValueError: If job initiation fails
         """
-        if params is None:
-            params = {}
-
-        if isinstance(params, dict):
-            generation_params = GenerateLLMsTextParams(**params)
-        else:
-            generation_params = params
+        params = {}
+        if max_urls is not None:
+            params['maxUrls'] = max_urls
+        if show_full_text is not None:
+            params['showFullText'] = show_full_text
+        if experimental_stream is not None:
+            params['__experimental_stream'] = experimental_stream

         headers = self._prepare_headers()
-        json_data = {'url': url, **generation_params.dict(exclude_none=True)}
+        json_data = {'url': url, **params.dict(exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"

         try:
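
A sketch of starting an LLMs.txt generation job with the new keyword arguments. The API key is a placeholder, and the response is handled dict-style here, mirroring how the polling wrapper above reads it.

```python
import asyncio
from firecrawl.firecrawl import AsyncFirecrawlApp

app = AsyncFirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

async def start_llms_text_job():
    job = await app.async_generate_llms_text(
        'https://firecrawl.dev',
        max_urls=10,
        show_full_text=True,
    )
    if job.get('success'):
        print("LLMs.txt job id:", job['id'])
    else:
        print("failed to start job:", job.get('error'))

asyncio.run(start_llms_text_job())
```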
@@ -3856,52 +3873,57 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def deep_research(
             self,
             query: str,
-            params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None,
+            *,
+            max_depth: Optional[int] = None,
+            time_limit: Optional[int] = None,
+            max_urls: Optional[int] = None,
+            analysis_prompt: Optional[str] = None,
+            system_prompt: Optional[str] = None,
+            __experimental_stream_steps: Optional[bool] = None,
             on_activity: Optional[Callable[[Dict[str, Any]], None]] = None,
             on_source: Optional[Callable[[Dict[str, Any]], None]] = None) -> DeepResearchStatusResponse:
         """
-        Initiates a deep research operation on a given query and polls until completion, providing real-time updates via callbacks.
+        Initiates a deep research operation on a given query and polls until completion.

         Args:
-            query: Research query or topic to investigate
-
-            params: See DeepResearchParams model:
-                Research Settings:
-                * maxDepth - Maximum research depth (default: 7)
-                * timeLimit - Time limit in seconds (default: 270)
-                * maxUrls - Maximum URLs to process (default: 20)
-
-                Callbacks:
-                * on_activity - Progress callback receiving:
-                    {type, status, message, timestamp, depth}
-                * on_source - Source discovery callback receiving:
-                    {url, title, description}
+            query (str): Research query or topic to investigate
+            max_depth (Optional[int]): Maximum depth of research exploration
+            time_limit (Optional[int]): Time limit in seconds for research
+            max_urls (Optional[int]): Maximum number of URLs to process
+            analysis_prompt (Optional[str]): Custom prompt for analysis
+            system_prompt (Optional[str]): Custom system prompt
+            __experimental_stream_steps (Optional[bool]): Enable experimental streaming
+            on_activity (Optional[Callable]): Progress callback receiving {type, status, message, timestamp, depth}
+            on_source (Optional[Callable]): Source discovery callback receiving {url, title, description}

         Returns:
-            DeepResearchResponse containing:
-
-                Status:
-                * success - Whether research completed successfully
-                * status - Current state (processing/completed/failed)
-                * error - Error message if failed
-
-                Results:
-                * id - Unique identifier for the research job
-                * data - Research findings and analysis
-                * sources - List of discovered sources
-                * activities - Research progress log
-                * summaries - Generated research summaries
+            DeepResearchStatusResponse containing:
+            * success (bool): Whether research completed successfully
+            * status (str): Current state (processing/completed/failed)
+            * error (Optional[str]): Error message if failed
+            * id (str): Unique identifier for the research job
+            * data (Any): Research findings and analysis
+            * sources (List[Dict]): List of discovered sources
+            * activities (List[Dict]): Research progress log
+            * summaries (List[str]): Generated research summaries

         Raises:
             Exception: If research fails
         """
-        if params is None:
-            params = {}
-
-        if isinstance(params, dict):
-            research_params = DeepResearchParams(**params)
-        else:
-            research_params = params
+        research_params = {}
+        if max_depth is not None:
+            research_params['maxDepth'] = max_depth
+        if time_limit is not None:
+            research_params['timeLimit'] = time_limit
+        if max_urls is not None:
+            research_params['maxUrls'] = max_urls
+        if analysis_prompt is not None:
+            research_params['analysisPrompt'] = analysis_prompt
+        if system_prompt is not None:
+            research_params['systemPrompt'] = system_prompt
+        if __experimental_stream_steps is not None:
+            research_params['__experimental_streamSteps'] = __experimental_stream_steps
+        research_params = DeepResearchParams(**research_params)

         response = await self.async_deep_research(query, research_params)

         if not response.get('success') or 'id' not in response:
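
A hedged sketch of the new keyword-only `deep_research` call with progress callbacks. The API key and query are placeholders, and the response is read dict-style to match how intermediate responses are handled in the code above; the concrete return type may differ.

```python
import asyncio
from firecrawl.firecrawl import AsyncFirecrawlApp

app = AsyncFirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

def on_activity(activity):
    # Progress events shaped like {type, status, message, timestamp, depth}
    print("activity:", activity.get("message"))

def on_source(source):
    # Source events shaped like {url, title, description}
    print("source:", source.get("url"))

async def run_deep_research():
    status = await app.deep_research(
        "What are the main open-source web crawling frameworks?",  # example query
        max_depth=5,
        time_limit=120,
        max_urls=15,
        on_activity=on_activity,
        on_source=on_source,
    )
    print(status.get("status"))

asyncio.run(run_deep_research())
```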
@@ -3940,38 +3962,54 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def async_deep_research(
             self,
             query: str,
-            params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None) -> DeepResearchResponse:
+            *,
+            max_depth: Optional[int] = None,
+            time_limit: Optional[int] = None,
+            max_urls: Optional[int] = None,
+            analysis_prompt: Optional[str] = None,
+            system_prompt: Optional[str] = None,
+            __experimental_stream_steps: Optional[bool] = None) -> Dict[str, Any]:
         """
-        Initiate an asynchronous deep research job without waiting for completion.
+        Initiates an asynchronous deep research operation.

         Args:
             query (str): Research query or topic to investigate
-            params (Optional[Union[Dict[str, Any], DeepResearchParams]]): See DeepResearchParams model:
-                Research Settings:
-                * maxDepth - Maximum research depth (default: 7)
-                * timeLimit - Time limit in seconds (default: 270)
-                * maxUrls - Maximum URLs to process (default: 20)
+            max_depth (Optional[int]): Maximum depth of research exploration
+            time_limit (Optional[int]): Time limit in seconds for research
+            max_urls (Optional[int]): Maximum number of URLs to process
+            analysis_prompt (Optional[str]): Custom prompt for analysis
+            system_prompt (Optional[str]): Custom system prompt
+            __experimental_stream_steps (Optional[bool]): Enable experimental streaming

         Returns:
-            DeepResearchResponse containing:
-            * success (bool): Whether job started successfully
-            * id (str): Unique identifier for the job
-            * error (str, optional): Error message if start failed
+            Dict[str, Any]: A response containing:
+            * success (bool): Whether the research initiation was successful
+            * id (str): The unique identifier for the research job
+            * error (str, optional): Error message if initiation failed

         Raises:
-            ValueError: If job initiation fails
+            Exception: If the research initiation fails.
         """
-        if params is None:
-            params = {}
-
-        if isinstance(params, dict):
-            research_params = DeepResearchParams(**params)
-        else:
-            research_params = params
+        research_params = {}
+        if max_depth is not None:
+            research_params['maxDepth'] = max_depth
+        if time_limit is not None:
+            research_params['timeLimit'] = time_limit
+        if max_urls is not None:
+            research_params['maxUrls'] = max_urls
+        if analysis_prompt is not None:
+            research_params['analysisPrompt'] = analysis_prompt
+        if system_prompt is not None:
+            research_params['systemPrompt'] = system_prompt
+        if __experimental_stream_steps is not None:
+            research_params['__experimental_streamSteps'] = __experimental_stream_steps
+        research_params = DeepResearchParams(**research_params)

         headers = self._prepare_headers()
         json_data = {'query': query, **research_params.dict(exclude_none=True)}
         json_data['origin'] = f"python-sdk@{version}"

         try:
             return await self._async_post_request(
                 f'{self.api_url}/v1/deep-research',
@@ -3983,26 +4021,28 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def check_deep_research_status(self, id: str) -> DeepResearchStatusResponse:
         """
-        Check the status of an asynchronous deep research job.
+        Check the status of a deep research operation.

         Args:
-            id (str): The ID of the research job
+            id (str): The ID of the deep research operation.

         Returns:
-            DeepResearchStatusResponse containing:
-            * success (bool): Whether research completed successfully
-            * status (str): Current state (processing/completed/failed)
-            * data (Dict[str, Any], optional): Research findings and analysis
-            * error (str, optional): Error message if failed
-            * expiresAt (str): When the research data expires
-            * currentDepth (int): Current research depth
-            * maxDepth (int): Maximum research depth
-            * activities (List[Dict[str, Any]]): Research progress log
-            * sources (List[Dict[str, Any]]): Discovered sources
-            * summaries (List[str]): Generated research summaries
+            DeepResearchResponse containing:
+
+                Status:
+                * success - Whether research completed successfully
+                * status - Current state (processing/completed/failed)
+                * error - Error message if failed
+
+                Results:
+                * id - Unique identifier for the research job
+                * data - Research findings and analysis
+                * sources - List of discovered sources
+                * activities - Research progress log
+                * summaries - Generated research summaries

         Raises:
-            ValueError: If status check fails
+            Exception: If the status check fails.
         """
         headers = self._prepare_headers()

         try:
@@ -4016,52 +4056,80 @@ class AsyncFirecrawlApp(FirecrawlApp):
     async def search(
             self,
             query: str,
-            params: Optional[Union[Dict[str, Any], SearchParams]] = None) -> SearchResponse:
+            *,
+            limit: Optional[int] = None,
+            tbs: Optional[str] = None,
+            filter: Optional[str] = None,
+            lang: Optional[str] = None,
+            country: Optional[str] = None,
+            location: Optional[str] = None,
+            timeout: Optional[int] = None,
+            scrape_options: Optional[CommonOptions] = None,
+            params: Optional[Union[Dict[str, Any], SearchParams]] = None,
+            **kwargs) -> SearchResponse:
         """
         Asynchronously search for content using Firecrawl.

         Args:
             query (str): Search query string
-            params (Optional[Union[Dict[str, Any], SearchParams]]): See SearchParams model:
-                Search Options:
-                * limit - Max results (default: 5)
-                * tbs - Time filter (e.g. "qdr:d")
-                * filter - Custom result filter
-
-                Localization:
-                * lang - Language code (default: "en")
-                * country - Country code (default: "us")
-                * location - Geo-targeting
-
-                Request Options:
-                * timeout - Request timeout (ms)
-                * scrapeOptions - Result scraping config
+            limit (Optional[int]): Max results (default: 5)
+            tbs (Optional[str]): Time filter (e.g. "qdr:d")
+            filter (Optional[str]): Custom result filter
+            lang (Optional[str]): Language code (default: "en")
+            country (Optional[str]): Country code (default: "us")
+            location (Optional[str]): Geo-targeting
+            timeout (Optional[int]): Request timeout in milliseconds
+            scrape_options (Optional[CommonOptions]): Result scraping configuration
+            params (Optional[Union[Dict[str, Any], SearchParams]]): Additional search parameters
+            **kwargs: Additional keyword arguments for future compatibility

         Returns:
-            SearchResponse containing:
-            * success (bool): Whether search completed successfully
+            SearchResponse: Response containing:
+            * success (bool): Whether request succeeded
             * data (List[FirecrawlDocument]): Search results
-            * warning (str, optional): Warning message if any
-            * error (str, optional): Error message if search failed
+            * warning (Optional[str]): Warning message if any
+            * error (Optional[str]): Error message if any

         Raises:
-            Exception: If search fails
+            Exception: If search fails or response cannot be parsed
         """
-        if params is None:
-            params = {}
-
-        if isinstance(params, dict):
-            search_params = SearchParams(query=query, **params)
-        else:
-            search_params = params
-            search_params.query = query
-
-        search_params_dict = search_params.dict(exclude_none=True)
-        search_params_dict['origin'] = f"python-sdk@{version}"
+        # Build search parameters
+        search_params = {}
+        if params:
+            if isinstance(params, dict):
+                search_params.update(params)
+            else:
+                search_params.update(params.dict(exclude_none=True))
+
+        # Add individual parameters
+        if limit is not None:
+            search_params['limit'] = limit
+        if tbs is not None:
+            search_params['tbs'] = tbs
+        if filter is not None:
+            search_params['filter'] = filter
+        if lang is not None:
+            search_params['lang'] = lang
+        if country is not None:
+            search_params['country'] = country
+        if location is not None:
+            search_params['location'] = location
+        if timeout is not None:
+            search_params['timeout'] = timeout
+        if scrape_options is not None:
+            search_params['scrapeOptions'] = scrape_options.dict(exclude_none=True)
+
+        # Add any additional kwargs
+        search_params.update(kwargs)
+
+        # Create final params object
+        final_params = SearchParams(query=query, **search_params)
+        params_dict = final_params.dict(exclude_none=True)
+        params_dict['origin'] = f"python-sdk@{version}"

         return await self._async_post_request(
             f"{self.api_url}/v1/search",
-            search_params_dict,
+            params_dict,
             {"Authorization": f"Bearer {self.api_key}"}
         )
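
Finally, a usage sketch for the new `search` signature, which layers keyword arguments on top of an optional `params` dict. The API key and query are placeholders.

```python
import asyncio
from firecrawl.firecrawl import AsyncFirecrawlApp

app = AsyncFirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

async def run_search():
    results = await app.search(
        "firecrawl web scraping",
        limit=5,
        lang="en",
        country="us",
        tbs="qdr:w",  # time filter: past week
    )
    print(results)

asyncio.run(run_search())
```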