mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 10:48:59 +08:00
Update firecrawl.py
This commit is contained in:
parent
5e6e41ab17
commit
d8792d2301
@ -1608,47 +1608,45 @@ class FirecrawlApp:
|
|||||||
def extract(
|
def extract(
|
||||||
self,
|
self,
|
||||||
urls: Optional[List[str]] = None,
|
urls: Optional[List[str]] = None,
|
||||||
params: Optional[ExtractParams] = None) -> ExtractResponse[Any]:
|
*,
|
||||||
|
prompt: Optional[str] = None,
|
||||||
|
schema_: Optional[Any] = None,
|
||||||
|
system_prompt: Optional[str] = None,
|
||||||
|
allow_external_links: Optional[bool] = False,
|
||||||
|
enable_web_search: Optional[bool] = False,
|
||||||
|
show_sources: Optional[bool] = False,
|
||||||
|
agent: Optional[Dict[str, Any]] = None) -> ExtractResponse[Any]:
|
||||||
"""
|
"""
|
||||||
Extract structured information from URLs.
|
Extract structured information from URLs.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
urls: URLs to extract from
|
urls (Optional[List[str]]): URLs to extract from
|
||||||
|
prompt (Optional[str]): Custom extraction prompt
|
||||||
params: See ExtractParams model:
|
schema_ (Optional[Any]): JSON schema/Pydantic model
|
||||||
|
system_prompt (Optional[str]): System context
|
||||||
Extraction Config:
|
allow_external_links (Optional[bool]): Follow external links
|
||||||
* prompt - Custom extraction prompt
|
enable_web_search (Optional[bool]): Enable web search
|
||||||
* schema - JSON schema/Pydantic model
|
show_sources (Optional[bool]): Include source URLs
|
||||||
* systemPrompt - System context
|
agent (Optional[Dict[str, Any]]): Agent configuration
|
||||||
|
|
||||||
Behavior Options:
|
|
||||||
* allowExternalLinks - Follow external links
|
|
||||||
* enableWebSearch - Enable web search
|
|
||||||
* includeSubdomains - Include subdomains
|
|
||||||
* showSources - Include source URLs
|
|
||||||
|
|
||||||
Scraping Options:
|
|
||||||
* scrapeOptions - Page scraping config
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
ExtractResponse with:
|
ExtractResponse[Any] with:
|
||||||
* Structured data matching schema
|
* success (bool): Whether request succeeded
|
||||||
* Source information if requested
|
* data (Optional[Any]): Extracted data matching schema
|
||||||
* Success/error status
|
* error (Optional[str]): Error message if any
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
ValueError: If prompt/schema missing or extraction fails
|
ValueError: If prompt/schema missing or extraction fails
|
||||||
"""
|
"""
|
||||||
headers = self._prepare_headers()
|
headers = self._prepare_headers()
|
||||||
|
|
||||||
if not params or (not params.get('prompt') and not params.get('schema')):
|
if not prompt and not schema_:
|
||||||
raise ValueError("Either prompt or schema is required")
|
raise ValueError("Either prompt or schema is required")
|
||||||
|
|
||||||
if not urls and not params.get('prompt'):
|
if not urls and not prompt:
|
||||||
raise ValueError("Either urls or prompt is required")
|
raise ValueError("Either urls or prompt is required")
|
||||||
|
|
||||||
schema = params.get('schema')
|
schema = schema_
|
||||||
if schema:
|
if schema:
|
||||||
if hasattr(schema, 'model_json_schema'):
|
if hasattr(schema, 'model_json_schema'):
|
||||||
# Convert Pydantic model to JSON schema
|
# Convert Pydantic model to JSON schema
|
||||||
@ -1656,26 +1654,22 @@ class FirecrawlApp:
|
|||||||
# Otherwise assume it's already a JSON schema dict
|
# Otherwise assume it's already a JSON schema dict
|
||||||
|
|
||||||
request_data = {
|
request_data = {
|
||||||
'urls': urls,
|
'urls': urls or [],
|
||||||
'allowExternalLinks': params.get('allow_external_links', params.get('allowExternalLinks', False)),
|
'allowExternalLinks': allow_external_links,
|
||||||
'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)),
|
'enableWebSearch': enable_web_search,
|
||||||
'showSources': params.get('show_sources', params.get('showSources', False)),
|
'showSources': show_sources,
|
||||||
'schema': schema,
|
'schema': schema,
|
||||||
'origin': f'python-sdk@{get_version()}'
|
'origin': f'python-sdk@{get_version()}'
|
||||||
}
|
}
|
||||||
|
|
||||||
if not request_data['urls']:
|
|
||||||
request_data['urls'] = []
|
|
||||||
# Only add prompt and systemPrompt if they exist
|
# Only add prompt and systemPrompt if they exist
|
||||||
if params.get('prompt'):
|
if prompt:
|
||||||
request_data['prompt'] = params['prompt']
|
request_data['prompt'] = prompt
|
||||||
if params.get('system_prompt'):
|
if system_prompt:
|
||||||
request_data['systemPrompt'] = params['system_prompt']
|
request_data['systemPrompt'] = system_prompt
|
||||||
elif params.get('systemPrompt'): # Check legacy field name
|
|
||||||
request_data['systemPrompt'] = params['systemPrompt']
|
|
||||||
|
|
||||||
if params.get('agent'):
|
if agent:
|
||||||
request_data['agent'] = params['agent']
|
request_data['agent'] = agent
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Send the initial extract request
|
# Send the initial extract request
|
||||||
@ -1706,7 +1700,7 @@ class FirecrawlApp:
|
|||||||
except:
|
except:
|
||||||
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
if status_data['status'] == 'completed':
|
if status_data['status'] == 'completed':
|
||||||
return status_data
|
return ExtractResponse(**status_data)
|
||||||
elif status_data['status'] in ['failed', 'cancelled']:
|
elif status_data['status'] in ['failed', 'cancelled']:
|
||||||
raise Exception(f'Extract job {status_data["status"]}. Error: {status_data["error"]}')
|
raise Exception(f'Extract job {status_data["status"]}. Error: {status_data["error"]}')
|
||||||
else:
|
else:
|
||||||
@ -1720,7 +1714,7 @@ class FirecrawlApp:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ValueError(str(e), 500)
|
raise ValueError(str(e), 500)
|
||||||
|
|
||||||
return {'success': False, 'error': "Internal server error."}
|
return ExtractResponse(success=False, error="Internal server error.")
|
||||||
|
|
||||||
def get_extract_status(self, job_id: str) -> ExtractResponse[Any]:
|
def get_extract_status(self, job_id: str) -> ExtractResponse[Any]:
|
||||||
"""
|
"""
|
||||||
@ -1740,7 +1734,7 @@ class FirecrawlApp:
|
|||||||
response = self._get_request(f'{self.api_url}/v1/extract/{job_id}', headers)
|
response = self._get_request(f'{self.api_url}/v1/extract/{job_id}', headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
try:
|
try:
|
||||||
return response.json()
|
return ExtractResponse(**response.json())
|
||||||
except:
|
except:
|
||||||
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
else:
|
else:
|
||||||
@ -1751,60 +1745,68 @@ class FirecrawlApp:
|
|||||||
def async_extract(
|
def async_extract(
|
||||||
self,
|
self,
|
||||||
urls: List[str],
|
urls: List[str],
|
||||||
params: Optional[ExtractParams] = None,
|
*,
|
||||||
|
prompt: Optional[str] = None,
|
||||||
|
schema_: Optional[Any] = None,
|
||||||
|
system_prompt: Optional[str] = None,
|
||||||
|
allow_external_links: Optional[bool] = False,
|
||||||
|
enable_web_search: Optional[bool] = False,
|
||||||
|
show_sources: Optional[bool] = False,
|
||||||
|
agent: Optional[Dict[str, Any]] = None,
|
||||||
idempotency_key: Optional[str] = None) -> ExtractResponse[Any]:
|
idempotency_key: Optional[str] = None) -> ExtractResponse[Any]:
|
||||||
"""
|
"""
|
||||||
Initiate an asynchronous extract job.
|
Initiate an asynchronous extract job.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
urls (List[str]): URLs to extract information from
|
urls (List[str]): URLs to extract information from
|
||||||
params (Optional[ExtractParams]): See ExtractParams model:
|
prompt (Optional[str]): Custom extraction prompt
|
||||||
Extraction Config:
|
schema_ (Optional[Any]): JSON schema/Pydantic model
|
||||||
* prompt - Custom extraction prompt
|
system_prompt (Optional[str]): System context
|
||||||
* schema - JSON schema/Pydantic model
|
allow_external_links (Optional[bool]): Follow external links
|
||||||
* systemPrompt - System context
|
enable_web_search (Optional[bool]): Enable web search
|
||||||
|
show_sources (Optional[bool]): Include source URLs
|
||||||
Behavior Options:
|
agent (Optional[Dict[str, Any]]): Agent configuration
|
||||||
* allowExternalLinks - Follow external links
|
|
||||||
* enableWebSearch - Enable web search
|
|
||||||
* includeSubdomains - Include subdomains
|
|
||||||
* showSources - Include source URLs
|
|
||||||
|
|
||||||
Scraping Options:
|
|
||||||
* scrapeOptions - Page scraping config
|
|
||||||
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
ExtractResponse containing:
|
ExtractResponse[Any] with:
|
||||||
* success (bool): Whether job started successfully
|
* success (bool): Whether request succeeded
|
||||||
* id (str): Unique identifier for the job
|
* data (Optional[Any]): Extracted data matching schema
|
||||||
* error (str, optional): Error message if start failed
|
* error (Optional[str]): Error message if any
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
ValueError: If job initiation fails
|
ValueError: If job initiation fails
|
||||||
"""
|
"""
|
||||||
headers = self._prepare_headers(idempotency_key)
|
headers = self._prepare_headers(idempotency_key)
|
||||||
|
|
||||||
schema = params.get('schema') if params else None
|
schema = schema_
|
||||||
if schema:
|
if schema:
|
||||||
if hasattr(schema, 'model_json_schema'):
|
if hasattr(schema, 'model_json_schema'):
|
||||||
# Convert Pydantic model to JSON schema
|
# Convert Pydantic model to JSON schema
|
||||||
schema = schema.model_json_schema()
|
schema = schema.model_json_schema()
|
||||||
# Otherwise assume it's already a JSON schema dict
|
# Otherwise assume it's already a JSON schema dict
|
||||||
|
|
||||||
jsonData = {'urls': urls, **(params or {})}
|
|
||||||
request_data = {
|
request_data = {
|
||||||
**jsonData,
|
'urls': urls,
|
||||||
'allowExternalLinks': params.get('allow_external_links', False) if params else False,
|
'allowExternalLinks': allow_external_links,
|
||||||
|
'enableWebSearch': enable_web_search,
|
||||||
|
'showSources': show_sources,
|
||||||
'schema': schema,
|
'schema': schema,
|
||||||
'origin': f'python-sdk@{version}'
|
'origin': f'python-sdk@{version}'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if prompt:
|
||||||
|
request_data['prompt'] = prompt
|
||||||
|
if system_prompt:
|
||||||
|
request_data['systemPrompt'] = system_prompt
|
||||||
|
if agent:
|
||||||
|
request_data['agent'] = agent
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = self._post_request(f'{self.api_url}/v1/extract', request_data, headers)
|
response = self._post_request(f'{self.api_url}/v1/extract', request_data, headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
try:
|
try:
|
||||||
return response.json()
|
return ExtractResponse(**response.json())
|
||||||
except:
|
except:
|
||||||
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
||||||
else:
|
else:
|
||||||
@ -1815,21 +1817,18 @@ class FirecrawlApp:
|
|||||||
def generate_llms_text(
|
def generate_llms_text(
|
||||||
self,
|
self,
|
||||||
url: str,
|
url: str,
|
||||||
params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextStatusResponse:
|
*,
|
||||||
|
max_urls: Optional[int] = None,
|
||||||
|
show_full_text: Optional[bool] = None,
|
||||||
|
experimental_stream: Optional[bool] = None) -> GenerateLLMsTextStatusResponse:
|
||||||
"""
|
"""
|
||||||
Generate LLMs.txt for a given URL and poll until completion.
|
Generate LLMs.txt for a given URL and poll until completion.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
url: Target URL to generate LLMs.txt from
|
url (str): Target URL to generate LLMs.txt from
|
||||||
|
max_urls (Optional[int]): Maximum URLs to process (default: 10)
|
||||||
params: See GenerateLLMsTextParams model:
|
show_full_text (Optional[bool]): Include full text in output (default: False)
|
||||||
params: See GenerateLLMsTextParams model:
|
experimental_stream (Optional[bool]): Enable experimental streaming
|
||||||
|
|
||||||
params: See GenerateLLMsTextParams model:
|
|
||||||
|
|
||||||
Generation Options:
|
|
||||||
* maxUrls - Maximum URLs to process (default: 10)
|
|
||||||
* showFullText - Include full text in output (default: False)
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
GenerateLLMsTextStatusResponse with:
|
GenerateLLMsTextStatusResponse with:
|
||||||
@ -1841,15 +1840,13 @@ class FirecrawlApp:
|
|||||||
Raises:
|
Raises:
|
||||||
Exception: If generation fails
|
Exception: If generation fails
|
||||||
"""
|
"""
|
||||||
if params is None:
|
params = GenerateLLMsTextParams(
|
||||||
params = {}
|
maxUrls=max_urls,
|
||||||
|
showFullText=show_full_text,
|
||||||
|
__experimental_stream=experimental_stream
|
||||||
|
)
|
||||||
|
|
||||||
if isinstance(params, dict):
|
response = self.async_generate_llms_text(url, params)
|
||||||
generation_params = GenerateLLMsTextParams(**params)
|
|
||||||
else:
|
|
||||||
generation_params = params
|
|
||||||
|
|
||||||
response = self.async_generate_llms_text(url, generation_params)
|
|
||||||
if not response.get('success') or 'id' not in response:
|
if not response.get('success') or 'id' not in response:
|
||||||
return response
|
return response
|
||||||
|
|
||||||
@ -1871,35 +1868,36 @@ class FirecrawlApp:
|
|||||||
def async_generate_llms_text(
|
def async_generate_llms_text(
|
||||||
self,
|
self,
|
||||||
url: str,
|
url: str,
|
||||||
params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> GenerateLLMsTextResponse:
|
*,
|
||||||
|
max_urls: Optional[int] = None,
|
||||||
|
show_full_text: Optional[bool] = None,
|
||||||
|
experimental_stream: Optional[bool] = None) -> GenerateLLMsTextResponse:
|
||||||
"""
|
"""
|
||||||
Initiate an asynchronous LLMs.txt generation operation.
|
Initiate an asynchronous LLMs.txt generation operation.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
url (str): The target URL to generate LLMs.txt from. Must be a valid HTTP/HTTPS URL.
|
url (str): The target URL to generate LLMs.txt from. Must be a valid HTTP/HTTPS URL.
|
||||||
params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): Generation configuration parameters:
|
max_urls (Optional[int]): Maximum URLs to process (default: 10)
|
||||||
* maxUrls (int, optional): Maximum number of URLs to process (default: 10)
|
show_full_text (Optional[bool]): Include full text in output (default: False)
|
||||||
* showFullText (bool, optional): Include full text in output (default: False)
|
experimental_stream (Optional[bool]): Enable experimental streaming
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
GenerateLLMsTextResponse: A response containing:
|
GenerateLLMsTextResponse: A response containing:
|
||||||
- success (bool): Whether the generation initiation was successful
|
* success (bool): Whether the generation initiation was successful
|
||||||
- id (str): The unique identifier for the generation job
|
* id (str): The unique identifier for the generation job
|
||||||
- error (str, optional): Error message if initiation failed
|
* error (str, optional): Error message if initiation failed
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
Exception: If the generation job initiation fails.
|
Exception: If the generation job initiation fails.
|
||||||
"""
|
"""
|
||||||
if params is None:
|
params = GenerateLLMsTextParams(
|
||||||
params = {}
|
maxUrls=max_urls,
|
||||||
|
showFullText=show_full_text,
|
||||||
if isinstance(params, dict):
|
__experimental_stream=experimental_stream
|
||||||
generation_params = GenerateLLMsTextParams(**params)
|
)
|
||||||
else:
|
|
||||||
generation_params = params
|
|
||||||
|
|
||||||
headers = self._prepare_headers()
|
headers = self._prepare_headers()
|
||||||
json_data = {'url': url, **generation_params.dict(exclude_none=True)}
|
json_data = {'url': url, **params.dict(exclude_none=True)}
|
||||||
json_data['origin'] = f"python-sdk@{version}"
|
json_data['origin'] = f"python-sdk@{version}"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -2172,52 +2170,57 @@ class FirecrawlApp:
|
|||||||
def deep_research(
|
def deep_research(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None,
|
*,
|
||||||
|
max_depth: Optional[int] = None,
|
||||||
|
time_limit: Optional[int] = None,
|
||||||
|
max_urls: Optional[int] = None,
|
||||||
|
analysis_prompt: Optional[str] = None,
|
||||||
|
system_prompt: Optional[str] = None,
|
||||||
|
__experimental_stream_steps: Optional[bool] = None,
|
||||||
on_activity: Optional[Callable[[Dict[str, Any]], None]] = None,
|
on_activity: Optional[Callable[[Dict[str, Any]], None]] = None,
|
||||||
on_source: Optional[Callable[[Dict[str, Any]], None]] = None) -> DeepResearchStatusResponse:
|
on_source: Optional[Callable[[Dict[str, Any]], None]] = None) -> DeepResearchStatusResponse:
|
||||||
"""
|
"""
|
||||||
Initiates a deep research operation on a given query and polls until completion.
|
Initiates a deep research operation on a given query and polls until completion.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query: Research query or topic to investigate
|
query (str): Research query or topic to investigate
|
||||||
|
max_depth (Optional[int]): Maximum depth of research exploration
|
||||||
params: See DeepResearchParams model:
|
time_limit (Optional[int]): Time limit in seconds for research
|
||||||
Research Settings:
|
max_urls (Optional[int]): Maximum number of URLs to process
|
||||||
* maxDepth - Maximum research depth (default: 7)
|
analysis_prompt (Optional[str]): Custom prompt for analysis
|
||||||
* timeLimit - Time limit in seconds (default: 270)
|
system_prompt (Optional[str]): Custom system prompt
|
||||||
* maxUrls - Maximum URLs to process (default: 20)
|
__experimental_stream_steps (Optional[bool]): Enable experimental streaming
|
||||||
|
on_activity (Optional[Callable]): Progress callback receiving {type, status, message, timestamp, depth}
|
||||||
Callbacks:
|
on_source (Optional[Callable]): Source discovery callback receiving {url, title, description}
|
||||||
* on_activity - Progress callback receiving:
|
|
||||||
{type, status, message, timestamp, depth}
|
|
||||||
* on_source - Source discovery callback receiving:
|
|
||||||
{url, title, description}
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
DeepResearchResponse containing:
|
DeepResearchStatusResponse containing:
|
||||||
|
* success (bool): Whether research completed successfully
|
||||||
Status:
|
* status (str): Current state (processing/completed/failed)
|
||||||
* success - Whether research completed successfully
|
* error (Optional[str]): Error message if failed
|
||||||
* status - Current state (processing/completed/failed)
|
* id (str): Unique identifier for the research job
|
||||||
* error - Error message if failed
|
* data (Any): Research findings and analysis
|
||||||
|
* sources (List[Dict]): List of discovered sources
|
||||||
Results:
|
* activities (List[Dict]): Research progress log
|
||||||
* id - Unique identifier for the research job
|
* summaries (List[str]): Generated research summaries
|
||||||
* data - Research findings and analysis
|
|
||||||
* sources - List of discovered sources
|
|
||||||
* activities - Research progress log
|
|
||||||
* summaries - Generated research summaries
|
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
Exception: If research fails
|
Exception: If research fails
|
||||||
"""
|
"""
|
||||||
if params is None:
|
research_params = {}
|
||||||
params = {}
|
if max_depth is not None:
|
||||||
|
research_params['maxDepth'] = max_depth
|
||||||
if isinstance(params, dict):
|
if time_limit is not None:
|
||||||
research_params = DeepResearchParams(**params)
|
research_params['timeLimit'] = time_limit
|
||||||
else:
|
if max_urls is not None:
|
||||||
research_params = params
|
research_params['maxUrls'] = max_urls
|
||||||
|
if analysis_prompt is not None:
|
||||||
|
research_params['analysisPrompt'] = analysis_prompt
|
||||||
|
if system_prompt is not None:
|
||||||
|
research_params['systemPrompt'] = system_prompt
|
||||||
|
if __experimental_stream_steps is not None:
|
||||||
|
research_params['__experimental_streamSteps'] = __experimental_stream_steps
|
||||||
|
research_params = DeepResearchParams(**research_params)
|
||||||
|
|
||||||
response = self.async_deep_research(query, research_params)
|
response = self.async_deep_research(query, research_params)
|
||||||
if not response.get('success') or 'id' not in response:
|
if not response.get('success') or 'id' not in response:
|
||||||
@ -2253,19 +2256,30 @@ class FirecrawlApp:
|
|||||||
|
|
||||||
return {'success': False, 'error': 'Deep research job terminated unexpectedly'}
|
return {'success': False, 'error': 'Deep research job terminated unexpectedly'}
|
||||||
|
|
||||||
def async_deep_research(self, query: str, params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None) -> Dict[str, Any]:
|
def async_deep_research(
|
||||||
|
self,
|
||||||
|
query: str,
|
||||||
|
*,
|
||||||
|
max_depth: Optional[int] = None,
|
||||||
|
time_limit: Optional[int] = None,
|
||||||
|
max_urls: Optional[int] = None,
|
||||||
|
analysis_prompt: Optional[str] = None,
|
||||||
|
system_prompt: Optional[str] = None,
|
||||||
|
__experimental_stream_steps: Optional[bool] = None) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Initiates an asynchronous deep research operation.
|
Initiates an asynchronous deep research operation.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query (str): The research query to investigate. Should be a clear, specific question or topic.
|
query (str): Research query or topic to investigate
|
||||||
params (Optional[Union[Dict[str, Any], DeepResearchParams]]): Research configuration parameters:
|
max_depth (Optional[int]): Maximum depth of research exploration
|
||||||
* maxDepth (int, optional): Maximum depth of research exploration (default: 7)
|
time_limit (Optional[int]): Time limit in seconds for research
|
||||||
* timeLimit (int, optional): Time limit in seconds for research (default: 270)
|
max_urls (Optional[int]): Maximum number of URLs to process
|
||||||
* maxUrls (int, optional): Maximum number of URLs to process (default: 20)
|
analysis_prompt (Optional[str]): Custom prompt for analysis
|
||||||
|
system_prompt (Optional[str]): Custom system prompt
|
||||||
|
__experimental_stream_steps (Optional[bool]): Enable experimental streaming
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
DeepResearchResponse: A response containing:
|
Dict[str, Any]: A response containing:
|
||||||
* success (bool): Whether the research initiation was successful
|
* success (bool): Whether the research initiation was successful
|
||||||
* id (str): The unique identifier for the research job
|
* id (str): The unique identifier for the research job
|
||||||
* error (str, optional): Error message if initiation failed
|
* error (str, optional): Error message if initiation failed
|
||||||
@ -2273,13 +2287,20 @@ class FirecrawlApp:
|
|||||||
Raises:
|
Raises:
|
||||||
Exception: If the research initiation fails.
|
Exception: If the research initiation fails.
|
||||||
"""
|
"""
|
||||||
if params is None:
|
research_params = {}
|
||||||
params = {}
|
if max_depth is not None:
|
||||||
|
research_params['maxDepth'] = max_depth
|
||||||
if isinstance(params, dict):
|
if time_limit is not None:
|
||||||
research_params = DeepResearchParams(**params)
|
research_params['timeLimit'] = time_limit
|
||||||
else:
|
if max_urls is not None:
|
||||||
research_params = params
|
research_params['maxUrls'] = max_urls
|
||||||
|
if analysis_prompt is not None:
|
||||||
|
research_params['analysisPrompt'] = analysis_prompt
|
||||||
|
if system_prompt is not None:
|
||||||
|
research_params['systemPrompt'] = system_prompt
|
||||||
|
if __experimental_stream_steps is not None:
|
||||||
|
research_params['__experimental_streamSteps'] = __experimental_stream_steps
|
||||||
|
research_params = DeepResearchParams(**research_params)
|
||||||
|
|
||||||
headers = self._prepare_headers()
|
headers = self._prepare_headers()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user