Fix deep_research method to convert camelCase to snake_case and add dot notation access

Co-Authored-By: Nicolas Camara <nicolascamara29@gmail.com>
This commit is contained in:
Devin AI 2025-05-09 15:45:32 +00:00 committed by rafaelmmiller
parent 06189b9646
commit fc366f948c
4 changed files with 147 additions and 19 deletions

View File

@ -13,7 +13,7 @@ import os
from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
__version__ = "2.5.4" __version__ = "2.6.0"
# Define the logger for the Firecrawl project # Define the logger for the Firecrawl project
logger: logging.Logger = logging.getLogger("firecrawl") logger: logging.Logger = logging.getLogger("firecrawl")

View File

@ -23,6 +23,7 @@ import websockets
import aiohttp import aiohttp
import asyncio import asyncio
from pydantic import Field from pydantic import Field
from .utils import convert_dict_keys_to_snake_case, convert_to_dot_dict
# Suppress Pydantic warnings about attribute shadowing # Suppress Pydantic warnings about attribute shadowing
warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"") warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
@ -2270,8 +2271,9 @@ class FirecrawlApp:
* status (str): Current state (processing/completed/failed) * status (str): Current state (processing/completed/failed)
* error (Optional[str]): Error message if failed * error (Optional[str]): Error message if failed
* id (str): Unique identifier for the research job * id (str): Unique identifier for the research job
* data (Any): Research findings and analysis * data (Any): Research findings and analysis with dot notation access
* sources (List[Dict]): List of discovered sources * final_analysis (str): Final analysis of the research (converted from camelCase)
* sources (List[Dict]): List of discovered sources
* activities (List[Dict]): Research progress log * activities (List[Dict]): Research progress log
* summaries (List[str]): Generated research summaries * summaries (List[str]): Generated research summaries
@ -2301,38 +2303,41 @@ class FirecrawlApp:
analysis_prompt=analysis_prompt, analysis_prompt=analysis_prompt,
system_prompt=system_prompt system_prompt=system_prompt
) )
if not response.get('success') or 'id' not in response:
return response dot_dict_response = convert_to_dot_dict(response)
if not dot_dict_response.get('success') or 'id' not in dot_dict_response:
return dot_dict_response
job_id = response['id'] job_id = dot_dict_response.id
last_activity_count = 0 last_activity_count = 0
last_source_count = 0 last_source_count = 0
while True: while True:
status = self.check_deep_research_status(job_id) status = self.check_deep_research_status(job_id)
if on_activity and 'activities' in status: if on_activity and hasattr(status, 'activities'):
new_activities = status['activities'][last_activity_count:] new_activities = status.activities[last_activity_count:]
for activity in new_activities: for activity in new_activities:
on_activity(activity) on_activity(activity)
last_activity_count = len(status['activities']) last_activity_count = len(status.activities)
if on_source and 'sources' in status: if on_source and hasattr(status, 'sources'):
new_sources = status['sources'][last_source_count:] new_sources = status.sources[last_source_count:]
for source in new_sources: for source in new_sources:
on_source(source) on_source(source)
last_source_count = len(status['sources']) last_source_count = len(status.sources)
if status['status'] == 'completed': if status.status == 'completed':
return status return status
elif status['status'] == 'failed': elif status.status == 'failed':
raise Exception(f'Deep research failed. Error: {status.get("error")}') raise Exception(f'Deep research failed. Error: {status.get("error")}')
elif status['status'] != 'processing': elif status.status != 'processing':
break break
time.sleep(2) # Polling interval time.sleep(2) # Polling interval
return {'success': False, 'error': 'Deep research job terminated unexpectedly'} return convert_to_dot_dict({'success': False, 'error': 'Deep research job terminated unexpectedly'})
def async_deep_research( def async_deep_research(
self, self,
@ -2422,8 +2427,9 @@ class FirecrawlApp:
Results: Results:
* id - Unique identifier for the research job * id - Unique identifier for the research job
* data - Research findings and analysis * data - Research findings and analysis with dot notation access
* sources - List of discovered sources * final_analysis - Final analysis of the research (converted from camelCase)
* sources - List of discovered sources
* activities - Research progress log * activities - Research progress log
* summaries - Generated research summaries * summaries - Generated research summaries
@ -2435,7 +2441,13 @@ class FirecrawlApp:
response = self._get_request(f'{self.api_url}/v1/deep-research/{id}', headers) response = self._get_request(f'{self.api_url}/v1/deep-research/{id}', headers)
if response.status_code == 200: if response.status_code == 200:
try: try:
return response.json() json_response = response.json()
snake_case_response = convert_dict_keys_to_snake_case(json_response)
dot_dict_response = convert_to_dot_dict(snake_case_response)
return dot_dict_response
except: except:
raise Exception('Failed to parse Firecrawl response as JSON.') raise Exception('Failed to parse Firecrawl response as JSON.')
elif response.status_code == 404: elif response.status_code == 404:

View File

@ -0,0 +1,87 @@
"""
Utility functions for the Firecrawl SDK.
"""
import re
from typing import Any, Dict, List, Union
# Zero-width positions where a snake_case word boundary belongs:
#   * between a lowercase letter or digit and an uppercase letter ("finalAnalysis")
#   * between an acronym and the capitalised word after it ("HTTPResponse")
# Lookarounds are used (instead of capturing "any character") so that a name
# which already contains a separator, e.g. "content_Type", is not given a
# second underscore.
_CAMEL_BOUNDARY = re.compile(r'(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])')


def camel_to_snake(name: str) -> str:
    """
    Convert a camelCase string to snake_case.

    Word boundaries are detected on ASCII letters and digits only. Acronyms
    are kept together, so ``"HTTPResponse"`` becomes ``"http_response"``.
    Names that are already snake_case are returned lowercased and otherwise
    unchanged.

    Args:
        name (str): The camelCase string to convert.

    Returns:
        str: The snake_case string. Empty strings and non-string values are
            returned unchanged instead of raising.

    Example:
        >>> camel_to_snake('finalAnalysis')
        'final_analysis'
    """
    # Non-string keys cannot be renamed; hand them back untouched rather than
    # letting the regex engine raise a TypeError.
    if not isinstance(name, str) or not name:
        return name
    return _CAMEL_BOUNDARY.sub('_', name).lower()
def convert_dict_keys_to_snake_case(data: Any) -> Any:
    """
    Return a copy of ``data`` in which every dictionary key is snake_case.

    The structure is walked recursively: dictionaries are rebuilt with each
    key passed through ``camel_to_snake``, lists are rebuilt element by
    element, and any other value is returned as-is.

    Args:
        data (Any): A dictionary, a list, or a primitive value.

    Returns:
        Any: The same structure with snake_case dictionary keys.
    """
    if isinstance(data, list):
        return [convert_dict_keys_to_snake_case(element) for element in data]
    if not isinstance(data, dict):
        # Primitives (and any other type) pass through untouched.
        return data

    renamed = {}
    for original_key, value in data.items():
        snake_key = camel_to_snake(original_key)
        renamed[snake_key] = convert_dict_keys_to_snake_case(value)
    return renamed
def _wrap_nested(value):
    """Return ``value`` with every dict inside it, at any list depth, as a DotDict."""
    if isinstance(value, dict):
        return DotDict(value)
    if isinstance(value, list):
        return [_wrap_nested(item) for item in value]
    return value


class DotDict(dict):
    """
    A dictionary that supports dot notation access to its items.

    Nested dictionaries, including those inside lists and lists of lists,
    are converted to ``DotDict`` when the instance is constructed. Values
    assigned after construction are stored as given.

    Attribute access only falls back to item lookup when normal attribute
    lookup fails, so keys that share a name with a ``dict`` method (for
    example ``items`` or ``keys``) must be read with ``d['items']``.

    Example:
        >>> d = DotDict({'foo': 'bar'})
        >>> d.foo
        'bar'
        >>> d['foo']
        'bar'
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Iterate over a snapshot because values are replaced during the walk.
        for key, value in list(self.items()):
            if isinstance(value, (dict, list)):
                self[key] = _wrap_nested(value)

    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            # ``from None`` hides the internal KeyError; callers (and
            # ``hasattr``) only need the AttributeError.
            raise AttributeError(f"'DotDict' object has no attribute '{key}'") from None

    def __setattr__(self, key, value):
        self[key] = value

    def __delattr__(self, key):
        # Mirror __setattr__: attributes live in the mapping, so delete there.
        try:
            del self[key]
        except KeyError:
            raise AttributeError(f"'DotDict' object has no attribute '{key}'") from None
def convert_to_dot_dict(data: Union[Dict, List, Any]) -> Union[DotDict, List, Any]:
    """
    Wrap dictionaries in ``DotDict`` so their items can be read as attributes.

    Args:
        data (Union[Dict, List, Any]): A dictionary, a list (converted
            element by element, recursively), or any other value.

    Returns:
        Union[DotDict, List, Any]: A ``DotDict`` for a dictionary, a new list
            of converted elements for a list, and the input itself otherwise.
    """
    if isinstance(data, list):
        return list(map(convert_to_dot_dict, data))
    return DotDict(data) if isinstance(data, dict) else data

View File

@ -0,0 +1,29 @@
"""
Test script for the deep_research method with the new camelCase to snake_case conversion.
"""
from firecrawl import FirecrawlApp
# Client used for the demo; the API key below is a placeholder literal.
firecrawl = FirecrawlApp(api_key="your_api_key")


def on_activity(activity):
    """Print one research activity as ``[type] message`` via attribute access."""
    print(f"[{activity.type}] {activity.message}")


# Research parameters gathered in one mapping and expanded into the call.
research_options = dict(
    query="What are the latest developments in quantum computing?",
    max_depth=5,
    time_limit=180,
    max_urls=15,
    on_activity=on_activity,
)
results = firecrawl.deep_research(**research_options)

# Nested payload, read with dot notation and snake_case field names.
print(f"Final Analysis: {results.data.final_analysis}")
print(f"Sources: {len(results.data.sources)} references")

# Top-level fields of the response.
print("\nAll available fields in the response:")
print(f"Success: {results.success}")
print(f"Status: {results.status}")
print(f"Current Depth: {results.current_depth}")
print(f"Max Depth: {results.max_depth}")
print(f"Activities: {len(results.activities)} activities")
print(f"Summaries: {len(results.summaries)} summaries")