Fix deep_research method to convert camelCase response keys to snake_case and add dot notation access

Co-Authored-By: Nicolas Camara <nicolascamara29@gmail.com>
2025-08-15 00:45:52 +08:00 · 2025-05-09 15:45:32 +00:00 · 2025-05-09 15:45:32 +00:00 · fc366f948c
commit fc366f948c
parent 06189b9646
4 changed files with 147 additions and 19 deletions
--- a/apps/python-sdk/firecrawl/init.py
+++ b/apps/python-sdk/firecrawl/init.py
@ -13,7 +13,7 @@ import os

 from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa

-__version__ = "2.5.4"
+__version__ = "2.6.0"

 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
--- a/apps/python-sdk/firecrawl/firecrawl.py
+++ b/apps/python-sdk/firecrawl/firecrawl.py
@ -23,6 +23,7 @@ import websockets
 import aiohttp
 import asyncio
 from pydantic import Field
+from .utils import convert_dict_keys_to_snake_case, convert_to_dot_dict

 # Suppress Pydantic warnings about attribute shadowing
 warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
@ -2270,8 +2271,9 @@ class FirecrawlApp:
            * status (str): Current state (processing/completed/failed)
            * error (Optional[str]): Error message if failed
            * id (str): Unique identifier for the research job
-            * data (Any): Research findings and analysis
-            * sources (List[Dict]): List of discovered sources
+            * data (Any): Research findings and analysis (supports dot notation access)
+                * final_analysis (str): Final analysis of the research (key converted from camelCase to snake_case)
+                * sources (List[Dict]): List of discovered sources
            * activities (List[Dict]): Research progress log
            * summaries (List[str]): Generated research summaries

@ -2301,38 +2303,41 @@ class FirecrawlApp:
            analysis_prompt=analysis_prompt,
            system_prompt=system_prompt
        )
-        if not response.get('success') or 'id' not in response:
-            return response
        
-        job_id = response['id']
+        dot_dict_response = convert_to_dot_dict(response)
+        
+        if not dot_dict_response.get('success') or 'id' not in dot_dict_response:
+            return dot_dict_response
+
+        job_id = dot_dict_response.id
        last_activity_count = 0
        last_source_count = 0

        while True:
            status = self.check_deep_research_status(job_id)
            
-            if on_activity and 'activities' in status:
-                new_activities = status['activities'][last_activity_count:]
+            if on_activity and hasattr(status, 'activities'):
+                new_activities = status.activities[last_activity_count:]
                for activity in new_activities:
                    on_activity(activity)
-                last_activity_count = len(status['activities'])
+                last_activity_count = len(status.activities)
            
-            if on_source and 'sources' in status:
-                new_sources = status['sources'][last_source_count:]
+            if on_source and hasattr(status, 'sources'):
+                new_sources = status.sources[last_source_count:]
                for source in new_sources:
                    on_source(source)
-                last_source_count = len(status['sources'])
+                last_source_count = len(status.sources)
            
-            if status['status'] == 'completed':
+            if status.status == 'completed':
                return status
-            elif status['status'] == 'failed':
+            elif status.status == 'failed':
                raise Exception(f'Deep research failed. Error: {status.get("error")}')
-            elif status['status'] != 'processing':
+            elif status.status != 'processing':
                break

            time.sleep(2)  # Polling interval

-        return {'success': False, 'error': 'Deep research job terminated unexpectedly'}
+        return convert_to_dot_dict({'success': False, 'error': 'Deep research job terminated unexpectedly'})

    def async_deep_research(
            self,
@ -2422,8 +2427,9 @@ class FirecrawlApp:
            
            Results:
            * id - Unique identifier for the research job
-            * data - Research findings and analysis
-            * sources - List of discovered sources
+            * data - Research findings and analysis (supports dot notation access)
+                * final_analysis - Final analysis of the research (key converted from camelCase to snake_case)
+                * sources - List of discovered sources
            * activities - Research progress log
            * summaries - Generated research summaries

@ -2435,7 +2441,13 @@ class FirecrawlApp:
            response = self._get_request(f'{self.api_url}/v1/deep-research/{id}', headers)
            if response.status_code == 200:
                try:
-                    return response.json()
+                    json_response = response.json()
+                    
+                    snake_case_response = convert_dict_keys_to_snake_case(json_response)
+                    
+                    dot_dict_response = convert_to_dot_dict(snake_case_response)
+                    
+                    return dot_dict_response
                except:
                    raise Exception('Failed to parse Firecrawl response as JSON.')
            elif response.status_code == 404:
--- a/apps/python-sdk/firecrawl/utils.py
+++ b/apps/python-sdk/firecrawl/utils.py
@ -0,0 +1,87 @@
+"""
+Utility functions for the Firecrawl SDK.
+"""
+import re
+from typing import Any, Dict, List, Union
+
+
+def camel_to_snake(name: str) -> str:
+    """
+    Convert a camelCase string to snake_case.
+    
+    Args:
+        name (str): The camelCase string to convert.
+        
+    Returns:
+        str: The snake_case string.
+    """
+    if not name:
+        return name
+        
+    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
+    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
+
+
+def convert_dict_keys_to_snake_case(data: Any) -> Any:
+    """
+    Recursively convert all dictionary keys from camelCase to snake_case.
+    
+    Args:
+        data (Any): The data to convert. Can be a dictionary, list, or primitive type.
+        
+    Returns:
+        Any: The converted data with snake_case keys.
+    """
+    if isinstance(data, dict):
+        return {camel_to_snake(k): convert_dict_keys_to_snake_case(v) for k, v in data.items()}
+    elif isinstance(data, list):
+        return [convert_dict_keys_to_snake_case(item) for item in data]
+    else:
+        return data
+
+
+class DotDict(dict):
+    """
+    A dictionary that supports dot notation access to its items; nested dicts, and dicts directly inside list values, are wrapped as well.
+    
+    Example:
+        >>> d = DotDict({'foo': 'bar'})
+        >>> d.foo
+        'bar'
+        >>> d['foo']
+        'bar'
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        for key, value in self.items():
+            if isinstance(value, dict):
+                self[key] = DotDict(value)
+            elif isinstance(value, list):
+                self[key] = [DotDict(item) if isinstance(item, dict) else item for item in value]
+    
+    def __getattr__(self, key):
+        try:
+            return self[key]
+        except KeyError:
+            raise AttributeError(f"'DotDict' object has no attribute '{key}'")
+    
+    def __setattr__(self, key, value):
+        self[key] = value
+
+
+def convert_to_dot_dict(data: Union[Dict, List, Any]) -> Union[DotDict, List, Any]:
+    """
+    Convert a dictionary or list of dictionaries to DotDict objects.
+    
+    Args:
+        data (Union[Dict, List, Any]): The data to convert.
+        
+    Returns:
+        Union[DotDict, List, Any]: The converted data with DotDict objects.
+    """
+    if isinstance(data, dict):
+        return DotDict(data)
+    elif isinstance(data, list):
+        return [convert_to_dot_dict(item) for item in data]
+    else:
+        return data
--- a/apps/python-sdk/test_deep_research.py
+++ b/apps/python-sdk/test_deep_research.py
@ -0,0 +1,29 @@
+"""
+Test script for the deep_research method with the new camelCase to snake_case conversion.
+"""
+from firecrawl import FirecrawlApp
+
+firecrawl = FirecrawlApp(api_key="your_api_key")
+
+
+def on_activity(activity):
+    print(f"[{activity.type}] {activity.message}")
+
+results = firecrawl.deep_research(
+    query="What are the latest developments in quantum computing?",
+    max_depth=5,
+    time_limit=180,
+    max_urls=15,
+    on_activity=on_activity
+)
+
+print(f"Final Analysis: {results.data.final_analysis}")
+print(f"Sources: {len(results.data.sources)} references")
+
+print("\nAll available fields in the response:")
+print(f"Success: {results.success}")
+print(f"Status: {results.status}")
+print(f"Current Depth: {results.current_depth}")
+print(f"Max Depth: {results.max_depth}")
+print(f"Activities: {len(results.activities)} activities")
+print(f"Summaries: {len(results.summaries)} summaries")