Reviewer revision of the deep-research dot-notation patch (python-sdk 2.5.4 -> 2.6.0).
NOTE(review): line breaks and indentation were reconstructed, so the whitespace of
context lines in firecrawl.py should be confirmed against the target tree before
applying. The "index" blob hashes are carried over unchanged and do not describe
the revised file contents.

diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py
index 4031da9f..3c7c8b3b 100644
--- a/apps/python-sdk/firecrawl/__init__.py
+++ b/apps/python-sdk/firecrawl/__init__.py
@@ -13,7 +13,7 @@ import os
 
 from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
 
-__version__ = "2.5.4"
+__version__ = "2.6.0"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py
index 1cf62cf7..006c017d 100644
--- a/apps/python-sdk/firecrawl/firecrawl.py
+++ b/apps/python-sdk/firecrawl/firecrawl.py
@@ -23,6 +23,7 @@ import websockets
 import aiohttp
 import asyncio
 from pydantic import Field
+from .utils import convert_dict_keys_to_snake_case, convert_to_dot_dict
 
 # Suppress Pydantic warnings about attribute shadowing
 warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
@@ -2270,8 +2271,9 @@ class FirecrawlApp:
             * status (str): Current state (processing/completed/failed)
             * error (Optional[str]): Error message if failed
             * id (str): Unique identifier for the research job
-            * data (Any): Research findings and analysis
-            * sources (List[Dict]): List of discovered sources
+            * data (Any): Research findings and analysis with dot notation access
+              * final_analysis (str): Final analysis of the research (converted from camelCase)
+              * sources (List[Dict]): List of discovered sources
             * activities (List[Dict]): Research progress log
             * summaries (List[str]): Generated research summaries
 
@@ -2301,38 +2303,41 @@ class FirecrawlApp:
             analysis_prompt=analysis_prompt,
             system_prompt=system_prompt
         )
-        if not response.get('success') or 'id' not in response:
-            return response
+
+        dot_dict_response = convert_to_dot_dict(response)
+
+        if not dot_dict_response.get('success') or 'id' not in dot_dict_response:
+            return dot_dict_response
 
-        job_id = response['id']
+        job_id = dot_dict_response.id
         last_activity_count = 0
         last_source_count = 0
 
         while True:
             status = self.check_deep_research_status(job_id)
 
-            if on_activity and 'activities' in status:
-                new_activities = status['activities'][last_activity_count:]
+            if on_activity and hasattr(status, 'activities'):
+                new_activities = status.activities[last_activity_count:]
                 for activity in new_activities:
                     on_activity(activity)
-                last_activity_count = len(status['activities'])
+                last_activity_count = len(status.activities)
 
-            if on_source and 'sources' in status:
-                new_sources = status['sources'][last_source_count:]
+            if on_source and hasattr(status, 'sources'):
+                new_sources = status.sources[last_source_count:]
                 for source in new_sources:
                     on_source(source)
-                last_source_count = len(status['sources'])
+                last_source_count = len(status.sources)
 
-            if status['status'] == 'completed':
+            if status.status == 'completed':
                 return status
-            elif status['status'] == 'failed':
+            elif status.status == 'failed':
                 raise Exception(f'Deep research failed. Error: {status.get("error")}')
-            elif status['status'] != 'processing':
+            elif status.status != 'processing':
                 break
 
             time.sleep(2)  # Polling interval
 
-        return {'success': False, 'error': 'Deep research job terminated unexpectedly'}
+        return convert_to_dot_dict({'success': False, 'error': 'Deep research job terminated unexpectedly'})
 
     def async_deep_research(
         self,
@@ -2422,8 +2427,9 @@ class FirecrawlApp:
 
             Results:
             * id - Unique identifier for the research job
-            * data - Research findings and analysis
-            * sources - List of discovered sources
+            * data - Research findings and analysis with dot notation access
+              * final_analysis - Final analysis of the research (converted from camelCase)
+              * sources - List of discovered sources
             * activities - Research progress log
             * summaries - Generated research summaries
 
@@ -2435,7 +2441,9 @@ class FirecrawlApp:
         response = self._get_request(f'{self.api_url}/v1/deep-research/{id}', headers)
         if response.status_code == 200:
             try:
-                return response.json()
+                json_response = response.json()
             except:
                 raise Exception('Failed to parse Firecrawl response as JSON.')
+            # Convert outside the try so a conversion bug is not reported as a JSON parse failure.
+            return convert_to_dot_dict(convert_dict_keys_to_snake_case(json_response))
         elif response.status_code == 404:
diff --git a/apps/python-sdk/firecrawl/utils.py b/apps/python-sdk/firecrawl/utils.py
new file mode 100644
index 00000000..2a40cfc7
--- /dev/null
+++ b/apps/python-sdk/firecrawl/utils.py
@@ -0,0 +1,112 @@
+"""
+Utility functions for the Firecrawl SDK.
+"""
+import re
+from typing import Any, Dict, List, Union
+
+# Compiled once at import: camel_to_snake runs for every key of every converted response.
+_WORD_START_RE = re.compile(r'(.)([A-Z][a-z]+)')
+_LOWER_TO_UPPER_RE = re.compile(r'([a-z0-9])([A-Z])')
+
+
+def camel_to_snake(name: str) -> str:
+    """
+    Convert a camelCase string to snake_case.
+
+    Args:
+        name (str): The camelCase string to convert.
+
+    Returns:
+        str: The snake_case string. An empty string is returned unchanged.
+    """
+    if not name:
+        return name
+
+    s1 = _WORD_START_RE.sub(r'\1_\2', name)
+    return _LOWER_TO_UPPER_RE.sub(r'\1_\2', s1).lower()
+
+
+def convert_dict_keys_to_snake_case(data: Any) -> Any:
+    """
+    Recursively convert all dictionary keys from camelCase to snake_case.
+
+    Keys that are not strings are left unchanged.
+
+    Args:
+        data (Any): The data to convert. Can be a dictionary, list, or primitive type.
+
+    Returns:
+        Any: The converted data with snake_case keys.
+    """
+    if isinstance(data, dict):
+        return {
+            (camel_to_snake(k) if isinstance(k, str) else k): convert_dict_keys_to_snake_case(v)
+            for k, v in data.items()
+        }
+    elif isinstance(data, list):
+        return [convert_dict_keys_to_snake_case(item) for item in data]
+    else:
+        return data
+
+
+class DotDict(dict):
+    """
+    A dictionary that supports dot notation access to its items.
+
+    Nested dictionaries, including dictionaries inside nested lists, are
+    converted to DotDict when the instance is created.
+
+    Note:
+        A key that has the same name as a dict method (for example ``items``
+        or ``keys``) is only reachable with ``d['items']``; ``d.items`` is
+        still the method.
+
+    Example:
+        >>> d = DotDict({'foo': 'bar'})
+        >>> d.foo
+        'bar'
+        >>> d['foo']
+        'bar'
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        for key, value in self.items():
+            if isinstance(value, (dict, list)):
+                # Rebinding an existing key does not resize the dict, so this is safe while iterating.
+                self[key] = convert_to_dot_dict(value)
+
+    def __getattr__(self, key):
+        try:
+            return self[key]
+        except KeyError:
+            # "from None" hides the internal KeyError from the traceback.
+            raise AttributeError(f"'DotDict' object has no attribute '{key}'") from None
+
+    def __setattr__(self, key, value):
+        self[key] = value
+
+    def __delattr__(self, key):
+        try:
+            del self[key]
+        except KeyError:
+            raise AttributeError(f"'DotDict' object has no attribute '{key}'") from None
+
+
+def convert_to_dot_dict(data: Union[Dict, List, Any]) -> Union[DotDict, List, Any]:
+    """
+    Convert a dictionary or list of dictionaries to DotDict objects.
+
+    Lists are converted recursively, so dictionaries inside nested lists are converted too.
+
+    Args:
+        data (Union[Dict, List, Any]): The data to convert.
+
+    Returns:
+        Union[DotDict, List, Any]: The converted data with DotDict objects.
+    """
+    if isinstance(data, dict):
+        return DotDict(data)
+    elif isinstance(data, list):
+        return [convert_to_dot_dict(item) for item in data]
+    else:
+        return data
diff --git a/apps/python-sdk/test_deep_research.py b/apps/python-sdk/test_deep_research.py
new file mode 100644
index 00000000..5f094a3f
--- /dev/null
+++ b/apps/python-sdk/test_deep_research.py
@@ -0,0 +1,41 @@
+"""
+Test script for the deep_research method with the new camelCase to snake_case conversion.
+
+Run it directly with ``python test_deep_research.py``. The API call sits behind a
+``__main__`` guard so that importing this module (for example during pytest
+collection of ``test_*.py`` files) does not start a research job.
+"""
+import os
+
+from firecrawl import FirecrawlApp
+
+
+def on_activity(activity):
+    print(f"[{activity.type}] {activity.message}")
+
+
+def main():
+    firecrawl = FirecrawlApp(api_key=os.getenv("FIRECRAWL_API_KEY", "your_api_key"))
+
+    results = firecrawl.deep_research(
+        query="What are the latest developments in quantum computing?",
+        max_depth=5,
+        time_limit=180,
+        max_urls=15,
+        on_activity=on_activity
+    )
+
+    print(f"Final Analysis: {results.data.final_analysis}")
+    print(f"Sources: {len(results.data.sources)} references")
+
+    print("\nAll available fields in the response:")
+    print(f"Success: {results.success}")
+    print(f"Status: {results.status}")
+    print(f"Current Depth: {results.current_depth}")
+    print(f"Max Depth: {results.max_depth}")
+    print(f"Activities: {len(results.activities)} activities")
+    print(f"Summaries: {len(results.summaries)} summaries")
+
+
+if __name__ == "__main__":
+    main()