mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-15 00:45:52 +08:00
Fix deep_research method to convert camelCase to snake_case and add dot notation access
Co-Authored-By: Nicolas Camara <nicolascamara29@gmail.com>
This commit is contained in:
parent
06189b9646
commit
fc366f948c
@ -13,7 +13,7 @@ import os
|
||||
|
||||
from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
|
||||
|
||||
__version__ = "2.5.4"
|
||||
__version__ = "2.6.0"
|
||||
|
||||
# Define the logger for the Firecrawl project
|
||||
logger: logging.Logger = logging.getLogger("firecrawl")
|
||||
|
@ -23,6 +23,7 @@ import websockets
|
||||
import aiohttp
|
||||
import asyncio
|
||||
from pydantic import Field
|
||||
from .utils import convert_dict_keys_to_snake_case, convert_to_dot_dict
|
||||
|
||||
# Suppress Pydantic warnings about attribute shadowing
|
||||
warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
|
||||
@ -2270,8 +2271,9 @@ class FirecrawlApp:
|
||||
* status (str): Current state (processing/completed/failed)
|
||||
* error (Optional[str]): Error message if failed
|
||||
* id (str): Unique identifier for the research job
|
||||
* data (Any): Research findings and analysis
|
||||
* sources (List[Dict]): List of discovered sources
|
||||
* data (Any): Research findings and analysis with dot notation access
|
||||
* final_analysis (str): Final analysis of the research (converted from camelCase)
|
||||
* sources (List[Dict]): List of discovered sources
|
||||
* activities (List[Dict]): Research progress log
|
||||
* summaries (List[str]): Generated research summaries
|
||||
|
||||
@ -2301,38 +2303,41 @@ class FirecrawlApp:
|
||||
analysis_prompt=analysis_prompt,
|
||||
system_prompt=system_prompt
|
||||
)
|
||||
if not response.get('success') or 'id' not in response:
|
||||
return response
|
||||
|
||||
job_id = response['id']
|
||||
dot_dict_response = convert_to_dot_dict(response)
|
||||
|
||||
if not dot_dict_response.get('success') or 'id' not in dot_dict_response:
|
||||
return dot_dict_response
|
||||
|
||||
job_id = dot_dict_response.id
|
||||
last_activity_count = 0
|
||||
last_source_count = 0
|
||||
|
||||
while True:
|
||||
status = self.check_deep_research_status(job_id)
|
||||
|
||||
if on_activity and 'activities' in status:
|
||||
new_activities = status['activities'][last_activity_count:]
|
||||
if on_activity and hasattr(status, 'activities'):
|
||||
new_activities = status.activities[last_activity_count:]
|
||||
for activity in new_activities:
|
||||
on_activity(activity)
|
||||
last_activity_count = len(status['activities'])
|
||||
last_activity_count = len(status.activities)
|
||||
|
||||
if on_source and 'sources' in status:
|
||||
new_sources = status['sources'][last_source_count:]
|
||||
if on_source and hasattr(status, 'sources'):
|
||||
new_sources = status.sources[last_source_count:]
|
||||
for source in new_sources:
|
||||
on_source(source)
|
||||
last_source_count = len(status['sources'])
|
||||
last_source_count = len(status.sources)
|
||||
|
||||
if status['status'] == 'completed':
|
||||
if status.status == 'completed':
|
||||
return status
|
||||
elif status['status'] == 'failed':
|
||||
elif status.status == 'failed':
|
||||
raise Exception(f'Deep research failed. Error: {status.get("error")}')
|
||||
elif status['status'] != 'processing':
|
||||
elif status.status != 'processing':
|
||||
break
|
||||
|
||||
time.sleep(2) # Polling interval
|
||||
|
||||
return {'success': False, 'error': 'Deep research job terminated unexpectedly'}
|
||||
return convert_to_dot_dict({'success': False, 'error': 'Deep research job terminated unexpectedly'})
|
||||
|
||||
def async_deep_research(
|
||||
self,
|
||||
@ -2422,8 +2427,9 @@ class FirecrawlApp:
|
||||
|
||||
Results:
|
||||
* id - Unique identifier for the research job
|
||||
* data - Research findings and analysis
|
||||
* sources - List of discovered sources
|
||||
* data - Research findings and analysis with dot notation access
|
||||
* final_analysis - Final analysis of the research (converted from camelCase)
|
||||
* sources - List of discovered sources
|
||||
* activities - Research progress log
|
||||
* summaries - Generated research summaries
|
||||
|
||||
@ -2435,7 +2441,13 @@ class FirecrawlApp:
|
||||
response = self._get_request(f'{self.api_url}/v1/deep-research/{id}', headers)
|
||||
if response.status_code == 200:
|
||||
try:
|
||||
return response.json()
|
||||
json_response = response.json()
|
||||
|
||||
snake_case_response = convert_dict_keys_to_snake_case(json_response)
|
||||
|
||||
dot_dict_response = convert_to_dot_dict(snake_case_response)
|
||||
|
||||
return dot_dict_response
|
||||
except:
|
||||
raise Exception('Failed to parse Firecrawl response as JSON.')
|
||||
elif response.status_code == 404:
|
||||
|
87
apps/python-sdk/firecrawl/utils.py
Normal file
87
apps/python-sdk/firecrawl/utils.py
Normal file
@ -0,0 +1,87 @@
|
||||
"""
|
||||
Utility functions for the Firecrawl SDK.
|
||||
"""
|
||||
import re
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
|
||||
def camel_to_snake(name: str) -> str:
    """
    Translate a camelCase identifier into its snake_case form.

    Args:
        name (str): Identifier written in camelCase.

    Returns:
        str: The identifier in snake_case. A falsy ``name`` (e.g. an empty
            string) is handed back unchanged.
    """
    if name:
        # Pass 1: insert "_" in front of every capitalised word ("Xyz").
        word_split = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
        # Pass 2: split any remaining lowercase/digit -> uppercase boundary.
        boundary_split = re.sub('([a-z0-9])([A-Z])', r'\1_\2', word_split)
        return boundary_split.lower()
    return name
|
||||
|
||||
|
||||
def convert_dict_keys_to_snake_case(data: Any) -> Any:
    """
    Walk ``data`` and rename every dictionary key from camelCase to snake_case.

    Dictionaries and lists are rebuilt recursively; any other value is
    returned untouched.

    Args:
        data (Any): A dictionary, a list, or a primitive value.

    Returns:
        Any: A structure of the same shape whose dictionary keys have been
            passed through ``camel_to_snake``.
    """
    if isinstance(data, list):
        return [convert_dict_keys_to_snake_case(element) for element in data]

    if not isinstance(data, dict):
        # Primitive (or otherwise non-container) value: nothing to rename.
        return data

    converted = {}
    for key, value in data.items():
        snake_key = camel_to_snake(key)
        converted[snake_key] = convert_dict_keys_to_snake_case(value)
    return converted
|
||||
|
||||
|
||||
class DotDict(dict):
    """
    A dictionary that supports dot notation access to its items.

    Nested dictionaries, including dictionaries found inside (arbitrarily
    nested) lists, are wrapped at construction time so that chained attribute
    access works. Values assigned after construction are stored as given.

    Note that names of regular ``dict`` attributes (``items``, ``keys``,
    ``get``, ...) always resolve to the dict method; use ``d['items']`` to
    reach a key with such a name.

    Example:
        >>> d = DotDict({'foo': 'bar'})
        >>> d.foo
        'bar'
        >>> d['foo']
        'bar'
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Snapshot the items so rebinding values never races the iterator.
        for key, value in list(self.items()):
            self[key] = self._wrap(value)

    @classmethod
    def _wrap(cls, value):
        """Return ``value`` with every nested dict replaced by a DotDict."""
        if isinstance(value, dict):
            return cls(value)
        if isinstance(value, list):
            # Recurse so dicts inside lists-of-lists are wrapped as well.
            return [cls._wrap(item) for item in value]
        return value

    def __getattr__(self, key):
        # Only called when normal attribute lookup fails, so dict methods
        # keep working; missing keys must surface as AttributeError so that
        # hasattr()/getattr(default) behave correctly.
        try:
            return self[key]
        except KeyError:
            raise AttributeError(f"'DotDict' object has no attribute '{key}'") from None

    def __setattr__(self, key, value):
        self[key] = value

    def __delattr__(self, key):
        # Mirror __setattr__: attributes live in the mapping, not __dict__.
        try:
            del self[key]
        except KeyError:
            raise AttributeError(f"'DotDict' object has no attribute '{key}'") from None
|
||||
|
||||
|
||||
def convert_to_dot_dict(data: Union[Dict, List, Any]) -> Union[DotDict, List, Any]:
    """
    Wrap dictionaries in ``DotDict`` so their items can be read as attributes.

    Args:
        data (Union[Dict, List, Any]): A dictionary, a list (whose elements
            are converted one by one), or any other value.

    Returns:
        Union[DotDict, List, Any]: A ``DotDict`` for a dictionary, a new list
            of converted elements for a list, and the value itself otherwise.
    """
    if isinstance(data, list):
        return [convert_to_dot_dict(entry) for entry in data]
    return DotDict(data) if isinstance(data, dict) else data
|
29
apps/python-sdk/test_deep_research.py
Normal file
29
apps/python-sdk/test_deep_research.py
Normal file
@ -0,0 +1,29 @@
|
||||
"""
|
||||
Test script for the deep_research method with the new camelCase to snake_case conversion.
|
||||
"""
|
||||
from firecrawl import FirecrawlApp


def on_activity(activity):
    """Print one research activity as ``[type] message``."""
    print(f"[{activity.type}] {activity.message}")


def main():
    """Run a sample deep-research job and print the fields of its result."""
    firecrawl = FirecrawlApp(api_key="your_api_key")

    results = firecrawl.deep_research(
        query="What are the latest developments in quantum computing?",
        max_depth=5,
        time_limit=180,
        max_urls=15,
        on_activity=on_activity,
    )

    print(f"Final Analysis: {results.data.final_analysis}")
    print(f"Sources: {len(results.data.sources)} references")

    print("\nAll available fields in the response:")
    print(f"Success: {results.success}")
    print(f"Status: {results.status}")
    print(f"Current Depth: {results.current_depth}")
    print(f"Max Depth: {results.max_depth}")
    print(f"Activities: {len(results.activities)} activities")
    print(f"Summaries: {len(results.summaries)} summaries")


# This file matches pytest's ``test_*.py`` pattern, so it gets imported during
# test collection; keep the API call behind the entry-point guard so that a
# plain import never triggers a network request.
if __name__ == "__main__":
    main()
|
Loading…
x
Reference in New Issue
Block a user