mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 22:55:59 +08:00
Fix deep_research method to convert camelCase to snake_case and add dot notation access
Co-Authored-By: Nicolas Camara <nicolascamara29@gmail.com>
This commit is contained in:
parent
06189b9646
commit
fc366f948c
@ -13,7 +13,7 @@ import os
|
|||||||
|
|
||||||
from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
|
from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
|
||||||
|
|
||||||
__version__ = "2.5.4"
|
__version__ = "2.6.0"
|
||||||
|
|
||||||
# Define the logger for the Firecrawl project
|
# Define the logger for the Firecrawl project
|
||||||
logger: logging.Logger = logging.getLogger("firecrawl")
|
logger: logging.Logger = logging.getLogger("firecrawl")
|
||||||
|
@ -23,6 +23,7 @@ import websockets
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
import asyncio
|
import asyncio
|
||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
|
from .utils import convert_dict_keys_to_snake_case, convert_to_dot_dict
|
||||||
|
|
||||||
# Suppress Pydantic warnings about attribute shadowing
|
# Suppress Pydantic warnings about attribute shadowing
|
||||||
warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
|
warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
|
||||||
@ -2270,8 +2271,9 @@ class FirecrawlApp:
|
|||||||
* status (str): Current state (processing/completed/failed)
|
* status (str): Current state (processing/completed/failed)
|
||||||
* error (Optional[str]): Error message if failed
|
* error (Optional[str]): Error message if failed
|
||||||
* id (str): Unique identifier for the research job
|
* id (str): Unique identifier for the research job
|
||||||
* data (Any): Research findings and analysis
|
* data (Any): Research findings and analysis with dot notation access
|
||||||
* sources (List[Dict]): List of discovered sources
|
* final_analysis (str): Final analysis of the research (converted from camelCase)
|
||||||
|
* sources (List[Dict]): List of discovered sources
|
||||||
* activities (List[Dict]): Research progress log
|
* activities (List[Dict]): Research progress log
|
||||||
* summaries (List[str]): Generated research summaries
|
* summaries (List[str]): Generated research summaries
|
||||||
|
|
||||||
@ -2301,38 +2303,41 @@ class FirecrawlApp:
|
|||||||
analysis_prompt=analysis_prompt,
|
analysis_prompt=analysis_prompt,
|
||||||
system_prompt=system_prompt
|
system_prompt=system_prompt
|
||||||
)
|
)
|
||||||
if not response.get('success') or 'id' not in response:
|
|
||||||
return response
|
dot_dict_response = convert_to_dot_dict(response)
|
||||||
|
|
||||||
|
if not dot_dict_response.get('success') or 'id' not in dot_dict_response:
|
||||||
|
return dot_dict_response
|
||||||
|
|
||||||
job_id = response['id']
|
job_id = dot_dict_response.id
|
||||||
last_activity_count = 0
|
last_activity_count = 0
|
||||||
last_source_count = 0
|
last_source_count = 0
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
status = self.check_deep_research_status(job_id)
|
status = self.check_deep_research_status(job_id)
|
||||||
|
|
||||||
if on_activity and 'activities' in status:
|
if on_activity and hasattr(status, 'activities'):
|
||||||
new_activities = status['activities'][last_activity_count:]
|
new_activities = status.activities[last_activity_count:]
|
||||||
for activity in new_activities:
|
for activity in new_activities:
|
||||||
on_activity(activity)
|
on_activity(activity)
|
||||||
last_activity_count = len(status['activities'])
|
last_activity_count = len(status.activities)
|
||||||
|
|
||||||
if on_source and 'sources' in status:
|
if on_source and hasattr(status, 'sources'):
|
||||||
new_sources = status['sources'][last_source_count:]
|
new_sources = status.sources[last_source_count:]
|
||||||
for source in new_sources:
|
for source in new_sources:
|
||||||
on_source(source)
|
on_source(source)
|
||||||
last_source_count = len(status['sources'])
|
last_source_count = len(status.sources)
|
||||||
|
|
||||||
if status['status'] == 'completed':
|
if status.status == 'completed':
|
||||||
return status
|
return status
|
||||||
elif status['status'] == 'failed':
|
elif status.status == 'failed':
|
||||||
raise Exception(f'Deep research failed. Error: {status.get("error")}')
|
raise Exception(f'Deep research failed. Error: {status.get("error")}')
|
||||||
elif status['status'] != 'processing':
|
elif status.status != 'processing':
|
||||||
break
|
break
|
||||||
|
|
||||||
time.sleep(2) # Polling interval
|
time.sleep(2) # Polling interval
|
||||||
|
|
||||||
return {'success': False, 'error': 'Deep research job terminated unexpectedly'}
|
return convert_to_dot_dict({'success': False, 'error': 'Deep research job terminated unexpectedly'})
|
||||||
|
|
||||||
def async_deep_research(
|
def async_deep_research(
|
||||||
self,
|
self,
|
||||||
@ -2422,8 +2427,9 @@ class FirecrawlApp:
|
|||||||
|
|
||||||
Results:
|
Results:
|
||||||
* id - Unique identifier for the research job
|
* id - Unique identifier for the research job
|
||||||
* data - Research findings and analysis
|
* data - Research findings and analysis with dot notation access
|
||||||
* sources - List of discovered sources
|
* final_analysis - Final analysis of the research (converted from camelCase)
|
||||||
|
* sources - List of discovered sources
|
||||||
* activities - Research progress log
|
* activities - Research progress log
|
||||||
* summaries - Generated research summaries
|
* summaries - Generated research summaries
|
||||||
|
|
||||||
@ -2435,7 +2441,13 @@ class FirecrawlApp:
|
|||||||
response = self._get_request(f'{self.api_url}/v1/deep-research/{id}', headers)
|
response = self._get_request(f'{self.api_url}/v1/deep-research/{id}', headers)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
try:
|
try:
|
||||||
return response.json()
|
json_response = response.json()
|
||||||
|
|
||||||
|
snake_case_response = convert_dict_keys_to_snake_case(json_response)
|
||||||
|
|
||||||
|
dot_dict_response = convert_to_dot_dict(snake_case_response)
|
||||||
|
|
||||||
|
return dot_dict_response
|
||||||
except:
|
except:
|
||||||
raise Exception('Failed to parse Firecrawl response as JSON.')
|
raise Exception('Failed to parse Firecrawl response as JSON.')
|
||||||
elif response.status_code == 404:
|
elif response.status_code == 404:
|
||||||
|
87
apps/python-sdk/firecrawl/utils.py
Normal file
87
apps/python-sdk/firecrawl/utils.py
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
"""
|
||||||
|
Utility functions for the Firecrawl SDK.
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
from typing import Any, Dict, List, Union
|
||||||
|
|
||||||
|
|
||||||
|
def camel_to_snake(name: str) -> str:
    """
    Translate a camelCase (or PascalCase) identifier into snake_case.

    Falsy input, such as the empty string, is handed back unchanged.

    Args:
        name (str): Identifier written in camelCase.

    Returns:
        str: The lower-cased, underscore-separated form of ``name``.
    """
    if name:
        # Pass 1: put an underscore in front of every capitalised word
        # (an uppercase letter followed by lowercase letters).
        word_split = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
        # Pass 2: separate a lowercase letter or digit from a capital that
        # directly follows it, which catches acronym boundaries.
        fully_split = re.sub('([a-z0-9])([A-Z])', r'\1_\2', word_split)
        return fully_split.lower()
    return name
|
||||||
|
|
||||||
|
|
||||||
|
def convert_dict_keys_to_snake_case(data: Any) -> Any:
    """
    Walk a nested structure and rename every dictionary key to snake_case.

    Dictionaries get their keys passed through ``camel_to_snake`` and their
    values processed recursively; lists are processed element by element;
    any other value is returned as-is.

    Args:
        data (Any): A dictionary, a list, or a primitive value.

    Returns:
        Any: A new structure of the same shape with snake_case keys.
    """
    if isinstance(data, list):
        return [convert_dict_keys_to_snake_case(element) for element in data]

    if not isinstance(data, dict):
        # Primitives (and any other non-container type) pass straight through.
        return data

    renamed = {}
    for original_key, value in data.items():
        snake_key = camel_to_snake(original_key)
        renamed[snake_key] = convert_dict_keys_to_snake_case(value)
    return renamed
|
||||||
|
|
||||||
|
|
||||||
|
class DotDict(dict):
    """
    A dictionary that supports dot notation access to its items.

    Nested dictionaries are converted to ``DotDict`` at construction time,
    including dictionaries found inside lists at any nesting depth, so that
    chained access such as ``d.data.sources[0].url`` works.

    Attribute access only falls back to the mapping when normal attribute
    lookup fails, so keys that collide with ``dict`` method names (for
    example ``items`` or ``keys``) must still be read with ``d['items']``.

    Example:
        >>> d = DotDict({'foo': 'bar'})
        >>> d.foo
        'bar'
        >>> d['foo']
        'bar'
    """

    def __init__(self, *args, **kwargs):
        """Build the mapping like ``dict`` and wrap nested containers."""
        super().__init__(*args, **kwargs)
        # Snapshot the items so values can be replaced while looping.
        for key, value in list(self.items()):
            if isinstance(value, (dict, list)):
                self[key] = self._wrap(value)

    @classmethod
    def _wrap(cls, value):
        """Recursively convert dicts (also inside nested lists) to DotDict."""
        if isinstance(value, dict):
            return cls(value)
        if isinstance(value, list):
            return [cls._wrap(item) for item in value]
        return value

    def __getattr__(self, key):
        """
        Return ``self[key]`` for attribute-style reads.

        Raises:
            AttributeError: If ``key`` is not present, which keeps
                ``hasattr`` and ``getattr`` with a default working.
        """
        try:
            return self[key]
        except KeyError:
            # Hide the internal KeyError; callers only care about the
            # missing attribute.
            raise AttributeError(f"'DotDict' object has no attribute '{key}'") from None

    def __setattr__(self, key, value):
        """Store ``value`` under ``key`` in the mapping."""
        self[key] = value

    def __delattr__(self, key):
        """
        Remove ``key`` from the mapping for ``del d.key``.

        Raises:
            AttributeError: If ``key`` is not present.
        """
        try:
            del self[key]
        except KeyError:
            raise AttributeError(f"'DotDict' object has no attribute '{key}'") from None
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_dot_dict(data: Union[Dict, List, Any]) -> Union[DotDict, List, Any]:
    """
    Wrap dictionaries in ``DotDict`` so their items can be read as attributes.

    A dictionary becomes a ``DotDict``; a list is rebuilt with each element
    converted by this same function; every other value is returned unchanged.

    Args:
        data (Union[Dict, List, Any]): The value to convert.

    Returns:
        Union[DotDict, List, Any]: The converted value.
    """
    if isinstance(data, list):
        return [convert_to_dot_dict(entry) for entry in data]
    return DotDict(data) if isinstance(data, dict) else data
|
29
apps/python-sdk/test_deep_research.py
Normal file
29
apps/python-sdk/test_deep_research.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
"""
|
||||||
|
Test script for the deep_research method with the new camelCase to snake_case conversion.
|
||||||
|
"""
|
||||||
|
from firecrawl import FirecrawlApp
|
||||||
|
|
||||||
|
# Client used for the demo; replace the placeholder key with a real one.
firecrawl = FirecrawlApp(api_key="your_api_key")


def on_activity(activity):
    """Print one research activity using dot notation on its fields."""
    print(f"[{activity.type}] {activity.message}")


# Parameters for the research job, gathered in one place for readability.
research_options = {
    "query": "What are the latest developments in quantum computing?",
    "max_depth": 5,
    "time_limit": 180,
    "max_urls": 15,
    "on_activity": on_activity,
}

results = firecrawl.deep_research(**research_options)

# Nested payload, read through snake_case attribute names.
print(f"Final Analysis: {results.data.final_analysis}")
print(f"Sources: {len(results.data.sources)} references")

# Top-level fields of the response.
print("\nAll available fields in the response:")
print(f"Success: {results.success}")
print(f"Status: {results.status}")
print(f"Current Depth: {results.current_depth}")
print(f"Max Depth: {results.max_depth}")
print(f"Activities: {len(results.activities)} activities")
print(f"Summaries: {len(results.summaries)} summaries")
|
Loading…
x
Reference in New Issue
Block a user