Merge pull request #990 from RutamBhagat/python-sdk-conditionally-enforce-api-key

feat(python-sdk): Make API key optional for self-hosted instances
This commit is contained in:
Nicolas 2024-12-27 15:43:37 -03:00 committed by GitHub
commit cd08be7f37
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 58 additions and 31 deletions

View File

@ -8,7 +8,7 @@ from datetime import datetime
load_dotenv() load_dotenv()
API_URL = "http://127.0.0.1:3002"; API_URL = os.getenv('API_URL', 'http://127.0.0.1:3002')
ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py" ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py"
TEST_API_KEY = os.getenv('TEST_API_KEY') TEST_API_KEY = os.getenv('TEST_API_KEY')
@ -20,15 +20,26 @@ spec.loader.exec_module(firecrawl)
FirecrawlApp = firecrawl.FirecrawlApp FirecrawlApp = firecrawl.FirecrawlApp
def test_no_api_key(): def test_no_api_key():
with pytest.raises(Exception) as excinfo: if 'api.firecrawl.dev' in API_URL:
invalid_app = FirecrawlApp(api_url=API_URL) with pytest.raises(Exception) as excinfo:
assert "No API key provided" in str(excinfo.value) invalid_app = FirecrawlApp(api_url=API_URL)
assert "No API key provided" in str(excinfo.value)
else:
# Should not raise error for self-hosted
app = FirecrawlApp(api_url=API_URL)
assert app is not None
def test_scrape_url_invalid_api_key(): def test_scrape_url_invalid_api_key():
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") if 'api.firecrawl.dev' in API_URL:
with pytest.raises(Exception) as excinfo: invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
invalid_app.scrape_url('https://firecrawl.dev') with pytest.raises(Exception) as excinfo:
assert "Unauthorized: Invalid token" in str(excinfo.value) invalid_app.scrape_url('https://firecrawl.dev')
assert "Unauthorized: Invalid token" in str(excinfo.value)
else:
# Should work without API key for self-hosted
app = FirecrawlApp(api_url=API_URL)
response = app.scrape_url('https://firecrawl.dev')
assert response is not None
# def test_blocklisted_url(): # def test_blocklisted_url():
# blocklisted_url = "https://facebook.com/fake-test" # blocklisted_url = "https://facebook.com/fake-test"
@ -131,10 +142,16 @@ def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_ext
assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['markdown'] assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['markdown']
def test_crawl_url_invalid_api_key(): def test_crawl_url_invalid_api_key():
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") if 'api.firecrawl.dev' in API_URL:
with pytest.raises(Exception) as excinfo: invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
invalid_app.crawl_url('https://firecrawl.dev') with pytest.raises(Exception) as excinfo:
assert "Unauthorized: Invalid token" in str(excinfo.value) invalid_app.crawl_url('https://firecrawl.dev')
assert "Unauthorized: Invalid token" in str(excinfo.value)
else:
# Should work without API key for self-hosted
app = FirecrawlApp(api_url=API_URL)
response = app.crawl_url('https://firecrawl.dev')
assert response is not None
# def test_should_return_error_for_blocklisted_url(): # def test_should_return_error_for_blocklisted_url():
# app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) # app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
@ -291,10 +308,16 @@ def test_check_crawl_status_e2e():
assert 'error' not in status_response['data'][0]['metadata'] assert 'error' not in status_response['data'][0]['metadata']
def test_invalid_api_key_on_map(): def test_invalid_api_key_on_map():
invalid_app = FirecrawlApp(api_key="invalid_api_key", api_url=API_URL) if 'api.firecrawl.dev' in API_URL:
with pytest.raises(Exception) as excinfo: invalid_app = FirecrawlApp(api_key="invalid_api_key", api_url=API_URL)
invalid_app.map_url('https://roastmywebsite.ai') with pytest.raises(Exception) as excinfo:
assert "Unauthorized: Invalid token" in str(excinfo.value) invalid_app.map_url('https://roastmywebsite.ai')
assert "Unauthorized: Invalid token" in str(excinfo.value)
else:
# Should work without API key for self-hosted
app = FirecrawlApp(api_url=API_URL)
response = app.map_url('https://roastmywebsite.ai')
assert response is not None
# def test_blocklisted_url_on_map(): # def test_blocklisted_url_on_map():
# app = FirecrawlApp(api_key=TEST_API_KEY, api_url=API_URL) # app = FirecrawlApp(api_key=TEST_API_KEY, api_url=API_URL)
@ -349,4 +372,3 @@ def test_search_e2e():
# assert isinstance(llm_extraction['is_open_source'], bool) # assert isinstance(llm_extraction['is_open_source'], bool)

View File

@ -40,19 +40,22 @@ class FirecrawlApp:
error: Optional[str] = None error: Optional[str] = None
def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None: def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None:
""" """
Initialize the FirecrawlApp instance with API key, API URL. Initialize the FirecrawlApp instance with API key, API URL.
Args: Args:
api_key (Optional[str]): API key for authenticating with the Firecrawl API. api_key (Optional[str]): API key for authenticating with the Firecrawl API.
api_url (Optional[str]): Base URL for the Firecrawl API. api_url (Optional[str]): Base URL for the Firecrawl API.
""" """
self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY') self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY')
self.api_url = api_url or os.getenv('FIRECRAWL_API_URL', 'https://api.firecrawl.dev') self.api_url = api_url or os.getenv('FIRECRAWL_API_URL', 'https://api.firecrawl.dev')
if self.api_key is None:
logger.warning("No API key provided") # Only require API key when using cloud service
raise ValueError('No API key provided') if 'api.firecrawl.dev' in self.api_url and self.api_key is None:
logger.debug(f"Initialized FirecrawlApp with API key: {self.api_key}") logger.warning("No API key provided for cloud service")
raise ValueError('No API key provided')
logger.debug(f"Initialized FirecrawlApp with API URL: {self.api_url}")
def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any: def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
""" """

View File

@ -12,7 +12,8 @@ dependencies = [
"requests", "requests",
"python-dotenv", "python-dotenv",
"websockets", "websockets",
"nest-asyncio" "nest-asyncio",
"pydantic>=2.10.3",
] ]
authors = [{name = "Mendable.ai",email = "nick@mendable.ai"}] authors = [{name = "Mendable.ai",email = "nick@mendable.ai"}]
maintainers = [{name = "Mendable.ai",email = "nick@mendable.ai"}] maintainers = [{name = "Mendable.ai",email = "nick@mendable.ai"}]

View File

@ -2,4 +2,5 @@ requests
pytest pytest
python-dotenv python-dotenv
websockets websockets
nest-asyncio nest-asyncio
pydantic