From 29cea4c51d17d9e1ea64b9e0053fa9261bf2fc7c Mon Sep 17 00:00:00 2001 From: RutamBhagat Date: Tue, 17 Dec 2024 13:31:35 -0800 Subject: [PATCH 1/2] feat(python-sdk): improve API key handling for cloud vs self-hosted services in FirecrawlApp --- .../__tests__/v1/e2e_withAuth/test.py | 56 +++++++++++++------ apps/python-sdk/firecrawl/firecrawl.py | 27 +++++---- apps/python-sdk/pyproject.toml | 4 +- apps/python-sdk/requirements.txt | 4 +- 4 files changed, 60 insertions(+), 31 deletions(-) diff --git a/apps/python-sdk/firecrawl/__tests__/v1/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/v1/e2e_withAuth/test.py index 12fa10ce..1ed53968 100644 --- a/apps/python-sdk/firecrawl/__tests__/v1/e2e_withAuth/test.py +++ b/apps/python-sdk/firecrawl/__tests__/v1/e2e_withAuth/test.py @@ -8,7 +8,7 @@ from datetime import datetime load_dotenv() -API_URL = "http://127.0.0.1:3002"; +API_URL = os.getenv('API_URL', 'http://127.0.0.1:3002') ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py" TEST_API_KEY = os.getenv('TEST_API_KEY') @@ -20,15 +20,26 @@ spec.loader.exec_module(firecrawl) FirecrawlApp = firecrawl.FirecrawlApp def test_no_api_key(): - with pytest.raises(Exception) as excinfo: - invalid_app = FirecrawlApp(api_url=API_URL) - assert "No API key provided" in str(excinfo.value) + if 'api.firecrawl.dev' in API_URL: + with pytest.raises(Exception) as excinfo: + invalid_app = FirecrawlApp(api_url=API_URL) + assert "No API key provided" in str(excinfo.value) + else: + # Should not raise error for self-hosted + app = FirecrawlApp(api_url=API_URL) + assert app is not None def test_scrape_url_invalid_api_key(): - invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") - with pytest.raises(Exception) as excinfo: - invalid_app.scrape_url('https://firecrawl.dev') - assert "Unauthorized: Invalid token" in str(excinfo.value) + if 'api.firecrawl.dev' in API_URL: + invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") + with pytest.raises(Exception) as excinfo: + invalid_app.scrape_url('https://firecrawl.dev') + assert "Unauthorized: Invalid token" in str(excinfo.value) + else: + # Should work without API key for self-hosted + app = FirecrawlApp(api_url=API_URL) + response = app.scrape_url('https://firecrawl.dev') + assert response is not None def test_blocklisted_url(): blocklisted_url = "https://facebook.com/fake-test" @@ -131,10 +142,16 @@ def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_ext assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['markdown'] def test_crawl_url_invalid_api_key(): - invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") - with pytest.raises(Exception) as excinfo: - invalid_app.crawl_url('https://firecrawl.dev') - assert "Unauthorized: Invalid token" in str(excinfo.value) + if 'api.firecrawl.dev' in API_URL: + invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") + with pytest.raises(Exception) as excinfo: + invalid_app.crawl_url('https://firecrawl.dev') + assert "Unauthorized: Invalid token" in str(excinfo.value) + else: + # Should work without API key for self-hosted + app = FirecrawlApp(api_url=API_URL) + response = app.crawl_url('https://firecrawl.dev') + assert response is not None def test_should_return_error_for_blocklisted_url(): app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) @@ -291,10 +308,16 @@ def test_check_crawl_status_e2e(): assert 'error' not in status_response['data'][0]['metadata'] def test_invalid_api_key_on_map(): - invalid_app = FirecrawlApp(api_key="invalid_api_key", api_url=API_URL) - with pytest.raises(Exception) as excinfo: - invalid_app.map_url('https://roastmywebsite.ai') - assert "Unauthorized: Invalid token" in str(excinfo.value) + if 'api.firecrawl.dev' in API_URL: + invalid_app = FirecrawlApp(api_key="invalid_api_key", api_url=API_URL) + with pytest.raises(Exception) as excinfo: + invalid_app.map_url('https://roastmywebsite.ai') + assert "Unauthorized: Invalid token" in str(excinfo.value) + else: + # Should work without API key for self-hosted + app = FirecrawlApp(api_url=API_URL) + response = app.map_url('https://roastmywebsite.ai') + assert response is not None def test_blocklisted_url_on_map(): app = FirecrawlApp(api_key=TEST_API_KEY, api_url=API_URL) @@ -349,4 +372,3 @@ def test_search_e2e(): # assert isinstance(llm_extraction['is_open_source'], bool) - \ No newline at end of file diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 45ed27d8..4bc6697a 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -40,19 +40,22 @@ class FirecrawlApp: error: Optional[str] = None def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None: - """ - Initialize the FirecrawlApp instance with API key, API URL. + """ + Initialize the FirecrawlApp instance with API key, API URL. - Args: - api_key (Optional[str]): API key for authenticating with the Firecrawl API. - api_url (Optional[str]): Base URL for the Firecrawl API. - """ - self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY') - self.api_url = api_url or os.getenv('FIRECRAWL_API_URL', 'https://api.firecrawl.dev') - if self.api_key is None: - logger.warning("No API key provided") - raise ValueError('No API key provided') - logger.debug(f"Initialized FirecrawlApp with API key: {self.api_key}") + Args: + api_key (Optional[str]): API key for authenticating with the Firecrawl API. + api_url (Optional[str]): Base URL for the Firecrawl API. + """ + self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY') + self.api_url = api_url or os.getenv('FIRECRAWL_API_URL', 'https://api.firecrawl.dev') + + # Only require API key when using cloud service + if 'api.firecrawl.dev' in self.api_url and self.api_key is None: + logger.warning("No API key provided for cloud service") + raise ValueError('No API key provided') + + logger.debug(f"Initialized FirecrawlApp with API URL: {self.api_url}") def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any: """ diff --git a/apps/python-sdk/pyproject.toml b/apps/python-sdk/pyproject.toml index 87cb91f1..31c16941 100644 --- a/apps/python-sdk/pyproject.toml +++ b/apps/python-sdk/pyproject.toml @@ -12,7 +12,9 @@ dependencies = [ "requests", "python-dotenv", "websockets", - "nest-asyncio" + "nest-asyncio", + "pytest>=8.3.4", + "pydantic>=2.10.3", ] authors = [{name = "Mendable.ai",email = "nick@mendable.ai"}] maintainers = [{name = "Mendable.ai",email = "nick@mendable.ai"}] diff --git a/apps/python-sdk/requirements.txt b/apps/python-sdk/requirements.txt index db67ceeb..dd4e7a08 100644 --- a/apps/python-sdk/requirements.txt +++ b/apps/python-sdk/requirements.txt @@ -2,4 +2,6 @@ requests pytest python-dotenv websockets -nest-asyncio \ No newline at end of file +nest-asyncio +pytest +pydantic \ No newline at end of file From 2b488cac3d380470e95da10583b48320f7793510 Mon Sep 17 00:00:00 2001 From: RutamBhagat Date: Fri, 20 Dec 2024 01:54:29 -0800 Subject: [PATCH 2/2] chore: remove pytest dependency from pyproject.toml --- apps/python-sdk/pyproject.toml | 1 - apps/python-sdk/requirements.txt | 1 - 2 files changed, 2 deletions(-) diff --git a/apps/python-sdk/pyproject.toml b/apps/python-sdk/pyproject.toml index 31c16941..67082d5e 100644 --- a/apps/python-sdk/pyproject.toml +++ b/apps/python-sdk/pyproject.toml @@ -13,7 +13,6 @@ dependencies = [ "python-dotenv", "websockets", "nest-asyncio", - "pytest>=8.3.4", "pydantic>=2.10.3", ] authors = [{name = "Mendable.ai",email = "nick@mendable.ai"}] diff --git a/apps/python-sdk/requirements.txt b/apps/python-sdk/requirements.txt index dd4e7a08..5dcd8f6c 100644 --- a/apps/python-sdk/requirements.txt +++ b/apps/python-sdk/requirements.txt @@ -3,5 +3,4 @@ pytest python-dotenv websockets nest-asyncio -pytest pydantic \ No newline at end of file