diff --git a/apps/python-sdk/firecrawl/__tests__/v1/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/v1/e2e_withAuth/test.py index 0ada6c1d..d25d43f3 100644 --- a/apps/python-sdk/firecrawl/__tests__/v1/e2e_withAuth/test.py +++ b/apps/python-sdk/firecrawl/__tests__/v1/e2e_withAuth/test.py @@ -8,7 +8,7 @@ from datetime import datetime load_dotenv() -API_URL = "http://127.0.0.1:3002"; +API_URL = os.getenv('API_URL', 'http://127.0.0.1:3002') ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py" TEST_API_KEY = os.getenv('TEST_API_KEY') @@ -20,15 +20,26 @@ spec.loader.exec_module(firecrawl) FirecrawlApp = firecrawl.FirecrawlApp def test_no_api_key(): - with pytest.raises(Exception) as excinfo: - invalid_app = FirecrawlApp(api_url=API_URL) - assert "No API key provided" in str(excinfo.value) + if 'api.firecrawl.dev' in API_URL: + with pytest.raises(Exception) as excinfo: + invalid_app = FirecrawlApp(api_url=API_URL) + assert "No API key provided" in str(excinfo.value) + else: + # Should not raise error for self-hosted + app = FirecrawlApp(api_url=API_URL) + assert app is not None def test_scrape_url_invalid_api_key(): - invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") - with pytest.raises(Exception) as excinfo: - invalid_app.scrape_url('https://firecrawl.dev') - assert "Unauthorized: Invalid token" in str(excinfo.value) + if 'api.firecrawl.dev' in API_URL: + invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") + with pytest.raises(Exception) as excinfo: + invalid_app.scrape_url('https://firecrawl.dev') + assert "Unauthorized: Invalid token" in str(excinfo.value) + else: + # Should work without API key for self-hosted + app = FirecrawlApp(api_url=API_URL) + response = app.scrape_url('https://firecrawl.dev') + assert response is not None # def test_blocklisted_url(): # blocklisted_url = "https://facebook.com/fake-test" @@ -131,10 +142,16 @@ def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_ext assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['markdown'] def test_crawl_url_invalid_api_key(): - invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") - with pytest.raises(Exception) as excinfo: - invalid_app.crawl_url('https://firecrawl.dev') - assert "Unauthorized: Invalid token" in str(excinfo.value) + if 'api.firecrawl.dev' in API_URL: + invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") + with pytest.raises(Exception) as excinfo: + invalid_app.crawl_url('https://firecrawl.dev') + assert "Unauthorized: Invalid token" in str(excinfo.value) + else: + # Should work without API key for self-hosted + app = FirecrawlApp(api_url=API_URL) + response = app.crawl_url('https://firecrawl.dev') + assert response is not None # def test_should_return_error_for_blocklisted_url(): # app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) @@ -291,10 +308,16 @@ def test_check_crawl_status_e2e(): assert 'error' not in status_response['data'][0]['metadata'] def test_invalid_api_key_on_map(): - invalid_app = FirecrawlApp(api_key="invalid_api_key", api_url=API_URL) - with pytest.raises(Exception) as excinfo: - invalid_app.map_url('https://roastmywebsite.ai') - assert "Unauthorized: Invalid token" in str(excinfo.value) + if 'api.firecrawl.dev' in API_URL: + invalid_app = FirecrawlApp(api_key="invalid_api_key", api_url=API_URL) + with pytest.raises(Exception) as excinfo: + invalid_app.map_url('https://roastmywebsite.ai') + assert "Unauthorized: Invalid token" in str(excinfo.value) + else: + # Should work without API key for self-hosted + app = FirecrawlApp(api_url=API_URL) + response = app.map_url('https://roastmywebsite.ai') + assert response is not None # def test_blocklisted_url_on_map(): # app = FirecrawlApp(api_key=TEST_API_KEY, api_url=API_URL) @@ -349,4 +372,3 @@ def test_search_e2e(): # assert isinstance(llm_extraction['is_open_source'], bool) - \ No newline at end of file diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index e4ac2726..0181db90 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -40,19 +40,22 @@ class FirecrawlApp: error: Optional[str] = None def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None: - """ - Initialize the FirecrawlApp instance with API key, API URL. + """ + Initialize the FirecrawlApp instance with API key, API URL. - Args: - api_key (Optional[str]): API key for authenticating with the Firecrawl API. - api_url (Optional[str]): Base URL for the Firecrawl API. - """ - self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY') - self.api_url = api_url or os.getenv('FIRECRAWL_API_URL', 'https://api.firecrawl.dev') - if self.api_key is None: - logger.warning("No API key provided") - raise ValueError('No API key provided') - logger.debug(f"Initialized FirecrawlApp with API key: {self.api_key}") + Args: + api_key (Optional[str]): API key for authenticating with the Firecrawl API. + api_url (Optional[str]): Base URL for the Firecrawl API. + """ + self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY') + self.api_url = api_url or os.getenv('FIRECRAWL_API_URL', 'https://api.firecrawl.dev') + + # Only require API key when using cloud service + if 'api.firecrawl.dev' in self.api_url and self.api_key is None: + logger.warning("No API key provided for cloud service") + raise ValueError('No API key provided') + + logger.debug(f"Initialized FirecrawlApp with API URL: {self.api_url}") def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any: """ diff --git a/apps/python-sdk/pyproject.toml b/apps/python-sdk/pyproject.toml index 87cb91f1..67082d5e 100644 --- a/apps/python-sdk/pyproject.toml +++ b/apps/python-sdk/pyproject.toml @@ -12,7 +12,8 @@ dependencies = [ "requests", "python-dotenv", "websockets", - "nest-asyncio" + "nest-asyncio", + "pydantic>=2.10.3", ] authors = [{name = "Mendable.ai",email = "nick@mendable.ai"}] maintainers = [{name = "Mendable.ai",email = "nick@mendable.ai"}] diff --git a/apps/python-sdk/requirements.txt b/apps/python-sdk/requirements.txt index db67ceeb..5dcd8f6c 100644 --- a/apps/python-sdk/requirements.txt +++ b/apps/python-sdk/requirements.txt @@ -2,4 +2,5 @@ requests pytest python-dotenv websockets -nest-asyncio \ No newline at end of file +nest-asyncio +pydantic \ No newline at end of file