diff --git a/api/core/rag/extractor/watercrawl/client.py b/api/core/rag/extractor/watercrawl/client.py
index 6eaede7dbc..6d596e07d8 100644
--- a/api/core/rag/extractor/watercrawl/client.py
+++ b/api/core/rag/extractor/watercrawl/client.py
@@ -6,6 +6,12 @@ from urllib.parse import urljoin
 import requests
 from requests import Response
 
+from core.rag.extractor.watercrawl.exceptions import (
+    WaterCrawlAuthenticationError,
+    WaterCrawlBadRequestError,
+    WaterCrawlPermissionError,
+)
+
 
 class BaseAPIClient:
     def __init__(self, api_key, base_url):
@@ -53,6 +59,15 @@ class WaterCrawlAPIClient(BaseAPIClient):
                 yield data
 
     def process_response(self, response: Response) -> dict | bytes | list | None | Generator:
+        if response.status_code == 401:
+            raise WaterCrawlAuthenticationError(response)
+
+        if response.status_code == 403:
+            raise WaterCrawlPermissionError(response)
+
+        if 400 <= response.status_code < 500:
+            raise WaterCrawlBadRequestError(response)
+
         response.raise_for_status()
         if response.status_code == 204:
             return None
diff --git a/api/core/rag/extractor/watercrawl/exceptions.py b/api/core/rag/extractor/watercrawl/exceptions.py
new file mode 100644
index 0000000000..e407a594e0
--- /dev/null
+++ b/api/core/rag/extractor/watercrawl/exceptions.py
@@ -0,0 +1,51 @@
+import json
+
+
+class WaterCrawlError(Exception):
+    """Base class for all errors raised by the WaterCrawl client."""
+
+
+class WaterCrawlBadRequestError(WaterCrawlError):
+    """A 4xx response from the WaterCrawl API.
+
+    Exposes ``status_code``, the raw ``response``, and the ``message`` and
+    ``errors`` fields taken from the JSON error body when one is present.
+    """
+
+    def __init__(self, response):
+        self.status_code = response.status_code
+        self.response = response
+        # Error bodies are not always JSON objects (gateways may answer with
+        # HTML or plain text). Decoding must not raise from inside this
+        # constructor, or the HTTP error being reported would be masked.
+        try:
+            data = response.json()
+        except ValueError:
+            data = None
+        if not isinstance(data, dict):
+            data = {}
+        self.message = data.get("message", "Unknown error occurred")
+        self.errors = data.get("errors", {})
+        super().__init__(self.message)
+
+    @property
+    def flat_errors(self):
+        """Return ``errors`` serialized as a JSON string."""
+        return json.dumps(self.errors)
+
+    def __str__(self):
+        return f"WaterCrawlBadRequestError: {self.message} \n {self.flat_errors}"
+
+
+class WaterCrawlPermissionError(WaterCrawlBadRequestError):
+    """A 403 response, reported to the user as exceeding API limits."""
+
+    def __str__(self):
+        return f"You are exceeding your WaterCrawl API limits. {self.message}"
+
+
+class WaterCrawlAuthenticationError(WaterCrawlBadRequestError):
+    """A 401 response, reported to the user as an invalid or expired API key."""
+
+    def __str__(self):
+        return "WaterCrawl API key is invalid or expired. Please check your API key and try again."