From 2faa45162c50eef3c65f3ce7c45980030cdcc5ae Mon Sep 17 00:00:00 2001 From: rafaelmmiller <8574157+rafaelmmiller@users.noreply.github.com> Date: Fri, 30 May 2025 10:16:44 -0300 Subject: [PATCH] sdk(v3): all tests complete --- apps/python-sdk/firecrawl/types.py | 2 +- ...scrape.py => test_a_async_batch_scrape.py} | 0 ...t_async_crawl.py => test_a_async_crawl.py} | 0 ...batch_scrape.py => test_a_batch_scrape.py} | 0 .../async/{test_crawl.py => test_a_crawl.py} | 0 .../tests/e2e/async/test_a_extract.py | 520 ++++++++++++++++++ .../e2e/async/{test_map.py => test_a_map.py} | 0 .../{test_scrape.py => test_a_scrape.py} | 0 .../tests/e2e/async/test_extract.py | 99 ---- apps/python-sdk/tests/e2e/test_extract.py | 404 +++++++++++++- 10 files changed, 904 insertions(+), 121 deletions(-) rename apps/python-sdk/tests/e2e/async/{test_async_batch_scrape.py => test_a_async_batch_scrape.py} (100%) rename apps/python-sdk/tests/e2e/async/{test_async_crawl.py => test_a_async_crawl.py} (100%) rename apps/python-sdk/tests/e2e/async/{test_batch_scrape.py => test_a_batch_scrape.py} (100%) rename apps/python-sdk/tests/e2e/async/{test_crawl.py => test_a_crawl.py} (100%) create mode 100644 apps/python-sdk/tests/e2e/async/test_a_extract.py rename apps/python-sdk/tests/e2e/async/{test_map.py => test_a_map.py} (100%) rename apps/python-sdk/tests/e2e/async/{test_scrape.py => test_a_scrape.py} (100%) delete mode 100644 apps/python-sdk/tests/e2e/async/test_extract.py diff --git a/apps/python-sdk/firecrawl/types.py b/apps/python-sdk/firecrawl/types.py index ab6e22e3..e0ee7b8d 100644 --- a/apps/python-sdk/firecrawl/types.py +++ b/apps/python-sdk/firecrawl/types.py @@ -279,7 +279,7 @@ class ExtractResponse(pydantic.BaseModel, Generic[T]): data: Optional[T] = None error: Optional[str] = None warning: Optional[str] = None - sources: Optional[List[str]] = None + sources: Optional[Union[List[str], Dict[str, List[str]]]] = None class SearchParams(pydantic.BaseModel): query: str diff --git a/apps/python-sdk/tests/e2e/async/test_async_batch_scrape.py b/apps/python-sdk/tests/e2e/async/test_a_async_batch_scrape.py similarity index 100% rename from apps/python-sdk/tests/e2e/async/test_async_batch_scrape.py rename to apps/python-sdk/tests/e2e/async/test_a_async_batch_scrape.py diff --git a/apps/python-sdk/tests/e2e/async/test_async_crawl.py b/apps/python-sdk/tests/e2e/async/test_a_async_crawl.py similarity index 100% rename from apps/python-sdk/tests/e2e/async/test_async_crawl.py rename to apps/python-sdk/tests/e2e/async/test_a_async_crawl.py diff --git a/apps/python-sdk/tests/e2e/async/test_batch_scrape.py b/apps/python-sdk/tests/e2e/async/test_a_batch_scrape.py similarity index 100% rename from apps/python-sdk/tests/e2e/async/test_batch_scrape.py rename to apps/python-sdk/tests/e2e/async/test_a_batch_scrape.py diff --git a/apps/python-sdk/tests/e2e/async/test_crawl.py b/apps/python-sdk/tests/e2e/async/test_a_crawl.py similarity index 100% rename from apps/python-sdk/tests/e2e/async/test_crawl.py rename to apps/python-sdk/tests/e2e/async/test_a_crawl.py diff --git a/apps/python-sdk/tests/e2e/async/test_a_extract.py b/apps/python-sdk/tests/e2e/async/test_a_extract.py new file mode 100644 index 00000000..25a8b654 --- /dev/null +++ b/apps/python-sdk/tests/e2e/async/test_a_extract.py @@ -0,0 +1,520 @@ +import os +import sys +import pytest +from dotenv import load_dotenv +from pydantic import BaseModel +from typing import List + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) +from firecrawl.firecrawl import AsyncFirecrawlApp + +load_dotenv() + +API_URL = os.getenv("TEST_API_URL") or "https://api.firecrawl.dev" +API_KEY = os.getenv("TEST_API_KEY") + +app = AsyncFirecrawlApp(api_url=API_URL, api_key=API_KEY) + +class ExtractSchema(BaseModel): + title: str + description: str + links: List[str] + + +@pytest.mark.asyncio +async def test_extract_simple_schema_class(): + result = await app.extract( + ["https://example.com"], + prompt="Extract the title, description, and links from the website", + schema=ExtractSchema + ) + + # Basic response assertions + assert result is not None + assert result.success + assert hasattr(result, "data") + assert result.data is not None + + # Error handling assertions + assert hasattr(result, "error") + assert result.error is None + + # Warning handling assertions + assert hasattr(result, "warning") + # Warning can be present or None, so we just check it exists + + # Handle both dictionary and object responses + if isinstance(result.data, dict): + # Data returned as dictionary + assert "title" in result.data + assert "description" in result.data + assert "links" in result.data + assert isinstance(result.data["title"], str) + assert isinstance(result.data["description"], str) + assert isinstance(result.data["links"], list) + assert len(result.data["title"]) > 0 + assert len(result.data["description"]) > 0 + + # Links validation + for link in result.data["links"]: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + else: + # Data returned as Pydantic model object + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert isinstance(result.data.title, str) + assert isinstance(result.data.description, str) + assert isinstance(result.data.links, list) + assert len(result.data.title) > 0 + assert len(result.data.description) > 0 + + # Links validation + for link in result.data.links: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + + # Response metadata validation + assert hasattr(result, "id") + assert hasattr(result, "status") + if result.status is not None: + assert result.status in ["processing", "completed", "failed"] + + # Sources validation + assert hasattr(result, "sources") + if result.sources is not None: + if isinstance(result.sources, dict): + # Sources returned as dictionary with keys like 'links[0]' + for key, source_list in result.sources.items(): + assert isinstance(key, str) + assert isinstance(source_list, list) + for source_url in source_list: + assert isinstance(source_url, str) + assert len(source_url) > 0 + # Sources should be valid URLs + assert source_url.startswith(("http://", "https://")) + elif isinstance(result.sources, list): + # Sources returned as simple list (fallback) + for source in result.sources: + assert isinstance(source, str) + assert len(source) > 0 + +@pytest.mark.asyncio +async def test_extract_simple_schema_dict(): + result = await app.extract( + ["https://example.com"], + prompt="Extract the title, description, and links from the website", + schema=ExtractSchema.schema() + ) + + # Basic response assertions + assert result is not None + assert result.success + assert hasattr(result, "data") + assert result.data is not None + + # Error handling assertions + assert hasattr(result, "error") + assert result.error is None + + # Warning handling assertions + assert hasattr(result, "warning") + + # Handle both dictionary and object responses + if isinstance(result.data, dict): + # Data returned as dictionary + assert "title" in result.data + assert "description" in result.data + assert "links" in result.data + assert isinstance(result.data["title"], str) + assert isinstance(result.data["description"], str) + assert isinstance(result.data["links"], list) + assert len(result.data["title"]) > 0 + assert len(result.data["description"]) > 0 + + # Links validation + for link in result.data["links"]: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + else: + # Data returned as Pydantic model object + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert isinstance(result.data.title, str) + assert isinstance(result.data.description, str) + assert isinstance(result.data.links, list) + assert len(result.data.title) > 0 + assert len(result.data.description) > 0 + + # Links validation + for link in result.data.links: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + + # Response metadata validation + assert hasattr(result, "id") + assert hasattr(result, "status") + if result.status is not None: + assert result.status in ["processing", "completed", "failed"] + + # Sources validation + assert hasattr(result, "sources") + if result.sources is not None: + if isinstance(result.sources, dict): + # Sources returned as dictionary with keys like 'links[0]' + for key, source_list in result.sources.items(): + assert isinstance(key, str) + assert isinstance(source_list, list) + for source_url in source_list: + assert isinstance(source_url, str) + assert len(source_url) > 0 + # Sources should be valid URLs + assert source_url.startswith(("http://", "https://")) + elif isinstance(result.sources, list): + # Sources returned as simple list (fallback) + for source in result.sources: + assert isinstance(source, str) + assert len(source) > 0 + +@pytest.mark.asyncio +async def test_extract_simple_schema_json(): + schema = None + if hasattr(ExtractSchema, "model_json_schema"): + schema = ExtractSchema.model_json_schema() + elif hasattr(ExtractSchema, "schema_json"): + schema = ExtractSchema.schema_json() + else: + pytest.skip("No JSON schema export method available on ExtractSchema") + + result = await app.extract( + ["https://example.com"], + prompt="Extract the title, description, and links from the website", + schema=schema + ) + + # Basic response assertions + assert result is not None + assert result.success + assert hasattr(result, "data") + assert result.data is not None + + # Error handling assertions + assert hasattr(result, "error") + assert result.error is None + + # Warning handling assertions + assert hasattr(result, "warning") + + # Handle both dictionary and object responses + if isinstance(result.data, dict): + # Data returned as dictionary + assert "title" in result.data + assert "description" in result.data + assert "links" in result.data + assert isinstance(result.data["title"], str) + assert isinstance(result.data["description"], str) + assert isinstance(result.data["links"], list) + assert len(result.data["title"]) > 0 + assert len(result.data["description"]) > 0 + + # Links validation + for link in result.data["links"]: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + else: + # Data returned as Pydantic model object + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert isinstance(result.data.title, str) + assert isinstance(result.data.description, str) + assert isinstance(result.data.links, list) + assert len(result.data.title) > 0 + assert len(result.data.description) > 0 + + # Links validation + for link in result.data.links: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + + # Response metadata validation + assert hasattr(result, "id") + assert hasattr(result, "status") + if result.status is not None: + assert result.status in ["processing", "completed", "failed"] + + # Sources validation + assert hasattr(result, "sources") + if result.sources is not None: + if isinstance(result.sources, dict): + # Sources returned as dictionary with keys like 'links[0]' + for key, source_list in result.sources.items(): + assert isinstance(key, str) + assert isinstance(source_list, list) + for source_url in source_list: + assert isinstance(source_url, str) + assert len(source_url) > 0 + # Sources should be valid URLs + assert source_url.startswith(("http://", "https://")) + elif isinstance(result.sources, list): + # Sources returned as simple list (fallback) + for source in result.sources: + assert isinstance(source, str) + assert len(source) > 0 + +@pytest.mark.asyncio +async def test_extract_all_params(): + class AllParamsSchema(BaseModel): + title: str + description: str + links: List[str] + author: str + + schema = AllParamsSchema.schema() + result = await app.extract( + ["https://www.iana.org"], + prompt="Extract the title, description, links, and author from the website", + schema=schema, + system_prompt="You are a helpful extraction agent.", + allow_external_links=False, + enable_web_search=False, + show_sources=True, + # agent={"model": "FIRE-1"} + ) + + # Basic response assertions + assert result is not None + assert result.success + assert hasattr(result, "data") + assert result.data is not None + + # Error handling assertions + assert hasattr(result, "error") + assert result.error is None + + # Warning handling assertions + assert hasattr(result, "warning") + + # Handle both dictionary and object responses + if isinstance(result.data, dict): + # Data returned as dictionary + assert "title" in result.data + assert "description" in result.data + assert "links" in result.data + assert "author" in result.data + assert isinstance(result.data["title"], str) + assert isinstance(result.data["description"], str) + assert isinstance(result.data["links"], list) + assert isinstance(result.data["author"], str) + assert len(result.data["title"]) > 0 + assert len(result.data["description"]) > 0 + + # Links validation + for link in result.data["links"]: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + else: + # Data returned as Pydantic model object + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert hasattr(result.data, "author") + assert isinstance(result.data.title, str) + assert isinstance(result.data.description, str) + assert isinstance(result.data.links, list) + assert isinstance(result.data.author, str) + assert len(result.data.title) > 0 + assert len(result.data.description) > 0 + + # Links validation + for link in result.data.links: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + + # Sources validation (show_sources=True) + assert hasattr(result, "sources") + if result.sources is not None: + if isinstance(result.sources, dict): + # Sources returned as dictionary with keys like 'links[0]' + for key, source_list in result.sources.items(): + assert isinstance(key, str) + assert isinstance(source_list, list) + for source_url in source_list: + assert isinstance(source_url, str) + assert len(source_url) > 0 + # Sources should be valid URLs + assert source_url.startswith(("http://", "https://")) + elif isinstance(result.sources, list): + # Sources returned as simple list (fallback) + for source in result.sources: + assert isinstance(source, str) + assert len(source) > 0 + + # Response metadata validation + assert hasattr(result, "id") + assert hasattr(result, "status") + if result.status is not None: + assert result.status in ["processing", "completed", "failed"] + + # Expires at validation + assert hasattr(result, "expires_at") + # expires_at can be None or a datetime, so we just check it exists + +@pytest.mark.asyncio +async def test_extract_multiple_urls(): + """Test extraction with multiple URLs.""" + class MultiUrlSchema(BaseModel): + title: str + description: str + links: List[str] + + urls = ["https://example.com", "https://www.iana.org"] + result = await app.extract( + urls, + prompt="Extract the title, description, and links from the websites", + schema=MultiUrlSchema + ) + + # Basic response assertions + assert result is not None + assert result.success + assert hasattr(result, "data") + assert result.data is not None + + # Error handling assertions + assert hasattr(result, "error") + assert result.error is None + + # Sources validation + assert hasattr(result, "sources") + if result.sources is not None: + if isinstance(result.sources, dict): + # Sources returned as dictionary with keys like 'links[0]' + for key, source_list in result.sources.items(): + assert isinstance(key, str) + assert isinstance(source_list, list) + for source_url in source_list: + assert isinstance(source_url, str) + assert len(source_url) > 0 + # Sources should be valid URLs + assert source_url.startswith(("http://", "https://")) + elif isinstance(result.sources, list): + # Sources returned as simple list (fallback) + for source in result.sources: + assert isinstance(source, str) + assert len(source) > 0 + + # Handle both dictionary and object responses + if isinstance(result.data, dict): + # Data returned as dictionary + assert "title" in result.data + assert "description" in result.data + assert "links" in result.data + assert isinstance(result.data["title"], str) + assert isinstance(result.data["description"], str) + assert isinstance(result.data["links"], list) + assert len(result.data["title"]) > 0 + assert len(result.data["description"]) > 0 + + # Links validation + for link in result.data["links"]: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + else: + # Data returned as Pydantic model object + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert isinstance(result.data.title, str) + assert isinstance(result.data.description, str) + assert isinstance(result.data.links, list) + assert len(result.data.title) > 0 + assert len(result.data.description) > 0 + + # Links validation + for link in result.data.links: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + +@pytest.mark.asyncio +async def test_extract_with_system_prompt(): + """Test extraction with custom system prompt.""" + class SystemPromptSchema(BaseModel): + title: str + summary: str + + result = await app.extract( + ["https://example.com"], + prompt="Extract the title and create a brief summary", + schema=SystemPromptSchema, + system_prompt="You are an expert content analyzer. Focus on accuracy and brevity." + ) + + # Basic response assertions + assert result is not None + assert result.success + assert hasattr(result, "data") + assert result.data is not None + + # Error handling assertions + assert hasattr(result, "error") + assert result.error is None + + # Sources validation + assert hasattr(result, "sources") + if result.sources is not None: + if isinstance(result.sources, dict): + # Sources returned as dictionary with keys like 'links[0]' + for key, source_list in result.sources.items(): + assert isinstance(key, str) + assert isinstance(source_list, list) + for source_url in source_list: + assert isinstance(source_url, str) + assert len(source_url) > 0 + # Sources should be valid URLs + assert source_url.startswith(("http://", "https://")) + elif isinstance(result.sources, list): + # Sources returned as simple list (fallback) + for source in result.sources: + assert isinstance(source, str) + assert len(source) > 0 + + # Handle both dictionary and object responses + if isinstance(result.data, dict): + # Data returned as dictionary + assert "title" in result.data + assert "summary" in result.data + assert isinstance(result.data["title"], str) + assert isinstance(result.data["summary"], str) + assert len(result.data["title"]) > 0 + assert len(result.data["summary"]) > 0 + else: + # Data returned as Pydantic model object + assert hasattr(result.data, "title") + assert hasattr(result.data, "summary") + assert isinstance(result.data.title, str) + assert isinstance(result.data.summary, str) + assert len(result.data.title) > 0 + assert len(result.data.summary) > 0 \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/async/test_map.py b/apps/python-sdk/tests/e2e/async/test_a_map.py similarity index 100% rename from apps/python-sdk/tests/e2e/async/test_map.py rename to apps/python-sdk/tests/e2e/async/test_a_map.py diff --git a/apps/python-sdk/tests/e2e/async/test_scrape.py b/apps/python-sdk/tests/e2e/async/test_a_scrape.py similarity index 100% rename from apps/python-sdk/tests/e2e/async/test_scrape.py rename to apps/python-sdk/tests/e2e/async/test_a_scrape.py diff --git a/apps/python-sdk/tests/e2e/async/test_extract.py b/apps/python-sdk/tests/e2e/async/test_extract.py deleted file mode 100644 index 37d4b224..00000000 --- a/apps/python-sdk/tests/e2e/async/test_extract.py +++ /dev/null @@ -1,99 +0,0 @@ -import os -import sys -import pytest -from dotenv import load_dotenv -from pydantic import BaseModel -from typing import List - -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../'))) -from firecrawl.firecrawl import AsyncFirecrawlApp - -load_dotenv() - -API_URL = "https://api.firecrawl.dev" -API_KEY = os.getenv("TEST_API_KEY") - -app = AsyncFirecrawlApp(api_url=API_URL, api_key=API_KEY) - -class ExtractSchema(BaseModel): - title: str - description: str - links: List[str] - -@pytest.mark.asyncio -async def test_extract_async_simple_schema_class(): - result = await app.extract( - ["https://example.com"], - prompt="Extract the title, description, and links from the website", - schema=ExtractSchema - ) - assert result.success - assert result.data is not None - assert hasattr(result.data, "title") - assert hasattr(result.data, "description") - assert hasattr(result.data, "links") - assert isinstance(result.data.links, list) - -@pytest.mark.asyncio -async def test_extract_async_simple_schema_dict(): - result = await app.extract( - ["https://example.com"], - prompt="Extract the title, description, and links from the website", - schema=ExtractSchema.schema() - ) - assert result.success - assert result.data is not None - assert hasattr(result.data, "title") - assert hasattr(result.data, "description") - assert hasattr(result.data, "links") - assert isinstance(result.data.links, list) - -@pytest.mark.asyncio -async def test_extract_async_simple_schema_json(): - # Try both model_json_schema (Pydantic v2) and schema_json (Pydantic v1) - schema = None - if hasattr(ExtractSchema, "model_json_schema"): - schema = ExtractSchema.model_json_schema() - elif hasattr(ExtractSchema, "schema_json"): - schema = ExtractSchema.schema_json() - else: - pytest.skip("No JSON schema export method available on ExtractSchema") - result = await app.extract( - ["https://example.com"], - prompt="Extract the title, description, and links from the website", - schema=schema - ) - assert result.success - assert result.data is not None - assert hasattr(result.data, "title") - assert hasattr(result.data, "description") - assert hasattr(result.data, "links") - assert isinstance(result.data.links, list) - -@pytest.mark.asyncio -async def test_extract_async_all_params(): - class AllParamsSchema(BaseModel): - title: str - description: str - links: List[str] - author: str - schema = AllParamsSchema.schema() - result = await app.extract( - ["https://www.iana.org"], - prompt="Extract the title, description, links, and author from the website", - schema=schema, - system_prompt="You are a helpful extraction agent.", - allow_external_links=False, - enable_web_search=True, - show_sources=True, - agent={"model": "FIRE-1"} - ) - assert result.success - assert result.data is not None - assert hasattr(result.data, "title") - assert hasattr(result.data, "description") - assert hasattr(result.data, "links") - assert hasattr(result.data, "author") - assert isinstance(result.data.links, list) - assert hasattr(result, "sources") - assert isinstance(result.sources, list) \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/test_extract.py b/apps/python-sdk/tests/e2e/test_extract.py index f3c5f1e7..fc6e9973 100644 --- a/apps/python-sdk/tests/e2e/test_extract.py +++ b/apps/python-sdk/tests/e2e/test_extract.py @@ -10,7 +10,7 @@ from firecrawl.firecrawl import FirecrawlApp load_dotenv() -API_URL = "https://api.firecrawl.dev" +API_URL = os.getenv("TEST_API_URL") or "https://api.firecrawl.dev" API_KEY = os.getenv("TEST_API_KEY") app = FirecrawlApp(api_url=API_URL, api_key=API_KEY) @@ -26,12 +26,62 @@ def test_extract_simple_schema_class(): prompt="Extract the title, description, and links from the website", schema=ExtractSchema ) + + # Basic response assertions + assert result is not None assert result.success + assert hasattr(result, "data") assert result.data is not None - assert hasattr(result.data, "title") - assert hasattr(result.data, "description") - assert hasattr(result.data, "links") - assert isinstance(result.data.links, list) + + # Error handling assertions + assert hasattr(result, "error") + assert result.error is None + + # Warning handling assertions + assert hasattr(result, "warning") + # Warning can be present or None, so we just check it exists + + # Handle both dictionary and object responses + if isinstance(result.data, dict): + # Data returned as dictionary + assert "title" in result.data + assert "description" in result.data + assert "links" in result.data + assert isinstance(result.data["title"], str) + assert isinstance(result.data["description"], str) + assert isinstance(result.data["links"], list) + assert len(result.data["title"]) > 0 + assert len(result.data["description"]) > 0 + + # Links validation + for link in result.data["links"]: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + else: + # Data returned as Pydantic model object + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert isinstance(result.data.title, str) + assert isinstance(result.data.description, str) + assert isinstance(result.data.links, list) + assert len(result.data.title) > 0 + assert len(result.data.description) > 0 + + # Links validation + for link in result.data.links: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + + # Response metadata validation + assert hasattr(result, "id") + assert hasattr(result, "status") + if result.status is not None: + assert result.status in ["processing", "completed", "failed"] def test_extract_simple_schema_dict(): result = app.extract( @@ -39,12 +89,80 @@ def test_extract_simple_schema_dict(): prompt="Extract the title, description, and links from the website", schema=ExtractSchema.schema() ) + + # Basic response assertions + assert result is not None assert result.success + assert hasattr(result, "data") assert result.data is not None - assert hasattr(result.data, "title") - assert hasattr(result.data, "description") - assert hasattr(result.data, "links") - assert isinstance(result.data.links, list) + + # Error handling assertions + assert hasattr(result, "error") + assert result.error is None + + # Warning handling assertions + assert hasattr(result, "warning") + + # Handle both dictionary and object responses + if isinstance(result.data, dict): + # Data returned as dictionary + assert "title" in result.data + assert "description" in result.data + assert "links" in result.data + assert isinstance(result.data["title"], str) + assert isinstance(result.data["description"], str) + assert isinstance(result.data["links"], list) + assert len(result.data["title"]) > 0 + assert len(result.data["description"]) > 0 + + # Links validation + for link in result.data["links"]: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + else: + # Data returned as Pydantic model object + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert isinstance(result.data.title, str) + assert isinstance(result.data.description, str) + assert isinstance(result.data.links, list) + assert len(result.data.title) > 0 + assert len(result.data.description) > 0 + + # Links validation + for link in result.data.links: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + + # Response metadata validation + assert hasattr(result, "id") + assert hasattr(result, "status") + if result.status is not None: + assert result.status in ["processing", "completed", "failed"] + + # Sources validation + assert hasattr(result, "sources") + if result.sources is not None: + if isinstance(result.sources, dict): + # Sources returned as dictionary with keys like 'links[0]' + for key, source_list in result.sources.items(): + assert isinstance(key, str) + assert isinstance(source_list, list) + for source_url in source_list: + assert isinstance(source_url, str) + assert len(source_url) > 0 + # Sources should be valid URLs + assert source_url.startswith(("http://", "https://")) + elif isinstance(result.sources, list): + # Sources returned as simple list (fallback) + for source in result.sources: + assert isinstance(source, str) + assert len(source) > 0 def test_extract_simple_schema_json(): schema = None @@ -54,17 +172,86 @@ def test_extract_simple_schema_json(): schema = ExtractSchema.schema_json() else: pytest.skip("No JSON schema export method available on ExtractSchema") + result = app.extract( ["https://example.com"], prompt="Extract the title, description, and links from the website", schema=schema ) + + # Basic response assertions + assert result is not None assert result.success + assert hasattr(result, "data") assert result.data is not None - assert hasattr(result.data, "title") - assert hasattr(result.data, "description") - assert hasattr(result.data, "links") - assert isinstance(result.data.links, list) + + # Error handling assertions + assert hasattr(result, "error") + assert result.error is None + + # Warning handling assertions + assert hasattr(result, "warning") + + # Handle both dictionary and object responses + if isinstance(result.data, dict): + # Data returned as dictionary + assert "title" in result.data + assert "description" in result.data + assert "links" in result.data + assert isinstance(result.data["title"], str) + assert isinstance(result.data["description"], str) + assert isinstance(result.data["links"], list) + assert len(result.data["title"]) > 0 + assert len(result.data["description"]) > 0 + + # Links validation + for link in result.data["links"]: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + else: + # Data returned as Pydantic model object + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert isinstance(result.data.title, str) + assert isinstance(result.data.description, str) + assert isinstance(result.data.links, list) + assert len(result.data.title) > 0 + assert len(result.data.description) > 0 + + # Links validation + for link in result.data.links: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + + # Response metadata validation + assert hasattr(result, "id") + assert hasattr(result, "status") + if result.status is not None: + assert result.status in ["processing", "completed", "failed"] + + # Sources validation + assert hasattr(result, "sources") + if result.sources is not None: + if isinstance(result.sources, dict): + # Sources returned as dictionary with keys like 'links[0]' + for key, source_list in result.sources.items(): + assert isinstance(key, str) + assert isinstance(source_list, list) + for source_url in source_list: + assert isinstance(source_url, str) + assert len(source_url) > 0 + # Sources should be valid URLs + assert source_url.startswith(("http://", "https://")) + elif isinstance(result.sources, list): + # Sources returned as simple list (fallback) + for source in result.sources: + assert isinstance(source, str) + assert len(source) > 0 def test_extract_all_params(): class AllParamsSchema(BaseModel): @@ -72,6 +259,7 @@ def test_extract_all_params(): description: str links: List[str] author: str + schema = AllParamsSchema.schema() result = app.extract( ["https://www.iana.org"], @@ -79,16 +267,190 @@ def test_extract_all_params(): schema=schema, system_prompt="You are a helpful extraction agent.", allow_external_links=False, - enable_web_search=True, + enable_web_search=False, show_sources=True, - agent={"model": "FIRE-1"} + # agent={"model": "FIRE-1"} ) + + # Basic response assertions + assert result is not None assert result.success + assert hasattr(result, "data") assert result.data is not None - assert hasattr(result.data, "title") - assert hasattr(result.data, "description") - assert hasattr(result.data, "links") - assert hasattr(result.data, "author") - assert isinstance(result.data.links, list) + + # Error handling assertions + assert hasattr(result, "error") + assert result.error is None + + # Warning handling assertions + assert hasattr(result, "warning") + + # Handle both dictionary and object responses + if isinstance(result.data, dict): + # Data returned as dictionary + assert "title" in result.data + assert "description" in result.data + assert "links" in result.data + assert "author" in result.data + assert isinstance(result.data["title"], str) + assert isinstance(result.data["description"], str) + assert isinstance(result.data["links"], list) + assert isinstance(result.data["author"], str) + assert len(result.data["title"]) > 0 + assert len(result.data["description"]) > 0 + + # Links validation + for link in result.data["links"]: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + else: + # Data returned as Pydantic model object + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert hasattr(result.data, "author") + assert isinstance(result.data.title, str) + assert isinstance(result.data.description, str) + assert isinstance(result.data.links, list) + assert isinstance(result.data.author, str) + assert len(result.data.title) > 0 + assert len(result.data.description) > 0 + + # Links validation + for link in result.data.links: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + + # Sources validation (show_sources=True) assert hasattr(result, "sources") - assert isinstance(result.sources, list) \ No newline at end of file + if result.sources is not None: + if isinstance(result.sources, dict): + # Sources returned as dictionary with keys like 'links[0]' + for key, source_list in result.sources.items(): + assert isinstance(key, str) + assert isinstance(source_list, list) + for source_url in source_list: + assert isinstance(source_url, str) + assert len(source_url) > 0 + # Sources should be valid URLs + assert source_url.startswith(("http://", "https://")) + elif isinstance(result.sources, list): + # Sources returned as simple list (fallback) + for source in result.sources: + assert isinstance(source, str) + assert len(source) > 0 + + # Response metadata validation + assert hasattr(result, "id") + assert hasattr(result, "status") + if result.status is not None: + assert result.status in ["processing", "completed", "failed"] + + # Expires at validation + assert hasattr(result, "expires_at") + # expires_at can be None or a datetime, so we just check it exists + +def test_extract_multiple_urls(): + """Test extraction with multiple URLs.""" + class MultiUrlSchema(BaseModel): + title: str + description: str + links: List[str] + + urls = ["https://example.com", "https://www.iana.org"] + result = app.extract( + urls, + prompt="Extract the title, description, and links from the websites", + schema=MultiUrlSchema + ) + + # Basic response assertions + assert result is not None + assert result.success + assert hasattr(result, "data") + assert result.data is not None + + # Error handling assertions + assert hasattr(result, "error") + assert result.error is None + + # Handle both dictionary and object responses + if isinstance(result.data, dict): + # Data returned as dictionary + assert "title" in result.data + assert "description" in result.data + assert "links" in result.data + assert isinstance(result.data["title"], str) + assert isinstance(result.data["description"], str) + assert isinstance(result.data["links"], list) + assert len(result.data["title"]) > 0 + assert len(result.data["description"]) > 0 + + # Links validation + for link in result.data["links"]: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + else: + # Data returned as Pydantic model object + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert isinstance(result.data.title, str) + assert isinstance(result.data.description, str) + assert isinstance(result.data.links, list) + assert len(result.data.title) > 0 + assert len(result.data.description) > 0 + + # Links validation + for link in result.data.links: + assert isinstance(link, str) + assert len(link) > 0 + # Links should be valid URLs or relative paths + assert link.startswith(("http://", "https://", "/", "#")) or "." in link + +def test_extract_with_system_prompt(): + """Test extraction with custom system prompt.""" + class SystemPromptSchema(BaseModel): + title: str + summary: str + + result = app.extract( + ["https://example.com"], + prompt="Extract the title and create a brief summary", + schema=SystemPromptSchema, + system_prompt="You are an expert content analyzer. Focus on accuracy and brevity." + ) + + # Basic response assertions + assert result is not None + assert result.success + assert hasattr(result, "data") + assert result.data is not None + + # Error handling assertions + assert hasattr(result, "error") + assert result.error is None + + # Handle both dictionary and object responses + if isinstance(result.data, dict): + # Data returned as dictionary + assert "title" in result.data + assert "summary" in result.data + assert isinstance(result.data["title"], str) + assert isinstance(result.data["summary"], str) + assert len(result.data["title"]) > 0 + assert len(result.data["summary"]) > 0 + else: + # Data returned as Pydantic model object + assert hasattr(result.data, "title") + assert hasattr(result.data, "summary") + assert isinstance(result.data.title, str) + assert isinstance(result.data.summary, str) + assert len(result.data.title) > 0 + assert len(result.data.summary) > 0 \ No newline at end of file