diff --git a/apps/python-sdk/tests/e2e/async/test_map.py b/apps/python-sdk/tests/e2e/async/test_map.py
index b034e9f0..37852420 100644
--- a/apps/python-sdk/tests/e2e/async/test_map.py
+++ b/apps/python-sdk/tests/e2e/async/test_map.py
@@ -1,11 +1,9 @@
 import sys
 import os
-import subprocess
-import time
 import pytest
 from dotenv import load_dotenv
 
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../')))
 from firecrawl.firecrawl import AsyncFirecrawlApp
 
 load_dotenv()
@@ -18,15 +16,43 @@ app = AsyncFirecrawlApp(api_url=API_URL, api_key=API_KEY)
 TEST_URL = "example.com"
 
 @pytest.mark.asyncio
-async def test_map_url_async_simple():
+async def test_map_url_simple():
     result = await app.map_url(TEST_URL)
+
+    # Basic response assertions
     assert result is not None
     assert result.success
     assert hasattr(result, "links")
+    assert result.links is not None
+    assert isinstance(result.links, list)
+    assert len(result.links) > 0
+
+    # Error handling assertions
+    assert hasattr(result, "error")
+    assert result.error is None
+
+    # Links content validation
+    for link in result.links:
+        assert isinstance(link, str)
+        assert len(link) > 0
+        # Links should be valid URLs or relative paths
+        assert link.startswith(("http://", "https://", "/")) or "." in link
+
+    # Verify we get links related to the domain
     assert any("example.com" in url for url in result.links)
+
+    # Check for common website structure links
+    domain_links = [link for link in result.links if "example.com" in link]
+    assert len(domain_links) > 0
+
+    # Validate URL formats
+    for link in domain_links:
+        if link.startswith("http"):
+            assert link.startswith(("http://", "https://"))
+            assert "example.com" in link
 
 @pytest.mark.asyncio
-async def test_map_url_async_all_params():
+async def test_map_url_all_params():
     result = await app.map_url(
         TEST_URL,
         search="test",
@@ -34,7 +60,119 @@ async def test_map_url_async_all_params():
         include_subdomains=False,
         limit=10
     )
+
+    # Basic response assertions
     assert result is not None
     assert result.success
     assert hasattr(result, "links")
-    assert any("example.com" in url for url in result.links)
\ No newline at end of file
+    assert result.links is not None
+    assert isinstance(result.links, list)
+    assert len(result.links) > 0
+
+    # Error handling assertions
+    assert hasattr(result, "error")
+    assert result.error is None
+
+    # Parameter validation - limit should be respected
+    assert len(result.links) <= 10  # Should respect the limit parameter
+
+    # Links content validation
+    for link in result.links:
+        assert isinstance(link, str)
+        assert len(link) > 0
+        # Links should be valid URLs or relative paths
+        assert link.startswith(("http://", "https://", "/")) or "." in link
+
+    # Verify we get links related to the domain
+    assert any("example.com" in url for url in result.links)
+
+    # Check subdomain exclusion (include_subdomains=False)
+    domain_links = [link for link in result.links if "example.com" in link]
+    assert len(domain_links) > 0
+
+    # Validate that subdomains are excluded when include_subdomains=False
+    for link in domain_links:
+        if link.startswith("http"):
+            # Should not have subdomains like subdomain.example.com
+            # Extract the host part (text between "://" and the first "/")
+            if "://" in link:
+                domain_part = link.split("://")[1].split("/")[0]
+                # Only the apex domain and its www alias are acceptable; any other host is a subdomain leak
+                assert domain_part in ("example.com", "www.example.com")
+
+    # Validate URL formats
+    for link in domain_links:
+        if link.startswith("http"):
+            assert link.startswith(("http://", "https://"))
+            assert "example.com" in link
+
+    # Check that we have a reasonable number of links (not empty, not excessive)
+    assert 1 <= len(result.links) <= 10
+
+@pytest.mark.asyncio
+async def test_map_url_with_sitemap():
+    """Test mapping with sitemap-only option."""
+    result = await app.map_url(
+        TEST_URL,
+        sitemap_only=True,
+        limit=5
+    )
+
+    # Basic response assertions
+    assert result is not None
+    assert result.success
+    assert hasattr(result, "links")
+    assert result.links is not None
+    assert isinstance(result.links, list)
+
+    # Error handling assertions
+    assert hasattr(result, "error")
+    assert result.error is None
+
+    # Links validation (if any are returned)
+    if len(result.links) > 0:
+        for link in result.links:
+            assert isinstance(link, str)
+            assert len(link) > 0
+            # Links should be valid URLs
+            assert link.startswith(("http://", "https://", "/")) or "." in link
+
+        # Should respect limit
+        assert len(result.links) <= 5
+
+        # Should contain domain links
+        assert any("example.com" in url for url in result.links)
+
+@pytest.mark.asyncio
+async def test_map_url_with_search():
+    """Test mapping with search parameter."""
+    result = await app.map_url(
+        TEST_URL,
+        search="contact",
+        limit=3
+    )
+
+    # Basic response assertions
+    assert result is not None
+    assert result.success
+    assert hasattr(result, "links")
+    assert result.links is not None
+    assert isinstance(result.links, list)
+
+    # Error handling assertions
+    assert hasattr(result, "error")
+    assert result.error is None
+
+    # Links validation
+    if len(result.links) > 0:
+        for link in result.links:
+            assert isinstance(link, str)
+            assert len(link) > 0
+            # Links should be valid URLs
+            assert link.startswith(("http://", "https://", "/")) or "." in link
+
+        # Should respect limit
+        assert len(result.links) <= 3
+
+        # Should contain domain links
+        assert any("example.com" in url for url in result.links)
\ No newline at end of file
diff --git a/apps/python-sdk/tests/e2e/test_map.py b/apps/python-sdk/tests/e2e/test_map.py
index da9bb4d5..60a6dc4b 100644
--- a/apps/python-sdk/tests/e2e/test_map.py
+++ b/apps/python-sdk/tests/e2e/test_map.py
@@ -17,10 +17,38 @@ TEST_URL = "example.com"
 
 def test_map_url_simple():
     result = app.map_url(TEST_URL)
+
+    # Basic response assertions
     assert result is not None
     assert result.success
     assert hasattr(result, "links")
+    assert result.links is not None
+    assert isinstance(result.links, list)
+    assert len(result.links) > 0
+
+    # Error handling assertions
+    assert hasattr(result, "error")
+    assert result.error is None
+
+    # Links content validation
+    for link in result.links:
+        assert isinstance(link, str)
+        assert len(link) > 0
+        # Links should be valid URLs or relative paths
+        assert link.startswith(("http://", "https://", "/")) or "." in link
+
+    # Verify we get links related to the domain
     assert any("example.com" in url for url in result.links)
+
+    # Check for common website structure links
+    domain_links = [link for link in result.links if "example.com" in link]
+    assert len(domain_links) > 0
+
+    # Validate URL formats
+    for link in domain_links:
+        if link.startswith("http"):
+            assert link.startswith(("http://", "https://"))
+            assert "example.com" in link
 
 def test_map_url_all_params():
     result = app.map_url(
@@ -30,7 +58,117 @@ def test_map_url_all_params():
         include_subdomains=False,
         limit=10
     )
+
+    # Basic response assertions
     assert result is not None
     assert result.success
     assert hasattr(result, "links")
-    assert any("example.com" in url for url in result.links)
\ No newline at end of file
+    assert result.links is not None
+    assert isinstance(result.links, list)
+    assert len(result.links) > 0
+
+    # Error handling assertions
+    assert hasattr(result, "error")
+    assert result.error is None
+
+    # Parameter validation - limit should be respected
+    assert len(result.links) <= 10  # Should respect the limit parameter
+
+    # Links content validation
+    for link in result.links:
+        assert isinstance(link, str)
+        assert len(link) > 0
+        # Links should be valid URLs or relative paths
+        assert link.startswith(("http://", "https://", "/")) or "." in link
+
+    # Verify we get links related to the domain
+    assert any("example.com" in url for url in result.links)
+
+    # Check subdomain exclusion (include_subdomains=False)
+    domain_links = [link for link in result.links if "example.com" in link]
+    assert len(domain_links) > 0
+
+    # Validate that subdomains are excluded when include_subdomains=False
+    for link in domain_links:
+        if link.startswith("http"):
+            # Should not have subdomains like subdomain.example.com
+            # Extract the host part (text between "://" and the first "/")
+            if "://" in link:
+                domain_part = link.split("://")[1].split("/")[0]
+                # Only the apex domain and its www alias are acceptable; any other host is a subdomain leak
+                assert domain_part in ("example.com", "www.example.com")
+
+    # Validate URL formats
+    for link in domain_links:
+        if link.startswith("http"):
+            assert link.startswith(("http://", "https://"))
+            assert "example.com" in link
+
+    # Check that we have a reasonable number of links (not empty, not excessive)
+    assert 1 <= len(result.links) <= 10
+
+def test_map_url_with_sitemap():
+    """Test mapping with sitemap-only option."""
+    result = app.map_url(
+        TEST_URL,
+        sitemap_only=True,
+        limit=5
+    )
+
+    # Basic response assertions
+    assert result is not None
+    assert result.success
+    assert hasattr(result, "links")
+    assert result.links is not None
+    assert isinstance(result.links, list)
+
+    # Error handling assertions
+    assert hasattr(result, "error")
+    assert result.error is None
+
+    # Links validation (if any are returned)
+    if len(result.links) > 0:
+        for link in result.links:
+            assert isinstance(link, str)
+            assert len(link) > 0
+            # Links should be valid URLs
+            assert link.startswith(("http://", "https://", "/")) or "." in link
+
+        # Should respect limit
+        assert len(result.links) <= 5
+
+        # Should contain domain links
+        assert any("example.com" in url for url in result.links)
+
+def test_map_url_with_search():
+    """Test mapping with search parameter."""
+    result = app.map_url(
+        TEST_URL,
+        search="contact",
+        limit=3
+    )
+
+    # Basic response assertions
+    assert result is not None
+    assert result.success
+    assert hasattr(result, "links")
+    assert result.links is not None
+    assert isinstance(result.links, list)
+
+    # Error handling assertions
+    assert hasattr(result, "error")
+    assert result.error is None
+
+    # Links validation
+    if len(result.links) > 0:
+        for link in result.links:
+            assert isinstance(link, str)
+            assert len(link) > 0
+            # Links should be valid URLs
+            assert link.startswith(("http://", "https://", "/")) or "." in link
+
+        # Should respect limit
+        assert len(result.links) <= 3
+
+        # Should contain domain links
+        assert any("example.com" in url for url in result.links)
\ No newline at end of file