sdk(v3) map ok

This commit is contained in:
rafaelmmiller 2025-05-29 16:21:42 -03:00
parent b7f54d874f
commit 91099e2dba
2 changed files with 283 additions and 7 deletions

View File

@ -1,11 +1,9 @@
import sys import sys
import os import os
import subprocess
import time
import pytest import pytest
from dotenv import load_dotenv from dotenv import load_dotenv
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../'))) sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
from firecrawl.firecrawl import AsyncFirecrawlApp from firecrawl.firecrawl import AsyncFirecrawlApp
load_dotenv() load_dotenv()
@ -18,15 +16,43 @@ app = AsyncFirecrawlApp(api_url=API_URL, api_key=API_KEY)
TEST_URL = "example.com" TEST_URL = "example.com"
@pytest.mark.asyncio
async def test_map_url_simple():
    """Map TEST_URL with default options and validate the returned links."""
    response = await app.map_url(TEST_URL)

    # Response envelope: successful, with a non-empty list under `links`.
    assert response is not None
    assert response.success
    assert hasattr(response, "links")
    assert response.links is not None
    assert isinstance(response.links, list)
    assert len(response.links) > 0

    # The result must carry an `error` attribute that is unset.
    assert hasattr(response, "error")
    assert response.error is None

    found = response.links
    accepted_prefixes = ("http://", "https://", "/")
    for entry in found:
        assert isinstance(entry, str)
        assert len(entry) > 0
        # Either a URL/relative-path prefix or at least a dotted name.
        assert entry.startswith(accepted_prefixes) or "." in entry

    # At least one link mentions the mapped domain.
    assert any("example.com" in entry for entry in found)

    # Narrow down to the links that mention the domain.
    on_domain = [entry for entry in found if "example.com" in entry]
    assert len(on_domain) > 0

    # Anything that looks like an absolute URL must use a real http(s) scheme.
    for entry in on_domain:
        if not entry.startswith("http"):
            continue
        assert entry.startswith(("http://", "https://"))
        assert "example.com" in entry
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_map_url_async_all_params(): async def test_map_url_all_params():
result = await app.map_url( result = await app.map_url(
TEST_URL, TEST_URL,
search="test", search="test",
@ -34,7 +60,119 @@ async def test_map_url_async_all_params():
include_subdomains=False, include_subdomains=False,
limit=10 limit=10
) )
# Basic response assertions
assert result is not None assert result is not None
assert result.success assert result.success
assert hasattr(result, "links") assert hasattr(result, "links")
assert any("example.com" in url for url in result.links) assert result.links is not None
assert isinstance(result.links, list)
assert len(result.links) > 0
# Error handling assertions
assert hasattr(result, "error")
assert result.error is None
# Parameter validation - limit should be respected
assert len(result.links) <= 10 # Should respect the limit parameter
# Links content validation
for link in result.links:
assert isinstance(link, str)
assert len(link) > 0
# Links should be valid URLs or relative paths
assert link.startswith(("http://", "https://", "/")) or "." in link
# Verify we get links related to the domain
assert any("example.com" in url for url in result.links)
# Check subdomain exclusion (include_subdomains=False)
domain_links = [link for link in result.links if "example.com" in link]
assert len(domain_links) > 0
# Validate that subdomains are excluded when include_subdomains=False
for link in domain_links:
if link.startswith("http"):
# Should not have subdomains like subdomain.example.com
# Extract domain part
if "://" in link:
domain_part = link.split("://")[1].split("/")[0]
# Should be example.com or www.example.com, not subdomain.example.com
assert domain_part in ["example.com", "www.example.com"] or domain_part.endswith(".example.com")
# Validate URL formats
for link in domain_links:
if link.startswith("http"):
assert link.startswith(("http://", "https://"))
assert "example.com" in link
# Check that we have a reasonable number of links (not empty, not excessive)
assert 1 <= len(result.links) <= 10
@pytest.mark.asyncio
async def test_map_url_with_sitemap():
    """Check map_url when called with sitemap_only=True and limit=5."""
    response = await app.map_url(TEST_URL, sitemap_only=True, limit=5)

    # Response envelope: successful, with a list under `links`.
    assert response is not None
    assert response.success
    assert hasattr(response, "links")
    assert response.links is not None
    assert isinstance(response.links, list)

    # The result must carry an `error` attribute that is unset.
    assert hasattr(response, "error")
    assert response.error is None

    found = response.links
    # An empty list is tolerated; the remaining checks apply only to returned links.
    # NOTE(review): indentation was not visible in the reviewed source; the limit
    # and domain checks are assumed to belong under this guard -- confirm.
    if not found:
        return

    accepted_prefixes = ("http://", "https://", "/")
    for entry in found:
        assert isinstance(entry, str)
        assert len(entry) > 0
        # Either a URL/relative-path prefix or at least a dotted name.
        assert entry.startswith(accepted_prefixes) or "." in entry

    # The result size must not exceed the limit passed to map_url.
    assert len(found) <= 5

    # At least one link mentions the mapped domain.
    assert any("example.com" in entry for entry in found)
@pytest.mark.asyncio
async def test_map_url_with_search():
    """Check map_url when called with search="contact" and limit=3."""
    response = await app.map_url(TEST_URL, search="contact", limit=3)

    # Response envelope: successful, with a list under `links`.
    assert response is not None
    assert response.success
    assert hasattr(response, "links")
    assert response.links is not None
    assert isinstance(response.links, list)

    # The result must carry an `error` attribute that is unset.
    assert hasattr(response, "error")
    assert response.error is None

    found = response.links
    # An empty list is tolerated; the remaining checks apply only to returned links.
    # NOTE(review): indentation was not visible in the reviewed source; the limit
    # and domain checks are assumed to belong under this guard -- confirm.
    if not found:
        return

    accepted_prefixes = ("http://", "https://", "/")
    for entry in found:
        assert isinstance(entry, str)
        assert len(entry) > 0
        # Either a URL/relative-path prefix or at least a dotted name.
        assert entry.startswith(accepted_prefixes) or "." in entry

    # The result size must not exceed the limit passed to map_url.
    assert len(found) <= 3

    # At least one link mentions the mapped domain.
    assert any("example.com" in entry for entry in found)

View File

@ -17,10 +17,38 @@ TEST_URL = "example.com"
def test_map_url_simple():
    """Map TEST_URL with default options and validate the returned links."""
    response = app.map_url(TEST_URL)

    # Response envelope: successful, with a non-empty list under `links`.
    assert response is not None
    assert response.success
    assert hasattr(response, "links")
    assert response.links is not None
    assert isinstance(response.links, list)
    assert len(response.links) > 0

    # The result must carry an `error` attribute that is unset.
    assert hasattr(response, "error")
    assert response.error is None

    found = response.links
    accepted_prefixes = ("http://", "https://", "/")
    for entry in found:
        assert isinstance(entry, str)
        assert len(entry) > 0
        # Either a URL/relative-path prefix or at least a dotted name.
        assert entry.startswith(accepted_prefixes) or "." in entry

    # At least one link mentions the mapped domain.
    assert any("example.com" in entry for entry in found)

    # Narrow down to the links that mention the domain.
    on_domain = [entry for entry in found if "example.com" in entry]
    assert len(on_domain) > 0

    # Anything that looks like an absolute URL must use a real http(s) scheme.
    for entry in on_domain:
        if not entry.startswith("http"):
            continue
        assert entry.startswith(("http://", "https://"))
        assert "example.com" in entry
def test_map_url_all_params(): def test_map_url_all_params():
result = app.map_url( result = app.map_url(
@ -30,7 +58,117 @@ def test_map_url_all_params():
include_subdomains=False, include_subdomains=False,
limit=10 limit=10
) )
# Basic response assertions
assert result is not None assert result is not None
assert result.success assert result.success
assert hasattr(result, "links") assert hasattr(result, "links")
assert any("example.com" in url for url in result.links) assert result.links is not None
assert isinstance(result.links, list)
assert len(result.links) > 0
# Error handling assertions
assert hasattr(result, "error")
assert result.error is None
# Parameter validation - limit should be respected
assert len(result.links) <= 10 # Should respect the limit parameter
# Links content validation
for link in result.links:
assert isinstance(link, str)
assert len(link) > 0
# Links should be valid URLs or relative paths
assert link.startswith(("http://", "https://", "/")) or "." in link
# Verify we get links related to the domain
assert any("example.com" in url for url in result.links)
# Check subdomain exclusion (include_subdomains=False)
domain_links = [link for link in result.links if "example.com" in link]
assert len(domain_links) > 0
# Validate that subdomains are excluded when include_subdomains=False
for link in domain_links:
if link.startswith("http"):
# Should not have subdomains like subdomain.example.com
# Extract domain part
if "://" in link:
domain_part = link.split("://")[1].split("/")[0]
# Should be example.com or www.example.com, not subdomain.example.com
assert domain_part in ["example.com", "www.example.com"] or domain_part.endswith(".example.com")
# Validate URL formats
for link in domain_links:
if link.startswith("http"):
assert link.startswith(("http://", "https://"))
assert "example.com" in link
# Check that we have a reasonable number of links (not empty, not excessive)
assert 1 <= len(result.links) <= 10
def test_map_url_with_sitemap():
    """Check map_url when called with sitemap_only=True and limit=5."""
    response = app.map_url(TEST_URL, sitemap_only=True, limit=5)

    # Response envelope: successful, with a list under `links`.
    assert response is not None
    assert response.success
    assert hasattr(response, "links")
    assert response.links is not None
    assert isinstance(response.links, list)

    # The result must carry an `error` attribute that is unset.
    assert hasattr(response, "error")
    assert response.error is None

    found = response.links
    # An empty list is tolerated; the remaining checks apply only to returned links.
    # NOTE(review): indentation was not visible in the reviewed source; the limit
    # and domain checks are assumed to belong under this guard -- confirm.
    if not found:
        return

    accepted_prefixes = ("http://", "https://", "/")
    for entry in found:
        assert isinstance(entry, str)
        assert len(entry) > 0
        # Either a URL/relative-path prefix or at least a dotted name.
        assert entry.startswith(accepted_prefixes) or "." in entry

    # The result size must not exceed the limit passed to map_url.
    assert len(found) <= 5

    # At least one link mentions the mapped domain.
    assert any("example.com" in entry for entry in found)
def test_map_url_with_search():
    """Check map_url when called with search="contact" and limit=3."""
    response = app.map_url(TEST_URL, search="contact", limit=3)

    # Response envelope: successful, with a list under `links`.
    assert response is not None
    assert response.success
    assert hasattr(response, "links")
    assert response.links is not None
    assert isinstance(response.links, list)

    # The result must carry an `error` attribute that is unset.
    assert hasattr(response, "error")
    assert response.error is None

    found = response.links
    # An empty list is tolerated; the remaining checks apply only to returned links.
    # NOTE(review): indentation was not visible in the reviewed source; the limit
    # and domain checks are assumed to belong under this guard -- confirm.
    if not found:
        return

    accepted_prefixes = ("http://", "https://", "/")
    for entry in found:
        assert isinstance(entry, str)
        assert len(entry) > 0
        # Either a URL/relative-path prefix or at least a dotted name.
        assert entry.startswith(accepted_prefixes) or "." in entry

    # The result size must not exceed the limit passed to map_url.
    assert len(found) <= 3

    # At least one link mentions the mapped domain.
    assert any("example.com" in entry for entry in found)