mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 05:25:57 +08:00
sdk(v3): all tests complete
This commit is contained in:
parent
91099e2dba
commit
2faa45162c
@ -279,7 +279,7 @@ class ExtractResponse(pydantic.BaseModel, Generic[T]):
|
||||
data: Optional[T] = None
|
||||
error: Optional[str] = None
|
||||
warning: Optional[str] = None
|
||||
sources: Optional[List[str]] = None
|
||||
sources: Optional[Union[List[str], Dict[str, List[str]]]] = None
|
||||
|
||||
class SearchParams(pydantic.BaseModel):
|
||||
query: str
|
||||
|
520
apps/python-sdk/tests/e2e/async/test_a_extract.py
Normal file
520
apps/python-sdk/tests/e2e/async/test_a_extract.py
Normal file
@ -0,0 +1,520 @@
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
|
||||
# Make the in-repo SDK importable. This file lives in tests/e2e/async/, so the
# SDK root (the directory holding the `firecrawl` package) is three levels up;
# two levels up only reaches tests/.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../')))
|
||||
from firecrawl.firecrawl import AsyncFirecrawlApp
|
||||
|
||||
# Load variables from a .env file into the process environment.
load_dotenv()

# Credentials and endpoint come from the environment; an unset or empty
# TEST_API_URL falls back to the hosted API.
API_KEY = os.environ.get("TEST_API_KEY")
API_URL = os.environ.get("TEST_API_URL") or "https://api.firecrawl.dev"

# Shared async client used by every test in this module.
app = AsyncFirecrawlApp(api_key=API_KEY, api_url=API_URL)
|
||||
|
||||
class ExtractSchema(BaseModel):
    """Shape the extraction tests ask the API to fill in for a page."""

    # Title text extracted from the page.
    title: str
    # Description text extracted from the page.
    description: str
    # Links extracted from the page, as strings.
    links: List[str]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_simple_schema_class():
    """Extract from example.com using the Pydantic model class as the schema.

    Validates the response envelope, the extracted fields (whether returned
    as a dict or as an object), the id/status metadata and the sources.
    """
    response = await app.extract(
        ["https://example.com"],
        prompt="Extract the title, description, and links from the website",
        schema=ExtractSchema,
    )

    # Response envelope: success, data present, no error, warning attribute exists.
    assert response is not None
    assert response.success
    assert hasattr(response, "data")
    payload = response.data
    assert payload is not None
    assert hasattr(response, "error")
    assert response.error is None
    # warning may be None or a message, so only its presence is checked.
    assert hasattr(response, "warning")

    # The payload may be a plain dict or a model object; read the fields
    # through whichever access style applies.
    expected_fields = ("title", "description", "links")
    if isinstance(payload, dict):
        for field in expected_fields:
            assert field in payload
        title, description, links = (payload[field] for field in expected_fields)
    else:
        for field in expected_fields:
            assert hasattr(payload, field)
        title, description, links = (getattr(payload, field) for field in expected_fields)

    assert isinstance(title, str)
    assert isinstance(description, str)
    assert isinstance(links, list)
    assert len(title) > 0
    assert len(description) > 0

    # Each link must be a non-empty string that looks like a URL or a relative path.
    for link in links:
        assert isinstance(link, str)
        assert len(link) > 0
        assert link.startswith(("http://", "https://", "/", "#")) or "." in link

    # Response metadata.
    assert hasattr(response, "id")
    assert hasattr(response, "status")
    if response.status is not None:
        assert response.status in ["processing", "completed", "failed"]

    # Sources: either a dict keyed by field path (e.g. 'links[0]') or a flat list.
    assert hasattr(response, "sources")
    sources = response.sources
    if isinstance(sources, dict):
        for field_path, url_list in sources.items():
            assert isinstance(field_path, str)
            assert isinstance(url_list, list)
            for url in url_list:
                assert isinstance(url, str)
                assert len(url) > 0
                assert url.startswith(("http://", "https://"))
    elif isinstance(sources, list):
        for entry in sources:
            assert isinstance(entry, str)
            assert len(entry) > 0
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_simple_schema_dict():
    """Extract from example.com using a plain dict JSON schema.

    Validates the response envelope, the extracted fields (whether returned
    as a dict or as an object), the id/status metadata and the sources.
    """
    # Prefer the Pydantic v2 exporter: `.schema()` is deprecated there. Fall
    # back to `.schema()` when running on Pydantic v1. Both return a dict.
    export_schema = getattr(ExtractSchema, "model_json_schema", None) or ExtractSchema.schema
    schema_dict = export_schema()

    result = await app.extract(
        ["https://example.com"],
        prompt="Extract the title, description, and links from the website",
        schema=schema_dict,
    )

    # Basic response assertions
    assert result is not None
    assert result.success
    assert hasattr(result, "data")
    data = result.data
    assert data is not None

    # Error handling assertions
    assert hasattr(result, "error")
    assert result.error is None

    # Warning may be None or a message, so only its presence is checked.
    assert hasattr(result, "warning")

    # The payload may be a plain dict or a model object; read the fields
    # through whichever access style applies.
    field_names = ("title", "description", "links")
    if isinstance(data, dict):
        for name in field_names:
            assert name in data
        title, description, links = (data[name] for name in field_names)
    else:
        for name in field_names:
            assert hasattr(data, name)
        title, description, links = (getattr(data, name) for name in field_names)

    assert isinstance(title, str)
    assert isinstance(description, str)
    assert isinstance(links, list)
    assert len(title) > 0
    assert len(description) > 0

    # Each link must be a non-empty string that looks like a URL or a relative path.
    for link in links:
        assert isinstance(link, str)
        assert len(link) > 0
        assert link.startswith(("http://", "https://", "/", "#")) or "." in link

    # Response metadata validation
    assert hasattr(result, "id")
    assert hasattr(result, "status")
    if result.status is not None:
        assert result.status in ["processing", "completed", "failed"]

    # Sources: either a dict keyed by field path (e.g. 'links[0]') or a flat list.
    assert hasattr(result, "sources")
    sources = result.sources
    if isinstance(sources, dict):
        for key, source_list in sources.items():
            assert isinstance(key, str)
            assert isinstance(source_list, list)
            for source_url in source_list:
                assert isinstance(source_url, str)
                assert len(source_url) > 0
                assert source_url.startswith(("http://", "https://"))
    elif isinstance(sources, list):
        for source in sources:
            assert isinstance(source, str)
            assert len(source) > 0
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_simple_schema_json():
    """Extract from example.com using whichever JSON-schema exporter the model offers.

    Tries `model_json_schema` first, then `schema_json`; skips the test when
    neither exists. Validates the response envelope, the extracted fields,
    the id/status metadata and the sources.
    """
    for exporter_name in ("model_json_schema", "schema_json"):
        if hasattr(ExtractSchema, exporter_name):
            schema = getattr(ExtractSchema, exporter_name)()
            break
    else:
        pytest.skip("No JSON schema export method available on ExtractSchema")

    response = await app.extract(
        ["https://example.com"],
        prompt="Extract the title, description, and links from the website",
        schema=schema,
    )

    # Response envelope: success, data present, no error, warning attribute exists.
    assert response is not None
    assert response.success
    assert hasattr(response, "data")
    payload = response.data
    assert payload is not None
    assert hasattr(response, "error")
    assert response.error is None
    assert hasattr(response, "warning")

    # The payload may be a plain dict or a model object.
    expected_fields = ("title", "description", "links")
    if isinstance(payload, dict):
        for field in expected_fields:
            assert field in payload
        title, description, links = (payload[field] for field in expected_fields)
    else:
        for field in expected_fields:
            assert hasattr(payload, field)
        title, description, links = (getattr(payload, field) for field in expected_fields)

    assert isinstance(title, str)
    assert isinstance(description, str)
    assert isinstance(links, list)
    assert len(title) > 0
    assert len(description) > 0

    # Each link must be a non-empty string that looks like a URL or a relative path.
    for link in links:
        assert isinstance(link, str)
        assert len(link) > 0
        assert link.startswith(("http://", "https://", "/", "#")) or "." in link

    # Response metadata.
    assert hasattr(response, "id")
    assert hasattr(response, "status")
    if response.status is not None:
        assert response.status in ["processing", "completed", "failed"]

    # Sources: either a dict keyed by field path (e.g. 'links[0]') or a flat list.
    assert hasattr(response, "sources")
    sources = response.sources
    if isinstance(sources, dict):
        for field_path, url_list in sources.items():
            assert isinstance(field_path, str)
            assert isinstance(url_list, list)
            for url in url_list:
                assert isinstance(url, str)
                assert len(url) > 0
                assert url.startswith(("http://", "https://"))
    elif isinstance(sources, list):
        for entry in sources:
            assert isinstance(entry, str)
            assert len(entry) > 0
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_all_params():
    """Extract from iana.org with the optional extract parameters set.

    Passes a system prompt, disables external links and web search, and
    requests sources. Validates the response envelope, the four extracted
    fields, the sources, the id/status metadata and the presence of
    `expires_at`.
    """
    class AllParamsSchema(BaseModel):
        title: str
        description: str
        links: List[str]
        author: str

    # Prefer the Pydantic v2 exporter: `.schema()` is deprecated there. Fall
    # back to `.schema()` when running on Pydantic v1. Both return a dict.
    export_schema = getattr(AllParamsSchema, "model_json_schema", None) or AllParamsSchema.schema
    schema = export_schema()

    result = await app.extract(
        ["https://www.iana.org"],
        prompt="Extract the title, description, links, and author from the website",
        schema=schema,
        system_prompt="You are a helpful extraction agent.",
        allow_external_links=False,
        enable_web_search=False,
        show_sources=True,
        # NOTE(review): the agent option is left disabled here - confirm
        # whether it should be exercised.
        # agent={"model": "FIRE-1"}
    )

    # Basic response assertions
    assert result is not None
    assert result.success
    assert hasattr(result, "data")
    data = result.data
    assert data is not None

    # Error handling assertions
    assert hasattr(result, "error")
    assert result.error is None

    # Warning may be None or a message, so only its presence is checked.
    assert hasattr(result, "warning")

    # The payload may be a plain dict or a model object; read the fields
    # through whichever access style applies.
    field_names = ("title", "description", "links", "author")
    if isinstance(data, dict):
        for name in field_names:
            assert name in data
        title, description, links, author = (data[name] for name in field_names)
    else:
        for name in field_names:
            assert hasattr(data, name)
        title, description, links, author = (getattr(data, name) for name in field_names)

    assert isinstance(title, str)
    assert isinstance(description, str)
    assert isinstance(links, list)
    # author only has to be a string; it is allowed to be empty.
    assert isinstance(author, str)
    assert len(title) > 0
    assert len(description) > 0

    # Each link must be a non-empty string that looks like a URL or a relative path.
    for link in links:
        assert isinstance(link, str)
        assert len(link) > 0
        assert link.startswith(("http://", "https://", "/", "#")) or "." in link

    # Sources validation (show_sources=True): either a dict keyed by field
    # path (e.g. 'links[0]') or a flat list.
    assert hasattr(result, "sources")
    sources = result.sources
    if isinstance(sources, dict):
        for key, source_list in sources.items():
            assert isinstance(key, str)
            assert isinstance(source_list, list)
            for source_url in source_list:
                assert isinstance(source_url, str)
                assert len(source_url) > 0
                assert source_url.startswith(("http://", "https://"))
    elif isinstance(sources, list):
        for source in sources:
            assert isinstance(source, str)
            assert len(source) > 0

    # Response metadata validation
    assert hasattr(result, "id")
    assert hasattr(result, "status")
    if result.status is not None:
        assert result.status in ["processing", "completed", "failed"]

    # expires_at can be None or a datetime, so only its presence is checked.
    assert hasattr(result, "expires_at")
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_multiple_urls():
    """Extract from two URLs in one call and validate sources and fields."""
    class MultiUrlSchema(BaseModel):
        title: str
        description: str
        links: List[str]

    targets = ["https://example.com", "https://www.iana.org"]
    response = await app.extract(
        targets,
        prompt="Extract the title, description, and links from the websites",
        schema=MultiUrlSchema,
    )

    # Response envelope: success, data present, no error.
    assert response is not None
    assert response.success
    assert hasattr(response, "data")
    payload = response.data
    assert payload is not None
    assert hasattr(response, "error")
    assert response.error is None

    # Sources: either a dict keyed by field path (e.g. 'links[0]') or a flat list.
    assert hasattr(response, "sources")
    sources = response.sources
    if isinstance(sources, dict):
        for field_path, url_list in sources.items():
            assert isinstance(field_path, str)
            assert isinstance(url_list, list)
            for url in url_list:
                assert isinstance(url, str)
                assert len(url) > 0
                assert url.startswith(("http://", "https://"))
    elif isinstance(sources, list):
        for entry in sources:
            assert isinstance(entry, str)
            assert len(entry) > 0

    # The payload may be a plain dict or a model object.
    expected_fields = ("title", "description", "links")
    if isinstance(payload, dict):
        for field in expected_fields:
            assert field in payload
        title, description, links = (payload[field] for field in expected_fields)
    else:
        for field in expected_fields:
            assert hasattr(payload, field)
        title, description, links = (getattr(payload, field) for field in expected_fields)

    assert isinstance(title, str)
    assert isinstance(description, str)
    assert isinstance(links, list)
    assert len(title) > 0
    assert len(description) > 0

    # Each link must be a non-empty string that looks like a URL or a relative path.
    for link in links:
        assert isinstance(link, str)
        assert len(link) > 0
        assert link.startswith(("http://", "https://", "/", "#")) or "." in link
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_with_system_prompt():
    """Extract with a caller-supplied system prompt and validate title/summary."""
    class SystemPromptSchema(BaseModel):
        title: str
        summary: str

    response = await app.extract(
        ["https://example.com"],
        prompt="Extract the title and create a brief summary",
        schema=SystemPromptSchema,
        system_prompt="You are an expert content analyzer. Focus on accuracy and brevity.",
    )

    # Response envelope: success, data present, no error.
    assert response is not None
    assert response.success
    assert hasattr(response, "data")
    payload = response.data
    assert payload is not None
    assert hasattr(response, "error")
    assert response.error is None

    # Sources: either a dict keyed by field path (e.g. 'links[0]') or a flat list.
    assert hasattr(response, "sources")
    sources = response.sources
    if isinstance(sources, dict):
        for field_path, url_list in sources.items():
            assert isinstance(field_path, str)
            assert isinstance(url_list, list)
            for url in url_list:
                assert isinstance(url, str)
                assert len(url) > 0
                assert url.startswith(("http://", "https://"))
    elif isinstance(sources, list):
        for entry in sources:
            assert isinstance(entry, str)
            assert len(entry) > 0

    # The payload may be a plain dict or a model object.
    expected_fields = ("title", "summary")
    if isinstance(payload, dict):
        for field in expected_fields:
            assert field in payload
        title, summary = (payload[field] for field in expected_fields)
    else:
        for field in expected_fields:
            assert hasattr(payload, field)
        title, summary = (getattr(payload, field) for field in expected_fields)

    assert isinstance(title, str)
    assert isinstance(summary, str)
    assert len(title) > 0
    assert len(summary) > 0
|
@ -1,99 +0,0 @@
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
|
||||
# Put the directory three levels above this file at the front of sys.path
# before the `firecrawl` import that follows.
sys.path.insert(
    0,
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir)
    ),
)
|
||||
from firecrawl.firecrawl import AsyncFirecrawlApp
|
||||
|
||||
# Load variables from a .env file into the process environment.
load_dotenv()

# The endpoint is fixed; only the key is read from the environment.
API_KEY = os.environ.get("TEST_API_KEY")
API_URL = "https://api.firecrawl.dev"

# Shared async client used by every test in this module.
app = AsyncFirecrawlApp(api_key=API_KEY, api_url=API_URL)
|
||||
|
||||
class ExtractSchema(BaseModel):
    """Shape the extraction tests ask the API to fill in for a page."""

    # Title text extracted from the page.
    title: str
    # Description text extracted from the page.
    description: str
    # Links extracted from the page, as strings.
    links: List[str]
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_async_simple_schema_class():
    """Extract from example.com with the model class as schema; check the fields exist."""
    response = await app.extract(
        ["https://example.com"],
        prompt="Extract the title, description, and links from the website",
        schema=ExtractSchema,
    )

    assert response.success
    extracted = response.data
    assert extracted is not None
    for attr in ("title", "description", "links"):
        assert hasattr(extracted, attr)
    assert isinstance(extracted.links, list)
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_async_simple_schema_dict():
    """Extract from example.com with the model's `.schema()` dict; check the fields exist."""
    schema_dict = ExtractSchema.schema()
    response = await app.extract(
        ["https://example.com"],
        prompt="Extract the title, description, and links from the website",
        schema=schema_dict,
    )

    assert response.success
    extracted = response.data
    assert extracted is not None
    for attr in ("title", "description", "links"):
        assert hasattr(extracted, attr)
    assert isinstance(extracted.links, list)
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_async_simple_schema_json():
    """Extract from example.com using whichever JSON-schema exporter the model offers.

    Tries `model_json_schema` (Pydantic v2) first, then `schema_json`
    (Pydantic v1); skips the test when neither exists.
    """
    for exporter_name in ("model_json_schema", "schema_json"):
        if hasattr(ExtractSchema, exporter_name):
            schema = getattr(ExtractSchema, exporter_name)()
            break
    else:
        pytest.skip("No JSON schema export method available on ExtractSchema")

    response = await app.extract(
        ["https://example.com"],
        prompt="Extract the title, description, and links from the website",
        schema=schema,
    )

    assert response.success
    extracted = response.data
    assert extracted is not None
    for attr in ("title", "description", "links"):
        assert hasattr(extracted, attr)
    assert isinstance(extracted.links, list)
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_async_all_params():
    """Extract from iana.org with every optional extract parameter set.

    Passes a system prompt, disables external links, enables web search,
    requests sources and selects the FIRE-1 agent model. Checks that the four
    schema fields exist on the result and that sources came back.
    """
    class AllParamsSchema(BaseModel):
        title: str
        description: str
        links: List[str]
        author: str

    schema = AllParamsSchema.schema()
    result = await app.extract(
        ["https://www.iana.org"],
        prompt="Extract the title, description, links, and author from the website",
        schema=schema,
        system_prompt="You are a helpful extraction agent.",
        allow_external_links=False,
        enable_web_search=True,
        show_sources=True,
        agent={"model": "FIRE-1"}
    )

    assert result.success
    data = result.data
    assert data is not None
    for attr in ("title", "description", "links", "author"):
        assert hasattr(data, attr)
    assert isinstance(data.links, list)

    # The SDK's ExtractResponse.sources accepts either a flat list of URLs or
    # a dict mapping field paths to URL lists, so both shapes are valid here.
    assert hasattr(result, "sources")
    assert isinstance(result.sources, (list, dict))
|
@ -10,7 +10,7 @@ from firecrawl.firecrawl import FirecrawlApp
|
||||
|
||||
load_dotenv()
|
||||
|
||||
API_URL = "https://api.firecrawl.dev"
|
||||
API_URL = os.getenv("TEST_API_URL") or "https://api.firecrawl.dev"
|
||||
API_KEY = os.getenv("TEST_API_KEY")
|
||||
|
||||
app = FirecrawlApp(api_url=API_URL, api_key=API_KEY)
|
||||
@ -26,12 +26,62 @@ def test_extract_simple_schema_class():
|
||||
prompt="Extract the title, description, and links from the website",
|
||||
schema=ExtractSchema
|
||||
)
|
||||
|
||||
# Basic response assertions
|
||||
assert result is not None
|
||||
assert result.success
|
||||
assert hasattr(result, "data")
|
||||
assert result.data is not None
|
||||
assert hasattr(result.data, "title")
|
||||
assert hasattr(result.data, "description")
|
||||
assert hasattr(result.data, "links")
|
||||
assert isinstance(result.data.links, list)
|
||||
|
||||
# Error handling assertions
|
||||
assert hasattr(result, "error")
|
||||
assert result.error is None
|
||||
|
||||
# Warning handling assertions
|
||||
assert hasattr(result, "warning")
|
||||
# Warning can be present or None, so we just check it exists
|
||||
|
||||
# Handle both dictionary and object responses
|
||||
if isinstance(result.data, dict):
|
||||
# Data returned as dictionary
|
||||
assert "title" in result.data
|
||||
assert "description" in result.data
|
||||
assert "links" in result.data
|
||||
assert isinstance(result.data["title"], str)
|
||||
assert isinstance(result.data["description"], str)
|
||||
assert isinstance(result.data["links"], list)
|
||||
assert len(result.data["title"]) > 0
|
||||
assert len(result.data["description"]) > 0
|
||||
|
||||
# Links validation
|
||||
for link in result.data["links"]:
|
||||
assert isinstance(link, str)
|
||||
assert len(link) > 0
|
||||
# Links should be valid URLs or relative paths
|
||||
assert link.startswith(("http://", "https://", "/", "#")) or "." in link
|
||||
else:
|
||||
# Data returned as Pydantic model object
|
||||
assert hasattr(result.data, "title")
|
||||
assert hasattr(result.data, "description")
|
||||
assert hasattr(result.data, "links")
|
||||
assert isinstance(result.data.title, str)
|
||||
assert isinstance(result.data.description, str)
|
||||
assert isinstance(result.data.links, list)
|
||||
assert len(result.data.title) > 0
|
||||
assert len(result.data.description) > 0
|
||||
|
||||
# Links validation
|
||||
for link in result.data.links:
|
||||
assert isinstance(link, str)
|
||||
assert len(link) > 0
|
||||
# Links should be valid URLs or relative paths
|
||||
assert link.startswith(("http://", "https://", "/", "#")) or "." in link
|
||||
|
||||
# Response metadata validation
|
||||
assert hasattr(result, "id")
|
||||
assert hasattr(result, "status")
|
||||
if result.status is not None:
|
||||
assert result.status in ["processing", "completed", "failed"]
|
||||
|
||||
def test_extract_simple_schema_dict():
|
||||
result = app.extract(
|
||||
@ -39,12 +89,80 @@ def test_extract_simple_schema_dict():
|
||||
prompt="Extract the title, description, and links from the website",
|
||||
schema=ExtractSchema.schema()
|
||||
)
|
||||
|
||||
# Basic response assertions
|
||||
assert result is not None
|
||||
assert result.success
|
||||
assert hasattr(result, "data")
|
||||
assert result.data is not None
|
||||
assert hasattr(result.data, "title")
|
||||
assert hasattr(result.data, "description")
|
||||
assert hasattr(result.data, "links")
|
||||
assert isinstance(result.data.links, list)
|
||||
|
||||
# Error handling assertions
|
||||
assert hasattr(result, "error")
|
||||
assert result.error is None
|
||||
|
||||
# Warning handling assertions
|
||||
assert hasattr(result, "warning")
|
||||
|
||||
# Handle both dictionary and object responses
|
||||
if isinstance(result.data, dict):
|
||||
# Data returned as dictionary
|
||||
assert "title" in result.data
|
||||
assert "description" in result.data
|
||||
assert "links" in result.data
|
||||
assert isinstance(result.data["title"], str)
|
||||
assert isinstance(result.data["description"], str)
|
||||
assert isinstance(result.data["links"], list)
|
||||
assert len(result.data["title"]) > 0
|
||||
assert len(result.data["description"]) > 0
|
||||
|
||||
# Links validation
|
||||
for link in result.data["links"]:
|
||||
assert isinstance(link, str)
|
||||
assert len(link) > 0
|
||||
# Links should be valid URLs or relative paths
|
||||
assert link.startswith(("http://", "https://", "/", "#")) or "." in link
|
||||
else:
|
||||
# Data returned as Pydantic model object
|
||||
assert hasattr(result.data, "title")
|
||||
assert hasattr(result.data, "description")
|
||||
assert hasattr(result.data, "links")
|
||||
assert isinstance(result.data.title, str)
|
||||
assert isinstance(result.data.description, str)
|
||||
assert isinstance(result.data.links, list)
|
||||
assert len(result.data.title) > 0
|
||||
assert len(result.data.description) > 0
|
||||
|
||||
# Links validation
|
||||
for link in result.data.links:
|
||||
assert isinstance(link, str)
|
||||
assert len(link) > 0
|
||||
# Links should be valid URLs or relative paths
|
||||
assert link.startswith(("http://", "https://", "/", "#")) or "." in link
|
||||
|
||||
# Response metadata validation
|
||||
assert hasattr(result, "id")
|
||||
assert hasattr(result, "status")
|
||||
if result.status is not None:
|
||||
assert result.status in ["processing", "completed", "failed"]
|
||||
|
||||
# Sources validation
|
||||
assert hasattr(result, "sources")
|
||||
if result.sources is not None:
|
||||
if isinstance(result.sources, dict):
|
||||
# Sources returned as dictionary with keys like 'links[0]'
|
||||
for key, source_list in result.sources.items():
|
||||
assert isinstance(key, str)
|
||||
assert isinstance(source_list, list)
|
||||
for source_url in source_list:
|
||||
assert isinstance(source_url, str)
|
||||
assert len(source_url) > 0
|
||||
# Sources should be valid URLs
|
||||
assert source_url.startswith(("http://", "https://"))
|
||||
elif isinstance(result.sources, list):
|
||||
# Sources returned as simple list (fallback)
|
||||
for source in result.sources:
|
||||
assert isinstance(source, str)
|
||||
assert len(source) > 0
|
||||
|
||||
def test_extract_simple_schema_json():
|
||||
schema = None
|
||||
@ -54,17 +172,86 @@ def test_extract_simple_schema_json():
|
||||
schema = ExtractSchema.schema_json()
|
||||
else:
|
||||
pytest.skip("No JSON schema export method available on ExtractSchema")
|
||||
|
||||
result = app.extract(
|
||||
["https://example.com"],
|
||||
prompt="Extract the title, description, and links from the website",
|
||||
schema=schema
|
||||
)
|
||||
|
||||
# Basic response assertions
|
||||
assert result is not None
|
||||
assert result.success
|
||||
assert hasattr(result, "data")
|
||||
assert result.data is not None
|
||||
assert hasattr(result.data, "title")
|
||||
assert hasattr(result.data, "description")
|
||||
assert hasattr(result.data, "links")
|
||||
assert isinstance(result.data.links, list)
|
||||
|
||||
# Error handling assertions
|
||||
assert hasattr(result, "error")
|
||||
assert result.error is None
|
||||
|
||||
# Warning handling assertions
|
||||
assert hasattr(result, "warning")
|
||||
|
||||
# Handle both dictionary and object responses
|
||||
if isinstance(result.data, dict):
|
||||
# Data returned as dictionary
|
||||
assert "title" in result.data
|
||||
assert "description" in result.data
|
||||
assert "links" in result.data
|
||||
assert isinstance(result.data["title"], str)
|
||||
assert isinstance(result.data["description"], str)
|
||||
assert isinstance(result.data["links"], list)
|
||||
assert len(result.data["title"]) > 0
|
||||
assert len(result.data["description"]) > 0
|
||||
|
||||
# Links validation
|
||||
for link in result.data["links"]:
|
||||
assert isinstance(link, str)
|
||||
assert len(link) > 0
|
||||
# Links should be valid URLs or relative paths
|
||||
assert link.startswith(("http://", "https://", "/", "#")) or "." in link
|
||||
else:
|
||||
# Data returned as Pydantic model object
|
||||
assert hasattr(result.data, "title")
|
||||
assert hasattr(result.data, "description")
|
||||
assert hasattr(result.data, "links")
|
||||
assert isinstance(result.data.title, str)
|
||||
assert isinstance(result.data.description, str)
|
||||
assert isinstance(result.data.links, list)
|
||||
assert len(result.data.title) > 0
|
||||
assert len(result.data.description) > 0
|
||||
|
||||
# Links validation
|
||||
for link in result.data.links:
|
||||
assert isinstance(link, str)
|
||||
assert len(link) > 0
|
||||
# Links should be valid URLs or relative paths
|
||||
assert link.startswith(("http://", "https://", "/", "#")) or "." in link
|
||||
|
||||
# Response metadata validation
|
||||
assert hasattr(result, "id")
|
||||
assert hasattr(result, "status")
|
||||
if result.status is not None:
|
||||
assert result.status in ["processing", "completed", "failed"]
|
||||
|
||||
# Sources validation
|
||||
assert hasattr(result, "sources")
|
||||
if result.sources is not None:
|
||||
if isinstance(result.sources, dict):
|
||||
# Sources returned as dictionary with keys like 'links[0]'
|
||||
for key, source_list in result.sources.items():
|
||||
assert isinstance(key, str)
|
||||
assert isinstance(source_list, list)
|
||||
for source_url in source_list:
|
||||
assert isinstance(source_url, str)
|
||||
assert len(source_url) > 0
|
||||
# Sources should be valid URLs
|
||||
assert source_url.startswith(("http://", "https://"))
|
||||
elif isinstance(result.sources, list):
|
||||
# Sources returned as simple list (fallback)
|
||||
for source in result.sources:
|
||||
assert isinstance(source, str)
|
||||
assert len(source) > 0
|
||||
|
||||
def test_extract_all_params():
|
||||
class AllParamsSchema(BaseModel):
|
||||
@ -72,6 +259,7 @@ def test_extract_all_params():
|
||||
description: str
|
||||
links: List[str]
|
||||
author: str
|
||||
|
||||
schema = AllParamsSchema.schema()
|
||||
result = app.extract(
|
||||
["https://www.iana.org"],
|
||||
@ -79,16 +267,190 @@ def test_extract_all_params():
|
||||
schema=schema,
|
||||
system_prompt="You are a helpful extraction agent.",
|
||||
allow_external_links=False,
|
||||
enable_web_search=True,
|
||||
enable_web_search=False,
|
||||
show_sources=True,
|
||||
agent={"model": "FIRE-1"}
|
||||
# agent={"model": "FIRE-1"}
|
||||
)
|
||||
|
||||
# Basic response assertions
|
||||
assert result is not None
|
||||
assert result.success
|
||||
assert hasattr(result, "data")
|
||||
assert result.data is not None
|
||||
assert hasattr(result.data, "title")
|
||||
assert hasattr(result.data, "description")
|
||||
assert hasattr(result.data, "links")
|
||||
assert hasattr(result.data, "author")
|
||||
assert isinstance(result.data.links, list)
|
||||
|
||||
# Error handling assertions
|
||||
assert hasattr(result, "error")
|
||||
assert result.error is None
|
||||
|
||||
# Warning handling assertions
|
||||
assert hasattr(result, "warning")
|
||||
|
||||
# Handle both dictionary and object responses
|
||||
if isinstance(result.data, dict):
|
||||
# Data returned as dictionary
|
||||
assert "title" in result.data
|
||||
assert "description" in result.data
|
||||
assert "links" in result.data
|
||||
assert "author" in result.data
|
||||
assert isinstance(result.data["title"], str)
|
||||
assert isinstance(result.data["description"], str)
|
||||
assert isinstance(result.data["links"], list)
|
||||
assert isinstance(result.data["author"], str)
|
||||
assert len(result.data["title"]) > 0
|
||||
assert len(result.data["description"]) > 0
|
||||
|
||||
# Links validation
|
||||
for link in result.data["links"]:
|
||||
assert isinstance(link, str)
|
||||
assert len(link) > 0
|
||||
# Links should be valid URLs or relative paths
|
||||
assert link.startswith(("http://", "https://", "/", "#")) or "." in link
|
||||
else:
|
||||
# Data returned as Pydantic model object
|
||||
assert hasattr(result.data, "title")
|
||||
assert hasattr(result.data, "description")
|
||||
assert hasattr(result.data, "links")
|
||||
assert hasattr(result.data, "author")
|
||||
assert isinstance(result.data.title, str)
|
||||
assert isinstance(result.data.description, str)
|
||||
assert isinstance(result.data.links, list)
|
||||
assert isinstance(result.data.author, str)
|
||||
assert len(result.data.title) > 0
|
||||
assert len(result.data.description) > 0
|
||||
|
||||
# Links validation
|
||||
for link in result.data.links:
|
||||
assert isinstance(link, str)
|
||||
assert len(link) > 0
|
||||
# Links should be valid URLs or relative paths
|
||||
assert link.startswith(("http://", "https://", "/", "#")) or "." in link
|
||||
|
||||
# Sources validation (show_sources=True)
|
||||
assert hasattr(result, "sources")
|
||||
assert isinstance(result.sources, list)
|
||||
if result.sources is not None:
|
||||
if isinstance(result.sources, dict):
|
||||
# Sources returned as dictionary with keys like 'links[0]'
|
||||
for key, source_list in result.sources.items():
|
||||
assert isinstance(key, str)
|
||||
assert isinstance(source_list, list)
|
||||
for source_url in source_list:
|
||||
assert isinstance(source_url, str)
|
||||
assert len(source_url) > 0
|
||||
# Sources should be valid URLs
|
||||
assert source_url.startswith(("http://", "https://"))
|
||||
elif isinstance(result.sources, list):
|
||||
# Sources returned as simple list (fallback)
|
||||
for source in result.sources:
|
||||
assert isinstance(source, str)
|
||||
assert len(source) > 0
|
||||
|
||||
# Response metadata validation
|
||||
assert hasattr(result, "id")
|
||||
assert hasattr(result, "status")
|
||||
if result.status is not None:
|
||||
assert result.status in ["processing", "completed", "failed"]
|
||||
|
||||
# Expires at validation
|
||||
assert hasattr(result, "expires_at")
|
||||
# expires_at can be None or a datetime, so we just check it exists
|
||||
|
||||
def test_extract_multiple_urls():
    """Run a single extraction over two URLs and validate the payload.

    ``result.data`` is inspected through whichever interface it exposes:
    key lookup when it is a ``dict``, attribute access otherwise.
    """
    class MultiUrlSchema(BaseModel):
        title: str
        description: str
        links: List[str]

    result = app.extract(
        ["https://example.com", "https://www.iana.org"],
        prompt="Extract the title, description, and links from the websites",
        schema=MultiUrlSchema,
    )

    # The call must have succeeded and must carry a payload.
    assert result is not None
    assert result.success
    assert hasattr(result, "data")
    assert result.data is not None

    # A successful extraction must not report an error.
    assert hasattr(result, "error")
    assert result.error is None

    # Pick presence/lookup helpers once so the field checks below are
    # written a single time for both payload shapes.
    payload = result.data
    if isinstance(payload, dict):
        def has_field(name):
            return name in payload

        read_field = payload.__getitem__
    else:
        def has_field(name):
            return hasattr(payload, name)

        def read_field(name):
            return getattr(payload, name)

    expected_types = (("title", str), ("description", str), ("links", list))
    for name, _ in expected_types:
        assert has_field(name)
    for name, kind in expected_types:
        assert isinstance(read_field(name), kind)
    for name in ("title", "description"):
        assert len(read_field(name)) > 0

    # Every link must be a non-empty string that either starts with a
    # URL/relative-path prefix or contains a dot.
    link_prefixes = ("http://", "https://", "/", "#")
    for link in read_field("links"):
        assert isinstance(link, str)
        assert len(link) > 0
        assert link.startswith(link_prefixes) or "." in link
|
||||
|
||||
def test_extract_with_system_prompt():
    """Extract a title and summary while supplying a custom system prompt.

    The payload in ``result.data`` may be a ``dict`` or an object with
    attributes; both shapes are validated with the same checks.
    """
    class SystemPromptSchema(BaseModel):
        title: str
        summary: str

    result = app.extract(
        ["https://example.com"],
        prompt="Extract the title and create a brief summary",
        schema=SystemPromptSchema,
        system_prompt="You are an expert content analyzer. Focus on accuracy and brevity.",
    )

    # The call must have succeeded and must carry a payload.
    assert result is not None
    assert result.success
    assert hasattr(result, "data")
    assert result.data is not None

    # A successful extraction must not report an error.
    assert hasattr(result, "error")
    assert result.error is None

    field_names = ("title", "summary")
    data = result.data
    if isinstance(data, dict):
        # Dictionary payload: fields are keys.
        for field in field_names:
            assert field in data
        values = [data[field] for field in field_names]
    else:
        # Object payload: fields are attributes.
        for field in field_names:
            assert hasattr(data, field)
        values = [getattr(data, field) for field in field_names]

    # Both fields must be non-empty strings.
    for value in values:
        assert isinstance(value, str)
    for value in values:
        assert len(value) > 0
|
Loading…
x
Reference in New Issue
Block a user