diff --git a/apps/python-sdk/tests/e2e/async/test_async_batch_scrape.py b/apps/python-sdk/tests/e2e/async/test_async_batch_scrape.py index ce366a40..f28a525c 100644 --- a/apps/python-sdk/tests/e2e/async/test_async_batch_scrape.py +++ b/apps/python-sdk/tests/e2e/async/test_async_batch_scrape.py @@ -1,9 +1,11 @@ import os import pytest import sys - from dotenv import load_dotenv -from firecrawl.firecrawl import AsyncFirecrawlApp +import asyncio + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../'))) +from firecrawl.firecrawl import AsyncFirecrawlApp, JsonConfig, LocationConfig, WaitAction, ScreenshotAction, WriteAction, PressAction, ScrollAction, ExecuteJavascriptAction load_dotenv() @@ -12,7 +14,117 @@ API_KEY = os.getenv("TEST_API_KEY") app = AsyncFirecrawlApp(api_url=API_URL, api_key=API_KEY) +TEST_URLS = [ + "https://example.com", + "https://www.iana.org" +] + +async def wait_for_batch_completion(app, job_id, timeout=60): + for _ in range(timeout): + status = await app.check_batch_scrape_status(job_id) + if status.status == "completed": + return status + await asyncio.sleep(1) + raise TimeoutError("Batch scrape did not complete in time") + @pytest.mark.asyncio -async def test_async_batch_scrape_urls_async_placeholder(): - # TODO: Implement async E2E tests for async_batch_scrape_urls - assert True \ No newline at end of file +async def test_async_batch_scrape_urls_simple(): + job = await app.async_batch_scrape_urls( + TEST_URLS, + formats=["markdown"] + ) + assert job.id is not None + status = await wait_for_batch_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) == len(TEST_URLS) + assert any("Example Domain" in doc.markdown for doc in status.data) + assert any("Internet Assigned Numbers" in doc.markdown for doc in status.data) + +@pytest.mark.asyncio +async def test_async_batch_scrape_urls_all_params_with_extract(): + location = LocationConfig(country="us", languages=["en"]) + extract_schema = {"type": "object", "properties": {"title": {"type": "string"}}} + extract = JsonConfig(prompt="Extract the title", schema=extract_schema) + actions = [ + WaitAction(type="wait", milliseconds=500), + ScreenshotAction(type="screenshot", fullPage=True), + WriteAction(type="write", text="test input"), + PressAction(type="press", key="Enter"), + ScrollAction(type="scroll", direction="down"), + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") + ] + job = await app.async_batch_scrape_urls( + TEST_URLS, + formats=["markdown", "html", "raw_html", "links", "screenshot", "extract"], + include_tags=["h1", "p"], + exclude_tags=["footer"], + only_main_content=True, + wait_for=1000, + timeout=15000, + location=location, + mobile=True, + skip_tls_verification=True, + remove_base64_images=True, + block_ads=True, + proxy="basic", + extract=extract, + actions=actions + ) + assert job.id is not None + status = await wait_for_batch_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) == len(TEST_URLS) + for doc in status.data: + assert hasattr(doc, "markdown") + assert hasattr(doc, "html") + assert hasattr(doc, "raw_html") + assert hasattr(doc, "links") + assert hasattr(doc, "screenshot") + assert hasattr(doc, "extract") + assert hasattr(doc, "metadata") + +@pytest.mark.asyncio +async def test_async_batch_scrape_urls_all_params_with_json_options(): + location = LocationConfig(country="us", 
languages=["en"]) + json_schema = {"type": "object", "properties": {"title": {"type": "string"}}} + json_options = JsonConfig(prompt="Extract the title as JSON", schema=json_schema) + actions = [ + WaitAction(type="wait", milliseconds=500), + ScreenshotAction(type="screenshot", fullPage=True), + WriteAction(type="write", text="test input"), + PressAction(type="press", key="Enter"), + ScrollAction(type="scroll", direction="down"), + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") + ] + job = await app.async_batch_scrape_urls( + TEST_URLS, + formats=["markdown", "html", "raw_html", "links", "screenshot", "json"], + include_tags=["h1", "p"], + exclude_tags=["footer"], + only_main_content=True, + wait_for=1000, + timeout=15000, + location=location, + mobile=True, + skip_tls_verification=True, + remove_base64_images=True, + block_ads=True, + proxy="basic", + json_options=json_options, + actions=actions + ) + assert job.id is not None + status = await wait_for_batch_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) == len(TEST_URLS) + for doc in status.data: + assert hasattr(doc, "markdown") + assert hasattr(doc, "html") + assert hasattr(doc, "raw_html") + assert hasattr(doc, "links") + assert hasattr(doc, "screenshot") + assert hasattr(doc, "json") + assert hasattr(doc, "metadata") \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/async/test_async_crawl.py b/apps/python-sdk/tests/e2e/async/test_async_crawl.py index 02d05f01..634a06e7 100644 --- a/apps/python-sdk/tests/e2e/async/test_async_crawl.py +++ b/apps/python-sdk/tests/e2e/async/test_async_crawl.py @@ -1,7 +1,11 @@ import os +import sys import pytest +import asyncio from dotenv import load_dotenv -from firecrawl import AsyncFirecrawlApp + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../'))) +from firecrawl.firecrawl import AsyncFirecrawlApp, LocationConfig, WaitAction, ScreenshotAction, WriteAction, PressAction, ScrollAction, ExecuteJavascriptAction, ScrapeOptions load_dotenv() @@ -10,7 +14,81 @@ API_KEY = os.getenv("TEST_API_KEY") app = AsyncFirecrawlApp(api_url=API_URL, api_key=API_KEY) +TEST_URL = "https://example.com" + +async def wait_for_crawl_completion(app, job_id, timeout=60): + for _ in range(timeout): + status = await app.check_crawl_status(job_id) + if status.status == "completed": + return status + await asyncio.sleep(1) + raise TimeoutError("Crawl did not complete in time") + @pytest.mark.asyncio -async def test_async_crawl_url_async_placeholder(): - # TODO: Implement async E2E tests for async_crawl_url - assert True \ No newline at end of file +async def test_async_crawl_url_simple(): + job = await app.async_crawl_url( + TEST_URL, + limit=1, + scrape_options=ScrapeOptions(formats=["markdown"]) + ) + assert job.id is not None + status = await wait_for_crawl_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) >= 1 + assert any("Example Domain" in doc.markdown for doc in status.data) + +@pytest.mark.asyncio +async def test_async_crawl_url_all_params(): + location = LocationConfig(country="us", languages=["en"]) + actions = [ + WaitAction(type="wait", milliseconds=500), + ScreenshotAction(type="screenshot", fullPage=True), + WriteAction(type="write", text="test input"), + PressAction(type="press", key="Enter"), + ScrollAction(type="scroll", direction="down"), + 
ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") + ] + scrape_options = ScrapeOptions( + formats=["markdown", "html", "raw_html", "links", "screenshot"], + include_tags=["h1", "p"], + exclude_tags=["footer"], + only_main_content=True, + wait_for=1000, + timeout=15000, + location=location, + mobile=True, + skip_tls_verification=True, + remove_base64_images=True, + block_ads=True, + proxy="basic", + actions=actions + ) + job = await app.async_crawl_url( + TEST_URL, + include_paths=["/"], + exclude_paths=["/notarealpage"], + max_depth=2, + max_discovery_depth=2, + limit=2, + allow_backward_links=True, + allow_external_links=True, + ignore_sitemap=True, + scrape_options=scrape_options, + deduplicate_similar_urls=True, + ignore_query_parameters=True, + regex_on_full_url=True, + delay=1 + ) + assert job.id is not None + status = await wait_for_crawl_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) >= 1 + for doc in status.data: + assert hasattr(doc, "markdown") + assert hasattr(doc, "html") + assert hasattr(doc, "raw_html") + assert hasattr(doc, "links") + assert hasattr(doc, "screenshot") + assert hasattr(doc, "metadata") diff --git a/apps/python-sdk/tests/e2e/async/test_batch_scrape.py b/apps/python-sdk/tests/e2e/async/test_batch_scrape.py index a5dde5d5..49bddf95 100644 --- a/apps/python-sdk/tests/e2e/async/test_batch_scrape.py +++ b/apps/python-sdk/tests/e2e/async/test_batch_scrape.py @@ -42,11 +42,11 @@ async def test_batch_scrape_urls_async_all_params_with_extract(): WriteAction(type="write", text="test input"), PressAction(type="press", key="Enter"), ScrollAction(type="scroll", direction="down"), - ExecuteJavascriptAction(type="executeJavascript", script="function getTitle() { return document.title; }; getTitle();") + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") ] result = await app.batch_scrape_urls( TEST_URLS, - formats=["markdown", "html", "rawHtml", "links", "screenshot", "extract"], + formats=["markdown", "html", "raw_html", "links", "screenshot", "extract"], include_tags=["h1", "p"], exclude_tags=["footer"], only_main_content=True, @@ -68,7 +68,7 @@ async def test_batch_scrape_urls_async_all_params_with_extract(): for doc in result.data: assert hasattr(doc, "markdown") assert hasattr(doc, "html") - assert hasattr(doc, "rawHtml") + assert hasattr(doc, "raw_html") assert hasattr(doc, "links") assert hasattr(doc, "screenshot") assert hasattr(doc, "extract") @@ -85,11 +85,11 @@ async def test_batch_scrape_urls_async_all_params_with_json_options(): WriteAction(type="write", text="test input"), PressAction(type="press", key="Enter"), ScrollAction(type="scroll", direction="down"), - ExecuteJavascriptAction(type="executeJavascript", script="function getTitle() { return document.title; }; getTitle();") + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") ] result = await app.batch_scrape_urls( TEST_URLS, - formats=["markdown", "html", "rawHtml", "links", "screenshot", "json"], + formats=["markdown", "html", "raw_html", "links", "screenshot", "json"], include_tags=["h1", "p"], exclude_tags=["footer"], only_main_content=True, @@ -110,7 +110,7 @@ async def test_batch_scrape_urls_async_all_params_with_json_options(): for doc in result.data: assert hasattr(doc, "markdown") assert hasattr(doc, "html") - assert 
hasattr(doc, "rawHtml") + assert hasattr(doc, "raw_html") assert hasattr(doc, "links") assert hasattr(doc, "screenshot") assert hasattr(doc, "json") diff --git a/apps/python-sdk/tests/e2e/async/test_check_crawl_status.py b/apps/python-sdk/tests/e2e/async/test_check_crawl_status.py deleted file mode 100644 index ac929fde..00000000 --- a/apps/python-sdk/tests/e2e/async/test_check_crawl_status.py +++ /dev/null @@ -1,16 +0,0 @@ -import os -import pytest -from dotenv import load_dotenv -from firecrawl import AsyncFirecrawlApp - -load_dotenv() - -API_URL = "https://api.firecrawl.dev" -API_KEY = os.getenv("TEST_API_KEY") - -app = AsyncFirecrawlApp(api_url=API_URL, api_key=API_KEY) - -@pytest.mark.asyncio -async def test_check_crawl_status_async_placeholder(): - # TODO: Implement async E2E tests for check_crawl_status - assert True \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/async/test_crawl.py b/apps/python-sdk/tests/e2e/async/test_crawl.py index fec46b1a..4cf2aa8a 100644 --- a/apps/python-sdk/tests/e2e/async/test_crawl.py +++ b/apps/python-sdk/tests/e2e/async/test_crawl.py @@ -1,7 +1,11 @@ import os +import sys import pytest +import asyncio from dotenv import load_dotenv -from firecrawl import AsyncFirecrawlApp + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../'))) +from firecrawl.firecrawl import AsyncFirecrawlApp, LocationConfig, WaitAction, ScreenshotAction, WriteAction, PressAction, ScrollAction, ExecuteJavascriptAction, ScrapeOptions load_dotenv() @@ -10,7 +14,81 @@ API_KEY = os.getenv("TEST_API_KEY") app = AsyncFirecrawlApp(api_url=API_URL, api_key=API_KEY) +TEST_URL = "https://example.com" + +async def wait_for_crawl_completion(app, job_id, timeout=60): + for _ in range(timeout): + status = await app.check_crawl_status(job_id) + if status.status == "completed": + return status + await asyncio.sleep(1) + raise TimeoutError("Crawl did not complete in time") + @pytest.mark.asyncio -async def test_crawl_url_async_placeholder(): - # TODO: Implement async E2E tests for crawl_url - assert True \ No newline at end of file +async def test_crawl_url_async_simple(): + job = await app.async_crawl_url( + TEST_URL, + limit=1, + scrape_options=ScrapeOptions(formats=["markdown"]) + ) + assert job.id is not None + status = await wait_for_crawl_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) >= 1 + assert any("Example Domain" in doc.markdown for doc in status.data) + +@pytest.mark.asyncio +async def test_crawl_url_async_all_params(): + location = LocationConfig(country="us", languages=["en"]) + actions = [ + WaitAction(type="wait", milliseconds=500), + ScreenshotAction(type="screenshot", fullPage=True), + WriteAction(type="write", text="test input"), + PressAction(type="press", key="Enter"), + ScrollAction(type="scroll", direction="down"), + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") + ] + scrape_options = ScrapeOptions( + formats=["markdown", "html", "raw_html", "links", "screenshot"], + include_tags=["h1", "p"], + exclude_tags=["footer"], + only_main_content=True, + wait_for=1000, + timeout=15000, + location=location, + mobile=True, + skip_tls_verification=True, + remove_base64_images=True, + block_ads=True, + proxy="basic", + actions=actions + ) + job = await app.async_crawl_url( + TEST_URL, + include_paths=["/"], + exclude_paths=["/notarealpage"], + max_depth=2, + max_discovery_depth=2, + limit=2, + 
allow_backward_links=True, + allow_external_links=True, + ignore_sitemap=True, + scrape_options=scrape_options, + deduplicate_similar_urls=True, + ignore_query_parameters=True, + regex_on_full_url=True, + delay=1 + ) + assert job.id is not None + status = await wait_for_crawl_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) >= 1 + for doc in status.data: + assert hasattr(doc, "markdown") + assert hasattr(doc, "html") + assert hasattr(doc, "raw_html") + assert hasattr(doc, "links") + assert hasattr(doc, "screenshot") + assert hasattr(doc, "metadata") \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/async/test_deep_research.py b/apps/python-sdk/tests/e2e/async/test_deep_research.py index 66cb51d5..663f0702 100644 --- a/apps/python-sdk/tests/e2e/async/test_deep_research.py +++ b/apps/python-sdk/tests/e2e/async/test_deep_research.py @@ -1,7 +1,10 @@ import os +import sys import pytest from dotenv import load_dotenv -from firecrawl import AsyncFirecrawlApp + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../'))) +from firecrawl.firecrawl import AsyncFirecrawlApp load_dotenv() @@ -11,6 +14,35 @@ API_KEY = os.getenv("TEST_API_KEY") app = AsyncFirecrawlApp(api_url=API_URL, api_key=API_KEY) @pytest.mark.asyncio -async def test_deep_research_async_placeholder(): - # TODO: Implement async E2E tests for deep_research - assert True \ No newline at end of file +async def test_deep_research_async_simple(): + result = await app.deep_research("What is the capital of France?", max_urls=2) + assert hasattr(result, "status") + assert result.status == "completed" + assert hasattr(result, "success") + assert result.success + assert hasattr(result, "data") + assert result.data is not None + assert "Paris" in str(result.data) or "France" in str(result.data) + +@pytest.mark.asyncio +async def test_deep_research_async_all_params(): + result = await app.deep_research( + "What are the latest advancements in AI?", + max_depth=2, + time_limit=60, + max_urls=3, + analysis_prompt="Summarize the most important recent AI advancements.", + system_prompt="You are an expert AI researcher."
+ ) + assert hasattr(result, "status") + assert result.status == "completed" + assert hasattr(result, "success") + assert result.success + assert hasattr(result, "data") + assert hasattr(result, "activities") + assert isinstance(result.activities, list) + assert result.data is not None + assert hasattr(result.data, "sources") + assert isinstance(result.data.sources, list) + assert hasattr(result.data, "final_analysis") + assert isinstance(result.data.final_analysis, str) \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/async/test_extract.py b/apps/python-sdk/tests/e2e/async/test_extract.py index 4a059f13..22597c2d 100644 --- a/apps/python-sdk/tests/e2e/async/test_extract.py +++ b/apps/python-sdk/tests/e2e/async/test_extract.py @@ -1,7 +1,12 @@ import os +import sys import pytest from dotenv import load_dotenv -from firecrawl import AsyncFirecrawlApp +from pydantic import BaseModel +from typing import List + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../'))) +from firecrawl.firecrawl import AsyncFirecrawlApp load_dotenv() @@ -10,7 +15,85 @@ API_KEY = os.getenv("TEST_API_KEY") app = AsyncFirecrawlApp(api_url=API_URL, api_key=API_KEY) +class ExtractSchema(BaseModel): + title: str + description: str + links: List[str] + @pytest.mark.asyncio -async def test_extract_async_placeholder(): - # TODO: Implement async E2E tests for extract - assert True \ No newline at end of file +async def test_extract_async_simple_schema_class(): + result = await app.extract( + ["https://example.com"], + prompt="Extract the title, description, and links from the website", + schema=ExtractSchema + ) + assert result.success + assert result.data is not None + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert isinstance(result.data.links, list) + +@pytest.mark.asyncio +async def test_extract_async_simple_schema_dict(): + result = await app.extract( + ["https://example.com"], + prompt="Extract the title, description, and links from the website", + schema=ExtractSchema.schema() + ) + assert result.success + assert result.data is not None + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert isinstance(result.data.links, list) + +@pytest.mark.asyncio +async def test_extract_async_simple_schema_json(): + # Try both model_json_schema (Pydantic v2) and schema_json (Pydantic v1) + schema = None + if hasattr(ExtractSchema, "model_json_schema"): + schema = ExtractSchema.model_json_schema() + elif hasattr(ExtractSchema, "schema_json"): + schema = ExtractSchema.schema_json() + else: + pytest.skip("No JSON schema export method available on ExtractSchema") + result = await app.extract( + ["https://example.com"], + prompt="Extract the title, description, and links from the website", + schema=schema + ) + assert result.success + assert result.data is not None + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert isinstance(result.data.links, list) + +@pytest.mark.asyncio +async def test_extract_async_all_params(): + class AllParamsSchema(BaseModel): + title: str + description: str + links: List[str] + author: str + schema = AllParamsSchema.schema() + result = await app.extract( + ["https://www.iana.org"], + prompt="Extract the title, description, links, and author from the website", + schema=schema, + system_prompt="You are a helpful extraction 
agent.", + allow_external_links=True, + enable_web_search=True, + show_sources=True, + agent={"model": "FIRE-1"} + ) + assert result.success + assert result.data is not None + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert hasattr(result.data, "author") + assert isinstance(result.data.links, list) + assert hasattr(result, "sources") + assert isinstance(result.sources, list) \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/async/test_generate_llms_text.py b/apps/python-sdk/tests/e2e/async/test_generate_llms_text.py index 00d6b987..b05dd31c 100644 --- a/apps/python-sdk/tests/e2e/async/test_generate_llms_text.py +++ b/apps/python-sdk/tests/e2e/async/test_generate_llms_text.py @@ -1,7 +1,10 @@ import os +import sys import pytest from dotenv import load_dotenv -from firecrawl import AsyncFirecrawlApp + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../'))) +from firecrawl.firecrawl import AsyncFirecrawlApp load_dotenv() @@ -11,6 +14,30 @@ API_KEY = os.getenv("TEST_API_KEY") app = AsyncFirecrawlApp(api_url=API_URL, api_key=API_KEY) @pytest.mark.asyncio -async def test_generate_llms_text_async_placeholder(): - # TODO: Implement async E2E tests for generate_llms_text - assert True \ No newline at end of file +async def test_generate_llms_text_async_simple(): + result = await app.generate_llms_text("https://example.com") + assert hasattr(result, "status") + assert result.status == "completed" + assert hasattr(result, "data") + assert result.data is not None + assert hasattr(result.data, "llmstxt") + assert isinstance(result.data.llmstxt, str) + assert len(result.data.llmstxt) > 0 + +@pytest.mark.asyncio +async def test_generate_llms_text_async_all_params(): + result = await app.generate_llms_text( + "https://www.iana.org", + max_urls=5, + show_full_text=True, + experimental_stream=True + ) + assert hasattr(result, "status") + assert result.status == "completed" + assert hasattr(result, "data") + assert result.data is not None + assert hasattr(result.data, "llmstxt") + assert isinstance(result.data.llmstxt, str) + assert len(result.data.llmstxt) > 0 + assert hasattr(result.data, "llmsfulltxt") + assert result.data.llmsfulltxt is None or isinstance(result.data.llmsfulltxt, str) \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/async/test_scrape.py b/apps/python-sdk/tests/e2e/async/test_scrape.py index ae0e0a46..0969937b 100644 --- a/apps/python-sdk/tests/e2e/async/test_scrape.py +++ b/apps/python-sdk/tests/e2e/async/test_scrape.py @@ -40,11 +40,11 @@ async def test_scrape_url_async_all_params_with_extract_screenshot(): WriteAction(type="write", text="test input"), PressAction(type="press", key="Enter"), ScrollAction(type="scroll", direction="down"), - ExecuteJavascriptAction(type="executeJavascript", script="function getTitle() { return document.title; }; getTitle();") + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") ] response = await app.scrape_url( url=TEST_URL, - formats=["markdown", "html", "rawHtml", "links", "screenshot", "extract", "changeTracking"], + formats=["markdown", "html", "raw_html", "links", "screenshot", "extract", "change_tracking"], include_tags=["h1", "p"], exclude_tags=["footer"], only_main_content=True, @@ -64,12 +64,12 @@ async def test_scrape_url_async_all_params_with_extract_screenshot(): assert response.success assert hasattr(response, "markdown") 
assert hasattr(response, "html") - assert hasattr(response, "rawHtml") + assert hasattr(response, "raw_html") assert hasattr(response, "links") assert hasattr(response, "screenshot") assert hasattr(response, "extract") assert hasattr(response, "metadata") - assert hasattr(response, "changeTracking") + assert hasattr(response, "change_tracking") @pytest.mark.asyncio async def test_scrape_url_async_all_params_with_extract_full_page_screenshot(): @@ -83,11 +83,11 @@ async def test_scrape_url_async_all_params_with_extract_full_page_screenshot(): WriteAction(type="write", text="test input"), PressAction(type="press", key="Enter"), ScrollAction(type="scroll", direction="down"), - ExecuteJavascriptAction(type="executeJavascript", script="function getTitle() { return document.title; }; getTitle();") + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") ] response = await app.scrape_url( url=TEST_URL, - formats=["markdown", "html", "rawHtml", "links", "screenshot@fullPage", "extract", "changeTracking"], + formats=["markdown", "html", "raw_html", "links", "screenshot@full_page", "extract", "change_tracking"], include_tags=["h1", "p"], exclude_tags=["footer"], only_main_content=True, @@ -107,12 +107,12 @@ async def test_scrape_url_async_all_params_with_extract_full_page_screenshot(): assert response.success assert hasattr(response, "markdown") assert hasattr(response, "html") - assert hasattr(response, "rawHtml") + assert hasattr(response, "raw_html") assert hasattr(response, "links") assert hasattr(response, "screenshot") assert hasattr(response, "extract") assert hasattr(response, "metadata") - assert hasattr(response, "changeTracking") + assert hasattr(response, "change_tracking") @pytest.mark.asyncio async def test_scrape_url_async_all_params_with_json_options(): @@ -126,11 +126,11 @@ async def test_scrape_url_async_all_params_with_json_options(): WriteAction(type="write", text="test input"), PressAction(type="press", key="Enter"), ScrollAction(type="scroll", direction="down"), - ExecuteJavascriptAction(type="executeJavascript", script="function getTitle() { return document.title; }; getTitle();") + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") ] response = await app.scrape_url( url=TEST_URL, - formats=["markdown", "html", "rawHtml", "links", "screenshot", "json", "changeTracking"], + formats=["markdown", "html", "raw_html", "links", "screenshot", "json", "change_tracking"], include_tags=["h1", "p"], exclude_tags=["footer"], only_main_content=True, @@ -150,9 +150,9 @@ async def test_scrape_url_async_all_params_with_json_options(): assert response.success assert hasattr(response, "markdown") assert hasattr(response, "html") - assert hasattr(response, "rawHtml") + assert hasattr(response, "raw_html") assert hasattr(response, "links") assert hasattr(response, "screenshot") assert hasattr(response, "json") assert hasattr(response, "metadata") - assert hasattr(response, "changeTracking") \ No newline at end of file + assert hasattr(response, "change_tracking") \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/test_async_batch_scrape.py b/apps/python-sdk/tests/e2e/test_async_batch_scrape.py index d3fc8c6c..b05617fe 100644 --- a/apps/python-sdk/tests/e2e/test_async_batch_scrape.py +++ b/apps/python-sdk/tests/e2e/test_async_batch_scrape.py @@ -1,7 +1,10 @@ import os import pytest from dotenv import load_dotenv -from firecrawl import 
FirecrawlApp +import sys + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) +from firecrawl.firecrawl import FirecrawlApp, JsonConfig, LocationConfig, WaitAction, ScreenshotAction, WriteAction, PressAction, ScrollAction, ExecuteJavascriptAction load_dotenv() @@ -10,6 +13,115 @@ API_KEY = os.getenv("TEST_API_KEY") app = FirecrawlApp(api_url=API_URL, api_key=API_KEY) -def test_async_batch_scrape_urls_placeholder(): - # TODO: Implement E2E tests for async_batch_scrape_urls - assert True \ No newline at end of file +TEST_URLS = [ + "https://example.com", + "https://www.iana.org" +] + +def wait_for_batch_completion(app, job_id, timeout=60): + import time + for _ in range(timeout): + status = app.check_batch_scrape_status(job_id) + if status.status == "completed": + return status + time.sleep(1) + raise TimeoutError("Batch scrape did not complete in time") + +def test_async_batch_scrape_urls_simple(): + job = app.async_batch_scrape_urls( + TEST_URLS, + formats=["markdown"] + ) + assert job.id is not None + status = wait_for_batch_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) == len(TEST_URLS) + assert any("Example Domain" in doc.markdown for doc in status.data) + assert any("Internet Assigned Numbers Authority" in doc.markdown for doc in status.data) + +def test_async_batch_scrape_urls_all_params_with_extract(): + location = LocationConfig(country="us", languages=["en"]) + extract_schema = {"type": "object", "properties": {"title": {"type": "string"}}} + extract = JsonConfig(prompt="Extract the title", schema=extract_schema) + actions = [ + WaitAction(type="wait", milliseconds=500), + ScreenshotAction(type="screenshot", fullPage=True), + WriteAction(type="write", text="test input"), + PressAction(type="press", key="Enter"), + ScrollAction(type="scroll", direction="down"), + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") + ] + job = app.async_batch_scrape_urls( + TEST_URLS, + formats=["markdown", "html", "raw_html", "links", "screenshot", "extract"], + include_tags=["h1", "p"], + exclude_tags=["footer"], + only_main_content=True, + wait_for=1000, + timeout=15000, + location=location, + mobile=True, + skip_tls_verification=True, + remove_base64_images=True, + block_ads=True, + proxy="basic", + extract=extract, + actions=actions + ) + assert job.id is not None + status = wait_for_batch_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) == len(TEST_URLS) + for doc in status.data: + assert hasattr(doc, "markdown") + assert hasattr(doc, "html") + assert hasattr(doc, "raw_html") + assert hasattr(doc, "links") + assert hasattr(doc, "screenshot") + assert hasattr(doc, "extract") + assert hasattr(doc, "metadata") + +def test_async_batch_scrape_urls_all_params_with_json_options(): + location = LocationConfig(country="us", languages=["en"]) + json_schema = {"type": "object", "properties": {"title": {"type": "string"}}} + json_options = JsonConfig(prompt="Extract the title as JSON", schema=json_schema) + actions = [ + WaitAction(type="wait", milliseconds=500), + ScreenshotAction(type="screenshot", fullPage=True), + WriteAction(type="write", text="test input"), + PressAction(type="press", key="Enter"), + ScrollAction(type="scroll", direction="down"), + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") + ] + job
= app.async_batch_scrape_urls( + TEST_URLS, + formats=["markdown", "html", "raw_html", "links", "screenshot", "json"], + include_tags=["h1", "p"], + exclude_tags=["footer"], + only_main_content=True, + wait_for=1000, + timeout=15000, + location=location, + mobile=True, + skip_tls_verification=True, + remove_base64_images=True, + block_ads=True, + proxy="basic", + json_options=json_options, + actions=actions + ) + assert job.id is not None + status = wait_for_batch_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) == len(TEST_URLS) + for doc in status.data: + assert hasattr(doc, "markdown") + assert hasattr(doc, "html") + assert hasattr(doc, "raw_html") + assert hasattr(doc, "links") + assert hasattr(doc, "screenshot") + assert hasattr(doc, "json") + assert hasattr(doc, "metadata") \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/test_async_crawl.py b/apps/python-sdk/tests/e2e/test_async_crawl.py index a7b3dc06..e63cff54 100644 --- a/apps/python-sdk/tests/e2e/test_async_crawl.py +++ b/apps/python-sdk/tests/e2e/test_async_crawl.py @@ -1,7 +1,10 @@ import os import pytest +import sys from dotenv import load_dotenv -from firecrawl import FirecrawlApp + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) +from firecrawl.firecrawl import FirecrawlApp, LocationConfig, WaitAction, ScreenshotAction, WriteAction, PressAction, ScrollAction, ExecuteJavascriptAction, ScrapeOptions load_dotenv() @@ -10,6 +13,80 @@ API_KEY = os.getenv("TEST_API_KEY") app = FirecrawlApp(api_url=API_URL, api_key=API_KEY) -def test_async_crawl_url_placeholder(): - # TODO: Implement E2E tests for async_crawl_url - assert True \ No newline at end of file +TEST_URL = "https://example.com" + +def wait_for_crawl_completion(app, job_id, timeout=60): + import time + for _ in range(timeout): + status = app.check_crawl_status(job_id) + if status.status == "completed": + return status + time.sleep(1) + raise TimeoutError("Crawl did not complete in time") + +def test_async_crawl_url_simple(): + job = app.async_crawl_url( + TEST_URL, + limit=1, + scrape_options=ScrapeOptions(formats=["markdown"]) + ) + assert job.id is not None + status = wait_for_crawl_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) >= 1 + assert any("Example Domain" in doc.markdown for doc in status.data) + +def test_async_crawl_url_all_params(): + location = LocationConfig(country="us", languages=["en"]) + actions = [ + WaitAction(type="wait", milliseconds=500), + ScreenshotAction(type="screenshot", fullPage=True), + WriteAction(type="write", text="test input"), + PressAction(type="press", key="Enter"), + ScrollAction(type="scroll", direction="down"), + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") + ] + scrape_options = ScrapeOptions( + formats=["markdown", "html", "raw_html", "links", "screenshot"], + include_tags=["h1", "p"], + exclude_tags=["footer"], + only_main_content=True, + wait_for=1000, + timeout=15000, + location=location, + mobile=True, + skip_tls_verification=True, + remove_base64_images=True, + block_ads=True, + proxy="basic", + actions=actions + ) + job = app.async_crawl_url( + TEST_URL, + include_paths=["/"], + exclude_paths=["/notarealpage"], + max_depth=2, + max_discovery_depth=2, + limit=2, + allow_backward_links=True, + allow_external_links=True, + ignore_sitemap=True, + scrape_options=scrape_options, +
deduplicate_similar_urls=True, + ignore_query_parameters=True, + regex_on_full_url=True, + delay=1 + ) + assert job.id is not None + status = wait_for_crawl_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) >= 1 + for doc in status.data: + assert hasattr(doc, "markdown") + assert hasattr(doc, "html") + assert hasattr(doc, "raw_html") + assert hasattr(doc, "links") + assert hasattr(doc, "screenshot") + assert hasattr(doc, "metadata") \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/test_batch_scrape.py b/apps/python-sdk/tests/e2e/test_batch_scrape.py index ff8aa0af..57a19639 100644 --- a/apps/python-sdk/tests/e2e/test_batch_scrape.py +++ b/apps/python-sdk/tests/e2e/test_batch_scrape.py @@ -1,7 +1,10 @@ import os import pytest from dotenv import load_dotenv -from firecrawl import FirecrawlApp +import sys + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) +from firecrawl.firecrawl import FirecrawlApp, JsonConfig, LocationConfig, WaitAction, ScreenshotAction, WriteAction, PressAction, ScrollAction, ExecuteJavascriptAction load_dotenv() @@ -10,6 +13,115 @@ API_KEY = os.getenv("TEST_API_KEY") app = FirecrawlApp(api_url=API_URL, api_key=API_KEY) -def test_batch_scrape_urls_placeholder(): - # TODO: Implement E2E tests for batch_scrape_urls - assert True \ No newline at end of file +TEST_URLS = [ + "https://example.com", + "https://www.iana.org" +] + +def wait_for_batch_completion(app, job_id, timeout=60): + import time + for _ in range(timeout): + status = app.check_batch_scrape_status(job_id) + if status.status == "completed": + return status + time.sleep(1) + raise TimeoutError("Batch scrape did not complete in time") + +def test_batch_scrape_urls_simple(): + job = app.async_batch_scrape_urls( + TEST_URLS, + formats=["markdown"] + ) + assert job.id is not None + status = wait_for_batch_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) == len(TEST_URLS) + assert any("Example Domain" in doc.markdown for doc in status.data) + assert any("Internet Assigned Numbers Authority" in doc.markdown for doc in status.data) + +def test_batch_scrape_urls_all_params_with_extract(): + location = LocationConfig(country="us", languages=["en"]) + extract_schema = {"type": "object", "properties": {"title": {"type": "string"}}} + extract = JsonConfig(prompt="Extract the title", schema=extract_schema) + actions = [ + WaitAction(type="wait", milliseconds=500), + ScreenshotAction(type="screenshot", fullPage=True), + WriteAction(type="write", text="test input"), + PressAction(type="press", key="Enter"), + ScrollAction(type="scroll", direction="down"), + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") + ] + job = app.async_batch_scrape_urls( + TEST_URLS, + formats=["markdown", "html", "raw_html", "links", "screenshot", "extract"], + include_tags=["h1", "p"], + exclude_tags=["footer"], + only_main_content=True, + wait_for=1000, + timeout=15000, + location=location, + mobile=True, + skip_tls_verification=True, + remove_base64_images=True, + block_ads=True, + proxy="basic", + extract=extract, + actions=actions + ) + assert job.id is not None + status = wait_for_batch_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) == len(TEST_URLS) + for doc in status.data: + assert hasattr(doc, "markdown") + assert hasattr(doc, "html") +
assert hasattr(doc, "raw_html") + assert hasattr(doc, "links") + assert hasattr(doc, "screenshot") + assert hasattr(doc, "extract") + assert hasattr(doc, "metadata") + +def test_batch_scrape_urls_all_params_with_json_options(): + location = LocationConfig(country="us", languages=["en"]) + json_schema = {"type": "object", "properties": {"title": {"type": "string"}}} + json_options = JsonConfig(prompt="Extract the title as JSON", schema=json_schema) + actions = [ + WaitAction(type="wait", milliseconds=500), + ScreenshotAction(type="screenshot", fullPage=True), + WriteAction(type="write", text="test input"), + PressAction(type="press", key="Enter"), + ScrollAction(type="scroll", direction="down"), + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") + ] + job = app.async_batch_scrape_urls( + TEST_URLS, + formats=["markdown", "html", "raw_html", "links", "screenshot", "json"], + include_tags=["h1", "p"], + exclude_tags=["footer"], + only_main_content=True, + wait_for=1000, + timeout=15000, + location=location, + mobile=True, + skip_tls_verification=True, + remove_base64_images=True, + block_ads=True, + proxy="basic", + json_options=json_options, + actions=actions + ) + assert job.id is not None + status = wait_for_batch_completion(app, job.id) + assert status.success + assert hasattr(status, "data") + assert len(status.data) == len(TEST_URLS) + for doc in status.data: + assert hasattr(doc, "markdown") + assert hasattr(doc, "html") + assert hasattr(doc, "raw_html") + assert hasattr(doc, "links") + assert hasattr(doc, "screenshot") + assert hasattr(doc, "json") + assert hasattr(doc, "metadata") \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/test_check_crawl_status.py b/apps/python-sdk/tests/e2e/test_check_crawl_status.py deleted file mode 100644 index 22b1a4d4..00000000 --- a/apps/python-sdk/tests/e2e/test_check_crawl_status.py +++ /dev/null @@ -1,15 +0,0 @@ -import os -import pytest -from dotenv import load_dotenv -from firecrawl import FirecrawlApp - -load_dotenv() - -API_URL = "https://api.firecrawl.dev" -API_KEY = os.getenv("TEST_API_KEY") - -app = FirecrawlApp(api_url=API_URL, api_key=API_KEY) - -def test_check_crawl_status_placeholder(): - # TODO: Implement E2E tests for check_crawl_status - assert True \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/test_crawl.py b/apps/python-sdk/tests/e2e/test_crawl.py index bf758919..ac00f7be 100644 --- a/apps/python-sdk/tests/e2e/test_crawl.py +++ b/apps/python-sdk/tests/e2e/test_crawl.py @@ -1,7 +1,9 @@ import os -import pytest +import sys from dotenv import load_dotenv -from firecrawl import FirecrawlApp + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) +from firecrawl.firecrawl import FirecrawlApp, LocationConfig, WaitAction, ScreenshotAction, WriteAction, PressAction, ScrollAction, ExecuteJavascriptAction, ScrapeOptions load_dotenv() @@ -10,6 +12,67 @@ API_KEY = os.getenv("TEST_API_KEY") app = FirecrawlApp(api_url=API_URL, api_key=API_KEY) -def test_crawl_url_placeholder(): - # TODO: Implement E2E tests for crawl_url - assert True \ No newline at end of file +TEST_URL = "https://example.com" + +def test_crawl_url_simple(): + status = app.crawl_url( + TEST_URL, + limit=1, + scrape_options=ScrapeOptions(formats=["markdown"]) + ) + assert status.success + assert hasattr(status, "data") + assert len(status.data) >= 1 + assert any("Example Domain" in doc.markdown for doc in status.data) + +def 
test_crawl_url_all_params(): + location = LocationConfig(country="us", languages=["en"]) + actions = [ + WaitAction(type="wait", milliseconds=500), + ScreenshotAction(type="screenshot", fullPage=True), + WriteAction(type="write", text="test input"), + PressAction(type="press", key="Enter"), + ScrollAction(type="scroll", direction="down"), + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") + ] + scrape_options = ScrapeOptions( + formats=["markdown", "html", "raw_html", "links", "screenshot"], + include_tags=["h1", "p"], + exclude_tags=["footer"], + only_main_content=True, + wait_for=1000, + timeout=15000, + location=location, + mobile=True, + skip_tls_verification=True, + remove_base64_images=True, + block_ads=True, + proxy="basic", + actions=actions + ) + status = app.crawl_url( + TEST_URL, + include_paths=["/"], + exclude_paths=["/notarealpage"], + max_depth=2, + max_discovery_depth=2, + limit=2, + allow_backward_links=True, + allow_external_links=True, + ignore_sitemap=True, + scrape_options=scrape_options, + deduplicate_similar_urls=True, + ignore_query_parameters=True, + regex_on_full_url=True, + delay=1 + ) + assert status.success + assert hasattr(status, "data") + assert len(status.data) >= 1 + for doc in status.data: + assert hasattr(doc, "markdown") + assert hasattr(doc, "html") + assert hasattr(doc, "raw_html") + assert hasattr(doc, "links") + assert hasattr(doc, "screenshot") + assert hasattr(doc, "metadata") \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/test_deep_research.py b/apps/python-sdk/tests/e2e/test_deep_research.py index 463aea64..d32445e7 100644 --- a/apps/python-sdk/tests/e2e/test_deep_research.py +++ b/apps/python-sdk/tests/e2e/test_deep_research.py @@ -1,7 +1,10 @@ import os +import sys import pytest from dotenv import load_dotenv -from firecrawl import FirecrawlApp + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) +from firecrawl.firecrawl import FirecrawlApp load_dotenv() @@ -10,6 +13,34 @@ API_KEY = os.getenv("TEST_API_KEY") app = FirecrawlApp(api_url=API_URL, api_key=API_KEY) -def test_deep_research_placeholder(): - # TODO: Implement E2E tests for deep_research - assert True \ No newline at end of file +def test_deep_research_simple(): + result = app.deep_research("What is the capital of France?", max_urls=2) + assert hasattr(result, "status") + assert result.status == "completed" + assert hasattr(result, "success") + assert result.success + assert hasattr(result, "data") + assert result.data is not None + assert "Paris" in str(result.data) or "France" in str(result.data) + +def test_deep_research_all_params(): + result = app.deep_research( + "What are the latest advancements in AI?", + max_depth=2, + time_limit=60, + max_urls=3, + analysis_prompt="Summarize the most important recent AI advancements.", + system_prompt="You are an expert AI researcher."
+ ) + assert hasattr(result, "status") + assert result.status == "completed" + assert hasattr(result, "success") + assert result.success + assert hasattr(result, "data") + assert hasattr(result, "activities") + assert isinstance(result.activities, list) + assert result.data is not None + assert hasattr(result.data, "sources") + assert isinstance(result.data.sources, list) + assert hasattr(result.data, "final_analysis") + assert isinstance(result.data.final_analysis, str) \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/test_extract.py b/apps/python-sdk/tests/e2e/test_extract.py index 4e925041..93f58d24 100644 --- a/apps/python-sdk/tests/e2e/test_extract.py +++ b/apps/python-sdk/tests/e2e/test_extract.py @@ -1,7 +1,12 @@ import os +import sys import pytest from dotenv import load_dotenv -from firecrawl import FirecrawlApp +from pydantic import BaseModel +from typing import List + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) +from firecrawl.firecrawl import FirecrawlApp load_dotenv() @@ -10,6 +15,80 @@ API_KEY = os.getenv("TEST_API_KEY") app = FirecrawlApp(api_url=API_URL, api_key=API_KEY) -def test_extract_placeholder(): - # TODO: Implement E2E tests for extract - assert True \ No newline at end of file +class ExtractSchema(BaseModel): + title: str + description: str + links: List[str] + +def test_extract_simple_schema_class(): + result = app.extract( + ["https://example.com"], + prompt="Extract the title, description, and links from the website", + schema=ExtractSchema + ) + assert result.success + assert result.data is not None + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert isinstance(result.data.links, list) + +def test_extract_simple_schema_dict(): + result = app.extract( + ["https://example.com"], + prompt="Extract the title, description, and links from the website", + schema=ExtractSchema.schema() + ) + assert result.success + assert result.data is not None + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert isinstance(result.data.links, list) + +def test_extract_simple_schema_json(): + schema = None + if hasattr(ExtractSchema, "model_json_schema"): + schema = ExtractSchema.model_json_schema() + elif hasattr(ExtractSchema, "schema_json"): + schema = ExtractSchema.schema_json() + else: + pytest.skip("No JSON schema export method available on ExtractSchema") + result = app.extract( + ["https://example.com"], + prompt="Extract the title, description, and links from the website", + schema=schema + ) + assert result.success + assert result.data is not None + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, "links") + assert isinstance(result.data.links, list) + +def test_extract_all_params(): + class AllParamsSchema(BaseModel): + title: str + description: str + links: List[str] + author: str + schema = AllParamsSchema.schema() + result = app.extract( + ["https://www.iana.org"], + prompt="Extract the title, description, links, and author from the website", + schema=schema, + system_prompt="You are a helpful extraction agent.", + allow_external_links=True, + enable_web_search=True, + show_sources=True, + agent={"model": "FIRE-1"} + ) + assert result.success + assert result.data is not None + assert hasattr(result.data, "title") + assert hasattr(result.data, "description") + assert hasattr(result.data, 
"links") + assert hasattr(result.data, "author") + assert isinstance(result.data.links, list) + assert hasattr(result, "sources") + assert isinstance(result.sources, list) \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/test_generate_llms_text.py b/apps/python-sdk/tests/e2e/test_generate_llms_text.py index fe9928e1..ff245026 100644 --- a/apps/python-sdk/tests/e2e/test_generate_llms_text.py +++ b/apps/python-sdk/tests/e2e/test_generate_llms_text.py @@ -1,7 +1,10 @@ import os +import sys import pytest from dotenv import load_dotenv -from firecrawl import FirecrawlApp + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) +from firecrawl.firecrawl import FirecrawlApp load_dotenv() @@ -10,6 +13,29 @@ API_KEY = os.getenv("TEST_API_KEY") app = FirecrawlApp(api_url=API_URL, api_key=API_KEY) -def test_generate_llms_text_placeholder(): - # TODO: Implement E2E tests for generate_llms_text - assert True \ No newline at end of file +def test_generate_llms_text_simple(): + result = app.generate_llms_text("https://example.com") + assert hasattr(result, "status") + assert result.status == "completed" + assert hasattr(result, "data") + assert result.data is not None + assert hasattr(result.data, "llmstxt") + assert isinstance(result.data.llmstxt, str) + assert len(result.data.llmstxt) > 0 + +def test_generate_llms_text_all_params(): + result = app.generate_llms_text( + "https://www.iana.org", + max_urls=5, + show_full_text=True, + experimental_stream=True + ) + assert hasattr(result, "status") + assert result.status == "completed" + assert hasattr(result, "data") + assert result.data is not None + assert hasattr(result.data, "llmstxt") + assert isinstance(result.data.llmstxt, str) + assert len(result.data.llmstxt) > 0 + assert hasattr(result.data, "llmsfulltxt") + assert result.data.llmsfulltxt is None or isinstance(result.data.llmsfulltxt, str) \ No newline at end of file diff --git a/apps/python-sdk/tests/e2e/test_scrape.py b/apps/python-sdk/tests/e2e/test_scrape.py index f20a80f7..8ab64f5d 100644 --- a/apps/python-sdk/tests/e2e/test_scrape.py +++ b/apps/python-sdk/tests/e2e/test_scrape.py @@ -80,11 +80,11 @@ def test_scrape_url_all_params_with_extract_full_page_screenshot(): WriteAction(type="write", text="test input"), PressAction(type="press", key="Enter"), ScrollAction(type="scroll", direction="down"), - ExecuteJavascriptAction(type="executeJavascript", script="function getTitle() { return document.title; }; getTitle();") + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") ] response = app.scrape_url( url=TEST_URL, - formats=["markdown", "html", "rawHtml", "links", "screenshot@fullPage", "extract", "changeTracking"], + formats=["markdown", "html", "raw_html", "links", "screenshot@full_page", "extract", "change_tracking"], include_tags=["h1", "p"], exclude_tags=["footer"], only_main_content=True, @@ -104,12 +104,12 @@ def test_scrape_url_all_params_with_extract_full_page_screenshot(): assert response.success assert hasattr(response, "markdown") assert hasattr(response, "html") - assert hasattr(response, "rawHtml") + assert hasattr(response, "raw_html") assert hasattr(response, "links") assert hasattr(response, "screenshot") assert hasattr(response, "extract") assert hasattr(response, "metadata") - assert hasattr(response, "changeTracking") + assert hasattr(response, "change_tracking") def test_scrape_url_all_params_with_json_options(): location = LocationConfig(country="us", 
languages=["en"]) @@ -122,11 +122,11 @@ def test_scrape_url_all_params_with_json_options(): WriteAction(type="write", text="test input"), PressAction(type="press", key="Enter"), ScrollAction(type="scroll", direction="down"), - ExecuteJavascriptAction(type="executeJavascript", script="function getTitle() { return document.title; }; getTitle();") + ExecuteJavascriptAction(type="execute_javascript", script="function get_title() { return document.title; }; get_title();") ] response = app.scrape_url( url=TEST_URL, - formats=["markdown", "html", "rawHtml", "links", "screenshot", "json", "changeTracking"], + formats=["markdown", "html", "raw_html", "links", "screenshot", "json", "change_tracking"], include_tags=["h1", "p"], exclude_tags=["footer"], only_main_content=True, @@ -146,9 +146,9 @@ def test_scrape_url_all_params_with_json_options(): assert response.success assert hasattr(response, "markdown") assert hasattr(response, "html") - assert hasattr(response, "rawHtml") + assert hasattr(response, "raw_html") assert hasattr(response, "links") assert hasattr(response, "screenshot") assert hasattr(response, "json") assert hasattr(response, "metadata") - assert hasattr(response, "changeTracking") \ No newline at end of file + assert hasattr(response, "change_tracking") \ No newline at end of file