python sdk and tests

This commit is contained in:
rafaelsideguide 2024-08-08 14:25:09 -03:00
parent 6cdf4c68ec
commit 0b8df5e264
7 changed files with 317 additions and 36 deletions

View File

@ -0,0 +1,75 @@
import uuid
from firecrawl.firecrawl import FirecrawlApp
# Create the client; replace the placeholder with your own API key.
app = FirecrawlApp(api_key="fc-YOUR_API_KEY")

# Scrape a website and print the markdown of the result:
scrape_result = app.scrape_url('firecrawl.dev')
print(scrape_result['markdown'])

# Crawl a website, excluding everything that matches blog/*:
crawl_params = {'crawlerOptions': {'excludes': ['blog/*']}}
idempotency_key = str(uuid.uuid4())  # optional idempotency key
crawl_result = app.crawl_url('mendable.ai', crawl_params, True, 2, idempotency_key)
print(crawl_result)
# LLM Extraction:
# Define schema to extract contents into using pydantic
from pydantic import BaseModel, Field
from typing import List


# One extracted story. (No class docstrings here on purpose: pydantic copies a
# model's docstring into the generated JSON schema as its "description".)
class ArticleSchema(BaseModel):
    title: str
    points: int
    by: str
    commentsURL: str


# Wrapper holding at most five stories.
class TopArticlesSchema(BaseModel):
    # `model_json_schema()` below is Pydantic v2 API; in v2 the list-size
    # constraint is spelled `max_length` (`max_items` is deprecated).
    top: List[ArticleSchema] = Field(..., max_length=5, description="Top 5 stories")


llm_extraction_result = app.scrape_url('https://news.ycombinator.com', {
    'extractorOptions': {
        'extractionSchema': TopArticlesSchema.model_json_schema(),
        'mode': 'llm-extraction'
    },
    'pageOptions': {
        'onlyMainContent': True
    }
})

print(llm_extraction_result['llm_extraction'])
# Define schema to extract contents into using json schema.
# Shape of a single story inside the "top" array.
article_schema = {
    "type": "object",
    "properties": {
        "title": {"type": "string"},
        "points": {"type": "number"},
        "by": {"type": "string"},
        "commentsURL": {"type": "string"}
    },
    "required": ["title", "points", "by", "commentsURL"]
}

# Top-level object: exactly five stories under the "top" key.
json_schema = {
    "type": "object",
    "properties": {
        "top": {
            "type": "array",
            "items": article_schema,
            "minItems": 5,
            "maxItems": 5,
            "description": "Top 5 stories on Hacker News"
        }
    },
    "required": ["top"]
}

extraction_request = {
    'extractorOptions': {
        'extractionSchema': json_schema,
        'mode': 'llm-extraction'
    },
    'pageOptions': {
        'onlyMainContent': True
    }
}
llm_extraction_result = app.scrape_url('https://news.ycombinator.com', extraction_request)

print(llm_extraction_result['llm_extraction'])

View File

@ -20,31 +20,31 @@ FirecrawlApp = firecrawl.FirecrawlApp
def test_no_api_key(): def test_no_api_key():
with pytest.raises(Exception) as excinfo: with pytest.raises(Exception) as excinfo:
invalid_app = FirecrawlApp(api_url=API_URL) invalid_app = FirecrawlApp(api_url=API_URL, version='v0')
assert "No API key provided" in str(excinfo.value) assert "No API key provided" in str(excinfo.value)
def test_scrape_url_invalid_api_key(): def test_scrape_url_invalid_api_key():
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key", version='v0')
with pytest.raises(Exception) as excinfo: with pytest.raises(Exception) as excinfo:
invalid_app.scrape_url('https://firecrawl.dev') invalid_app.scrape_url('https://firecrawl.dev')
assert "Unexpected error during scrape URL: Status code 401. Unauthorized: Invalid token" in str(excinfo.value) assert "Unexpected error during scrape URL: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
def test_blocklisted_url(): def test_blocklisted_url():
blocklisted_url = "https://facebook.com/fake-test" blocklisted_url = "https://facebook.com/fake-test"
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
with pytest.raises(Exception) as excinfo: with pytest.raises(Exception) as excinfo:
app.scrape_url(blocklisted_url) app.scrape_url(blocklisted_url)
assert "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value) assert "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)
def test_successful_response_with_valid_preview_token(): def test_successful_response_with_valid_preview_token():
app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token") app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token", version='v0')
response = app.scrape_url('https://roastmywebsite.ai') response = app.scrape_url('https://roastmywebsite.ai')
assert response is not None assert response is not None
assert 'content' in response assert 'content' in response
assert "_Roast_" in response['content'] assert "_Roast_" in response['content']
def test_scrape_url_e2e(): def test_scrape_url_e2e():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
response = app.scrape_url('https://roastmywebsite.ai') response = app.scrape_url('https://roastmywebsite.ai')
assert response is not None assert response is not None
assert 'content' in response assert 'content' in response
@ -54,7 +54,7 @@ def test_scrape_url_e2e():
assert "_Roast_" in response['content'] assert "_Roast_" in response['content']
def test_successful_response_with_valid_api_key_and_include_html(): def test_successful_response_with_valid_api_key_and_include_html():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
response = app.scrape_url('https://roastmywebsite.ai', {'pageOptions': {'includeHtml': True}}) response = app.scrape_url('https://roastmywebsite.ai', {'pageOptions': {'includeHtml': True}})
assert response is not None assert response is not None
assert 'content' in response assert 'content' in response
@ -66,7 +66,7 @@ def test_successful_response_with_valid_api_key_and_include_html():
assert "<h1" in response['html'] assert "<h1" in response['html']
def test_successful_response_for_valid_scrape_with_pdf_file(): def test_successful_response_for_valid_scrape_with_pdf_file():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001.pdf') response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001.pdf')
assert response is not None assert response is not None
assert 'content' in response assert 'content' in response
@ -74,7 +74,7 @@ def test_successful_response_for_valid_scrape_with_pdf_file():
assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content'] assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']
def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension(): def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001') response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001')
time.sleep(6) # wait for 6 seconds time.sleep(6) # wait for 6 seconds
assert response is not None assert response is not None
@ -83,20 +83,20 @@ def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_ext
assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content'] assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']
def test_crawl_url_invalid_api_key(): def test_crawl_url_invalid_api_key():
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key", version='v0')
with pytest.raises(Exception) as excinfo: with pytest.raises(Exception) as excinfo:
invalid_app.crawl_url('https://firecrawl.dev') invalid_app.crawl_url('https://firecrawl.dev')
assert "Unexpected error during start crawl job: Status code 401. Unauthorized: Invalid token" in str(excinfo.value) assert "Unexpected error during start crawl job: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
def test_should_return_error_for_blocklisted_url(): def test_should_return_error_for_blocklisted_url():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
blocklisted_url = "https://twitter.com/fake-test" blocklisted_url = "https://twitter.com/fake-test"
with pytest.raises(Exception) as excinfo: with pytest.raises(Exception) as excinfo:
app.crawl_url(blocklisted_url) app.crawl_url(blocklisted_url)
assert "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value) assert "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)
def test_crawl_url_wait_for_completion_e2e(): def test_crawl_url_wait_for_completion_e2e():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True) response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
assert response is not None assert response is not None
assert len(response) > 0 assert len(response) > 0
@ -104,7 +104,7 @@ def test_crawl_url_wait_for_completion_e2e():
assert "_Roast_" in response[0]['content'] assert "_Roast_" in response[0]['content']
def test_crawl_url_with_idempotency_key_e2e(): def test_crawl_url_with_idempotency_key_e2e():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
uniqueIdempotencyKey = str(uuid4()) uniqueIdempotencyKey = str(uuid4())
response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey) response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
assert response is not None assert response is not None
@ -117,7 +117,7 @@ def test_crawl_url_with_idempotency_key_e2e():
assert "Conflict: Failed to start crawl job due to a conflict. Idempotency key already used" in str(excinfo.value) assert "Conflict: Failed to start crawl job due to a conflict. Idempotency key already used" in str(excinfo.value)
def test_check_crawl_status_e2e(): def test_check_crawl_status_e2e():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, False) response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, False)
assert response is not None assert response is not None
assert 'jobId' in response assert 'jobId' in response
@ -131,20 +131,20 @@ def test_check_crawl_status_e2e():
assert len(status_response['data']) > 0 assert len(status_response['data']) > 0
def test_search_e2e(): def test_search_e2e():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
response = app.search("test query") response = app.search("test query")
assert response is not None assert response is not None
assert 'content' in response[0] assert 'content' in response[0]
assert len(response) > 2 assert len(response) > 2
def test_search_invalid_api_key(): def test_search_invalid_api_key():
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key", version='v0')
with pytest.raises(Exception) as excinfo: with pytest.raises(Exception) as excinfo:
invalid_app.search("test query") invalid_app.search("test query")
assert "Unexpected error during search: Status code 401. Unauthorized: Invalid token" in str(excinfo.value) assert "Unexpected error during search: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
def test_llm_extraction(): def test_llm_extraction():
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0')
response = app.scrape_url("https://mendable.ai", { response = app.scrape_url("https://mendable.ai", {
'extractorOptions': { 'extractorOptions': {
'mode': 'llm-extraction', 'mode': 'llm-extraction',

View File

@ -0,0 +1,3 @@
API_URL=http://localhost:3002
ABSOLUTE_FIRECRAWL_PATH=/Users/user/firecrawl/apps/python-sdk/firecrawl/firecrawl.py
TEST_API_KEY=fc-YOUR_API_KEY

View File

@ -0,0 +1,168 @@
import importlib.util
import pytest
import time
import os
from uuid import uuid4
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Base URL of the API under test. An `API_URL` value from the environment/.env
# takes precedence; otherwise the local default is used.
API_URL = os.getenv('API_URL', "http://127.0.0.1:3002")
# Path to the SDK source file under test (relative to the working directory).
ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py"
TEST_API_KEY = os.getenv('TEST_API_KEY')

print(f"ABSOLUTE_FIRECRAWL_PATH: {ABSOLUTE_FIRECRAWL_PATH}")

# Import the SDK directly from its source file so the tests exercise that exact
# file rather than whatever `firecrawl` package may be installed.
spec = importlib.util.spec_from_file_location("FirecrawlApp", ABSOLUTE_FIRECRAWL_PATH)
firecrawl = importlib.util.module_from_spec(spec)
spec.loader.exec_module(firecrawl)
FirecrawlApp = firecrawl.FirecrawlApp
def test_no_api_key():
    """Building a client without an API key must fail with "No API key provided"."""
    with pytest.raises(Exception) as raised:
        FirecrawlApp(api_url=API_URL)
    assert "No API key provided" in str(raised.value)
def test_scrape_url_invalid_api_key():
    """Scraping with an invalid API key must surface the 401 "Invalid token" error."""
    expected_message = "Unexpected error during scrape URL: Status code 401. Unauthorized: Invalid token"
    client = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
    with pytest.raises(Exception) as raised:
        client.scrape_url('https://firecrawl.dev')
    assert expected_message in str(raised.value)
def test_blocklisted_url():
    """Scraping a blocklisted social-media URL must fail with the 403 policy message."""
    expected_message = (
        "Unexpected error during scrape URL: Status code 403. "
        "Firecrawl currently does not support social media scraping due to policy restrictions. "
        "We're actively working on building support for it."
    )
    target = "https://facebook.com/fake-test"
    client = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    with pytest.raises(Exception) as raised:
        client.scrape_url(target)
    assert expected_message in str(raised.value)
def test_successful_response_with_valid_preview_token():
    """Scraping with the preview token returns a result whose markdown mentions "_Roast_"."""
    app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
    response = app.scrape_url('https://roastmywebsite.ai')
    assert response is not None
    # The scrape result is checked through 'markdown' (as test_scrape_url_e2e
    # does for the same call); the old 'content' key is not expected here.
    assert 'markdown' in response
    assert "_Roast_" in response['markdown']
def test_scrape_url_e2e():
    """A default scrape returns markdown and metadata, but neither 'content' nor 'html'."""
    client = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    page = client.scrape_url('https://roastmywebsite.ai')
    assert page is not None
    assert 'content' not in page
    for expected_key in ('markdown', 'metadata'):
        assert expected_key in page
    assert 'html' not in page
    assert "_Roast_" in page['markdown']
def test_successful_response_with_valid_api_key_and_include_html():
    """Requesting the 'markdown' and 'html' formats returns both, plus metadata."""
    requested_formats = {'formats': ['markdown', 'html']}
    client = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    page = client.scrape_url('https://roastmywebsite.ai', requested_formats)
    assert page is not None
    assert 'content' not in page
    for expected_key in ('markdown', 'html', 'metadata'):
        assert expected_key in page
    assert "_Roast_" in page['markdown']
    assert "<h1" in page['html']
def test_successful_response_for_valid_scrape_with_pdf_file():
    """Scraping a PDF URL returns metadata and markdown containing the paper's text."""
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001.pdf')
    assert response is not None
    assert 'content' not in response
    # The text must be read from 'markdown': the line above requires 'content'
    # to be absent, so indexing response['content'] could only raise KeyError.
    assert 'markdown' in response
    assert 'metadata' in response
    assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['markdown']
def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension():
    """Scraping a PDF served without a .pdf extension still returns the paper's text as markdown."""
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001')
    time.sleep(6)  # wait for 6 seconds
    assert response is not None
    assert 'content' not in response
    # The text must be read from 'markdown': the line above requires 'content'
    # to be absent, so indexing response['content'] could only raise KeyError.
    assert 'markdown' in response
    assert 'metadata' in response
    assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['markdown']
def test_crawl_url_invalid_api_key():
    """Starting a crawl with an invalid API key must surface the 401 "Invalid token" error."""
    expected_message = "Unexpected error during start crawl job: Status code 401. Unauthorized: Invalid token"
    client = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
    with pytest.raises(Exception) as raised:
        client.crawl_url('https://firecrawl.dev')
    assert expected_message in str(raised.value)
def test_should_return_error_for_blocklisted_url():
    """Crawling a blocklisted social-media URL must fail with the 403 policy message."""
    expected_message = (
        "Unexpected error during start crawl job: Status code 403. "
        "Firecrawl currently does not support social media scraping due to policy restrictions. "
        "We're actively working on building support for it."
    )
    client = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    target = "https://twitter.com/fake-test"
    with pytest.raises(Exception) as raised:
        client.crawl_url(target)
    assert expected_message in str(raised.value)
def test_crawl_url_wait_for_completion_e2e():
    """A crawl that waits for completion returns a non-empty result whose first entry has markdown."""
    crawl_params = {'crawlerOptions': {'excludes': ['blog/*']}}
    client = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    pages = client.crawl_url('https://roastmywebsite.ai', crawl_params, True)
    assert pages is not None
    assert len(pages) > 0
    first_page = pages[0]
    assert 'content' not in first_page
    assert 'markdown' in first_page
    assert "_Roast_" in first_page['markdown']
def test_crawl_url_with_idempotency_key_e2e():
    """A crawl with a fresh idempotency key succeeds; reusing the same key is rejected as a conflict."""
    app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    uniqueIdempotencyKey = str(uuid4())
    response = app.crawl_url('https://roastmywebsite.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
    assert response is not None
    assert len(response) > 0
    # Crawl entries are checked through 'markdown', matching
    # test_crawl_url_wait_for_completion_e2e, which requires 'content' to be absent.
    assert 'markdown' in response[0]
    assert "_Roast_" in response[0]['markdown']

    # A second crawl with the same key must be refused.
    with pytest.raises(Exception) as excinfo:
        app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
    assert "Conflict: Failed to start crawl job due to a conflict. Idempotency key already used" in str(excinfo.value)
def test_check_crawl_status_e2e():
    """A crawl started without waiting can be looked up by job id and reports 'completed' with data."""
    crawl_params = {'crawlerOptions': {'excludes': ['blog/*']}}
    client = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    job = client.crawl_url('https://firecrawl.dev', crawl_params, False)
    assert job is not None
    assert 'jobId' in job

    time.sleep(30)  # wait for 30 seconds
    job_status = client.check_crawl_status(job['jobId'])
    assert job_status is not None
    assert 'status' in job_status
    assert job_status['status'] == 'completed'
    assert 'data' in job_status
    assert len(job_status['data']) > 0
def test_search_e2e():
    """Calling search on a default-version client must raise NotImplementedError."""
    client = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    with pytest.raises(NotImplementedError) as raised:
        client.search("test query")
    assert "Search is not supported in v1" in str(raised.value)
def test_llm_extraction():
    """LLM extraction returns an 'llm_extraction' object with the mission and two boolean flags."""
    # Schema describing the fields the extraction is asked to fill in.
    extraction_schema = {
        'type': 'object',
        'properties': {
            'company_mission': {'type': 'string'},
            'supports_sso': {'type': 'boolean'},
            'is_open_source': {'type': 'boolean'}
        },
        'required': ['company_mission', 'supports_sso', 'is_open_source']
    }
    client = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
    result = client.scrape_url("https://mendable.ai", {
        'extractorOptions': {
            'mode': 'llm-extraction',
            'extractionPrompt': "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
            'extractionSchema': extraction_schema
        }
    })
    assert result is not None
    assert 'llm_extraction' in result
    extracted = result['llm_extraction']
    assert 'company_mission' in extracted
    for flag in ('supports_sso', 'is_open_source'):
        assert isinstance(extracted[flag], bool)
def test_map_e2e():
    """Mapping a URL with the preview token returns a list."""
    client = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
    mapped = client.map_url('https://roastmywebsite.ai')
    assert mapped is not None
    assert isinstance(mapped, list)

View File

@ -19,24 +19,22 @@ import requests
logger : logging.Logger = logging.getLogger("firecrawl") logger : logging.Logger = logging.getLogger("firecrawl")
class FirecrawlApp: class FirecrawlApp:
""" def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None, version: str = 'v1') -> None:
Initialize the FirecrawlApp instance. """
Initialize the FirecrawlApp instance with API key, API URL, and version.
Args: Args:
api_key (Optional[str]): API key for authenticating with the Firecrawl API. api_key (Optional[str]): API key for authenticating with the Firecrawl API.
api_url (Optional[str]): Base URL for the Firecrawl API. api_url (Optional[str]): Base URL for the Firecrawl API.
""" version (str): API version, either 'v0' or 'v1'.
def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None: """
self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY') self.api_key = api_key or os.getenv('FIRECRAWL_API_KEY')
if self.api_key is None: self.api_url = api_url or os.getenv('FIRECRAWL_API_URL', 'https://api.firecrawl.dev')
logger.warning("No API key provided") self.version = version
raise ValueError('No API key provided') if self.api_key is None:
else: logger.warning("No API key provided")
logger.debug("Initialized FirecrawlApp with API key: %s", self.api_key) raise ValueError('No API key provided')
logger.debug(f"Initialized FirecrawlApp with API key: {self.api_key} and version: {self.version}")
self.api_url = api_url or os.getenv('FIRECRAWL_API_URL', 'https://api.firecrawl.dev')
if self.api_url != 'https://api.firecrawl.dev':
logger.debug("Initialized FirecrawlApp with API URL: %s", self.api_url)
def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any: def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
""" """
@ -75,9 +73,11 @@ class FirecrawlApp:
for key, value in params.items(): for key, value in params.items():
if key != 'extractorOptions': if key != 'extractorOptions':
scrape_params[key] = value scrape_params[key] = value
endpoint = f'/{self.version}/scrape'
# Make the POST request with the prepared headers and JSON data # Make the POST request with the prepared headers and JSON data
response = requests.post( response = requests.post(
f'{self.api_url}/v0/scrape', f'{self.api_url}{endpoint}',
headers=headers, headers=headers,
json=scrape_params, json=scrape_params,
) )
@ -104,6 +104,9 @@ class FirecrawlApp:
Raises: Raises:
Exception: If the search request fails. Exception: If the search request fails.
""" """
if self.version == 'v1':
raise NotImplementedError("Search is not supported in v1")
headers = self._prepare_headers() headers = self._prepare_headers()
json_data = {'query': query} json_data = {'query': query}
if params: if params:
@ -145,11 +148,12 @@ class FirecrawlApp:
Raises: Raises:
Exception: If the crawl job initiation or monitoring fails. Exception: If the crawl job initiation or monitoring fails.
""" """
endpoint = f'/{self.version}/crawl'
headers = self._prepare_headers(idempotency_key) headers = self._prepare_headers(idempotency_key)
json_data = {'url': url} json_data = {'url': url}
if params: if params:
json_data.update(params) json_data.update(params)
response = self._post_request(f'{self.api_url}/v0/crawl', json_data, headers) response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
if response.status_code == 200: if response.status_code == 200:
job_id = response.json().get('jobId') job_id = response.json().get('jobId')
if wait_until_done: if wait_until_done:
@ -172,13 +176,44 @@ class FirecrawlApp:
Raises: Raises:
Exception: If the status check request fails. Exception: If the status check request fails.
""" """
endpoint = f'/{self.version}/crawl/status/{job_id}'
headers = self._prepare_headers() headers = self._prepare_headers()
response = self._get_request(f'{self.api_url}/v0/crawl/status/{job_id}', headers) response = self._get_request(f'{self.api_url}{endpoint}', headers)
if response.status_code == 200: if response.status_code == 200:
return response.json() return response.json()
else: else:
self._handle_error(response, 'check crawl status') self._handle_error(response, 'check crawl status')
def map_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any:
    """
    Map a URL using the Firecrawl API's map endpoint.

    Args:
        url (str): The URL to map.
        params (Optional[Dict[str, Any]]): Extra fields merged into the request body.

    Returns:
        Any: The 'data' field of the JSON response when the request succeeds.

    Raises:
        NotImplementedError: If this client is configured for version 'v0'.
        Exception: If the API answers 200 but does not report success with data;
            other status codes are passed to `_handle_error`.
    """
    if self.version == 'v0':
        raise NotImplementedError("Map is not supported in v0")

    endpoint = f'/{self.version}/map'
    headers = self._prepare_headers()

    # Prepare the base map parameters with the URL
    json_data = {'url': url}
    if params:
        json_data.update(params)

    # Make the POST request with the prepared headers and JSON data
    response = requests.post(
        f'{self.api_url}{endpoint}',
        headers=headers,
        json=json_data,
    )
    if response.status_code == 200:
        payload = response.json()
        # Use .get so a body without 'success' or 'error' produces the
        # descriptive failure below instead of a bare KeyError.
        if payload.get('success') and 'data' in payload:
            return payload['data']
        error_detail = payload.get('error', 'Unknown error')
        raise Exception(f'Failed to map URL. Error: {error_detail}')
    else:
        self._handle_error(response, 'map')
def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]: def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]:
""" """
Prepare the headers for API requests. Prepare the headers for API requests.