Rust SDK 1.0.0
This commit is contained in:
parent 93a20442e3
commit a078cdbd9d
@@ -1,40 +1,38 @@
use firecrawl::FirecrawlApp;
use firecrawl::{crawl::CrawlOptions, scrape::{ExtractOptions, ScrapeFormats, ScrapeOptions}, FirecrawlApp};
use serde_json::json;
use uuid::Uuid;

#[tokio::main]
async fn main() {
    // Initialize the FirecrawlApp with the API key
    let api_key = Some("fc-YOUR_API_KEY".to_string());
    let api_url = Some("http://0.0.0.0:3002".to_string());
    let app = FirecrawlApp::new(api_key, api_url).expect("Failed to initialize FirecrawlApp");
    let app = FirecrawlApp::new("fc-YOUR-API-KEY").expect("Failed to initialize FirecrawlApp");

    // or, connect to a self-hosted instance:
    // let app = FirecrawlApp::new_selfhosted("http://localhost:3002", None).expect("Failed to initialize FirecrawlApp");

    // Scrape a website
    let scrape_result = app.scrape_url("https://firecrawl.dev", None).await;
    match scrape_result {
        Ok(data) => println!("Scrape Result:\n{}", data["markdown"]),
        Err(e) => eprintln!("Scrape failed: {}", e),
        Ok(data) => println!("Scrape Result:\n{}", data.markdown.unwrap()),
        Err(e) => eprintln!("Scrape failed: {:#?}", e),
    }

    // Crawl a website
    let random_uuid = String::from(Uuid::new_v4());
    let idempotency_key = Some(random_uuid); // optional idempotency key
    let crawl_params = json!({
        "crawlerOptions": {
            "excludes": ["blog/*"]
        }
    });
    let idempotency_key = String::from(Uuid::new_v4());
    let crawl_options = CrawlOptions {
        exclude_paths: Some(vec![ "blog/*".to_string() ]),
        poll_interval: Some(2000),
        idempotency_key: Some(idempotency_key),
        ..Default::default()
    };
    let crawl_result = app
        .crawl_url(
            "https://mendable.ai",
            Some(crawl_params),
            true,
            2,
            idempotency_key,
            crawl_options,
        )
        .await;
    match crawl_result {
        Ok(data) => println!("Crawl Result:\n{}", data),
        Ok(data) => println!("Crawl Result (used {} credits):\n{:#?}", data.credits_used, data.data),
        Err(e) => eprintln!("Crawl failed: {}", e),
    }

@@ -62,21 +60,20 @@ async fn main() {
        "required": ["top"]
    });

    let llm_extraction_params = json!({
        "extractorOptions": {
            "extractionSchema": json_schema,
            "mode": "llm-extraction"
        },
        "pageOptions": {
            "onlyMainContent": true
        }
    });
    let llm_extraction_options = ScrapeOptions {
        formats: Some(vec![ ScrapeFormats::Extract ]),
        extract: Some(ExtractOptions {
            schema: Some(json_schema),
            ..Default::default()
        }),
        ..Default::default()
    };

    let llm_extraction_result = app
        .scrape_url("https://news.ycombinator.com", Some(llm_extraction_params))
        .scrape_url("https://news.ycombinator.com", llm_extraction_options)
        .await;
    match llm_extraction_result {
        Ok(data) => println!("LLM Extraction Result:\n{}", data["llm_extraction"]),
        Ok(data) => println!("LLM Extraction Result:\n{:#?}", data.extract.unwrap()),
        Err(e) => eprintln!("LLM Extraction failed: {}", e),
    }
}
@@ -48,8 +48,8 @@ impl From<CrawlScrapeFormats> for ScrapeFormats {
    }
}

#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CrawlScrapeOptions {
    /// Formats to extract from the page. (default: `[ Markdown ]`)
@@ -93,8 +93,8 @@ impl From<CrawlScrapeOptions> for ScrapeOptions {
    }
}

#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CrawlOptions {
    /// Options to pass through to the scraper.
@@ -103,12 +103,12 @@ pub struct CrawlOptions {
    /// URL RegEx patterns to (exclusively) include.
    ///
    /// For example, if you specified `"blog"`, only pages that have `blog` somewhere in the URL would be crawled.
    pub include_paths: Option<String>,
    pub include_paths: Option<Vec<String>>,

    /// URL RegEx patterns to exclude.
    ///
    /// For example, if you specified `"blog"`, pages that have `blog` somewhere in the URL would not be crawled.
    pub exclude_paths: Option<String>,
    pub exclude_paths: Option<Vec<String>>,

    /// Maximum URL depth to crawl, relative to the base URL. (default: `2`)
    pub max_depth: Option<u32>,
@@ -138,7 +138,6 @@ pub struct CrawlOptions {
}

#[derive(Deserialize, Serialize, Debug, Default)]
#[serde_with::skip_serializing_none]
#[serde(rename_all = "camelCase")]
struct CrawlRequestBody {
    url: String,
@@ -148,7 +147,6 @@ struct CrawlRequestBody {
}

#[derive(Deserialize, Serialize, Debug, Default)]
#[serde_with::skip_serializing_none]
#[serde(rename_all = "camelCase")]
struct CrawlResponse {
    /// This will always be `true` due to `FirecrawlApp::handle_response`.
@@ -175,8 +173,8 @@ pub enum CrawlStatusTypes {
    Cancelled,
}

#[derive(Deserialize, Serialize, Debug, Clone)]
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct CrawlStatus {
    /// The status of the crawl.
@@ -203,7 +201,6 @@ pub struct CrawlStatus {
}

#[derive(Deserialize, Serialize, Debug, Clone)]
#[serde_with::skip_serializing_none]
#[serde(rename_all = "camelCase")]
pub struct CrawlAsyncResponse {
    success: bool,
@@ -216,6 +213,7 @@ pub struct CrawlAsyncResponse {
}

impl FirecrawlApp {
    /// Initiates a crawl job for a URL using the Firecrawl API.
    pub async fn crawl_url_async(
        &self,
        url: impl AsRef<str>,
@@ -235,61 +233,63 @@ impl FirecrawlApp {
            .json(&body)
            .send()
            .await
            .map_err(|e| FirecrawlError::HttpRequestFailed(e.to_string()))?;
            .map_err(|e| FirecrawlError::HttpError(format!("Crawling {:?}", url.as_ref()), e))?;

        self.handle_response::<CrawlAsyncResponse>(response, "start crawl job").await
    }

    /// Performs a crawl job for a URL using the Firecrawl API, waiting for the end result. This may take a long time depending on the size of the target page and your options (namely `CrawlOptions.limit`).
    pub async fn crawl_url(
        &self,
        url: impl AsRef<str>,
        options: Option<CrawlOptions>,
    ) -> Result<Vec<Document>, FirecrawlError> {
        options: impl Into<Option<CrawlOptions>>,
    ) -> Result<CrawlStatus, FirecrawlError> {
        let options = options.into();
        let poll_interval = options.as_ref().and_then(|x| x.poll_interval).unwrap_or(2000);

        let res = self.crawl_url_async(url, options).await?;

        self.monitor_job_status(&res.id, poll_interval).await
    }

    pub async fn check_crawl_status(&self, id: &str) -> Result<CrawlStatus, FirecrawlError> {
    /// Checks for the status of a crawl, based on the crawl's ID. To be used in conjunction with `FirecrawlApp::crawl_url_async`.
    pub async fn check_crawl_status(&self, id: impl AsRef<str>) -> Result<CrawlStatus, FirecrawlError> {
        let response = self
            .client
            .get(&format!(
                "{}{}/crawl/{}",
                self.api_url, API_VERSION, id
                self.api_url, API_VERSION, id.as_ref()
            ))
            .headers(self.prepare_headers(None))
            .send()
            .await
            .map_err(|e| FirecrawlError::HttpRequestFailed(e.to_string()))?;
            .map_err(|e| FirecrawlError::HttpError(format!("Checking status of crawl {}", id.as_ref()), e))?;

        self.handle_response(response, "check crawl status").await
        self.handle_response(response, format!("Checking status of crawl {}", id.as_ref())).await
    }

    async fn monitor_job_status(
        &self,
        id: &str,
        poll_interval: u64,
    ) -> Result<Vec<Document>, FirecrawlError> {
    ) -> Result<CrawlStatus, FirecrawlError> {
        loop {
            let status_data = self.check_crawl_status(id).await?;
            match status_data.status {
                CrawlStatusTypes::Completed => {
                    return Ok(status_data.data);
                    return Ok(status_data);
                }
                CrawlStatusTypes::Scraping => {
                    tokio::time::sleep(tokio::time::Duration::from_secs(poll_interval)).await;
                    tokio::time::sleep(tokio::time::Duration::from_millis(poll_interval)).await;
                }
                CrawlStatusTypes::Failed => {
                    return Err(FirecrawlError::CrawlJobFailed(format!(
                        "Crawl job failed."
                    )));
                    ), status_data));
                }
                CrawlStatusTypes::Cancelled => {
                    return Err(FirecrawlError::CrawlJobFailed(format!(
                        "Crawl job was cancelled."
                    )));
                    ), status_data));
                }
            }
        }
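Taken together, the changes above mean a crawl is now driven through `CrawlOptions` and returns a `CrawlStatus` rather than a bare `Vec<Document>`. A minimal sketch of how that is intended to be used, assuming only what is visible in this diff (`poll_interval` is in milliseconds, `status.data` holds the scraped `Document`s); it is an illustration, not part of the commit:

use firecrawl::{crawl::CrawlOptions, FirecrawlApp};

#[tokio::main]
async fn main() {
    let app = FirecrawlApp::new("fc-YOUR-API-KEY").expect("Failed to initialize FirecrawlApp");

    // poll_interval is in milliseconds; crawl_url waits until the job finishes.
    let options = CrawlOptions {
        exclude_paths: Some(vec!["blog/*".to_string()]),
        poll_interval: Some(1000),
        ..Default::default()
    };

    match app.crawl_url("https://firecrawl.dev", options).await {
        Ok(status) => {
            // CrawlStatus::data holds one Document per crawled page.
            for doc in status.data {
                println!("{}", doc.markdown.unwrap_or_default());
            }
        }
        Err(e) => eprintln!("Crawl failed: {:#?}", e),
    }
}

For larger jobs, `crawl_url_async` together with `check_crawl_status` (both above) lets the caller poll on its own schedule instead of blocking on `crawl_url`.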
@@ -1,8 +1,8 @@
use serde::{Deserialize, Serialize};
use serde_json::Value;

#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct DocumentMetadata {
    // firecrawl specific
@@ -12,8 +12,8 @@ pub struct DocumentMetadata {
    pub error: Option<String>,

    // basic meta tags
    pub title: String,
    pub description: String,
    pub title: Option<String>,
    pub description: Option<String>,
    pub language: Option<String>,
    pub keywords: Option<String>,
    pub robots: Option<String>,
@@ -26,7 +26,7 @@ pub struct DocumentMetadata {
    pub og_audio: Option<String>,
    pub og_determiner: Option<String>,
    pub og_locale: Option<String>,
    pub og_locale_alternate: Option<String>,
    pub og_locale_alternate: Option<Vec<String>>,
    pub og_site_name: Option<String>,
    pub og_video: Option<String>,

@@ -49,8 +49,8 @@ pub struct DocumentMetadata {
    pub dcterms_created: Option<String>,
}

#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Document {
    /// A list of the links on the page, present if `ScrapeFormats::Markdown` is present in `ScrapeOptions.formats`. (default)
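Because `title` and `description` are now `Option<String>`, callers must handle pages with missing metadata. A small sketch under the assumption that `Document` exposes a public `metadata: DocumentMetadata` field (implied by the tests elsewhere in this commit); illustrative only:

use firecrawl::FirecrawlApp;

#[tokio::main]
async fn main() {
    let app = FirecrawlApp::new("fc-YOUR-API-KEY").expect("Failed to initialize FirecrawlApp");
    let doc = app.scrape_url("https://firecrawl.dev", None).await.unwrap();

    // Metadata fields are optional in 1.0.0, so fall back explicitly.
    let title = doc.metadata.title.as_deref().unwrap_or("(no title)");
    let description = doc.metadata.description.as_deref().unwrap_or("(no description)");
    println!("{}: {}", title, description);
}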
@@ -1,7 +1,11 @@
use std::fmt::Display;

use serde::{Deserialize, Serialize};
use serde_json::Value;
use thiserror::Error;

use crate::crawl::CrawlStatus;

#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct FirecrawlAPIError {
    /// Always false.
@@ -14,16 +18,28 @@ pub struct FirecrawlAPIError {
    pub details: Option<Value>,
}

impl Display for FirecrawlAPIError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Some(details) = self.details.as_ref() {
            write!(f, "{} ({})", self.error, details)
        } else {
            write!(f, "{}", self.error)
        }
    }
}

#[derive(Error, Debug)]
pub enum FirecrawlError {
    #[error("HTTP request failed: {0}")]
    HttpRequestFailed(String),
    #[error("API key not provided")]
    APIKeyNotProvided,
    #[error("{0} failed: HTTP error {1}: {2}")]
    HttpRequestFailed(String, u16, String),
    #[error("{0} failed: HTTP error: {1}")]
    HttpError(String, reqwest::Error),
    #[error("Failed to parse response as text: {0}")]
    ResponseParseErrorText(reqwest::Error),
    #[error("Failed to parse response: {0}")]
    ResponseParseError(String),
    #[error("API error")]
    APIError(FirecrawlAPIError),
    #[error("Crawl job failed or stopped: {0}")]
    CrawlJobFailed(String),
    ResponseParseError(serde_json::Error),
    #[error("{0} failed: {1}")]
    APIError(String, FirecrawlAPIError),
    #[error("Crawl job failed: {0}")]
    CrawlJobFailed(String, CrawlStatus),
}
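The error enum now carries the failing action and typed sources, so callers can match on it instead of parsing strings. A hedged sketch of that pattern; the variant shapes come from the definition above, everything else is illustrative:

use firecrawl::{FirecrawlApp, FirecrawlError};

#[tokio::main]
async fn main() {
    let app = FirecrawlApp::new("fc-YOUR-API-KEY").expect("Failed to initialize FirecrawlApp");

    match app.scrape_url("https://example.com", None).await {
        Ok(doc) => println!("{}", doc.markdown.unwrap_or_default()),
        // The API rejected the request; the first field names the action that failed.
        Err(FirecrawlError::APIError(action, api_error)) => {
            eprintln!("{} failed: {}", action, api_error)
        }
        // Transport-level failure from reqwest.
        Err(FirecrawlError::HttpError(action, source)) => eprintln!("{} failed: {}", action, source),
        // Anything else (parse errors, non-2xx statuses, ...).
        Err(other) => eprintln!("scrape failed: {}", other),
    }
}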
@@ -1,18 +1,18 @@
use reqwest::{Client, Response};
use serde::de::DeserializeOwned;
use serde_json::json;
use serde_json::Value;

pub mod crawl;
pub mod document;
mod error;
pub mod map;
pub mod scrape;

pub use error::FirecrawlError;

#[derive(Clone, Debug)]
pub struct FirecrawlApp {
    api_key: String,
    api_key: Option<String>,
    api_url: String,
    client: Client,
}
@@ -20,15 +20,14 @@ pub struct FirecrawlApp {
pub(crate) const API_VERSION: &str = "/v1";

impl FirecrawlApp {
    pub fn new(api_key: Option<String>, api_url: Option<String>) -> Result<Self, FirecrawlError> {
        let api_key = api_key
            .ok_or(FirecrawlError::APIKeyNotProvided)?;
        let api_url = api_url
            .unwrap_or_else(|| "https://api.firecrawl.dev".to_string());
    pub fn new(api_key: impl AsRef<str>) -> Result<Self, FirecrawlError> {
        FirecrawlApp::new_selfhosted("https://api.firecrawl.dev", Some(api_key))
    }

    pub fn new_selfhosted(api_url: impl AsRef<str>, api_key: Option<impl AsRef<str>>) -> Result<Self, FirecrawlError> {
        Ok(FirecrawlApp {
            api_key,
            api_url,
            api_key: api_key.map(|x| x.as_ref().to_string()),
            api_url: api_url.as_ref().to_string(),
            client: Client::new(),
        })
    }
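A brief sketch of the two constructors above, illustrative only. Note that with `api_key: Option<impl AsRef<str>>`, a keyless self-hosted connection is written here as `None::<String>` so the generic parameter can be inferred:

use firecrawl::FirecrawlApp;

fn main() {
    // Cloud API: the key is now a plain string, no Option wrapper.
    let cloud = FirecrawlApp::new("fc-YOUR-API-KEY").expect("Failed to initialize FirecrawlApp");

    // Self-hosted instance with no API key: annotate None so the
    // Option<impl AsRef<str>> parameter can be inferred.
    let selfhosted = FirecrawlApp::new_selfhosted("http://localhost:3002", None::<String>)
        .expect("Failed to initialize FirecrawlApp");

    // Self-hosted instance that still requires a key.
    let secured = FirecrawlApp::new_selfhosted("http://localhost:3002", Some("fc-YOUR-API-KEY"))
        .expect("Failed to initialize FirecrawlApp");

    let _ = (cloud, selfhosted, secured);
}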
@@ -36,10 +35,12 @@ impl FirecrawlApp {
    fn prepare_headers(&self, idempotency_key: Option<&String>) -> reqwest::header::HeaderMap {
        let mut headers = reqwest::header::HeaderMap::new();
        headers.insert("Content-Type", "application/json".parse().unwrap());
        headers.insert(
            "Authorization",
            format!("Bearer {}", self.api_key).parse().unwrap(),
        );
        if let Some(api_key) = self.api_key.as_ref() {
            headers.insert(
                "Authorization",
                format!("Bearer {}", api_key).parse().unwrap(),
            );
        }
        if let Some(key) = idempotency_key {
            headers.insert("x-idempotency-key", key.parse().unwrap());
        }
@@ -51,48 +52,34 @@ impl FirecrawlApp {
        response: Response,
        action: impl AsRef<str>,
    ) -> Result<T, FirecrawlError> {
        if response.status().is_success() {
            let response_json: Value = response
                .json()
                .await
                .map_err(|e| FirecrawlError::ResponseParseError(e.to_string()))?;
            if response_json["success"].as_bool().unwrap_or(false) {
                Ok(serde_json::from_value(response_json).map_err(|e| FirecrawlError::ResponseParseError(e.to_string()))?)
            } else {
                Err(FirecrawlError::HttpRequestFailed(format!(
                    "Failed to {}: {}",
                    action.as_ref(), response_json["error"]
                )))
            }
        } else {
            let status_code = response.status().as_u16();
            let error_message = response
                .json::<Value>()
                .await
                .unwrap_or_else(|_| json!({"error": "No additional error details provided."}));
            let message = match status_code {
                402 => format!(
                    "Payment Required: Failed to {}. {}",
                    action.as_ref(), error_message["error"]
                ),
                408 => format!(
                    "Request Timeout: Failed to {} as the request timed out. {}",
                    action.as_ref(), error_message["error"]
                ),
                409 => format!(
                    "Conflict: Failed to {} due to a conflict. {}",
                    action.as_ref(), error_message["error"]
                ),
                500 => format!(
                    "Internal Server Error: Failed to {}. {}",
                    action.as_ref(), error_message["error"]
                ),
                _ => format!(
                    "Unexpected error during {}: Status code {}. {}",
                    action.as_ref(), status_code, error_message["error"]
                ),
            };
            Err(FirecrawlError::HttpRequestFailed(message))
        let (is_success, status) = (response.status().is_success(), response.status());

        let response = response
            .text()
            .await
            .map_err(|e| FirecrawlError::ResponseParseErrorText(e))
            .and_then(|response_json| serde_json::from_str::<Value>(&response_json).map_err(|e| FirecrawlError::ResponseParseError(e)))
            .and_then(|response_value| {
                if response_value["success"].as_bool().unwrap_or(false) {
                    Ok(serde_json::from_value::<T>(response_value).map_err(|e| FirecrawlError::ResponseParseError(e))?)
                } else {
                    Err(FirecrawlError::APIError(
                        action.as_ref().to_string(),
                        serde_json::from_value(response_value).map_err(|e| FirecrawlError::ResponseParseError(e))?
                    ))
                }
            });

        match &response {
            Ok(_) => response,
            Err(FirecrawlError::ResponseParseError(_)) | Err(FirecrawlError::ResponseParseErrorText(_)) => {
                if is_success {
                    response
                } else {
                    Err(FirecrawlError::HttpRequestFailed(action.as_ref().to_string(), status.as_u16(), status.as_str().to_string()))
                }
            },
            Err(_) => response,
        }
    }
}
apps/rust-sdk/src/map.rs (new file, 66 lines)
@@ -0,0 +1,66 @@
use serde::{Deserialize, Serialize};

use crate::{FirecrawlApp, FirecrawlError, API_VERSION};

#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default)]
#[serde(rename_all = "camelCase")]
pub struct MapOptions {
    /// Search query to use for mapping
    pub search: Option<String>,

    /// Ignore the website sitemap when crawling (default: `true`)
    pub ignore_sitemap: Option<bool>,

    /// Include subdomains of the website (default: `true`)
    pub include_subdomains: Option<bool>,

    /// Maximum number of links to return (default: `5000`)
    pub exclude_tags: Option<u32>,
}

#[derive(Deserialize, Serialize, Debug, Default)]
#[serde(rename_all = "camelCase")]
struct MapRequestBody {
    url: String,

    #[serde(flatten)]
    options: MapOptions,
}

#[derive(Deserialize, Serialize, Debug, Default)]
#[serde(rename_all = "camelCase")]
struct MapResponse {
    success: bool,

    links: Vec<String>,
}

impl FirecrawlApp {
    /// Returns links from a URL using the Firecrawl API.
    pub async fn map_url(
        &self,
        url: impl AsRef<str>,
        options: impl Into<Option<MapOptions>>,
    ) -> Result<Vec<String>, FirecrawlError> {
        let body = MapRequestBody {
            url: url.as_ref().to_string(),
            options: options.into().unwrap_or_default(),
        };

        let headers = self.prepare_headers(None);

        let response = self
            .client
            .post(&format!("{}{}/map", self.api_url, API_VERSION))
            .headers(headers)
            .json(&body)
            .send()
            .await
            .map_err(|e| FirecrawlError::HttpError(format!("Mapping {:?}", url.as_ref()), e))?;

        let response = self.handle_response::<MapResponse>(response, "scrape URL").await?;

        Ok(response.links)
    }
}
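A short usage sketch for the new `/map` endpoint, using only the `MapOptions` fields defined above; illustrative, not part of the commit:

use firecrawl::{map::MapOptions, FirecrawlApp};

#[tokio::main]
async fn main() {
    let app = FirecrawlApp::new("fc-YOUR-API-KEY").expect("Failed to initialize FirecrawlApp");

    // Optionally narrow the map to URLs matching a search term.
    let options = MapOptions {
        search: Some("docs".to_string()),
        ..Default::default()
    };

    match app.map_url("https://firecrawl.dev", options).await {
        Ok(links) => {
            for link in links {
                println!("{}", link);
            }
        }
        Err(e) => eprintln!("Map failed: {:#?}", e),
    }
}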
@@ -42,21 +42,21 @@ pub enum ScrapeFormats {
    Extract,
}

#[derive(Deserialize, Serialize, Debug, Default)]
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default)]
#[serde(rename_all = "camelCase")]
pub struct ExtractOptions {
    /// Schema the output should adhere to, provided in JSON Schema format.
    pub schema: Option<Value>,

    pub system_prompt: Option<Value>,
    pub system_prompt: Option<String>,

    /// Extraction prompt to send to the LLM agent along with the page content.
    pub prompt: Option<Value>,
    pub prompt: Option<String>,
}

#[derive(Deserialize, Serialize, Debug, Default)]
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default)]
#[serde(rename_all = "camelCase")]
pub struct ScrapeOptions {
    /// Formats to extract from the page. (default: `[ Markdown ]`)
@@ -89,7 +89,6 @@ pub struct ScrapeOptions {
}

#[derive(Deserialize, Serialize, Debug, Default)]
#[serde_with::skip_serializing_none]
#[serde(rename_all = "camelCase")]
struct ScrapeRequestBody {
    url: String,
@@ -99,7 +98,6 @@ struct ScrapeRequestBody {
}

#[derive(Deserialize, Serialize, Debug, Default)]
#[serde_with::skip_serializing_none]
#[serde(rename_all = "camelCase")]
struct ScrapeResponse {
    /// This will always be `true` due to `FirecrawlApp::handle_response`.
@@ -111,14 +109,15 @@ struct ScrapeResponse {
}

impl FirecrawlApp {
    /// Scrapes a URL using the Firecrawl API.
    pub async fn scrape_url(
        &self,
        url: impl AsRef<str>,
        options: Option<ScrapeOptions>,
        options: impl Into<Option<ScrapeOptions>>,
    ) -> Result<Document, FirecrawlError> {
        let body = ScrapeRequestBody {
            url: url.as_ref().to_string(),
            options: options.unwrap_or_default(),
            options: options.into().unwrap_or_default(),
        };

        let headers = self.prepare_headers(None);
@@ -130,7 +129,7 @@ impl FirecrawlApp {
            .json(&body)
            .send()
            .await
            .map_err(|e| FirecrawlError::HttpRequestFailed(e.to_string()))?;
            .map_err(|e| FirecrawlError::HttpError(format!("Scraping {:?}", url.as_ref()), e))?;

        let response = self.handle_response::<ScrapeResponse>(response, "scrape URL").await?;

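A sketch of the reworked `scrape_url`, requesting Markdown and HTML through `ScrapeOptions.formats`; field and variant names follow the definitions above, the rest is illustrative:

use firecrawl::{scrape::{ScrapeFormats, ScrapeOptions}, FirecrawlApp};

#[tokio::main]
async fn main() {
    let app = FirecrawlApp::new("fc-YOUR-API-KEY").expect("Failed to initialize FirecrawlApp");

    // Request more than the default Markdown format.
    let options = ScrapeOptions {
        formats: Some(vec![ScrapeFormats::Markdown, ScrapeFormats::HTML]),
        ..Default::default()
    };

    match app.scrape_url("https://firecrawl.dev", options).await {
        Ok(doc) => {
            println!("markdown:\n{}", doc.markdown.unwrap_or_default());
            println!("html:\n{}", doc.html.unwrap_or_default());
        }
        Err(e) => eprintln!("Scrape failed: {:#?}", e),
    }
}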
@@ -1,24 +1,16 @@
use assert_matches::assert_matches;
use dotenvy::dotenv;
use firecrawl::scrape::{ExtractOptions, ScrapeFormats, ScrapeOptions};
use firecrawl::FirecrawlApp;
use serde_json::json;
use std::env;
use std::time::Duration;
use tokio::time::sleep;

#[tokio::test]
async fn test_no_api_key() {
    dotenv().ok();
    let api_url = env::var("API_URL").expect("API_URL environment variable is not set");
    assert_matches!(FirecrawlApp::new(None, Some(api_url)), Err(e) if e.to_string() == "API key not provided");
}

#[tokio::test]
async fn test_blocklisted_url() {
    dotenv().ok();
    let api_url = env::var("API_URL").unwrap();
    let api_key = env::var("TEST_API_KEY").unwrap();
    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
    let api_key = env::var("TEST_API_KEY").ok();
    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
    let blocklisted_url = "https://facebook.com/fake-test";
    let result = app.scrape_url(blocklisted_url, None).await;

@@ -32,74 +24,65 @@ async fn test_blocklisted_url() {
async fn test_successful_response_with_valid_preview_token() {
    dotenv().ok();
    let api_url = env::var("API_URL").unwrap();
    let app = FirecrawlApp::new(
        Some("this_is_just_a_preview_token".to_string()),
        Some(api_url),
    let app = FirecrawlApp::new_selfhosted(
        api_url,
        Some("this_is_just_a_preview_token"),
    )
    .unwrap();
    let result = app
        .scrape_url("https://roastmywebsite.ai", None)
        .await
        .unwrap();
    assert!(result.as_object().unwrap().contains_key("content"));
    assert!(result["content"].as_str().unwrap().contains("_Roast_"));
    assert!(result.markdown.is_some());
    assert!(result.markdown.unwrap().contains("_Roast_"));
}

#[tokio::test]
async fn test_scrape_url_e2e() {
    dotenv().ok();
    let api_url = env::var("API_URL").unwrap();
    let api_key = env::var("TEST_API_KEY").unwrap();
    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
    let api_key = env::var("TEST_API_KEY").ok();
    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
    let result = app
        .scrape_url("https://roastmywebsite.ai", None)
        .await
        .unwrap();
    assert!(result.as_object().unwrap().contains_key("content"));
    assert!(result.as_object().unwrap().contains_key("markdown"));
    assert!(result.as_object().unwrap().contains_key("metadata"));
    assert!(!result.as_object().unwrap().contains_key("html"));
    assert!(result["content"].as_str().unwrap().contains("_Roast_"));
    assert!(result.markdown.is_some());
    assert!(result.markdown.unwrap().contains("_Roast_"));
}

#[tokio::test]
async fn test_successful_response_with_valid_api_key_and_include_html() {
    dotenv().ok();
    let api_url = env::var("API_URL").unwrap();
    let api_key = env::var("TEST_API_KEY").unwrap();
    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
    let params = json!({
        "pageOptions": {
            "includeHtml": true
        }
    });
    let api_key = env::var("TEST_API_KEY").ok();
    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
    let params = ScrapeOptions {
        formats: vec! [ ScrapeFormats::Markdown, ScrapeFormats::HTML ].into(),
        ..Default::default()
    };
    let result = app
        .scrape_url("https://roastmywebsite.ai", Some(params))
        .scrape_url("https://roastmywebsite.ai", params)
        .await
        .unwrap();
    assert!(result.as_object().unwrap().contains_key("content"));
    assert!(result.as_object().unwrap().contains_key("markdown"));
    assert!(result.as_object().unwrap().contains_key("html"));
    assert!(result.as_object().unwrap().contains_key("metadata"));
    assert!(result["content"].as_str().unwrap().contains("_Roast_"));
    assert!(result["markdown"].as_str().unwrap().contains("_Roast_"));
    assert!(result["html"].as_str().unwrap().contains("<h1"));
    assert!(result.markdown.is_some());
    assert!(result.html.is_some());
    assert!(result.markdown.unwrap().contains("_Roast_"));
    assert!(result.html.unwrap().contains("<h1"));
}

#[tokio::test]
async fn test_successful_response_for_valid_scrape_with_pdf_file() {
    dotenv().ok();
    let api_url = env::var("API_URL").unwrap();
    let api_key = env::var("TEST_API_KEY").unwrap();
    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
    let api_key = env::var("TEST_API_KEY").ok();
    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
    let result = app
        .scrape_url("https://arxiv.org/pdf/astro-ph/9301001.pdf", None)
        .await
        .unwrap();
    assert!(result.as_object().unwrap().contains_key("content"));
    assert!(result.as_object().unwrap().contains_key("metadata"));
    assert!(result["content"]
        .as_str()
    assert!(result.markdown.is_some());
    assert!(result.markdown
        .unwrap()
        .contains("We present spectrophotometric observations of the Broad Line Radio Galaxy"));
}
@@ -108,17 +91,14 @@ async fn test_successful_response_for_valid_scrape_with_pdf_file() {
async fn test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension() {
    dotenv().ok();
    let api_url = env::var("API_URL").unwrap();
    let api_key = env::var("TEST_API_KEY").unwrap();
    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
    let api_key = env::var("TEST_API_KEY").ok();
    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
    let result = app
        .scrape_url("https://arxiv.org/pdf/astro-ph/9301001", None)
        .await
        .unwrap();
    sleep(Duration::from_secs(6)).await; // wait for 6 seconds
    assert!(result.as_object().unwrap().contains_key("content"));
    assert!(result.as_object().unwrap().contains_key("metadata"));
    assert!(result["content"]
        .as_str()
    assert!(result.markdown.is_some());
    assert!(result.markdown
        .unwrap()
        .contains("We present spectrophotometric observations of the Broad Line Radio Galaxy"));
}
@@ -127,10 +107,10 @@ async fn test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension() {
async fn test_should_return_error_for_blocklisted_url() {
    dotenv().ok();
    let api_url = env::var("API_URL").unwrap();
    let api_key = env::var("TEST_API_KEY").unwrap();
    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
    let api_key = env::var("TEST_API_KEY").ok();
    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
    let blocklisted_url = "https://twitter.com/fake-test";
    let result = app.crawl_url(blocklisted_url, None, true, 1, None).await;
    let result = app.crawl_url(blocklisted_url, None).await;

    assert_matches!(
        result,
@@ -142,13 +122,13 @@ async fn test_should_return_error_for_blocklisted_url() {
async fn test_llm_extraction() {
    dotenv().ok();
    let api_url = env::var("API_URL").unwrap();
    let api_key = env::var("TEST_API_KEY").unwrap();
    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
    let params = json!({
        "extractorOptions": {
            "mode": "llm-extraction",
            "extractionPrompt": "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
            "extractionSchema": {
    let api_key = env::var("TEST_API_KEY").ok();
    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
    let options = ScrapeOptions {
        formats: vec! [ ScrapeFormats::Extract ].into(),
        extract: ExtractOptions {
            prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source".to_string().into(),
            schema: json!({
                "type": "object",
                "properties": {
                    "company_mission": {"type": "string"},
@@ -156,15 +136,17 @@ async fn test_llm_extraction() {
                    "is_open_source": {"type": "boolean"}
                },
                "required": ["company_mission", "supports_sso", "is_open_source"]
            }
        }
    });
            }).into(),
            ..Default::default()
        }.into(),
        ..Default::default()
    };
    let result = app
        .scrape_url("https://mendable.ai", Some(params))
        .scrape_url("https://mendable.ai", options)
        .await
        .unwrap();
    assert!(result.as_object().unwrap().contains_key("llm_extraction"));
    let llm_extraction = &result["llm_extraction"];
    assert!(result.extract.is_some());
    let llm_extraction = &result.extract.unwrap();
    assert!(llm_extraction
        .as_object()
        .unwrap()