Rust SDK 1.0.0

Gergő Móricz 2024-09-20 19:36:07 +02:00
parent 93a20442e3
commit a078cdbd9d
8 changed files with 242 additions and 195 deletions

View File

@@ -1,40 +1,38 @@
-use firecrawl::FirecrawlApp;
+use firecrawl::{crawl::CrawlOptions, scrape::{ExtractOptions, ScrapeFormats, ScrapeOptions}, FirecrawlApp};
 use serde_json::json;
 use uuid::Uuid;
 
 #[tokio::main]
 async fn main() {
     // Initialize the FirecrawlApp with the API key
-    let api_key = Some("fc-YOUR_API_KEY".to_string());
-    let api_url = Some("http://0.0.0.0:3002".to_string());
-    let app = FirecrawlApp::new(api_key, api_url).expect("Failed to initialize FirecrawlApp");
+    let app = FirecrawlApp::new("fc-YOUR-API-KEY").expect("Failed to initialize FirecrawlApp");
+
+    // or, connect to a self-hosted instance:
+    // let app = FirecrawlApp::new_selfhosted("http://localhost:3002", None).expect("Failed to initialize FirecrawlApp");
 
     // Scrape a website
     let scrape_result = app.scrape_url("https://firecrawl.dev", None).await;
     match scrape_result {
-        Ok(data) => println!("Scrape Result:\n{}", data["markdown"]),
-        Err(e) => eprintln!("Scrape failed: {}", e),
+        Ok(data) => println!("Scrape Result:\n{}", data.markdown.unwrap()),
+        Err(e) => eprintln!("Scrape failed: {:#?}", e),
     }
 
     // Crawl a website
-    let random_uuid = String::from(Uuid::new_v4());
-    let idempotency_key = Some(random_uuid); // optional idempotency key
-    let crawl_params = json!({
-        "crawlerOptions": {
-            "excludes": ["blog/*"]
-        }
-    });
+    let idempotency_key = String::from(Uuid::new_v4());
+    let crawl_options = CrawlOptions {
+        exclude_paths: Some(vec![ "blog/*".to_string() ]),
+        poll_interval: Some(2000),
+        idempotency_key: Some(idempotency_key),
+        ..Default::default()
+    };
     let crawl_result = app
         .crawl_url(
             "https://mendable.ai",
-            Some(crawl_params),
-            true,
-            2,
-            idempotency_key,
+            crawl_options,
         )
         .await;
     match crawl_result {
-        Ok(data) => println!("Crawl Result:\n{}", data),
+        Ok(data) => println!("Crawl Result (used {} credits):\n{:#?}", data.credits_used, data.data),
         Err(e) => eprintln!("Crawl failed: {}", e),
     }
@@ -62,21 +60,20 @@ async fn main() {
         "required": ["top"]
     });
-    let llm_extraction_params = json!({
-        "extractorOptions": {
-            "extractionSchema": json_schema,
-            "mode": "llm-extraction"
-        },
-        "pageOptions": {
-            "onlyMainContent": true
-        }
-    });
+    let llm_extraction_options = ScrapeOptions {
+        formats: Some(vec![ ScrapeFormats::Extract ]),
+        extract: Some(ExtractOptions {
+            schema: Some(json_schema),
+            ..Default::default()
+        }),
+        ..Default::default()
+    };
     let llm_extraction_result = app
-        .scrape_url("https://news.ycombinator.com", Some(llm_extraction_params))
+        .scrape_url("https://news.ycombinator.com", llm_extraction_options)
         .await;
     match llm_extraction_result {
-        Ok(data) => println!("LLM Extraction Result:\n{}", data["llm_extraction"]),
+        Ok(data) => println!("LLM Extraction Result:\n{:#?}", data.extract.unwrap()),
         Err(e) => eprintln!("LLM Extraction failed: {}", e),
     }
 }

View File

@@ -48,8 +48,8 @@ impl From<CrawlScrapeFormats> for ScrapeFormats {
     }
 }
 
-#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct CrawlScrapeOptions {
     /// Formats to extract from the page. (default: `[ Markdown ]`)
@@ -93,8 +93,8 @@ impl From<CrawlScrapeOptions> for ScrapeOptions {
     }
 }
 
-#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct CrawlOptions {
     /// Options to pass through to the scraper.
@@ -103,12 +103,12 @@ pub struct CrawlOptions {
     /// URL RegEx patterns to (exclusively) include.
     ///
     /// For example, if you specified `"blog"`, only pages that have `blog` somewhere in the URL would be crawled.
-    pub include_paths: Option<String>,
+    pub include_paths: Option<Vec<String>>,
 
     /// URL RegEx patterns to exclude.
     ///
     /// For example, if you specified `"blog"`, pages that have `blog` somewhere in the URL would not be crawled.
-    pub exclude_paths: Option<String>,
+    pub exclude_paths: Option<Vec<String>>,
 
     /// Maximum URL depth to crawl, relative to the base URL. (default: `2`)
     pub max_depth: Option<u32>,
@@ -138,7 +138,6 @@ pub struct CrawlOptions {
 }
 
 #[derive(Deserialize, Serialize, Debug, Default)]
-#[serde_with::skip_serializing_none]
 #[serde(rename_all = "camelCase")]
 struct CrawlRequestBody {
     url: String,
@@ -148,7 +147,6 @@ struct CrawlRequestBody {
 }
 
 #[derive(Deserialize, Serialize, Debug, Default)]
-#[serde_with::skip_serializing_none]
 #[serde(rename_all = "camelCase")]
 struct CrawlResponse {
     /// This will always be `true` due to `FirecrawlApp::handle_response`.
@@ -175,8 +173,8 @@ pub enum CrawlStatusTypes {
     Cancelled,
 }
 
-#[derive(Deserialize, Serialize, Debug, Clone)]
 #[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct CrawlStatus {
     /// The status of the crawl.
@@ -203,7 +201,6 @@ pub struct CrawlStatus {
 }
 
 #[derive(Deserialize, Serialize, Debug, Clone)]
-#[serde_with::skip_serializing_none]
 #[serde(rename_all = "camelCase")]
 pub struct CrawlAsyncResponse {
     success: bool,
@@ -216,6 +213,7 @@ pub struct CrawlAsyncResponse {
 }
 
 impl FirecrawlApp {
+    /// Initiates a crawl job for a URL using the Firecrawl API.
     pub async fn crawl_url_async(
         &self,
         url: impl AsRef<str>,
@@ -235,61 +233,63 @@ impl FirecrawlApp {
             .json(&body)
             .send()
             .await
-            .map_err(|e| FirecrawlError::HttpRequestFailed(e.to_string()))?;
+            .map_err(|e| FirecrawlError::HttpError(format!("Crawling {:?}", url.as_ref()), e))?;
 
         self.handle_response::<CrawlAsyncResponse>(response, "start crawl job").await
     }
 
+    /// Performs a crawl job for a URL using the Firecrawl API, waiting for the end result. This may take a long time depending on the size of the target page and your options (namely `CrawlOptions.limit`).
     pub async fn crawl_url(
         &self,
         url: impl AsRef<str>,
-        options: Option<CrawlOptions>,
-    ) -> Result<Vec<Document>, FirecrawlError> {
+        options: impl Into<Option<CrawlOptions>>,
+    ) -> Result<CrawlStatus, FirecrawlError> {
+        let options = options.into();
         let poll_interval = options.as_ref().and_then(|x| x.poll_interval).unwrap_or(2000);
         let res = self.crawl_url_async(url, options).await?;
 
         self.monitor_job_status(&res.id, poll_interval).await
     }
 
-    pub async fn check_crawl_status(&self, id: &str) -> Result<CrawlStatus, FirecrawlError> {
+    /// Checks for the status of a crawl, based on the crawl's ID. To be used in conjunction with `FirecrawlApp::crawl_url_async`.
+    pub async fn check_crawl_status(&self, id: impl AsRef<str>) -> Result<CrawlStatus, FirecrawlError> {
         let response = self
             .client
             .get(&format!(
                 "{}{}/crawl/{}",
-                self.api_url, API_VERSION, id
+                self.api_url, API_VERSION, id.as_ref()
             ))
             .headers(self.prepare_headers(None))
             .send()
             .await
-            .map_err(|e| FirecrawlError::HttpRequestFailed(e.to_string()))?;
+            .map_err(|e| FirecrawlError::HttpError(format!("Checking status of crawl {}", id.as_ref()), e))?;
 
-        self.handle_response(response, "check crawl status").await
+        self.handle_response(response, format!("Checking status of crawl {}", id.as_ref())).await
     }
 
     async fn monitor_job_status(
         &self,
         id: &str,
         poll_interval: u64,
-    ) -> Result<Vec<Document>, FirecrawlError> {
+    ) -> Result<CrawlStatus, FirecrawlError> {
         loop {
             let status_data = self.check_crawl_status(id).await?;
             match status_data.status {
                 CrawlStatusTypes::Completed => {
-                    return Ok(status_data.data);
+                    return Ok(status_data);
                 }
                 CrawlStatusTypes::Scraping => {
-                    tokio::time::sleep(tokio::time::Duration::from_secs(poll_interval)).await;
+                    tokio::time::sleep(tokio::time::Duration::from_millis(poll_interval)).await;
                 }
                 CrawlStatusTypes::Failed => {
                     return Err(FirecrawlError::CrawlJobFailed(format!(
                         "Crawl job failed."
-                    )));
+                    ), status_data));
                 }
                 CrawlStatusTypes::Cancelled => {
                     return Err(FirecrawlError::CrawlJobFailed(format!(
                         "Crawl job was cancelled."
-                    )));
+                    ), status_data));
                }
            }
        }
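
For reference, a minimal sketch (not part of this commit) of calling the reworked crawl API above; the API key and URL are placeholders. It relies on two things visible in this diff: `crawl_url` now resolves to the full `CrawlStatus`, and `CrawlJobFailed` carries that status, so documents scraped before a failure remain available.

use firecrawl::{crawl::CrawlOptions, FirecrawlApp, FirecrawlError};

#[tokio::main]
async fn main() {
    // Placeholder key; crawl_url accepts anything convertible into Option<CrawlOptions>
    let app = FirecrawlApp::new("fc-YOUR-API-KEY").expect("Failed to initialize FirecrawlApp");

    let options = CrawlOptions {
        poll_interval: Some(500), // check_crawl_status is polled every 500 ms instead of the 2000 ms default
        ..Default::default()
    };

    match app.crawl_url("https://firecrawl.dev", options).await {
        // The whole CrawlStatus is returned, not just the documents
        Ok(status) => println!("Crawled {} pages", status.data.len()),
        // The failing status is attached, so partially crawled documents are still usable
        Err(FirecrawlError::CrawlJobFailed(reason, status)) => {
            eprintln!("Crawl failed ({}); salvaged {} pages", reason, status.data.len());
        }
        Err(e) => eprintln!("Crawl failed: {}", e),
    }
}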

View File

@@ -1,8 +1,8 @@
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 
-#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct DocumentMetadata {
     // firecrawl specific
@@ -12,8 +12,8 @@ pub struct DocumentMetadata {
     pub error: Option<String>,
 
     // basic meta tags
-    pub title: String,
-    pub description: String,
+    pub title: Option<String>,
+    pub description: Option<String>,
     pub language: Option<String>,
     pub keywords: Option<String>,
     pub robots: Option<String>,
@@ -26,7 +26,7 @@ pub struct DocumentMetadata {
     pub og_audio: Option<String>,
     pub og_determiner: Option<String>,
     pub og_locale: Option<String>,
-    pub og_locale_alternate: Option<String>,
+    pub og_locale_alternate: Option<Vec<String>>,
     pub og_site_name: Option<String>,
     pub og_video: Option<String>,
@@ -49,8 +49,8 @@ pub struct DocumentMetadata {
     pub dcterms_created: Option<String>,
 }
 
-#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct Document {
     /// A list of the links on the page, present if `ScrapeFormats::Markdown` is present in `ScrapeOptions.formats`. (default)

View File

@@ -1,7 +1,11 @@
+use std::fmt::Display;
+
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use thiserror::Error;
 
+use crate::crawl::CrawlStatus;
+
 #[derive(Debug, Deserialize, Serialize, Clone)]
 pub struct FirecrawlAPIError {
     /// Always false.
@@ -14,16 +18,28 @@ pub struct FirecrawlAPIError {
     pub details: Option<Value>,
 }
 
+impl Display for FirecrawlAPIError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if let Some(details) = self.details.as_ref() {
+            write!(f, "{} ({})", self.error, details)
+        } else {
+            write!(f, "{}", self.error)
+        }
+    }
+}
+
 #[derive(Error, Debug)]
 pub enum FirecrawlError {
-    #[error("HTTP request failed: {0}")]
-    HttpRequestFailed(String),
-    #[error("API key not provided")]
-    APIKeyNotProvided,
+    #[error("{0} failed: HTTP error {1}: {2}")]
+    HttpRequestFailed(String, u16, String),
+    #[error("{0} failed: HTTP error: {1}")]
+    HttpError(String, reqwest::Error),
+    #[error("Failed to parse response as text: {0}")]
+    ResponseParseErrorText(reqwest::Error),
     #[error("Failed to parse response: {0}")]
-    ResponseParseError(String),
-    #[error("API error")]
-    APIError(FirecrawlAPIError),
-    #[error("Crawl job failed or stopped: {0}")]
-    CrawlJobFailed(String),
+    ResponseParseError(serde_json::Error),
+    #[error("{0} failed: {1}")]
+    APIError(String, FirecrawlAPIError),
+    #[error("Crawl job failed: {0}")]
+    CrawlJobFailed(String, CrawlStatus),
 }
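
For reference, a minimal sketch (not part of this commit) of telling the reworked error variants apart at a call site; the helper name and URL are placeholders.

use firecrawl::{FirecrawlApp, FirecrawlError};

async fn print_page(app: &FirecrawlApp) {
    match app.scrape_url("https://firecrawl.dev", None).await {
        Ok(document) => println!("{}", document.markdown.unwrap_or_default()),
        // The request never produced a response (connection, DNS, TLS, ...)
        Err(FirecrawlError::HttpError(action, error)) => eprintln!("{} failed to send: {}", action, error),
        // The API answered with success: false; the payload is rendered by the new Display impl
        Err(FirecrawlError::APIError(action, api_error)) => eprintln!("{} was rejected: {}", action, api_error),
        // Remaining cases: unexpected status codes, unparseable bodies, failed crawl jobs
        Err(other) => eprintln!("Unexpected error: {}", other),
    }
}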

View File

@@ -1,18 +1,18 @@
 use reqwest::{Client, Response};
 use serde::de::DeserializeOwned;
-use serde_json::json;
 use serde_json::Value;
 
 pub mod crawl;
 pub mod document;
 mod error;
+pub mod map;
 pub mod scrape;
 
 pub use error::FirecrawlError;
 
 #[derive(Clone, Debug)]
 pub struct FirecrawlApp {
-    api_key: String,
+    api_key: Option<String>,
     api_url: String,
     client: Client,
 }
@@ -20,15 +20,14 @@ pub struct FirecrawlApp {
 pub(crate) const API_VERSION: &str = "/v1";
 
 impl FirecrawlApp {
-    pub fn new(api_key: Option<String>, api_url: Option<String>) -> Result<Self, FirecrawlError> {
-        let api_key = api_key
-            .ok_or(FirecrawlError::APIKeyNotProvided)?;
-        let api_url = api_url
-            .unwrap_or_else(|| "https://api.firecrawl.dev".to_string());
+    pub fn new(api_key: impl AsRef<str>) -> Result<Self, FirecrawlError> {
+        FirecrawlApp::new_selfhosted("https://api.firecrawl.dev", Some(api_key))
+    }
 
+    pub fn new_selfhosted(api_url: impl AsRef<str>, api_key: Option<impl AsRef<str>>) -> Result<Self, FirecrawlError> {
         Ok(FirecrawlApp {
-            api_key,
-            api_url,
+            api_key: api_key.map(|x| x.as_ref().to_string()),
+            api_url: api_url.as_ref().to_string(),
             client: Client::new(),
         })
     }
@@ -36,10 +35,12 @@ impl FirecrawlApp {
     fn prepare_headers(&self, idempotency_key: Option<&String>) -> reqwest::header::HeaderMap {
         let mut headers = reqwest::header::HeaderMap::new();
         headers.insert("Content-Type", "application/json".parse().unwrap());
-        headers.insert(
-            "Authorization",
-            format!("Bearer {}", self.api_key).parse().unwrap(),
-        );
+        if let Some(api_key) = self.api_key.as_ref() {
+            headers.insert(
+                "Authorization",
+                format!("Bearer {}", api_key).parse().unwrap(),
+            );
+        }
         if let Some(key) = idempotency_key {
             headers.insert("x-idempotency-key", key.parse().unwrap());
         }
@@ -51,48 +52,34 @@ impl FirecrawlApp {
         response: Response,
         action: impl AsRef<str>,
     ) -> Result<T, FirecrawlError> {
-        if response.status().is_success() {
-            let response_json: Value = response
-                .json()
-                .await
-                .map_err(|e| FirecrawlError::ResponseParseError(e.to_string()))?;
-            if response_json["success"].as_bool().unwrap_or(false) {
-                Ok(serde_json::from_value(response_json).map_err(|e| FirecrawlError::ResponseParseError(e.to_string()))?)
-            } else {
-                Err(FirecrawlError::HttpRequestFailed(format!(
-                    "Failed to {}: {}",
-                    action.as_ref(), response_json["error"]
-                )))
-            }
-        } else {
-            let status_code = response.status().as_u16();
-            let error_message = response
-                .json::<Value>()
-                .await
-                .unwrap_or_else(|_| json!({"error": "No additional error details provided."}));
-            let message = match status_code {
-                402 => format!(
-                    "Payment Required: Failed to {}. {}",
-                    action.as_ref(), error_message["error"]
-                ),
-                408 => format!(
-                    "Request Timeout: Failed to {} as the request timed out. {}",
-                    action.as_ref(), error_message["error"]
-                ),
-                409 => format!(
-                    "Conflict: Failed to {} due to a conflict. {}",
-                    action.as_ref(), error_message["error"]
-                ),
-                500 => format!(
-                    "Internal Server Error: Failed to {}. {}",
-                    action.as_ref(), error_message["error"]
-                ),
-                _ => format!(
-                    "Unexpected error during {}: Status code {}. {}",
-                    action.as_ref(), status_code, error_message["error"]
-                ),
-            };
-            Err(FirecrawlError::HttpRequestFailed(message))
-        }
+        let (is_success, status) = (response.status().is_success(), response.status());
+
+        let response = response
+            .text()
+            .await
+            .map_err(|e| FirecrawlError::ResponseParseErrorText(e))
+            .and_then(|response_json| serde_json::from_str::<Value>(&response_json).map_err(|e| FirecrawlError::ResponseParseError(e)))
+            .and_then(|response_value| {
+                if response_value["success"].as_bool().unwrap_or(false) {
+                    Ok(serde_json::from_value::<T>(response_value).map_err(|e| FirecrawlError::ResponseParseError(e))?)
+                } else {
+                    Err(FirecrawlError::APIError(
+                        action.as_ref().to_string(),
+                        serde_json::from_value(response_value).map_err(|e| FirecrawlError::ResponseParseError(e))?
+                    ))
+                }
+            });
+
+        match &response {
+            Ok(_) => response,
+            Err(FirecrawlError::ResponseParseError(_)) | Err(FirecrawlError::ResponseParseErrorText(_)) => {
+                if is_success {
+                    response
+                } else {
+                    Err(FirecrawlError::HttpRequestFailed(action.as_ref().to_string(), status.as_u16(), status.as_str().to_string()))
+                }
+            },
+            Err(_) => response,
+        }
     }
 }

apps/rust-sdk/src/map.rs (new file, 66 lines)
View File

@@ -0,0 +1,66 @@
+use serde::{Deserialize, Serialize};
+
+use crate::{FirecrawlApp, FirecrawlError, API_VERSION};
+
+#[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Default)]
+#[serde(rename_all = "camelCase")]
+pub struct MapOptions {
+    /// Search query to use for mapping
+    pub search: Option<String>,
+
+    /// Ignore the website sitemap when crawling (default: `true`)
+    pub ignore_sitemap: Option<bool>,
+
+    /// Include subdomains of the website (default: `true`)
+    pub include_subdomains: Option<bool>,
+
+    /// Maximum number of links to return (default: `5000`)
+    pub exclude_tags: Option<u32>,
+}
+
+#[derive(Deserialize, Serialize, Debug, Default)]
+#[serde(rename_all = "camelCase")]
+struct MapRequestBody {
+    url: String,
+
+    #[serde(flatten)]
+    options: MapOptions,
+}
+
+#[derive(Deserialize, Serialize, Debug, Default)]
+#[serde(rename_all = "camelCase")]
+struct MapResponse {
+    success: bool,
+
+    links: Vec<String>,
+}
+
+impl FirecrawlApp {
+    /// Returns links from a URL using the Firecrawl API.
+    pub async fn map_url(
+        &self,
+        url: impl AsRef<str>,
+        options: impl Into<Option<MapOptions>>,
+    ) -> Result<Vec<String>, FirecrawlError> {
+        let body = MapRequestBody {
+            url: url.as_ref().to_string(),
+            options: options.into().unwrap_or_default(),
+        };
+
+        let headers = self.prepare_headers(None);
+
+        let response = self
+            .client
+            .post(&format!("{}{}/map", self.api_url, API_VERSION))
+            .headers(headers)
+            .json(&body)
+            .send()
+            .await
+            .map_err(|e| FirecrawlError::HttpError(format!("Mapping {:?}", url.as_ref()), e))?;
+
+        let response = self.handle_response::<MapResponse>(response, "scrape URL").await?;
+
+        Ok(response.links)
+    }
+}
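
For reference, a minimal sketch (not part of this commit) of calling the new map endpoint; the API key, URL, and search query are placeholders.

use firecrawl::{map::MapOptions, FirecrawlApp};

#[tokio::main]
async fn main() {
    let app = FirecrawlApp::new("fc-YOUR-API-KEY").expect("Failed to initialize FirecrawlApp");

    // Optionally narrow the returned links with a search query
    let options = MapOptions {
        search: Some("docs".to_string()),
        ..Default::default()
    };

    match app.map_url("https://firecrawl.dev", options).await {
        Ok(links) => {
            println!("Found {} links", links.len());
            for link in links.iter().take(10) {
                println!("- {}", link);
            }
        }
        Err(e) => eprintln!("Map failed: {}", e),
    }
}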

View File

@@ -42,21 +42,21 @@ pub enum ScrapeFormats {
     Extract,
 }
 
-#[derive(Deserialize, Serialize, Debug, Default)]
 #[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Default)]
 #[serde(rename_all = "camelCase")]
 pub struct ExtractOptions {
     /// Schema the output should adhere to, provided in JSON Schema format.
     pub schema: Option<Value>,
 
-    pub system_prompt: Option<Value>,
+    pub system_prompt: Option<String>,
 
     /// Extraction prompt to send to the LLM agent along with the page content.
-    pub prompt: Option<Value>,
+    pub prompt: Option<String>,
 }
 
-#[derive(Deserialize, Serialize, Debug, Default)]
 #[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Default)]
 #[serde(rename_all = "camelCase")]
 pub struct ScrapeOptions {
     /// Formats to extract from the page. (default: `[ Markdown ]`)
@@ -89,7 +89,6 @@ pub struct ScrapeOptions {
 }
 
 #[derive(Deserialize, Serialize, Debug, Default)]
-#[serde_with::skip_serializing_none]
 #[serde(rename_all = "camelCase")]
 struct ScrapeRequestBody {
     url: String,
@@ -99,7 +98,6 @@ struct ScrapeRequestBody {
 }
 
 #[derive(Deserialize, Serialize, Debug, Default)]
-#[serde_with::skip_serializing_none]
 #[serde(rename_all = "camelCase")]
 struct ScrapeResponse {
     /// This will always be `true` due to `FirecrawlApp::handle_response`.
@@ -111,14 +109,15 @@ struct ScrapeResponse {
 }
 
 impl FirecrawlApp {
+    /// Scrapes a URL using the Firecrawl API.
     pub async fn scrape_url(
         &self,
         url: impl AsRef<str>,
-        options: Option<ScrapeOptions>,
+        options: impl Into<Option<ScrapeOptions>>,
     ) -> Result<Document, FirecrawlError> {
         let body = ScrapeRequestBody {
             url: url.as_ref().to_string(),
-            options: options.unwrap_or_default(),
+            options: options.into().unwrap_or_default(),
         };
 
         let headers = self.prepare_headers(None);
@@ -130,7 +129,7 @@ impl FirecrawlApp {
             .json(&body)
             .send()
             .await
-            .map_err(|e| FirecrawlError::HttpRequestFailed(e.to_string()))?;
+            .map_err(|e| FirecrawlError::HttpError(format!("Scraping {:?}", url.as_ref()), e))?;
 
         let response = self.handle_response::<ScrapeResponse>(response, "scrape URL").await?;

View File

@@ -1,24 +1,16 @@
 use assert_matches::assert_matches;
 use dotenvy::dotenv;
+use firecrawl::scrape::{ExtractOptions, ScrapeFormats, ScrapeOptions};
 use firecrawl::FirecrawlApp;
 use serde_json::json;
 use std::env;
-use std::time::Duration;
-use tokio::time::sleep;
-
-#[tokio::test]
-async fn test_no_api_key() {
-    dotenv().ok();
-    let api_url = env::var("API_URL").expect("API_URL environment variable is not set");
-    assert_matches!(FirecrawlApp::new(None, Some(api_url)), Err(e) if e.to_string() == "API key not provided");
-}
 
 #[tokio::test]
 async fn test_blocklisted_url() {
     dotenv().ok();
     let api_url = env::var("API_URL").unwrap();
-    let api_key = env::var("TEST_API_KEY").unwrap();
-    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
+    let api_key = env::var("TEST_API_KEY").ok();
+    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
     let blocklisted_url = "https://facebook.com/fake-test";
     let result = app.scrape_url(blocklisted_url, None).await;
@@ -32,74 +24,65 @@ async fn test_blocklisted_url() {
 async fn test_successful_response_with_valid_preview_token() {
     dotenv().ok();
     let api_url = env::var("API_URL").unwrap();
-    let app = FirecrawlApp::new(
-        Some("this_is_just_a_preview_token".to_string()),
-        Some(api_url),
+    let app = FirecrawlApp::new_selfhosted(
+        api_url,
+        Some("this_is_just_a_preview_token"),
     )
     .unwrap();
     let result = app
         .scrape_url("https://roastmywebsite.ai", None)
         .await
         .unwrap();
-    assert!(result.as_object().unwrap().contains_key("content"));
-    assert!(result["content"].as_str().unwrap().contains("_Roast_"));
+    assert!(result.markdown.is_some());
+    assert!(result.markdown.unwrap().contains("_Roast_"));
 }
 
 #[tokio::test]
 async fn test_scrape_url_e2e() {
     dotenv().ok();
     let api_url = env::var("API_URL").unwrap();
-    let api_key = env::var("TEST_API_KEY").unwrap();
-    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
+    let api_key = env::var("TEST_API_KEY").ok();
+    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
     let result = app
         .scrape_url("https://roastmywebsite.ai", None)
         .await
         .unwrap();
-    assert!(result.as_object().unwrap().contains_key("content"));
-    assert!(result.as_object().unwrap().contains_key("markdown"));
-    assert!(result.as_object().unwrap().contains_key("metadata"));
-    assert!(!result.as_object().unwrap().contains_key("html"));
-    assert!(result["content"].as_str().unwrap().contains("_Roast_"));
+    assert!(result.markdown.is_some());
+    assert!(result.markdown.unwrap().contains("_Roast_"));
 }
 
 #[tokio::test]
 async fn test_successful_response_with_valid_api_key_and_include_html() {
     dotenv().ok();
     let api_url = env::var("API_URL").unwrap();
-    let api_key = env::var("TEST_API_KEY").unwrap();
-    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
-    let params = json!({
-        "pageOptions": {
-            "includeHtml": true
-        }
-    });
+    let api_key = env::var("TEST_API_KEY").ok();
+    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
+    let params = ScrapeOptions {
+        formats: vec! [ ScrapeFormats::Markdown, ScrapeFormats::HTML ].into(),
+        ..Default::default()
+    };
     let result = app
-        .scrape_url("https://roastmywebsite.ai", Some(params))
+        .scrape_url("https://roastmywebsite.ai", params)
         .await
         .unwrap();
-    assert!(result.as_object().unwrap().contains_key("content"));
-    assert!(result.as_object().unwrap().contains_key("markdown"));
-    assert!(result.as_object().unwrap().contains_key("html"));
-    assert!(result.as_object().unwrap().contains_key("metadata"));
-    assert!(result["content"].as_str().unwrap().contains("_Roast_"));
-    assert!(result["markdown"].as_str().unwrap().contains("_Roast_"));
-    assert!(result["html"].as_str().unwrap().contains("<h1"));
+    assert!(result.markdown.is_some());
+    assert!(result.html.is_some());
+    assert!(result.markdown.unwrap().contains("_Roast_"));
+    assert!(result.html.unwrap().contains("<h1"));
 }
 
 #[tokio::test]
 async fn test_successful_response_for_valid_scrape_with_pdf_file() {
     dotenv().ok();
     let api_url = env::var("API_URL").unwrap();
-    let api_key = env::var("TEST_API_KEY").unwrap();
-    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
+    let api_key = env::var("TEST_API_KEY").ok();
+    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
    let result = app
        .scrape_url("https://arxiv.org/pdf/astro-ph/9301001.pdf", None)
        .await
        .unwrap();
-    assert!(result.as_object().unwrap().contains_key("content"));
-    assert!(result.as_object().unwrap().contains_key("metadata"));
-    assert!(result["content"]
-        .as_str()
+    assert!(result.markdown.is_some());
+    assert!(result.markdown
        .unwrap()
        .contains("We present spectrophotometric observations of the Broad Line Radio Galaxy"));
 }
@@ -108,17 +91,14 @@ async fn test_successful_response_for_valid_scrape_with_pdf_file() {
 async fn test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension() {
     dotenv().ok();
     let api_url = env::var("API_URL").unwrap();
-    let api_key = env::var("TEST_API_KEY").unwrap();
-    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
+    let api_key = env::var("TEST_API_KEY").ok();
+    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
    let result = app
        .scrape_url("https://arxiv.org/pdf/astro-ph/9301001", None)
        .await
        .unwrap();
-    sleep(Duration::from_secs(6)).await; // wait for 6 seconds
-    assert!(result.as_object().unwrap().contains_key("content"));
-    assert!(result.as_object().unwrap().contains_key("metadata"));
-    assert!(result["content"]
-        .as_str()
+    assert!(result.markdown.is_some());
+    assert!(result.markdown
        .unwrap()
        .contains("We present spectrophotometric observations of the Broad Line Radio Galaxy"));
 }
@@ -127,10 +107,10 @@ async fn test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension() {
 async fn test_should_return_error_for_blocklisted_url() {
     dotenv().ok();
     let api_url = env::var("API_URL").unwrap();
-    let api_key = env::var("TEST_API_KEY").unwrap();
-    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
+    let api_key = env::var("TEST_API_KEY").ok();
+    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
     let blocklisted_url = "https://twitter.com/fake-test";
-    let result = app.crawl_url(blocklisted_url, None, true, 1, None).await;
+    let result = app.crawl_url(blocklisted_url, None).await;
 
     assert_matches!(
         result,
@@ -142,13 +122,13 @@ async fn test_should_return_error_for_blocklisted_url() {
 async fn test_llm_extraction() {
     dotenv().ok();
     let api_url = env::var("API_URL").unwrap();
-    let api_key = env::var("TEST_API_KEY").unwrap();
-    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
-    let params = json!({
-        "extractorOptions": {
-            "mode": "llm-extraction",
-            "extractionPrompt": "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
-            "extractionSchema": {
+    let api_key = env::var("TEST_API_KEY").ok();
+    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
+    let options = ScrapeOptions {
+        formats: vec! [ ScrapeFormats::Extract ].into(),
+        extract: ExtractOptions {
+            prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source".to_string().into(),
+            schema: json!({
                 "type": "object",
                 "properties": {
                     "company_mission": {"type": "string"},
@@ -156,15 +136,17 @@ async fn test_llm_extraction() {
                     "is_open_source": {"type": "boolean"}
                 },
                 "required": ["company_mission", "supports_sso", "is_open_source"]
-            }
-        }
-    });
+            }).into(),
+            ..Default::default()
+        }.into(),
+        ..Default::default()
+    };
     let result = app
-        .scrape_url("https://mendable.ai", Some(params))
+        .scrape_url("https://mendable.ai", options)
         .await
         .unwrap();
-    assert!(result.as_object().unwrap().contains_key("llm_extraction"));
-    let llm_extraction = &result["llm_extraction"];
+    assert!(result.extract.is_some());
+    let llm_extraction = &result.extract.unwrap();
     assert!(llm_extraction
         .as_object()
         .unwrap()