diff --git a/apps/rust-sdk/examples/example.rs b/apps/rust-sdk/examples/example.rs
index c6b96b78..a713a82d 100644
--- a/apps/rust-sdk/examples/example.rs
+++ b/apps/rust-sdk/examples/example.rs
@@ -1,40 +1,38 @@
-use firecrawl::FirecrawlApp;
+use firecrawl::{crawl::CrawlOptions, scrape::{ExtractOptions, ScrapeFormats, ScrapeOptions}, FirecrawlApp};
 use serde_json::json;
 use uuid::Uuid;
 
 #[tokio::main]
 async fn main() {
     // Initialize the FirecrawlApp with the API key
-    let api_key = Some("fc-YOUR_API_KEY".to_string());
-    let api_url = Some("http://0.0.0.0:3002".to_string());
-    let app = FirecrawlApp::new(api_key, api_url).expect("Failed to initialize FirecrawlApp");
+    let app = FirecrawlApp::new("fc-YOUR-API-KEY").expect("Failed to initialize FirecrawlApp");
+
+    // or, connect to a self-hosted instance:
+    // let app = FirecrawlApp::new_selfhosted("http://localhost:3002", None::<&str>).expect("Failed to initialize FirecrawlApp");
 
     // Scrape a website
     let scrape_result = app.scrape_url("https://firecrawl.dev", None).await;
     match scrape_result {
-        Ok(data) => println!("Scrape Result:\n{}", data["markdown"]),
-        Err(e) => eprintln!("Scrape failed: {}", e),
+        Ok(data) => println!("Scrape Result:\n{}", data.markdown.unwrap()),
+        Err(e) => eprintln!("Scrape failed: {:#?}", e),
     }
 
     // Crawl a website
-    let random_uuid = String::from(Uuid::new_v4());
-    let idempotency_key = Some(random_uuid); // optional idempotency key
-    let crawl_params = json!({
-        "crawlerOptions": {
-            "excludes": ["blog/*"]
-        }
-    });
+    let idempotency_key = String::from(Uuid::new_v4());
+    let crawl_options = CrawlOptions {
+        exclude_paths: Some(vec![ "blog/*".to_string() ]),
+        poll_interval: Some(2000),
+        idempotency_key: Some(idempotency_key),
+        ..Default::default()
+    };
     let crawl_result = app
         .crawl_url(
             "https://mendable.ai",
-            Some(crawl_params),
-            true,
-            2,
-            idempotency_key,
+            crawl_options,
         )
         .await;
     match crawl_result {
-        Ok(data) => println!("Crawl Result:\n{}", data),
+        Ok(data) => println!("Crawl Result (used {} credits):\n{:#?}", data.credits_used, data.data),
         Err(e) => eprintln!("Crawl failed: {}", e),
     }
 
@@ -62,21 +60,20 @@ async fn main() {
         "required": ["top"]
     });
 
-    let llm_extraction_params = json!({
-        "extractorOptions": {
-            "extractionSchema": json_schema,
-            "mode": "llm-extraction"
-        },
-        "pageOptions": {
-            "onlyMainContent": true
-        }
-    });
+    let llm_extraction_options = ScrapeOptions {
+        formats: Some(vec![ ScrapeFormats::Extract ]),
+        extract: Some(ExtractOptions {
+            schema: Some(json_schema),
+            ..Default::default()
+        }),
+        ..Default::default()
+    };
 
     let llm_extraction_result = app
-        .scrape_url("https://news.ycombinator.com", Some(llm_extraction_params))
+        .scrape_url("https://news.ycombinator.com", llm_extraction_options)
         .await;
     match llm_extraction_result {
-        Ok(data) => println!("LLM Extraction Result:\n{}", data["llm_extraction"]),
+        Ok(data) => println!("LLM Extraction Result:\n{:#?}", data.extract.unwrap()),
         Err(e) => eprintln!("LLM Extraction failed: {}", e),
     }
 }
diff --git a/apps/rust-sdk/src/crawl.rs b/apps/rust-sdk/src/crawl.rs
index cf211bf0..5a136cf9 100644
--- a/apps/rust-sdk/src/crawl.rs
+++ b/apps/rust-sdk/src/crawl.rs
@@ -48,8 +48,8 @@ impl From<CrawlScrapeFormats> for ScrapeFormats {
     }
 }
 
-#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct CrawlScrapeOptions {
     /// Formats to extract from the page. (default: `[ Markdown ]`)
@@ -93,8 +93,8 @@ impl From<CrawlScrapeOptions> for ScrapeOptions {
     }
 }
 
-#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct CrawlOptions {
     /// Options to pass through to the scraper.
@@ -103,12 +103,12 @@ pub struct CrawlOptions {
     /// URL RegEx patterns to (exclusively) include.
     ///
     /// For example, if you specified `"blog"`, only pages that have `blog` somewhere in the URL would be crawled.
-    pub include_paths: Option<String>,
+    pub include_paths: Option<Vec<String>>,
 
     /// URL RegEx patterns to exclude.
     ///
     /// For example, if you specified `"blog"`, pages that have `blog` somewhere in the URL would not be crawled.
-    pub exclude_paths: Option<String>,
+    pub exclude_paths: Option<Vec<String>>,
 
     /// Maximum URL depth to crawl, relative to the base URL. (default: `2`)
     pub max_depth: Option<u32>,
@@ -138,7 +138,6 @@ pub struct CrawlOptions {
 }
 
 #[derive(Deserialize, Serialize, Debug, Default)]
-#[serde_with::skip_serializing_none]
 #[serde(rename_all = "camelCase")]
 struct CrawlRequestBody {
     url: String,
@@ -148,7 +147,6 @@ struct CrawlRequestBody {
 }
 
 #[derive(Deserialize, Serialize, Debug, Default)]
-#[serde_with::skip_serializing_none]
 #[serde(rename_all = "camelCase")]
 struct CrawlResponse {
     /// This will always be `true` due to `FirecrawlApp::handle_response`.
@@ -175,8 +173,8 @@ pub enum CrawlStatusTypes {
     Cancelled,
 }
 
-#[derive(Deserialize, Serialize, Debug, Clone)]
 #[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct CrawlStatus {
     /// The status of the crawl.
@@ -203,7 +201,6 @@ pub struct CrawlStatus {
 }
 
 #[derive(Deserialize, Serialize, Debug, Clone)]
-#[serde_with::skip_serializing_none]
 #[serde(rename_all = "camelCase")]
 pub struct CrawlAsyncResponse {
     success: bool,
@@ -216,6 +213,7 @@ pub struct CrawlAsyncResponse {
 }
 
 impl FirecrawlApp {
+    /// Initiates a crawl job for a URL using the Firecrawl API.
     pub async fn crawl_url_async(
         &self,
         url: impl AsRef<str>,
@@ -235,61 +233,63 @@ impl FirecrawlApp {
             .json(&body)
             .send()
             .await
-            .map_err(|e| FirecrawlError::HttpRequestFailed(e.to_string()))?;
+            .map_err(|e| FirecrawlError::HttpError(format!("Crawling {:?}", url.as_ref()), e))?;
 
         self.handle_response::<CrawlAsyncResponse>(response, "start crawl job").await
     }
 
+    /// Performs a crawl job for a URL using the Firecrawl API, waiting for the end result. This may take a long time depending on the size of the target site and your options (namely `CrawlOptions.limit`).
     pub async fn crawl_url(
         &self,
         url: impl AsRef<str>,
-        options: Option<CrawlOptions>,
-    ) -> Result<Vec<Document>, FirecrawlError> {
+        options: impl Into<Option<CrawlOptions>>,
+    ) -> Result<CrawlStatus, FirecrawlError> {
+        let options = options.into();
         let poll_interval = options.as_ref().and_then(|x| x.poll_interval).unwrap_or(2000);
-
         let res = self.crawl_url_async(url, options).await?;
 
         self.monitor_job_status(&res.id, poll_interval).await
     }
 
-    pub async fn check_crawl_status(&self, id: &str) -> Result<CrawlStatus, FirecrawlError> {
+    /// Checks for the status of a crawl, based on the crawl's ID. To be used in conjunction with `FirecrawlApp::crawl_url_async`.
+    pub async fn check_crawl_status(&self, id: impl AsRef<str>) -> Result<CrawlStatus, FirecrawlError> {
         let response = self
             .client
             .get(&format!(
                 "{}{}/crawl/{}",
-                self.api_url, API_VERSION, id
+                self.api_url, API_VERSION, id.as_ref()
             ))
             .headers(self.prepare_headers(None))
             .send()
             .await
-            .map_err(|e| FirecrawlError::HttpRequestFailed(e.to_string()))?;
+            .map_err(|e| FirecrawlError::HttpError(format!("Checking status of crawl {}", id.as_ref()), e))?;
 
-        self.handle_response(response, "check crawl status").await
+        self.handle_response(response, format!("Checking status of crawl {}", id.as_ref())).await
     }
 
     async fn monitor_job_status(
         &self,
         id: &str,
         poll_interval: u64,
-    ) -> Result<Vec<Document>, FirecrawlError> {
+    ) -> Result<CrawlStatus, FirecrawlError> {
         loop {
             let status_data = self.check_crawl_status(id).await?;
             match status_data.status {
                 CrawlStatusTypes::Completed => {
-                    return Ok(status_data.data);
+                    return Ok(status_data);
                 }
                 CrawlStatusTypes::Scraping => {
-                    tokio::time::sleep(tokio::time::Duration::from_secs(poll_interval)).await;
+                    tokio::time::sleep(tokio::time::Duration::from_millis(poll_interval)).await;
                }
                 CrawlStatusTypes::Failed => {
                     return Err(FirecrawlError::CrawlJobFailed(format!(
                         "Crawl job failed."
-                    )));
+                    ), status_data));
                 }
                 CrawlStatusTypes::Cancelled => {
                     return Err(FirecrawlError::CrawlJobFailed(format!(
                         "Crawl job was cancelled."
-                    )));
+                    ), status_data));
                 }
             }
         }
    }
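The crawl API now splits into a blocking `crawl_url` and a fire-and-forget `crawl_url_async` whose job can be polled with `check_crawl_status`. Below is a minimal sketch of driving the asynchronous variant by hand; it assumes the elided `CrawlAsyncResponse.id` field and the `CrawlStatus.data` vector are public, which is how `monitor_job_status` uses them internally.

```rust
use firecrawl::{crawl::CrawlStatusTypes, FirecrawlApp};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let app = FirecrawlApp::new("fc-YOUR-API-KEY")?;

    // Kick the job off without blocking; only the job handle comes back.
    let job = app.crawl_url_async("https://mendable.ai", None).await?;

    // Poll manually, mirroring what monitor_job_status does internally.
    loop {
        let status = app.check_crawl_status(&job.id).await?;
        match status.status {
            CrawlStatusTypes::Completed => {
                println!("crawled {} pages", status.data.len());
                break;
            }
            // Still running: note that poll_interval is milliseconds after this change.
            CrawlStatusTypes::Scraping => {
                tokio::time::sleep(std::time::Duration::from_millis(2000)).await;
            }
            CrawlStatusTypes::Failed | CrawlStatusTypes::Cancelled => {
                return Err("crawl failed or was cancelled".into());
            }
        }
    }
    Ok(())
}
```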
diff --git a/apps/rust-sdk/src/document.rs b/apps/rust-sdk/src/document.rs
index 5eba5dfa..1948a4ce 100644
--- a/apps/rust-sdk/src/document.rs
+++ b/apps/rust-sdk/src/document.rs
@@ -1,8 +1,8 @@
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 
-#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct DocumentMetadata {
     // firecrawl specific
@@ -12,8 +12,8 @@ pub struct DocumentMetadata {
     pub error: Option<String>,
 
     // basic meta tags
-    pub title: String,
-    pub description: String,
+    pub title: Option<String>,
+    pub description: Option<String>,
     pub language: Option<String>,
     pub keywords: Option<String>,
     pub robots: Option<String>,
@@ -26,7 +26,7 @@ pub struct DocumentMetadata {
     pub og_audio: Option<String>,
     pub og_determiner: Option<String>,
     pub og_locale: Option<String>,
-    pub og_locale_alternate: Option<String>,
+    pub og_locale_alternate: Option<Vec<String>>,
     pub og_site_name: Option<String>,
     pub og_video: Option<String>,
@@ -49,8 +49,8 @@ pub struct DocumentMetadata {
     pub dcterms_created: Option<String>,
 }
 
-#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct Document {
     /// A list of the links on the page, present if `ScrapeFormats::Markdown` is present in `ScrapeOptions.formats`. (default)
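With `title` and `description` now optional, pages that lack those meta tags deserialize cleanly instead of failing. A short sketch of the caller-side handling this implies; the URL is a placeholder, and `Document.metadata` is assumed to be a plain `DocumentMetadata` field as elsewhere in the crate.

```rust
use firecrawl::{FirecrawlApp, FirecrawlError};

async fn print_title(app: &FirecrawlApp) -> Result<(), FirecrawlError> {
    let doc = app.scrape_url("https://firecrawl.dev", None).await?;

    // `title` is Option<String> after this change, so a page without a
    // <title> tag shows up as None rather than a deserialization error.
    let title = doc.metadata.title.as_deref().unwrap_or("(no title)");
    println!("{}", title);
    Ok(())
}
```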
diff --git a/apps/rust-sdk/src/error.rs b/apps/rust-sdk/src/error.rs
index a6d11eb0..f04a286a 100644
--- a/apps/rust-sdk/src/error.rs
+++ b/apps/rust-sdk/src/error.rs
@@ -1,7 +1,11 @@
+use std::fmt::Display;
+
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use thiserror::Error;
 
+use crate::crawl::CrawlStatus;
+
 #[derive(Debug, Deserialize, Serialize, Clone)]
 pub struct FirecrawlAPIError {
     /// Always false.
@@ -14,16 +18,28 @@ pub struct FirecrawlAPIError {
     pub details: Option<Value>,
 }
 
+impl Display for FirecrawlAPIError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if let Some(details) = self.details.as_ref() {
+            write!(f, "{} ({})", self.error, details)
+        } else {
+            write!(f, "{}", self.error)
+        }
+    }
+}
+
 #[derive(Error, Debug)]
 pub enum FirecrawlError {
-    #[error("HTTP request failed: {0}")]
-    HttpRequestFailed(String),
-    #[error("API key not provided")]
-    APIKeyNotProvided,
+    #[error("{0} failed: HTTP error {1}: {2}")]
+    HttpRequestFailed(String, u16, String),
+    #[error("{0} failed: HTTP error: {1}")]
+    HttpError(String, reqwest::Error),
+    #[error("Failed to parse response as text: {0}")]
+    ResponseParseErrorText(reqwest::Error),
     #[error("Failed to parse response: {0}")]
-    ResponseParseError(String),
-    #[error("API error")]
-    APIError(FirecrawlAPIError),
-    #[error("Crawl job failed or stopped: {0}")]
-    CrawlJobFailed(String),
+    ResponseParseError(serde_json::Error),
+    #[error("{0} failed: {1}")]
+    APIError(String, FirecrawlAPIError),
+    #[error("Crawl job failed: {0}")]
+    CrawlJobFailed(String, CrawlStatus),
 }
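The reworked `FirecrawlError` carries the failed action and the decoded API body, so callers can branch on what actually went wrong instead of parsing a flat message string. A sketch of matching on the new variants, using only the payloads defined in the hunk above:

```rust
use firecrawl::{FirecrawlApp, FirecrawlError};

async fn scrape_verbosely(app: &FirecrawlApp, url: &str) {
    match app.scrape_url(url, None).await {
        Ok(doc) => println!("ok, markdown present: {}", doc.markdown.is_some()),
        // The API answered with success: false; the Display impl above
        // renders the error message plus the optional details value.
        Err(FirecrawlError::APIError(action, api_error)) => {
            eprintln!("{} was rejected by the API: {}", action, api_error);
        }
        // Non-2xx status whose body could not be parsed.
        Err(FirecrawlError::HttpRequestFailed(action, status, message)) => {
            eprintln!("{} failed with HTTP {} {}", action, status, message);
        }
        Err(other) => eprintln!("transport or parse error: {}", other),
    }
}
```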
diff --git a/apps/rust-sdk/src/lib.rs b/apps/rust-sdk/src/lib.rs
index 6c519a2a..38c2dc11 100644
--- a/apps/rust-sdk/src/lib.rs
+++ b/apps/rust-sdk/src/lib.rs
@@ -1,18 +1,18 @@
 use reqwest::{Client, Response};
 use serde::de::DeserializeOwned;
-use serde_json::json;
 use serde_json::Value;
 
 pub mod crawl;
 pub mod document;
 mod error;
+pub mod map;
 pub mod scrape;
 
 pub use error::FirecrawlError;
 
 #[derive(Clone, Debug)]
 pub struct FirecrawlApp {
-    api_key: String,
+    api_key: Option<String>,
     api_url: String,
     client: Client,
 }
@@ -20,15 +20,14 @@ pub struct FirecrawlApp {
 pub(crate) const API_VERSION: &str = "/v1";
 
 impl FirecrawlApp {
-    pub fn new(api_key: Option<String>, api_url: Option<String>) -> Result<FirecrawlApp, FirecrawlError> {
-        let api_key = api_key
-            .ok_or(FirecrawlError::APIKeyNotProvided)?;
-        let api_url = api_url
-            .unwrap_or_else(|| "https://api.firecrawl.dev".to_string());
+    pub fn new(api_key: impl AsRef<str>) -> Result<FirecrawlApp, FirecrawlError> {
+        FirecrawlApp::new_selfhosted("https://api.firecrawl.dev", Some(api_key))
+    }
 
+    pub fn new_selfhosted(api_url: impl AsRef<str>, api_key: Option<impl AsRef<str>>) -> Result<FirecrawlApp, FirecrawlError> {
         Ok(FirecrawlApp {
-            api_key,
-            api_url,
+            api_key: api_key.map(|x| x.as_ref().to_string()),
+            api_url: api_url.as_ref().to_string(),
             client: Client::new(),
         })
     }
@@ -36,10 +35,12 @@ impl FirecrawlApp {
     fn prepare_headers(&self, idempotency_key: Option<&String>) -> reqwest::header::HeaderMap {
         let mut headers = reqwest::header::HeaderMap::new();
         headers.insert("Content-Type", "application/json".parse().unwrap());
-        headers.insert(
-            "Authorization",
-            format!("Bearer {}", self.api_key).parse().unwrap(),
-        );
+        if let Some(api_key) = self.api_key.as_ref() {
+            headers.insert(
+                "Authorization",
+                format!("Bearer {}", api_key).parse().unwrap(),
+            );
+        }
         if let Some(key) = idempotency_key {
             headers.insert("x-idempotency-key", key.parse().unwrap());
         }
@@ -51,48 +52,34 @@ impl FirecrawlApp {
         response: Response,
         action: impl AsRef<str>,
     ) -> Result<T, FirecrawlError> {
-        if response.status().is_success() {
-            let response_json: Value = response
-                .json()
-                .await
-                .map_err(|e| FirecrawlError::ResponseParseError(e.to_string()))?;
-            if response_json["success"].as_bool().unwrap_or(false) {
-                Ok(serde_json::from_value(response_json).map_err(|e| FirecrawlError::ResponseParseError(e.to_string()))?)
-            } else {
-                Err(FirecrawlError::HttpRequestFailed(format!(
-                    "Failed to {}: {}",
-                    action.as_ref(), response_json["error"]
-                )))
-            }
-        } else {
-            let status_code = response.status().as_u16();
-            let error_message = response
-                .json::<Value>()
-                .await
-                .unwrap_or_else(|_| json!({"error": "No additional error details provided."}));
-            let message = match status_code {
-                402 => format!(
-                    "Payment Required: Failed to {}. {}",
-                    action.as_ref(), error_message["error"]
-                ),
-                408 => format!(
-                    "Request Timeout: Failed to {} as the request timed out. {}",
-                    action.as_ref(), error_message["error"]
-                ),
-                409 => format!(
-                    "Conflict: Failed to {} due to a conflict. {}",
-                    action.as_ref(), error_message["error"]
-                ),
-                500 => format!(
-                    "Internal Server Error: Failed to {}. {}",
-                    action.as_ref(), error_message["error"]
-                ),
-                _ => format!(
-                    "Unexpected error during {}: Status code {}. {}",
-                    action.as_ref(), status_code, error_message["error"]
-                ),
-            };
-            Err(FirecrawlError::HttpRequestFailed(message))
+        let (is_success, status) = (response.status().is_success(), response.status());
+
+        let response = response
+            .text()
+            .await
+            .map_err(|e| FirecrawlError::ResponseParseErrorText(e))
+            .and_then(|response_json| serde_json::from_str::<Value>(&response_json).map_err(|e| FirecrawlError::ResponseParseError(e)))
+            .and_then(|response_value| {
+                if response_value["success"].as_bool().unwrap_or(false) {
+                    Ok(serde_json::from_value::<T>(response_value).map_err(|e| FirecrawlError::ResponseParseError(e))?)
+                } else {
+                    Err(FirecrawlError::APIError(
+                        action.as_ref().to_string(),
+                        serde_json::from_value(response_value).map_err(|e| FirecrawlError::ResponseParseError(e))?
+                    ))
+                }
+            });
+
+        match &response {
+            Ok(_) => response,
+            Err(FirecrawlError::ResponseParseError(_)) | Err(FirecrawlError::ResponseParseErrorText(_)) => {
+                if is_success {
+                    response
+                } else {
+                    Err(FirecrawlError::HttpRequestFailed(action.as_ref().to_string(), status.as_u16(), status.as_str().to_string()))
+                }
+            },
+            Err(_) => response,
         }
     }
 }
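`new` now takes the key directly and delegates to `new_selfhosted`, where the key is optional because self-hosted deployments may run without authentication. One wrinkle worth noting: a bare `None` leaves `Option<impl AsRef<str>>` unconstrained, so the type must be spelled out. A sketch of both constructors:

```rust
use firecrawl::FirecrawlApp;

fn build_clients() -> Result<(), firecrawl::FirecrawlError> {
    // Cloud API: the key is required, and a &str now works directly.
    let _cloud = FirecrawlApp::new("fc-YOUR-API-KEY")?;

    // Self-hosted instance without auth: no Authorization header is sent.
    // None needs a concrete type to satisfy Option<impl AsRef<str>>.
    let _local = FirecrawlApp::new_selfhosted("http://localhost:3002", None::<&str>)?;

    Ok(())
}
```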
diff --git a/apps/rust-sdk/src/map.rs b/apps/rust-sdk/src/map.rs
new file mode 100644
index 00000000..7c3b3a43
--- /dev/null
+++ b/apps/rust-sdk/src/map.rs
@@ -0,0 +1,66 @@
+use serde::{Deserialize, Serialize};
+
+use crate::{FirecrawlApp, FirecrawlError, API_VERSION};
+
+#[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Default)]
+#[serde(rename_all = "camelCase")]
+pub struct MapOptions {
+    /// Search query to use for mapping
+    pub search: Option<String>,
+
+    /// Ignore the website sitemap when crawling (default: `true`)
+    pub ignore_sitemap: Option<bool>,
+
+    /// Include subdomains of the website (default: `true`)
+    pub include_subdomains: Option<bool>,
+
+    /// Maximum number of links to return (default: `5000`)
+    pub limit: Option<u32>,
+}
+
+#[derive(Deserialize, Serialize, Debug, Default)]
+#[serde(rename_all = "camelCase")]
+struct MapRequestBody {
+    url: String,
+
+    #[serde(flatten)]
+    options: MapOptions,
+}
+
+#[derive(Deserialize, Serialize, Debug, Default)]
+#[serde(rename_all = "camelCase")]
+struct MapResponse {
+    success: bool,
+
+    links: Vec<String>,
+}
+
+impl FirecrawlApp {
+    /// Returns links from a URL using the Firecrawl API.
+    pub async fn map_url(
+        &self,
+        url: impl AsRef<str>,
+        options: impl Into<Option<MapOptions>>,
+    ) -> Result<Vec<String>, FirecrawlError> {
+        let body = MapRequestBody {
+            url: url.as_ref().to_string(),
+            options: options.into().unwrap_or_default(),
+        };
+
+        let headers = self.prepare_headers(None);
+
+        let response = self
+            .client
+            .post(&format!("{}{}/map", self.api_url, API_VERSION))
+            .headers(headers)
+            .json(&body)
+            .send()
+            .await
+            .map_err(|e| FirecrawlError::HttpError(format!("Mapping {:?}", url.as_ref()), e))?;
+
+        let response = self.handle_response::<MapResponse>(response, "map URL").await?;
+
+        Ok(response.links)
+    }
+}
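Calling the new `/v1/map` wrapper is a one-liner. A sketch with illustrative option values (the search term and limit are placeholders, and the `limit` field corresponds to the "maximum number of links" option above):

```rust
use firecrawl::{map::MapOptions, FirecrawlApp};

async fn list_doc_links(app: &FirecrawlApp) -> Result<(), firecrawl::FirecrawlError> {
    // MapOptions converts into Option<MapOptions>, so no Some(...) is needed.
    let links = app
        .map_url(
            "https://firecrawl.dev",
            MapOptions {
                search: Some("docs".to_string()),
                limit: Some(100),
                ..Default::default()
            },
        )
        .await?;

    for link in links {
        println!("{}", link);
    }
    Ok(())
}
```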
diff --git a/apps/rust-sdk/src/scrape.rs b/apps/rust-sdk/src/scrape.rs
index 4b481624..b879fdaf 100644
--- a/apps/rust-sdk/src/scrape.rs
+++ b/apps/rust-sdk/src/scrape.rs
@@ -42,21 +42,21 @@ pub enum ScrapeFormats {
     Extract,
 }
 
-#[derive(Deserialize, Serialize, Debug, Default)]
 #[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Default)]
 #[serde(rename_all = "camelCase")]
 pub struct ExtractOptions {
     /// Schema the output should adhere to, provided in JSON Schema format.
     pub schema: Option<Value>,
 
-    pub system_prompt: Option<Value>,
+    pub system_prompt: Option<String>,
 
     /// Extraction prompt to send to the LLM agent along with the page content.
-    pub prompt: Option<Value>,
+    pub prompt: Option<String>,
 }
 
-#[derive(Deserialize, Serialize, Debug, Default)]
 #[serde_with::skip_serializing_none]
+#[derive(Deserialize, Serialize, Debug, Default)]
 #[serde(rename_all = "camelCase")]
 pub struct ScrapeOptions {
     /// Formats to extract from the page. (default: `[ Markdown ]`)
@@ -89,7 +89,6 @@ pub struct ScrapeOptions {
 }
 
 #[derive(Deserialize, Serialize, Debug, Default)]
-#[serde_with::skip_serializing_none]
 #[serde(rename_all = "camelCase")]
 struct ScrapeRequestBody {
     url: String,
@@ -99,7 +98,6 @@ struct ScrapeRequestBody {
 }
 
 #[derive(Deserialize, Serialize, Debug, Default)]
-#[serde_with::skip_serializing_none]
 #[serde(rename_all = "camelCase")]
 struct ScrapeResponse {
     /// This will always be `true` due to `FirecrawlApp::handle_response`.
@@ -111,14 +109,15 @@ struct ScrapeResponse {
 }
 
 impl FirecrawlApp {
+    /// Scrapes a URL using the Firecrawl API.
     pub async fn scrape_url(
         &self,
         url: impl AsRef<str>,
-        options: Option<ScrapeOptions>,
+        options: impl Into<Option<ScrapeOptions>>,
     ) -> Result<Document, FirecrawlError> {
         let body = ScrapeRequestBody {
             url: url.as_ref().to_string(),
-            options: options.unwrap_or_default(),
+            options: options.into().unwrap_or_default(),
         };
 
         let headers = self.prepare_headers(None);
 
@@ -130,7 +129,7 @@ impl FirecrawlApp {
             .json(&body)
             .send()
             .await
-            .map_err(|e| FirecrawlError::HttpRequestFailed(e.to_string()))?;
+            .map_err(|e| FirecrawlError::HttpError(format!("Scraping {:?}", url.as_ref()), e))?;
 
         let response = self.handle_response::<ScrapeResponse>(response, "scrape URL").await?;
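`scrape_url` keeps its shape but now accepts `impl Into<Option<ScrapeOptions>>`, so both `None` and a bare options struct are valid arguments. A sketch of the two call styles, assuming `Document.html` is populated when the HTML format is requested, as the e2e test below exercises:

```rust
use firecrawl::scrape::{ScrapeFormats, ScrapeOptions};
use firecrawl::{FirecrawlApp, FirecrawlError};

async fn scrape_both_ways(app: &FirecrawlApp) -> Result<(), FirecrawlError> {
    // Default formats (markdown only):
    let plain = app.scrape_url("https://example.com", None).await?;
    assert!(plain.markdown.is_some());

    // Options passed without wrapping in Some(...):
    let with_html = app
        .scrape_url(
            "https://example.com",
            ScrapeOptions {
                formats: Some(vec![ScrapeFormats::Markdown, ScrapeFormats::HTML]),
                ..Default::default()
            },
        )
        .await?;
    assert!(with_html.html.is_some());
    Ok(())
}
```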
diff --git a/apps/rust-sdk/tests/e2e_with_auth.rs b/apps/rust-sdk/tests/e2e_with_auth.rs
index 99b14df9..6ee7f79c 100644
--- a/apps/rust-sdk/tests/e2e_with_auth.rs
+++ b/apps/rust-sdk/tests/e2e_with_auth.rs
@@ -1,24 +1,16 @@
 use assert_matches::assert_matches;
 use dotenvy::dotenv;
+use firecrawl::scrape::{ExtractOptions, ScrapeFormats, ScrapeOptions};
 use firecrawl::FirecrawlApp;
 use serde_json::json;
 use std::env;
-use std::time::Duration;
-use tokio::time::sleep;
-
-#[tokio::test]
-async fn test_no_api_key() {
-    dotenv().ok();
-    let api_url = env::var("API_URL").expect("API_URL environment variable is not set");
-    assert_matches!(FirecrawlApp::new(None, Some(api_url)), Err(e) if e.to_string() == "API key not provided");
-}
 
 #[tokio::test]
 async fn test_blocklisted_url() {
     dotenv().ok();
     let api_url = env::var("API_URL").unwrap();
-    let api_key = env::var("TEST_API_KEY").unwrap();
-    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
+    let api_key = env::var("TEST_API_KEY").ok();
+    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
     let blocklisted_url = "https://facebook.com/fake-test";
     let result = app.scrape_url(blocklisted_url, None).await;
 
@@ -32,74 +24,65 @@ async fn test_blocklisted_url() {
 async fn test_successful_response_with_valid_preview_token() {
     dotenv().ok();
     let api_url = env::var("API_URL").unwrap();
-    let app = FirecrawlApp::new(
-        Some("this_is_just_a_preview_token".to_string()),
-        Some(api_url),
+    let app = FirecrawlApp::new_selfhosted(
+        api_url,
+        Some("this_is_just_a_preview_token"),
     )
     .unwrap();
     let result = app
         .scrape_url("https://roastmywebsite.ai", None)
         .await
         .unwrap();
-    assert!(result.as_object().unwrap().contains_key("content"));
-    assert!(result["content"].as_str().unwrap().contains("_Roast_"));
+    assert!(result.markdown.is_some());
+    assert!(result.markdown.unwrap().contains("_Roast_"));
 }
 
 #[tokio::test]
 async fn test_scrape_url_e2e() {
     dotenv().ok();
     let api_url = env::var("API_URL").unwrap();
-    let api_key = env::var("TEST_API_KEY").unwrap();
-    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
+    let api_key = env::var("TEST_API_KEY").ok();
+    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
     let result = app
         .scrape_url("https://roastmywebsite.ai", None)
         .await
         .unwrap();
-    assert!(result.as_object().unwrap().contains_key("content"));
-    assert!(result.as_object().unwrap().contains_key("markdown"));
-    assert!(result.as_object().unwrap().contains_key("metadata"));
-    assert!(!result.as_object().unwrap().contains_key("html"));
-    assert!(result["content"].as_str().unwrap().contains("_Roast_"));
+    assert!(result.markdown.is_some());
+    assert!(result.markdown.unwrap().contains("_Roast_"));
 }
 
 #[tokio::test]
 async fn test_successful_response_with_valid_api_key_and_include_html() {
     dotenv().ok();
     let api_url = env::var("API_URL").unwrap();
-    let api_key = env::var("TEST_API_KEY").unwrap();
-    let app = FirecrawlApp::new(Some(api_key), Some(api_url)).unwrap();
-    let params = json!({
-        "pageOptions": {
-            "includeHtml": true
-        }
-    });
+    let api_key = env::var("TEST_API_KEY").ok();
+    let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap();
+    let params = ScrapeOptions {
+        formats: vec![ ScrapeFormats::Markdown, ScrapeFormats::HTML ].into(),
+        ..Default::default()
+    };
     let result = app
-        .scrape_url("https://roastmywebsite.ai", Some(params))
+        .scrape_url("https://roastmywebsite.ai", params)
         .await
         .unwrap();
-    assert!(result.as_object().unwrap().contains_key("content"));
-    assert!(result.as_object().unwrap().contains_key("markdown"));
-    assert!(result.as_object().unwrap().contains_key("html"));
-    assert!(result.as_object().unwrap().contains_key("metadata"));
-    assert!(result["content"].as_str().unwrap().contains("_Roast_"));
-    assert!(result["markdown"].as_str().unwrap().contains("_Roast_"));
-    assert!(result["html"].as_str().unwrap().contains("<h1"));