From ec3d679c5b158311907b2601f4a694f7fbde23af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 17 Apr 2025 22:15:13 -0700 Subject: [PATCH] feat(rust-sdk): add agent options --- apps/rust-sdk/src/extract.rs | 11 +++++++++++ apps/rust-sdk/src/scrape.rs | 35 +++++++++++++++++++++++++++++------ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/apps/rust-sdk/src/extract.rs b/apps/rust-sdk/src/extract.rs index a1dd2ef9..a8721ee0 100644 --- a/apps/rust-sdk/src/extract.rs +++ b/apps/rust-sdk/src/extract.rs @@ -6,6 +6,14 @@ use serde_json::Value; use crate::{FirecrawlApp, FirecrawlError, API_VERSION}; +/// Agent options for extract requests +#[derive(Deserialize, Serialize, Debug, Default, Clone)] +#[serde(rename_all = "camelCase")] +pub struct AgentOptionsExtract { + /// Model to use for the agent + pub model: String, +} + /// Parameters for extract requests #[serde_with::skip_serializing_none] #[derive(Deserialize, Serialize, Debug, Default, Clone)] @@ -50,6 +58,9 @@ pub struct ExtractParams { /// Maximum number of URLs to process pub limit: Option, + /// Agent options + pub agent: Option, + /// Experimental: Stream steps information #[serde(rename = "__experimental_streamSteps")] pub experimental_stream_steps: Option, diff --git a/apps/rust-sdk/src/scrape.rs b/apps/rust-sdk/src/scrape.rs index 6432b04a..5e2496fb 100644 --- a/apps/rust-sdk/src/scrape.rs +++ b/apps/rust-sdk/src/scrape.rs @@ -37,22 +37,42 @@ pub enum ScrapeFormats { /// Will result in the results of an LLM extraction. /// - /// See `ScrapeOptions.extract` for more options. - #[serde(rename = "extract")] - Extract, + /// See `ScrapeOptions.json_options` for more options. + #[serde(rename = "json")] + Json, +} + +#[derive(Deserialize, Serialize, Debug, Default, Clone)] +#[serde(rename_all = "camelCase")] +pub struct AgentOptionsJson { + pub model: String, + pub prompt: Option, } #[serde_with::skip_serializing_none] #[derive(Deserialize, Serialize, Debug, Default, Clone)] #[serde(rename_all = "camelCase")] -pub struct ExtractOptions { +pub struct JsonOptions { /// Schema the output should adhere to, provided in JSON Schema format. pub schema: Option, + /// System prompt to send to the LLM agent along with the page content. pub system_prompt: Option, /// Extraction prompt to send to the LLM agent along with the page content. pub prompt: Option, + + /// Agent options for JSON extraction. + pub agent: Option, +} + +#[derive(Deserialize, Serialize, Debug, Default, Clone)] +#[serde(rename_all = "camelCase")] +pub struct AgentOptions { + pub model: String, + pub prompt: Option, + pub session_id: Option, + pub wait_before_closing_ms: Option, } #[serde_with::skip_serializing_none] @@ -84,8 +104,11 @@ pub struct ScrapeOptions { // Timeout before returning an error, in milliseconds. (default: `60000`) pub timeout: Option, - /// Extraction options, to be used in conjunction with `ScrapeFormats::Extract`. - pub extract: Option, + /// JSON extraction options, to be used in conjunction with `ScrapeFormats::Json`. + pub json_options: Option, + + /// Agent options for smart scrape. + pub agent: Option, } #[derive(Deserialize, Serialize, Debug, Default)]