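//! Firecrawl Rust SDK example: generating LLMs.txt for a site.
//!
//! Three modes are selectable via `--mode` (`basic`, `pool`, `fulltext`):
//! a blocking call, manual polling of an asynchronous job, and a request
//! that also returns the full LLMs-full.txt. The `FIRECRAWL_API_URL`
//! environment variable must point at a Firecrawl instance.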
#![allow(clippy::option_map_unit_fn)]
use bat::{Input, PrettyPrinter};
use firecrawl::{llmstxt::GenerateLLMsTextParams, FirecrawlApp};
use std::error::Error;

use clap::{Parser, ValueEnum};

#[derive(Copy, Clone, PartialEq, Eq, ValueEnum)]
enum Mode {
    Basic,
    Pool,
    Fulltext,
}

#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// URL for which to generate LLMs.txt
    #[arg(default_value = "https://www.firecrawl.dev/")]
    url: String,

    /// Which example mode to run
    #[arg(long, short = 'm', value_enum, default_value = "basic")]
    mode: Mode,

    /// Maximum number of URLs to process
    #[arg(long, short = 'd', default_value = "1")]
    max_urls: u32,

    /// Whether to show the full LLMs-full.txt in the response
    #[arg(long, short = 'f', default_value = "false")]
    full_text: bool,

    /// Experimental streaming option
    #[arg(long, short = 's', default_value = "false")]
    stream: bool,
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    let args = Args::parse();

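    // Build the client against the endpoint given by FIRECRAWL_API_URL;
    // no API key is passed here.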
    let api_url = std::env::var("FIRECRAWL_API_URL")
        .expect("Please set the FIRECRAWL_API_URL environment variable");
    let firecrawl = FirecrawlApp::new_selfhosted(api_url, None::<&str>)?;

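    // Request parameters shared by all three modes, built from the CLI flags.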
    let params = GenerateLLMsTextParams {
        url: args.url.clone(),
        max_urls: args.max_urls,
        show_full_text: args.full_text,
        experimental_stream: args.stream,
    };

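    // Each arm below is a self-contained example for the selected mode.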
    match args.mode {
        Mode::Basic => {
            println!("Example 1: Basic LLMs.txt generation (synchronous)");
            println!("Generating LLMs.txt for {}...", args.url);
            firecrawl
                .generate_llms_text(params)
                .await
                .inspect(|result| {
                    println!("Expires at: {}", result.expires_at);
                    let text = (if args.full_text {
                        result.data.full.as_ref()
                    } else {
                        result.data.compact.as_ref()
                    })
                    .expect("LLM Text");

                    pretty_print_content("Firecrawl Result", text).expect("Print");
                })?;
        }
        Mode::Pool => {
            println!("Example 2: Asynchronous LLMs.txt generation with manual polling");

            println!("Starting asynchronous LLMs.txt generation job...");
            let response = firecrawl.async_generate_llms_text(params).await?;

            println!("LLMs.txt generation job initiated:");
            println!(" Job ID: {}", response.id);
            println!("\nManually polling for status...");
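            // Poll up to 10 times, sleeping 2 seconds between status checks.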
            for _ in 0..10 {
                let status = firecrawl
                    .check_generate_llms_text_status(&response.id)
                    .await?;

                match status.status.as_str() {
                    "completed" => {
                        println!("LLMs.txt generation completed!");
                        let text = (if args.full_text {
                            status.data.full.as_ref()
                        } else {
                            status.data.compact.as_ref()
                        })
                        .expect("LLM Text");

                        pretty_print_content("Pool Result", text).expect("Print");

                        break;
                    }
                    "failed" => {
                        println!(
                            "LLMs.txt generation failed: {}",
                            status.error.unwrap_or_default()
                        );
                        break;
                    }
                    status => println!("Generation status: {}", status),
                }

                println!("Waiting 2 seconds before checking again...");
                tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
            }
        }
        Mode::Fulltext => {
            println!("Example 3: LLMs.txt generation with full text");

            println!("Generating LLMs.txt with full text...");
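            // Note: `result.data.full` is presumably only populated when the
            // request sets show_full_text, so running this mode without
            // --full-text will likely panic on the expect below.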
            match firecrawl.generate_llms_text(params).await {
                Ok(result) => {
                    println!("LLMs.txt generation completed successfully!");
                    let llmstxt = result.data.compact.expect("LLMs Text Expected");
                    let fulltxt = result.data.full.expect("Full LLMs Text Expected");

                    pretty_print_contents(&[
                        ("LLMs.txt (compact)", llmstxt),
                        ("LLMs.txt (full text)", fulltxt),
                    ])
                    .expect("Print")
                }
                Err(e) => {
                    println!("LLMs.txt generation failed: {}", e);
                }
            }
        }
    }

    Ok(())
}

/// Pretty prints the provided content with syntax highlighting
fn pretty_print_content(title: &str, content: &str) -> Result<(), Box<dyn Error>> {
    PrettyPrinter::new()
        .header(true)
        .grid(true)
        .input(
            Input::from_bytes(content.as_bytes())
                .title(title)
                .name("file.md"),
        )
        .print()?;

    Ok(())
}

/// Pretty prints multiple contents with syntax highlighting
fn pretty_print_contents(title_contents: &[(&'static str, String)]) -> Result<(), Box<dyn Error>> {
    let mut inputs = Vec::new();
    for (title, content) in title_contents {
        inputs.push(
            Input::from_bytes(content.as_bytes())
                .title(*title)
                .name("file.md"),
        );
    }

    PrettyPrinter::new()
        .header(true)
        .grid(true)
        .inputs(inputs)
        .print()?;

    Ok(())
}