mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-06-04 11:24:40 +08:00

* chore(rust-sdk): cargo fmt * feat(rust-sdk): implement search api + example + test * feat(rust-sdk): implement crawl cancel api + example + test * feat(rust-sdk): implement crawl check errors api + example + test * feat(rust-sdk): implement batch crawl + test + example + Fix MapOptions * feat(rust-sdk): implement extract api + test + example * feat(rust-sdk): implement llmtxt api + test + example * chore(rust-sdk): correct mock tests * chore(rust-sdk): prep for cargo distribution
60 lines
2.1 KiB
Rust
60 lines
2.1 KiB
Rust
use firecrawl::FirecrawlApp;
|
|
use std::error::Error;
|
|
|
|
#[tokio::main]
|
|
async fn main() -> Result<(), Box<dyn Error>> {
|
|
// Get API URL from environment
|
|
let api_url = std::env::var("FIRECRAWL_API_URL")
|
|
.expect("Please set the FIRECRAWL_API_URL environment variable");
|
|
|
|
// Create the FirecrawlApp instance
|
|
let firecrawl = FirecrawlApp::new_selfhosted(api_url, None::<&str>)?;
|
|
|
|
// Start a crawl job that will likely have some errors (invalid URL format)
|
|
println!("Starting a crawl job...");
|
|
let crawl_response = firecrawl
|
|
.crawl_url_async("https://no-wer-agg.invalid", None)
|
|
.await?;
|
|
println!("Crawl job started with ID: {}", crawl_response.id);
|
|
|
|
println!("Let it do it's thing...");
|
|
tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
|
|
|
|
// Check the crawl errors
|
|
println!("Checking for crawl errors...");
|
|
match firecrawl.check_crawl_errors(&crawl_response.id).await {
|
|
Ok(error_response) => {
|
|
println!("Crawl errors response:");
|
|
println!(" Number of errors: {}", error_response.errors.len());
|
|
|
|
if !error_response.errors.is_empty() {
|
|
println!("\nDetailed errors:");
|
|
for (i, error) in error_response.errors.iter().enumerate() {
|
|
println!("Error #{}", i + 1);
|
|
println!(" ID: {}", error.id);
|
|
if let Some(timestamp) = &error.timestamp {
|
|
println!(" Timestamp: {}", timestamp);
|
|
}
|
|
println!(" URL: {}", error.url);
|
|
println!(" Error: {}", error.error);
|
|
}
|
|
}
|
|
|
|
println!(
|
|
"\nRobots.txt blocked URLs: {}",
|
|
error_response.robots_blocked.len()
|
|
);
|
|
for (i, url) in error_response.robots_blocked.iter().enumerate() {
|
|
println!(" {}. {}", i + 1, url);
|
|
}
|
|
}
|
|
Err(e) => {
|
|
println!("Failed to check crawl errors: {}", e);
|
|
}
|
|
}
|
|
let cancel = firecrawl.cancel_crawl(&crawl_response.id).await?;
|
|
println!("Cancel: {}", cancel.status);
|
|
|
|
Ok(())
|
|
}
|