mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-06-04 11:24:40 +08:00

* chore(rust-sdk): cargo fmt * feat(rust-sdk): implement search api + example + test * feat(rust-sdk): implement crawl cancel api + example + test * feat(rust-sdk): implement crawl check errors api + example + test * feat(rust-sdk): implement batch crawl + test + example + Fix MapOptions * feat(rust-sdk): implement extract api + test + example * feat(rust-sdk): implement llmtxt api + test + example * chore(rust-sdk): correct mock tests * chore(rust-sdk): prep for cargo distribution
91 lines
2.8 KiB
Rust
use firecrawl::{
|
|
crawl::CrawlOptions,
|
|
scrape::{ExtractOptions, ScrapeFormats, ScrapeOptions},
|
|
FirecrawlApp,
|
|
};
|
|
use serde_json::json;
|
|
|
|
#[tokio::main]
|
|
async fn main() {
|
|
// Initialize the FirecrawlApp with the API key
|
|
let app = FirecrawlApp::new("fc-YOUR-API-KEY").expect("Failed to initialize FirecrawlApp");
|
|
|
|
// Or, connect to a self-hosted instance:
|
|
// let app = FirecrawlApp::new_selfhosted("http://localhost:3002", None).expect("Failed to initialize FirecrawlApp");
|
|
|
|
// Scrape a website
|
|
let scrape_result = app.scrape_url("https://firecrawl.dev", None).await;
|
|
|
|
match scrape_result {
|
|
Ok(data) => println!("Scrape Result:\n{}", data.markdown.unwrap()),
|
|
Err(e) => eprintln!("Scrape failed: {:#?}", e),
|
|
}
|
|
|
|
// Crawl a website
|
|
let crawl_options = CrawlOptions {
|
|
exclude_paths: vec!["blog/*".into()].into(),
|
|
..Default::default()
|
|
};
|
|
|
|
let crawl_result = app.crawl_url("https://mendable.ai", crawl_options).await;
|
|
|
|
match crawl_result {
|
|
Ok(data) => println!(
|
|
"Crawl Result (used {} credits):\n{:#?}",
|
|
data.credits_used, data.data
|
|
),
|
|
Err(e) => eprintln!("Crawl failed: {}", e),
|
|
}
|
|
|
|
// Scrape with Extract
|
|
let json_schema = json!({
|
|
"type": "object",
|
|
"properties": {
|
|
"top": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "object",
|
|
"properties": {
|
|
"title": {"type": "string"},
|
|
"points": {"type": "number"},
|
|
"by": {"type": "string"},
|
|
"commentsURL": {"type": "string"}
|
|
},
|
|
"required": ["title", "points", "by", "commentsURL"]
|
|
},
|
|
"minItems": 5,
|
|
"maxItems": 5,
|
|
"description": "Top 5 stories on Hacker News"
|
|
}
|
|
},
|
|
"required": ["top"]
|
|
});
|
|
|
|
let llm_extraction_options = ScrapeOptions {
|
|
formats: vec![ScrapeFormats::Extract].into(),
|
|
extract: ExtractOptions {
|
|
schema: json_schema.into(),
|
|
..Default::default()
|
|
}
|
|
.into(),
|
|
..Default::default()
|
|
};
|
|
|
|
let llm_extraction_result = app
|
|
.scrape_url("https://news.ycombinator.com", llm_extraction_options)
|
|
.await;
|
|
|
|
match llm_extraction_result {
|
|
Ok(data) => println!("LLM Extraction Result:\n{:#?}", data.extract.unwrap()),
|
|
Err(e) => eprintln!("LLM Extraction failed: {}", e),
|
|
}
|
|
|
|
// Map a website (Alpha)
|
|
let map_result = app.map_url("https://firecrawl.dev", None).await;
|
|
|
|
match map_result {
|
|
Ok(data) => println!("Mapped URLs: {:#?}", data),
|
|
Err(e) => eprintln!("Map failed: {}", e),
|
|
}
|
|
}
|