mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-06-04 11:24:40 +08:00

* feat: pdf-parser, implementation in scrapeURL * use pdf-parser for page count instead of mu * fix(pdf-parser): bindings * feat(scrapeURL/pdf): adjust MILLISECONDS_PER_PAGE * implement post-runsync polling and fix * fix(Dockerfile): copy in the pdf-parser source code * fix(scrapeURL/pdf): better error for timeout below 0
24 lines
683 B
Rust
24 lines
683 B
Rust
use std::{ffi::CStr};
|
|
|
|
/// Returns the number of pages in a PDF file
|
|
///
|
|
/// # Safety
|
|
/// Input path must be a C string of a path pointing to a PDF file. Output will be an integer, either the number of pages in the PDF or -1 indicating an error.
|
|
#[no_mangle]
|
|
pub unsafe extern "C" fn get_page_count(path: *const libc::c_char) -> i32 {
|
|
let path: String = match unsafe { CStr::from_ptr(path) }.to_str().map_err(|_| ()) {
|
|
Ok(x) => x.to_string(),
|
|
Err(_) => {
|
|
return -1;
|
|
}
|
|
};
|
|
|
|
let doc = match lopdf::Document::load(&path) {
|
|
Ok(x) => x,
|
|
Err(_) => {
|
|
return -1;
|
|
}
|
|
};
|
|
|
|
doc.get_pages().len() as i32
|
|
} |