Gergő Móricz b03670a8b7
feat: parse PDFs on fc side and reject if too long for timeout (FIR-2083) (#1592)
* feat: pdf-parser, implementation in scrapeURL

* use pdf-parser for page count instead of mu

* fix(pdf-parser): bindings

* feat(scrapeURL/pdf): adjust MILLISECONDS_PER_PAGE

* implement post-runsync polling and fix

* fix(Dockerfile): copy in the pdf-parser source code

* fix(scrapeURL/pdf): better error for timeout below 0
2025-05-23 13:45:53 +02:00

24 lines
683 B
Rust

use std::{ffi::CStr};
/// Returns the number of pages in a PDF file.
///
/// `path` is interpreted as a UTF-8 encoded filesystem path. The return value is the number of
/// pages reported by `lopdf` for the document, clamped to `i32::MAX`, or `-1` if the page count
/// could not be determined (null pointer, path that is not valid UTF-8, or a file that `lopdf`
/// fails to load).
///
/// # Safety
/// `path` must be either null or a pointer to a valid, NUL-terminated C string that remains
/// alive and unmodified for the duration of the call.
#[no_mangle]
pub unsafe extern "C" fn get_page_count(path: *const libc::c_char) -> i32 {
    // `CStr::from_ptr` on a null pointer is undefined behaviour, so reject it up front and
    // report it through the same error channel as every other failure.
    if path.is_null() {
        return -1;
    }

    // SAFETY: `path` is non-null (checked above) and the caller guarantees that it points to a
    // valid NUL-terminated string that outlives this call.
    let path = match unsafe { CStr::from_ptr(path) }.to_str() {
        Ok(path) => path,
        // The path is not valid UTF-8.
        Err(_) => return -1,
    };

    match lopdf::Document::load(path) {
        Ok(doc) => {
            let page_count = doc.get_pages().len();
            // Clamp instead of using a bare `as i32`, which would wrap and could turn a huge
            // count into a negative number that callers would mistake for an error or a small
            // document.
            page_count.min(i32::MAX as usize) as i32
        }
        Err(_) => -1,
    }
}