mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-04 16:30:36 +08:00
Update pdfProcessor.ts
This commit is contained in:
parent
542270f4c2
commit
ff4266f09e
@ -12,10 +12,15 @@ import { Logger } from "../../../lib/logger";
|
|||||||
dotenv.config();
|
dotenv.config();
|
||||||
|
|
||||||
export async function fetchAndProcessPdf(url: string, parsePDF: boolean): Promise<{ content: string, pageStatusCode?: number, pageError?: string }> {
|
export async function fetchAndProcessPdf(url: string, parsePDF: boolean): Promise<{ content: string, pageStatusCode?: number, pageError?: string }> {
|
||||||
|
try {
|
||||||
const { tempFilePath, pageStatusCode, pageError } = await downloadPdf(url);
|
const { tempFilePath, pageStatusCode, pageError } = await downloadPdf(url);
|
||||||
const content = await processPdfToText(tempFilePath, parsePDF);
|
const content = await processPdfToText(tempFilePath, parsePDF);
|
||||||
fs.unlinkSync(tempFilePath); // Clean up the temporary file
|
fs.unlinkSync(tempFilePath); // Clean up the temporary file
|
||||||
return { content, pageStatusCode, pageError };
|
return { content, pageStatusCode, pageError };
|
||||||
|
} catch (error) {
|
||||||
|
Logger.error(`Failed to fetch and process PDF: ${error.message}`);
|
||||||
|
return { content: "", pageStatusCode: 500, pageError: error.message };
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function downloadPdf(url: string): Promise<{ tempFilePath: string, pageStatusCode?: number, pageError?: string }> {
|
async function downloadPdf(url: string): Promise<{ tempFilePath: string, pageStatusCode?: number, pageError?: string }> {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user