mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-15 02:45:56 +08:00
Update index.ts
This commit is contained in:
parent
654d6c6e0b
commit
ed7d15d2af
@ -140,9 +140,23 @@ export async function scrapePDF(meta: Meta, timeToRun: number | undefined): Prom
|
||||
const { response, tempFilePath } = await downloadFile(meta.id, meta.url);
|
||||
|
||||
let result: PDFProcessorResult | null = null;
|
||||
if (process.env.LLAMAPARSE_API_KEY) {
|
||||
|
||||
// First, try parsing with PdfParse
|
||||
result = await scrapePDFWithParsePDF(
|
||||
{
|
||||
...meta,
|
||||
logger: meta.logger.child({
|
||||
method: "scrapePDF/scrapePDFWithParsePDF",
|
||||
}),
|
||||
},
|
||||
tempFilePath,
|
||||
);
|
||||
|
||||
|
||||
// If the parsed text is under 500 characters and LLAMAPARSE_API_KEY exists, try LlamaParse
|
||||
if (result.markdown && result.markdown.length < 500 && process.env.LLAMAPARSE_API_KEY) {
|
||||
try {
|
||||
result = await scrapePDFWithLlamaParse(
|
||||
const llamaResult = await scrapePDFWithLlamaParse(
|
||||
{
|
||||
...meta,
|
||||
logger: meta.logger.child({
|
||||
@ -152,16 +166,17 @@ export async function scrapePDF(meta: Meta, timeToRun: number | undefined): Prom
|
||||
tempFilePath,
|
||||
timeToRun,
|
||||
);
|
||||
result = llamaResult; // Use LlamaParse result if successful
|
||||
} catch (error) {
|
||||
if (error instanceof Error && error.message === "LlamaParse timed out") {
|
||||
meta.logger.warn("LlamaParse timed out -- falling back to parse-pdf", {
|
||||
meta.logger.warn("LlamaParse timed out -- using parse-pdf result", {
|
||||
error,
|
||||
});
|
||||
} else if (error instanceof RemoveFeatureError) {
|
||||
throw error;
|
||||
} else {
|
||||
meta.logger.warn(
|
||||
"LlamaParse failed to parse PDF -- falling back to parse-pdf",
|
||||
"LlamaParse failed to parse PDF -- using parse-pdf result",
|
||||
{ error },
|
||||
);
|
||||
Sentry.captureException(error);
|
||||
@ -169,18 +184,6 @@ export async function scrapePDF(meta: Meta, timeToRun: number | undefined): Prom
|
||||
}
|
||||
}
|
||||
|
||||
if (result === null) {
|
||||
result = await scrapePDFWithParsePDF(
|
||||
{
|
||||
...meta,
|
||||
logger: meta.logger.child({
|
||||
method: "scrapePDF/scrapePDFWithParsePDF",
|
||||
}),
|
||||
},
|
||||
tempFilePath,
|
||||
);
|
||||
}
|
||||
|
||||
await fs.unlink(tempFilePath);
|
||||
|
||||
return {
|
||||
|
Loading…
x
Reference in New Issue
Block a user