mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-13 20:05:55 +08:00
feat(scrapeURL/pdf): support PDF prefetch when parsePDF is off
This commit is contained in:
parent
5eb0235ccb
commit
11ed679274
@ -76,17 +76,34 @@ export async function scrapePDF(
|
|||||||
timeToRun: number | undefined,
|
timeToRun: number | undefined,
|
||||||
): Promise<EngineScrapeResult> {
|
): Promise<EngineScrapeResult> {
|
||||||
if (!meta.options.parsePDF) {
|
if (!meta.options.parsePDF) {
|
||||||
const file = await fetchFileToBuffer(meta.url, {
|
if (meta.pdfPrefetch !== undefined && meta.pdfPrefetch !== null) {
|
||||||
headers: meta.options.headers,
|
const content = (await readFile(meta.pdfPrefetch.filePath)).toString("base64");
|
||||||
});
|
return {
|
||||||
const content = file.buffer.toString("base64");
|
url: meta.pdfPrefetch.url ?? meta.url,
|
||||||
return {
|
statusCode: meta.pdfPrefetch.status,
|
||||||
url: file.response.url,
|
|
||||||
statusCode: file.response.status,
|
|
||||||
|
|
||||||
html: content,
|
html: content,
|
||||||
markdown: content,
|
markdown: content,
|
||||||
};
|
};
|
||||||
|
} else {
|
||||||
|
const file = await fetchFileToBuffer(meta.url, {
|
||||||
|
headers: meta.options.headers,
|
||||||
|
});
|
||||||
|
|
||||||
|
const ct = file.response.headers.get("Content-Type");
|
||||||
|
if (ct && !ct.includes("application/pdf")) { // if downloaded file wasn't a PDF
|
||||||
|
throw new PDFAntibotError();
|
||||||
|
}
|
||||||
|
|
||||||
|
const content = file.buffer.toString("base64");
|
||||||
|
return {
|
||||||
|
url: file.response.url,
|
||||||
|
statusCode: file.response.status,
|
||||||
|
|
||||||
|
html: content,
|
||||||
|
markdown: content,
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const { response, tempFilePath } = (meta.pdfPrefetch !== undefined && meta.pdfPrefetch !== null)
|
const { response, tempFilePath } = (meta.pdfPrefetch !== undefined && meta.pdfPrefetch !== null)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user