mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-19 04:05:58 +08:00
feat(scrapeURL/pdf): bill n credits per page
This commit is contained in:
parent
cee481a3a9
commit
d0d0845a66
@ -153,6 +153,9 @@ export async function scrapeController(
|
|||||||
} else {
|
} else {
|
||||||
creditsToBeBilled = 150;
|
creditsToBeBilled = 150;
|
||||||
}
|
}
|
||||||
|
} else if (doc.metadata.numPages !== undefined && doc.metadata.numPages > 1) {
|
||||||
|
const creditsPerPDFPage = 5;
|
||||||
|
creditsToBeBilled = creditsPerPDFPage * doc.metadata.numPages;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (req.body.proxy === "stealth") {
|
if (req.body.proxy === "stealth") {
|
||||||
|
@ -738,6 +738,7 @@ export type Document = {
|
|||||||
statusCode: number;
|
statusCode: number;
|
||||||
scrapeId?: string;
|
scrapeId?: string;
|
||||||
error?: string;
|
error?: string;
|
||||||
|
numPages?: number;
|
||||||
// [key: string]: string | string[] | number | { smartScrape: number; other: number; total: number } | undefined;
|
// [key: string]: string | string[] | number | { smartScrape: number; other: number; total: number } | undefined;
|
||||||
};
|
};
|
||||||
serpResults?: {
|
serpResults?: {
|
||||||
|
@ -101,6 +101,8 @@ export type EngineScrapeResult = {
|
|||||||
value: unknown
|
value: unknown
|
||||||
}[];
|
}[];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
numPages?: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
const engineHandlers: {
|
const engineHandlers: {
|
||||||
|
@ -12,7 +12,7 @@ import { readFile, unlink } from "node:fs/promises";
|
|||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
import type { Response } from "undici";
|
import type { Response } from "undici";
|
||||||
|
|
||||||
type PDFProcessorResult = { html: string; markdown?: string };
|
type PDFProcessorResult = { html: string; markdown?: string; numPages: number };
|
||||||
|
|
||||||
const MAX_FILE_SIZE = 19 * 1024 * 1024; // 19MB
|
const MAX_FILE_SIZE = 19 * 1024 * 1024; // 19MB
|
||||||
|
|
||||||
@ -45,6 +45,7 @@ async function scrapePDFWithRunPodMU(
|
|||||||
schema: z.object({
|
schema: z.object({
|
||||||
output: z.object({
|
output: z.object({
|
||||||
markdown: z.string(),
|
markdown: z.string(),
|
||||||
|
num_pages: z.number(),
|
||||||
}),
|
}),
|
||||||
}),
|
}),
|
||||||
mock: meta.mock,
|
mock: meta.mock,
|
||||||
@ -53,6 +54,7 @@ async function scrapePDFWithRunPodMU(
|
|||||||
return {
|
return {
|
||||||
markdown: result.output.markdown,
|
markdown: result.output.markdown,
|
||||||
html: await marked.parse(result.output.markdown, { async: true }),
|
html: await marked.parse(result.output.markdown, { async: true }),
|
||||||
|
numPages: result.output.num_pages,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -68,6 +70,7 @@ async function scrapePDFWithParsePDF(
|
|||||||
return {
|
return {
|
||||||
markdown: escaped,
|
markdown: escaped,
|
||||||
html: escaped,
|
html: escaped,
|
||||||
|
numPages: result.numpages,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -172,7 +175,8 @@ export async function scrapePDF(
|
|||||||
return {
|
return {
|
||||||
url: response.url ?? meta.url,
|
url: response.url ?? meta.url,
|
||||||
statusCode: response.status,
|
statusCode: response.status,
|
||||||
html: result?.html ?? "",
|
html: result.html ?? "",
|
||||||
markdown: result?.markdown ?? "",
|
markdown: result.markdown ?? "",
|
||||||
|
numPages: result.numPages,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -368,6 +368,7 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
|
|||||||
url: result.result.url,
|
url: result.result.url,
|
||||||
statusCode: result.result.statusCode,
|
statusCode: result.result.statusCode,
|
||||||
error: result.result.error,
|
error: result.result.error,
|
||||||
|
numPages: result.result.numPages,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1365,6 +1365,9 @@ async function processJob(job: Job & { id: string }, token: string) {
|
|||||||
} else {
|
} else {
|
||||||
creditsToBeBilled = 150;
|
creditsToBeBilled = 150;
|
||||||
}
|
}
|
||||||
|
} else if (doc.metadata.numPages !== undefined && doc.metadata.numPages > 1) {
|
||||||
|
const creditsPerPDFPage = 5;
|
||||||
|
creditsToBeBilled = creditsPerPDFPage * doc.metadata.numPages;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (job.data.scrapeOptions.proxy === "stealth") {
|
if (job.data.scrapeOptions.proxy === "stealth") {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user