feat(scrapeURL/pdf): bill 5 credits per page for multi-page PDFs

This commit is contained in:
Gergő Móricz 2025-05-14 23:56:00 +02:00
parent cee481a3a9
commit d0d0845a66
6 changed files with 17 additions and 3 deletions

View File

@ -153,6 +153,9 @@ export async function scrapeController(
} else {
creditsToBeBilled = 150;
}
} else if (doc.metadata.numPages !== undefined && doc.metadata.numPages > 1) {
const creditsPerPDFPage = 5;
creditsToBeBilled = creditsPerPDFPage * doc.metadata.numPages;
}
if (req.body.proxy === "stealth") {

View File

@ -738,6 +738,7 @@ export type Document = {
statusCode: number;
scrapeId?: string;
error?: string;
numPages?: number;
// [key: string]: string | string[] | number | { smartScrape: number; other: number; total: number } | undefined;
};
serpResults?: {

View File

@ -101,6 +101,8 @@ export type EngineScrapeResult = {
value: unknown
}[];
};
numPages?: number;
};
const engineHandlers: {

View File

@ -12,7 +12,7 @@ import { readFile, unlink } from "node:fs/promises";
import path from "node:path";
import type { Response } from "undici";
type PDFProcessorResult = { html: string; markdown?: string };
/**
 * Result produced by a PDF processor.
 *
 * `html` is always present, `markdown` may be omitted, and `numPages`
 * carries the page count reported by the processor.
 */
type PDFProcessorResult = {
  html: string;
  markdown?: string;
  numPages: number;
};

/** Size threshold in bytes: 19 * 1024 * 1024 (19 MiB). */
const MAX_FILE_SIZE = 19 * 1024 ** 2;
@ -45,6 +45,7 @@ async function scrapePDFWithRunPodMU(
schema: z.object({
output: z.object({
markdown: z.string(),
num_pages: z.number(),
}),
}),
mock: meta.mock,
@ -53,6 +54,7 @@ async function scrapePDFWithRunPodMU(
return {
markdown: result.output.markdown,
html: await marked.parse(result.output.markdown, { async: true }),
numPages: result.output.num_pages,
};
}
@ -68,6 +70,7 @@ async function scrapePDFWithParsePDF(
return {
markdown: escaped,
html: escaped,
numPages: result.numpages,
};
}
@ -172,7 +175,8 @@ export async function scrapePDF(
return {
url: response.url ?? meta.url,
statusCode: response.status,
html: result?.html ?? "",
markdown: result?.markdown ?? "",
html: result.html ?? "",
markdown: result.markdown ?? "",
numPages: result.numPages,
};
}

View File

@ -368,6 +368,7 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
url: result.result.url,
statusCode: result.result.statusCode,
error: result.result.error,
numPages: result.result.numPages,
},
};

View File

@ -1365,6 +1365,9 @@ async function processJob(job: Job & { id: string }, token: string) {
} else {
creditsToBeBilled = 150;
}
} else if (doc.metadata.numPages !== undefined && doc.metadata.numPages > 1) {
const creditsPerPDFPage = 5;
creditsToBeBilled = creditsPerPDFPage * doc.metadata.numPages;
}
if (job.data.scrapeOptions.proxy === "stealth") {