Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl (synced 2025-08-12 02:18:59 +08:00)
propagate priority to fire-engine
commit 29f0d9ec94
parent b79d3d1754
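The change threads a scrape priority from the Bull job options (job.opts.priority) through the scraper pipeline and into the request sent to fire-engine. As a point of reference, here is a minimal sketch of how a producer could set that priority when enqueueing a job; the queue name, payload shape, and Redis URL are illustrative assumptions, not part of this commit:

import Queue from "bull";

// Hypothetical queue and payload, shown only to illustrate where the value
// later read as job.opts.priority comes from. Bull treats 1 as the highest priority.
const webScraperQueue = new Queue("web-scraper", "redis://localhost:6379");

async function enqueueScrape(url: string, team_id: string, priority: number) {
  return webScraperQueue.add({ url, team_id, mode: "crawl" }, { priority });
}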
@@ -65,6 +65,7 @@ export type WebScraperOptions = {
   extractorOptions?: ExtractorOptions;
   concurrentRequests?: number;
   bullJobId?: string;
+  priority?: number;
 };
 
 export interface DocumentUrl {
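Because priority is optional on WebScraperOptions, anything reading it has to tolerate undefined. A small sketch of one way to pick a default; the helper name and fallback value are hypothetical:

// Hypothetical helper: use a fallback when no priority was set and keep the
// result in Bull's valid range (priorities start at 1, the highest).
function normalizeJobPriority(priority?: number, fallback = 10): number {
  return Math.max(1, Math.round(priority ?? fallback));
}

// normalizeJobPriority(undefined) -> 10, normalizeJobPriority(0.4) -> 1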
@@ -49,6 +49,7 @@ export async function startWebScraperPipeline({
     },
     team_id: job.data.team_id,
     bull_job_id: job.id.toString(),
+    priority: job.opts.priority,
   })) as { success: boolean; message: string; docs: Document[] };
 }
 export async function runWebScraper({
@@ -62,6 +63,7 @@ export async function runWebScraper({
   onError,
   team_id,
   bull_job_id,
+  priority,
 }: RunWebScraperParams): Promise<RunWebScraperResult> {
   try {
     const provider = new WebScraperDataProvider();
@@ -74,6 +76,7 @@ export async function runWebScraper({
         crawlerOptions: crawlerOptions,
         pageOptions: pageOptions,
         bullJobId: bull_job_id,
+        priority,
       });
     } else {
       await provider.setOptions({
@@ -83,6 +86,7 @@ export async function runWebScraper({
         extractorOptions,
         crawlerOptions: crawlerOptions,
         pageOptions: pageOptions,
+        priority,
       });
     }
     const docs = (await provider.getDocuments(false, (progress: Progress) => {
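On the consumer side, the value read above as job.opts.priority is simply the job option set at enqueue time. A processor-side sketch, assuming Bull and a local Redis instance (queue name illustrative):

import Bull from "bull";

// Hypothetical processor mirroring startWebScraperPipeline: the per-job
// priority surfaces on job.opts and can be handed onward from there.
const queue = new Bull("web-scraper"); // defaults to redis://127.0.0.1:6379

queue.process(async (job) => {
  const priority: number | undefined = job.opts.priority; // undefined if not set
  return { handled: job.id, priority };
});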
@@ -44,6 +44,7 @@ export class WebScraperDataProvider {
   private crawlerMode: string = "default";
   private allowBackwardCrawling: boolean = false;
   private allowExternalContentLinks: boolean = false;
+  private priority?: number;
 
   authorize(): void {
     throw new Error("Method not implemented.");
@@ -72,7 +73,8 @@ export class WebScraperDataProvider {
           url,
           this.pageOptions,
           this.extractorOptions,
-          existingHTML
+          existingHTML,
+          this.priority,
         );
         processedUrls++;
         if (inProgress) {
@@ -593,6 +595,7 @@ export class WebScraperDataProvider {
       options.crawlerOptions?.allowBackwardCrawling ?? false;
     this.allowExternalContentLinks =
       options.crawlerOptions?.allowExternalContentLinks ?? false;
+    this.priority = options.priority;
 
     // make sure all urls start with https://
     this.urls = this.urls.map((url) => {
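The provider captures the priority once in setOptions and then reuses it for every URL it scrapes in that run. A stripped-down illustration of that pattern (names and bodies are hypothetical, only the shape mirrors the hunks above):

// Minimal sketch: priority becomes instance state in setOptions and is
// forwarded on each per-URL scrape call.
class PriorityCarryingProvider {
  private priority?: number;

  setOptions(options: { priority?: number }): void {
    this.priority = options.priority;
  }

  scrapeAll(
    urls: string[],
    scrapeOne: (url: string, priority?: number) => Promise<string>
  ): Promise<string[]> {
    return Promise.all(urls.map((url) => scrapeOne(url, this.priority)));
  }
}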
@@ -26,6 +26,7 @@ export async function scrapWithFireEngine({
   fireEngineOptions = {},
   headers,
   options,
+  priority,
 }: {
   url: string;
   waitFor?: number;
@@ -35,6 +36,7 @@ export async function scrapWithFireEngine({
   fireEngineOptions?: FireEngineOptions;
   headers?: Record<string, string>;
   options?: any;
+  priority?: number;
 }): Promise<FireEngineResponse> {
   const logParams = {
     url,
@@ -78,6 +80,7 @@ export async function scrapWithFireEngine({
         fullPageScreenshot: fullPageScreenshotParam,
         headers: headers,
         pageOptions: pageOptions,
+        priority,
         ...fireEngineOptionsParam,
       },
       {
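The priority now rides in the body of the HTTP request to fire-engine, next to the page options. A hedged sketch of that request shape using axios; the endpoint variable, path, and extra fields are illustrative, not taken from this commit:

import axios from "axios";

// Hypothetical standalone version of the fire-engine call: what matters is
// that `priority` is part of the JSON body the scraping service receives.
async function requestFireEngineScrape(url: string, priority?: number) {
  const endpoint = process.env.FIRE_ENGINE_URL ?? "http://localhost:3000"; // illustrative
  const response = await axios.post(
    `${endpoint}/scrape`,
    {
      url,
      pageOptions: { onlyMainContent: false }, // illustrative page options
      priority, // undefined fields are dropped by JSON serialization
    },
    { headers: { "Content-Type": "application/json" }, timeout: 60_000 }
  );
  return response.data;
}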
@@ -134,7 +134,8 @@ export async function scrapSingleUrl(
   extractorOptions: ExtractorOptions = {
     mode: "llm-extraction-from-markdown",
   },
-  existingHtml: string = ""
+  existingHtml: string = "",
+  priority?: number,
 ): Promise<Document> {
   urlToScrap = urlToScrap.trim();
 
@@ -177,7 +178,8 @@ export async function scrapSingleUrl(
           headers: pageOptions.headers,
           fireEngineOptions: {
             engine: engine,
-          }
+          },
+          priority,
         });
         scraperResponse.text = response.html;
         scraperResponse.screenshot = response.screenshot;
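Because the new priority parameter is positional and last, a direct caller without cached HTML still passes the existingHtml placeholder in front of it. A usage sketch under that assumption (import path and option values are illustrative):

import { scrapSingleUrl } from "./single_url"; // hypothetical import path

const doc = await scrapSingleUrl(
  "https://example.com",
  { onlyMainContent: true },                // pageOptions (illustrative)
  { mode: "llm-extraction-from-markdown" }, // extractorOptions default shown above
  "",                                       // existingHtml: no cached HTML
  5                                         // priority, forwarded to scrapWithFireEngine
);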
@@ -43,6 +43,7 @@ export interface RunWebScraperParams {
   onError: (error: Error) => void;
   team_id: string;
   bull_job_id: string;
+  priority?: number;
 }
 
 export interface RunWebScraperResult {