mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 19:39:16 +08:00
propagate priority to fire-engine
This commit is contained in:
parent
b79d3d1754
commit
29f0d9ec94
@ -65,6 +65,7 @@ export type WebScraperOptions = {
|
|||||||
extractorOptions?: ExtractorOptions;
|
extractorOptions?: ExtractorOptions;
|
||||||
concurrentRequests?: number;
|
concurrentRequests?: number;
|
||||||
bullJobId?: string;
|
bullJobId?: string;
|
||||||
|
priority?: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
export interface DocumentUrl {
|
export interface DocumentUrl {
|
||||||
|
@ -49,6 +49,7 @@ export async function startWebScraperPipeline({
|
|||||||
},
|
},
|
||||||
team_id: job.data.team_id,
|
team_id: job.data.team_id,
|
||||||
bull_job_id: job.id.toString(),
|
bull_job_id: job.id.toString(),
|
||||||
|
priority: job.opts.priority,
|
||||||
})) as { success: boolean; message: string; docs: Document[] };
|
})) as { success: boolean; message: string; docs: Document[] };
|
||||||
}
|
}
|
||||||
export async function runWebScraper({
|
export async function runWebScraper({
|
||||||
@ -62,6 +63,7 @@ export async function runWebScraper({
|
|||||||
onError,
|
onError,
|
||||||
team_id,
|
team_id,
|
||||||
bull_job_id,
|
bull_job_id,
|
||||||
|
priority,
|
||||||
}: RunWebScraperParams): Promise<RunWebScraperResult> {
|
}: RunWebScraperParams): Promise<RunWebScraperResult> {
|
||||||
try {
|
try {
|
||||||
const provider = new WebScraperDataProvider();
|
const provider = new WebScraperDataProvider();
|
||||||
@ -74,6 +76,7 @@ export async function runWebScraper({
|
|||||||
crawlerOptions: crawlerOptions,
|
crawlerOptions: crawlerOptions,
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
bullJobId: bull_job_id,
|
bullJobId: bull_job_id,
|
||||||
|
priority,
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
await provider.setOptions({
|
await provider.setOptions({
|
||||||
@ -83,6 +86,7 @@ export async function runWebScraper({
|
|||||||
extractorOptions,
|
extractorOptions,
|
||||||
crawlerOptions: crawlerOptions,
|
crawlerOptions: crawlerOptions,
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
|
priority,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
const docs = (await provider.getDocuments(false, (progress: Progress) => {
|
const docs = (await provider.getDocuments(false, (progress: Progress) => {
|
||||||
|
@ -44,6 +44,7 @@ export class WebScraperDataProvider {
|
|||||||
private crawlerMode: string = "default";
|
private crawlerMode: string = "default";
|
||||||
private allowBackwardCrawling: boolean = false;
|
private allowBackwardCrawling: boolean = false;
|
||||||
private allowExternalContentLinks: boolean = false;
|
private allowExternalContentLinks: boolean = false;
|
||||||
|
private priority?: number;
|
||||||
|
|
||||||
authorize(): void {
|
authorize(): void {
|
||||||
throw new Error("Method not implemented.");
|
throw new Error("Method not implemented.");
|
||||||
@ -72,7 +73,8 @@ export class WebScraperDataProvider {
|
|||||||
url,
|
url,
|
||||||
this.pageOptions,
|
this.pageOptions,
|
||||||
this.extractorOptions,
|
this.extractorOptions,
|
||||||
existingHTML
|
existingHTML,
|
||||||
|
this.priority,
|
||||||
);
|
);
|
||||||
processedUrls++;
|
processedUrls++;
|
||||||
if (inProgress) {
|
if (inProgress) {
|
||||||
@ -593,6 +595,7 @@ export class WebScraperDataProvider {
|
|||||||
options.crawlerOptions?.allowBackwardCrawling ?? false;
|
options.crawlerOptions?.allowBackwardCrawling ?? false;
|
||||||
this.allowExternalContentLinks =
|
this.allowExternalContentLinks =
|
||||||
options.crawlerOptions?.allowExternalContentLinks ?? false;
|
options.crawlerOptions?.allowExternalContentLinks ?? false;
|
||||||
|
this.priority = options.priority;
|
||||||
|
|
||||||
// make sure all urls start with https://
|
// make sure all urls start with https://
|
||||||
this.urls = this.urls.map((url) => {
|
this.urls = this.urls.map((url) => {
|
||||||
|
@ -26,6 +26,7 @@ export async function scrapWithFireEngine({
|
|||||||
fireEngineOptions = {},
|
fireEngineOptions = {},
|
||||||
headers,
|
headers,
|
||||||
options,
|
options,
|
||||||
|
priority,
|
||||||
}: {
|
}: {
|
||||||
url: string;
|
url: string;
|
||||||
waitFor?: number;
|
waitFor?: number;
|
||||||
@ -35,6 +36,7 @@ export async function scrapWithFireEngine({
|
|||||||
fireEngineOptions?: FireEngineOptions;
|
fireEngineOptions?: FireEngineOptions;
|
||||||
headers?: Record<string, string>;
|
headers?: Record<string, string>;
|
||||||
options?: any;
|
options?: any;
|
||||||
|
priority?: number;
|
||||||
}): Promise<FireEngineResponse> {
|
}): Promise<FireEngineResponse> {
|
||||||
const logParams = {
|
const logParams = {
|
||||||
url,
|
url,
|
||||||
@ -78,6 +80,7 @@ export async function scrapWithFireEngine({
|
|||||||
fullPageScreenshot: fullPageScreenshotParam,
|
fullPageScreenshot: fullPageScreenshotParam,
|
||||||
headers: headers,
|
headers: headers,
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
|
priority,
|
||||||
...fireEngineOptionsParam,
|
...fireEngineOptionsParam,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -134,7 +134,8 @@ export async function scrapSingleUrl(
|
|||||||
extractorOptions: ExtractorOptions = {
|
extractorOptions: ExtractorOptions = {
|
||||||
mode: "llm-extraction-from-markdown",
|
mode: "llm-extraction-from-markdown",
|
||||||
},
|
},
|
||||||
existingHtml: string = ""
|
existingHtml: string = "",
|
||||||
|
priority?: number,
|
||||||
): Promise<Document> {
|
): Promise<Document> {
|
||||||
urlToScrap = urlToScrap.trim();
|
urlToScrap = urlToScrap.trim();
|
||||||
|
|
||||||
@ -177,7 +178,8 @@ export async function scrapSingleUrl(
|
|||||||
headers: pageOptions.headers,
|
headers: pageOptions.headers,
|
||||||
fireEngineOptions: {
|
fireEngineOptions: {
|
||||||
engine: engine,
|
engine: engine,
|
||||||
}
|
},
|
||||||
|
priority,
|
||||||
});
|
});
|
||||||
scraperResponse.text = response.html;
|
scraperResponse.text = response.html;
|
||||||
scraperResponse.screenshot = response.screenshot;
|
scraperResponse.screenshot = response.screenshot;
|
||||||
|
@ -43,6 +43,7 @@ export interface RunWebScraperParams {
|
|||||||
onError: (error: Error) => void;
|
onError: (error: Error) => void;
|
||||||
team_id: string;
|
team_id: string;
|
||||||
bull_job_id: string;
|
bull_job_id: string;
|
||||||
|
priority?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface RunWebScraperResult {
|
export interface RunWebScraperResult {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user