mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-19 18:29:05 +08:00
Nick: cache /extract scrapes
This commit is contained in:
parent
81cf05885b
commit
6b2e1cbb28
@ -14,10 +14,13 @@ interface ScrapeDocumentOptions {
|
|||||||
timeout: number;
|
timeout: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function scrapeDocument(options: ScrapeDocumentOptions, urlTraces: URLTrace[]): Promise<Document | null> {
|
export async function scrapeDocument(
|
||||||
|
options: ScrapeDocumentOptions,
|
||||||
|
urlTraces: URLTrace[],
|
||||||
|
): Promise<Document | null> {
|
||||||
const trace = urlTraces.find((t) => t.url === options.url);
|
const trace = urlTraces.find((t) => t.url === options.url);
|
||||||
if (trace) {
|
if (trace) {
|
||||||
trace.status = 'scraped';
|
trace.status = "scraped";
|
||||||
trace.timing.scrapedAt = new Date().toISOString();
|
trace.timing.scrapedAt = new Date().toISOString();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -35,7 +38,9 @@ export async function scrapeDocument(options: ScrapeDocumentOptions, urlTraces:
|
|||||||
mode: "single_urls",
|
mode: "single_urls",
|
||||||
team_id: options.teamId,
|
team_id: options.teamId,
|
||||||
scrapeOptions: scrapeOptions.parse({}),
|
scrapeOptions: scrapeOptions.parse({}),
|
||||||
internalOptions: {},
|
internalOptions: {
|
||||||
|
useCache: true,
|
||||||
|
},
|
||||||
plan: options.plan,
|
plan: options.plan,
|
||||||
origin: options.origin,
|
origin: options.origin,
|
||||||
is_scrape: true,
|
is_scrape: true,
|
||||||
@ -61,7 +66,7 @@ export async function scrapeDocument(options: ScrapeDocumentOptions, urlTraces:
|
|||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Error in scrapeDocument: ${error}`);
|
logger.error(`Error in scrapeDocument: ${error}`);
|
||||||
if (trace) {
|
if (trace) {
|
||||||
trace.status = 'error';
|
trace.status = "error";
|
||||||
trace.error = error.message;
|
trace.error = error.message;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
|
@ -298,6 +298,12 @@ export function buildFallbackList(meta: Meta): {
|
|||||||
engine: Engine;
|
engine: Engine;
|
||||||
unsupportedFeatures: Set<FeatureFlag>;
|
unsupportedFeatures: Set<FeatureFlag>;
|
||||||
}[] {
|
}[] {
|
||||||
|
|
||||||
|
if (meta.internalOptions.useCache !== true) {
|
||||||
|
engines.splice(engines.indexOf("cache"), 1);
|
||||||
|
}else{
|
||||||
|
meta.logger.debug("Cache engine enabled by useCache option");
|
||||||
|
}
|
||||||
const prioritySum = [...meta.featureFlags].reduce(
|
const prioritySum = [...meta.featureFlags].reduce(
|
||||||
(a, x) => a + featureFlagOptions[x].priority,
|
(a, x) => a + featureFlagOptions[x].priority,
|
||||||
0,
|
0,
|
||||||
|
@ -151,7 +151,7 @@ export type InternalOptions = {
|
|||||||
|
|
||||||
v0CrawlOnlyUrls?: boolean;
|
v0CrawlOnlyUrls?: boolean;
|
||||||
v0DisableJsDom?: boolean;
|
v0DisableJsDom?: boolean;
|
||||||
|
useCache?: boolean;
|
||||||
disableSmartWaitCache?: boolean; // Passed along to fire-engine
|
disableSmartWaitCache?: boolean; // Passed along to fire-engine
|
||||||
isBackgroundIndex?: boolean;
|
isBackgroundIndex?: boolean;
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user