commit 492d97e889
parent a3145ccacc

reduce logging
@@ -163,15 +163,15 @@ export async function finishCrawlPre(id: string) {
     await redisConnection.expire("crawl:" + id + ":finished_pre", 24 * 60 * 60);
     return set === 1;
   } else {
-    _logger.debug("Crawl can not be pre-finished yet, not marking as finished.", {
-      module: "crawl-redis",
-      method: "finishCrawlPre",
-      crawlId: id,
-      jobs_done: await redisConnection.scard("crawl:" + id + ":jobs_done"),
-      jobs: await redisConnection.scard("crawl:" + id + ":jobs"),
-      kickoff_finished:
-        (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null,
-    });
+    // _logger.debug("Crawl can not be pre-finished yet, not marking as finished.", {
+    //   module: "crawl-redis",
+    //   method: "finishCrawlPre",
+    //   crawlId: id,
+    //   jobs_done: await redisConnection.scard("crawl:" + id + ":jobs_done"),
+    //   jobs: await redisConnection.scard("crawl:" + id + ":jobs"),
+    //   kickoff_finished:
+    //     (await redisConnection.get("crawl:" + id + ":kickoff:finish")) !== null,
+    // });
   }
 }
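Note: the silenced debug payload above was reporting the crawl's completion bookkeeping, which lives entirely in Redis sets. For reference, a minimal self-contained sketch of that pre-finish check, assuming an ioredis client and the same key scheme; isCrawlPreFinishedSketch is a hypothetical helper, not code from this commit:

import Redis from "ioredis";

const redis = new Redis(process.env.REDIS_URL ?? "redis://localhost:6379");

// Hypothetical helper: a crawl is ready to pre-finish once every job id in
// crawl:<id>:jobs also appears in crawl:<id>:jobs_done and the kickoff
// phase has written its finish marker.
async function isCrawlPreFinishedSketch(id: string): Promise<boolean> {
  const [jobsDone, jobs] = await Promise.all([
    redis.scard("crawl:" + id + ":jobs_done"),
    redis.scard("crawl:" + id + ":jobs"),
  ]);
  const kickoffFinished =
    (await redis.get("crawl:" + id + ":kickoff:finish")) !== null;
  return jobsDone >= jobs && kickoffFinished;
}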
@@ -279,9 +279,9 @@ export async function lockURL(
     (await redisConnection.scard("crawl:" + id + ":visited_unique")) >=
       sc.crawlerOptions.limit
   ) {
-    logger.debug(
-      "Crawl has already hit visited_unique limit, not locking URL.",
-    );
+    // logger.debug(
+    //   "Crawl has already hit visited_unique limit, not locking URL.",
+    // );
     return false;
   }
 }
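Note: only the debug line is commented out here; the guard itself still short-circuits lockURL once the crawl's unique-visit count reaches the configured limit. A standalone sketch of that guard, assuming ioredis and a numeric crawler limit (underVisitLimitSketch is hypothetical):

import Redis from "ioredis";

// Hypothetical mirror of the guard above: refuse to lock new URLs once the
// set of unique visited URLs has reached the crawl's limit.
async function underVisitLimitSketch(
  redis: Redis,
  id: string,
  limit: number,
): Promise<boolean> {
  const visited = await redis.scard("crawl:" + id + ":visited_unique");
  return visited < limit;
}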
@@ -105,9 +105,9 @@ export async function getJobFromGCS(jobId: string): Promise<Document[] | null> {
 
 // TODO: fix the any type (we have multiple Document types in the codebase)
 export async function getDocFromGCS(url: string): Promise<any | null> {
-  logger.info(`Getting f-engine document from GCS`, {
-    url,
-  });
+  // logger.info(`Getting f-engine document from GCS`, {
+  //   url,
+  // });
   try {
     if (!process.env.GCS_FIRE_ENGINE_BUCKET_NAME) {
       return null;
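Note: getDocFromGCS treats GCS as optional by bailing out when GCS_FIRE_ENGINE_BUCKET_NAME is unset. A sketch of the fetch path behind that guard, assuming @google-cloud/storage; the url-to-object-name mapping is a placeholder, since the real key scheme is not visible in this diff:

import { Storage } from "@google-cloud/storage";

const storage = new Storage();

async function getDocFromGCSSketch(url: string): Promise<any | null> {
  const bucketName = process.env.GCS_FIRE_ENGINE_BUCKET_NAME;
  if (!bucketName) {
    return null; // bucket not configured: GCS lookups are disabled
  }
  try {
    // Placeholder object naming; the diff does not show how urls map to keys.
    const objectName = encodeURIComponent(url) + ".json";
    const [contents] = await storage
      .bucket(bucketName)
      .file(objectName)
      .download();
    return JSON.parse(contents.toString());
  } catch {
    return null; // treat missing or unreadable objects as a miss
  }
}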
@@ -383,9 +383,8 @@ export function buildFallbackList(meta: Meta): {
     if (cacheIndex !== -1) {
       _engines.splice(cacheIndex, 1);
     }
-  } else {
-    meta.logger.debug("Cache engine enabled by useCache option");
   }
 
   const prioritySum = [...meta.featureFlags].reduce(
     (a, x) => a + featureFlagOptions[x].priority,
     0,
@@ -424,24 +423,6 @@ export function buildFallbackList(meta: Meta): {
 
     if (supportScore >= priorityThreshold) {
       selectedEngines.push({ engine, supportScore, unsupportedFeatures });
-      meta.logger.debug(`Engine ${engine} meets feature priority threshold`, {
-        supportScore,
-        prioritySum,
-        priorityThreshold,
-        featureFlags: [...meta.featureFlags],
-        unsupportedFeatures,
-      });
-    } else {
-      meta.logger.debug(
-        `Engine ${engine} does not meet feature priority threshold`,
-        {
-          supportScore,
-          prioritySum,
-          priorityThreshold,
-          featureFlags: [...meta.featureFlags],
-          unsupportedFeatures,
-        },
-      );
     }
   }
 
@@ -459,6 +440,10 @@ export function buildFallbackList(meta: Meta): {
     );
   }
 
+  meta.logger.info("Selected engines", {
+    selectedEngines,
+  });
+
   return selectedEngines;
 }
 
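Note: the deleted per-engine debug blocks were reporting the inputs to the selection rule that survives this commit: each engine's supportScore is compared against a priorityThreshold derived from the summed priorities of the requested feature flags, and the new summary log records only the final result. A reduced sketch of that scoring, with stand-in flag and engine names since the real Meta and Engine types are not shown here:

// All names below are illustrative stand-ins for the real definitions.
type FeatureFlag = "pdf" | "screenshot" | "waitFor";
type Engine = "fire-engine" | "playwright" | "fetch";

const featureFlagOptions: Record<FeatureFlag, { priority: number }> = {
  pdf: { priority: 100 },
  screenshot: { priority: 10 },
  waitFor: { priority: 5 },
};

function selectEnginesSketch(
  featureFlags: Set<FeatureFlag>,
  supports: (engine: Engine, flag: FeatureFlag) => boolean,
  engines: Engine[],
  priorityThreshold: number,
) {
  const selectedEngines: {
    engine: Engine;
    supportScore: number;
    unsupportedFeatures: FeatureFlag[];
  }[] = [];
  for (const engine of engines) {
    let supportScore = 0;
    const unsupportedFeatures: FeatureFlag[] = [];
    for (const flag of featureFlags) {
      if (supports(engine, flag)) {
        supportScore += featureFlagOptions[flag].priority;
      } else {
        unsupportedFeatures.push(flag);
      }
    }
    // The rule kept by this commit: select the engine iff the summed
    // priority of its supported flags clears the threshold.
    if (supportScore >= priorityThreshold) {
      selectedEngines.push({ engine, supportScore, unsupportedFeatures });
    }
  }
  return selectedEngines;
}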
@@ -47,18 +47,18 @@ async function indexJob(job: FirecrawlJob): Promise<void> {
 
     if (!response.ok) {
       const errorData = await response.json();
-      logger.error(`Failed to send job to external server: ${response.status} ${response.statusText}`, {
-        error: errorData,
-        scrapeId: job.job_id,
-      });
+      // logger.error(`Failed to send job to external server: ${response.status} ${response.statusText}`, {
+      //   error: errorData,
+      //   scrapeId: job.job_id,
+      // });
     } else {
-      logger.debug("Job sent to external server successfully!", { scrapeId: job.job_id });
+      // logger.debug("Job sent to external server successfully!", { scrapeId: job.job_id });
     }
   } catch (error) {
-    logger.error(`Error sending job to external server: ${error.message}`, {
-      error,
-      scrapeId: job.job_id,
-    });
+    // logger.error(`Error sending job to external server: ${error.message}`, {
+    //   error,
+    //   scrapeId: job.job_id,
+    // });
   }
 }
 
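Note: the silenced logs in indexJob bracketed a POST to an external indexing server; the control flow itself (check response.ok, read the error body, swallow network errors) is unchanged. A minimal sketch of that shape, where INDEX_SERVER_URL and the payload type are assumptions rather than values taken from the diff:

// Hypothetical reconstruction of the surrounding control flow.
async function indexJobSketch(job: { job_id: string }): Promise<void> {
  const endpoint = process.env.INDEX_SERVER_URL; // assumed env var name
  if (!endpoint) return;
  try {
    const response = await fetch(endpoint, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(job),
    });
    if (!response.ok) {
      // The original logged this body; after the commit it is read and dropped.
      await response.json().catch(() => null);
    }
  } catch {
    // Network failures are swallowed so indexing never fails the job itself.
  }
}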
@ -132,13 +132,13 @@ async function addScrapeJobRaw(
|
|||||||
// If above by 2x, send them an email
|
// If above by 2x, send them an email
|
||||||
// No need to 2x as if there are more than the max concurrency in the concurrency queue, it is already 2x
|
// No need to 2x as if there are more than the max concurrency in the concurrency queue, it is already 2x
|
||||||
if(concurrencyQueueJobs > maxConcurrency) {
|
if(concurrencyQueueJobs > maxConcurrency) {
|
||||||
logger.info("Concurrency limited 2x (single) - ", "Concurrency queue jobs: ", concurrencyQueueJobs, "Max concurrency: ", maxConcurrency, "Team ID: ", webScraperOptions.team_id);
|
// logger.info("Concurrency limited 2x (single) - ", "Concurrency queue jobs: ", concurrencyQueueJobs, "Max concurrency: ", maxConcurrency, "Team ID: ", webScraperOptions.team_id);
|
||||||
|
|
||||||
// Only send notification if it's not a crawl or batch scrape
|
// Only send notification if it's not a crawl or batch scrape
|
||||||
const shouldSendNotification = await shouldSendConcurrencyLimitNotification(webScraperOptions.team_id);
|
const shouldSendNotification = await shouldSendConcurrencyLimitNotification(webScraperOptions.team_id);
|
||||||
if (shouldSendNotification) {
|
if (shouldSendNotification) {
|
||||||
sendNotificationWithCustomDays(webScraperOptions.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
|
sendNotificationWithCustomDays(webScraperOptions.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
|
||||||
logger.error("Error sending notification (concurrency limit reached): ", error);
|
logger.error("Error sending notification (concurrency limit reached)", { error });
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -231,13 +231,13 @@ export async function addScrapeJobs(
 
   // equals 2x the max concurrency
   if(addToCQ.length > maxConcurrency) {
-    logger.info(`Concurrency limited 2x (multiple) - Concurrency queue jobs: ${addToCQ.length} Max concurrency: ${maxConcurrency} Team ID: ${jobs[0].data.team_id}`);
+    // logger.info(`Concurrency limited 2x (multiple) - Concurrency queue jobs: ${addToCQ.length} Max concurrency: ${maxConcurrency} Team ID: ${jobs[0].data.team_id}`);
     // Only send notification if it's not a crawl or batch scrape
     if (!isCrawlOrBatchScrape(dontAddToCCQ[0].data)) {
       const shouldSendNotification = await shouldSendConcurrencyLimitNotification(dontAddToCCQ[0].data.team_id);
       if (shouldSendNotification) {
         sendNotificationWithCustomDays(dontAddToCCQ[0].data.team_id, NotificationType.CONCURRENCY_LIMIT_REACHED, 15, false).catch((error) => {
-          logger.error("Error sending notification (concurrency limit reached): ", error);
+          logger.error("Error sending notification (concurrency limit reached)", { error });
         });
       }
     }
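Note: besides commenting lines out, the last two hunks make one behavioral fix: logger.error was passing the error as a bare trailing argument, which structured loggers can mangle, and now passes it inside a metadata object. A sketch of the difference, assuming a winston-style logger (Firecrawl's logger wraps winston, but the exact configuration below is an assumption):

import winston from "winston";

const logger = winston.createLogger({
  format: winston.format.json(),
  transports: [new winston.transports.Console()],
});

const error = new Error("boom");

// Before: the Error rides along as a bare meta argument. With a JSON format
// its message and stack are easily lost, because Error's fields are
// non-enumerable and get dropped when the meta is merged into the log entry.
logger.error("Error sending notification (concurrency limit reached): ", error);

// After: the error is keyed explicitly, so formatters and sinks can
// serialize it deliberately (e.g. with winston.format.errors()).
logger.error("Error sending notification (concurrency limit reached)", {
  error,
});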