Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl (synced 2025-08-12 11:19:02 +08:00)
commit a4f7c38834
parent 8df1c67961

    Nick: fixed
@@ -300,8 +300,11 @@ export function buildFallbackList(meta: Meta): {
 }[] {
 
   if (meta.internalOptions.useCache !== true) {
-    engines.splice(engines.indexOf("cache"), 1);
-  }else{
+    const cacheIndex = engines.indexOf("cache");
+    if (cacheIndex !== -1) {
+      engines.splice(cacheIndex, 1);
+    }
+  } else {
     meta.logger.debug("Cache engine enabled by useCache option");
   }
   const prioritySum = [...meta.featureFlags].reduce(
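Why the extra guard matters: Array.prototype.splice(-1, 1) removes the last element, so the old one-liner silently dropped whichever engine happened to be last whenever "cache" was not in the list. A minimal standalone sketch of the pitfall and the guarded fix (the engine names here are illustrative, not the real fallback list):

// Buggy pattern (the code being replaced): "cache" is absent, indexOf
// returns -1, and splice(-1, 1) deletes the last entry instead of nothing.
const engines: string[] = ["fetch", "playwright", "pdf"];

const buggy = [...engines];
buggy.splice(buggy.indexOf("cache"), 1);
console.log(buggy); // ["fetch", "playwright"] -- "pdf" was silently lost

// Guarded pattern (the fix in this commit): only splice on a real hit.
const fixed = [...engines];
const cacheIndex = fixed.indexOf("cache");
if (cacheIndex !== -1) {
  fixed.splice(cacheIndex, 1);
}
console.log(fixed); // ["fetch", "playwright", "pdf"] -- untouched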
@@ -84,18 +84,43 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
   // Upload to Supabase if we have URLs and this is a crawl (not a batch scrape)
   if (visitedUrls.length > 0 && job.data.crawlerOptions !== null) {
     try {
-      const { error } = await supabase_service
+      // First check if entry exists for this origin URL
+      const { data: existingMap } = await supabase_service
         .from('crawl_maps')
-        .insert({
-          crawl_id: job.data.crawl_id,
-          team_id: job.data.team_id,
-          origin_url: sc.originUrl,
-          urls: visitedUrls,
-          created_at: new Date().toISOString()
-        });
+        .select('urls')
+        .eq('origin_url', sc.originUrl)
+        .single();
 
-      if (error) {
-        _logger.error("Failed to save crawl map", { error });
+      if (existingMap) {
+        // Merge URLs, removing duplicates
+        const mergedUrls = [...new Set([...existingMap.urls, ...visitedUrls])];
+
+        const { error } = await supabase_service
+          .from('crawl_maps')
+          .update({
+            urls: mergedUrls,
+            num_urls: mergedUrls.length,
+            updated_at: new Date().toISOString()
+          })
+          .eq('origin_url', sc.originUrl);
+
+        if (error) {
+          _logger.error("Failed to update crawl map", { error });
+        }
+      } else {
+        // Insert new entry if none exists
+        const { error } = await supabase_service
+          .from('crawl_maps')
+          .insert({
+            origin_url: sc.originUrl,
+            urls: visitedUrls,
+            num_urls: visitedUrls.length,
+            created_at: new Date().toISOString()
+          });
+
+        if (error) {
+          _logger.error("Failed to save crawl map", { error });
+        }
       }
     } catch (error) {
       _logger.error("Error saving crawl map", { error });
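Two notes on this new flow. First, it is a non-atomic read-then-write: two crawls for the same origin_url finishing concurrently can both read the old row, and one merge can overwrite the other. Second, supabase-js .single() reports an error when zero rows match, which this code discards by destructuring only data; .maybeSingle() is the zero-or-one-row variant that returns data: null without an error. A minimal sketch of the same check-merge-write flow, assuming supabase-js v2 and SUPABASE_URL/SUPABASE_KEY env vars; saveCrawlMap is a hypothetical wrapper, not a function from this codebase:

import { createClient } from "@supabase/supabase-js";

const supabase = createClient(process.env.SUPABASE_URL!, process.env.SUPABASE_KEY!);

// Hypothetical helper mirroring the committed logic, with the read
// error surfaced instead of silently dropped.
async function saveCrawlMap(originUrl: string, visitedUrls: string[]) {
  // maybeSingle() yields data: null (no error) when no row matches.
  const { data: existingMap, error: readError } = await supabase
    .from("crawl_maps")
    .select("urls")
    .eq("origin_url", originUrl)
    .maybeSingle();
  if (readError) throw readError;

  if (existingMap) {
    // Merge and dedupe; Set keeps first occurrence, so existing URLs win.
    const mergedUrls = [...new Set([...existingMap.urls, ...visitedUrls])];
    const { error } = await supabase
      .from("crawl_maps")
      .update({
        urls: mergedUrls,
        num_urls: mergedUrls.length,
        updated_at: new Date().toISOString(),
      })
      .eq("origin_url", originUrl);
    if (error) throw error;
  } else {
    const { error } = await supabase
      .from("crawl_maps")
      .insert({
        origin_url: originUrl,
        urls: visitedUrls,
        num_urls: visitedUrls.length,
        created_at: new Date().toISOString(),
      });
    if (error) throw error;
  }
}

Closing the race would take a server-side merge (e.g. a Postgres function called via .rpc()) or a unique constraint on origin_url with retry; the sketch keeps the commit's two-step shape for clarity.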
@@ -802,9 +827,10 @@ async function processJob(job: Job & { id: string }, token: string) {
             newJobId: jobId,
           });
         } else {
-          logger.debug("Could not lock URL " + JSON.stringify(link), {
-            url: link,
-          });
+          // TODO: removed this, ok? too many 'not useful' logs (?) Mogery!
+          // logger.debug("Could not lock URL " + JSON.stringify(link), {
+          //   url: link,
+          // });
         }
       }
     }
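On the TODO in this hunk: an alternative to commenting the log out entirely is to sample it, so lock contention still leaves a trace without flooding the logs. A hedged sketch of that idea, not what the commit does; only logger.debug is known to exist in this codebase, and makeSampledLogger is illustrative:

// Wraps a debug function so only every Nth call is emitted.
function makeSampledLogger(debug: (msg: string, meta?: object) => void, every: number) {
  let n = 0;
  return (msg: string, meta?: object) => {
    // Logs the 1st call, then every `every`th call after that.
    if (n++ % every === 0) {
      debug(`${msg} (sampled 1/${every})`, meta);
    }
  };
}

// Usage: log roughly 1 in 100 lock failures.
// const debugLockMiss = makeSampledLogger(logger.debug.bind(logger), 100);
// debugLockMiss("Could not lock URL " + JSON.stringify(link), { url: link });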