Nick: fixed

Nicolas 2025-01-03 22:15:23 -03:00
parent 8df1c67961
commit a4f7c38834
2 changed files with 44 additions and 15 deletions

View File

@@ -300,8 +300,11 @@ export function buildFallbackList(meta: Meta): {
 }[] {
   if (meta.internalOptions.useCache !== true) {
-    engines.splice(engines.indexOf("cache"), 1);
-  }else{
+    const cacheIndex = engines.indexOf("cache");
+    if (cacheIndex !== -1) {
+      engines.splice(cacheIndex, 1);
+    }
+  } else {
     meta.logger.debug("Cache engine enabled by useCache option");
   }
   const prioritySum = [...meta.featureFlags].reduce(
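
Why the guard matters: when "cache" is not in the engine list, indexOf returns -1 and splice(-1, 1) silently removes the last engine instead of doing nothing. A minimal sketch with a hypothetical engine list (not the repo's actual data):

    // Unguarded: indexOf misses, so splice(-1, 1) drops the LAST element.
    const engines = ["fetch", "playwright", "pdf"];
    engines.splice(engines.indexOf("cache"), 1);
    console.log(engines); // ["fetch", "playwright"]  ("pdf" was removed by accident)

    // Guarded: only remove the entry when it is actually present.
    const engines2 = ["fetch", "playwright", "pdf"];
    const cacheIndex = engines2.indexOf("cache");
    if (cacheIndex !== -1) {
      engines2.splice(cacheIndex, 1);
    }
    console.log(engines2); // ["fetch", "playwright", "pdf"]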

View File

@@ -84,19 +84,44 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
   // Upload to Supabase if we have URLs and this is a crawl (not a batch scrape)
   if (visitedUrls.length > 0 && job.data.crawlerOptions !== null) {
     try {
+      // First check if entry exists for this origin URL
+      const { data: existingMap } = await supabase_service
+        .from('crawl_maps')
+        .select('urls')
+        .eq('origin_url', sc.originUrl)
+        .single();
+      if (existingMap) {
+        // Merge URLs, removing duplicates
+        const mergedUrls = [...new Set([...existingMap.urls, ...visitedUrls])];
+        const { error } = await supabase_service
+          .from('crawl_maps')
+          .update({
+            urls: mergedUrls,
+            num_urls: mergedUrls.length,
+            updated_at: new Date().toISOString()
+          })
+          .eq('origin_url', sc.originUrl);
+        if (error) {
+          _logger.error("Failed to update crawl map", { error });
+        }
+      } else {
+        // Insert new entry if none exists
         const { error } = await supabase_service
           .from('crawl_maps')
           .insert({
+            crawl_id: job.data.crawl_id,
+            team_id: job.data.team_id,
             origin_url: sc.originUrl,
             urls: visitedUrls,
+            num_urls: visitedUrls.length,
             created_at: new Date().toISOString()
           });
         if (error) {
           _logger.error("Failed to save crawl map", { error });
         }
+      }
     } catch (error) {
       _logger.error("Error saving crawl map", { error });
     }
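
A note on the update path above: the de-duplication relies on Set semantics, so merging the stored URLs with the newly visited ones collapses duplicates while keeping first-seen order. A minimal sketch with made-up URLs (not data from the repo):

    // Hypothetical values, only to show how the merge behaves.
    const existingUrls = ["https://example.com/", "https://example.com/docs"];
    const visitedUrls = ["https://example.com/docs", "https://example.com/blog"];
    const mergedUrls = [...new Set([...existingUrls, ...visitedUrls])];
    console.log(mergedUrls);
    // ["https://example.com/", "https://example.com/docs", "https://example.com/blog"]

Since this is a read-then-update sequence rather than an atomic upsert, two crawls finishing at the same time for the same origin_url could still race, but it does stop a plain insert from creating a duplicate row per crawl.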
@@ -802,9 +827,10 @@ async function processJob(job: Job & { id: string }, token: string) {
           newJobId: jobId,
         });
       } else {
-        logger.debug("Could not lock URL " + JSON.stringify(link), {
-          url: link,
-        });
+        // TODO: removed this, ok? too many 'not useful' logs (?) Mogery!
+        // logger.debug("Could not lock URL " + JSON.stringify(link), {
+        //   url: link,
+        // });
       }
     }
   }