Nick: fixed

Nicolas 2025-01-03 22:15:23 -03:00
parent 8df1c67961
commit a4f7c38834
2 changed files with 44 additions and 15 deletions


@@ -300,8 +300,11 @@ export function buildFallbackList(meta: Meta): {
 }[] {
   if (meta.internalOptions.useCache !== true) {
-    engines.splice(engines.indexOf("cache"), 1);
-  }else{
+    const cacheIndex = engines.indexOf("cache");
+    if (cacheIndex !== -1) {
+      engines.splice(cacheIndex, 1);
+    }
+  } else {
     meta.logger.debug("Cache engine enabled by useCache option");
   }
   const prioritySum = [...meta.featureFlags].reduce(
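
The guard matters because Array.prototype.indexOf returns -1 on a miss, and engines.splice(-1, 1) then deletes the last element of the array instead of doing nothing. A minimal standalone sketch of the failure mode the old line allowed (the engine names here are hypothetical):

// Before the fix: a miss turns into splice(-1, 1), which drops the LAST element.
const engines: string[] = ["fetch", "playwright"]; // "cache" is not present
// engines.splice(engines.indexOf("cache"), 1);    // would remove "playwright"

// After the fix: splice only runs when the engine was actually found.
const cacheIndex = engines.indexOf("cache");
if (cacheIndex !== -1) {
  engines.splice(cacheIndex, 1);
}
console.log(engines); // ["fetch", "playwright"] -- untouched on a miss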


@@ -84,18 +84,43 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
     // Upload to Supabase if we have URLs and this is a crawl (not a batch scrape)
     if (visitedUrls.length > 0 && job.data.crawlerOptions !== null) {
       try {
-        const { error } = await supabase_service
-          .from('crawl_maps')
-          .insert({
-            crawl_id: job.data.crawl_id,
-            team_id: job.data.team_id,
-            origin_url: sc.originUrl,
-            urls: visitedUrls,
-            created_at: new Date().toISOString()
-          });
-        if (error) {
-          _logger.error("Failed to save crawl map", { error });
+        // First check if entry exists for this origin URL
+        const { data: existingMap } = await supabase_service
+          .from('crawl_maps')
+          .select('urls')
+          .eq('origin_url', sc.originUrl)
+          .single();
+
+        if (existingMap) {
+          // Merge URLs, removing duplicates
+          const mergedUrls = [...new Set([...existingMap.urls, ...visitedUrls])];
+          const { error } = await supabase_service
+            .from('crawl_maps')
+            .update({
+              urls: mergedUrls,
+              num_urls: mergedUrls.length,
+              updated_at: new Date().toISOString()
+            })
+            .eq('origin_url', sc.originUrl);
+          if (error) {
+            _logger.error("Failed to update crawl map", { error });
+          }
+        } else {
+          // Insert new entry if none exists
+          const { error } = await supabase_service
+            .from('crawl_maps')
+            .insert({
+              origin_url: sc.originUrl,
+              urls: visitedUrls,
+              num_urls: visitedUrls.length,
+              created_at: new Date().toISOString()
+            });
+          if (error) {
+            _logger.error("Failed to save crawl map", { error });
+          }
         }
       } catch (error) {
         _logger.error("Error saving crawl map", { error });
@@ -802,9 +827,10 @@ async function processJob(job: Job & { id: string }, token: string) {
               newJobId: jobId,
             });
           } else {
-            logger.debug("Could not lock URL " + JSON.stringify(link), {
-              url: link,
-            });
+            // TODO: removed this, ok? too many 'not useful' logs (?) Mogery!
+            // logger.debug("Could not lock URL " + JSON.stringify(link), {
+            //   url: link,
+            // });
           }
         }
       }
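
On the TODO above: instead of deleting a chatty debug line outright, one middle ground is sampling it so a trickle of evidence survives for diagnostics. A sketch using only the logger already in scope (the 1% rate is an arbitrary assumption):

// Keep roughly 1 in 100 of these messages rather than all or none.
if (Math.random() < 0.01) {
  logger.debug("Could not lock URL " + JSON.stringify(link), {
    url: link,
  });
}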