Nick: fixed

Nicolas 2025-01-03 22:15:23 -03:00
parent 8df1c67961
commit a4f7c38834
2 changed files with 44 additions and 15 deletions

View File

@@ -300,8 +300,11 @@ export function buildFallbackList(meta: Meta): {
 }[] {
   if (meta.internalOptions.useCache !== true) {
-    engines.splice(engines.indexOf("cache"), 1);
-  }else{
+    const cacheIndex = engines.indexOf("cache");
+    if (cacheIndex !== -1) {
+      engines.splice(cacheIndex, 1);
+    }
+  } else {
     meta.logger.debug("Cache engine enabled by useCache option");
   }

   const prioritySum = [...meta.featureFlags].reduce(
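Note on the hunk above: Array.prototype.indexOf returns -1 when "cache" is not in the list, and splice(-1, 1) then removes the last element of the array instead of doing nothing, so the old one-liner could silently drop an unrelated engine. A minimal standalone sketch of the hazard and of the guarded removal introduced here (the engines array below is illustrative, not the one assembled by buildFallbackList):

// Illustrative array; the real list is built inside buildFallbackList.
const engines: string[] = ["fetch", "playwright"]; // "cache" is absent

// Old pattern: indexOf("cache") === -1, and splice(-1, 1) deletes the LAST entry.
// engines.splice(engines.indexOf("cache"), 1); // would drop "playwright"

// New pattern: only splice when the element was actually found.
const cacheIndex = engines.indexOf("cache");
if (cacheIndex !== -1) {
  engines.splice(cacheIndex, 1);
}
console.log(engines); // ["fetch", "playwright"], unchanged when "cache" is absent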

View File

@@ -84,18 +84,43 @@ async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
   // Upload to Supabase if we have URLs and this is a crawl (not a batch scrape)
   if (visitedUrls.length > 0 && job.data.crawlerOptions !== null) {
     try {
-      const { error } = await supabase_service
+      // First check if entry exists for this origin URL
+      const { data: existingMap } = await supabase_service
         .from('crawl_maps')
-        .insert({
-          crawl_id: job.data.crawl_id,
-          team_id: job.data.team_id,
-          origin_url: sc.originUrl,
-          urls: visitedUrls,
-          created_at: new Date().toISOString()
-        });
+        .select('urls')
+        .eq('origin_url', sc.originUrl)
+        .single();
+
+      if (existingMap) {
+        // Merge URLs, removing duplicates
+        const mergedUrls = [...new Set([...existingMap.urls, ...visitedUrls])];
-      if (error) {
-        _logger.error("Failed to save crawl map", { error });
+        const { error } = await supabase_service
+          .from('crawl_maps')
+          .update({
+            urls: mergedUrls,
+            num_urls: mergedUrls.length,
+            updated_at: new Date().toISOString()
+          })
+          .eq('origin_url', sc.originUrl);
+
+        if (error) {
+          _logger.error("Failed to update crawl map", { error });
+        }
+      } else {
+        // Insert new entry if none exists
+        const { error } = await supabase_service
+          .from('crawl_maps')
+          .insert({
+            origin_url: sc.originUrl,
+            urls: visitedUrls,
+            num_urls: visitedUrls.length,
+            created_at: new Date().toISOString()
+          });
+        if (error) {
+          _logger.error("Failed to save crawl map", { error });
+        }
+      }
     } catch (error) {
       _logger.error("Error saving crawl map", { error });
@@ -802,9 +827,10 @@ async function processJob(job: Job & { id: string }, token: string) {
             newJobId: jobId,
           });
         } else {
-          logger.debug("Could not lock URL " + JSON.stringify(link), {
-            url: link,
-          });
+          // TODO: removed this, ok? too many 'not useful' logs (?) Mogery!
+          // logger.debug("Could not lock URL " + JSON.stringify(link), {
+          //   url: link,
+          // });
         }
       }
     }
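The last hunk silences the "Could not lock URL" debug line by commenting it out rather than deleting it, pending the question to Mogery. If the log turns out to be occasionally useful, one alternative (a sketch only; LOG_LOCK_MISSES is an invented flag, not something the repo reads) is to gate it behind an opt-in environment variable instead of removing it:

// Hypothetical opt-in gate for the noisy lock-miss log; not part of this commit.
if (process.env.LOG_LOCK_MISSES === "true") {
  logger.debug("Could not lock URL " + JSON.stringify(link), {
    url: link,
  });
}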