diff --git a/apps/api/src/scraper/scrapeURL/engines/index/index.ts b/apps/api/src/scraper/scrapeURL/engines/index/index.ts
index 7a756a17..940bf135 100644
--- a/apps/api/src/scraper/scrapeURL/engines/index/index.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/index/index.ts
@@ -84,13 +84,15 @@ export async function sendDocumentToIndex(meta: Meta, document: Document) {
   return document;
 }
 
+const errorCountToRegister = 3;
+
 export async function scrapeURLWithIndex(meta: Meta): Promise {
   const normalizedURL = normalizeURLForIndex(meta.url);
   const urlHash = await hashURL(normalizedURL);
 
   let selector = index_supabase_service
     .from("index")
-    .select("id, created_at")
+    .select("id, created_at, status")
     .eq("url_hash", urlHash)
     .gte("created_at", new Date(Date.now() - meta.options.maxAge).toISOString())
     .eq("is_mobile", meta.options.mobile)
@@ -115,7 +117,7 @@ export async function scrapeURLWithIndex(meta: Meta): Promise 0) {
+    const newest200Index = data.findIndex(x => x.status >= 200 && x.status < 300);
+    // If the newest 200 index is further back than the allowed error count, we should display the errored index entry
+    if (newest200Index >= errorCountToRegister || newest200Index === -1) {
+      selectedRow = data[0];
+    } else {
+      selectedRow = data[newest200Index];
+    }
+  }
+
+  if (selectedRow === null || selectedRow === undefined) {
     throw new IndexMissError();
   }