diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts index 15490385..9bbbb7f1 100644 --- a/apps/api/src/controllers/v1/map.ts +++ b/apps/api/src/controllers/v1/map.ts @@ -25,6 +25,7 @@ import { logger } from "../../lib/logger"; import Redis from "ioredis"; import { querySitemapIndex } from "../../scraper/WebScraper/sitemap-index"; import { getIndexQueue } from "../../services/queue-service"; +import { hashURL, index_supabase_service, normalizeURLForIndex } from "../../services/index"; configDotenv(); const redis = new Redis(process.env.REDIS_URL!); @@ -165,11 +166,23 @@ export async function getMapResults({ } // Parallelize sitemap index query with search results - const [sitemapIndexResult, ...searchResults] = await Promise.all([ + const [sitemapIndexResult, { data: indexResults, error: indexError }, ...searchResults] = await Promise.all([ querySitemapIndex(url, abort), + index_supabase_service + .from("index") + .select("resolved_url") + .overlaps("url_splits_hash", [await hashURL(normalizeURLForIndex(url))]) + .gte("created_at", new Date(Date.now() - 2 * 24 * 60 * 60 * 1000).toISOString()) + .limit(limit), ...(cachedResult ? [] : pagePromises), ]); + if (indexError) { + logger.warn("Error querying index", { error: indexError }); + } else if (indexResults.length > 0) { + links.push(...indexResults.map((x) => x.resolved_url)); + } + const twoDaysAgo = new Date(); twoDaysAgo.setDate(twoDaysAgo.getDate() - 2); diff --git a/apps/api/src/scraper/scrapeURL/transformers/index.ts b/apps/api/src/scraper/scrapeURL/transformers/index.ts index ad6c9d34..31c14c21 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/index.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/index.ts @@ -11,7 +11,8 @@ import { saveToCache } from "./cache"; import { performAgent } from "./agent"; import { deriveDiff } from "./diff"; -import { sendDocumentToIndex, useIndex } from "../engines/index/index"; +import { useIndex } from "../../../services/index"; +import { sendDocumentToIndex } from "../engines/index/index"; export type Transformer = ( meta: Meta,