From d904ec676faa322cf9864b472a756788644826db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 28 May 2025 14:47:38 +0200 Subject: [PATCH] feat(index): integrate into map --- apps/api/src/controllers/v1/map.ts | 15 ++++++++++++++- .../src/scraper/scrapeURL/transformers/index.ts | 3 ++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts index 15490385..9bbbb7f1 100644 --- a/apps/api/src/controllers/v1/map.ts +++ b/apps/api/src/controllers/v1/map.ts @@ -25,6 +25,7 @@ import { logger } from "../../lib/logger"; import Redis from "ioredis"; import { querySitemapIndex } from "../../scraper/WebScraper/sitemap-index"; import { getIndexQueue } from "../../services/queue-service"; +import { hashURL, index_supabase_service, normalizeURLForIndex } from "../../services/index"; configDotenv(); const redis = new Redis(process.env.REDIS_URL!); @@ -165,11 +166,23 @@ export async function getMapResults({ } // Parallelize sitemap index query with search results - const [sitemapIndexResult, ...searchResults] = await Promise.all([ + const [sitemapIndexResult, { data: indexResults, error: indexError }, ...searchResults] = await Promise.all([ querySitemapIndex(url, abort), + index_supabase_service + .from("index") + .select("resolved_url") + .overlaps("url_splits_hash", [await hashURL(normalizeURLForIndex(url))]) + .gte("created_at", new Date(Date.now() - 2 * 24 * 60 * 60 * 1000).toISOString()) + .limit(limit), ...(cachedResult ? [] : pagePromises), ]); + if (indexError) { + logger.warn("Error querying index", { error: indexError }); + } else if (indexResults.length > 0) { + links.push(...indexResults.map((x) => x.resolved_url)); + } + const twoDaysAgo = new Date(); twoDaysAgo.setDate(twoDaysAgo.getDate() - 2); diff --git a/apps/api/src/scraper/scrapeURL/transformers/index.ts b/apps/api/src/scraper/scrapeURL/transformers/index.ts index ad6c9d34..31c14c21 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/index.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/index.ts @@ -11,7 +11,8 @@ import { saveToCache } from "./cache"; import { performAgent } from "./agent"; import { deriveDiff } from "./diff"; -import { sendDocumentToIndex, useIndex } from "../engines/index/index"; +import { useIndex } from "../../../services/index"; +import { sendDocumentToIndex } from "../engines/index/index"; export type Transformer = ( meta: Meta,