mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-16 19:15:56 +08:00
feat(index): integrate into map
This commit is contained in:
parent
550d6bf7bd
commit
d904ec676f
@ -25,6 +25,7 @@ import { logger } from "../../lib/logger";
|
|||||||
import Redis from "ioredis";
|
import Redis from "ioredis";
|
||||||
import { querySitemapIndex } from "../../scraper/WebScraper/sitemap-index";
|
import { querySitemapIndex } from "../../scraper/WebScraper/sitemap-index";
|
||||||
import { getIndexQueue } from "../../services/queue-service";
|
import { getIndexQueue } from "../../services/queue-service";
|
||||||
|
import { hashURL, index_supabase_service, normalizeURLForIndex } from "../../services/index";
|
||||||
|
|
||||||
configDotenv();
|
configDotenv();
|
||||||
const redis = new Redis(process.env.REDIS_URL!);
|
const redis = new Redis(process.env.REDIS_URL!);
|
||||||
@ -165,11 +166,23 @@ export async function getMapResults({
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Parallelize sitemap index query with search results
|
// Parallelize sitemap index query with search results
|
||||||
const [sitemapIndexResult, ...searchResults] = await Promise.all([
|
const [sitemapIndexResult, { data: indexResults, error: indexError }, ...searchResults] = await Promise.all([
|
||||||
querySitemapIndex(url, abort),
|
querySitemapIndex(url, abort),
|
||||||
|
index_supabase_service
|
||||||
|
.from("index")
|
||||||
|
.select("resolved_url")
|
||||||
|
.overlaps("url_splits_hash", [await hashURL(normalizeURLForIndex(url))])
|
||||||
|
.gte("created_at", new Date(Date.now() - 2 * 24 * 60 * 60 * 1000).toISOString())
|
||||||
|
.limit(limit),
|
||||||
...(cachedResult ? [] : pagePromises),
|
...(cachedResult ? [] : pagePromises),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
if (indexError) {
|
||||||
|
logger.warn("Error querying index", { error: indexError });
|
||||||
|
} else if (indexResults.length > 0) {
|
||||||
|
links.push(...indexResults.map((x) => x.resolved_url));
|
||||||
|
}
|
||||||
|
|
||||||
const twoDaysAgo = new Date();
|
const twoDaysAgo = new Date();
|
||||||
twoDaysAgo.setDate(twoDaysAgo.getDate() - 2);
|
twoDaysAgo.setDate(twoDaysAgo.getDate() - 2);
|
||||||
|
|
||||||
|
@ -11,7 +11,8 @@ import { saveToCache } from "./cache";
|
|||||||
import { performAgent } from "./agent";
|
import { performAgent } from "./agent";
|
||||||
|
|
||||||
import { deriveDiff } from "./diff";
|
import { deriveDiff } from "./diff";
|
||||||
import { sendDocumentToIndex, useIndex } from "../engines/index/index";
|
import { useIndex } from "../../../services/index";
|
||||||
|
import { sendDocumentToIndex } from "../engines/index/index";
|
||||||
|
|
||||||
export type Transformer = (
|
export type Transformer = (
|
||||||
meta: Meta,
|
meta: Meta,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user