fix(crawl-redis/lockURL): only add to visited_unique if lock succeeds

This commit is contained in:
Gergő Móricz 2024-12-15 21:01:31 +01:00
parent 30fa78cd9e
commit 37f58efe45

View File

@@ -233,13 +233,6 @@ export async function lockURL(
url = normalizeURL(url, sc);
logger = logger.child({ url });
await redisConnection.sadd("crawl:" + id + ":visited_unique", url);
await redisConnection.expire(
"crawl:" + id + ":visited_unique",
24 * 60 * 60,
"NX",
);
let res: boolean;
if (!sc.crawlerOptions?.deduplicateSimilarURLs) {
res = (await redisConnection.sadd("crawl:" + id + ":visited", url)) !== 0;
@@ -255,6 +248,15 @@ export async function lockURL(
await redisConnection.expire("crawl:" + id + ":visited", 24 * 60 * 60, "NX");
if (res) {
await redisConnection.sadd("crawl:" + id + ":visited_unique", url);
await redisConnection.expire(
"crawl:" + id + ":visited_unique",
24 * 60 * 60,
"NX",
);
}
logger.debug("Locking URL " + JSON.stringify(url) + "... result: " + res, {
res,
});