mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-08 16:49:02 +08:00
fix(crawl-redis/lockURL): only add to visited_unique if lock succeeds
This commit is contained in:
parent
30fa78cd9e
commit
37f58efe45
@@ -233,13 +233,6 @@ export async function lockURL(
|
||||
url = normalizeURL(url, sc);
|
||||
logger = logger.child({ url });
|
||||
|
||||
await redisConnection.sadd("crawl:" + id + ":visited_unique", url);
|
||||
await redisConnection.expire(
|
||||
"crawl:" + id + ":visited_unique",
|
||||
24 * 60 * 60,
|
||||
"NX",
|
||||
);
|
||||
|
||||
let res: boolean;
|
||||
if (!sc.crawlerOptions?.deduplicateSimilarURLs) {
|
||||
res = (await redisConnection.sadd("crawl:" + id + ":visited", url)) !== 0;
|
||||
@@ -255,6 +248,15 @@ export async function lockURL(
|
||||
|
||||
await redisConnection.expire("crawl:" + id + ":visited", 24 * 60 * 60, "NX");
|
||||
|
||||
if (res) {
|
||||
await redisConnection.sadd("crawl:" + id + ":visited_unique", url);
|
||||
await redisConnection.expire(
|
||||
"crawl:" + id + ":visited_unique",
|
||||
24 * 60 * 60,
|
||||
"NX",
|
||||
);
|
||||
}
|
||||
|
||||
logger.debug("Locking URL " + JSON.stringify(url) + "... result: " + res, {
|
||||
res,
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user