crawl fixes

This commit is contained in:
Gergő Móricz 2024-11-20 19:47:58 +01:00
parent 103c3f28e6
commit b468bb4014
2 changed files with 3 additions and 2 deletions

View File

@@ -148,7 +148,8 @@ export async function lockURL(id: string, sc: StoredCrawl, url: string): Promise
res = (await redisConnection.sadd("crawl:" + id + ":visited", url)) !== 0
} else {
const permutations = generateURLPermutations(url);
res = (await redisConnection.sadd("crawl:" + id + ":visited", ...permutations.map(x => x.href))) === permutations.length;
const x = (await redisConnection.sadd("crawl:" + id + ":visited", ...permutations.map(x => x.href)));
res = x === permutations.length;
}
await redisConnection.expire("crawl:" + id + ":visited", 24 * 60 * 60, "NX");

View File

@@ -350,7 +350,7 @@ async function processJob(job: Job & { id: string }, token: string) {
await addCrawlJobDone(job.data.crawl_id, job.id);
if (!job.data.sitemapped && job.data.crawlerOptions !== null) {
if (job.data.crawlerOptions !== null) {
if (!sc.cancelled) {
const crawler = crawlToCrawler(job.data.crawl_id, sc, doc.metadata.url ?? doc.metadata.sourceURL ?? sc.originUrl);