diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts
index 0bdf9876..fd3c9ad1 100644
--- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts
+++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts
@@ -15,6 +15,7 @@ const socialMediaBlocklist = [
   'whatsapp.com',
   'wechat.com',
   'telegram.org',
+  'researchhub.com',
 ];
 
 const allowedKeywords = [
diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts
index c15201be..890e6e7b 100644
--- a/apps/api/src/services/queue-worker.ts
+++ b/apps/api/src/services/queue-worker.ts
@@ -173,9 +173,11 @@ async function processJob(job: Job, token: string) {
       if (!job.data.sitemapped) {
         if (!sc.cancelled) {
           const crawler = crawlToCrawler(job.data.crawl_id, sc);
-
-          const links = crawler.filterLinks((data.docs[0].linksOnPage ?? [])
-            .map(href => crawler.filterURL(href.trim(), sc.originUrl))
+          // Optional-chain the whole path: data.docs itself may be undefined,
+          // so ?. covers every level without an error-swallowing try/catch.
+          const linksOnPage = data.docs?.[0]?.linksOnPage ?? [];
+          const links = crawler.filterLinks(
+            linksOnPage.map(href => crawler.filterURL(href.trim(), sc.originUrl))
             .filter(x => x !== null),
             Infinity,
             sc.crawlerOptions?.maxDepth ?? 10