mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader.git
synced 2025-08-20 16:39:10 +08:00
fix(adaptive-crawler): if no sitemap, use recursive instead
This commit is contained in:
parent
ee29be58f1
commit
5a4b35e4b9
@ -120,9 +120,12 @@ export class AdaptiveCrawlerHost extends RPCHost {
|
|||||||
failed: {},
|
failed: {},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let urls: string[] = [];
|
||||||
if (useSitemap) {
|
if (useSitemap) {
|
||||||
const urls = await this.crawlUrlsFromSitemap(targetUrl, maxPages);
|
urls = await this.crawlUrlsFromSitemap(targetUrl, maxPages);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (urls.length > 0) {
|
||||||
await AdaptiveCrawlTask.COLLECTION.doc(shortDigest).update({
|
await AdaptiveCrawlTask.COLLECTION.doc(shortDigest).update({
|
||||||
status: AdaptiveCrawlTaskStatus.PROCESSING,
|
status: AdaptiveCrawlTaskStatus.PROCESSING,
|
||||||
statusText: `Processing 0/${urls.length}`,
|
statusText: `Processing 0/${urls.length}`,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user