mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-05 12:50:41 +08:00
crawl fix, again
This commit is contained in:
parent
b468bb4014
commit
ba6f29cdda
@ -352,7 +352,9 @@ async function processJob(job: Job & { id: string }, token: string) {
|
||||
|
||||
if (job.data.crawlerOptions !== null) {
|
||||
if (!sc.cancelled) {
|
||||
const crawler = crawlToCrawler(job.data.crawl_id, sc, doc.metadata.url ?? doc.metadata.sourceURL ?? sc.originUrl);
|
||||
const newURL = new URL(doc.metadata.url ?? doc.metadata.sourceURL ?? sc.originUrl!);
|
||||
const useNewURLAsBase = newURL.hostname.split(".").slice(-2).join(".") === new URL(sc.originUrl!).hostname.split(".").slice(-2).join(".");
|
||||
const crawler = crawlToCrawler(job.data.crawl_id, sc, useNewURLAsBase ? newURL.href : undefined);
|
||||
|
||||
const links = crawler.filterLinks(
|
||||
crawler.extractLinksFromHTML(rawHtml ?? "", doc.metadata?.url ?? doc.metadata?.sourceURL ?? sc.originUrl as string),
|
||||
|
Loading…
x
Reference in New Issue
Block a user