mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-04 20:20:40 +08:00
fix: robots.txt loading
This commit is contained in:
parent
29f0d9ec94
commit
c5597bc722
@ -95,25 +95,22 @@ export async function crawlController(req: Request, res: Response) {
|
|||||||
|
|
||||||
await logCrawl(id, team_id);
|
await logCrawl(id, team_id);
|
||||||
|
|
||||||
let robots;
|
|
||||||
|
|
||||||
try {
|
|
||||||
robots = await this.getRobotsTxt();
|
|
||||||
} catch (_) {}
|
|
||||||
|
|
||||||
const sc: StoredCrawl = {
|
const sc: StoredCrawl = {
|
||||||
originUrl: url,
|
originUrl: url,
|
||||||
crawlerOptions,
|
crawlerOptions,
|
||||||
pageOptions,
|
pageOptions,
|
||||||
team_id,
|
team_id,
|
||||||
robots,
|
|
||||||
createdAt: Date.now(),
|
createdAt: Date.now(),
|
||||||
};
|
};
|
||||||
|
|
||||||
await saveCrawl(id, sc);
|
|
||||||
|
|
||||||
const crawler = crawlToCrawler(id, sc);
|
const crawler = crawlToCrawler(id, sc);
|
||||||
|
|
||||||
|
try {
|
||||||
|
sc.robots = await crawler.getRobotsTxt();
|
||||||
|
} catch (_) {}
|
||||||
|
|
||||||
|
await saveCrawl(id, sc);
|
||||||
|
|
||||||
const sitemap = sc.crawlerOptions?.ignoreSitemap ? null : await crawler.tryGetSitemap();
|
const sitemap = sc.crawlerOptions?.ignoreSitemap ? null : await crawler.tryGetSitemap();
|
||||||
|
|
||||||
if (sitemap !== null) {
|
if (sitemap !== null) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user