mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-10 22:19:02 +08:00
Nick: metadata fixes and lock duration for bull decreased to 2 hrs
This commit is contained in:
parent
e5314ee8e7
commit
e7be17db92
@ -399,12 +399,14 @@ export async function scrapSingleUrl(
|
||||
return {
|
||||
text: await parseMarkdown(cleanedHtml),
|
||||
html: cleanedHtml,
|
||||
rawHtml: scraperResponse.text,
|
||||
screenshot: scraperResponse.screenshot,
|
||||
pageStatusCode: scraperResponse.metadata.pageStatusCode,
|
||||
pageError: scraperResponse.metadata.pageError || undefined
|
||||
};
|
||||
};
|
||||
let { text, html, screenshot, pageStatusCode, pageError } = { text: "", html: "", screenshot: "", pageStatusCode: 200, pageError: undefined };
|
||||
|
||||
let { text, html, rawHtml, screenshot, pageStatusCode, pageError } = { text: "", html: "", rawHtml: "", screenshot: "", pageStatusCode: 200, pageError: undefined };
|
||||
try {
|
||||
let urlKey = urlToScrap;
|
||||
try {
|
||||
@ -432,6 +434,7 @@ export async function scrapSingleUrl(
|
||||
const attempt = await attemptScraping(urlToScrap, scraper);
|
||||
text = attempt.text ?? '';
|
||||
html = attempt.html ?? '';
|
||||
rawHtml = attempt.rawHtml ?? '';
|
||||
screenshot = attempt.screenshot ?? '';
|
||||
if (attempt.pageStatusCode) {
|
||||
pageStatusCode = attempt.pageStatusCode;
|
||||
@ -453,7 +456,7 @@ export async function scrapSingleUrl(
|
||||
throw new Error(`All scraping methods failed for URL: ${urlToScrap}`);
|
||||
}
|
||||
|
||||
const soup = cheerio.load(html);
|
||||
const soup = cheerio.load(rawHtml);
|
||||
const metadata = extractMetadata(soup, urlToScrap);
|
||||
|
||||
let document: Document;
|
||||
|
@ -7,7 +7,7 @@ export function getWebScraperQueue() {
|
||||
if (!webScraperQueue) {
|
||||
webScraperQueue = new Queue("web-scraper", process.env.REDIS_URL, {
|
||||
settings: {
|
||||
lockDuration: 4 * 60 * 60 * 1000, // 4 hours in milliseconds,
|
||||
lockDuration: 2 * 60 * 60 * 1000, // 2 hours in milliseconds,
|
||||
lockRenewTime: 30 * 60 * 1000, // 30 minutes in milliseconds
|
||||
},
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user