mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-05 00:00:44 +08:00
Nick: fixes
This commit is contained in:
parent
f11137352c
commit
d2de01d342
@ -19,7 +19,10 @@ export async function crawlJobStatusPreviewController(req: Request, res: Respons
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const jobStatus = await job.getState();
|
let jobStatus = await job.getState();
|
||||||
|
if (jobStatus === 'waiting' || jobStatus === 'stuck') {
|
||||||
|
jobStatus = 'active';
|
||||||
|
}
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
status: jobStatus,
|
status: jobStatus,
|
||||||
|
@ -31,7 +31,6 @@ export async function scrapWithFireEngine({
|
|||||||
fireEngineOptions?: FireEngineOptions;
|
fireEngineOptions?: FireEngineOptions;
|
||||||
headers?: Record<string, string>;
|
headers?: Record<string, string>;
|
||||||
options?: any;
|
options?: any;
|
||||||
engine?: 'playwright' | 'chrome-cdp' | 'tlsclient';
|
|
||||||
}): Promise<FireEngineResponse> {
|
}): Promise<FireEngineResponse> {
|
||||||
const logParams = {
|
const logParams = {
|
||||||
url,
|
url,
|
||||||
@ -47,6 +46,7 @@ export async function scrapWithFireEngine({
|
|||||||
try {
|
try {
|
||||||
const reqParams = await generateRequestParams(url);
|
const reqParams = await generateRequestParams(url);
|
||||||
const waitParam = reqParams["params"]?.wait ?? waitFor;
|
const waitParam = reqParams["params"]?.wait ?? waitFor;
|
||||||
|
const engineParam = reqParams["params"]?.engine ?? fireEngineOptions?.engine ?? "playwright";
|
||||||
const screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
|
const screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
|
||||||
const fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
|
const fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
|
||||||
|
|
||||||
@ -57,13 +57,13 @@ export async function scrapWithFireEngine({
|
|||||||
endpoint = "/request";
|
endpoint = "/request";
|
||||||
}
|
}
|
||||||
|
|
||||||
let engine = fireEngineOptions?.engine ?? options?.engine ?? "playwright"; // do we want fireEngineOptions as first choice?
|
let engine = engineParam; // do we want fireEngineOptions as first choice?
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
`[Fire-Engine] Scraping ${url} with wait: ${waitParam} and screenshot: ${screenshotParam} and method: ${fireEngineOptionsParam?.method ?? "null"}`
|
`[Fire-Engine][${engine}] Scraping ${url} with wait: ${waitParam} and screenshot: ${screenshotParam} and method: ${fireEngineOptionsParam?.method ?? "null"}`
|
||||||
);
|
);
|
||||||
|
|
||||||
console.log(fireEngineOptionsParam)
|
// console.log(fireEngineOptionsParam)
|
||||||
|
|
||||||
const response = await axios.post(
|
const response = await axios.post(
|
||||||
process.env.FIRE_ENGINE_BETA_URL + endpoint,
|
process.env.FIRE_ENGINE_BETA_URL + endpoint,
|
||||||
@ -73,7 +73,6 @@ export async function scrapWithFireEngine({
|
|||||||
screenshot: screenshotParam,
|
screenshot: screenshotParam,
|
||||||
headers: headers,
|
headers: headers,
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
engine: engine,
|
|
||||||
...fireEngineOptionsParam,
|
...fireEngineOptionsParam,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -86,14 +85,14 @@ export async function scrapWithFireEngine({
|
|||||||
|
|
||||||
if (response.status !== 200) {
|
if (response.status !== 200) {
|
||||||
console.error(
|
console.error(
|
||||||
`[Fire-Engine] Error fetching url: ${url} with status: ${response.status}`
|
`[Fire-Engine][${engine}] Error fetching url: ${url} with status: ${response.status}`
|
||||||
);
|
);
|
||||||
|
|
||||||
logParams.error_message = response.data?.pageError;
|
logParams.error_message = response.data?.pageError;
|
||||||
logParams.response_code = response.data?.pageStatusCode;
|
logParams.response_code = response.data?.pageStatusCode;
|
||||||
|
|
||||||
if(response.data && response.data?.pageStatusCode !== 200) {
|
if(response.data && response.data?.pageStatusCode !== 200) {
|
||||||
console.error(`[Fire-Engine] Error fetching url: ${url} with status: ${response.status}`);
|
console.error(`[Fire-Engine][${engine}] Error fetching url: ${url} with status: ${response.status}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -161,7 +161,9 @@ export async function scrapSingleUrl(
|
|||||||
screenshot: pageOptions.screenshot,
|
screenshot: pageOptions.screenshot,
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
headers: pageOptions.headers,
|
headers: pageOptions.headers,
|
||||||
|
fireEngineOptions: {
|
||||||
engine: engine,
|
engine: engine,
|
||||||
|
}
|
||||||
});
|
});
|
||||||
scraperResponse.text = response.html;
|
scraperResponse.text = response.html;
|
||||||
scraperResponse.screenshot = response.screenshot;
|
scraperResponse.screenshot = response.screenshot;
|
||||||
|
@ -175,6 +175,7 @@ export const urlSpecificParams = {
|
|||||||
"firecrawl.dev":{
|
"firecrawl.dev":{
|
||||||
defaultScraper: "fire-engine",
|
defaultScraper: "fire-engine",
|
||||||
params: {
|
params: {
|
||||||
|
engine: "playwright",
|
||||||
headers: {
|
headers: {
|
||||||
"User-Agent":
|
"User-Agent":
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
|
||||||
|
@ -7,11 +7,14 @@ export function getWebScraperQueue() {
|
|||||||
if (!webScraperQueue) {
|
if (!webScraperQueue) {
|
||||||
webScraperQueue = new Queue("web-scraper", process.env.REDIS_URL, {
|
webScraperQueue = new Queue("web-scraper", process.env.REDIS_URL, {
|
||||||
settings: {
|
settings: {
|
||||||
lockDuration: 2 * 60 * 1000, // 1 minute in milliseconds,
|
lockDuration: 1 * 60 * 1000, // 1 minute in milliseconds,
|
||||||
lockRenewTime: 15 * 1000, // 15 seconds in milliseconds
|
lockRenewTime: 15 * 1000, // 15 seconds in milliseconds
|
||||||
stalledInterval: 30 * 1000,
|
stalledInterval: 30 * 1000,
|
||||||
maxStalledCount: 10,
|
maxStalledCount: 10,
|
||||||
},
|
},
|
||||||
|
defaultJobOptions:{
|
||||||
|
attempts: 5
|
||||||
|
}
|
||||||
});
|
});
|
||||||
console.log("Web scraper queue created");
|
console.log("Web scraper queue created");
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user