Merge pull request #560 from mendableai/feat/cancel-fire-engine

[Feat] Added check job and cancel to fire-engine requests
This commit is contained in:
Nicolas 2024-08-20 15:43:31 -03:00 committed by GitHub
commit 59eb552df5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -86,7 +86,12 @@ export async function scrapWithFireEngine({
pageOptions.atsv = false; pageOptions.atsv = false;
} }
const response = await axios.post( const axiosInstance = axios.create({
headers: { "Content-Type": "application/json" }
});
const startTime = Date.now();
const response = await axiosInstance.post(
process.env.FIRE_ENGINE_BETA_URL + endpoint, process.env.FIRE_ENGINE_BETA_URL + endpoint,
{ {
url: url, url: url,
@ -98,16 +103,39 @@ export async function scrapWithFireEngine({
disableJsDom: pageOptions?.disableJsDom ?? false, disableJsDom: pageOptions?.disableJsDom ?? false,
priority, priority,
engine, engine,
instantReturn: true,
...fireEngineOptionsParam, ...fireEngineOptionsParam,
}, },
{ {
headers: { headers: {
"Content-Type": "application/json", "Content-Type": "application/json",
}, }
timeout: universalTimeout + waitParam,
} }
); );
let checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${response.data.jobId}`);
while (checkStatusResponse.data.processing && Date.now() - startTime < universalTimeout + waitParam) {
await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${response.data.jobId}`);
}
if (checkStatusResponse.data.processing) {
Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${response.data.jobId}`);
try {
axiosInstance.delete(
process.env.FIRE_ENGINE_BETA_URL + `/scrape/${response.data.jobId}`,
);
} catch (error) {
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to delete request - jobId: ${response.data.jobId} | error: ${error}`);
logParams.error_message = "Failed to delete request";
return { html: "", screenshot: "", pageStatusCode: null, pageError: "" };
}
Logger.debug(`⛏️ Fire-Engine (${engine}): Request timed out for ${url}`);
logParams.error_message = "Request timed out";
return { html: "", screenshot: "", pageStatusCode: null, pageError: "" };
}
if (response.status !== 200) { if (response.status !== 200) {
Logger.debug( Logger.debug(
`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}` `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`