mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 07:49:00 +08:00
feat: cancel
v0 implementation + e2e test
This commit is contained in:
parent
fb553a020d
commit
1ef41b92a0
@ -912,53 +912,41 @@ describe("GET /v1/crawl/:jobId", () => {
|
|||||||
180000
|
180000
|
||||||
); // 120 seconds
|
); // 120 seconds
|
||||||
|
|
||||||
// it.concurrent(
|
it.concurrent(
|
||||||
// "If someone cancels a crawl job, it should turn into failed status",
|
"If someone cancels a crawl job, it should turn into failed status",
|
||||||
// async () => {
|
async () => {
|
||||||
// const crawlResponse = await request(TEST_URL)
|
const crawlResponse = await request(TEST_URL)
|
||||||
// .post("/v1/crawl")
|
.post("/v1/crawl")
|
||||||
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
// .set("Content-Type", "application/json")
|
.set("Content-Type", "application/json")
|
||||||
// .send({ url: "https://docs.tatum.io", limit: 200 });
|
.send({ url: "https://docs.tatum.io", limit: 200 });
|
||||||
|
|
||||||
// expect(crawlResponse.statusCode).toBe(200);
|
expect(crawlResponse.statusCode).toBe(200);
|
||||||
|
|
||||||
// await new Promise((r) => setTimeout(r, 10000));
|
await new Promise((r) => setTimeout(r, 10000));
|
||||||
|
|
||||||
// const responseCancel = await request(TEST_URL)
|
const responseCancel = await request(TEST_URL)
|
||||||
// .delete(`/v1/crawl/${crawlResponse.body.id}`)
|
.delete(`/v1/crawl/${crawlResponse.body.id}`)
|
||||||
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
|
||||||
// expect(responseCancel.statusCode).toBe(200);
|
expect(responseCancel.statusCode).toBe(200);
|
||||||
// expect(responseCancel.body).toHaveProperty("status");
|
expect(responseCancel.body).toHaveProperty("status");
|
||||||
// expect(responseCancel.body.status).toBe("cancelled");
|
expect(responseCancel.body.status).toBe("cancelled");
|
||||||
|
|
||||||
// await new Promise((r) => setTimeout(r, 10000));
|
|
||||||
// const completedResponse = await request(TEST_URL)
|
|
||||||
// .get(`/v1/crawl/${crawlResponse.body.id}`)
|
|
||||||
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
|
|
||||||
|
|
||||||
// expect(completedResponse.statusCode).toBe(200);
|
|
||||||
// expect(completedResponse.body).toHaveProperty("status");
|
|
||||||
// expect(completedResponse.body.status).toBe("failed");
|
|
||||||
// expect(completedResponse.body).toHaveProperty("data");
|
|
||||||
|
|
||||||
// let isNullOrEmptyArray = false;
|
|
||||||
// if (
|
|
||||||
// completedResponse.body.data === null ||
|
|
||||||
// completedResponse.body.data.length === 0
|
|
||||||
// ) {
|
|
||||||
// isNullOrEmptyArray = true;
|
|
||||||
// }
|
|
||||||
// expect(isNullOrEmptyArray).toBe(true);
|
|
||||||
// expect(completedResponse.body.data).toEqual(expect.arrayContaining([]));
|
|
||||||
// expect(completedResponse.body).toHaveProperty("data");
|
|
||||||
// expect(completedResponse.body.data[0]).toHaveProperty("markdown");
|
|
||||||
// expect(completedResponse.body.data[0]).toHaveProperty("metadata");
|
|
||||||
// expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
|
|
||||||
// expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
|
|
||||||
// },
|
|
||||||
// 60000
|
|
||||||
// ); // 60 seconds
|
|
||||||
|
|
||||||
|
await new Promise((r) => setTimeout(r, 10000));
|
||||||
|
const completedResponse = await request(TEST_URL)
|
||||||
|
.get(`/v1/crawl/${crawlResponse.body.id}`)
|
||||||
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
|
||||||
|
|
||||||
|
expect(completedResponse.statusCode).toBe(200);
|
||||||
|
expect(completedResponse.body).toHaveProperty("status");
|
||||||
|
expect(completedResponse.body.status).toBe("cancelled");
|
||||||
|
expect(completedResponse.body).toHaveProperty("data");
|
||||||
|
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
|
||||||
|
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
|
||||||
|
expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
|
||||||
|
expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
|
||||||
|
},
|
||||||
|
60000
|
||||||
|
); // 60 seconds
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
@ -1,11 +1,10 @@
|
|||||||
import { Request, Response } from "express";
|
import { Request, Response } from "express";
|
||||||
import { authenticateUser } from "./auth";
|
import { authenticateUser } from "../auth";
|
||||||
import { RateLimiterMode } from "../../src/types";
|
import { RateLimiterMode } from "../../types";
|
||||||
import { addWebScraperJob } from "../../src/services/queue-jobs";
|
import { supabase_service } from "../../services/supabase";
|
||||||
import { getWebScraperQueue } from "../../src/services/queue-service";
|
import { Logger } from "../../lib/logger";
|
||||||
import { supabase_service } from "../../src/services/supabase";
|
import { getCrawl, saveCrawl } from "../../lib/crawl-redis";
|
||||||
import { billTeam } from "../../src/services/billing/credit_billing";
|
import * as Sentry from "@sentry/node";
|
||||||
import { Logger } from "../../src/lib/logger";
|
|
||||||
|
|
||||||
export async function crawlCancelController(req: Request, res: Response) {
|
export async function crawlCancelController(req: Request, res: Response) {
|
||||||
try {
|
try {
|
||||||
@ -19,8 +18,9 @@ export async function crawlCancelController(req: Request, res: Response) {
|
|||||||
if (!success) {
|
if (!success) {
|
||||||
return res.status(status).json({ error });
|
return res.status(status).json({ error });
|
||||||
}
|
}
|
||||||
const job = await getWebScraperQueue().getJob(req.params.jobId);
|
|
||||||
if (!job) {
|
const sc = await getCrawl(req.params.jobId);
|
||||||
|
if (!sc) {
|
||||||
return res.status(404).json({ error: "Job not found" });
|
return res.status(404).json({ error: "Job not found" });
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,31 +40,18 @@ export async function crawlCancelController(req: Request, res: Response) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const jobState = await job.getState();
|
|
||||||
const { partialDocs } = await job.progress();
|
|
||||||
|
|
||||||
if (partialDocs && partialDocs.length > 0 && jobState === "active") {
|
|
||||||
Logger.info("Billing team for partial docs...");
|
|
||||||
// Note: the credits that we will bill them here might be lower than the actual
|
|
||||||
// due to promises that are not yet resolved
|
|
||||||
await billTeam(team_id, partialDocs.length);
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await getWebScraperQueue().client.del(job.lockKey());
|
sc.cancelled = true;
|
||||||
await job.takeLock();
|
await saveCrawl(req.params.jobId, sc);
|
||||||
await job.discard();
|
|
||||||
await job.moveToFailed(Error("Job cancelled by user"), true);
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
Logger.error(error);
|
Logger.error(error);
|
||||||
}
|
}
|
||||||
|
|
||||||
const newJobState = await job.getState();
|
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
status: "cancelled"
|
status: "cancelled"
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
Sentry.captureException(error);
|
||||||
Logger.error(error);
|
Logger.error(error);
|
||||||
return res.status(500).json({ error: error.message });
|
return res.status(500).json({ error: error.message });
|
||||||
}
|
}
|
@ -1,9 +1,9 @@
|
|||||||
import express, { NextFunction, Request, Response } from "express";
|
import express, { NextFunction, Request, Response } from "express";
|
||||||
import { crawlController } from "../../src/controllers/v1/crawl";
|
import { crawlController } from "../controllers/v1/crawl";
|
||||||
// import { crawlStatusController } from "../../src/controllers/v1/crawl-status";
|
// import { crawlStatusController } from "../../src/controllers/v1/crawl-status";
|
||||||
import { scrapeController } from "../../src/controllers/v1/scrape";
|
import { scrapeController } from "../../src/controllers/v1/scrape";
|
||||||
import { crawlStatusController } from "../../src/controllers/v1/crawl-status";
|
import { crawlStatusController } from "../controllers/v1/crawl-status";
|
||||||
import { mapController } from "../../src/controllers/v1/map";
|
import { mapController } from "../controllers/v1/map";
|
||||||
import { ErrorResponse, RequestWithAuth, RequestWithMaybeAuth } from "../controllers/v1/types";
|
import { ErrorResponse, RequestWithAuth, RequestWithMaybeAuth } from "../controllers/v1/types";
|
||||||
import { RateLimiterMode } from "../types";
|
import { RateLimiterMode } from "../types";
|
||||||
import { authenticateUser } from "../controllers/auth";
|
import { authenticateUser } from "../controllers/auth";
|
||||||
@ -16,6 +16,7 @@ import { v4 as uuidv4 } from "uuid";
|
|||||||
import expressWs from "express-ws";
|
import expressWs from "express-ws";
|
||||||
import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws";
|
import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws";
|
||||||
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
|
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
|
||||||
|
import { crawlCancelController } from "../controllers/v1/crawl-cancel";
|
||||||
// import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
|
// import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
|
||||||
// import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
|
// import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
|
||||||
// import { searchController } from "../../src/controllers/v1/search";
|
// import { searchController } from "../../src/controllers/v1/search";
|
||||||
@ -130,7 +131,13 @@ v1Router.ws(
|
|||||||
);
|
);
|
||||||
|
|
||||||
// v1Router.post("/crawlWebsitePreview", crawlPreviewController);
|
// v1Router.post("/crawlWebsitePreview", crawlPreviewController);
|
||||||
// v1Router.delete("/crawl/:jobId", crawlCancelController);
|
|
||||||
|
|
||||||
|
v1Router.delete(
|
||||||
|
"/crawl/:jobId",
|
||||||
|
authMiddleware(RateLimiterMode.Crawl),
|
||||||
|
crawlCancelController
|
||||||
|
);
|
||||||
// v1Router.get("/checkJobStatus/:jobId", crawlJobStatusPreviewController);
|
// v1Router.get("/checkJobStatus/:jobId", crawlJobStatusPreviewController);
|
||||||
|
|
||||||
// // Auth route for key based authentication
|
// // Auth route for key based authentication
|
||||||
|
Loading…
x
Reference in New Issue
Block a user