feat: cancel

v0 implementation + e2e test
This commit is contained in:
rafaelsideguide 2024-08-27 09:42:55 -03:00
parent fb553a020d
commit 1ef41b92a0
3 changed files with 55 additions and 73 deletions

View File

@ -912,53 +912,41 @@ describe("GET /v1/crawl/:jobId", () => {
180000 180000
); // 180 seconds ); // 180 seconds
// it.concurrent( it.concurrent(
// "If someone cancels a crawl job, it should turn into failed status", "If someone cancels a crawl job, it should turn into failed status",
// async () => { async () => {
// const crawlResponse = await request(TEST_URL) const crawlResponse = await request(TEST_URL)
// .post("/v1/crawl") .post("/v1/crawl")
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
// .set("Content-Type", "application/json") .set("Content-Type", "application/json")
// .send({ url: "https://docs.tatum.io", limit: 200 }); .send({ url: "https://docs.tatum.io", limit: 200 });
// expect(crawlResponse.statusCode).toBe(200); expect(crawlResponse.statusCode).toBe(200);
// await new Promise((r) => setTimeout(r, 10000)); await new Promise((r) => setTimeout(r, 10000));
// const responseCancel = await request(TEST_URL) const responseCancel = await request(TEST_URL)
// .delete(`/v1/crawl/${crawlResponse.body.id}`) .delete(`/v1/crawl/${crawlResponse.body.id}`)
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
// expect(responseCancel.statusCode).toBe(200); expect(responseCancel.statusCode).toBe(200);
// expect(responseCancel.body).toHaveProperty("status"); expect(responseCancel.body).toHaveProperty("status");
// expect(responseCancel.body.status).toBe("cancelled"); expect(responseCancel.body.status).toBe("cancelled");
// await new Promise((r) => setTimeout(r, 10000));
// const completedResponse = await request(TEST_URL)
// .get(`/v1/crawl/${crawlResponse.body.id}`)
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
// expect(completedResponse.statusCode).toBe(200);
// expect(completedResponse.body).toHaveProperty("status");
// expect(completedResponse.body.status).toBe("failed");
// expect(completedResponse.body).toHaveProperty("data");
// let isNullOrEmptyArray = false;
// if (
// completedResponse.body.data === null ||
// completedResponse.body.data.length === 0
// ) {
// isNullOrEmptyArray = true;
// }
// expect(isNullOrEmptyArray).toBe(true);
// expect(completedResponse.body.data).toEqual(expect.arrayContaining([]));
// expect(completedResponse.body).toHaveProperty("data");
// expect(completedResponse.body.data[0]).toHaveProperty("markdown");
// expect(completedResponse.body.data[0]).toHaveProperty("metadata");
// expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
// expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
// },
// 60000
// ); // 60 seconds
await new Promise((r) => setTimeout(r, 10000));
const completedResponse = await request(TEST_URL)
.get(`/v1/crawl/${crawlResponse.body.id}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(completedResponse.statusCode).toBe(200);
expect(completedResponse.body).toHaveProperty("status");
expect(completedResponse.body.status).toBe("cancelled");
expect(completedResponse.body).toHaveProperty("data");
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
},
60000
); // 60 seconds
}) })
}); });

View File

@ -1,11 +1,10 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { authenticateUser } from "./auth"; import { authenticateUser } from "../auth";
import { RateLimiterMode } from "../../src/types"; import { RateLimiterMode } from "../../types";
import { addWebScraperJob } from "../../src/services/queue-jobs"; import { supabase_service } from "../../services/supabase";
import { getWebScraperQueue } from "../../src/services/queue-service"; import { Logger } from "../../lib/logger";
import { supabase_service } from "../../src/services/supabase"; import { getCrawl, saveCrawl } from "../../lib/crawl-redis";
import { billTeam } from "../../src/services/billing/credit_billing"; import * as Sentry from "@sentry/node";
import { Logger } from "../../src/lib/logger";
export async function crawlCancelController(req: Request, res: Response) { export async function crawlCancelController(req: Request, res: Response) {
try { try {
@ -19,8 +18,9 @@ export async function crawlCancelController(req: Request, res: Response) {
if (!success) { if (!success) {
return res.status(status).json({ error }); return res.status(status).json({ error });
} }
const job = await getWebScraperQueue().getJob(req.params.jobId);
if (!job) { const sc = await getCrawl(req.params.jobId);
if (!sc) {
return res.status(404).json({ error: "Job not found" }); return res.status(404).json({ error: "Job not found" });
} }
@ -40,31 +40,18 @@ export async function crawlCancelController(req: Request, res: Response) {
} }
} }
const jobState = await job.getState();
const { partialDocs } = await job.progress();
if (partialDocs && partialDocs.length > 0 && jobState === "active") {
Logger.info("Billing team for partial docs...");
// Note: the credits that we will bill them here might be lower than the actual
// due to promises that are not yet resolved
await billTeam(team_id, partialDocs.length);
}
try { try {
await getWebScraperQueue().client.del(job.lockKey()); sc.cancelled = true;
await job.takeLock(); await saveCrawl(req.params.jobId, sc);
await job.discard();
await job.moveToFailed(Error("Job cancelled by user"), true);
} catch (error) { } catch (error) {
Logger.error(error); Logger.error(error);
} }
const newJobState = await job.getState();
res.json({ res.json({
status: "cancelled" status: "cancelled"
}); });
} catch (error) { } catch (error) {
Sentry.captureException(error);
Logger.error(error); Logger.error(error);
return res.status(500).json({ error: error.message }); return res.status(500).json({ error: error.message });
} }

View File

@ -1,9 +1,9 @@
import express, { NextFunction, Request, Response } from "express"; import express, { NextFunction, Request, Response } from "express";
import { crawlController } from "../../src/controllers/v1/crawl"; import { crawlController } from "../controllers/v1/crawl";
// import { crawlStatusController } from "../../src/controllers/v1/crawl-status"; // import { crawlStatusController } from "../../src/controllers/v1/crawl-status";
import { scrapeController } from "../../src/controllers/v1/scrape"; import { scrapeController } from "../../src/controllers/v1/scrape";
import { crawlStatusController } from "../../src/controllers/v1/crawl-status"; import { crawlStatusController } from "../controllers/v1/crawl-status";
import { mapController } from "../../src/controllers/v1/map"; import { mapController } from "../controllers/v1/map";
import { ErrorResponse, RequestWithAuth, RequestWithMaybeAuth } from "../controllers/v1/types"; import { ErrorResponse, RequestWithAuth, RequestWithMaybeAuth } from "../controllers/v1/types";
import { RateLimiterMode } from "../types"; import { RateLimiterMode } from "../types";
import { authenticateUser } from "../controllers/auth"; import { authenticateUser } from "../controllers/auth";
@ -16,6 +16,7 @@ import { v4 as uuidv4 } from "uuid";
import expressWs from "express-ws"; import expressWs from "express-ws";
import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws"; import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws";
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
import { crawlCancelController } from "../controllers/v1/crawl-cancel";
// import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview"; // import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
// import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status"; // import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
// import { searchController } from "../../src/controllers/v1/search"; // import { searchController } from "../../src/controllers/v1/search";
@ -130,7 +131,13 @@ v1Router.ws(
); );
// v1Router.post("/crawlWebsitePreview", crawlPreviewController); // v1Router.post("/crawlWebsitePreview", crawlPreviewController);
// v1Router.delete("/crawl/:jobId", crawlCancelController);
v1Router.delete(
"/crawl/:jobId",
authMiddleware(RateLimiterMode.Crawl),
crawlCancelController
);
// v1Router.get("/checkJobStatus/:jobId", crawlJobStatusPreviewController); // v1Router.get("/checkJobStatus/:jobId", crawlJobStatusPreviewController);
// // Auth route for key based authentication // // Auth route for key based authentication