diff --git a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts index 0350d2b6..9c2eaec5 100644 --- a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts @@ -912,53 +912,41 @@ describe("GET /v1/crawl/:jobId", () => { 180000 ); // 120 seconds - // it.concurrent( - // "If someone cancels a crawl job, it should turn into failed status", - // async () => { - // const crawlResponse = await request(TEST_URL) - // .post("/v1/crawl") - // .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) - // .set("Content-Type", "application/json") - // .send({ url: "https://docs.tatum.io", limit: 200 }); + it.concurrent( + "If someone cancels a crawl job, it should turn into failed status", + async () => { + const crawlResponse = await request(TEST_URL) + .post("/v1/crawl") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ url: "https://docs.tatum.io", limit: 200 }); - // expect(crawlResponse.statusCode).toBe(200); + expect(crawlResponse.statusCode).toBe(200); - // await new Promise((r) => setTimeout(r, 10000)); + await new Promise((r) => setTimeout(r, 10000)); - // const responseCancel = await request(TEST_URL) - // .delete(`/v1/crawl/${crawlResponse.body.id}`) - // .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); - // expect(responseCancel.statusCode).toBe(200); - // expect(responseCancel.body).toHaveProperty("status"); - // expect(responseCancel.body.status).toBe("cancelled"); - - // await new Promise((r) => setTimeout(r, 10000)); - // const completedResponse = await request(TEST_URL) - // .get(`/v1/crawl/${crawlResponse.body.id}`) - // .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); - - // expect(completedResponse.statusCode).toBe(200); - // expect(completedResponse.body).toHaveProperty("status"); - // expect(completedResponse.body.status).toBe("failed"); - // expect(completedResponse.body).toHaveProperty("data"); - - // let isNullOrEmptyArray = false; - // if ( - // completedResponse.body.data === null || - // completedResponse.body.data.length === 0 - // ) { - // isNullOrEmptyArray = true; - // } - // expect(isNullOrEmptyArray).toBe(true); - // expect(completedResponse.body.data).toEqual(expect.arrayContaining([])); - // expect(completedResponse.body).toHaveProperty("data"); - // expect(completedResponse.body.data[0]).toHaveProperty("markdown"); - // expect(completedResponse.body.data[0]).toHaveProperty("metadata"); - // expect(completedResponse.body.data[0].metadata.statusCode).toBe(200); - // expect(completedResponse.body.data[0].metadata.error).toBeUndefined(); - // }, - // 60000 - // ); // 60 seconds + const responseCancel = await request(TEST_URL) + .delete(`/v1/crawl/${crawlResponse.body.id}`) + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); + expect(responseCancel.statusCode).toBe(200); + expect(responseCancel.body).toHaveProperty("status"); + expect(responseCancel.body.status).toBe("cancelled"); + await new Promise((r) => setTimeout(r, 10000)); + const completedResponse = await request(TEST_URL) + .get(`/v1/crawl/${crawlResponse.body.id}`) + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`); + + expect(completedResponse.statusCode).toBe(200); + expect(completedResponse.body).toHaveProperty("status"); + expect(completedResponse.body.status).toBe("cancelled"); + expect(completedResponse.body).toHaveProperty("data"); + expect(completedResponse.body.data[0]).toHaveProperty("markdown"); + expect(completedResponse.body.data[0]).toHaveProperty("metadata"); + expect(completedResponse.body.data[0].metadata.statusCode).toBe(200); + expect(completedResponse.body.data[0].metadata.error).toBeUndefined(); + }, + 60000 + ); // 60 seconds }) }); diff --git a/apps/api/src/controllers/v1/crawl-cancel.ts.WIP b/apps/api/src/controllers/v1/crawl-cancel.ts similarity index 50% rename from apps/api/src/controllers/v1/crawl-cancel.ts.WIP rename to apps/api/src/controllers/v1/crawl-cancel.ts index d0c109ec..06a5b26e 100644 --- a/apps/api/src/controllers/v1/crawl-cancel.ts.WIP +++ b/apps/api/src/controllers/v1/crawl-cancel.ts @@ -1,11 +1,10 @@ import { Request, Response } from "express"; -import { authenticateUser } from "./auth"; -import { RateLimiterMode } from "../../src/types"; -import { addWebScraperJob } from "../../src/services/queue-jobs"; -import { getWebScraperQueue } from "../../src/services/queue-service"; -import { supabase_service } from "../../src/services/supabase"; -import { billTeam } from "../../src/services/billing/credit_billing"; -import { Logger } from "../../src/lib/logger"; +import { authenticateUser } from "../auth"; +import { RateLimiterMode } from "../../types"; +import { supabase_service } from "../../services/supabase"; +import { Logger } from "../../lib/logger"; +import { getCrawl, saveCrawl } from "../../lib/crawl-redis"; +import * as Sentry from "@sentry/node"; export async function crawlCancelController(req: Request, res: Response) { try { @@ -19,8 +18,9 @@ export async function crawlCancelController(req: Request, res: Response) { if (!success) { return res.status(status).json({ error }); } - const job = await getWebScraperQueue().getJob(req.params.jobId); - if (!job) { + + const sc = await getCrawl(req.params.jobId); + if (!sc) { return res.status(404).json({ error: "Job not found" }); } @@ -40,31 +40,18 @@ export async function crawlCancelController(req: Request, res: Response) { } } - const jobState = await job.getState(); - const { partialDocs } = await job.progress(); - - if (partialDocs && partialDocs.length > 0 && jobState === "active") { - Logger.info("Billing team for partial docs..."); - // Note: the credits that we will bill them here might be lower than the actual - // due to promises that are not yet resolved - await billTeam(team_id, partialDocs.length); - } - try { - await getWebScraperQueue().client.del(job.lockKey()); - await job.takeLock(); - await job.discard(); - await job.moveToFailed(Error("Job cancelled by user"), true); + sc.cancelled = true; + await saveCrawl(req.params.jobId, sc); } catch (error) { Logger.error(error); } - const newJobState = await job.getState(); - res.json({ status: "cancelled" }); } catch (error) { + Sentry.captureException(error); Logger.error(error); return res.status(500).json({ error: error.message }); } diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts index f4afe053..a9f4d396 100644 --- a/apps/api/src/routes/v1.ts +++ b/apps/api/src/routes/v1.ts @@ -1,9 +1,9 @@ import express, { NextFunction, Request, Response } from "express"; -import { crawlController } from "../../src/controllers/v1/crawl"; +import { crawlController } from "../controllers/v1/crawl"; // import { crawlStatusController } from "../../src/controllers/v1/crawl-status"; import { scrapeController } from "../../src/controllers/v1/scrape"; -import { crawlStatusController } from "../../src/controllers/v1/crawl-status"; -import { mapController } from "../../src/controllers/v1/map"; +import { crawlStatusController } from "../controllers/v1/crawl-status"; +import { mapController } from "../controllers/v1/map"; import { ErrorResponse, RequestWithAuth, RequestWithMaybeAuth } from "../controllers/v1/types"; import { RateLimiterMode } from "../types"; import { authenticateUser } from "../controllers/auth"; @@ -16,6 +16,7 @@ import { v4 as uuidv4 } from "uuid"; import expressWs from "express-ws"; import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws"; import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; +import { crawlCancelController } from "../controllers/v1/crawl-cancel"; // import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview"; // import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status"; // import { searchController } from "../../src/controllers/v1/search"; @@ -130,7 +131,13 @@ v1Router.ws( ); // v1Router.post("/crawlWebsitePreview", crawlPreviewController); -// v1Router.delete("/crawl/:jobId", crawlCancelController); + + +v1Router.delete( + "/crawl/:jobId", + authMiddleware(RateLimiterMode.Crawl), + crawlCancelController +); // v1Router.get("/checkJobStatus/:jobId", crawlJobStatusPreviewController); // // Auth route for key based authentication