diff --git a/apps/api/src/__tests__/snips/crawl.test.ts b/apps/api/src/__tests__/snips/crawl.test.ts
index beb8932a..6f8f1ca1 100644
--- a/apps/api/src/__tests__/snips/crawl.test.ts
+++ b/apps/api/src/__tests__/snips/crawl.test.ts
@@ -54,13 +54,13 @@ describe("Crawl tests", () => {
 
         const ongoing = await crawlOngoing();
 
-        expect(ongoing.ids).toContain(res.id)
+        expect(ongoing.crawls.find(x => x.id === res.id)).toBeDefined();
 
         await asyncCrawlWaitForFinish(res.id);
 
         const ongoing2 = await crawlOngoing();
 
-        expect(ongoing2.ids).not.toContain(res.id);
+        expect(ongoing2.crawls.find(x => x.id === res.id)).toBeUndefined();
     }, 120000);
 
     // TEMP: Flaky
diff --git a/apps/api/src/controllers/v1/crawl-ongoing.ts b/apps/api/src/controllers/v1/crawl-ongoing.ts
index 67e08eb1..0b8ef144 100644
--- a/apps/api/src/controllers/v1/crawl-ongoing.ts
+++ b/apps/api/src/controllers/v1/crawl-ongoing.ts
@@ -2,8 +2,10 @@ import { Response } from "express";
 import {
   OngoingCrawlsResponse,
   RequestWithAuth,
+  toNewCrawlerOptions,
 } from "./types";
 import {
+  getCrawl,
   getCrawlsByTeamId,
 } from "../../lib/crawl-redis";
 import { configDotenv } from "dotenv";
@@ -15,8 +17,26 @@ export async function ongoingCrawlsController(
 ) {
   const ids = await getCrawlsByTeamId(req.auth.team_id);
 
+  // Keep each id next to its lookup result instead of spreading the result
+  // into a new object: spreading a missing (null) crawl would yield `{ id }`,
+  // which passes a null check and later crashes on its absent crawlerOptions.
+  const crawls = (
+    await Promise.all(
+      ids.map(async (id) => ({ id, crawl: await getCrawl(id) })),
+    )
+  ).filter(({ crawl }) => crawl != null && !crawl.cancelled);
+
   res.status(200).json({
     success: true,
-    ids,
+    // `crawl!` is safe here: the filter above removed every missing crawl.
+    crawls: crawls.map(({ id, crawl }) => ({
+      id,
+      teamId: crawl!.team_id!,
+      url: crawl!.originUrl!,
+      options: {
+        ...toNewCrawlerOptions(crawl!.crawlerOptions),
+        scrapeOptions: crawl!.scrapeOptions,
+      },
+    })),
   });
 }
diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts
index f3713e89..fb4b72c8 100644
--- a/apps/api/src/controllers/v1/types.ts
+++ b/apps/api/src/controllers/v1/types.ts
@@ -879,7 +879,12 @@ export type OngoingCrawlsResponse =
   | ErrorResponse
   | {
       success: true;
-      ids: string[];
+      crawls: {
+        id: string;
+        teamId: string;
+        url: string;
+        options: CrawlerOptions;
+      }[];
     };
 
 export type CrawlErrorsResponse =
@@ -1016,6 +1021,36 @@ export function toLegacyCrawlerOptions(x: CrawlerOptions) {
   };
 }
 
+/**
+ * Maps crawler options from their legacy field names (`includes`,
+ * `excludes`, `allowBackwardCrawling`, `allowExternalContentLinks`) to the
+ * `CrawlerOptions` names; every other field is copied across under the same
+ * name.
+ *
+ * @param x - Legacy-shaped options object. It is dereferenced directly, so
+ *   it must not be null or undefined.
+ * @returns The options under their `CrawlerOptions` names; fields absent on
+ *   `x` come back as `undefined`.
+ */
+export function toNewCrawlerOptions(x: any): CrawlerOptions {
+  return {
+    includePaths: x.includes,
+    excludePaths: x.excludes,
+    limit: x.limit,
+    maxDepth: x.maxDepth,
+    allowBackwardLinks: x.allowBackwardCrawling,
+    allowExternalLinks: x.allowExternalContentLinks,
+    allowSubdomains: x.allowSubdomains,
+    ignoreRobotsTxt: x.ignoreRobotsTxt,
+    ignoreSitemap: x.ignoreSitemap,
+    deduplicateSimilarURLs: x.deduplicateSimilarURLs,
+    ignoreQueryParameters: x.ignoreQueryParameters,
+    regexOnFullURL: x.regexOnFullURL,
+    maxDiscoveryDepth: x.maxDiscoveryDepth,
+    delay: x.delay,
+  };
+}
+
 export function fromLegacyCrawlerOptions(x: any, teamId: string): {
   crawlOptions: CrawlerOptions;
   internalOptions: InternalOptions;