feat(api/crawl/ongoing): return more details

This commit is contained in:
Gergő Móricz 2025-06-02 23:33:15 +02:00
parent bf9929da3e
commit 84d0a37d78
3 changed files with 40 additions and 4 deletions

View File

@ -54,13 +54,13 @@ describe("Crawl tests", () => {
const ongoing = await crawlOngoing(); const ongoing = await crawlOngoing();
expect(ongoing.ids).toContain(res.id) expect(ongoing.crawls.find(x => x.id === res.id)).toBeDefined();
await asyncCrawlWaitForFinish(res.id); await asyncCrawlWaitForFinish(res.id);
const ongoing2 = await crawlOngoing(); const ongoing2 = await crawlOngoing();
expect(ongoing2.ids).not.toContain(res.id); expect(ongoing2.crawls.find(x => x.id === res.id)).toBeUndefined();
}, 120000); }, 120000);
// TEMP: Flaky // TEMP: Flaky

View File

@ -2,8 +2,10 @@ import { Response } from "express";
import { import {
OngoingCrawlsResponse, OngoingCrawlsResponse,
RequestWithAuth, RequestWithAuth,
toNewCrawlerOptions,
} from "./types"; } from "./types";
import { import {
getCrawl,
getCrawlsByTeamId, getCrawlsByTeamId,
} from "../../lib/crawl-redis"; } from "../../lib/crawl-redis";
import { configDotenv } from "dotenv"; import { configDotenv } from "dotenv";
@ -15,8 +17,18 @@ export async function ongoingCrawlsController(
) { ) {
const ids = await getCrawlsByTeamId(req.auth.team_id); const ids = await getCrawlsByTeamId(req.auth.team_id);
const crawls = (await Promise.all(ids.map(async id => ({ ...(await getCrawl(id)), id })))).filter((crawl) => crawl !== null && !crawl.cancelled);
res.status(200).json({ res.status(200).json({
success: true, success: true,
ids, crawls: crawls.map(x => ({
id: x.id,
teamId: x.team_id!,
url: x.originUrl!,
options: {
...toNewCrawlerOptions(x.crawlerOptions),
scrapeOptions: x.scrapeOptions,
},
})),
}); });
} }

View File

@ -879,7 +879,12 @@ export type OngoingCrawlsResponse =
| ErrorResponse | ErrorResponse
| { | {
success: true; success: true;
ids: string[]; crawls: {
id: string;
teamId: string;
url: string;
options: CrawlerOptions;
}[];
}; };
export type CrawlErrorsResponse = export type CrawlErrorsResponse =
@ -1016,6 +1021,25 @@ export function toLegacyCrawlerOptions(x: CrawlerOptions) {
}; };
} }
/**
 * Converts a legacy-shaped crawler options object into the current
 * `CrawlerOptions` field naming.
 *
 * Renamed fields:
 * - `includes` -> `includePaths`
 * - `excludes` -> `excludePaths`
 * - `allowBackwardCrawling` -> `allowBackwardLinks`
 * - `allowExternalContentLinks` -> `allowExternalLinks`
 *
 * Every other listed field keeps its name. Properties of `x` that are not
 * listed here are not copied. A field missing on `x` is still emitted as a
 * key whose value is `undefined`.
 *
 * @param x - Legacy options object; it is read with plain property access,
 *   so it must not be `null` or `undefined`.
 * @returns A new object holding the fourteen mapped option fields.
 */
export function toNewCrawlerOptions(x: any): CrawlerOptions {
  // Pull each legacy field out once, renaming the four whose names changed.
  const {
    includes: includePaths,
    excludes: excludePaths,
    limit,
    maxDepth,
    allowBackwardCrawling: allowBackwardLinks,
    allowExternalContentLinks: allowExternalLinks,
    allowSubdomains,
    ignoreRobotsTxt,
    ignoreSitemap,
    deduplicateSimilarURLs,
    ignoreQueryParameters,
    regexOnFullURL,
    maxDiscoveryDepth,
    delay,
  } = x;

  // Key order is kept stable so the serialized output stays the same.
  return {
    includePaths,
    excludePaths,
    limit,
    maxDepth,
    allowBackwardLinks,
    allowExternalLinks,
    allowSubdomains,
    ignoreRobotsTxt,
    ignoreSitemap,
    deduplicateSimilarURLs,
    ignoreQueryParameters,
    regexOnFullURL,
    maxDiscoveryDepth,
    delay,
  };
}
export function fromLegacyCrawlerOptions(x: any, teamId: string): { export function fromLegacyCrawlerOptions(x: any, teamId: string): {
crawlOptions: CrawlerOptions; crawlOptions: CrawlerOptions;
internalOptions: InternalOptions; internalOptions: InternalOptions;