diff --git a/apps/api/src/controllers/v1/scrape-status.ts b/apps/api/src/controllers/v1/scrape-status.ts
new file mode 100644
index 00000000..5ff16b8a
--- /dev/null
+++ b/apps/api/src/controllers/v1/scrape-status.ts
@@ -0,0 +1,32 @@
+import { Response } from "express";
+import { supabaseGetJobByIdOnlyData } from "../../lib/supabase-jobs";
+import { scrapeStatusRateLimiter } from "../../services/rate-limiter";
+
+export async function scrapeStatusController(req: any, res: any) {
+  try {
+    const rateLimiter = scrapeStatusRateLimiter;
+    const incomingIP = (req.headers["x-forwarded-for"] ||
+      req.socket.remoteAddress) as string;
+    const iptoken = incomingIP;
+    await rateLimiter.consume(iptoken);
+
+    const job = await supabaseGetJobByIdOnlyData(req.params.jobId);
+
+    return res.status(200).json({
+      success: true,
+      data: job?.docs[0],
+    });
+  } catch (error) {
+    if (error instanceof Error && error.message == "Too Many Requests") {
+      return res.status(429).json({
+        success: false,
+        error: "Rate limit exceeded. Please try again later.",
+      });
+    } else {
+      return res.status(500).json({
+        success: false,
+        error: "An unexpected error occurred.",
+      });
+    }
+  }
+}
diff --git a/apps/api/src/lib/supabase-jobs.ts b/apps/api/src/lib/supabase-jobs.ts
index 8ff46a23..6c969fce 100644
--- a/apps/api/src/lib/supabase-jobs.ts
+++ b/apps/api/src/lib/supabase-jobs.ts
@@ -37,3 +37,22 @@ export const supabaseGetJobsById = async (jobIds: string[]) => {
 
   return data;
 };
+
+
+export const supabaseGetJobByIdOnlyData = async (jobId: string) => {
+  const { data, error } = await supabase_service
+    .from("firecrawl_jobs")
+    .select("docs")
+    .eq("job_id", jobId)
+    .single();
+
+  if (error) {
+    return null;
+  }
+
+  if (!data) {
+    return null;
+  }
+
+  return data;
+};
\ No newline at end of file
diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts
index 27da0a1a..9dcbf111 100644
--- a/apps/api/src/routes/v1.ts
+++ b/apps/api/src/routes/v1.ts
@@ -15,6 +15,7 @@ import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws";
 import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
 import { crawlCancelController } from "../controllers/v1/crawl-cancel";
 import { Logger } from "../lib/logger";
+import { scrapeStatusController } from "../controllers/v1/scrape-status";
 // import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
 // import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
 // import { searchController } from "../../src/controllers/v1/search";
@@ -124,6 +125,11 @@ v1Router.get(
   wrap(crawlStatusController)
 );
 
+v1Router.get(
+  "/scrape/:jobId",
+  wrap(scrapeStatusController)
+);
+
 v1Router.ws(
   "/crawl/:jobId",
   crawlStatusWSController
diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts
index 1798b23a..0a0e4b98 100644
--- a/apps/api/src/services/rate-limiter.ts
+++ b/apps/api/src/services/rate-limiter.ts
@@ -104,6 +104,14 @@ export const devBRateLimiter = new RateLimiterRedis({
   duration: 60, // Duration in seconds
 });
 
+
+export const scrapeStatusRateLimiter = new RateLimiterRedis({
+  storeClient: redisRateLimitClient,
+  keyPrefix: "scrape-status",
+  points: 200,
+  duration: 60, // Duration in seconds
+});
+
 export function getRateLimiter(
   mode: RateLimiterMode,
   token: string,
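
For reference, a minimal client sketch for the endpoint this patch adds. This is a sketch under assumptions: the base URL (a local dev server) is invented for illustration, while the route, status codes, and JSON shapes mirror scrapeStatusController above ({ success, data } on 200, { success: false, error } on 429 and 500).

    // Hypothetical TypeScript client (Node 18+, global fetch) for GET /v1/scrape/:jobId
    const BASE_URL = "http://localhost:3002"; // assumption: not part of the patch

    async function getScrapeStatus(jobId: string): Promise<unknown> {
      const res = await fetch(`${BASE_URL}/v1/scrape/${jobId}`);
      const body = await res.json();

      if (res.status === 429) {
        // scrapeStatusRateLimiter allows 200 requests per IP per 60 seconds
        throw new Error(body.error); // "Rate limit exceeded. Please try again later."
      }
      if (!res.ok) {
        throw new Error(body.error); // "An unexpected error occurred."
      }
      return body.data; // the first document stored for the job (job?.docs[0])
    }

One caveat worth noting: rate-limiter-flexible's consume() rejects with a RateLimiterRes object rather than an Error when points are exhausted, so the "error instanceof Error" guard in the controller may route rate-limited callers to the 500 branch; the client above treats both statuses as failures regardless.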