diff --git a/apps/api/src/controllers/__tests__/crawl.test.ts b/apps/api/src/controllers/__tests__/crawl.test.ts
index 621c7436..4cbe720d 100644
--- a/apps/api/src/controllers/__tests__/crawl.test.ts
+++ b/apps/api/src/controllers/__tests__/crawl.test.ts
@@ -1,6 +1,6 @@
-import { crawlController } from '../crawl'
+import { crawlController } from '../v0/crawl'
 import { Request, Response } from 'express';
-import { authenticateUser } from '../auth'; // Ensure this import is correct
+import { authenticateUser } from '../v0/auth'; // Ensure this import is correct
 import { createIdempotencyKey } from '../../services/idempotency/create';
 import { validateIdempotencyKey } from '../../services/idempotency/validate';
 import { v4 as uuidv4 } from 'uuid';
diff --git a/apps/api/src/controllers/admin/queue.ts b/apps/api/src/controllers/v0/admin/queue.ts
similarity index 93%
rename from apps/api/src/controllers/admin/queue.ts
rename to apps/api/src/controllers/v0/admin/queue.ts
index 095e7ca7..a5b23e33 100644
--- a/apps/api/src/controllers/admin/queue.ts
+++ b/apps/api/src/controllers/v0/admin/queue.ts
@@ -1,9 +1,9 @@
 import { Request, Response } from "express";
 import { Job } from "bullmq";
-import { Logger } from "../../lib/logger";
-import { getScrapeQueue } from "../../services/queue-service";
-import { checkAlerts } from "../../services/alerts";
+import { Logger } from "../../../lib/logger";
+import { getScrapeQueue } from "../../../services/queue-service";
+import { checkAlerts } from "../../../services/alerts";
 
 export async function cleanBefore24hCompleteJobsController(
   req: Request,
diff --git a/apps/api/src/controllers/admin/redis-health.ts b/apps/api/src/controllers/v0/admin/redis-health.ts
similarity index 95%
rename from apps/api/src/controllers/admin/redis-health.ts
rename to apps/api/src/controllers/v0/admin/redis-health.ts
index 3b1e2518..dc58d745 100644
--- a/apps/api/src/controllers/admin/redis-health.ts
+++ b/apps/api/src/controllers/v0/admin/redis-health.ts
@@ -1,7 +1,7 @@
 import { Request, Response } from "express";
 import Redis from "ioredis";
-import { Logger } from "../../lib/logger";
-import { redisRateLimitClient } from "../../services/rate-limiter";
+import { Logger } from "../../../lib/logger";
+import { redisRateLimitClient } from "../../../services/rate-limiter";
 
 export async function redisHealthController(req: Request, res: Response) {
   const retryOperation = async (operation, retries = 3) => {
diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/v0/auth.ts
similarity index 94%
rename from apps/api/src/controllers/auth.ts
rename to apps/api/src/controllers/v0/auth.ts
index e18a8a7c..07984842 100644
--- a/apps/api/src/controllers/auth.ts
+++ b/apps/api/src/controllers/v0/auth.ts
@@ -1,19 +1,19 @@
-import { parseApi } from "../../src/lib/parseApi";
-import { getRateLimiter } from "../../src/services/rate-limiter";
+import { parseApi } from "../../../src/lib/parseApi";
+import { getRateLimiter } from "../../../src/services/rate-limiter";
 import {
   AuthResponse,
   NotificationType,
   RateLimiterMode,
-} from "../../src/types";
-import { supabase_service } from "../../src/services/supabase";
-import { withAuth } from "../../src/lib/withAuth";
+} from "../../../src/types";
+import { supabase_service } from "../../../src/services/supabase";
+import { withAuth } from "../../../src/lib/withAuth";
 import { RateLimiterRedis } from "rate-limiter-flexible";
 import { setTraceAttributes } from "@hyperdx/node-opentelemetry";
-import { sendNotification } from "../services/notification/email_notification";
-import { Logger } from "../lib/logger";
-import { redlock } from "../../src/services/redlock";
-import { getValue } from "../../src/services/redis";
-import { setValue } from "../../src/services/redis";
+import { sendNotification } from "../../services/notification/email_notification";
+import { Logger } from "../../lib/logger";
+import { redlock } from "../../../src/services/redlock";
+import { getValue } from "../../../src/services/redis";
+import { setValue } from "../../../src/services/redis";
 import { validate } from "uuid";
 
 function normalizedApiIsUuid(potentialUuid: string): boolean {
diff --git a/apps/api/src/controllers/crawl-cancel.ts b/apps/api/src/controllers/v0/crawl-cancel.ts
similarity index 85%
rename from apps/api/src/controllers/crawl-cancel.ts
rename to apps/api/src/controllers/v0/crawl-cancel.ts
index ed2c4166..4658d90c 100644
--- a/apps/api/src/controllers/crawl-cancel.ts
+++ b/apps/api/src/controllers/v0/crawl-cancel.ts
@@ -1,9 +1,9 @@
 import { Request, Response } from "express";
 import { authenticateUser } from "./auth";
-import { RateLimiterMode } from "../../src/types";
-import { supabase_service } from "../../src/services/supabase";
-import { Logger } from "../../src/lib/logger";
-import { getCrawl, saveCrawl } from "../../src/lib/crawl-redis";
+import { RateLimiterMode } from "../../../src/types";
+import { supabase_service } from "../../../src/services/supabase";
+import { Logger } from "../../../src/lib/logger";
+import { getCrawl, saveCrawl } from "../../../src/lib/crawl-redis";
 
 export async function crawlCancelController(req: Request, res: Response) {
   try {
diff --git a/apps/api/src/controllers/crawl-status.ts b/apps/api/src/controllers/v0/crawl-status.ts
similarity index 85%
rename from apps/api/src/controllers/crawl-status.ts
rename to apps/api/src/controllers/v0/crawl-status.ts
index 93c463c0..0c982737 100644
--- a/apps/api/src/controllers/crawl-status.ts
+++ b/apps/api/src/controllers/v0/crawl-status.ts
@@ -1,10 +1,10 @@
 import { Request, Response } from "express";
 import { authenticateUser } from "./auth";
-import { RateLimiterMode } from "../../src/types";
-import { getScrapeQueue } from "../../src/services/queue-service";
-import { Logger } from "../../src/lib/logger";
-import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis";
-import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
+import { RateLimiterMode } from "../../../src/types";
+import { getScrapeQueue } from "../../../src/services/queue-service";
+import { Logger } from "../../../src/lib/logger";
+import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
+import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
 
 export async function crawlStatusController(req: Request, res: Response) {
   try {
diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/v0/crawl.ts
similarity index 85%
rename from apps/api/src/controllers/crawl.ts
rename to apps/api/src/controllers/v0/crawl.ts
index 54eb1f40..a61e7b6d 100644
--- a/apps/api/src/controllers/crawl.ts
+++ b/apps/api/src/controllers/v0/crawl.ts
@@ -1,18 +1,18 @@
 import { Request, Response } from "express";
-import { checkTeamCredits } from "../../src/services/billing/credit_billing";
+import { checkTeamCredits } from "../../../src/services/billing/credit_billing";
 import { authenticateUser } from "./auth";
-import { RateLimiterMode } from "../../src/types";
-import { addScrapeJob } from "../../src/services/queue-jobs";
-import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
-import { logCrawl } from "../../src/services/logging/crawl_log";
-import { validateIdempotencyKey } from "../../src/services/idempotency/validate";
-import { createIdempotencyKey } from "../../src/services/idempotency/create";
-import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../src/lib/default-values";
+import { RateLimiterMode } from "../../../src/types";
+import { addScrapeJob } from "../../../src/services/queue-jobs";
+import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
+import { logCrawl } from "../../../src/services/logging/crawl_log";
+import { validateIdempotencyKey } from "../../../src/services/idempotency/validate";
+import { createIdempotencyKey } from "../../../src/services/idempotency/create";
+import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values";
 import { v4 as uuidv4 } from "uuid";
-import { Logger } from "../../src/lib/logger";
-import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../src/lib/crawl-redis";
-import { getScrapeQueue } from "../../src/services/queue-service";
-import { checkAndUpdateURL } from "../../src/lib/validateUrl";
+import { Logger } from "../../../src/lib/logger";
+import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
+import { getScrapeQueue } from "../../../src/services/queue-service";
+import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
 
 export async function crawlController(req: Request, res: Response) {
   try {
diff --git a/apps/api/src/controllers/crawlPreview.ts b/apps/api/src/controllers/v0/crawlPreview.ts
similarity index 90%
rename from apps/api/src/controllers/crawlPreview.ts
rename to apps/api/src/controllers/v0/crawlPreview.ts
index cc10dc8e..356da835 100644
--- a/apps/api/src/controllers/crawlPreview.ts
+++ b/apps/api/src/controllers/v0/crawlPreview.ts
@@ -1,12 +1,12 @@
 import { Request, Response } from "express";
 import { authenticateUser } from "./auth";
-import { RateLimiterMode } from "../../src/types";
-import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
+import { RateLimiterMode } from "../../../src/types";
+import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
 import { v4 as uuidv4 } from "uuid";
-import { Logger } from "../../src/lib/logger";
-import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "../../src/lib/crawl-redis";
-import { addScrapeJob } from "../../src/services/queue-jobs";
-import { checkAndUpdateURL } from "../../src/lib/validateUrl";
+import { Logger } from "../../../src/lib/logger";
+import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
+import { addScrapeJob } from "../../../src/services/queue-jobs";
+import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
 
 export async function crawlPreviewController(req: Request, res: Response) {
   try {
diff --git a/apps/api/src/controllers/keyAuth.ts b/apps/api/src/controllers/v0/keyAuth.ts
similarity index 90%
rename from apps/api/src/controllers/keyAuth.ts
rename to apps/api/src/controllers/v0/keyAuth.ts
index 351edd18..a0a4cabc 100644
--- a/apps/api/src/controllers/keyAuth.ts
+++ b/apps/api/src/controllers/v0/keyAuth.ts
@@ -1,5 +1,5 @@
-import { AuthResponse, RateLimiterMode } from "../types";
+import { AuthResponse, RateLimiterMode } from "../../types";
 
 import { Request, Response } from "express";
 
 import { authenticateUser } from "./auth";
diff --git a/apps/api/src/controllers/liveness.ts b/apps/api/src/controllers/v0/liveness.ts
similarity index 100%
rename from apps/api/src/controllers/liveness.ts
rename to apps/api/src/controllers/v0/liveness.ts
diff --git a/apps/api/src/controllers/readiness.ts b/apps/api/src/controllers/v0/readiness.ts
similarity index 100%
rename from apps/api/src/controllers/readiness.ts
rename to apps/api/src/controllers/v0/readiness.ts
diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/v0/scrape.ts
similarity index 88%
rename from apps/api/src/controllers/scrape.ts
rename to apps/api/src/controllers/v0/scrape.ts
index 3d568790..4e1b696d 100644
--- a/apps/api/src/controllers/scrape.ts
+++ b/apps/api/src/controllers/v0/scrape.ts
@@ -1,17 +1,17 @@
-import { ExtractorOptions, PageOptions } from './../lib/entities';
+import { ExtractorOptions, PageOptions } from './../../lib/entities';
 import { Request, Response } from "express";
-import { billTeam, checkTeamCredits } from "../services/billing/credit_billing";
+import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing";
 import { authenticateUser } from "./auth";
-import { RateLimiterMode } from "../types";
-import { logJob } from "../services/logging/log_job";
-import { Document } from "../lib/entities";
-import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
-import { numTokensFromString } from '../lib/LLM-extraction/helpers';
-import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values';
-import { addScrapeJob } from '../services/queue-jobs';
-import { scrapeQueueEvents } from '../services/queue-service';
+import { RateLimiterMode } from "../../types";
+import { logJob } from "../../services/logging/log_job";
+import { Document } from "../../lib/entities";
+import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
+import { numTokensFromString } from '../../lib/LLM-extraction/helpers';
+import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../../lib/default-values';
+import { addScrapeJob } from '../../services/queue-jobs';
+import { scrapeQueueEvents } from '../../services/queue-service';
 import { v4 as uuidv4 } from "uuid";
-import { Logger } from '../lib/logger';
+import { Logger } from '../../lib/logger';
 
 export async function scrapeHelper(
   jobId: string,
diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/v0/search.ts
similarity index 89%
rename from apps/api/src/controllers/search.ts
rename to apps/api/src/controllers/v0/search.ts
index 873922c4..73d8b678 100644
--- a/apps/api/src/controllers/search.ts
+++ b/apps/api/src/controllers/v0/search.ts
@@ -1,15 +1,15 @@
 import { Request, Response } from "express";
-import { WebScraperDataProvider } from "../scraper/WebScraper";
-import { billTeam, checkTeamCredits } from "../services/billing/credit_billing";
+import { WebScraperDataProvider } from "../../scraper/WebScraper";
+import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing";
 import { authenticateUser } from "./auth";
-import { RateLimiterMode } from "../types";
-import { logJob } from "../services/logging/log_job";
-import { PageOptions, SearchOptions } from "../lib/entities";
-import { search } from "../search";
-import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
+import { RateLimiterMode } from "../../types";
+import { logJob } from "../../services/logging/log_job";
+import { PageOptions, SearchOptions } from "../../lib/entities";
+import { search } from "../../search";
+import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
 import { v4 as uuidv4 } from "uuid";
-import { Logger } from "../lib/logger";
-import { getScrapeQueue, scrapeQueueEvents } from "../services/queue-service";
+import { Logger } from "../../lib/logger";
+import { getScrapeQueue, scrapeQueueEvents } from "../../services/queue-service";
 
 export async function searchHelper(
   jobId: string,
diff --git a/apps/api/src/controllers/status.ts b/apps/api/src/controllers/v0/status.ts
similarity index 86%
rename from apps/api/src/controllers/status.ts
rename to apps/api/src/controllers/v0/status.ts
index 21a9cf47..5efb0ff1 100644
--- a/apps/api/src/controllers/status.ts
+++ b/apps/api/src/controllers/v0/status.ts
@@ -1,8 +1,8 @@
 import { Request, Response } from "express";
-import { Logger } from "../../src/lib/logger";
-import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis";
-import { getScrapeQueue } from "../../src/services/queue-service";
-import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
+import { Logger } from "../../../src/lib/logger";
+import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
+import { getScrapeQueue } from "../../../src/services/queue-service";
+import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
 
 export async function crawlJobStatusPreviewController(req: Request, res: Response) {
   try {
diff --git a/apps/api/src/controllers/v1/crawl-status.ts b/apps/api/src/controllers/v1/crawl-status.ts
index da2bc11e..e148f8db 100644
--- a/apps/api/src/controllers/v1/crawl-status.ts
+++ b/apps/api/src/controllers/v1/crawl-status.ts
@@ -1,9 +1,6 @@
 import { Request, Response } from "express";
 import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../../src/types";
-import { addWebScraperJob } from "../../../src/services/queue-jobs";
-import { getWebScraperQueue } from "../../../src/services/queue-service";
-import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
 import { Logger } from "../../../src/lib/logger";
 import { v4 as uuidv4 } from "uuid";
 
diff --git a/apps/api/src/controllers/v1/crawl.ts b/apps/api/src/controllers/v1/crawl.ts
index a00ad7ca..b4ce293e 100644
--- a/apps/api/src/controllers/v1/crawl.ts
+++ b/apps/api/src/controllers/v1/crawl.ts
@@ -1,20 +1,16 @@
 import { Request, Response } from "express";
-import { WebScraperDataProvider } from "../../../src/scraper/WebScraper";
-import { billTeam } from "../../../src/services/billing/credit_billing";
 import { checkTeamCredits } from "../../../src/services/billing/credit_billing";
 import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../../src/types";
-import { addWebScraperJob } from "../../../src/services/queue-jobs";
 import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
-import { logCrawl } from "../../../src/services/logging/crawl_log";
 import { validateIdempotencyKey } from "../../../src/services/idempotency/validate";
 import { createIdempotencyKey } from "../../../src/services/idempotency/create";
-import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values";
 import { v4 as uuidv4 } from "uuid";
 import { Logger } from "../../../src/lib/logger";
 import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
+import { CrawlRequest, CrawlResponse } from "./types";
 
-export async function crawlController(req: Request, res: Response) {
+export async function crawlController(req: Request<{}, {}, CrawlRequest>, res: Response) {
   // expected req.body
   // req.body = {
@@ -39,52 +35,57 @@ export async function crawlController(req: Request, res: Response) {
       RateLimiterMode.Crawl
     );
     if (!success) {
-      return res.status(status).json({ error });
+      return res.status(status).json({ success: false, error });
     }
 
     if (req.headers["x-idempotency-key"]) {
       const isIdempotencyValid = await validateIdempotencyKey(req);
       if (!isIdempotencyValid) {
-        return res.status(409).json({ error: "Idempotency key already used" });
+        return res.status(409).json({ success: false, error: "Idempotency key already used" });
       }
       try {
        createIdempotencyKey(req);
      } catch (error) {
        Logger.error(error);
-        return res.status(500).json({ error: error.message });
+        return res.status(500).json({ success: false, error: error.message });
      }
    }
 
    const { success: creditsCheckSuccess, message: creditsCheckMessage } =
      await checkTeamCredits(team_id, 1);
    if (!creditsCheckSuccess) {
-      return res.status(402).json({ error: "Insufficient credits" });
+      return res.status(402).json({ success: false, error: "Insufficient credits" });
    }
 
    let url = req.body.url;
    if (!url) {
-      return res.status(400).json({ error: "Url is required" });
+      return res.status(400).json({ success: false, error: "Url is required" });
    }
 
    if (isUrlBlocked(url)) {
      return res
        .status(403)
        .json({
+          success: false,
          error:
            "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
        });
    }
 
    try {
-      url = checkAndUpdateURL(url);
+      url = checkAndUpdateURL(url).url;
    } catch (error) {
-      return res.status(400).json({ error: 'Invalid Url' });
+      return res.status(400).json({ success: false, error: 'Invalid Url' });
    }
 
    // TODO: add job to queue
 
    const id = uuidv4();
 
-    return res.status(200).json({ jobId: id, url: `${req.protocol}://${req.get('host')}/v1/crawl/${id}` });
+    return res.status(200).json({
+      success: true,
+      id,
+      url: `${req.protocol}://${req.get('host')}/v1/crawl/${id}`,
+    });
 
    // const mode = req.body.mode ?? "crawl";
@@ -134,6 +135,6 @@ export async function crawlController(req: Request, res: Response) {
   //   res.json({ jobId: job.id });
   } catch (error) {
     Logger.error(error);
-    return res.status(500).json({ error: error.message });
+    return res.status(500).json({ success: false, error: error.message });
   }
 }
diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts
index 391b8a10..f4546abe 100644
--- a/apps/api/src/controllers/v1/map.ts
+++ b/apps/api/src/controllers/v1/map.ts
@@ -1,29 +1,19 @@
 import { Request, Response } from "express";
-import { WebScraperDataProvider } from "../../../src/scraper/WebScraper";
-import { billTeam } from "../../../src/services/billing/credit_billing";
-import { checkTeamCredits } from "../../../src/services/billing/credit_billing";
 import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../../src/types";
-import { addWebScraperJob } from "../../../src/services/queue-jobs";
 import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
-import { logCrawl } from "../../../src/services/logging/crawl_log";
-import { validateIdempotencyKey } from "../../../src/services/idempotency/validate";
-import { createIdempotencyKey } from "../../../src/services/idempotency/create";
-import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values";
-import { v4 as uuidv4 } from "uuid";
 import { Logger } from "../../../src/lib/logger";
 import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
+import { MapRequest, MapResponse } from "./types";
 
-export async function mapController(req: Request, res: Response) {
+export async function mapController(req: Request<{}, MapResponse, MapRequest>, res: Response) {
   // expected req.body
   // req.body = {
   //   url: string
-  //   ignoreSitemap: true??
-  //   other crawler options?
+  //   crawlerOptions:
   // }
-
   try {
     const { success, team_id, error, status } = await authenticateUser(
       req,
@@ -31,7 +21,7 @@
       RateLimiterMode.Crawl
     );
     if (!success) {
-      return res.status(status).json({ error });
+      return res.status(status).json({ success: false, error });
     }
 
     // if (req.headers["x-idempotency-key"]) {
@@ -55,25 +45,26 @@
    let url = req.body.url;
    if (!url) {
-      return res.status(400).json({ error: "Url is required" });
+      return res.status(400).json({ success: false, error: "Url is required" });
    }
 
    if (isUrlBlocked(url)) {
      return res
        .status(403)
        .json({
+          success: false,
          error:
            "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
        });
    }
 
    try {
-      url = checkAndUpdateURL(url);
+      url = checkAndUpdateURL(url).url;
    } catch (error) {
-      return res.status(400).json({ error: 'Invalid Url' });
+      return res.status(400).json({ success: false, error: 'Invalid Url' });
    }
 
-    return res.status(200).json({ urls: [ "test1", "test2" ] });
+    return res.status(200).json({ success: true, links: [ "test1", "test2" ] });
 
    // const mode = req.body.mode ?? "crawl";
@@ -123,6 +114,6 @@ export async function mapController(req: Request, res: Response) {
   //   res.json({ jobId: job.id });
   } catch (error) {
     Logger.error(error);
-    return res.status(500).json({ error: error.message });
+    return res.status(500).json({ success: false, error: error.message });
   }
 }
diff --git a/apps/api/src/controllers/v1/scrape.ts b/apps/api/src/controllers/v1/scrape.ts
index bf529ad2..e3cfcbdc 100644
--- a/apps/api/src/controllers/v1/scrape.ts
+++ b/apps/api/src/controllers/v1/scrape.ts
@@ -1,19 +1,12 @@
-// import { ExtractorOptions, PageOptions } from './../../lib/entities';
 import { Request, Response } from "express";
-// import { WebScraperDataProvider } from "../../scraper/WebScraper";
-// import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing";
 import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../types";
-// import { logJob } from "../../services/logging/log_job";
-// import { Document } from "../../lib/entities";
 import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
-// import { numTokensFromString } from '../../lib/LLM-extraction/helpers';
-// import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../../../src/lib/default-values';
-// import { v4 as uuidv4 } from "uuid";
 import { Logger } from '../../lib/logger';
 import { checkAndUpdateURL } from '../../lib/validateUrl';
+import { ScrapeRequest, ScrapeResponse } from "./types";
 
-export async function scrapeController(req: Request, res: Response) {
+export async function scrapeController(req: Request<{}, ScrapeResponse, ScrapeRequest>, res: Response) {
   let url = req.body.url;
   if (!url) {
     return { success: false, error: "Url is required", returnCode: 400 };
@@ -24,7 +17,7 @@
   }
 
   try {
-    url = checkAndUpdateURL(url);
+    url = checkAndUpdateURL(url).url;
   } catch (error) {
     return { success: false, error: "Invalid URL", returnCode: 400 };
   }
@@ -53,20 +46,19 @@
     RateLimiterMode.Scrape
   );
   if (!success) {
-    return res.status(status).json({ error });
+    return res.status(status).json({ success: false, error });
   }
 
   // check credits
 
-  const result = {
+  const result: ScrapeResponse = {
     success: true,
     warning: "test",
     data: {
       markdown: "test",
-      content: "test",
       html: "test",
       rawHtml: "test",
-      linksOnPage: ["test1", "test2"],
+      links: ["test1", "test2"],
       screenshot: "test",
       metadata: {
         title: "test",
@@ -174,7 +166,7 @@
   //   return res.status(result.returnCode).json(result);
 
   } catch (error) {
     Logger.error(error);
-    return res.status(500).json({ error: error.message });
+    return res.status(500).json({ success: false, error: error.message });
   }
 }
diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts
new file mode 100644
index 00000000..07d56e17
--- /dev/null
+++ b/apps/api/src/controllers/v1/types.ts
@@ -0,0 +1,97 @@
+export type Format = "markdown" | "html" | "rawHtml" | "links" | "screenshot" | "screenshot@fullPage";
+
+export type ScrapeRequest = {
+  url: string;
+  formats?: Format[];
+  headers?: { [K: string]: string };
+  includeTags?: string[];
+  excludeTags?: string[];
+  onlyMainContent?: boolean;
+  timeout?: number;
+  waitFor?: number;
+}
+
+export type CrawlerOptions = {
+  includePaths?: string[];
+  excludePaths?: string[];
+  maxDepth?: number;
+  limit?: number;
+  allowBackwardLinks?: boolean; // >> TODO: CHANGE THIS NAME???
+  allowExternalLinks?: boolean;
+  ignoreSitemap?: boolean;
+};
+
+export type CrawlRequest = {
+  url: string;
+  crawlerOptions?: CrawlerOptions;
+  scrapeOptions?: Exclude<ScrapeRequest, "url">;
+};
+
+export type MapRequest = {
+  url: string;
+  crawlerOptions?: CrawlerOptions;
+};
+
+export type Document = {
+  markdown?: string,
+  html?: string,
+  rawHtml?: string,
+  links?: string[],
+  screenshot?: string,
+  metadata: {
+    title?: string;
+    description?: string;
+    language?: string;
+    keywords?: string;
+    robots?: string;
+    ogTitle?: string;
+    ogDescription?: string;
+    ogUrl?: string;
+    ogImage?: string;
+    ogAudio?: string;
+    ogDeterminer?: string;
+    ogLocale?: string;
+    ogLocaleAlternate?: string[];
+    ogSiteName?: string;
+    ogVideo?: string;
+    dcTermsCreated?: string;
+    dcDateCreated?: string;
+    dcDate?: string;
+    dcTermsType?: string;
+    dcType?: string;
+    dcTermsAudience?: string;
+    dcTermsSubject?: string;
+    dcSubject?: string;
+    dcDescription?: string;
+    dcTermsKeywords?: string;
+    modifiedTime?: string;
+    publishedTime?: string;
+    articleTag?: string;
+    articleSection?: string;
+    sourceURL?: string;
+    statusCode?: number;
+    error?: string;
+  },
+}
+
+export type ErrorResponse = {
+  success: false;
+  error: string;
+};
+
+export type ScrapeResponse = ErrorResponse | {
+  success: true;
+  warning?: string;
+  data: Document;
+};
+
+export type CrawlResponse = ErrorResponse | {
+  success: true;
+  id: string;
+  url: string;
+}
+
+export type MapResponse = ErrorResponse | {
+  success: true;
+  links: string[];
+}
diff --git a/apps/api/src/routes/admin.ts b/apps/api/src/routes/admin.ts
index 77d1bf46..d8f1b1e4 100644
--- a/apps/api/src/routes/admin.ts
+++ b/apps/api/src/routes/admin.ts
@@ -1,10 +1,10 @@
 import express from "express";
-import { redisHealthController } from "../controllers/admin/redis-health";
+import { redisHealthController } from "../controllers/v0/admin/redis-health";
 import {
   checkQueuesController,
   cleanBefore24hCompleteJobsController,
   queuesController,
-} from "../controllers/admin/queue";
+} from "../controllers/v0/admin/queue";
 
 export const adminRouter = express.Router();
diff --git a/apps/api/src/routes/v0.ts b/apps/api/src/routes/v0.ts
index 9c68d9bb..3a7bda65 100644
--- a/apps/api/src/routes/v0.ts
+++ b/apps/api/src/routes/v0.ts
@@ -1,14 +1,14 @@
 import express from "express";
-import { crawlController } from "../../src/controllers/crawl";
-import { crawlStatusController } from "../../src/controllers/crawl-status";
-import { scrapeController } from "../../src/controllers/scrape";
-import { crawlPreviewController } from "../../src/controllers/crawlPreview";
-import { crawlJobStatusPreviewController } from "../../src/controllers/status";
-import { searchController } from "../../src/controllers/search";
-import { crawlCancelController } from "../../src/controllers/crawl-cancel";
-import { keyAuthController } from "../../src/controllers/keyAuth";
-import { livenessController } from "../controllers/liveness";
-import { readinessController } from "../controllers/readiness";
+import { crawlController } from "../../src/controllers/v0/crawl";
+import { crawlStatusController } from "../../src/controllers/v0/crawl-status";
+import { scrapeController } from "../../src/controllers/v0/scrape";
+import { crawlPreviewController } from "../../src/controllers/v0/crawlPreview";
+import { crawlJobStatusPreviewController } from "../../src/controllers/v0/status";
+import { searchController } from "../../src/controllers/v0/search";
+import { crawlCancelController } from "../../src/controllers/v0/crawl-cancel";
+import { keyAuthController } from "../../src/controllers/v0/keyAuth";
+import { livenessController } from "../controllers/v0/liveness";
+import { readinessController } from "../controllers/v0/readiness";
 
 export const v0Router = express.Router();