v1 restructure

This commit is contained in:
Gergő Móricz 2024-08-15 21:51:59 +02:00
parent af08ab0b1a
commit 4165de1773
21 changed files with 211 additions and 133 deletions

View File

@ -1,6 +1,6 @@
import { crawlController } from '../crawl' import { crawlController } from '../v0/crawl'
import { Request, Response } from 'express'; import { Request, Response } from 'express';
import { authenticateUser } from '../auth'; // Ensure this import is correct import { authenticateUser } from '../v0/auth'; // Ensure this import is correct
import { createIdempotencyKey } from '../../services/idempotency/create'; import { createIdempotencyKey } from '../../services/idempotency/create';
import { validateIdempotencyKey } from '../../services/idempotency/validate'; import { validateIdempotencyKey } from '../../services/idempotency/validate';
import { v4 as uuidv4 } from 'uuid'; import { v4 as uuidv4 } from 'uuid';

View File

@ -1,9 +1,9 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { Job } from "bullmq"; import { Job } from "bullmq";
import { Logger } from "../../lib/logger"; import { Logger } from "../../../lib/logger";
import { getScrapeQueue } from "../../services/queue-service"; import { getScrapeQueue } from "../../../services/queue-service";
import { checkAlerts } from "../../services/alerts"; import { checkAlerts } from "../../../services/alerts";
export async function cleanBefore24hCompleteJobsController( export async function cleanBefore24hCompleteJobsController(
req: Request, req: Request,

View File

@ -1,7 +1,7 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import Redis from "ioredis"; import Redis from "ioredis";
import { Logger } from "../../lib/logger"; import { Logger } from "../../../lib/logger";
import { redisRateLimitClient } from "../../services/rate-limiter"; import { redisRateLimitClient } from "../../../services/rate-limiter";
export async function redisHealthController(req: Request, res: Response) { export async function redisHealthController(req: Request, res: Response) {
const retryOperation = async (operation, retries = 3) => { const retryOperation = async (operation, retries = 3) => {

View File

@ -1,19 +1,19 @@
import { parseApi } from "../../src/lib/parseApi"; import { parseApi } from "../../../src/lib/parseApi";
import { getRateLimiter } from "../../src/services/rate-limiter"; import { getRateLimiter } from "../../../src/services/rate-limiter";
import { import {
AuthResponse, AuthResponse,
NotificationType, NotificationType,
RateLimiterMode, RateLimiterMode,
} from "../../src/types"; } from "../../../src/types";
import { supabase_service } from "../../src/services/supabase"; import { supabase_service } from "../../../src/services/supabase";
import { withAuth } from "../../src/lib/withAuth"; import { withAuth } from "../../../src/lib/withAuth";
import { RateLimiterRedis } from "rate-limiter-flexible"; import { RateLimiterRedis } from "rate-limiter-flexible";
import { setTraceAttributes } from "@hyperdx/node-opentelemetry"; import { setTraceAttributes } from "@hyperdx/node-opentelemetry";
import { sendNotification } from "../services/notification/email_notification"; import { sendNotification } from "../../services/notification/email_notification";
import { Logger } from "../lib/logger"; import { Logger } from "../../lib/logger";
import { redlock } from "../../src/services/redlock"; import { redlock } from "../../../src/services/redlock";
import { getValue } from "../../src/services/redis"; import { getValue } from "../../../src/services/redis";
import { setValue } from "../../src/services/redis"; import { setValue } from "../../../src/services/redis";
import { validate } from "uuid"; import { validate } from "uuid";
function normalizedApiIsUuid(potentialUuid: string): boolean { function normalizedApiIsUuid(potentialUuid: string): boolean {

View File

@ -1,9 +1,9 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../src/types"; import { RateLimiterMode } from "../../../src/types";
import { supabase_service } from "../../src/services/supabase"; import { supabase_service } from "../../../src/services/supabase";
import { Logger } from "../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { getCrawl, saveCrawl } from "../../src/lib/crawl-redis"; import { getCrawl, saveCrawl } from "../../../src/lib/crawl-redis";
export async function crawlCancelController(req: Request, res: Response) { export async function crawlCancelController(req: Request, res: Response) {
try { try {

View File

@ -1,10 +1,10 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../src/types"; import { RateLimiterMode } from "../../../src/types";
import { getScrapeQueue } from "../../src/services/queue-service"; import { getScrapeQueue } from "../../../src/services/queue-service";
import { Logger } from "../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis"; import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
import { supabaseGetJobById } from "../../src/lib/supabase-jobs"; import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
export async function crawlStatusController(req: Request, res: Response) { export async function crawlStatusController(req: Request, res: Response) {
try { try {

View File

@ -1,18 +1,18 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { checkTeamCredits } from "../../src/services/billing/credit_billing"; import { checkTeamCredits } from "../../../src/services/billing/credit_billing";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../src/types"; import { RateLimiterMode } from "../../../src/types";
import { addScrapeJob } from "../../src/services/queue-jobs"; import { addScrapeJob } from "../../../src/services/queue-jobs";
import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist"; import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
import { logCrawl } from "../../src/services/logging/crawl_log"; import { logCrawl } from "../../../src/services/logging/crawl_log";
import { validateIdempotencyKey } from "../../src/services/idempotency/validate"; import { validateIdempotencyKey } from "../../../src/services/idempotency/validate";
import { createIdempotencyKey } from "../../src/services/idempotency/create"; import { createIdempotencyKey } from "../../../src/services/idempotency/create";
import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../src/lib/default-values"; import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values";
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
import { Logger } from "../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../src/lib/crawl-redis"; import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
import { getScrapeQueue } from "../../src/services/queue-service"; import { getScrapeQueue } from "../../../src/services/queue-service";
import { checkAndUpdateURL } from "../../src/lib/validateUrl"; import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
export async function crawlController(req: Request, res: Response) { export async function crawlController(req: Request, res: Response) {
try { try {

View File

@ -1,12 +1,12 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../src/types"; import { RateLimiterMode } from "../../../src/types";
import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist"; import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
import { Logger } from "../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "../../src/lib/crawl-redis"; import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
import { addScrapeJob } from "../../src/services/queue-jobs"; import { addScrapeJob } from "../../../src/services/queue-jobs";
import { checkAndUpdateURL } from "../../src/lib/validateUrl"; import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
export async function crawlPreviewController(req: Request, res: Response) { export async function crawlPreviewController(req: Request, res: Response) {
try { try {

View File

@ -1,5 +1,5 @@
import { AuthResponse, RateLimiterMode } from "../types"; import { AuthResponse, RateLimiterMode } from "../../types";
import { Request, Response } from "express"; import { Request, Response } from "express";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";

View File

@ -1,17 +1,17 @@
import { ExtractorOptions, PageOptions } from './../lib/entities'; import { ExtractorOptions, PageOptions } from './../../lib/entities';
import { Request, Response } from "express"; import { Request, Response } from "express";
import { billTeam, checkTeamCredits } from "../services/billing/credit_billing"; import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../types"; import { RateLimiterMode } from "../../types";
import { logJob } from "../services/logging/log_job"; import { logJob } from "../../services/logging/log_job";
import { Document } from "../lib/entities"; import { Document } from "../../lib/entities";
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
import { numTokensFromString } from '../lib/LLM-extraction/helpers'; import { numTokensFromString } from '../../lib/LLM-extraction/helpers';
import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values'; import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../../lib/default-values';
import { addScrapeJob } from '../services/queue-jobs'; import { addScrapeJob } from '../../services/queue-jobs';
import { scrapeQueueEvents } from '../services/queue-service'; import { scrapeQueueEvents } from '../../services/queue-service';
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
import { Logger } from '../lib/logger'; import { Logger } from '../../lib/logger';
export async function scrapeHelper( export async function scrapeHelper(
jobId: string, jobId: string,

View File

@ -1,15 +1,15 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { WebScraperDataProvider } from "../scraper/WebScraper"; import { WebScraperDataProvider } from "../../scraper/WebScraper";
import { billTeam, checkTeamCredits } from "../services/billing/credit_billing"; import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../types"; import { RateLimiterMode } from "../../types";
import { logJob } from "../services/logging/log_job"; import { logJob } from "../../services/logging/log_job";
import { PageOptions, SearchOptions } from "../lib/entities"; import { PageOptions, SearchOptions } from "../../lib/entities";
import { search } from "../search"; import { search } from "../../search";
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
import { Logger } from "../lib/logger"; import { Logger } from "../../lib/logger";
import { getScrapeQueue, scrapeQueueEvents } from "../services/queue-service"; import { getScrapeQueue, scrapeQueueEvents } from "../../services/queue-service";
export async function searchHelper( export async function searchHelper(
jobId: string, jobId: string,

View File

@ -1,8 +1,8 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { Logger } from "../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis"; import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
import { getScrapeQueue } from "../../src/services/queue-service"; import { getScrapeQueue } from "../../../src/services/queue-service";
import { supabaseGetJobById } from "../../src/lib/supabase-jobs"; import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
export async function crawlJobStatusPreviewController(req: Request, res: Response) { export async function crawlJobStatusPreviewController(req: Request, res: Response) {
try { try {

View File

@ -1,9 +1,6 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../../src/types"; import { RateLimiterMode } from "../../../src/types";
import { addWebScraperJob } from "../../../src/services/queue-jobs";
import { getWebScraperQueue } from "../../../src/services/queue-service";
import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
import { Logger } from "../../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";

View File

@ -1,20 +1,16 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { WebScraperDataProvider } from "../../../src/scraper/WebScraper";
import { billTeam } from "../../../src/services/billing/credit_billing";
import { checkTeamCredits } from "../../../src/services/billing/credit_billing"; import { checkTeamCredits } from "../../../src/services/billing/credit_billing";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../../src/types"; import { RateLimiterMode } from "../../../src/types";
import { addWebScraperJob } from "../../../src/services/queue-jobs";
import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist"; import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
import { logCrawl } from "../../../src/services/logging/crawl_log";
import { validateIdempotencyKey } from "../../../src/services/idempotency/validate"; import { validateIdempotencyKey } from "../../../src/services/idempotency/validate";
import { createIdempotencyKey } from "../../../src/services/idempotency/create"; import { createIdempotencyKey } from "../../../src/services/idempotency/create";
import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values";
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
import { Logger } from "../../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { checkAndUpdateURL } from "../../../src/lib/validateUrl"; import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
import { CrawlRequest, CrawlResponse } from "./types";
export async function crawlController(req: Request, res: Response) { export async function crawlController(req: Request<{}, {}, CrawlRequest>, res: Response<CrawlResponse>) {
// expected req.body // expected req.body
// req.body = { // req.body = {
@ -39,52 +35,57 @@ export async function crawlController(req: Request, res: Response) {
RateLimiterMode.Crawl RateLimiterMode.Crawl
); );
if (!success) { if (!success) {
return res.status(status).json({ error }); return res.status(status).json({ success: false, error });
} }
if (req.headers["x-idempotency-key"]) { if (req.headers["x-idempotency-key"]) {
const isIdempotencyValid = await validateIdempotencyKey(req); const isIdempotencyValid = await validateIdempotencyKey(req);
if (!isIdempotencyValid) { if (!isIdempotencyValid) {
return res.status(409).json({ error: "Idempotency key already used" }); return res.status(409).json({ success: false, error: "Idempotency key already used" });
} }
try { try {
createIdempotencyKey(req); createIdempotencyKey(req);
} catch (error) { } catch (error) {
Logger.error(error); Logger.error(error);
return res.status(500).json({ error: error.message }); return res.status(500).json({ success: false, error: error.message });
} }
} }
const { success: creditsCheckSuccess, message: creditsCheckMessage } = const { success: creditsCheckSuccess, message: creditsCheckMessage } =
await checkTeamCredits(team_id, 1); await checkTeamCredits(team_id, 1);
if (!creditsCheckSuccess) { if (!creditsCheckSuccess) {
return res.status(402).json({ error: "Insufficient credits" }); return res.status(402).json({ success: false, error: "Insufficient credits" });
} }
let url = req.body.url; let url = req.body.url;
if (!url) { if (!url) {
return res.status(400).json({ error: "Url is required" }); return res.status(400).json({ success: false, error: "Url is required" });
} }
if (isUrlBlocked(url)) { if (isUrlBlocked(url)) {
return res return res
.status(403) .status(403)
.json({ .json({
success: false,
error: error:
"Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
}); });
} }
try { try {
url = checkAndUpdateURL(url); url = checkAndUpdateURL(url).url;
} catch (error) { } catch (error) {
return res.status(400).json({ error: 'Invalid Url' }); return res.status(400).json({ success: false, error: 'Invalid Url' });
} }
// TODO: add job to queue // TODO: add job to queue
const id = uuidv4(); const id = uuidv4();
return res.status(200).json({ jobId: id, url: `${req.protocol}://${req.get('host')}/v1/crawl/${id}` }); return res.status(200).json({
success: true,
id,
url: `${req.protocol}://${req.get('host')}/v1/crawl/${id}`,
});
// const mode = req.body.mode ?? "crawl"; // const mode = req.body.mode ?? "crawl";
@ -134,6 +135,6 @@ export async function crawlController(req: Request, res: Response) {
// res.json({ jobId: job.id }); // res.json({ jobId: job.id });
} catch (error) { } catch (error) {
Logger.error(error); Logger.error(error);
return res.status(500).json({ error: error.message }); return res.status(500).json({ success: false, error: error.message });
} }
} }

View File

@ -1,29 +1,19 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { WebScraperDataProvider } from "../../../src/scraper/WebScraper";
import { billTeam } from "../../../src/services/billing/credit_billing";
import { checkTeamCredits } from "../../../src/services/billing/credit_billing";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../../src/types"; import { RateLimiterMode } from "../../../src/types";
import { addWebScraperJob } from "../../../src/services/queue-jobs";
import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist"; import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
import { logCrawl } from "../../../src/services/logging/crawl_log";
import { validateIdempotencyKey } from "../../../src/services/idempotency/validate";
import { createIdempotencyKey } from "../../../src/services/idempotency/create";
import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values";
import { v4 as uuidv4 } from "uuid";
import { Logger } from "../../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { checkAndUpdateURL } from "../../../src/lib/validateUrl"; import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
import { MapRequest, MapResponse } from "./types";
export async function mapController(req: Request, res: Response) { export async function mapController(req: Request<{}, MapResponse, MapRequest>, res: Response<MapResponse>) {
// expected req.body // expected req.body
// req.body = { // req.body = {
// url: string // url: string
// ignoreSitemap: true?? // crawlerOptions:
// other crawler options?
// } // }
try { try {
const { success, team_id, error, status } = await authenticateUser( const { success, team_id, error, status } = await authenticateUser(
req, req,
@ -31,7 +21,7 @@ export async function mapController(req: Request, res: Response) {
RateLimiterMode.Crawl RateLimiterMode.Crawl
); );
if (!success) { if (!success) {
return res.status(status).json({ error }); return res.status(status).json({ success: false, error });
} }
// if (req.headers["x-idempotency-key"]) { // if (req.headers["x-idempotency-key"]) {
@ -55,25 +45,26 @@ export async function mapController(req: Request, res: Response) {
let url = req.body.url; let url = req.body.url;
if (!url) { if (!url) {
return res.status(400).json({ error: "Url is required" }); return res.status(400).json({ success: false, error: "Url is required" });
} }
if (isUrlBlocked(url)) { if (isUrlBlocked(url)) {
return res return res
.status(403) .status(403)
.json({ .json({
success: false,
error: error:
"Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
}); });
} }
try { try {
url = checkAndUpdateURL(url); url = checkAndUpdateURL(url).url;
} catch (error) { } catch (error) {
return res.status(400).json({ error: 'Invalid Url' }); return res.status(400).json({ success: false, error: 'Invalid Url' });
} }
return res.status(200).json({ urls: [ "test1", "test2" ] }); return res.status(200).json({ success: true, links: [ "test1", "test2" ] });
// const mode = req.body.mode ?? "crawl"; // const mode = req.body.mode ?? "crawl";
@ -123,6 +114,6 @@ export async function mapController(req: Request, res: Response) {
// res.json({ jobId: job.id }); // res.json({ jobId: job.id });
} catch (error) { } catch (error) {
Logger.error(error); Logger.error(error);
return res.status(500).json({ error: error.message }); return res.status(500).json({ success: false, error: error.message });
} }
} }

View File

@ -1,19 +1,12 @@
// import { ExtractorOptions, PageOptions } from './../../lib/entities';
import { Request, Response } from "express"; import { Request, Response } from "express";
// import { WebScraperDataProvider } from "../../scraper/WebScraper";
// import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../types"; import { RateLimiterMode } from "../../types";
// import { logJob } from "../../services/logging/log_job";
// import { Document } from "../../lib/entities";
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
// import { numTokensFromString } from '../../lib/LLM-extraction/helpers';
// import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../../../src/lib/default-values';
// import { v4 as uuidv4 } from "uuid";
import { Logger } from '../../lib/logger'; import { Logger } from '../../lib/logger';
import { checkAndUpdateURL } from '../../lib/validateUrl'; import { checkAndUpdateURL } from '../../lib/validateUrl';
import { ScrapeRequest, ScrapeResponse } from "./types";
export async function scrapeController(req: Request, res: Response) { export async function scrapeController(req: Request<{}, ScrapeResponse, ScrapeRequest>, res: Response<ScrapeResponse>) {
let url = req.body.url; let url = req.body.url;
if (!url) { if (!url) {
return { success: false, error: "Url is required", returnCode: 400 }; return { success: false, error: "Url is required", returnCode: 400 };
@ -24,7 +17,7 @@ export async function scrapeController(req: Request, res: Response) {
} }
try { try {
url = checkAndUpdateURL(url); url = checkAndUpdateURL(url).url;
} catch (error) { } catch (error) {
return { success: false, error: "Invalid URL", returnCode: 400 }; return { success: false, error: "Invalid URL", returnCode: 400 };
} }
@ -53,20 +46,19 @@ export async function scrapeController(req: Request, res: Response) {
RateLimiterMode.Scrape RateLimiterMode.Scrape
); );
if (!success) { if (!success) {
return res.status(status).json({ error }); return res.status(status).json({ success: false, error });
} }
// check credits // check credits
const result = { const result: ScrapeResponse = {
success: true, success: true,
warning: "test", warning: "test",
data: { data: {
markdown: "test", markdown: "test",
content: "test",
html: "test", html: "test",
rawHtml: "test", rawHtml: "test",
linksOnPage: ["test1", "test2"], links: ["test1", "test2"],
screenshot: "test", screenshot: "test",
metadata: { metadata: {
title: "test", title: "test",
@ -174,7 +166,7 @@ export async function scrapeController(req: Request, res: Response) {
// return res.status(result.returnCode).json(result); // return res.status(result.returnCode).json(result);
} catch (error) { } catch (error) {
Logger.error(error); Logger.error(error);
return res.status(500).json({ error: error.message }); return res.status(500).json({ success: false, error: error.message });
} }
} }

View File

@ -0,0 +1,97 @@
/**
 * Names of the output formats that `ScrapeRequest.formats` may list.
 */
export type Format =
  | "markdown"
  | "html"
  | "rawHtml"
  | "links"
  | "screenshot"
  | "screenshot@fullPage";
/**
 * Request body type used by the scrape controller
 * (`Request<{}, ScrapeResponse, ScrapeRequest>`).
 *
 * Only `url` is mandatory; every other field may be omitted.
 */
export type ScrapeRequest = {
  /** URL to scrape. */
  url: string;
  /** Output formats requested by the caller. */
  formats?: Array<Format>;
  /** String-to-string header map. */
  headers?: Record<string, string>;
  includeTags?: string[];
  excludeTags?: string[];
  onlyMainContent?: boolean;
  // NOTE(review): the unit of `timeout` / `waitFor` is not stated in this file
  // (presumably milliseconds) — confirm against the consumer.
  timeout?: number;
  waitFor?: number;
};
/**
 * Optional crawl settings referenced by both `CrawlRequest.crawlerOptions`
 * and `MapRequest.crawlerOptions`. Every field may be omitted.
 */
export type CrawlerOptions = {
  includePaths?: Array<string>;
  excludePaths?: Array<string>;
  maxDepth?: number;
  limit?: number;
  // TODO: the name `allowBackwardLinks` is still under discussion and may change.
  allowBackwardLinks?: boolean;
  allowExternalLinks?: boolean;
  ignoreSitemap?: boolean;
};
/**
 * Request body type used by the crawl controller
 * (`Request<{}, {}, CrawlRequest>`).
 *
 * The URL is given once, at the top level; `scrapeOptions` accepts every
 * `ScrapeRequest` field except `url`.
 */
export type CrawlRequest = {
  /** URL the crawl starts from. */
  url: string;
  crawlerOptions?: CrawlerOptions;
  // `Omit` removes the `url` KEY from the object type. The previous
  // `Exclude<ScrapeRequest, "url">` filters UNION MEMBERS, not keys, so it
  // resolved to `ScrapeRequest` unchanged and still demanded a nested `url`.
  scrapeOptions?: Omit<ScrapeRequest, "url">;
};
/**
 * Request body type used by the map controller
 * (`Request<{}, MapResponse, MapRequest>`).
 */
export type MapRequest = {
  /** URL to map; the only required field. */
  url: string;
  /** Optional crawl settings. */
  crawlerOptions?: CrawlerOptions;
};
/**
 * One scraped page, used as the `data` payload of a successful
 * `ScrapeResponse`.
 *
 * Each content field is optional; `metadata` is always present, though every
 * one of its own fields is optional.
 */
export type Document = {
  // Page content, one optional field per representation.
  markdown?: string;
  html?: string;
  rawHtml?: string;
  links?: string[];
  screenshot?: string;

  metadata: {
    // General page fields.
    title?: string;
    description?: string;
    language?: string;
    keywords?: string;
    robots?: string;

    // `og*` fields — presumably Open Graph tags; confirm against the extractor.
    ogTitle?: string;
    ogDescription?: string;
    ogUrl?: string;
    ogImage?: string;
    ogAudio?: string;
    ogDeterminer?: string;
    ogLocale?: string;
    ogLocaleAlternate?: string[];
    ogSiteName?: string;
    ogVideo?: string;

    // `dc*` fields — presumably Dublin Core terms; confirm against the extractor.
    dcTermsCreated?: string;
    dcDateCreated?: string;
    dcDate?: string;
    dcTermsType?: string;
    dcType?: string;
    dcTermsAudience?: string;
    dcTermsSubject?: string;
    dcSubject?: string;
    dcDescription?: string;
    dcTermsKeywords?: string;

    // Article-level fields.
    modifiedTime?: string;
    publishedTime?: string;
    articleTag?: string;
    articleSection?: string;

    // Fields describing the fetch itself.
    sourceURL?: string;
    statusCode?: number;
    error?: string;
  };
};
/**
 * Failure payload shared by `ScrapeResponse`, `CrawlResponse` and
 * `MapResponse`.
 */
export type ErrorResponse = {
  /** Always the literal `false`, which sets this variant apart from the success variants. */
  success: false;
  /** Error message text. */
  error: string;
};
/**
 * Response body type of the scrape controller: either an `ErrorResponse` or
 * a success payload carrying the scraped `Document` in `data`, plus an
 * optional `warning` string.
 */
export type ScrapeResponse =
  | ErrorResponse
  | {
      success: true;
      warning?: string;
      data: Document;
    };
/**
 * Response body type of the crawl controller: either an `ErrorResponse` or a
 * success payload with the new crawl's `id` and a `url` for that crawl
 * (the controller builds it as `<protocol>://<host>/v1/crawl/<id>`).
 */
export type CrawlResponse =
  | ErrorResponse
  | {
      success: true;
      id: string;
      url: string;
    };
/**
 * Response body type of the map controller: either an `ErrorResponse` or a
 * success payload listing the link strings in `links`.
 */
export type MapResponse =
  | ErrorResponse
  | {
      success: true;
      links: string[];
    };

View File

@ -1,10 +1,10 @@
import express from "express"; import express from "express";
import { redisHealthController } from "../controllers/admin/redis-health"; import { redisHealthController } from "../controllers/v0/admin/redis-health";
import { import {
checkQueuesController, checkQueuesController,
cleanBefore24hCompleteJobsController, cleanBefore24hCompleteJobsController,
queuesController, queuesController,
} from "../controllers/admin/queue"; } from "../controllers/v0/admin/queue";
export const adminRouter = express.Router(); export const adminRouter = express.Router();

View File

@ -1,14 +1,14 @@
import express from "express"; import express from "express";
import { crawlController } from "../../src/controllers/crawl"; import { crawlController } from "../../src/controllers/v0/crawl";
import { crawlStatusController } from "../../src/controllers/crawl-status"; import { crawlStatusController } from "../../src/controllers/v0/crawl-status";
import { scrapeController } from "../../src/controllers/scrape"; import { scrapeController } from "../../src/controllers/v0/scrape";
import { crawlPreviewController } from "../../src/controllers/crawlPreview"; import { crawlPreviewController } from "../../src/controllers/v0/crawlPreview";
import { crawlJobStatusPreviewController } from "../../src/controllers/status"; import { crawlJobStatusPreviewController } from "../../src/controllers/v0/status";
import { searchController } from "../../src/controllers/search"; import { searchController } from "../../src/controllers/v0/search";
import { crawlCancelController } from "../../src/controllers/crawl-cancel"; import { crawlCancelController } from "../../src/controllers/v0/crawl-cancel";
import { keyAuthController } from "../../src/controllers/keyAuth"; import { keyAuthController } from "../../src/controllers/v0/keyAuth";
import { livenessController } from "../controllers/liveness"; import { livenessController } from "../controllers/v0/liveness";
import { readinessController } from "../controllers/readiness"; import { readinessController } from "../controllers/v0/readiness";
export const v0Router = express.Router(); export const v0Router = express.Router();