diff --git a/apps/api/.env.example b/apps/api/.env.example index 5d8e746d..08ff7d7f 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -57,3 +57,14 @@ SELF_HOSTED_WEBHOOK_URL= # Resend API Key for transactional emails RESEND_API_KEY= + +# LOGGING_LEVEL determines the verbosity of logs that the system will output. +# Available levels are: +# NONE - No logs will be output. +# ERROR - For logging error messages that indicate a failure in a specific operation. +# WARN - For logging potentially harmful situations that are not necessarily errors. +# INFO - For logging informational messages that highlight the progress of the application. +# DEBUG - For logging detailed information on the flow through the system, primarily used for debugging. +# TRACE - For logging more detailed information than the DEBUG level. +# Set LOGGING_LEVEL to one of the above options to control logging output. +LOGGING_LEVEL=INFO diff --git a/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts index 3e324d39..019bc968 100644 --- a/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts @@ -858,7 +858,6 @@ describe("E2E Tests for API Routes", () => { await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again } } - console.log(crawlData) expect(crawlData.length).toBeGreaterThan(0); expect(crawlData).toEqual(expect.arrayContaining([ expect.objectContaining({ url: expect.stringContaining("https://firecrawl.dev/?ref=mendable+banner") }), diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 56a5ec61..aab3d188 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -6,6 +6,7 @@ import { withAuth } from "../../src/lib/withAuth"; import { RateLimiterRedis } from "rate-limiter-flexible"; import { setTraceAttributes } from '@hyperdx/node-opentelemetry'; import { sendNotification } from "../services/notification/email_notification"; +import { Logger } from "../lib/logger"; export async function authenticateUser(req, res, mode?: RateLimiterMode): Promise<AuthResponse> { return withAuth(supaAuthenticateUser)(req, res, mode); } @@ -17,7 +18,7 @@ function setTrace(team_id: string, api_key: string) { api_key }); } catch (error) { - console.error('Error setting trace attributes:', error); + Logger.error(`Error setting trace attributes: ${error.message}`); } } @@ -82,7 +83,7 @@ export async function supaAuthenticateUser( // $$ language plpgsql; if (error) { - console.error('Error fetching key and price_id:', error); + Logger.warn(`Error fetching key and price_id: ${error.message}`); } else { // console.log('Key and Price ID:', data); } @@ -135,7 +136,7 @@ export async function supaAuthenticateUser( try { await rateLimiter.consume(team_endpoint_token); } catch (rateLimiterRes) { - console.error(rateLimiterRes); + Logger.error(`Rate limit exceeded: ${rateLimiterRes}`); const secs = Math.round(rateLimiterRes.msBeforeNext / 1000) || 1; const retryDate = new Date(Date.now() + rateLimiterRes.msBeforeNext); diff --git a/apps/api/src/controllers/crawl-cancel.ts b/apps/api/src/controllers/crawl-cancel.ts index ff4b2c58..d0c109ec 100644 --- a/apps/api/src/controllers/crawl-cancel.ts +++ b/apps/api/src/controllers/crawl-cancel.ts @@ -5,6 +5,7 @@ import { addWebScraperJob } from "../../src/services/queue-jobs"; import { getWebScraperQueue } from "../../src/services/queue-service"; import { supabase_service } from
"../../src/services/supabase"; import { billTeam } from "../../src/services/billing/credit_billing"; +import { Logger } from "../../src/lib/logger"; export async function crawlCancelController(req: Request, res: Response) { try { @@ -43,7 +44,7 @@ export async function crawlCancelController(req: Request, res: Response) { const { partialDocs } = await job.progress(); if (partialDocs && partialDocs.length > 0 && jobState === "active") { - console.log("Billing team for partial docs..."); + Logger.info("Billing team for partial docs..."); // Note: the credits that we will bill them here might be lower than the actual // due to promises that are not yet resolved await billTeam(team_id, partialDocs.length); @@ -55,7 +56,7 @@ export async function crawlCancelController(req: Request, res: Response) { await job.discard(); await job.moveToFailed(Error("Job cancelled by user"), true); } catch (error) { - console.error(error); + Logger.error(error); } const newJobState = await job.getState(); @@ -64,7 +65,7 @@ export async function crawlCancelController(req: Request, res: Response) { status: "cancelled" }); } catch (error) { - console.error(error); + Logger.error(error); return res.status(500).json({ error: error.message }); } } diff --git a/apps/api/src/controllers/crawl-status.ts b/apps/api/src/controllers/crawl-status.ts index a55003cc..5aafa433 100644 --- a/apps/api/src/controllers/crawl-status.ts +++ b/apps/api/src/controllers/crawl-status.ts @@ -4,6 +4,7 @@ import { RateLimiterMode } from "../../src/types"; import { addWebScraperJob } from "../../src/services/queue-jobs"; import { getWebScraperQueue } from "../../src/services/queue-service"; import { supabaseGetJobById } from "../../src/lib/supabase-jobs"; +import { Logger } from "../../src/lib/logger"; export async function crawlStatusController(req: Request, res: Response) { try { @@ -44,7 +45,7 @@ export async function crawlStatusController(req: Request, res: Response) { partial_data: jobStatus == 'completed' ? 
[] : partialDocs, }); } catch (error) { - console.error(error); + Logger.error(error); return res.status(500).json({ error: error.message }); } } diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 89358fcc..614a5928 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -10,6 +10,7 @@ import { logCrawl } from "../../src/services/logging/crawl_log"; import { validateIdempotencyKey } from "../../src/services/idempotency/validate"; import { createIdempotencyKey } from "../../src/services/idempotency/create"; import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../src/lib/default-values"; +import { Logger } from "../../src/lib/logger"; export async function crawlController(req: Request, res: Response) { try { @@ -30,7 +31,7 @@ export async function crawlController(req: Request, res: Response) { try { createIdempotencyKey(req); } catch (error) { - console.error(error); + Logger.error(error); return res.status(500).json({ error: error.message }); } } @@ -83,7 +84,7 @@ export async function crawlController(req: Request, res: Response) { documents: docs, }); } catch (error) { - console.error(error); + Logger.error(error); return res.status(500).json({ error: error.message }); } } @@ -101,7 +102,7 @@ export async function crawlController(req: Request, res: Response) { res.json({ jobId: job.id }); } catch (error) { - console.error(error); + Logger.error(error); return res.status(500).json({ error: error.message }); } } diff --git a/apps/api/src/controllers/crawlPreview.ts b/apps/api/src/controllers/crawlPreview.ts index 2c3dc4ea..7c5c804d 100644 --- a/apps/api/src/controllers/crawlPreview.ts +++ b/apps/api/src/controllers/crawlPreview.ts @@ -3,6 +3,7 @@ import { authenticateUser } from "./auth"; import { RateLimiterMode } from "../../src/types"; import { addWebScraperJob } from "../../src/services/queue-jobs"; import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist"; +import { Logger } from "../../src/lib/logger"; export async function crawlPreviewController(req: Request, res: Response) { try { @@ -39,7 +40,7 @@ export async function crawlPreviewController(req: Request, res: Response) { res.json({ jobId: job.id }); } catch (error) { - console.error(error); + Logger.error(error); return res.status(500).json({ error: error.message }); } } diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index fb57e41d..615f0c0e 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -9,6 +9,7 @@ import { Document } from "../lib/entities"; import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function import { numTokensFromString } from '../lib/LLM-extraction/helpers'; import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values'; +import { Logger } from '../lib/logger'; export async function scrapeHelper( req: Request, @@ -112,7 +113,7 @@ export async function scrapeController(req: Request, res: Response) { return res.status(402).json({ error: "Insufficient credits" }); } } catch (error) { - console.error(error); + Logger.error(error); earlyReturn = true; return res.status(402).json({ error: "Error checking team credits. Please contact hello@firecrawl.com for help." 
}); } @@ -188,7 +189,7 @@ export async function scrapeController(req: Request, res: Response) { return res.status(result.returnCode).json(result); } catch (error) { - console.error(error); + Logger.error(error); return res.status(500).json({ error: error.message }); } } diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts index 8cb6d55b..adacb766 100644 --- a/apps/api/src/controllers/search.ts +++ b/apps/api/src/controllers/search.ts @@ -7,6 +7,7 @@ import { logJob } from "../services/logging/log_job"; import { PageOptions, SearchOptions } from "../lib/entities"; import { search } from "../search"; import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; +import { Logger } from "../lib/logger"; export async function searchHelper( req: Request, @@ -155,7 +156,7 @@ export async function searchController(req: Request, res: Response) { return res.status(402).json({ error: "Insufficient credits" }); } } catch (error) { - console.error(error); + Logger.error(error); return res.status(500).json({ error: "Internal server error" }); } const startTime = new Date().getTime(); @@ -183,7 +184,7 @@ export async function searchController(req: Request, res: Response) { }); return res.status(result.returnCode).json(result); } catch (error) { - console.error(error); + Logger.error(error); return res.status(500).json({ error: error.message }); } } diff --git a/apps/api/src/controllers/status.ts b/apps/api/src/controllers/status.ts index 231885f4..3d7fccbb 100644 --- a/apps/api/src/controllers/status.ts +++ b/apps/api/src/controllers/status.ts @@ -1,6 +1,7 @@ import { Request, Response } from "express"; import { getWebScraperQueue } from "../../src/services/queue-service"; import { supabaseGetJobById } from "../../src/lib/supabase-jobs"; +import { Logger } from "../../src/lib/logger"; export async function crawlJobStatusPreviewController(req: Request, res: Response) { try { @@ -35,7 +36,7 @@ export async function crawlJobStatusPreviewController(req: Request, res: Respons partial_data: jobStatus == 'completed' ? [] : partialDocs, }); } catch (error) { - console.error(error); + Logger.error(error); return res.status(500).json({ error: error.message }); } } diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 88ec4418..98243756 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -12,16 +12,17 @@ import { sendSlackWebhook } from "./services/alerts/slack"; import { checkAlerts } from "./services/alerts"; import Redis from "ioredis"; import { redisRateLimitClient } from "./services/rate-limiter"; +import { Logger } from "./lib/logger"; const { createBullBoard } = require("@bull-board/api"); const { BullAdapter } = require("@bull-board/api/bullAdapter"); const { ExpressAdapter } = require("@bull-board/express"); const numCPUs = process.env.ENV === "local" ? 2 : os.cpus().length; -console.log(`Number of CPUs: ${numCPUs} available`); +Logger.info(`Number of CPUs: ${numCPUs} available`); if (cluster.isMaster) { - console.log(`Master ${process.pid} is running`); + Logger.info(`Master ${process.pid} is running`); // Fork workers. 
for (let i = 0; i < numCPUs; i++) { @@ -30,8 +31,8 @@ if (cluster.isMaster) { cluster.on("exit", (worker, code, signal) => { if (code !== null) { - console.log(`Worker ${worker.process.pid} exited`); - console.log("Starting a new worker"); + Logger.info(`Worker ${worker.process.pid} exited`); + Logger.info("Starting a new worker"); cluster.fork(); } }); @@ -81,14 +82,9 @@ if (cluster.isMaster) { function startServer(port = DEFAULT_PORT) { const server = app.listen(Number(port), HOST, () => { - console.log(`Worker ${process.pid} listening on port ${port}`); - console.log( - `For the UI, open http://${HOST}:${port}/admin/${process.env.BULL_AUTH_KEY}/queues` - ); - console.log(""); - console.log("1. Make sure Redis is running on port 6379 by default"); - console.log( - "2. If you want to run nango, make sure you do port forwarding in 3002 using ngrok http 3002 " + Logger.info(`Worker ${process.pid} listening on port ${port}`); + Logger.info( + `For the Queue UI, open: http://${HOST}:${port}/admin/${process.env.BULL_AUTH_KEY}/queues` ); }); return server; @@ -114,7 +110,7 @@ if (cluster.isMaster) { noActiveJobs, }); } catch (error) { - console.error(error); + Logger.error(error); return res.status(500).json({ error: error.message }); } }); @@ -132,7 +128,7 @@ if (cluster.isMaster) { waitingJobs, }); } catch (error) { - console.error(error); + Logger.error(error); return res.status(500).json({ error: error.message }); } }); @@ -177,13 +173,13 @@ if (cluster.isMaster) { }); if (!response.ok) { - console.error("Failed to send Slack notification"); + Logger.error("Failed to send Slack notification"); } } }, timeout); } } catch (error) { - console.error(error); + Logger.debug(error); } }; @@ -198,7 +194,7 @@ if (cluster.isMaster) { await checkAlerts(); return res.status(200).send("Alerts initialized"); } catch (error) { - console.error("Failed to initialize alerts:", error); + Logger.debug(`Failed to initialize alerts: ${error}`); return res.status(500).send("Failed to initialize alerts"); } } @@ -207,6 +203,7 @@ if (cluster.isMaster) { app.get( `/admin/${process.env.BULL_AUTH_KEY}/clean-before-24h-complete-jobs`, async (req, res) => { + Logger.info("🐂 Cleaning jobs older than 24h"); try { const webScraperQueue = getWebScraperQueue(); const batchSize = 10; @@ -241,12 +238,12 @@ if (cluster.isMaster) { await job.remove(); count++; } catch (jobError) { - console.error(`Failed to remove job with ID ${job.id}:`, jobError); + Logger.error(`🐂 Failed to remove job with ID ${job.id}: ${jobError}` ); } } return res.status(200).send(`Removed ${count} completed jobs.`); } catch (error) { - console.error("Failed to clean last 24h complete jobs:", error); + Logger.error(`🐂 Failed to clean last 24h complete jobs: ${error}`); return res.status(500).send("Failed to clean jobs"); } } @@ -272,7 +269,7 @@ if (cluster.isMaster) { queueRedisHealth = await queueRedis.get(testKey); await queueRedis.del(testKey); } catch (error) { - console.error("queueRedis health check failed:", error); + Logger.error(`queueRedis health check failed: ${error}`); queueRedisHealth = null; } @@ -283,7 +280,7 @@ if (cluster.isMaster) { redisRateLimitHealth = await redisRateLimitClient.get(testKey); await redisRateLimitClient.del(testKey); } catch (error) { - console.error("redisRateLimitClient health check failed:", error); + Logger.error(`redisRateLimitClient health check failed: ${error}`); redisRateLimitHealth = null; } @@ -297,12 +294,12 @@ if (cluster.isMaster) { healthStatus.queueRedis === "healthy" && 
healthStatus.redisRateLimitClient === "healthy" ) { - console.log("Both Redis instances are healthy"); + Logger.info("Both Redis instances are healthy"); return res .status(200) .json({ status: "healthy", details: healthStatus }); } else { - console.log("Redis instances health check:", healthStatus); + Logger.info(`Redis instances health check: ${JSON.stringify(healthStatus)}`); await sendSlackWebhook( `[REDIS DOWN] Redis instances health check: ${JSON.stringify( healthStatus @@ -314,7 +311,7 @@ if (cluster.isMaster) { .json({ status: "unhealthy", details: healthStatus }); } } catch (error) { - console.error("Redis health check failed:", error); + Logger.error(`Redis health check failed: ${error}`); await sendSlackWebhook( `[REDIS DOWN] Redis instances health check: ${error.message}`, true @@ -326,5 +323,5 @@ if (cluster.isMaster) { } ); - console.log(`Worker ${process.pid} started`); + Logger.info(`Worker ${process.pid} started`); } diff --git a/apps/api/src/lib/LLM-extraction/index.ts b/apps/api/src/lib/LLM-extraction/index.ts index 2156fb3c..85a7e995 100644 --- a/apps/api/src/lib/LLM-extraction/index.ts +++ b/apps/api/src/lib/LLM-extraction/index.ts @@ -4,6 +4,7 @@ const ajv = new Ajv(); // Initialize AJV for JSON schema validation import { generateOpenAICompletions } from "./models"; import { Document, ExtractorOptions } from "../entities"; +import { Logger } from "../logger"; // Generate completion using OpenAI export async function generateCompletions( @@ -44,7 +45,7 @@ export async function generateCompletions( return completionResult; } catch (error) { - console.error(`Error generating completions: ${error}`); + Logger.error(`Error generating completions: ${error}`); throw new Error(`Error generating completions: ${error.message}`); } default: diff --git a/apps/api/src/lib/logger.ts b/apps/api/src/lib/logger.ts new file mode 100644 index 00000000..872dbf51 --- /dev/null +++ b/apps/api/src/lib/logger.ts @@ -0,0 +1,53 @@ +enum LogLevel { + NONE = 'NONE', // No logs will be output. + ERROR = 'ERROR', // For logging error messages that indicate a failure in a specific operation. + WARN = 'WARN', // For logging potentially harmful situations that are not necessarily errors. + INFO = 'INFO', // For logging informational messages that highlight the progress of the application. + DEBUG = 'DEBUG', // For logging detailed information on the flow through the system, primarily used for debugging. + TRACE = 'TRACE' // For logging more detailed information than the DEBUG level. +} +export class Logger { + static colors = { + ERROR: '\x1b[31m%s\x1b[0m', // Red + WARN: '\x1b[33m%s\x1b[0m', // Yellow + INFO: '\x1b[34m%s\x1b[0m', // Blue + DEBUG: '\x1b[36m%s\x1b[0m', // Cyan + TRACE: '\x1b[35m%s\x1b[0m' // Magenta + }; + + static log (message: string, level: LogLevel) { + const logLevel: LogLevel = LogLevel[process.env.LOGGING_LEVEL as keyof typeof LogLevel] || LogLevel.INFO; + const levels = [LogLevel.NONE, LogLevel.ERROR, LogLevel.WARN, LogLevel.INFO, LogLevel.DEBUG, LogLevel.TRACE]; + const currentLevelIndex = levels.indexOf(logLevel); + const messageLevelIndex = levels.indexOf(level); + + if (currentLevelIndex >= messageLevelIndex) { + const color = Logger.colors[level]; + console[level.toLowerCase()](color, `[${new Date().toISOString()}]${level} - ${message}`); + + // if (process.env.USE_DB_AUTH) { + // save to supabase? another place? 
+ // supabase.from('logs').insert({ level: level, message: message, timestamp: new Date().toISOString(), success: boolean }); + // } + } + } + static error(message: string | any) { + Logger.log(message, LogLevel.ERROR); + } + + static warn(message: string) { + Logger.log(message, LogLevel.WARN); + } + + static info(message: string) { + Logger.log(message, LogLevel.INFO); + } + + static debug(message: string) { + Logger.log(message, LogLevel.DEBUG); + } + + static trace(message: string) { + Logger.log(message, LogLevel.TRACE); + } +} diff --git a/apps/api/src/lib/withAuth.ts b/apps/api/src/lib/withAuth.ts index ea5aa4d8..353c144b 100644 --- a/apps/api/src/lib/withAuth.ts +++ b/apps/api/src/lib/withAuth.ts @@ -1,4 +1,5 @@ import { AuthResponse } from "../../src/types"; +import { Logger } from "./logger"; let warningCount = 0; @@ -8,7 +9,7 @@ export function withAuth( return async function (...args: U): Promise { if (process.env.USE_DB_AUTHENTICATION === "false") { if (warningCount < 5) { - console.warn("WARNING - You're bypassing authentication"); + Logger.warn("You're bypassing authentication"); warningCount++; } return { success: true } as T; @@ -16,7 +17,7 @@ export function withAuth( try { return await originalFunction(...args); } catch (error) { - console.error("Error in withAuth function: ", error); + Logger.error(`Error in withAuth function: ${error}`); return { success: false, error: error.message } as T; } } diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index 3c98e11e..0a7da035 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -10,6 +10,7 @@ import { DocumentUrl, Progress } from "../lib/entities"; import { billTeam } from "../services/billing/credit_billing"; import { Document } from "../lib/entities"; import { supabase_service } from "../services/supabase"; +import { Logger } from "../lib/logger"; export async function startWebScraperPipeline({ job, @@ -23,6 +24,7 @@ export async function startWebScraperPipeline({ crawlerOptions: job.data.crawlerOptions, pageOptions: job.data.pageOptions, inProgress: (progress) => { + Logger.debug(`🐂 Job in progress ${job.id}`); if (progress.currentDocument) { partialDocs.push(progress.currentDocument); if (partialDocs.length > 50) { @@ -32,9 +34,11 @@ export async function startWebScraperPipeline({ } }, onSuccess: (result) => { + Logger.debug(`🐂 Job completed ${job.id}`); saveJob(job, result); }, onError: (error) => { + Logger.error(`🐂 Job failed ${job.id}`); job.moveToFailed(error); }, team_id: job.data.team_id, @@ -108,7 +112,6 @@ export async function runWebScraper({ // this return doesn't matter too much for the job completion result return { success: true, message: "", docs: filteredDocs }; } catch (error) { - console.error("Error running web scraper", error); onError(error); return { success: false, message: error.message, docs: [] }; } @@ -136,6 +139,6 @@ const saveJob = async (job: Job, result: any) => { } } } catch (error) { - console.error("Failed to update job status:", error); + Logger.error(`🐂 Failed to update job status: ${error}`); } }; diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts index 640eada0..29819b1c 100644 --- a/apps/api/src/scraper/WebScraper/crawler.ts +++ b/apps/api/src/scraper/WebScraper/crawler.ts @@ -8,6 +8,7 @@ import { scrapSingleUrl } from "./single_url"; import robotsParser from "robots-parser"; import { getURLDepth } from "./utils/maxDepthUtils"; import { axiosTimeout } from 
"../../../src/lib/timeout"; +import { Logger } from "../../../src/lib/logger"; export class WebCrawler { private initialUrl: string; @@ -116,7 +117,7 @@ export class WebCrawler { const isAllowed = this.robots.isAllowed(link, "FireCrawlAgent") ?? true; // Check if the link is disallowed by robots.txt if (!isAllowed) { - console.log(`Link disallowed by robots.txt: ${link}`); + Logger.debug(`Link disallowed by robots.txt: ${link}`); return false; } @@ -133,15 +134,19 @@ export class WebCrawler { limit: number = 10000, maxDepth: number = 10 ): Promise<{ url: string, html: string }[]> { + + Logger.debug(`Crawler starting with ${this.initialUrl}`); // Fetch and parse robots.txt try { const response = await axios.get(this.robotsTxtUrl, { timeout: axiosTimeout }); this.robots = robotsParser(this.robotsTxtUrl, response.data); + Logger.debug(`Crawler robots.txt fetched with ${this.robotsTxtUrl}`); } catch (error) { - console.log(`Failed to fetch robots.txt from ${this.robotsTxtUrl}`); + Logger.debug(`Failed to fetch robots.txt from ${this.robotsTxtUrl}`); } - if(!crawlerOptions?.ignoreSitemap){ + if (!crawlerOptions?.ignoreSitemap){ + Logger.debug(`Fetching sitemap links from ${this.initialUrl}`); const sitemapLinks = await this.tryFetchSitemapLinks(this.initialUrl); if (sitemapLinks.length > 0) { let filteredLinks = this.filterLinks(sitemapLinks, limit, maxDepth); @@ -175,6 +180,7 @@ export class WebCrawler { inProgress?: (progress: Progress) => void, ): Promise<{ url: string, html: string }[]> { const queue = async.queue(async (task: string, callback) => { + Logger.debug(`Crawling ${task}`); if (this.crawledUrls.size >= Math.min(this.maxCrawledLinks, this.limit)) { if (callback && typeof callback === "function") { callback(); @@ -216,16 +222,18 @@ export class WebCrawler { } }, concurrencyLimit); + Logger.debug(`🐂 Pushing ${urls.length} URLs to the queue`); queue.push( urls.filter( (url) => !this.visited.has(url) && this.robots.isAllowed(url, "FireCrawlAgent") ), (err) => { - if (err) console.error(err); + if (err) Logger.error(`🐂 Error pushing URLs to the queue: ${err}`); } ); await queue.drain(); + Logger.debug(`🐂 Crawled ${this.crawledUrls.size} URLs, Queue drained.`); return Array.from(this.crawledUrls.entries()).map(([url, html]) => ({ url, html })); } @@ -282,7 +290,6 @@ export class WebCrawler { const urlObj = new URL(fullUrl); const path = urlObj.pathname; - if (this.isInternalLink(fullUrl)) { // INTERNAL LINKS if (this.isInternalLink(fullUrl) && this.noSections(fullUrl) && @@ -452,7 +459,7 @@ export class WebCrawler { sitemapLinks = await getLinksFromSitemap({ sitemapUrl }); } } catch (error) { - console.error(`Failed to fetch sitemap with axios from ${sitemapUrl}: ${error}`); + Logger.debug(`Failed to fetch sitemap with axios from ${sitemapUrl}: ${error}`); const response = await getLinksFromSitemap({ sitemapUrl, mode: 'fire-engine' }); if (response) { sitemapLinks = response; @@ -467,7 +474,7 @@ export class WebCrawler { sitemapLinks = await getLinksFromSitemap({ sitemapUrl: baseUrlSitemap }); } } catch (error) { - console.error(`Failed to fetch sitemap from ${baseUrlSitemap}: ${error}`); + Logger.debug(`Failed to fetch sitemap from ${baseUrlSitemap}: ${error}`); sitemapLinks = await getLinksFromSitemap({ sitemapUrl: baseUrlSitemap, mode: 'fire-engine' }); } } diff --git a/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts b/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts index f8b2503e..e2f8d8cc 100644 --- 
a/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts +++ b/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts @@ -1,10 +1,12 @@ +import { Logger } from "../../../lib/logger"; + export async function handleCustomScraping( text: string, url: string ): Promise<{ scraper: string; url: string; waitAfterLoad?: number, pageOptions?: { scrollXPaths?: string[] } } | null> { // Check for Readme Docs special case if (text.includes(' 0 ? this.getSitemapDataForSingleUrl(this.urls[0], links[0], 1500).catch( (error) => { - console.error("Failed to fetch sitemap data:", error); + Logger.debug(`Failed to fetch sitemap data: ${error}`); return null; } ) @@ -460,7 +461,7 @@ export class WebScraperDataProvider { let documents: Document[] = []; for (const url of urls) { const normalizedUrl = this.normalizeUrl(url); - console.log( + Logger.debug( "Getting cached document for web-scraper-cache:" + normalizedUrl ); const cachedDocumentString = await getValue( diff --git a/apps/api/src/scraper/WebScraper/scrapers/fetch.ts b/apps/api/src/scraper/WebScraper/scrapers/fetch.ts index 4c31438c..c9ddf93a 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fetch.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fetch.ts @@ -2,6 +2,7 @@ import axios from "axios"; import { logScrape } from "../../../services/logging/scrape_log"; import { fetchAndProcessPdf } from "../utils/pdfProcessor"; import { universalTimeout } from "../global"; +import { Logger } from "../../../lib/logger"; /** * Scrapes a URL with Axios @@ -34,9 +35,7 @@ export async function scrapWithFetch( }); if (response.status !== 200) { - console.error( - `[Axios] Error fetching url: ${url} with status: ${response.status}` - ); + Logger.debug(`⛏️ Axios: Failed to fetch url: ${url} with status: ${response.status}`); logParams.error_message = response.statusText; logParams.response_code = response.status; return { @@ -63,10 +62,10 @@ export async function scrapWithFetch( } catch (error) { if (error.code === "ECONNABORTED") { logParams.error_message = "Request timed out"; - console.log(`[Axios] Request timed out for ${url}`); + Logger.debug(`⛏️ Axios: Request timed out for ${url}`); } else { logParams.error_message = error.message || error; - console.error(`[Axios] Error fetching url: ${url} -> ${error}`); + Logger.debug(`⛏️ Axios: Failed to fetch url: ${url} | Error: ${error}`); } return { content: "", pageStatusCode: null, pageError: logParams.error_message }; } finally { diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index e547c019..0f4c2320 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -4,6 +4,7 @@ import { logScrape } from "../../../services/logging/scrape_log"; import { generateRequestParams } from "../single_url"; import { fetchAndProcessPdf } from "../utils/pdfProcessor"; import { universalTimeout } from "../global"; +import { Logger } from "../../../lib/logger"; /** * Scrapes a URL with Fire-Engine @@ -59,12 +60,10 @@ export async function scrapWithFireEngine({ let engine = engineParam; // do we want fireEngineOptions as first choice? - console.log( - `[Fire-Engine][${engine}] Scraping ${url} with wait: ${waitParam} and screenshot: ${screenshotParam} and method: ${fireEngineOptionsParam?.method ?? 
"null"}` + Logger.info( + `⛏️ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, method: ${fireEngineOptionsParam?.method ?? "null"} }` ); - // console.log(fireEngineOptionsParam) - const response = await axios.post( process.env.FIRE_ENGINE_BETA_URL + endpoint, { @@ -84,15 +83,15 @@ export async function scrapWithFireEngine({ ); if (response.status !== 200) { - console.error( - `[Fire-Engine][${engine}] Error fetching url: ${url} with status: ${response.status}` + Logger.debug( + `⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}` ); logParams.error_message = response.data?.pageError; logParams.response_code = response.data?.pageStatusCode; if(response.data && response.data?.pageStatusCode !== 200) { - console.error(`[Fire-Engine][${engine}] Error fetching url: ${url} with status: ${response.status}`); + Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`); } return { @@ -130,10 +129,10 @@ export async function scrapWithFireEngine({ } } catch (error) { if (error.code === "ECONNABORTED") { - console.log(`[Fire-Engine] Request timed out for ${url}`); + Logger.debug(`⛏️ Fire-Engine: Request timed out for ${url}`); logParams.error_message = "Request timed out"; } else { - console.error(`[Fire-Engine][c] Error fetching url: ${url} -> ${error}`); + Logger.debug(`⛏️ Fire-Engine: Failed to fetch url: ${url} | Error: ${error}`); logParams.error_message = error.message || error; } return { html: "", screenshot: "", pageStatusCode: null, pageError: logParams.error_message }; diff --git a/apps/api/src/scraper/WebScraper/scrapers/playwright.ts b/apps/api/src/scraper/WebScraper/scrapers/playwright.ts index 11c3c5ad..4b3180a3 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/playwright.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/playwright.ts @@ -3,6 +3,7 @@ import { logScrape } from "../../../services/logging/scrape_log"; import { generateRequestParams } from "../single_url"; import { fetchAndProcessPdf } from "../utils/pdfProcessor"; import { universalTimeout } from "../global"; +import { Logger } from "../../../lib/logger"; /** * Scrapes a URL with Playwright @@ -51,8 +52,8 @@ export async function scrapWithPlaywright( ); if (response.status !== 200) { - console.error( - `[Playwright] Error fetching url: ${url} with status: ${response.status}` + Logger.debug( + `⛏️ Playwright: Failed to fetch url: ${url} | status: ${response.status}, error: ${response.data?.pageError}` ); logParams.error_message = response.data?.pageError; logParams.response_code = response.data?.pageStatusCode; @@ -86,8 +87,8 @@ export async function scrapWithPlaywright( }; } catch (jsonError) { logParams.error_message = jsonError.message || jsonError; - console.error( - `[Playwright] Error parsing JSON response for url: ${url} -> ${jsonError}` + Logger.debug( + `⛏️ Playwright: Error parsing JSON response for url: ${url} | Error: ${jsonError}` ); return { content: "", pageStatusCode: null, pageError: logParams.error_message }; } @@ -95,10 +96,10 @@ export async function scrapWithPlaywright( } catch (error) { if (error.code === "ECONNABORTED") { logParams.error_message = "Request timed out"; - console.log(`[Playwright] Request timed out for ${url}`); + Logger.debug(`⛏️ Playwright: Request timed out for ${url}`); } else { logParams.error_message = error.message || error; - console.error(`[Playwright] Error fetching url: ${url} -> ${error}`); + Logger.debug(`⛏️ Playwright: Failed to fetch 
url: ${url} | Error: ${error}`); } return { content: "", pageStatusCode: null, pageError: logParams.error_message }; } finally { diff --git a/apps/api/src/scraper/WebScraper/scrapers/scrapingBee.ts b/apps/api/src/scraper/WebScraper/scrapers/scrapingBee.ts index 9a1f0b35..554bfe22 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/scrapingBee.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/scrapingBee.ts @@ -3,6 +3,7 @@ import { generateRequestParams } from "../single_url"; import { fetchAndProcessPdf } from "../utils/pdfProcessor"; import { universalTimeout } from "../global"; import { ScrapingBeeClient } from "scrapingbee"; +import { Logger } from "../../../lib/logger"; /** * Scrapes a URL with ScrapingBee @@ -56,8 +57,8 @@ export async function scrapWithScrapingBee( text = decoder.decode(response.data); logParams.success = true; } catch (decodeError) { - console.error( - `[ScrapingBee][c] Error decoding response data for url: ${url} -> ${decodeError}` + Logger.debug( + `⛏️ ScrapingBee: Error decoding response data for url: ${url} | Error: ${decodeError}` ); logParams.error_message = decodeError.message || decodeError; } @@ -72,7 +73,7 @@ export async function scrapWithScrapingBee( }; } } catch (error) { - console.error(`[ScrapingBee][c] Error fetching url: ${url} -> ${error}`); + Logger.debug(`⛏️ ScrapingBee: Error fetching url: ${url} | Error: ${error}`); logParams.error_message = error.message || error; logParams.response_code = error.response?.status; return { diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index 8fbd31e4..1e7f1fac 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -17,6 +17,7 @@ import { scrapWithFireEngine } from "./scrapers/fireEngine"; import { scrapWithPlaywright } from "./scrapers/playwright"; import { scrapWithScrapingBee } from "./scrapers/scrapingBee"; import { extractLinks } from "./utils/utils"; +import { Logger } from "../../lib/logger"; dotenv.config(); @@ -48,7 +49,7 @@ export async function generateRequestParams( return defaultParams; } } catch (error) { - console.error(`Error generating URL key: ${error}`); + Logger.error(`Error generating URL key: ${error}`); return defaultParams; } } @@ -154,7 +155,6 @@ export async function scrapSingleUrl( } if (process.env.FIRE_ENGINE_BETA_URL) { - console.log(`Scraping ${url} with Fire Engine`); const response = await scrapWithFireEngine({ url, waitFor: pageOptions.waitFor, @@ -277,7 +277,7 @@ export async function scrapSingleUrl( try { urlKey = new URL(urlToScrap).hostname.replace(/^www\./, ""); } catch (error) { - console.error(`Invalid URL key, trying: ${urlToScrap}`); + Logger.error(`Invalid URL key, trying: ${urlToScrap}`); } const defaultScraper = urlSpecificParams[urlKey]?.defaultScraper ?? 
""; const scrapersInOrder = getScrapingFallbackOrder( @@ -311,12 +311,18 @@ export async function scrapSingleUrl( pageError = undefined; } - if (text && text.trim().length >= 100) break; - if (pageStatusCode && pageStatusCode == 404) break; - const nextScraperIndex = scrapersInOrder.indexOf(scraper) + 1; - if (nextScraperIndex < scrapersInOrder.length) { - console.info(`Falling back to ${scrapersInOrder[nextScraperIndex]}`); + if (text && text.trim().length >= 100) { + Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100, breaking`); + break; } + if (pageStatusCode && pageStatusCode == 404) { + Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with status code 404, breaking`); + break; + } + // const nextScraperIndex = scrapersInOrder.indexOf(scraper) + 1; + // if (nextScraperIndex < scrapersInOrder.length) { + // Logger.debug(`⛏️ ${scraper} Failed to fetch URL: ${urlToScrap} with status: ${pageStatusCode}, error: ${pageError} | Falling back to ${scrapersInOrder[nextScraperIndex]}`); + // } } if (!text) { @@ -372,7 +378,7 @@ export async function scrapSingleUrl( return document; } catch (error) { - console.error(`Error: ${error} - Failed to fetch URL: ${urlToScrap}`); + Logger.debug(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`); return { content: "", markdown: "", diff --git a/apps/api/src/scraper/WebScraper/sitemap.ts b/apps/api/src/scraper/WebScraper/sitemap.ts index 1dfbf3a1..b41f3eeb 100644 --- a/apps/api/src/scraper/WebScraper/sitemap.ts +++ b/apps/api/src/scraper/WebScraper/sitemap.ts @@ -3,6 +3,7 @@ import { axiosTimeout } from "../../lib/timeout"; import { parseStringPromise } from "xml2js"; import { scrapWithFireEngine } from "./scrapers/fireEngine"; import { WebCrawler } from "./crawler"; +import { Logger } from "../../lib/logger"; export async function getLinksFromSitemap( { @@ -26,7 +27,7 @@ export async function getLinksFromSitemap( content = response.html; } } catch (error) { - console.error(`Request failed for ${sitemapUrl}: ${error}`); + Logger.error(`Request failed for ${sitemapUrl}: ${error.message}`); return allUrls; } @@ -48,7 +49,7 @@ export async function getLinksFromSitemap( } } } catch (error) { - console.error(`Error processing ${sitemapUrl}: ${error}`); + Logger.debug(`Error processing sitemapUrl: ${sitemapUrl} | Error: ${error.message}`); } return allUrls; diff --git a/apps/api/src/scraper/WebScraper/utils/__tests__/socialBlockList.test.ts b/apps/api/src/scraper/WebScraper/utils/__tests__/socialBlockList.test.ts index 4449285d..c09cc5b3 100644 --- a/apps/api/src/scraper/WebScraper/utils/__tests__/socialBlockList.test.ts +++ b/apps/api/src/scraper/WebScraper/utils/__tests__/socialBlockList.test.ts @@ -1,3 +1,4 @@ +import { Logger } from '../../../../lib/logger'; import { isUrlBlocked } from '../blocklist'; describe('isUrlBlocked', () => { @@ -19,7 +20,7 @@ describe('isUrlBlocked', () => { blockedUrls.forEach(url => { if (!isUrlBlocked(url)) { - console.log(`URL not blocked: ${url}`); + Logger.debug(`URL not blocked: ${url}`); } expect(isUrlBlocked(url)).toBe(true); }); diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts index 633fd5d0..0bdf9876 100644 --- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts +++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts @@ -1,3 +1,5 @@ +import { Logger } from "../../../lib/logger"; + const socialMediaBlocklist = [ 'facebook.com', 'x.com', @@ -59,7 +61,7 @@ export function 
isUrlBlocked(url: string): boolean { return isBlocked; } catch (e) { // If an error occurs (e.g., invalid URL), return false - console.error(`Error parsing the following URL: ${url}`); + Logger.error(`Error parsing the following URL: ${url}`); return false; } } diff --git a/apps/api/src/scraper/WebScraper/utils/imageDescription.ts b/apps/api/src/scraper/WebScraper/utils/imageDescription.ts index fb8f65d9..318c2340 100644 --- a/apps/api/src/scraper/WebScraper/utils/imageDescription.ts +++ b/apps/api/src/scraper/WebScraper/utils/imageDescription.ts @@ -1,5 +1,6 @@ import Anthropic from '@anthropic-ai/sdk'; import axios from 'axios'; +import { Logger } from '../../../lib/logger'; export async function getImageDescription( imageUrl: string, @@ -82,7 +83,7 @@ export async function getImageDescription( } } } catch (error) { - console.error("Error generating image alt text:", error?.message); + Logger.error(`Error generating image alt text: ${error}`); return ""; } } diff --git a/apps/api/src/scraper/WebScraper/utils/metadata.ts b/apps/api/src/scraper/WebScraper/utils/metadata.ts index 3f2052c0..9496d569 100644 --- a/apps/api/src/scraper/WebScraper/utils/metadata.ts +++ b/apps/api/src/scraper/WebScraper/utils/metadata.ts @@ -1,4 +1,6 @@ import { CheerioAPI } from "cheerio"; +import { Logger } from "../../../lib/logger"; + interface Metadata { title?: string; description?: string; @@ -105,7 +107,7 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata { dctermsCreated = soup('meta[name="dcterms.created"]').attr("content") || null; } catch (error) { - console.error("Error extracting metadata:", error); + Logger.error(`Error extracting metadata: ${error}`); } return { diff --git a/apps/api/src/scraper/WebScraper/utils/pdfProcessor.ts b/apps/api/src/scraper/WebScraper/utils/pdfProcessor.ts index 3e01571e..1a9fa11b 100644 --- a/apps/api/src/scraper/WebScraper/utils/pdfProcessor.ts +++ b/apps/api/src/scraper/WebScraper/utils/pdfProcessor.ts @@ -7,6 +7,7 @@ import pdf from "pdf-parse"; import path from "path"; import os from "os"; import { axiosTimeout } from "../../../lib/timeout"; +import { Logger } from "../../../lib/logger"; dotenv.config(); @@ -39,6 +40,7 @@ export async function processPdfToText(filePath: string, parsePDF: boolean): Pro let content = ""; if (process.env.LLAMAPARSE_API_KEY && parsePDF) { + Logger.debug("Processing pdf document w/ LlamaIndex"); const apiKey = process.env.LLAMAPARSE_API_KEY; const headers = { Authorization: `Bearer ${apiKey}`, @@ -81,7 +83,7 @@ export async function processPdfToText(filePath: string, parsePDF: boolean): Pro await new Promise((resolve) => setTimeout(resolve, 500)); // Wait for 0.5 seconds } } catch (error) { - console.error("Error fetching result w/ LlamaIndex"); + Logger.debug("Error fetching result w/ LlamaIndex"); attempt++; await new Promise((resolve) => setTimeout(resolve, 500)); // Wait for 0.5 seconds before retrying // You may want to handle specific errors differently @@ -93,7 +95,7 @@ export async function processPdfToText(filePath: string, parsePDF: boolean): Pro } content = resultResponse.data[resultType]; } catch (error) { - console.error("Error processing pdf document w/ LlamaIndex(2)"); + Logger.debug("Error processing pdf document w/ LlamaIndex(2)"); content = await processPdf(filePath); } } else if (parsePDF) { diff --git a/apps/api/src/scraper/WebScraper/utils/replacePaths.ts b/apps/api/src/scraper/WebScraper/utils/replacePaths.ts index 25b43f0a..f84db63f 100644 --- 
a/apps/api/src/scraper/WebScraper/utils/replacePaths.ts +++ b/apps/api/src/scraper/WebScraper/utils/replacePaths.ts @@ -1,3 +1,4 @@ +import { Logger } from "../../../lib/logger"; import { Document } from "../../../lib/entities"; export const replacePathsWithAbsolutePaths = (documents: Document[]): Document[] => { @@ -39,7 +40,7 @@ export const replacePathsWithAbsolutePaths = (documents: Document[]): Document[] return documents; } catch (error) { - console.error("Error replacing paths with absolute paths", error); + Logger.debug(`Error replacing paths with absolute paths: ${error}`); return documents; } }; @@ -78,7 +79,7 @@ export const replaceImgPathsWithAbsolutePaths = (documents: Document[]): Documen return documents; } catch (error) { - console.error("Error replacing img paths with absolute paths", error); + Logger.error(`Error replacing img paths with absolute paths: ${error}`); return documents; } }; \ No newline at end of file diff --git a/apps/api/src/scraper/WebScraper/utils/utils.ts b/apps/api/src/scraper/WebScraper/utils/utils.ts index 3aa021a6..dd5906b0 100644 --- a/apps/api/src/scraper/WebScraper/utils/utils.ts +++ b/apps/api/src/scraper/WebScraper/utils/utils.ts @@ -1,5 +1,6 @@ import axios from "axios"; import * as cheerio from "cheerio"; +import { Logger } from "../../../lib/logger"; export async function attemptScrapWithRequests( @@ -9,13 +10,13 @@ export async function attemptScrapWithRequests( const response = await axios.get(urlToScrap, { timeout: 15000 }); if (!response.data) { - console.log("Failed normal requests as well"); + Logger.debug("Failed normal requests as well"); return null; } return response.data; } catch (error) { - console.error(`Error in attemptScrapWithRequests: ${error}`); + Logger.debug(`Error in attemptScrapWithRequests: ${error}`); return null; } } diff --git a/apps/api/src/search/googlesearch.ts b/apps/api/src/search/googlesearch.ts index 6bfa1a39..060f4bd8 100644 --- a/apps/api/src/search/googlesearch.ts +++ b/apps/api/src/search/googlesearch.ts @@ -2,6 +2,7 @@ import axios from 'axios'; import * as cheerio from 'cheerio'; import * as querystring from 'querystring'; import { SearchResult } from '../../src/lib/entities'; +import { Logger } from '../../src/lib/logger'; const _useragent_list = [ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0', @@ -96,7 +97,7 @@ export async function google_search(term: string, advanced = false, num_results await new Promise(resolve => setTimeout(resolve, sleep_interval * 1000)); } catch (error) { if (error.message === 'Too many requests') { - console.warn('Too many requests, breaking the loop'); + Logger.warn('Too many requests, breaking the loop'); break; } throw error; @@ -107,7 +108,7 @@ export async function google_search(term: string, advanced = false, num_results } } if (attempts >= maxAttempts) { - console.warn('Max attempts reached, breaking the loop'); + Logger.warn('Max attempts reached, breaking the loop'); } return results } diff --git a/apps/api/src/search/index.ts b/apps/api/src/search/index.ts index 88cbf812..f5bc06e3 100644 --- a/apps/api/src/search/index.ts +++ b/apps/api/src/search/index.ts @@ -1,3 +1,4 @@ +import { Logger } from "../../src/lib/logger"; import { SearchResult } from "../../src/lib/entities"; import { google_search } from "./googlesearch"; import { serper_search } from "./serper"; @@ -47,7 +48,7 @@ export async function search({ timeout ); } catch (error) { - console.error("Error in search function: ", error); + Logger.error(`Error in search function: 
${error}`); return [] } // if process.env.SERPER_API_KEY is set, use serper diff --git a/apps/api/src/services/alerts/index.ts b/apps/api/src/services/alerts/index.ts index 1cfb5906..88b3c726 100644 --- a/apps/api/src/services/alerts/index.ts +++ b/apps/api/src/services/alerts/index.ts @@ -1,3 +1,4 @@ +import { Logger } from "../../../src/lib/logger"; import { getWebScraperQueue } from "../queue-service"; import { sendSlackWebhook } from "./slack"; @@ -9,13 +10,13 @@ export async function checkAlerts() { process.env.ALERT_NUM_ACTIVE_JOBS && process.env.ALERT_NUM_WAITING_JOBS ) { - console.info("Initializing alerts"); + Logger.info("Initializing alerts"); const checkActiveJobs = async () => { try { const webScraperQueue = getWebScraperQueue(); const activeJobs = await webScraperQueue.getActiveCount(); if (activeJobs > Number(process.env.ALERT_NUM_ACTIVE_JOBS)) { - console.warn( + Logger.warn( `Alert: Number of active jobs is over ${process.env.ALERT_NUM_ACTIVE_JOBS}. Current active jobs: ${activeJobs}.` ); sendSlackWebhook( @@ -23,12 +24,12 @@ export async function checkAlerts() { true ); } else { - console.info( + Logger.info( `Number of active jobs is under ${process.env.ALERT_NUM_ACTIVE_JOBS}. Current active jobs: ${activeJobs}` ); } } catch (error) { - console.error("Failed to check active jobs:", error); + Logger.error(`Failed to check active jobs: ${error}`); } }; @@ -38,7 +39,7 @@ export async function checkAlerts() { const paused = await webScraperQueue.getPausedCount(); if (waitingJobs !== paused && waitingJobs > Number(process.env.ALERT_NUM_WAITING_JOBS)) { - console.warn( + Logger.warn( `Alert: Number of waiting jobs is over ${process.env.ALERT_NUM_WAITING_JOBS}. Current waiting jobs: ${waitingJobs}.` ); sendSlackWebhook( @@ -57,6 +58,6 @@ export async function checkAlerts() { // setInterval(checkAll, 10000); // Run every } } catch (error) { - console.error("Failed to initialize alerts:", error); + Logger.error(`Failed to initialize alerts: ${error}`); } } diff --git a/apps/api/src/services/alerts/slack.ts b/apps/api/src/services/alerts/slack.ts index f65035b1..96bf1c09 100644 --- a/apps/api/src/services/alerts/slack.ts +++ b/apps/api/src/services/alerts/slack.ts @@ -1,4 +1,5 @@ import axios from "axios"; +import { Logger } from "../../../src/lib/logger"; export async function sendSlackWebhook( message: string, @@ -16,8 +17,8 @@ export async function sendSlackWebhook( "Content-Type": "application/json", }, }); - console.log("Webhook sent successfully:", response.data); + Logger.debug(`Webhook sent successfully: ${response.data}`); } catch (error) { - console.error("Error sending webhook:", error); + Logger.debug(`Error sending webhook: ${error}`); } } diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts index 82668111..9369cdbb 100644 --- a/apps/api/src/services/billing/credit_billing.ts +++ b/apps/api/src/services/billing/credit_billing.ts @@ -2,6 +2,7 @@ import { NotificationType } from "../../types"; import { withAuth } from "../../lib/withAuth"; import { sendNotification } from "../notification/email_notification"; import { supabase_service } from "../supabase"; +import { Logger } from "../../lib/logger"; const FREE_CREDITS = 500; @@ -12,7 +13,7 @@ export async function supaBillTeam(team_id: string, credits: number) { if (team_id === "preview") { return { success: true, message: "Preview team, no credits used" }; } - console.log(`Billing team ${team_id} for ${credits} credits`); + Logger.info(`Billing team ${team_id} for
${credits} credits`); // When the API is used, you can log the credit usage in the credit_usage table: // team_id: The ID of the team using the API. // subscription_id: The ID of the team's active subscription. @@ -218,7 +219,7 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { 0 ); - console.log("totalCreditsUsed", totalCreditsUsed); + Logger.info(`totalCreditsUsed: ${totalCreditsUsed}`); const end = new Date(); end.setDate(end.getDate() + 30); @@ -262,14 +263,14 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { }); if (creditUsageError) { - console.error("Error calculating credit usage:", creditUsageError); + Logger.error(`Error calculating credit usage: ${creditUsageError}`); } if (creditUsages && creditUsages.length > 0) { totalCreditsUsed = creditUsages[0].total_credits_used; } } catch (error) { - console.error("Error calculating credit usage:", error); + Logger.error(`Error calculating credit usage: ${error}`); } // Adjust total credits used by subtracting coupon value diff --git a/apps/api/src/services/idempotency/create.ts b/apps/api/src/services/idempotency/create.ts index ec3e18e7..291e77d9 100644 --- a/apps/api/src/services/idempotency/create.ts +++ b/apps/api/src/services/idempotency/create.ts @@ -1,5 +1,6 @@ import { Request } from "express"; import { supabase_service } from "../supabase"; +import { Logger } from "../../../src/lib/logger"; export async function createIdempotencyKey( req: Request, @@ -14,7 +15,7 @@ export async function createIdempotencyKey( .insert({ key: idempotencyKey }); if (error) { - console.error("Failed to create idempotency key:", error); + Logger.error(`Failed to create idempotency key: ${error}`); throw error; } diff --git a/apps/api/src/services/idempotency/validate.ts b/apps/api/src/services/idempotency/validate.ts index 1ca348bb..4d58a31d 100644 --- a/apps/api/src/services/idempotency/validate.ts +++ b/apps/api/src/services/idempotency/validate.ts @@ -1,6 +1,7 @@ import { Request } from "express"; import { supabase_service } from "../supabase"; import { validate as isUuid } from 'uuid'; +import { Logger } from "../../../src/lib/logger"; export async function validateIdempotencyKey( req: Request, @@ -13,7 +14,7 @@ export async function validateIdempotencyKey( // Ensure idempotencyKey is treated as a string const key = Array.isArray(idempotencyKey) ? 
idempotencyKey[0] : idempotencyKey; if (!isUuid(key)) { - console.error("Invalid idempotency key provided in the request headers."); + Logger.debug("Invalid idempotency key provided in the request headers."); return false; } @@ -23,7 +24,7 @@ export async function validateIdempotencyKey( .eq("key", idempotencyKey); if (error) { - console.error(error); + Logger.error(`Error validating idempotency key: ${error}`); } if (!data || data.length === 0) { diff --git a/apps/api/src/services/logging/crawl_log.ts b/apps/api/src/services/logging/crawl_log.ts index 1224bd44..68008e02 100644 --- a/apps/api/src/services/logging/crawl_log.ts +++ b/apps/api/src/services/logging/crawl_log.ts @@ -1,4 +1,5 @@ import { supabase_service } from "../supabase"; +import { Logger } from "../../../src/lib/logger"; import "dotenv/config"; export async function logCrawl(job_id: string, team_id: string) { @@ -13,7 +14,7 @@ export async function logCrawl(job_id: string, team_id: string) { }, ]); } catch (error) { - console.error("Error logging crawl job:\n", error); + Logger.error(`Error logging crawl job to supabase:\n${error}`); } } } diff --git a/apps/api/src/services/logging/log_job.ts b/apps/api/src/services/logging/log_job.ts index 9764493f..93d0b311 100644 --- a/apps/api/src/services/logging/log_job.ts +++ b/apps/api/src/services/logging/log_job.ts @@ -3,6 +3,7 @@ import { supabase_service } from "../supabase"; import { FirecrawlJob } from "../../types"; import { posthog } from "../posthog"; import "dotenv/config"; +import { Logger } from "../../lib/logger"; export async function logJob(job: FirecrawlJob) { try { @@ -68,9 +69,9 @@ export async function logJob(job: FirecrawlJob) { posthog.capture(phLog); } if (error) { - console.error("Error logging job:\n", error); + Logger.error(`Error logging job: ${error.message}`); } } catch (error) { - console.error("Error logging job:\n", error); + Logger.error(`Error logging job: ${error.message}`); } } diff --git a/apps/api/src/services/logging/scrape_log.ts b/apps/api/src/services/logging/scrape_log.ts index 728a67d0..208159da 100644 --- a/apps/api/src/services/logging/scrape_log.ts +++ b/apps/api/src/services/logging/scrape_log.ts @@ -2,15 +2,16 @@ import "dotenv/config"; import { ScrapeLog } from "../../types"; import { supabase_service } from "../supabase"; import { PageOptions } from "../../lib/entities"; +import { Logger } from "../../lib/logger"; export async function logScrape( scrapeLog: ScrapeLog, pageOptions?: PageOptions ) { if (process.env.USE_DB_AUTHENTICATION === "false") { + Logger.debug("Skipping logging scrape to Supabase"); return; } - try { // Only log jobs in production // if (process.env.ENV !== "production") { @@ -43,9 +44,9 @@ export async function logScrape( ]); if (error) { - console.error("Error logging proxy:\n", error); + Logger.error(`Error logging proxy:\n${error}`); } } catch (error) { - console.error("Error logging proxy:\n", error); + Logger.error(`Error logging proxy:\n${error}`); } } diff --git a/apps/api/src/services/logtail.ts b/apps/api/src/services/logtail.ts index 8b86a6b1..d9af3c7a 100644 --- a/apps/api/src/services/logtail.ts +++ b/apps/api/src/services/logtail.ts @@ -1,19 +1,20 @@ import { Logtail } from "@logtail/node"; import "dotenv/config"; +import { Logger } from "../lib/logger"; // A mock Logtail class to handle cases where LOGTAIL_KEY is not provided class MockLogtail { info(message: string, context?: Record<string, any>): void { - console.log(message, context); + Logger.debug(`${message} - ${JSON.stringify(context)}`); } error(message: string,
context: Record<string, any> = {}): void { - console.error(message, context); + Logger.error(`${message} - ${JSON.stringify(context)}`); } } // Using the actual Logtail class if LOGTAIL_KEY exists, otherwise using the mock class // Additionally, print a warning to the terminal if LOGTAIL_KEY is not provided export const logtail = process.env.LOGTAIL_KEY ? new Logtail(process.env.LOGTAIL_KEY) : (() => { - console.warn("LOGTAIL_KEY is not provided - your events will not be logged. Using MockLogtail as a fallback. see logtail.ts for more."); + Logger.warn("LOGTAIL_KEY is not provided - your events will not be logged. Using MockLogtail as a fallback. see logtail.ts for more."); return new MockLogtail(); })(); diff --git a/apps/api/src/services/notification/email_notification.ts b/apps/api/src/services/notification/email_notification.ts index e5102acd..d7cd3de0 100644 --- a/apps/api/src/services/notification/email_notification.ts +++ b/apps/api/src/services/notification/email_notification.ts @@ -2,6 +2,7 @@ import { supabase_service } from "../supabase"; import { withAuth } from "../../lib/withAuth"; import { Resend } from "resend"; import { NotificationType } from "../../types"; +import { Logger } from "../../../src/lib/logger"; const emailTemplates: Record< NotificationType, @@ -52,11 +53,11 @@ async function sendEmailNotification( }); if (error) { - console.error("Error sending email: ", error); + Logger.debug(`Error sending email: ${error}`); return { success: false }; } } catch (error) { - console.error("Error sending email (2): ", error); + Logger.debug(`Error sending email (2): ${error}`); return { success: false }; } } @@ -79,7 +80,7 @@ export async function sendNotificationInternal( .lte("sent_date", endDateString); if (error) { - console.error("Error fetching notifications: ", error); + Logger.debug(`Error fetching notifications: ${error}`); return { success: false }; } @@ -93,7 +94,7 @@ export async function sendNotificationInternal( .eq("team_id", team_id); if (emailsError) { - console.error("Error fetching emails: ", emailsError); + Logger.debug(`Error fetching emails: ${emailsError}`); return { success: false }; } @@ -112,7 +113,7 @@ export async function sendNotificationInternal( ]); if (insertError) { - console.error("Error inserting notification record: ", insertError); + Logger.debug(`Error inserting notification record: ${insertError}`); return { success: false }; } diff --git a/apps/api/src/services/posthog.ts b/apps/api/src/services/posthog.ts index 5ec16e2e..a7419883 100644 --- a/apps/api/src/services/posthog.ts +++ b/apps/api/src/services/posthog.ts @@ -1,5 +1,6 @@ import { PostHog } from 'posthog-node'; import "dotenv/config"; +import { Logger } from '../../src/lib/logger'; export default function PostHogClient() { const posthogClient = new PostHog(process.env.POSTHOG_API_KEY, { @@ -19,7 +20,7 @@ class MockPostHog { export const posthog = process.env.POSTHOG_API_KEY ? PostHogClient() : (() => { - console.warn( + Logger.warn( "POSTHOG_API_KEY is not provided - your events will not be logged. Using MockPostHog as a fallback. See posthog.ts for more."
diff --git a/apps/api/src/services/posthog.ts b/apps/api/src/services/posthog.ts
index 5ec16e2e..a7419883 100644
--- a/apps/api/src/services/posthog.ts
+++ b/apps/api/src/services/posthog.ts
@@ -1,5 +1,6 @@
 import { PostHog } from 'posthog-node';
 import "dotenv/config";
+import { Logger } from '../../src/lib/logger';
 
 export default function PostHogClient() {
   const posthogClient = new PostHog(process.env.POSTHOG_API_KEY, {
@@ -19,7 +20,7 @@ class MockPostHog {
 export const posthog = process.env.POSTHOG_API_KEY
   ? PostHogClient()
   : (() => {
-      console.warn(
+      Logger.warn(
         "POSTHOG_API_KEY is not provided - your events will not be logged. Using MockPostHog as a fallback. See posthog.ts for more."
       );
       return new MockPostHog();
diff --git a/apps/api/src/services/queue-service.ts b/apps/api/src/services/queue-service.ts
index 6c817a4a..d531c2db 100644
--- a/apps/api/src/services/queue-service.ts
+++ b/apps/api/src/services/queue-service.ts
@@ -1,5 +1,6 @@
 import Queue from "bull";
 import { Queue as BullQueue } from "bull";
+import { Logger } from "../lib/logger";
 
 let webScraperQueue: BullQueue;
 
@@ -16,7 +17,7 @@ export function getWebScraperQueue() {
       attempts: 5
     }
   });
-    console.log("Web scraper queue created");
+    Logger.info("Web scraper queue created");
   }
   return webScraperQueue;
 }
diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts
index be2a4c70..532c5bc1 100644
--- a/apps/api/src/services/queue-worker.ts
+++ b/apps/api/src/services/queue-worker.ts
@@ -7,8 +7,9 @@ import { callWebhook } from "./webhook";
 import { logJob } from "./logging/log_job";
 import { initSDK } from '@hyperdx/node-opentelemetry';
 import { Job } from "bull";
+import { Logger } from "../lib/logger";
 
-if(process.env.ENV === 'production') {
+if (process.env.ENV === 'production') {
   initSDK({
     consoleCapture: true,
     additionalInstrumentations: [],
@@ -18,7 +19,7 @@ if (process.env.ENV === 'production') {
 const wsq = getWebScraperQueue();
 
 async function processJob(job: Job, done) {
-  console.log("taking job", job.id);
+  Logger.debug(`🐂 Worker taking job ${job.id}`);
   try {
     job.progress({
       current: 1,
@@ -58,18 +59,18 @@ async function processJob(job: Job, done) {
       pageOptions: job.data.pageOptions,
       origin: job.data.origin,
     });
-    console.log("job done", job.id);
+    Logger.debug(`🐂 Job done ${job.id}`);
     done(null, data);
   } catch (error) {
-    console.log("job errored", job.id, error);
+    Logger.error(`🐂 Job errored ${job.id} - ${error}`);
     if (await getWebScraperQueue().isPaused(false)) {
-      console.log("queue is paused, ignoring");
+      Logger.debug("🐂 Queue is paused, ignoring");
       return;
     }
 
     if (error instanceof CustomError) {
       // Here we handle the error, then save the failed job
-      console.error(error.message); // or any other error handling
+      Logger.error(error.message); // or any other error handling
 
       logtail.error("Custom error while ingesting", {
         job_id: job.id,
@@ -77,7 +78,7 @@ async function processJob(job: Job, done) {
        dataIngestionJob: error.dataIngestionJob,
      });
    }
 
-    console.log(error);
+    Logger.error(error);
    logtail.error("Overall error ingesting", {
      job_id: job.id,
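For context on the `processJob` changes above: Bull invokes a callback-style processor with `(job, done)`, and the registration call lives elsewhere in `queue-worker.ts`, outside these hunks. A sketch of that wiring, with an illustrative concurrency value that is not the repository's actual setting:

```ts
// Registers processJob as the handler for queued scrape jobs. Bull runs up
// to `concurrency` jobs in parallel and settles each job when done() fires:
// done(err) marks it failed, done(null, result) marks it completed.
const concurrency = 8; // illustrative only
wsq.process(concurrency, processJob);
```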
diff --git a/apps/api/src/services/redis.ts b/apps/api/src/services/redis.ts
index b720a330..173d2b78 100644
--- a/apps/api/src/services/redis.ts
+++ b/apps/api/src/services/redis.ts
@@ -1,14 +1,15 @@
 import Redis from "ioredis";
 import { redisRateLimitClient } from "./rate-limiter";
+import { Logger } from "../lib/logger";
 
 // Listen to 'error' events to the Redis connection
 redisRateLimitClient.on("error", (error) => {
   try {
     if (error.message === "ECONNRESET") {
-      console.log("Connection to Redis Session Store timed out.");
+      Logger.error("Connection to Redis Session Rate Limit Store timed out.");
     } else if (error.message === "ECONNREFUSED") {
-      console.log("Connection to Redis Session Store refused!");
-    } else console.log(error);
+      Logger.error("Connection to Redis Session Rate Limit Store refused!");
+    } else Logger.error(error);
   } catch (error) {}
 });
 
@@ -16,15 +17,15 @@
 redisRateLimitClient.on("reconnecting", (err) => {
   try {
     if (redisRateLimitClient.status === "reconnecting")
-      console.log("Reconnecting to Redis Session Store...");
-    else console.log("Error reconnecting to Redis Session Store.");
+      Logger.info("Reconnecting to Redis Session Rate Limit Store...");
+    else Logger.error("Error reconnecting to Redis Session Rate Limit Store.");
   } catch (error) {}
 });
 
 // Listen to the 'connect' event to Redis
 redisRateLimitClient.on("connect", (err) => {
   try {
-    if (!err) console.log("Connected to Redis Session Store!");
+    if (!err) Logger.info("Connected to Redis Session Rate Limit Store!");
   } catch (error) {}
 });
 
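The two Supabase changes that follow keep the existing `Proxy` guard around the client and add a `Logger.debug` note when `USE_DB_AUTHENTICATION` is off. In isolation, the guard pattern looks roughly like this generic sketch (names are illustrative; the patch's own guard likewise returns a no-op, as the `return () => {` continuation in the hunks shows):

```ts
// Generic sketch of the null-guard Proxy pattern: property access on an
// unconfigured client logs instead of throwing, and degrades to a no-op.
function guardedClient<T extends object>(
  getClient: () => T | null,
  onMissing: (msg: string) => void
): T {
  return new Proxy({} as T, {
    get(_target, prop) {
      const client = getClient();
      if (client === null) {
        onMissing(`Attempted to access "${String(prop)}" on an unconfigured client.`);
        return () => {}; // callers fail softly rather than crashing at startup
      }
      const value = Reflect.get(client, prop);
      // Bind methods so they keep the real client as `this`.
      return typeof value === "function" ? value.bind(client) : value;
    },
  });
}
```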
diff --git a/apps/api/src/services/supabase.ts b/apps/api/src/services/supabase.ts
index fa6404d7..d34f7b52 100644
--- a/apps/api/src/services/supabase.ts
+++ b/apps/api/src/services/supabase.ts
@@ -1,4 +1,5 @@
 import { createClient, SupabaseClient } from "@supabase/supabase-js";
+import { Logger } from "../lib/logger";
 
 // SupabaseService class initializes the Supabase client conditionally based on environment variables.
 class SupabaseService {
@@ -10,13 +11,13 @@
     // Only initialize the Supabase client if both URL and Service Token are provided.
     if (process.env.USE_DB_AUTHENTICATION === "false") {
       // Warn the user that Authentication is disabled by setting the client to null
-      console.warn(
-        "\x1b[33mAuthentication is disabled. Supabase client will not be initialized.\x1b[0m"
+      Logger.warn(
+        "Authentication is disabled. Supabase client will not be initialized."
       );
       this.client = null;
     } else if (!supabaseUrl || !supabaseServiceToken) {
-      console.error(
-        "\x1b[31mSupabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable\x1b[0m"
+      Logger.error(
+        "Supabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable"
       );
     } else {
       this.client = createClient(supabaseUrl, supabaseServiceToken);
@@ -35,10 +36,15 @@ export const supabase_service: SupabaseClient = new Proxy(
   new SupabaseService(),
   {
     get: function (target, prop, receiver) {
+      if (process.env.USE_DB_AUTHENTICATION === "false") {
+        Logger.debug(
+          "Attempted to access Supabase client when it's not configured."
+        );
+      }
       const client = target.getClient();
       // If the Supabase client is not initialized, intercept property access to provide meaningful error feedback.
       if (client === null) {
-        console.error(
+        Logger.error(
           "Attempted to access Supabase client when it's not configured."
         );
         return () => {
diff --git a/apps/api/src/services/webhook.ts b/apps/api/src/services/webhook.ts
index 18378546..b0222ea3 100644
--- a/apps/api/src/services/webhook.ts
+++ b/apps/api/src/services/webhook.ts
@@ -1,3 +1,4 @@
+import { Logger } from "../../src/lib/logger";
 import { supabase_service } from "./supabase";
 
 export const callWebhook = async (teamId: string, jobId: string,data: any) => {
@@ -15,10 +16,7 @@ export const callWebhook = async (teamId: string, jobId: string,data: any) => {
       .eq("team_id", teamId)
       .limit(1);
     if (error) {
-      console.error(
-        `Error fetching webhook URL for team ID: ${teamId}`,
-        error.message
-      );
+      Logger.error(`Error fetching webhook URL for team ID: ${teamId}, error: ${error.message}`);
       return null;
     }
 
@@ -53,9 +51,6 @@ export const callWebhook = async (teamId: string, jobId: string,data: any) => {
       }),
     });
   } catch (error) {
-    console.error(
-      `Error sending webhook for team ID: ${teamId}`,
-      error.message
-    );
+    Logger.debug(`Error sending webhook for team ID: ${teamId}, error: ${error.message}`);
   }
 };
diff --git a/apps/test-suite/utils/supabase.ts b/apps/test-suite/utils/supabase.ts
index aa19a8c9..dcefa38c 100644
--- a/apps/test-suite/utils/supabase.ts
+++ b/apps/test-suite/utils/supabase.ts
@@ -1,5 +1,7 @@
 import { createClient, SupabaseClient } from "@supabase/supabase-js";
 import "dotenv/config";
+import { Logger } from "../../api/src/lib/logger";
+
 // SupabaseService class initializes the Supabase client conditionally based on environment variables.
 class SupabaseService {
   private client: SupabaseClient | null = null;
@@ -10,13 +12,13 @@
     // Only initialize the Supabase client if both URL and Service Token are provided.
     if (process.env.USE_DB_AUTHENTICATION === "false") {
       // Warn the user that Authentication is disabled by setting the client to null
-      console.warn(
-        "\x1b[33mAuthentication is disabled. Supabase client will not be initialized.\x1b[0m"
+      Logger.warn(
+        "Authentication is disabled. Supabase client will not be initialized."
      );
       this.client = null;
     } else if (!supabaseUrl || !supabaseServiceToken) {
-      console.error(
-        "\x1b[31mSupabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable\x1b[0m"
+      Logger.error(
+        "Supabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable"
       );
     } else {
       this.client = createClient(supabaseUrl, supabaseServiceToken);
@@ -35,10 +37,15 @@ export const supabase_service: SupabaseClient = new Proxy(
   new SupabaseService(),
   {
     get: function (target, prop, receiver) {
+      if (process.env.USE_DB_AUTHENTICATION === "false") {
+        Logger.debug(
+          "Attempted to access Supabase client when it's not configured."
+        );
+      }
       const client = target.getClient();
       // If the Supabase client is not initialized, intercept property access to provide meaningful error feedback.
       if (client === null) {
-        console.error(
+        Logger.error(
           "Attempted to access Supabase client when it's not configured."
         );
         return () => {