added logger

rafaelsideguide 2024-07-23 17:30:46 -03:00
parent f0b07b509b
commit 6208ecdbc0
25 changed files with 201 additions and 109 deletions

View File

@@ -57,3 +57,14 @@ SELF_HOSTED_WEBHOOK_URL=
 # Resend API Key for transactional emails
 RESEND_API_KEY=
+# LOGGING_LEVEL determines the verbosity of logs that the system will output.
+# Available levels are:
+# NONE - No logs will be output.
+# ERROR - For logging error messages that indicate a failure in a specific operation.
+# WARN - For logging potentially harmful situations that are not necessarily errors.
+# INFO - For logging informational messages that highlight the progress of the application.
+# DEBUG - For logging detailed information on the flow through the system, primarily used for debugging.
+# TRACE - For logging more detailed information than the DEBUG level.
+# Set LOGGING_LEVEL to one of the above options to control logging output.
+LOGGING_LEVEL=INFO
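For illustration, a minimal sketch of how this setting gates output in the Logger added by this commit (levels are ordered NONE < ERROR < WARN < INFO < DEBUG < TRACE, and a message is printed only when its level is at or below the configured one; the call sites and messages below are hypothetical):

// With LOGGING_LEVEL=WARN set in the environment:
Logger.error("fetch failed");          // emitted
Logger.warn("rate limit approaching"); // emitted
Logger.info("job started");            // suppressed
Logger.debug("queue drained");         // suppressed
Logger.trace("raw response body");     // suppressed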

View File

@@ -858,7 +858,6 @@ describe("E2E Tests for API Routes", () => {
 await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
 }
 }
-console.log(crawlData)
 expect(crawlData.length).toBeGreaterThan(0);
 expect(crawlData).toEqual(expect.arrayContaining([
 expect.objectContaining({ url: expect.stringContaining("https://firecrawl.dev/?ref=mendable+banner") }),

View File

@@ -6,6 +6,7 @@ import { withAuth } from "../../src/lib/withAuth";
 import { RateLimiterRedis } from "rate-limiter-flexible";
 import { setTraceAttributes } from '@hyperdx/node-opentelemetry';
 import { sendNotification } from "../services/notification/email_notification";
+import { Logger } from "../lib/logger";
 export async function authenticateUser(req, res, mode?: RateLimiterMode): Promise<AuthResponse> {
 return withAuth(supaAuthenticateUser)(req, res, mode);
@@ -17,7 +18,7 @@ function setTrace(team_id: string, api_key: string) {
 api_key
 });
 } catch (error) {
-console.error('Error setting trace attributes:', error);
+Logger.error(`Error setting trace attributes: ${error.message}`);
 }
 }
@@ -82,7 +83,7 @@ export async function supaAuthenticateUser(
 // $$ language plpgsql;
 if (error) {
-console.error('Error fetching key and price_id:', error);
+Logger.warn(`Error fetching key and price_id: ${error.message}`);
 } else {
 // console.log('Key and Price ID:', data);
 }
@@ -135,7 +136,7 @@ export async function supaAuthenticateUser(
 try {
 await rateLimiter.consume(team_endpoint_token);
 } catch (rateLimiterRes) {
-console.error(rateLimiterRes);
+Logger.error(`Rate limit exceeded: ${rateLimiterRes}`);
 const secs = Math.round(rateLimiterRes.msBeforeNext / 1000) || 1;
 const retryDate = new Date(Date.now() + rateLimiterRes.msBeforeNext);

View File

@@ -5,6 +5,7 @@ import { addWebScraperJob } from "../../src/services/queue-jobs";
 import { getWebScraperQueue } from "../../src/services/queue-service";
 import { supabase_service } from "../../src/services/supabase";
 import { billTeam } from "../../src/services/billing/credit_billing";
+import { Logger } from "../../src/lib/logger";
 export async function crawlCancelController(req: Request, res: Response) {
 try {
@@ -43,7 +44,7 @@ export async function crawlCancelController(req: Request, res: Response) {
 const { partialDocs } = await job.progress();
 if (partialDocs && partialDocs.length > 0 && jobState === "active") {
-console.log("Billing team for partial docs...");
+Logger.info("Billing team for partial docs...");
 // Note: the credits that we will bill them here might be lower than the actual
 // due to promises that are not yet resolved
 await billTeam(team_id, partialDocs.length);
@@ -55,7 +56,7 @@ export async function crawlCancelController(req: Request, res: Response) {
 await job.discard();
 await job.moveToFailed(Error("Job cancelled by user"), true);
 } catch (error) {
-console.error(error);
+Logger.error(error);
 }
 const newJobState = await job.getState();
@@ -64,7 +65,7 @@ export async function crawlCancelController(req: Request, res: Response) {
 status: "cancelled"
 });
 } catch (error) {
-console.error(error);
+Logger.error(error);
 return res.status(500).json({ error: error.message });
 }
 }

View File

@@ -12,16 +12,17 @@ import { sendSlackWebhook } from "./services/alerts/slack";
 import { checkAlerts } from "./services/alerts";
 import Redis from "ioredis";
 import { redisRateLimitClient } from "./services/rate-limiter";
+import { Logger } from "./lib/logger";
 const { createBullBoard } = require("@bull-board/api");
 const { BullAdapter } = require("@bull-board/api/bullAdapter");
 const { ExpressAdapter } = require("@bull-board/express");
 const numCPUs = process.env.ENV === "local" ? 2 : os.cpus().length;
-console.log(`Number of CPUs: ${numCPUs} available`);
+Logger.info(`Number of CPUs: ${numCPUs} available`);
 if (cluster.isMaster) {
-console.log(`Master ${process.pid} is running`);
+Logger.info(`Master ${process.pid} is running`);
 // Fork workers.
 for (let i = 0; i < numCPUs; i++) {
@@ -30,8 +31,8 @@ if (cluster.isMaster) {
 cluster.on("exit", (worker, code, signal) => {
 if (code !== null) {
-console.log(`Worker ${worker.process.pid} exited`);
-console.log("Starting a new worker");
+Logger.info(`Worker ${worker.process.pid} exited`);
+Logger.info("Starting a new worker");
 cluster.fork();
 }
 });
@@ -81,15 +82,10 @@ if (cluster.isMaster) {
 function startServer(port = DEFAULT_PORT) {
 const server = app.listen(Number(port), HOST, () => {
-console.log(`Worker ${process.pid} listening on port ${port}`);
-console.log(
-`For the UI, open http://${HOST}:${port}/admin/${process.env.BULL_AUTH_KEY}/queues`
-);
-console.log("");
-console.log("1. Make sure Redis is running on port 6379 by default");
-console.log(
-"2. If you want to run nango, make sure you do port forwarding in 3002 using ngrok http 3002 "
-);
+Logger.info(`Worker ${process.pid} listening on port ${port}`);
+// Logger.info(
+//   `For the UI, open: http://${HOST}:${port}/admin/${process.env.BULL_AUTH_KEY}/queues`
+// );
 });
 return server;
 }
@@ -114,7 +110,7 @@ if (cluster.isMaster) {
 noActiveJobs,
 });
 } catch (error) {
-console.error(error);
+Logger.error(error);
 return res.status(500).json({ error: error.message });
 }
 });
@@ -132,7 +128,7 @@ if (cluster.isMaster) {
 waitingJobs,
 });
 } catch (error) {
-console.error(error);
+Logger.error(error);
 return res.status(500).json({ error: error.message });
 }
 });
@@ -177,13 +173,13 @@ if (cluster.isMaster) {
 });
 if (!response.ok) {
-console.error("Failed to send Slack notification");
+Logger.error("Failed to send Slack notification");
 }
 }
 }, timeout);
 }
 } catch (error) {
-console.error(error);
+Logger.debug(error);
 }
 };
@@ -198,7 +194,7 @@ if (cluster.isMaster) {
 await checkAlerts();
 return res.status(200).send("Alerts initialized");
 } catch (error) {
-console.error("Failed to initialize alerts:", error);
+Logger.debug(`Failed to initialize alerts: ${error}`);
 return res.status(500).send("Failed to initialize alerts");
 }
 }
@@ -207,6 +203,7 @@ if (cluster.isMaster) {
 app.get(
 `/admin/${process.env.BULL_AUTH_KEY}/clean-before-24h-complete-jobs`,
 async (req, res) => {
+Logger.info("🐂 Cleaning jobs older than 24h");
 try {
 const webScraperQueue = getWebScraperQueue();
 const batchSize = 10;
@@ -241,12 +238,12 @@ if (cluster.isMaster) {
 await job.remove();
 count++;
 } catch (jobError) {
-console.error(`Failed to remove job with ID ${job.id}:`, jobError);
+Logger.error(`🐂 Failed to remove job with ID ${job.id}: ${jobError}`);
 }
 }
 return res.status(200).send(`Removed ${count} completed jobs.`);
 } catch (error) {
-console.error("Failed to clean last 24h complete jobs:", error);
+Logger.error(`🐂 Failed to clean last 24h complete jobs: ${error}`);
 return res.status(500).send("Failed to clean jobs");
 }
 }
@@ -272,7 +269,7 @@ if (cluster.isMaster) {
 queueRedisHealth = await queueRedis.get(testKey);
 await queueRedis.del(testKey);
 } catch (error) {
-console.error("queueRedis health check failed:", error);
+Logger.error(`queueRedis health check failed: ${error}`);
 queueRedisHealth = null;
 }
@@ -283,7 +280,7 @@ if (cluster.isMaster) {
 redisRateLimitHealth = await redisRateLimitClient.get(testKey);
 await redisRateLimitClient.del(testKey);
 } catch (error) {
-console.error("redisRateLimitClient health check failed:", error);
+Logger.error(`redisRateLimitClient health check failed: ${error}`);
 redisRateLimitHealth = null;
 }
@@ -297,12 +294,12 @@ if (cluster.isMaster) {
 healthStatus.queueRedis === "healthy" &&
 healthStatus.redisRateLimitClient === "healthy"
 ) {
-console.log("Both Redis instances are healthy");
+Logger.info("Both Redis instances are healthy");
 return res
 .status(200)
 .json({ status: "healthy", details: healthStatus });
 } else {
-console.log("Redis instances health check:", healthStatus);
+Logger.info(`Redis instances health check: ${JSON.stringify(healthStatus)}`);
 await sendSlackWebhook(
 `[REDIS DOWN] Redis instances health check: ${JSON.stringify(
 healthStatus
@@ -314,7 +311,7 @@ if (cluster.isMaster) {
 .json({ status: "unhealthy", details: healthStatus });
 }
 } catch (error) {
-console.error("Redis health check failed:", error);
+Logger.error(`Redis health check failed: ${error}`);
 await sendSlackWebhook(
 `[REDIS DOWN] Redis instances health check: ${error.message}`,
 true
@@ -326,5 +323,5 @@ if (cluster.isMaster) {
 }
 );
-console.log(`Worker ${process.pid} started`);
+Logger.info(`Worker ${process.pid} started`);
 }

View File

@@ -0,0 +1,53 @@
+enum LogLevel {
+  NONE = 'NONE', // No logs will be output.
+  ERROR = 'ERROR', // For logging error messages that indicate a failure in a specific operation.
+  WARN = 'WARN', // For logging potentially harmful situations that are not necessarily errors.
+  INFO = 'INFO', // For logging informational messages that highlight the progress of the application.
+  DEBUG = 'DEBUG', // For logging detailed information on the flow through the system, primarily used for debugging.
+  TRACE = 'TRACE' // For logging more detailed information than the DEBUG level.
+}
+export class Logger {
+  static colors = {
+    ERROR: '\x1b[31m%s\x1b[0m', // Red
+    WARN: '\x1b[33m%s\x1b[0m', // Yellow
+    INFO: '\x1b[34m%s\x1b[0m', // Blue
+    DEBUG: '\x1b[36m%s\x1b[0m', // Cyan
+    TRACE: '\x1b[35m%s\x1b[0m' // Magenta
+  };
+  static log (message: string, level: LogLevel) {
+    const logLevel: LogLevel = LogLevel[process.env.LOGGING_LEVEL as keyof typeof LogLevel] || LogLevel.INFO;
+    const levels = [LogLevel.NONE, LogLevel.ERROR, LogLevel.WARN, LogLevel.INFO, LogLevel.DEBUG, LogLevel.TRACE];
+    const currentLevelIndex = levels.indexOf(logLevel);
+    const messageLevelIndex = levels.indexOf(level);
+    if (currentLevelIndex >= messageLevelIndex) {
+      const color = Logger.colors[level];
+      console[level.toLowerCase()](color, `[${new Date().toISOString()}]${level} - ${message}`);
+      // if (process.env.USE_DB_AUTH) {
+      //   save to supabase? another place?
+      //   supabase.from('logs').insert({ level: level, message: message, timestamp: new Date().toISOString(), success: boolean });
+      // }
+    }
+  }
+  static error(message: string | any) {
+    Logger.log(message, LogLevel.ERROR);
+  }
+  static warn(message: string) {
+    Logger.log(message, LogLevel.WARN);
+  }
+  static info(message: string) {
+    Logger.log(message, LogLevel.INFO);
+  }
+  static debug(message: string) {
+    Logger.log(message, LogLevel.DEBUG);
+  }
+  static trace(message: string) {
+    Logger.log(message, LogLevel.TRACE);
+  }
+}
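A quick usage sketch of the class above (assuming LOGGING_LEVEL=DEBUG is set; the import path and message strings are illustrative only):

import { Logger } from "./lib/logger";

// Each emitted line is colored per level and prefixed with an ISO timestamp,
// e.g. [2024-07-23T20:30:46.000Z]DEBUG - Crawling https://example.com
Logger.error("Failed to remove job");              // printed in red
Logger.warn("LOGTAIL_KEY is not provided");        // printed in yellow
Logger.info("Worker listening on port 3002");      // printed in blue
Logger.debug("Crawling https://example.com");      // printed in cyan
Logger.trace("raw response body");                 // suppressed: TRACE is above DEBUG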

View File

@@ -10,6 +10,7 @@ import { DocumentUrl, Progress } from "../lib/entities";
 import { billTeam } from "../services/billing/credit_billing";
 import { Document } from "../lib/entities";
 import { supabase_service } from "../services/supabase";
+import { Logger } from "../lib/logger";
 export async function startWebScraperPipeline({
 job,
@@ -23,6 +24,7 @@ export async function startWebScraperPipeline({
 crawlerOptions: job.data.crawlerOptions,
 pageOptions: job.data.pageOptions,
 inProgress: (progress) => {
+Logger.debug(`🐂 Job in progress ${job.id}`);
 if (progress.currentDocument) {
 partialDocs.push(progress.currentDocument);
 if (partialDocs.length > 50) {
@@ -32,9 +34,11 @@
 }
 },
 onSuccess: (result) => {
+Logger.debug(`🐂 Job completed ${job.id}`);
 saveJob(job, result);
 },
 onError: (error) => {
+Logger.error(`🐂 Job failed ${job.id}`);
 job.moveToFailed(error);
 },
 team_id: job.data.team_id,
@@ -108,7 +112,6 @@ export async function runWebScraper({
 // this return doesn't matter too much for the job completion result
 return { success: true, message: "", docs: filteredDocs };
 } catch (error) {
-console.error("Error running web scraper", error);
 onError(error);
 return { success: false, message: error.message, docs: [] };
 }
@@ -136,6 +139,6 @@ const saveJob = async (job: Job, result: any) => {
 }
 }
 } catch (error) {
-console.error("Failed to update job status:", error);
+Logger.error(`🐂 Failed to update job status: ${error}`);
 }
 };

View File

@@ -8,6 +8,7 @@ import { scrapSingleUrl } from "./single_url";
 import robotsParser from "robots-parser";
 import { getURLDepth } from "./utils/maxDepthUtils";
 import { axiosTimeout } from "../../../src/lib/timeout";
+import { Logger } from "../../../src/lib/logger";
 export class WebCrawler {
 private initialUrl: string;
@@ -116,7 +117,7 @@ export class WebCrawler {
 const isAllowed = this.robots.isAllowed(link, "FireCrawlAgent") ?? true;
 // Check if the link is disallowed by robots.txt
 if (!isAllowed) {
-console.log(`Link disallowed by robots.txt: ${link}`);
+Logger.debug(`Link disallowed by robots.txt: ${link}`);
 return false;
 }
@@ -133,15 +134,19 @@
 limit: number = 10000,
 maxDepth: number = 10
 ): Promise<{ url: string, html: string }[]> {
+Logger.debug(`Crawler starting with ${this.initialUrl}`);
 // Fetch and parse robots.txt
 try {
 const response = await axios.get(this.robotsTxtUrl, { timeout: axiosTimeout });
 this.robots = robotsParser(this.robotsTxtUrl, response.data);
+Logger.debug(`Crawler robots.txt fetched with ${this.robotsTxtUrl}`);
 } catch (error) {
-console.log(`Failed to fetch robots.txt from ${this.robotsTxtUrl}`);
+Logger.debug(`Failed to fetch robots.txt from ${this.robotsTxtUrl}`);
 }
-if(!crawlerOptions?.ignoreSitemap){
+if (!crawlerOptions?.ignoreSitemap){
+Logger.debug(`Fetching sitemap links from ${this.initialUrl}`);
 const sitemapLinks = await this.tryFetchSitemapLinks(this.initialUrl);
 if (sitemapLinks.length > 0) {
 let filteredLinks = this.filterLinks(sitemapLinks, limit, maxDepth);
@@ -175,6 +180,7 @@
 inProgress?: (progress: Progress) => void,
 ): Promise<{ url: string, html: string }[]> {
 const queue = async.queue(async (task: string, callback) => {
+Logger.debug(`Crawling ${task}`);
 if (this.crawledUrls.size >= Math.min(this.maxCrawledLinks, this.limit)) {
 if (callback && typeof callback === "function") {
 callback();
@@ -216,16 +222,18 @@
 }
 }, concurrencyLimit);
+Logger.debug(`🐂 Pushing ${urls.length} URLs to the queue`);
 queue.push(
 urls.filter(
 (url) =>
 !this.visited.has(url) && this.robots.isAllowed(url, "FireCrawlAgent")
 ),
 (err) => {
-if (err) console.error(err);
+if (err) Logger.error(`🐂 Error pushing URLs to the queue: ${err}`);
 }
 );
 await queue.drain();
+Logger.debug(`🐂 Crawled ${this.crawledUrls.size} URLs, Queue drained.`);
 return Array.from(this.crawledUrls.entries()).map(([url, html]) => ({ url, html }));
 }
@@ -282,7 +290,6 @@
 const urlObj = new URL(fullUrl);
 const path = urlObj.pathname;
 if (this.isInternalLink(fullUrl)) { // INTERNAL LINKS
 if (this.isInternalLink(fullUrl) &&
 this.noSections(fullUrl) &&
@@ -452,7 +459,7 @@
 sitemapLinks = await getLinksFromSitemap({ sitemapUrl });
 }
 } catch (error) {
-console.error(`Failed to fetch sitemap with axios from ${sitemapUrl}: ${error}`);
+Logger.debug(`Failed to fetch sitemap with axios from ${sitemapUrl}: ${error}`);
 const response = await getLinksFromSitemap({ sitemapUrl, mode: 'fire-engine' });
 if (response) {
 sitemapLinks = response;
@@ -467,7 +474,7 @@
 sitemapLinks = await getLinksFromSitemap({ sitemapUrl: baseUrlSitemap });
 }
 } catch (error) {
-console.error(`Failed to fetch sitemap from ${baseUrlSitemap}: ${error}`);
+Logger.debug(`Failed to fetch sitemap from ${baseUrlSitemap}: ${error}`);
 sitemapLinks = await getLinksFromSitemap({ sitemapUrl: baseUrlSitemap, mode: 'fire-engine' });
 }
 }

View File

@@ -1,10 +1,12 @@
+import { Logger } from "../../../../lib/logger";
 export async function handleCustomScraping(
 text: string,
 url: string
 ): Promise<{ scraper: string; url: string; waitAfterLoad?: number, pageOptions?: { scrollXPaths?: string[] } } | null> {
 // Check for Readme Docs special case
 if (text.includes('<meta name="readme-deploy"')) {
-console.log(
+Logger.debug(
 `Special use case detected for ${url}, using Fire Engine with wait time 1000ms`
 );
 return {
@@ -19,7 +21,7 @@ export async function handleCustomScraping(
 // Check for Vanta security portals
 if (text.includes('<link href="https://static.vanta.com')) {
-console.log(
+Logger.debug(
 `Vanta link detected for ${url}, using Fire Engine with wait time 3000ms`
 );
 return {
@@ -34,7 +36,7 @@
 const googleDriveMetaMatch = text.match(googleDriveMetaPattern);
 if (googleDriveMetaMatch) {
 const url = googleDriveMetaMatch[1];
-console.log(`Google Drive PDF link detected: ${url}`);
+Logger.debug(`Google Drive PDF link detected: ${url}`);
 const fileIdMatch = url.match(/https:\/\/drive\.google\.com\/file\/d\/([^\/]+)\/view/);
 if (fileIdMatch) {

View File

@@ -19,6 +19,7 @@ import { generateCompletions } from "../../lib/LLM-extraction";
 import { getWebScraperQueue } from "../../../src/services/queue-service";
 import { fetchAndProcessDocx } from "./utils/docxProcessor";
 import { getAdjustedMaxDepth, getURLDepth } from "./utils/maxDepthUtils";
+import { Logger } from "../../lib/logger";
 export class WebScraperDataProvider {
 private bullJobId: string;
@@ -89,14 +90,14 @@ export class WebScraperDataProvider {
 const job = await getWebScraperQueue().getJob(this.bullJobId);
 const jobStatus = await job.getState();
 if (jobStatus === "failed") {
-console.error(
+Logger.info(
 "Job has failed or has been cancelled by the user. Stopping the job..."
 );
 return [] as Document[];
 }
 }
 } catch (error) {
-console.error(error);
+Logger.error(error.message);
 return [] as Document[];
 }
 }
@@ -270,7 +271,7 @@
 this.mode === "single_urls" && links.length > 0
 ? this.getSitemapDataForSingleUrl(this.urls[0], links[0], 1500).catch(
 (error) => {
-console.error("Failed to fetch sitemap data:", error);
+Logger.debug(`Failed to fetch sitemap data: ${error}`);
 return null;
 }
 )
@@ -460,7 +461,7 @@
 let documents: Document[] = [];
 for (const url of urls) {
 const normalizedUrl = this.normalizeUrl(url);
-console.log(
+Logger.debug(
 "Getting cached document for web-scraper-cache:" + normalizedUrl
 );
 const cachedDocumentString = await getValue(

View File

@@ -2,6 +2,7 @@ import axios from "axios";
 import { logScrape } from "../../../services/logging/scrape_log";
 import { fetchAndProcessPdf } from "../utils/pdfProcessor";
 import { universalTimeout } from "../global";
+import { Logger } from "../../../lib/logger";
 /**
  * Scrapes a URL with Axios
@@ -34,9 +35,7 @@ export async function scrapWithFetch(
 });
 if (response.status !== 200) {
-console.error(
-`[Axios] Error fetching url: ${url} with status: ${response.status}`
-);
+Logger.debug(`⛏️ Axios: Failed to fetch url: ${url} with status: ${response.status}`);
 logParams.error_message = response.statusText;
 logParams.response_code = response.status;
 return {
@@ -63,10 +62,10 @@ export async function scrapWithFetch(
 } catch (error) {
 if (error.code === "ECONNABORTED") {
 logParams.error_message = "Request timed out";
-console.log(`[Axios] Request timed out for ${url}`);
+Logger.debug(`⛏️ Axios: Request timed out for ${url}`);
 } else {
 logParams.error_message = error.message || error;
-console.error(`[Axios] Error fetching url: ${url} -> ${error}`);
+Logger.debug(`⛏️ Axios: Failed to fetch url: ${url} | Error: ${error}`);
 }
 return { content: "", pageStatusCode: null, pageError: logParams.error_message };
 } finally {

View File

@@ -4,6 +4,7 @@ import { logScrape } from "../../../services/logging/scrape_log";
 import { generateRequestParams } from "../single_url";
 import { fetchAndProcessPdf } from "../utils/pdfProcessor";
 import { universalTimeout } from "../global";
+import { Logger } from "../../../lib/logger";
 /**
  * Scrapes a URL with Fire-Engine
@@ -59,12 +60,10 @@ export async function scrapWithFireEngine({
 let engine = engineParam; // do we want fireEngineOptions as first choice?
-console.log(
-`[Fire-Engine][${engine}] Scraping ${url} with wait: ${waitParam} and screenshot: ${screenshotParam} and method: ${fireEngineOptionsParam?.method ?? "null"}`
+Logger.info(
+`⛏️ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, method: ${fireEngineOptionsParam?.method ?? "null"} }`
 );
-// console.log(fireEngineOptionsParam)
 const response = await axios.post(
 process.env.FIRE_ENGINE_BETA_URL + endpoint,
 {
@@ -84,15 +83,15 @@
 );
 if (response.status !== 200) {
-console.error(
-`[Fire-Engine][${engine}] Error fetching url: ${url} with status: ${response.status}`
+Logger.debug(
+`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`
 );
 logParams.error_message = response.data?.pageError;
 logParams.response_code = response.data?.pageStatusCode;
 if(response.data && response.data?.pageStatusCode !== 200) {
-console.error(`[Fire-Engine][${engine}] Error fetching url: ${url} with status: ${response.status}`);
+Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`);
 }
 return {
@@ -130,10 +129,10 @@
 }
 } catch (error) {
 if (error.code === "ECONNABORTED") {
-console.log(`[Fire-Engine] Request timed out for ${url}`);
+Logger.debug(`⛏️ Fire-Engine: Request timed out for ${url}`);
 logParams.error_message = "Request timed out";
 } else {
-console.error(`[Fire-Engine][c] Error fetching url: ${url} -> ${error}`);
+Logger.debug(`⛏️ Fire-Engine: Failed to fetch url: ${url} | Error: ${error}`);
 logParams.error_message = error.message || error;
 }
 return { html: "", screenshot: "", pageStatusCode: null, pageError: logParams.error_message };

View File

@@ -3,6 +3,7 @@ import { logScrape } from "../../../services/logging/scrape_log";
 import { generateRequestParams } from "../single_url";
 import { fetchAndProcessPdf } from "../utils/pdfProcessor";
 import { universalTimeout } from "../global";
+import { Logger } from "../../../lib/logger";
 /**
  * Scrapes a URL with Playwright
@@ -51,8 +52,8 @@ export async function scrapWithPlaywright(
 );
 if (response.status !== 200) {
-console.error(
-`[Playwright] Error fetching url: ${url} with status: ${response.status}`
+Logger.debug(
+`⛏️ Playwright: Failed to fetch url: ${url} | status: ${response.status}, error: ${response.data?.pageError}`
 );
 logParams.error_message = response.data?.pageError;
 logParams.response_code = response.data?.pageStatusCode;
@@ -86,8 +87,8 @@
 };
 } catch (jsonError) {
 logParams.error_message = jsonError.message || jsonError;
-console.error(
-`[Playwright] Error parsing JSON response for url: ${url} -> ${jsonError}`
+Logger.debug(
+`⛏️ Playwright: Error parsing JSON response for url: ${url} | Error: ${jsonError}`
 );
 return { content: "", pageStatusCode: null, pageError: logParams.error_message };
 }
@@ -95,10 +96,10 @@
 } catch (error) {
 if (error.code === "ECONNABORTED") {
 logParams.error_message = "Request timed out";
-console.log(`[Playwright] Request timed out for ${url}`);
+Logger.debug(`⛏️ Playwright: Request timed out for ${url}`);
 } else {
 logParams.error_message = error.message || error;
-console.error(`[Playwright] Error fetching url: ${url} -> ${error}`);
+Logger.debug(`⛏️ Playwright: Failed to fetch url: ${url} | Error: ${error}`);
 }
 return { content: "", pageStatusCode: null, pageError: logParams.error_message };
 } finally {

View File

@@ -3,6 +3,7 @@ import { generateRequestParams } from "../single_url";
 import { fetchAndProcessPdf } from "../utils/pdfProcessor";
 import { universalTimeout } from "../global";
 import { ScrapingBeeClient } from "scrapingbee";
+import { Logger } from "../../../lib/logger";
 /**
  * Scrapes a URL with ScrapingBee
@@ -56,8 +57,8 @@ export async function scrapWithScrapingBee(
 text = decoder.decode(response.data);
 logParams.success = true;
 } catch (decodeError) {
-console.error(
-`[ScrapingBee][c] Error decoding response data for url: ${url} -> ${decodeError}`
+Logger.debug(
+`⛏️ ScrapingBee: Error decoding response data for url: ${url} | Error: ${decodeError}`
 );
 logParams.error_message = decodeError.message || decodeError;
 }
@@ -72,7 +73,7 @@
 };
 }
 } catch (error) {
-console.error(`[ScrapingBee][c] Error fetching url: ${url} -> ${error}`);
+Logger.debug(`⛏️ ScrapingBee: Error fetching url: ${url} | Error: ${error}`);
 logParams.error_message = error.message || error;
 logParams.response_code = error.response?.status;
 return {

View File

@@ -17,6 +17,7 @@ import { scrapWithFireEngine } from "./scrapers/fireEngine";
 import { scrapWithPlaywright } from "./scrapers/playwright";
 import { scrapWithScrapingBee } from "./scrapers/scrapingBee";
 import { extractLinks } from "./utils/utils";
+import { Logger } from "../../lib/logger";
 dotenv.config();
@@ -48,7 +49,7 @@ export async function generateRequestParams(
 return defaultParams;
 }
 } catch (error) {
-console.error(`Error generating URL key: ${error}`);
+Logger.error(`Error generating URL key: ${error}`);
 return defaultParams;
 }
 }
@@ -154,7 +155,6 @@ export async function scrapSingleUrl(
 }
 if (process.env.FIRE_ENGINE_BETA_URL) {
-console.log(`Scraping ${url} with Fire Engine`);
 const response = await scrapWithFireEngine({
 url,
 waitFor: pageOptions.waitFor,
@@ -277,7 +277,7 @@ export async function scrapSingleUrl(
 try {
 urlKey = new URL(urlToScrap).hostname.replace(/^www\./, "");
 } catch (error) {
-console.error(`Invalid URL key, trying: ${urlToScrap}`);
+Logger.error(`Invalid URL key, trying: ${urlToScrap}`);
 }
 const defaultScraper = urlSpecificParams[urlKey]?.defaultScraper ?? "";
 const scrapersInOrder = getScrapingFallbackOrder(
@@ -311,12 +311,18 @@
 pageError = undefined;
 }
-if (text && text.trim().length >= 100) break;
-if (pageStatusCode && pageStatusCode == 404) break;
-const nextScraperIndex = scrapersInOrder.indexOf(scraper) + 1;
-if (nextScraperIndex < scrapersInOrder.length) {
-console.info(`Falling back to ${scrapersInOrder[nextScraperIndex]}`);
-}
+if (text && text.trim().length >= 100) {
+Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100, breaking`);
+break;
+}
+if (pageStatusCode && pageStatusCode == 404) {
+Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with status code 404, breaking`);
+break;
+}
+// const nextScraperIndex = scrapersInOrder.indexOf(scraper) + 1;
+// if (nextScraperIndex < scrapersInOrder.length) {
+//   Logger.debug(`⛏️ ${scraper} Failed to fetch URL: ${urlToScrap} with status: ${pageStatusCode}, error: ${pageError} | Falling back to ${scrapersInOrder[nextScraperIndex]}`);
+// }
 }
 if (!text) {
@@ -372,7 +378,7 @@ export async function scrapSingleUrl(
 return document;
 } catch (error) {
-console.error(`Error: ${error} - Failed to fetch URL: ${urlToScrap}`);
+Logger.debug(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`);
 return {
 content: "",
 markdown: "",

View File

@@ -3,6 +3,7 @@ import { axiosTimeout } from "../../lib/timeout";
 import { parseStringPromise } from "xml2js";
 import { scrapWithFireEngine } from "./scrapers/fireEngine";
 import { WebCrawler } from "./crawler";
+import { Logger } from "../../lib/logger";
 export async function getLinksFromSitemap(
 {
@@ -26,7 +27,7 @@ export async function getLinksFromSitemap(
 content = response.html;
 }
 } catch (error) {
-console.error(`Request failed for ${sitemapUrl}: ${error}`);
+Logger.error(`Request failed for ${sitemapUrl}: ${error.message}`);
 return allUrls;
 }
@@ -48,7 +49,7 @@
 }
 }
 } catch (error) {
-console.error(`Error processing ${sitemapUrl}: ${error}`);
+Logger.debug(`Error processing sitemapUrl: ${sitemapUrl} | Error: ${error.message}`);
 }
 return allUrls;

View File

@@ -1,3 +1,4 @@
+import { Logger } from '../../../../lib/logger';
 import { isUrlBlocked } from '../blocklist';
 describe('isUrlBlocked', () => {
@@ -19,7+20,7 @@ describe('isUrlBlocked', () => {
 blockedUrls.forEach(url => {
 if (!isUrlBlocked(url)) {
-console.log(`URL not blocked: ${url}`);
+Logger.debug(`URL not blocked: ${url}`);
 }
 expect(isUrlBlocked(url)).toBe(true);
 });

View File

@@ -7,6 +7,7 @@ import pdf from "pdf-parse";
 import path from "path";
 import os from "os";
 import { axiosTimeout } from "../../../lib/timeout";
+import { Logger } from "../../../lib/logger";
 dotenv.config();
@@ -39,6 +40,7 @@ export async function processPdfToText(filePath: string, parsePDF: boolean): Pro
 let content = "";
 if (process.env.LLAMAPARSE_API_KEY && parsePDF) {
+Logger.debug("Processing pdf document w/ LlamaIndex");
 const apiKey = process.env.LLAMAPARSE_API_KEY;
 const headers = {
 Authorization: `Bearer ${apiKey}`,
@@ -81,7 +83,7 @@
 await new Promise((resolve) => setTimeout(resolve, 500)); // Wait for 0.5 seconds
 }
 } catch (error) {
-console.error("Error fetching result w/ LlamaIndex");
+Logger.debug("Error fetching result w/ LlamaIndex");
 attempt++;
 await new Promise((resolve) => setTimeout(resolve, 500)); // Wait for 0.5 seconds before retrying
 // You may want to handle specific errors differently
@@ -93,7 +95,7 @@
 }
 content = resultResponse.data[resultType];
 } catch (error) {
-console.error("Error processing pdf document w/ LlamaIndex(2)");
+Logger.debug("Error processing pdf document w/ LlamaIndex(2)");
 content = await processPdf(filePath);
 }
 } else if (parsePDF) {

View File

@@ -2,6 +2,7 @@ import { NotificationType } from "../../types";
 import { withAuth } from "../../lib/withAuth";
 import { sendNotification } from "../notification/email_notification";
 import { supabase_service } from "../supabase";
+import { Logger } from "../../lib/logger";
 const FREE_CREDITS = 500;
@@ -12,7 +13,7 @@ export async function supaBillTeam(team_id: string, credits: number) {
 if (team_id === "preview") {
 return { success: true, message: "Preview team, no credits used" };
 }
-console.log(`Billing team ${team_id} for ${credits} credits`);
+Logger.info(`Billing team ${team_id} for ${credits} credits`);
 // When the API is used, you can log the credit usage in the credit_usage table:
 // team_id: The ID of the team using the API.
 // subscription_id: The ID of the team's active subscription.
@@ -218,7 +219,7 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) {
 0
 );
-console.log("totalCreditsUsed", totalCreditsUsed);
+Logger.info(`totalCreditsUsed: ${totalCreditsUsed}`);
 const end = new Date();
 end.setDate(end.getDate() + 30);

View File

@@ -3,6 +3,7 @@ import { supabase_service } from "../supabase";
 import { FirecrawlJob } from "../../types";
 import { posthog } from "../posthog";
 import "dotenv/config";
+import { Logger } from "../../lib/logger";
 export async function logJob(job: FirecrawlJob) {
 try {
@@ -68,9 +69,9 @@ export async function logJob(job: FirecrawlJob) {
 posthog.capture(phLog);
 }
 if (error) {
-console.error("Error logging job:\n", error);
+Logger.error(`Error logging job: ${error.message}`);
 }
 } catch (error) {
-console.error("Error logging job:\n", error);
+Logger.error(`Error logging job: ${error.message}`);
 }
 }

View File

@@ -2,6 +2,7 @@ import "dotenv/config";
 import { ScrapeLog } from "../../types";
 import { supabase_service } from "../supabase";
 import { PageOptions } from "../../lib/entities";
+import { Logger } from "../../lib/logger";
 export async function logScrape(
 scrapeLog: ScrapeLog,
@@ -39,9 +40,9 @@ export async function logScrape(
 ]);
 if (error) {
-console.error("Error logging proxy:\n", error);
+Logger.error(`Error logging proxy:\n${error}`);
 }
 } catch (error) {
-console.error("Error logging proxy:\n", error);
+Logger.error(`Error logging proxy:\n${error}`);
 }
 }

View File

@@ -1,19 +1,20 @@
 import { Logtail } from "@logtail/node";
 import "dotenv/config";
+import { Logger } from "../lib/logger";
 // A mock Logtail class to handle cases where LOGTAIL_KEY is not provided
 class MockLogtail {
 info(message: string, context?: Record<string, any>): void {
-console.log(message, context);
+Logger.debug(`${message} - ${context}`);
 }
 error(message: string, context: Record<string, any> = {}): void {
-console.error(message, context);
+Logger.error(`${message} - ${context}`);
 }
 }
 // Using the actual Logtail class if LOGTAIL_KEY exists, otherwise using the mock class
 // Additionally, print a warning to the terminal if LOGTAIL_KEY is not provided
 export const logtail = process.env.LOGTAIL_KEY ? new Logtail(process.env.LOGTAIL_KEY) : (() => {
-console.warn("LOGTAIL_KEY is not provided - your events will not be logged. Using MockLogtail as a fallback. see logtail.ts for more.");
+Logger.warn("LOGTAIL_KEY is not provided - your events will not be logged. Using MockLogtail as a fallback. see logtail.ts for more.");
 return new MockLogtail();
 })();

View File

@@ -1,5 +1,6 @@
 import Queue from "bull";
 import { Queue as BullQueue } from "bull";
+import { Logger } from "../lib/logger";
 let webScraperQueue: BullQueue;
@@ -16,7 +17,7 @@ export function getWebScraperQueue() {
 attempts: 5
 }
 });
-console.log("Web scraper queue created");
+Logger.info("Web scraper queue created");
 }
 return webScraperQueue;
 }

View File

@@ -7,8 +7,9 @@ import { callWebhook } from "./webhook";
 import { logJob } from "./logging/log_job";
 import { initSDK } from '@hyperdx/node-opentelemetry';
 import { Job } from "bull";
+import { Logger } from "../lib/logger";
-if(process.env.ENV === 'production') {
+if (process.env.ENV === 'production') {
 initSDK({
 consoleCapture: true,
 additionalInstrumentations: [],
@@ -18,7 +19,7 @@ if(process.env.ENV === 'production') {
 const wsq = getWebScraperQueue();
 async function processJob(job: Job, done) {
-console.log("taking job", job.id);
+Logger.debug(`🐂 Worker taking job ${job.id}`);
 try {
 job.progress({
 current: 1,
@@ -58,18 +59,18 @@ async function processJob(job: Job, done) {
 pageOptions: job.data.pageOptions,
 origin: job.data.origin,
 });
-console.log("job done", job.id);
+Logger.debug(`🐂 Job done ${job.id}`);
 done(null, data);
 } catch (error) {
-console.log("job errored", job.id, error);
+Logger.error(`🐂 Job errored ${job.id} - ${error}`);
 if (await getWebScraperQueue().isPaused(false)) {
-console.log("queue is paused, ignoring");
+Logger.debug("🐂Queue is paused, ignoring");
 return;
 }
 if (error instanceof CustomError) {
 // Here we handle the error, then save the failed job
-console.error(error.message); // or any other error handling
+Logger.error(error.message); // or any other error handling
 logtail.error("Custom error while ingesting", {
 job_id: job.id,
@@ -77,7 +78,7 @@ async function processJob(job: Job, done) {
 dataIngestionJob: error.dataIngestionJob,
 });
 }
-console.log(error);
+Logger.error(error);
 logtail.error("Overall error ingesting", {
 job_id: job.id,

View File

@@ -1,14 +1,15 @@
 import Redis from "ioredis";
 import { redisRateLimitClient } from "./rate-limiter";
+import { Logger } from "../lib/logger";
 // Listen to 'error' events to the Redis connection
 redisRateLimitClient.on("error", (error) => {
 try {
 if (error.message === "ECONNRESET") {
-console.log("Connection to Redis Session Store timed out.");
+Logger.error("Connection to Redis Session Rate Limit Store timed out.");
 } else if (error.message === "ECONNREFUSED") {
-console.log("Connection to Redis Session Store refused!");
-} else console.log(error);
+Logger.error("Connection to Redis Session Rate Limit Store refused!");
+} else Logger.error(error);
 } catch (error) {}
 });
@@ -16,15 +17,15 @@ redisRateLimitClient.on("error", (error) => {
 redisRateLimitClient.on("reconnecting", (err) => {
 try {
 if (redisRateLimitClient.status === "reconnecting")
-console.log("Reconnecting to Redis Session Store...");
-else console.log("Error reconnecting to Redis Session Store.");
+Logger.info("Reconnecting to Redis Session Rate Limit Store...");
+else Logger.error("Error reconnecting to Redis Session Rate Limit Store.");
 } catch (error) {}
 });
 // Listen to the 'connect' event to Redis
 redisRateLimitClient.on("connect", (err) => {
 try {
-if (!err) console.log("Connected to Redis Session Store!");
+if (!err) Logger.info("Connected to Redis Session Rate Limit Store!");
 } catch (error) {}
 });