Merge pull request #450 from mendableai/feat/logger

[wip] Added logger
This commit is contained in:
Nicolas 2024-07-25 14:40:19 -04:00 committed by GitHub
commit a75d6889c7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
50 changed files with 294 additions and 170 deletions

View File

@ -57,3 +57,14 @@ SELF_HOSTED_WEBHOOK_URL=
# Resend API Key for transactional emails
RESEND_API_KEY=
# LOGGING_LEVEL determines the verbosity of logs that the system will output.
# Available levels are:
# NONE - No logs will be output.
# ERROR - For logging error messages that indicate a failure in a specific operation.
# WARN - For logging potentially harmful situations that are not necessarily errors.
# INFO - For logging informational messages that highlight the progress of the application.
# DEBUG - For logging detailed information on the flow through the system, primarily used for debugging.
# TRACE - For logging more detailed information than the DEBUG level.
# Set LOGGING_LEVEL to one of the above options to control logging output.
LOGGING_LEVEL=INFO

View File

@ -858,7 +858,6 @@ describe("E2E Tests for API Routes", () => {
await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
}
}
console.log(crawlData)
expect(crawlData.length).toBeGreaterThan(0);
expect(crawlData).toEqual(expect.arrayContaining([
expect.objectContaining({ url: expect.stringContaining("https://firecrawl.dev/?ref=mendable+banner") }),

View File

@ -6,6 +6,7 @@ import { withAuth } from "../../src/lib/withAuth";
import { RateLimiterRedis } from "rate-limiter-flexible";
import { setTraceAttributes } from '@hyperdx/node-opentelemetry';
import { sendNotification } from "../services/notification/email_notification";
import { Logger } from "../lib/logger";
export async function authenticateUser(req, res, mode?: RateLimiterMode): Promise<AuthResponse> {
return withAuth(supaAuthenticateUser)(req, res, mode);
@ -17,7 +18,7 @@ function setTrace(team_id: string, api_key: string) {
api_key
});
} catch (error) {
console.error('Error setting trace attributes:', error);
Logger.error(`Error setting trace attributes: ${error.message}`);
}
}
@ -82,7 +83,7 @@ export async function supaAuthenticateUser(
// $$ language plpgsql;
if (error) {
console.error('Error fetching key and price_id:', error);
Logger.warn(`Error fetching key and price_id: ${error.message}`);
} else {
// console.log('Key and Price ID:', data);
}
@ -135,7 +136,7 @@ export async function supaAuthenticateUser(
try {
await rateLimiter.consume(team_endpoint_token);
} catch (rateLimiterRes) {
console.error(rateLimiterRes);
Logger.error(`Rate limit exceeded: ${rateLimiterRes}`);
const secs = Math.round(rateLimiterRes.msBeforeNext / 1000) || 1;
const retryDate = new Date(Date.now() + rateLimiterRes.msBeforeNext);

View File

@ -5,6 +5,7 @@ import { addWebScraperJob } from "../../src/services/queue-jobs";
import { getWebScraperQueue } from "../../src/services/queue-service";
import { supabase_service } from "../../src/services/supabase";
import { billTeam } from "../../src/services/billing/credit_billing";
import { Logger } from "../../src/lib/logger";
export async function crawlCancelController(req: Request, res: Response) {
try {
@ -43,7 +44,7 @@ export async function crawlCancelController(req: Request, res: Response) {
const { partialDocs } = await job.progress();
if (partialDocs && partialDocs.length > 0 && jobState === "active") {
console.log("Billing team for partial docs...");
Logger.info("Billing team for partial docs...");
// Note: the credits that we will bill them here might be lower than the actual
// due to promises that are not yet resolved
await billTeam(team_id, partialDocs.length);
@ -55,7 +56,7 @@ export async function crawlCancelController(req: Request, res: Response) {
await job.discard();
await job.moveToFailed(Error("Job cancelled by user"), true);
} catch (error) {
console.error(error);
Logger.error(error);
}
const newJobState = await job.getState();
@ -64,7 +65,7 @@ export async function crawlCancelController(req: Request, res: Response) {
status: "cancelled"
});
} catch (error) {
console.error(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -4,6 +4,7 @@ import { RateLimiterMode } from "../../src/types";
import { addWebScraperJob } from "../../src/services/queue-jobs";
import { getWebScraperQueue } from "../../src/services/queue-service";
import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
import { Logger } from "../../src/lib/logger";
export async function crawlStatusController(req: Request, res: Response) {
try {
@ -44,7 +45,7 @@ export async function crawlStatusController(req: Request, res: Response) {
partial_data: jobStatus == 'completed' ? [] : partialDocs,
});
} catch (error) {
console.error(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -10,6 +10,7 @@ import { logCrawl } from "../../src/services/logging/crawl_log";
import { validateIdempotencyKey } from "../../src/services/idempotency/validate";
import { createIdempotencyKey } from "../../src/services/idempotency/create";
import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../src/lib/default-values";
import { Logger } from "../../src/lib/logger";
export async function crawlController(req: Request, res: Response) {
try {
@ -30,7 +31,7 @@ export async function crawlController(req: Request, res: Response) {
try {
createIdempotencyKey(req);
} catch (error) {
console.error(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}
@ -83,7 +84,7 @@ export async function crawlController(req: Request, res: Response) {
documents: docs,
});
} catch (error) {
console.error(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}
@ -101,7 +102,7 @@ export async function crawlController(req: Request, res: Response) {
res.json({ jobId: job.id });
} catch (error) {
console.error(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -3,6 +3,7 @@ import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../src/types";
import { addWebScraperJob } from "../../src/services/queue-jobs";
import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
import { Logger } from "../../src/lib/logger";
export async function crawlPreviewController(req: Request, res: Response) {
try {
@ -39,7 +40,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
res.json({ jobId: job.id });
} catch (error) {
console.error(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -9,6 +9,7 @@ import { Document } from "../lib/entities";
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
import { numTokensFromString } from '../lib/LLM-extraction/helpers';
import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values';
import { Logger } from '../lib/logger';
export async function scrapeHelper(
req: Request,
@ -112,7 +113,7 @@ export async function scrapeController(req: Request, res: Response) {
return res.status(402).json({ error: "Insufficient credits" });
}
} catch (error) {
console.error(error);
Logger.error(error);
earlyReturn = true;
return res.status(402).json({ error: "Error checking team credits. Please contact hello@firecrawl.com for help." });
}
@ -188,7 +189,7 @@ export async function scrapeController(req: Request, res: Response) {
return res.status(result.returnCode).json(result);
} catch (error) {
console.error(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -7,6 +7,7 @@ import { logJob } from "../services/logging/log_job";
import { PageOptions, SearchOptions } from "../lib/entities";
import { search } from "../search";
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
import { Logger } from "../lib/logger";
export async function searchHelper(
req: Request,
@ -155,7 +156,7 @@ export async function searchController(req: Request, res: Response) {
return res.status(402).json({ error: "Insufficient credits" });
}
} catch (error) {
console.error(error);
Logger.error(error);
return res.status(500).json({ error: "Internal server error" });
}
const startTime = new Date().getTime();
@ -183,7 +184,7 @@ export async function searchController(req: Request, res: Response) {
});
return res.status(result.returnCode).json(result);
} catch (error) {
console.error(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -1,6 +1,7 @@
import { Request, Response } from "express";
import { getWebScraperQueue } from "../../src/services/queue-service";
import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
import { Logger } from "../../src/lib/logger";
export async function crawlJobStatusPreviewController(req: Request, res: Response) {
try {
@ -35,7 +36,7 @@ export async function crawlJobStatusPreviewController(req: Request, res: Respons
partial_data: jobStatus == 'completed' ? [] : partialDocs,
});
} catch (error) {
console.error(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
}

View File

@ -12,16 +12,17 @@ import { sendSlackWebhook } from "./services/alerts/slack";
import { checkAlerts } from "./services/alerts";
import Redis from "ioredis";
import { redisRateLimitClient } from "./services/rate-limiter";
import { Logger } from "./lib/logger";
const { createBullBoard } = require("@bull-board/api");
const { BullAdapter } = require("@bull-board/api/bullAdapter");
const { ExpressAdapter } = require("@bull-board/express");
const numCPUs = process.env.ENV === "local" ? 2 : os.cpus().length;
console.log(`Number of CPUs: ${numCPUs} available`);
Logger.info(`Number of CPUs: ${numCPUs} available`);
if (cluster.isMaster) {
console.log(`Master ${process.pid} is running`);
Logger.info(`Master ${process.pid} is running`);
// Fork workers.
for (let i = 0; i < numCPUs; i++) {
@ -30,8 +31,8 @@ if (cluster.isMaster) {
cluster.on("exit", (worker, code, signal) => {
if (code !== null) {
console.log(`Worker ${worker.process.pid} exited`);
console.log("Starting a new worker");
Logger.info(`Worker ${worker.process.pid} exited`);
Logger.info("Starting a new worker");
cluster.fork();
}
});
@ -81,14 +82,9 @@ if (cluster.isMaster) {
function startServer(port = DEFAULT_PORT) {
const server = app.listen(Number(port), HOST, () => {
console.log(`Worker ${process.pid} listening on port ${port}`);
console.log(
`For the UI, open http://${HOST}:${port}/admin/${process.env.BULL_AUTH_KEY}/queues`
);
console.log("");
console.log("1. Make sure Redis is running on port 6379 by default");
console.log(
"2. If you want to run nango, make sure you do port forwarding in 3002 using ngrok http 3002 "
Logger.info(`Worker ${process.pid} listening on port ${port}`);
Logger.info(
`For the Queue UI, open: http://${HOST}:${port}/admin/${process.env.BULL_AUTH_KEY}/queues`
);
});
return server;
@ -114,7 +110,7 @@ if (cluster.isMaster) {
noActiveJobs,
});
} catch (error) {
console.error(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
});
@ -132,7 +128,7 @@ if (cluster.isMaster) {
waitingJobs,
});
} catch (error) {
console.error(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
});
@ -177,13 +173,13 @@ if (cluster.isMaster) {
});
if (!response.ok) {
console.error("Failed to send Slack notification");
Logger.error("Failed to send Slack notification");
}
}
}, timeout);
}
} catch (error) {
console.error(error);
Logger.debug(error);
}
};
@ -198,7 +194,7 @@ if (cluster.isMaster) {
await checkAlerts();
return res.status(200).send("Alerts initialized");
} catch (error) {
console.error("Failed to initialize alerts:", error);
Logger.debug(`Failed to initialize alerts: ${error}`);
return res.status(500).send("Failed to initialize alerts");
}
}
@ -207,6 +203,7 @@ if (cluster.isMaster) {
app.get(
`/admin/${process.env.BULL_AUTH_KEY}/clean-before-24h-complete-jobs`,
async (req, res) => {
Logger.info("🐂 Cleaning jobs older than 24h");
try {
const webScraperQueue = getWebScraperQueue();
const batchSize = 10;
@ -241,12 +238,12 @@ if (cluster.isMaster) {
await job.remove();
count++;
} catch (jobError) {
console.error(`Failed to remove job with ID ${job.id}:`, jobError);
Logger.error(`🐂 Failed to remove job with ID ${job.id}: ${jobError}` );
}
}
return res.status(200).send(`Removed ${count} completed jobs.`);
} catch (error) {
console.error("Failed to clean last 24h complete jobs:", error);
Logger.error(`🐂 Failed to clean last 24h complete jobs: ${error}`);
return res.status(500).send("Failed to clean jobs");
}
}
@ -272,7 +269,7 @@ if (cluster.isMaster) {
queueRedisHealth = await queueRedis.get(testKey);
await queueRedis.del(testKey);
} catch (error) {
console.error("queueRedis health check failed:", error);
Logger.error(`queueRedis health check failed: ${error}`);
queueRedisHealth = null;
}
@ -283,7 +280,7 @@ if (cluster.isMaster) {
redisRateLimitHealth = await redisRateLimitClient.get(testKey);
await redisRateLimitClient.del(testKey);
} catch (error) {
console.error("redisRateLimitClient health check failed:", error);
Logger.error(`redisRateLimitClient health check failed: ${error}`);
redisRateLimitHealth = null;
}
@ -297,12 +294,12 @@ if (cluster.isMaster) {
healthStatus.queueRedis === "healthy" &&
healthStatus.redisRateLimitClient === "healthy"
) {
console.log("Both Redis instances are healthy");
Logger.info("Both Redis instances are healthy");
return res
.status(200)
.json({ status: "healthy", details: healthStatus });
} else {
console.log("Redis instances health check:", healthStatus);
Logger.info(`Redis instances health check: ${JSON.stringify(healthStatus)}`);
await sendSlackWebhook(
`[REDIS DOWN] Redis instances health check: ${JSON.stringify(
healthStatus
@ -314,7 +311,7 @@ if (cluster.isMaster) {
.json({ status: "unhealthy", details: healthStatus });
}
} catch (error) {
console.error("Redis health check failed:", error);
Logger.error(`Redis health check failed: ${error}`);
await sendSlackWebhook(
`[REDIS DOWN] Redis instances health check: ${error.message}`,
true
@ -326,5 +323,5 @@ if (cluster.isMaster) {
}
);
console.log(`Worker ${process.pid} started`);
Logger.info(`Worker ${process.pid} started`);
}

View File

@ -4,6 +4,7 @@ const ajv = new Ajv(); // Initialize AJV for JSON schema validation
import { generateOpenAICompletions } from "./models";
import { Document, ExtractorOptions } from "../entities";
import { Logger } from "../logger";
// Generate completion using OpenAI
export async function generateCompletions(
@ -44,7 +45,7 @@ export async function generateCompletions(
return completionResult;
} catch (error) {
console.error(`Error generating completions: ${error}`);
Logger.error(`Error generating completions: ${error}`);
throw new Error(`Error generating completions: ${error.message}`);
}
default:

View File

@ -0,0 +1,53 @@
enum LogLevel {
NONE = 'NONE', // No logs will be output.
ERROR = 'ERROR', // For logging error messages that indicate a failure in a specific operation.
WARN = 'WARN', // For logging potentially harmful situations that are not necessarily errors.
INFO = 'INFO', // For logging informational messages that highlight the progress of the application.
DEBUG = 'DEBUG', // For logging detailed information on the flow through the system, primarily used for debugging.
TRACE = 'TRACE' // For logging more detailed information than the DEBUG level.
}
export class Logger {
static colors = {
ERROR: '\x1b[31m%s\x1b[0m', // Red
WARN: '\x1b[33m%s\x1b[0m', // Yellow
INFO: '\x1b[34m%s\x1b[0m', // Blue
DEBUG: '\x1b[36m%s\x1b[0m', // Cyan
TRACE: '\x1b[35m%s\x1b[0m' // Magenta
};
static log (message: string, level: LogLevel) {
const logLevel: LogLevel = LogLevel[process.env.LOGGING_LEVEL as keyof typeof LogLevel] || LogLevel.INFO;
const levels = [LogLevel.NONE, LogLevel.ERROR, LogLevel.WARN, LogLevel.INFO, LogLevel.DEBUG, LogLevel.TRACE];
const currentLevelIndex = levels.indexOf(logLevel);
const messageLevelIndex = levels.indexOf(level);
if (currentLevelIndex >= messageLevelIndex) {
const color = Logger.colors[level];
console[level.toLowerCase()](color, `[${new Date().toISOString()}]${level} - ${message}`);
// if (process.env.USE_DB_AUTH) {
// save to supabase? another place?
// supabase.from('logs').insert({ level: level, message: message, timestamp: new Date().toISOString(), success: boolean });
// }
}
}
static error(message: string | any) {
Logger.log(message, LogLevel.ERROR);
}
static warn(message: string) {
Logger.log(message, LogLevel.WARN);
}
static info(message: string) {
Logger.log(message, LogLevel.INFO);
}
static debug(message: string) {
Logger.log(message, LogLevel.DEBUG);
}
static trace(message: string) {
Logger.log(message, LogLevel.TRACE);
}
}

View File

@ -1,4 +1,5 @@
import { AuthResponse } from "../../src/types";
import { Logger } from "./logger";
let warningCount = 0;
@ -8,7 +9,7 @@ export function withAuth<T extends AuthResponse, U extends any[]>(
return async function (...args: U): Promise<T> {
if (process.env.USE_DB_AUTHENTICATION === "false") {
if (warningCount < 5) {
console.warn("WARNING - You're bypassing authentication");
Logger.warn("You're bypassing authentication");
warningCount++;
}
return { success: true } as T;
@ -16,7 +17,7 @@ export function withAuth<T extends AuthResponse, U extends any[]>(
try {
return await originalFunction(...args);
} catch (error) {
console.error("Error in withAuth function: ", error);
Logger.error(`Error in withAuth function: ${error}`);
return { success: false, error: error.message } as T;
}
}

View File

@ -10,6 +10,7 @@ import { DocumentUrl, Progress } from "../lib/entities";
import { billTeam } from "../services/billing/credit_billing";
import { Document } from "../lib/entities";
import { supabase_service } from "../services/supabase";
import { Logger } from "../lib/logger";
export async function startWebScraperPipeline({
job,
@ -23,6 +24,7 @@ export async function startWebScraperPipeline({
crawlerOptions: job.data.crawlerOptions,
pageOptions: job.data.pageOptions,
inProgress: (progress) => {
Logger.debug(`🐂 Job in progress ${job.id}`);
if (progress.currentDocument) {
partialDocs.push(progress.currentDocument);
if (partialDocs.length > 50) {
@ -32,9 +34,11 @@ export async function startWebScraperPipeline({
}
},
onSuccess: (result) => {
Logger.debug(`🐂 Job completed ${job.id}`);
saveJob(job, result);
},
onError: (error) => {
Logger.error(`🐂 Job failed ${job.id}`);
job.moveToFailed(error);
},
team_id: job.data.team_id,
@ -108,7 +112,6 @@ export async function runWebScraper({
// this return doesn't matter too much for the job completion result
return { success: true, message: "", docs: filteredDocs };
} catch (error) {
console.error("Error running web scraper", error);
onError(error);
return { success: false, message: error.message, docs: [] };
}
@ -136,6 +139,6 @@ const saveJob = async (job: Job, result: any) => {
}
}
} catch (error) {
console.error("Failed to update job status:", error);
Logger.error(`🐂 Failed to update job status: ${error}`);
}
};

View File

@ -8,6 +8,7 @@ import { scrapSingleUrl } from "./single_url";
import robotsParser from "robots-parser";
import { getURLDepth } from "./utils/maxDepthUtils";
import { axiosTimeout } from "../../../src/lib/timeout";
import { Logger } from "../../../src/lib/logger";
export class WebCrawler {
private initialUrl: string;
@ -116,7 +117,7 @@ export class WebCrawler {
const isAllowed = this.robots.isAllowed(link, "FireCrawlAgent") ?? true;
// Check if the link is disallowed by robots.txt
if (!isAllowed) {
console.log(`Link disallowed by robots.txt: ${link}`);
Logger.debug(`Link disallowed by robots.txt: ${link}`);
return false;
}
@ -133,15 +134,19 @@ export class WebCrawler {
limit: number = 10000,
maxDepth: number = 10
): Promise<{ url: string, html: string }[]> {
Logger.debug(`Crawler starting with ${this.initialUrl}`);
// Fetch and parse robots.txt
try {
const response = await axios.get(this.robotsTxtUrl, { timeout: axiosTimeout });
this.robots = robotsParser(this.robotsTxtUrl, response.data);
Logger.debug(`Crawler robots.txt fetched with ${this.robotsTxtUrl}`);
} catch (error) {
console.log(`Failed to fetch robots.txt from ${this.robotsTxtUrl}`);
Logger.debug(`Failed to fetch robots.txt from ${this.robotsTxtUrl}`);
}
if (!crawlerOptions?.ignoreSitemap){
Logger.debug(`Fetching sitemap links from ${this.initialUrl}`);
const sitemapLinks = await this.tryFetchSitemapLinks(this.initialUrl);
if (sitemapLinks.length > 0) {
let filteredLinks = this.filterLinks(sitemapLinks, limit, maxDepth);
@ -175,6 +180,7 @@ export class WebCrawler {
inProgress?: (progress: Progress) => void,
): Promise<{ url: string, html: string }[]> {
const queue = async.queue(async (task: string, callback) => {
Logger.debug(`Crawling ${task}`);
if (this.crawledUrls.size >= Math.min(this.maxCrawledLinks, this.limit)) {
if (callback && typeof callback === "function") {
callback();
@ -216,16 +222,18 @@ export class WebCrawler {
}
}, concurrencyLimit);
Logger.debug(`🐂 Pushing ${urls.length} URLs to the queue`);
queue.push(
urls.filter(
(url) =>
!this.visited.has(url) && this.robots.isAllowed(url, "FireCrawlAgent")
),
(err) => {
if (err) console.error(err);
if (err) Logger.error(`🐂 Error pushing URLs to the queue: ${err}`);
}
);
await queue.drain();
Logger.debug(`🐂 Crawled ${this.crawledUrls.size} URLs, Queue drained.`);
return Array.from(this.crawledUrls.entries()).map(([url, html]) => ({ url, html }));
}
@ -282,7 +290,6 @@ export class WebCrawler {
const urlObj = new URL(fullUrl);
const path = urlObj.pathname;
if (this.isInternalLink(fullUrl)) { // INTERNAL LINKS
if (this.isInternalLink(fullUrl) &&
this.noSections(fullUrl) &&
@ -452,7 +459,7 @@ export class WebCrawler {
sitemapLinks = await getLinksFromSitemap({ sitemapUrl });
}
} catch (error) {
console.error(`Failed to fetch sitemap with axios from ${sitemapUrl}: ${error}`);
Logger.debug(`Failed to fetch sitemap with axios from ${sitemapUrl}: ${error}`);
const response = await getLinksFromSitemap({ sitemapUrl, mode: 'fire-engine' });
if (response) {
sitemapLinks = response;
@ -467,7 +474,7 @@ export class WebCrawler {
sitemapLinks = await getLinksFromSitemap({ sitemapUrl: baseUrlSitemap });
}
} catch (error) {
console.error(`Failed to fetch sitemap from ${baseUrlSitemap}: ${error}`);
Logger.debug(`Failed to fetch sitemap from ${baseUrlSitemap}: ${error}`);
sitemapLinks = await getLinksFromSitemap({ sitemapUrl: baseUrlSitemap, mode: 'fire-engine' });
}
}

View File

@ -1,10 +1,12 @@
import { Logger } from "../../../lib/logger";
export async function handleCustomScraping(
text: string,
url: string
): Promise<{ scraper: string; url: string; waitAfterLoad?: number, pageOptions?: { scrollXPaths?: string[] } } | null> {
// Check for Readme Docs special case
if (text.includes('<meta name="readme-deploy"')) {
console.log(
Logger.debug(
`Special use case detected for ${url}, using Fire Engine with wait time 1000ms`
);
return {
@ -19,7 +21,7 @@ export async function handleCustomScraping(
// Check for Vanta security portals
if (text.includes('<link href="https://static.vanta.com')) {
console.log(
Logger.debug(
`Vanta link detected for ${url}, using Fire Engine with wait time 3000ms`
);
return {
@ -34,7 +36,7 @@ export async function handleCustomScraping(
const googleDriveMetaMatch = text.match(googleDriveMetaPattern);
if (googleDriveMetaMatch) {
const url = googleDriveMetaMatch[1];
console.log(`Google Drive PDF link detected: ${url}`);
Logger.debug(`Google Drive PDF link detected: ${url}`);
const fileIdMatch = url.match(/https:\/\/drive\.google\.com\/file\/d\/([^\/]+)\/view/);
if (fileIdMatch) {

View File

@ -19,6 +19,7 @@ import { generateCompletions } from "../../lib/LLM-extraction";
import { getWebScraperQueue } from "../../../src/services/queue-service";
import { fetchAndProcessDocx } from "./utils/docxProcessor";
import { getAdjustedMaxDepth, getURLDepth } from "./utils/maxDepthUtils";
import { Logger } from "../../lib/logger";
export class WebScraperDataProvider {
private bullJobId: string;
@ -89,14 +90,14 @@ export class WebScraperDataProvider {
const job = await getWebScraperQueue().getJob(this.bullJobId);
const jobStatus = await job.getState();
if (jobStatus === "failed") {
console.error(
Logger.info(
"Job has failed or has been cancelled by the user. Stopping the job..."
);
return [] as Document[];
}
}
} catch (error) {
console.error(error);
Logger.error(error.message);
return [] as Document[];
}
}
@ -270,7 +271,7 @@ export class WebScraperDataProvider {
this.mode === "single_urls" && links.length > 0
? this.getSitemapDataForSingleUrl(this.urls[0], links[0], 1500).catch(
(error) => {
console.error("Failed to fetch sitemap data:", error);
Logger.debug(`Failed to fetch sitemap data: ${error}`);
return null;
}
)
@ -460,7 +461,7 @@ export class WebScraperDataProvider {
let documents: Document[] = [];
for (const url of urls) {
const normalizedUrl = this.normalizeUrl(url);
console.log(
Logger.debug(
"Getting cached document for web-scraper-cache:" + normalizedUrl
);
const cachedDocumentString = await getValue(

View File

@ -2,6 +2,7 @@ import axios from "axios";
import { logScrape } from "../../../services/logging/scrape_log";
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
import { universalTimeout } from "../global";
import { Logger } from "../../../lib/logger";
/**
* Scrapes a URL with Axios
@ -34,9 +35,7 @@ export async function scrapWithFetch(
});
if (response.status !== 200) {
console.error(
`[Axios] Error fetching url: ${url} with status: ${response.status}`
);
Logger.debug(`⛏️ Axios: Failed to fetch url: ${url} with status: ${response.status}`);
logParams.error_message = response.statusText;
logParams.response_code = response.status;
return {
@ -63,10 +62,10 @@ export async function scrapWithFetch(
} catch (error) {
if (error.code === "ECONNABORTED") {
logParams.error_message = "Request timed out";
console.log(`[Axios] Request timed out for ${url}`);
Logger.debug(`⛏️ Axios: Request timed out for ${url}`);
} else {
logParams.error_message = error.message || error;
console.error(`[Axios] Error fetching url: ${url} -> ${error}`);
Logger.debug(`⛏️ Axios: Failed to fetch url: ${url} | Error: ${error}`);
}
return { content: "", pageStatusCode: null, pageError: logParams.error_message };
} finally {

View File

@ -4,6 +4,7 @@ import { logScrape } from "../../../services/logging/scrape_log";
import { generateRequestParams } from "../single_url";
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
import { universalTimeout } from "../global";
import { Logger } from "../../../lib/logger";
/**
* Scrapes a URL with Fire-Engine
@ -59,12 +60,10 @@ export async function scrapWithFireEngine({
let engine = engineParam; // do we want fireEngineOptions as first choice?
console.log(
`[Fire-Engine][${engine}] Scraping ${url} with wait: ${waitParam} and screenshot: ${screenshotParam} and method: ${fireEngineOptionsParam?.method ?? "null"}`
Logger.info(
`⛏️ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, method: ${fireEngineOptionsParam?.method ?? "null"} }`
);
// console.log(fireEngineOptionsParam)
const response = await axios.post(
process.env.FIRE_ENGINE_BETA_URL + endpoint,
{
@ -84,15 +83,15 @@ export async function scrapWithFireEngine({
);
if (response.status !== 200) {
console.error(
`[Fire-Engine][${engine}] Error fetching url: ${url} with status: ${response.status}`
Logger.debug(
`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`
);
logParams.error_message = response.data?.pageError;
logParams.response_code = response.data?.pageStatusCode;
if(response.data && response.data?.pageStatusCode !== 200) {
console.error(`[Fire-Engine][${engine}] Error fetching url: ${url} with status: ${response.status}`);
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`);
}
return {
@ -130,10 +129,10 @@ export async function scrapWithFireEngine({
}
} catch (error) {
if (error.code === "ECONNABORTED") {
console.log(`[Fire-Engine] Request timed out for ${url}`);
Logger.debug(`⛏️ Fire-Engine: Request timed out for ${url}`);
logParams.error_message = "Request timed out";
} else {
console.error(`[Fire-Engine][c] Error fetching url: ${url} -> ${error}`);
Logger.debug(`⛏️ Fire-Engine: Failed to fetch url: ${url} | Error: ${error}`);
logParams.error_message = error.message || error;
}
return { html: "", screenshot: "", pageStatusCode: null, pageError: logParams.error_message };

View File

@ -3,6 +3,7 @@ import { logScrape } from "../../../services/logging/scrape_log";
import { generateRequestParams } from "../single_url";
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
import { universalTimeout } from "../global";
import { Logger } from "../../../lib/logger";
/**
* Scrapes a URL with Playwright
@ -51,8 +52,8 @@ export async function scrapWithPlaywright(
);
if (response.status !== 200) {
console.error(
`[Playwright] Error fetching url: ${url} with status: ${response.status}`
Logger.debug(
`⛏️ Playwright: Failed to fetch url: ${url} | status: ${response.status}, error: ${response.data?.pageError}`
);
logParams.error_message = response.data?.pageError;
logParams.response_code = response.data?.pageStatusCode;
@ -86,8 +87,8 @@ export async function scrapWithPlaywright(
};
} catch (jsonError) {
logParams.error_message = jsonError.message || jsonError;
console.error(
`[Playwright] Error parsing JSON response for url: ${url} -> ${jsonError}`
Logger.debug(
`⛏️ Playwright: Error parsing JSON response for url: ${url} | Error: ${jsonError}`
);
return { content: "", pageStatusCode: null, pageError: logParams.error_message };
}
@ -95,10 +96,10 @@ export async function scrapWithPlaywright(
} catch (error) {
if (error.code === "ECONNABORTED") {
logParams.error_message = "Request timed out";
console.log(`[Playwright] Request timed out for ${url}`);
Logger.debug(`⛏️ Playwright: Request timed out for ${url}`);
} else {
logParams.error_message = error.message || error;
console.error(`[Playwright] Error fetching url: ${url} -> ${error}`);
Logger.debug(`⛏️ Playwright: Failed to fetch url: ${url} | Error: ${error}`);
}
return { content: "", pageStatusCode: null, pageError: logParams.error_message };
} finally {

View File

@ -3,6 +3,7 @@ import { generateRequestParams } from "../single_url";
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
import { universalTimeout } from "../global";
import { ScrapingBeeClient } from "scrapingbee";
import { Logger } from "../../../lib/logger";
/**
* Scrapes a URL with ScrapingBee
@ -56,8 +57,8 @@ export async function scrapWithScrapingBee(
text = decoder.decode(response.data);
logParams.success = true;
} catch (decodeError) {
console.error(
`[ScrapingBee][c] Error decoding response data for url: ${url} -> ${decodeError}`
Logger.debug(
`⛏️ ScrapingBee: Error decoding response data for url: ${url} | Error: ${decodeError}`
);
logParams.error_message = decodeError.message || decodeError;
}
@ -72,7 +73,7 @@ export async function scrapWithScrapingBee(
};
}
} catch (error) {
console.error(`[ScrapingBee][c] Error fetching url: ${url} -> ${error}`);
Logger.debug(`⛏️ ScrapingBee: Error fetching url: ${url} | Error: ${error}`);
logParams.error_message = error.message || error;
logParams.response_code = error.response?.status;
return {

View File

@ -17,6 +17,7 @@ import { scrapWithFireEngine } from "./scrapers/fireEngine";
import { scrapWithPlaywright } from "./scrapers/playwright";
import { scrapWithScrapingBee } from "./scrapers/scrapingBee";
import { extractLinks } from "./utils/utils";
import { Logger } from "../../lib/logger";
dotenv.config();
@ -48,7 +49,7 @@ export async function generateRequestParams(
return defaultParams;
}
} catch (error) {
console.error(`Error generating URL key: ${error}`);
Logger.error(`Error generating URL key: ${error}`);
return defaultParams;
}
}
@ -154,7 +155,6 @@ export async function scrapSingleUrl(
}
if (process.env.FIRE_ENGINE_BETA_URL) {
console.log(`Scraping ${url} with Fire Engine`);
const response = await scrapWithFireEngine({
url,
waitFor: pageOptions.waitFor,
@ -277,7 +277,7 @@ export async function scrapSingleUrl(
try {
urlKey = new URL(urlToScrap).hostname.replace(/^www\./, "");
} catch (error) {
console.error(`Invalid URL key, trying: ${urlToScrap}`);
Logger.error(`Invalid URL key, trying: ${urlToScrap}`);
}
const defaultScraper = urlSpecificParams[urlKey]?.defaultScraper ?? "";
const scrapersInOrder = getScrapingFallbackOrder(
@ -311,12 +311,18 @@ export async function scrapSingleUrl(
pageError = undefined;
}
if (text && text.trim().length >= 100) break;
if (pageStatusCode && pageStatusCode == 404) break;
const nextScraperIndex = scrapersInOrder.indexOf(scraper) + 1;
if (nextScraperIndex < scrapersInOrder.length) {
console.info(`Falling back to ${scrapersInOrder[nextScraperIndex]}`);
if (text && text.trim().length >= 100) {
Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100, breaking`);
break;
}
if (pageStatusCode && pageStatusCode == 404) {
Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with status code 404, breaking`);
break;
}
// const nextScraperIndex = scrapersInOrder.indexOf(scraper) + 1;
// if (nextScraperIndex < scrapersInOrder.length) {
// Logger.debug(`⛏️ ${scraper} Failed to fetch URL: ${urlToScrap} with status: ${pageStatusCode}, error: ${pageError} | Falling back to ${scrapersInOrder[nextScraperIndex]}`);
// }
}
if (!text) {
@ -372,7 +378,7 @@ export async function scrapSingleUrl(
return document;
} catch (error) {
console.error(`Error: ${error} - Failed to fetch URL: ${urlToScrap}`);
Logger.debug(`⛏️ Error: ${error.message} - Failed to fetch URL: ${urlToScrap}`);
return {
content: "",
markdown: "",

View File

@ -3,6 +3,7 @@ import { axiosTimeout } from "../../lib/timeout";
import { parseStringPromise } from "xml2js";
import { scrapWithFireEngine } from "./scrapers/fireEngine";
import { WebCrawler } from "./crawler";
import { Logger } from "../../lib/logger";
export async function getLinksFromSitemap(
{
@ -26,7 +27,7 @@ export async function getLinksFromSitemap(
content = response.html;
}
} catch (error) {
console.error(`Request failed for ${sitemapUrl}: ${error}`);
Logger.error(`Request failed for ${sitemapUrl}: ${error.message}`);
return allUrls;
}
@ -48,7 +49,7 @@ export async function getLinksFromSitemap(
}
}
} catch (error) {
console.error(`Error processing ${sitemapUrl}: ${error}`);
Logger.debug(`Error processing sitemapUrl: ${sitemapUrl} | Error: ${error.message}`);
}
return allUrls;

View File

@ -1,3 +1,4 @@
import { Logger } from '../../../../lib/logger';
import { isUrlBlocked } from '../blocklist';
describe('isUrlBlocked', () => {
@ -19,7 +20,7 @@ describe('isUrlBlocked', () => {
blockedUrls.forEach(url => {
if (!isUrlBlocked(url)) {
console.log(`URL not blocked: ${url}`);
Logger.debug(`URL not blocked: ${url}`);
}
expect(isUrlBlocked(url)).toBe(true);
});

View File

@ -1,3 +1,5 @@
import { Logger } from "../../../lib/logger";
const socialMediaBlocklist = [
'facebook.com',
'x.com',
@ -59,7 +61,7 @@ export function isUrlBlocked(url: string): boolean {
return isBlocked;
} catch (e) {
// If an error occurs (e.g., invalid URL), return false
console.error(`Error parsing the following URL: ${url}`);
Logger.error(`Error parsing the following URL: ${url}`);
return false;
}
}

View File

@ -1,5 +1,6 @@
import Anthropic from '@anthropic-ai/sdk';
import axios from 'axios';
import { Logger } from '../../../lib/logger';
export async function getImageDescription(
imageUrl: string,
@ -82,7 +83,7 @@ export async function getImageDescription(
}
}
} catch (error) {
console.error("Error generating image alt text:", error?.message);
Logger.error(`Error generating image alt text: ${error}`);
return "";
}
}

View File

@ -1,4 +1,6 @@
import { CheerioAPI } from "cheerio";
import { Logger } from "../../../lib/logger";
interface Metadata {
title?: string;
description?: string;
@ -105,7 +107,7 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata {
dctermsCreated = soup('meta[name="dcterms.created"]').attr("content") || null;
} catch (error) {
console.error("Error extracting metadata:", error);
Logger.error(`Error extracting metadata: ${error}`);
}
return {

View File

@ -7,6 +7,7 @@ import pdf from "pdf-parse";
import path from "path";
import os from "os";
import { axiosTimeout } from "../../../lib/timeout";
import { Logger } from "../../../lib/logger";
dotenv.config();
@ -39,6 +40,7 @@ export async function processPdfToText(filePath: string, parsePDF: boolean): Pro
let content = "";
if (process.env.LLAMAPARSE_API_KEY && parsePDF) {
Logger.debug("Processing pdf document w/ LlamaIndex");
const apiKey = process.env.LLAMAPARSE_API_KEY;
const headers = {
Authorization: `Bearer ${apiKey}`,
@ -81,7 +83,7 @@ export async function processPdfToText(filePath: string, parsePDF: boolean): Pro
await new Promise((resolve) => setTimeout(resolve, 500)); // Wait for 0.5 seconds
}
} catch (error) {
console.error("Error fetching result w/ LlamaIndex");
Logger.debug("Error fetching result w/ LlamaIndex");
attempt++;
await new Promise((resolve) => setTimeout(resolve, 500)); // Wait for 0.5 seconds before retrying
// You may want to handle specific errors differently
@ -93,7 +95,7 @@ export async function processPdfToText(filePath: string, parsePDF: boolean): Pro
}
content = resultResponse.data[resultType];
} catch (error) {
console.error("Error processing pdf document w/ LlamaIndex(2)");
Logger.debug("Error processing pdf document w/ LlamaIndex(2)");
content = await processPdf(filePath);
}
} else if (parsePDF) {

View File

@ -1,3 +1,4 @@
import { Logger } from "../../../lib/logger";
import { Document } from "../../../lib/entities";
export const replacePathsWithAbsolutePaths = (documents: Document[]): Document[] => {
@ -39,7 +40,7 @@ export const replacePathsWithAbsolutePaths = (documents: Document[]): Document[]
return documents;
} catch (error) {
console.error("Error replacing paths with absolute paths", error);
Logger.debug(`Error replacing paths with absolute paths: ${error}`);
return documents;
}
};
@ -78,7 +79,7 @@ export const replaceImgPathsWithAbsolutePaths = (documents: Document[]): Documen
return documents;
} catch (error) {
console.error("Error replacing img paths with absolute paths", error);
Logger.error(`Error replacing img paths with absolute paths: ${error}`);
return documents;
}
};

View File

@ -1,5 +1,6 @@
import axios from "axios";
import * as cheerio from "cheerio";
import { Logger } from "../../../lib/logger";
export async function attemptScrapWithRequests(
@ -9,13 +10,13 @@ export async function attemptScrapWithRequests(
const response = await axios.get(urlToScrap, { timeout: 15000 });
if (!response.data) {
console.log("Failed normal requests as well");
Logger.debug("Failed normal requests as well");
return null;
}
return response.data;
} catch (error) {
console.error(`Error in attemptScrapWithRequests: ${error}`);
Logger.debug(`Error in attemptScrapWithRequests: ${error}`);
return null;
}
}

View File

@ -2,6 +2,7 @@ import axios from 'axios';
import * as cheerio from 'cheerio';
import * as querystring from 'querystring';
import { SearchResult } from '../../src/lib/entities';
import { Logger } from '../../src/lib/logger';
const _useragent_list = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
@ -96,7 +97,7 @@ export async function google_search(term: string, advanced = false, num_results
await new Promise(resolve => setTimeout(resolve, sleep_interval * 1000));
} catch (error) {
if (error.message === 'Too many requests') {
console.warn('Too many requests, breaking the loop');
Logger.warn('Too many requests, breaking the loop');
break;
}
throw error;
@ -107,7 +108,7 @@ export async function google_search(term: string, advanced = false, num_results
}
}
if (attempts >= maxAttempts) {
console.warn('Max attempts reached, breaking the loop');
Logger.warn('Max attempts reached, breaking the loop');
}
return results
}

View File

@ -1,3 +1,4 @@
import { Logger } from "../../src/lib/logger";
import { SearchResult } from "../../src/lib/entities";
import { google_search } from "./googlesearch";
import { serper_search } from "./serper";
@ -47,7 +48,7 @@ export async function search({
timeout
);
} catch (error) {
console.error("Error in search function: ", error);
Logger.error(`Error in search function: ${error}`);
return []
}
// if process.env.SERPER_API_KEY is set, use serper

View File

@ -1,3 +1,4 @@
import { Logger } from "../../../src/lib/logger";
import { getWebScraperQueue } from "../queue-service";
import { sendSlackWebhook } from "./slack";
@ -9,13 +10,13 @@ export async function checkAlerts() {
process.env.ALERT_NUM_ACTIVE_JOBS &&
process.env.ALERT_NUM_WAITING_JOBS
) {
console.info("Initializing alerts");
Logger.info("Initializing alerts");
const checkActiveJobs = async () => {
try {
const webScraperQueue = getWebScraperQueue();
const activeJobs = await webScraperQueue.getActiveCount();
if (activeJobs > Number(process.env.ALERT_NUM_ACTIVE_JOBS)) {
console.warn(
Logger.warn(
`Alert: Number of active jobs is over ${process.env.ALERT_NUM_ACTIVE_JOBS}. Current active jobs: ${activeJobs}.`
);
sendSlackWebhook(
@ -23,12 +24,12 @@ export async function checkAlerts() {
true
);
} else {
console.info(
Logger.info(
`Number of active jobs is under ${process.env.ALERT_NUM_ACTIVE_JOBS}. Current active jobs: ${activeJobs}`
);
}
} catch (error) {
console.error("Failed to check active jobs:", error);
Logger.error(`Failed to check active jobs: ${error}`);
}
};
@ -38,7 +39,7 @@ export async function checkAlerts() {
const paused = await webScraperQueue.getPausedCount();
if (waitingJobs !== paused && waitingJobs > Number(process.env.ALERT_NUM_WAITING_JOBS)) {
console.warn(
Logger.warn(
`Alert: Number of waiting jobs is over ${process.env.ALERT_NUM_WAITING_JOBS}. Current waiting jobs: ${waitingJobs}.`
);
sendSlackWebhook(
@ -57,6 +58,6 @@ export async function checkAlerts() {
// setInterval(checkAll, 10000); // Run every
}
} catch (error) {
console.error("Failed to initialize alerts:", error);
Logger.error(`Failed to initialize alerts: ${error}`);
}
}

View File

@ -1,4 +1,5 @@
import axios from "axios";
import { Logger } from "../../../src/lib/logger";
export async function sendSlackWebhook(
message: string,
@ -16,8 +17,8 @@ export async function sendSlackWebhook(
"Content-Type": "application/json",
},
});
console.log("Webhook sent successfully:", response.data);
Logger.log("Webhook sent successfully:", response.data);
} catch (error) {
console.error("Error sending webhook:", error);
Logger.debug(`Error sending webhook: ${error}`);
}
}

View File

@ -2,6 +2,7 @@ import { NotificationType } from "../../types";
import { withAuth } from "../../lib/withAuth";
import { sendNotification } from "../notification/email_notification";
import { supabase_service } from "../supabase";
import { Logger } from "../../lib/logger";
const FREE_CREDITS = 500;
@ -12,7 +13,7 @@ export async function supaBillTeam(team_id: string, credits: number) {
if (team_id === "preview") {
return { success: true, message: "Preview team, no credits used" };
}
console.log(`Billing team ${team_id} for ${credits} credits`);
Logger.info(`Billing team ${team_id} for ${credits} credits`);
// When the API is used, you can log the credit usage in the credit_usage table:
// team_id: The ID of the team using the API.
// subscription_id: The ID of the team's active subscription.
@ -218,7 +219,7 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) {
0
);
console.log("totalCreditsUsed", totalCreditsUsed);
Logger.info(`totalCreditsUsed: ${totalCreditsUsed}`);
const end = new Date();
end.setDate(end.getDate() + 30);
@ -262,14 +263,14 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) {
});
if (creditUsageError) {
console.error("Error calculating credit usage:", creditUsageError);
Logger.error(`Error calculating credit usage: ${creditUsageError}`);
}
if (creditUsages && creditUsages.length > 0) {
totalCreditsUsed = creditUsages[0].total_credits_used;
}
} catch (error) {
console.error("Error calculating credit usage:", error);
Logger.error(`Error calculating credit usage: ${error}`);
}
// Adjust total credits used by subtracting coupon value

View File

@ -1,5 +1,6 @@
import { Request } from "express";
import { supabase_service } from "../supabase";
import { Logger } from "../../../src/lib/logger";
export async function createIdempotencyKey(
req: Request,
@ -14,7 +15,7 @@ export async function createIdempotencyKey(
.insert({ key: idempotencyKey });
if (error) {
console.error("Failed to create idempotency key:", error);
Logger.error(`Failed to create idempotency key: ${error}`);
throw error;
}

View File

@ -1,6 +1,7 @@
import { Request } from "express";
import { supabase_service } from "../supabase";
import { validate as isUuid } from 'uuid';
import { Logger } from "../../../src/lib/logger";
export async function validateIdempotencyKey(
req: Request,
@ -13,7 +14,7 @@ export async function validateIdempotencyKey(
// Ensure idempotencyKey is treated as a string
const key = Array.isArray(idempotencyKey) ? idempotencyKey[0] : idempotencyKey;
if (!isUuid(key)) {
console.error("Invalid idempotency key provided in the request headers.");
Logger.debug("Invalid idempotency key provided in the request headers.");
return false;
}
@ -23,7 +24,7 @@ export async function validateIdempotencyKey(
.eq("key", idempotencyKey);
if (error) {
console.error(error);
Logger.error(`Error validating idempotency key: ${error}`);
}
if (!data || data.length === 0) {

View File

@ -1,4 +1,5 @@
import { supabase_service } from "../supabase";
import { Logger } from "../../../src/lib/logger";
import "dotenv/config";
export async function logCrawl(job_id: string, team_id: string) {
@ -13,7 +14,7 @@ export async function logCrawl(job_id: string, team_id: string) {
},
]);
} catch (error) {
console.error("Error logging crawl job:\n", error);
Logger.error(`Error logging crawl job to supabase:\n${error}`);
}
}
}

View File

@ -3,6 +3,7 @@ import { supabase_service } from "../supabase";
import { FirecrawlJob } from "../../types";
import { posthog } from "../posthog";
import "dotenv/config";
import { Logger } from "../../lib/logger";
export async function logJob(job: FirecrawlJob) {
try {
@ -68,9 +69,9 @@ export async function logJob(job: FirecrawlJob) {
posthog.capture(phLog);
}
if (error) {
console.error("Error logging job:\n", error);
Logger.error(`Error logging job: ${error.message}`);
}
} catch (error) {
console.error("Error logging job:\n", error);
Logger.error(`Error logging job: ${error.message}`);
}
}

View File

@ -2,15 +2,16 @@ import "dotenv/config";
import { ScrapeLog } from "../../types";
import { supabase_service } from "../supabase";
import { PageOptions } from "../../lib/entities";
import { Logger } from "../../lib/logger";
export async function logScrape(
scrapeLog: ScrapeLog,
pageOptions?: PageOptions
) {
if (process.env.USE_DB_AUTHENTICATION === "false") {
Logger.debug("Skipping logging scrape to Supabase");
return;
}
try {
// Only log jobs in production
// if (process.env.ENV !== "production") {
@ -43,9 +44,9 @@ export async function logScrape(
]);
if (error) {
console.error("Error logging proxy:\n", error);
Logger.error(`Error logging proxy:\n${error}`);
}
} catch (error) {
console.error("Error logging proxy:\n", error);
Logger.error(`Error logging proxy:\n${error}`);
}
}

View File

@ -1,19 +1,20 @@
import { Logtail } from "@logtail/node";
import "dotenv/config";
import { Logger } from "../lib/logger";
// A mock Logtail class to handle cases where LOGTAIL_KEY is not provided
class MockLogtail {
info(message: string, context?: Record<string, any>): void {
console.log(message, context);
Logger.debug(`${message} - ${context}`);
}
error(message: string, context: Record<string, any> = {}): void {
console.error(message, context);
Logger.error(`${message} - ${context}`);
}
}
// Using the actual Logtail class if LOGTAIL_KEY exists, otherwise using the mock class
// Additionally, print a warning to the terminal if LOGTAIL_KEY is not provided
export const logtail = process.env.LOGTAIL_KEY ? new Logtail(process.env.LOGTAIL_KEY) : (() => {
console.warn("LOGTAIL_KEY is not provided - your events will not be logged. Using MockLogtail as a fallback. see logtail.ts for more.");
Logger.warn("LOGTAIL_KEY is not provided - your events will not be logged. Using MockLogtail as a fallback. see logtail.ts for more.");
return new MockLogtail();
})();

View File

@ -2,6 +2,7 @@ import { supabase_service } from "../supabase";
import { withAuth } from "../../lib/withAuth";
import { Resend } from "resend";
import { NotificationType } from "../../types";
import { Logger } from "../../../src/lib/logger";
const emailTemplates: Record<
NotificationType,
@ -52,11 +53,11 @@ async function sendEmailNotification(
});
if (error) {
console.error("Error sending email: ", error);
Logger.debug(`Error sending email: ${error}`);
return { success: false };
}
} catch (error) {
console.error("Error sending email (2): ", error);
Logger.debug(`Error sending email (2): ${error}`);
return { success: false };
}
}
@ -79,7 +80,7 @@ export async function sendNotificationInternal(
.lte("sent_date", endDateString);
if (error) {
console.error("Error fetching notifications: ", error);
Logger.debug(`Error fetching notifications: ${error}`);
return { success: false };
}
@ -93,7 +94,7 @@ export async function sendNotificationInternal(
.eq("team_id", team_id);
if (emailsError) {
console.error("Error fetching emails: ", emailsError);
Logger.debug(`Error fetching emails: ${emailsError}`);
return { success: false };
}
@ -112,7 +113,7 @@ export async function sendNotificationInternal(
]);
if (insertError) {
console.error("Error inserting notification record: ", insertError);
Logger.debug(`Error inserting notification record: ${insertError}`);
return { success: false };
}

View File

@ -1,5 +1,6 @@
import { PostHog } from 'posthog-node';
import "dotenv/config";
import { Logger } from '../../src/lib/logger';
export default function PostHogClient() {
const posthogClient = new PostHog(process.env.POSTHOG_API_KEY, {
@ -19,7 +20,7 @@ class MockPostHog {
export const posthog = process.env.POSTHOG_API_KEY
? PostHogClient()
: (() => {
console.warn(
Logger.warn(
"POSTHOG_API_KEY is not provided - your events will not be logged. Using MockPostHog as a fallback. See posthog.ts for more."
);
return new MockPostHog();

View File

@ -1,5 +1,6 @@
import Queue from "bull";
import { Queue as BullQueue } from "bull";
import { Logger } from "../lib/logger";
let webScraperQueue: BullQueue;
@ -16,7 +17,7 @@ export function getWebScraperQueue() {
attempts: 5
}
});
console.log("Web scraper queue created");
Logger.info("Web scraper queue created");
}
return webScraperQueue;
}

View File

@ -7,6 +7,7 @@ import { callWebhook } from "./webhook";
import { logJob } from "./logging/log_job";
import { initSDK } from '@hyperdx/node-opentelemetry';
import { Job } from "bull";
import { Logger } from "../lib/logger";
if (process.env.ENV === 'production') {
initSDK({
@ -18,7 +19,7 @@ if(process.env.ENV === 'production') {
const wsq = getWebScraperQueue();
async function processJob(job: Job, done) {
console.log("taking job", job.id);
Logger.debug(`🐂 Worker taking job ${job.id}`);
try {
job.progress({
current: 1,
@ -58,18 +59,18 @@ async function processJob(job: Job, done) {
pageOptions: job.data.pageOptions,
origin: job.data.origin,
});
console.log("job done", job.id);
Logger.debug(`🐂 Job done ${job.id}`);
done(null, data);
} catch (error) {
console.log("job errored", job.id, error);
Logger.error(`🐂 Job errored ${job.id} - ${error}`);
if (await getWebScraperQueue().isPaused(false)) {
console.log("queue is paused, ignoring");
Logger.debug("🐂Queue is paused, ignoring");
return;
}
if (error instanceof CustomError) {
// Here we handle the error, then save the failed job
console.error(error.message); // or any other error handling
Logger.error(error.message); // or any other error handling
logtail.error("Custom error while ingesting", {
job_id: job.id,
@ -77,7 +78,7 @@ async function processJob(job: Job, done) {
dataIngestionJob: error.dataIngestionJob,
});
}
console.log(error);
Logger.error(error);
logtail.error("Overall error ingesting", {
job_id: job.id,

View File

@ -1,14 +1,15 @@
import Redis from "ioredis";
import { redisRateLimitClient } from "./rate-limiter";
import { Logger } from "../lib/logger";
// Listen to 'error' events to the Redis connection
redisRateLimitClient.on("error", (error) => {
try {
if (error.message === "ECONNRESET") {
console.log("Connection to Redis Session Store timed out.");
Logger.error("Connection to Redis Session Rate Limit Store timed out.");
} else if (error.message === "ECONNREFUSED") {
console.log("Connection to Redis Session Store refused!");
} else console.log(error);
Logger.error("Connection to Redis Session Rate Limit Store refused!");
} else Logger.error(error);
} catch (error) {}
});
@ -16,15 +17,15 @@ redisRateLimitClient.on("error", (error) => {
redisRateLimitClient.on("reconnecting", (err) => {
try {
if (redisRateLimitClient.status === "reconnecting")
console.log("Reconnecting to Redis Session Store...");
else console.log("Error reconnecting to Redis Session Store.");
Logger.info("Reconnecting to Redis Session Rate Limit Store...");
else Logger.error("Error reconnecting to Redis Session Rate Limit Store.");
} catch (error) {}
});
// Listen to the 'connect' event to Redis
redisRateLimitClient.on("connect", (err) => {
try {
if (!err) console.log("Connected to Redis Session Store!");
if (!err) Logger.info("Connected to Redis Session Rate Limit Store!");
} catch (error) {}
});

View File

@ -1,4 +1,5 @@
import { createClient, SupabaseClient } from "@supabase/supabase-js";
import { Logger } from "../lib/logger";
// SupabaseService class initializes the Supabase client conditionally based on environment variables.
class SupabaseService {
@ -10,13 +11,13 @@ class SupabaseService {
// Only initialize the Supabase client if both URL and Service Token are provided.
if (process.env.USE_DB_AUTHENTICATION === "false") {
// Warn the user that Authentication is disabled by setting the client to null
console.warn(
"\x1b[33mAuthentication is disabled. Supabase client will not be initialized.\x1b[0m"
Logger.warn(
"Authentication is disabled. Supabase client will not be initialized."
);
this.client = null;
} else if (!supabaseUrl || !supabaseServiceToken) {
console.error(
"\x1b[31mSupabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable\x1b[0m"
Logger.error(
"Supabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable"
);
} else {
this.client = createClient(supabaseUrl, supabaseServiceToken);
@ -35,10 +36,15 @@ export const supabase_service: SupabaseClient = new Proxy(
new SupabaseService(),
{
get: function (target, prop, receiver) {
if (process.env.USE_DB_AUTHENTICATION === "false") {
Logger.debug(
"Attempted to access Supabase client when it's not configured."
);
}
const client = target.getClient();
// If the Supabase client is not initialized, intercept property access to provide meaningful error feedback.
if (client === null) {
console.error(
Logger.error(
"Attempted to access Supabase client when it's not configured."
);
return () => {

View File

@ -1,3 +1,4 @@
import { Logger } from "../../src/lib/logger";
import { supabase_service } from "./supabase";
export const callWebhook = async (teamId: string, jobId: string,data: any) => {
@ -15,10 +16,7 @@ export const callWebhook = async (teamId: string, jobId: string,data: any) => {
.eq("team_id", teamId)
.limit(1);
if (error) {
console.error(
`Error fetching webhook URL for team ID: ${teamId}`,
error.message
);
Logger.error(`Error fetching webhook URL for team ID: ${teamId}, error: ${error.message}`);
return null;
}
@ -53,9 +51,6 @@ export const callWebhook = async (teamId: string, jobId: string,data: any) => {
}),
});
} catch (error) {
console.error(
`Error sending webhook for team ID: ${teamId}`,
error.message
);
Logger.debug(`Error sending webhook for team ID: ${teamId}, error: ${error.message}`);
}
};

View File

@ -1,5 +1,7 @@
import { createClient, SupabaseClient } from "@supabase/supabase-js";
import "dotenv/config";
import { Logger } from "../../api/src/lib/logger";
// SupabaseService class initializes the Supabase client conditionally based on environment variables.
class SupabaseService {
private client: SupabaseClient | null = null;
@ -10,13 +12,13 @@ class SupabaseService {
// Only initialize the Supabase client if both URL and Service Token are provided.
if (process.env.USE_DB_AUTHENTICATION === "false") {
// Warn the user that Authentication is disabled by setting the client to null
console.warn(
"\x1b[33mAuthentication is disabled. Supabase client will not be initialized.\x1b[0m"
Logger.warn(
"Authentication is disabled. Supabase client will not be initialized."
);
this.client = null;
} else if (!supabaseUrl || !supabaseServiceToken) {
console.error(
"\x1b[31mSupabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable\x1b[0m"
Logger.error(
"Supabase environment variables aren't configured correctly. Supabase client will not be initialized. Fix ENV configuration or disable DB authentication with USE_DB_AUTHENTICATION env variable"
);
} else {
this.client = createClient(supabaseUrl, supabaseServiceToken);
@ -35,10 +37,15 @@ export const supabase_service: SupabaseClient = new Proxy(
new SupabaseService(),
{
get: function (target, prop, receiver) {
if (process.env.USE_DB_AUTHENTICATION === "false") {
Logger.debug(
"Attempted to access Supabase client when it's not configured."
);
}
const client = target.getClient();
// If the Supabase client is not initialized, intercept property access to provide meaningful error feedback.
if (client === null) {
console.error(
Logger.error(
"Attempted to access Supabase client when it's not configured."
);
return () => {