Merge pull request #516 from kevinswiber/fix/use-db-auth-in-single-url-scraper

Ensuring USE_DB_AUTHENTICATION is true in the single URL scraper.
This commit is contained in:
Nicolas 2024-09-02 23:35:52 -03:00 committed by GitHub
commit c3d90d494c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 34 additions and 21 deletions

View File

@ -25,7 +25,8 @@ export class Logger {
const color = Logger.colors[level];
console[level.toLowerCase()](color, `[${new Date().toISOString()}]${level} - ${message}`);
// if (process.env.USE_DB_AUTH) {
// const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
// if (useDbAuthentication) {
// save to supabase? another place?
// supabase.from('logs').insert({ level: level, message: message, timestamp: new Date().toISOString(), success: boolean });
// }

View File

@ -36,7 +36,8 @@ export class ScrapeEvents {
static async insert(jobId: string, content: ScrapeEvent) {
if (jobId === "TEST") return null;
if (process.env.USE_DB_AUTHENTICATION) {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (useDbAuthentication) {
try {
const result = await supabase.from("scrape_events").insert({
job_id: jobId,

View File

@ -7,7 +7,8 @@ export function withAuth<T extends AuthResponse, U extends any[]>(
originalFunction: (...args: U) => Promise<T>
) {
return async function (...args: U): Promise<T> {
if (process.env.USE_DB_AUTHENTICATION === "false") {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (!useDbAuthentication) {
if (warningCount < 5) {
Logger.warn("You're bypassing authentication");
warningCount++;

View File

@ -144,7 +144,8 @@ export async function runWebScraper({
const saveJob = async (job: Job, result: any, token: string, mode: string) => {
try {
if (process.env.USE_DB_AUTHENTICATION === "true") {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (useDbAuthentication) {
const { data, error } = await supabase_service
.from("firecrawl_jobs")
.update({ docs: result })

View File

@ -23,12 +23,15 @@ import { clientSideError } from "../../strings";
dotenv.config();
const useScrapingBee = process.env.SCRAPING_BEE_API_KEY !== '' && process.env.SCRAPING_BEE_API_KEY !== undefined;
const useFireEngine = process.env.FIRE_ENGINE_BETA_URL !== '' && process.env.FIRE_ENGINE_BETA_URL !== undefined;
export const baseScrapers = [
"fire-engine;chrome-cdp",
"fire-engine",
"scrapingBee",
process.env.USE_DB_AUTHENTICATION ? undefined : "playwright",
"scrapingBeeLoad",
useFireEngine ? "fire-engine;chrome-cdp" : undefined,
useFireEngine ? "fire-engine" : undefined,
useScrapingBee ? "scrapingBee" : undefined,
useFireEngine ? undefined : "playwright",
useScrapingBee ? "scrapingBeeLoad" : undefined,
"fetch",
].filter(Boolean);
@ -85,18 +88,18 @@ function getScrapingFallbackOrder(
});
let defaultOrder = [
!process.env.USE_DB_AUTHENTICATION ? undefined : "fire-engine;chrome-cdp",
!process.env.USE_DB_AUTHENTICATION ? undefined : "fire-engine",
"scrapingBee",
process.env.USE_DB_AUTHENTICATION ? undefined : "playwright",
"scrapingBeeLoad",
useFireEngine ? "fire-engine;chrome-cdp" : undefined,
useFireEngine ? "fire-engine" : undefined,
useScrapingBee ? "scrapingBee" : undefined,
useScrapingBee ? "scrapingBeeLoad" : undefined,
useFireEngine ? undefined : "playwright",
"fetch",
].filter(Boolean);
if (isWaitPresent || isScreenshotPresent || isHeadersPresent) {
defaultOrder = [
"fire-engine",
process.env.USE_DB_AUTHENTICATION ? undefined : "playwright",
useFireEngine ? undefined : "playwright",
...defaultOrder.filter(
(scraper) => scraper !== "fire-engine" && scraper !== "playwright"
),

View File

@ -3,7 +3,8 @@ import { Logger } from "../../../src/lib/logger";
import "dotenv/config";
export async function logCrawl(job_id: string, team_id: string) {
if (process.env.USE_DB_AUTHENTICATION === 'true') {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (useDbAuthentication) {
try {
const { data, error } = await supabase_service
.from("bulljobs_teams")

View File

@ -7,7 +7,8 @@ import { Logger } from "../../lib/logger";
export async function logJob(job: FirecrawlJob) {
try {
if (process.env.USE_DB_AUTHENTICATION === "false") {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (!useDbAuthentication) {
return;
}

View File

@ -8,7 +8,8 @@ export async function logScrape(
scrapeLog: ScrapeLog,
pageOptions?: PageOptions
) {
if (process.env.USE_DB_AUTHENTICATION === "false") {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (!useDbAuthentication) {
Logger.debug("Skipping logging scrape to Supabase");
return;
}

View File

@ -8,8 +8,9 @@ class SupabaseService {
constructor() {
const supabaseUrl = process.env.SUPABASE_URL;
const supabaseServiceToken = process.env.SUPABASE_SERVICE_TOKEN;
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
// Only initialize the Supabase client if both URL and Service Token are provided.
if (process.env.USE_DB_AUTHENTICATION === "false") {
if (!useDbAuthentication) {
// Warn the user that Authentication is disabled by setting the client to null
Logger.warn(
"Authentication is disabled. Supabase client will not be initialized."

View File

@ -9,7 +9,8 @@ class SupabaseService {
const supabaseUrl = process.env.SUPABASE_URL;
const supabaseServiceToken = process.env.SUPABASE_SERVICE_TOKEN;
// Only initialize the Supabase client if both URL and Service Token are provided.
if (process.env.USE_DB_AUTHENTICATION === "false") {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (!useDbAuthentication) {
// Warn the user that Authentication is disabled by setting the client to null
console.warn(
"Authentication is disabled. Supabase client will not be initialized."
@ -36,7 +37,8 @@ export const supabase_service: SupabaseClient = new Proxy(
new SupabaseService(),
{
get: function (target, prop, receiver) {
if (process.env.USE_DB_AUTHENTICATION === "false") {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (!useDbAuthentication) {
console.debug(
"Attempted to access Supabase client when it's not configured."
);