diff --git a/apps/api/src/lib/scrape-events.ts b/apps/api/src/lib/scrape-events.ts
index 97e2cecc..a50f8fba 100644
--- a/apps/api/src/lib/scrape-events.ts
+++ b/apps/api/src/lib/scrape-events.ts
@@ -1,109 +1,109 @@
-import { Job } from "bullmq";
-import { supabase_service as supabase } from "../services/supabase";
-import { logger } from "./logger";
-import { configDotenv } from "dotenv";
-import { Engine } from "../scraper/scrapeURL/engines";
-configDotenv();
+// import { Job } from "bullmq";
+// import { supabase_service as supabase } from "../services/supabase";
+// import { logger } from "./logger";
+// import { configDotenv } from "dotenv";
+// import { Engine } from "../scraper/scrapeURL/engines";
+// configDotenv();
 
-export type ScrapeErrorEvent = {
-  type: "error";
-  message: string;
-  stack?: string;
-};
+// export type ScrapeErrorEvent = {
+//   type: "error";
+//   message: string;
+//   stack?: string;
+// };
 
-export type ScrapeScrapeEvent = {
-  type: "scrape";
-  url: string;
-  worker?: string;
-  method: Engine;
-  result: null | {
-    success: boolean;
-    response_code?: number;
-    response_size?: number;
-    error?: string | object;
-    // proxy?: string,
-    time_taken: number;
-  };
-};
+// export type ScrapeScrapeEvent = {
+//   type: "scrape";
+//   url: string;
+//   worker?: string;
+//   method: Engine;
+//   result: null | {
+//     success: boolean;
+//     response_code?: number;
+//     response_size?: number;
+//     error?: string | object;
+//     // proxy?: string,
+//     time_taken: number;
+//   };
+// };
 
-export type ScrapeQueueEvent = {
-  type: "queue";
-  event:
-    | "waiting"
-    | "active"
-    | "completed"
-    | "paused"
-    | "resumed"
-    | "removed"
-    | "failed";
-  worker?: string;
-};
+// export type ScrapeQueueEvent = {
+//   type: "queue";
+//   event:
+//     | "waiting"
+//     | "active"
+//     | "completed"
+//     | "paused"
+//     | "resumed"
+//     | "removed"
+//     | "failed";
+//   worker?: string;
+// };
 
-export type ScrapeEvent =
-  | ScrapeErrorEvent
-  | ScrapeScrapeEvent
-  | ScrapeQueueEvent;
+// export type ScrapeEvent =
+//   | ScrapeErrorEvent
+//   | ScrapeScrapeEvent
+//   | ScrapeQueueEvent;
 
-export class ScrapeEvents {
-  static async insert(jobId: string, content: ScrapeEvent) {
-    if (jobId === "TEST") return null;
+// export class ScrapeEvents {
+//   static async insert(jobId: string, content: ScrapeEvent) {
+//     if (jobId === "TEST") return null;
 
-    const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
-    if (useDbAuthentication) {
-      try {
-        const result = await supabase
-          .from("scrape_events")
-          .insert({
-            job_id: jobId,
-            type: content.type,
-            content: content,
-            // created_at
-          })
-          .select()
-          .single();
-        return (result.data as any).id;
-      } catch (error) {
-        // logger.error(`Error inserting scrape event: ${error}`);
-        return null;
-      }
-    }
+//     const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
+//     if (useDbAuthentication) {
+//       try {
+//         const result = await supabase
+//           .from("scrape_events")
+//           .insert({
+//             job_id: jobId,
+//             type: content.type,
+//             content: content,
+//             // created_at
+//           })
+//           .select()
+//           .single();
+//         return (result.data as any).id;
+//       } catch (error) {
+//         // logger.error(`Error inserting scrape event: ${error}`);
+//         return null;
+//       }
+//     }
 
-    return null;
-  }
+//     return null;
+//   }
 
-  static async updateScrapeResult(
-    logId: number | null,
-    result: ScrapeScrapeEvent["result"],
-  ) {
-    if (logId === null) return;
+//   static async updateScrapeResult(
+//     logId: number | null,
+//     result: ScrapeScrapeEvent["result"],
+//   ) {
+//     if (logId === null) return;
 
-    try {
-      const previousLog = (
-        await supabase.from("scrape_events").select().eq("id", logId).single()
-      ).data as any;
-      await supabase
-        .from("scrape_events")
-        .update({
-          content: {
-            ...previousLog.content,
-            result,
-          },
-        })
-        .eq("id", logId);
-    } catch (error) {
-      logger.error(`Error updating scrape result: ${error}`);
-    }
-  }
+//     try {
+//       const previousLog = (
+//         await supabase.from("scrape_events").select().eq("id", logId).single()
+//       ).data as any;
+//       await supabase
+//         .from("scrape_events")
+//         .update({
+//           content: {
+//             ...previousLog.content,
+//             result,
+//           },
+//         })
+//         .eq("id", logId);
+//     } catch (error) {
+//       logger.error(`Error updating scrape result: ${error}`);
+//     }
+//   }
 
-  static async logJobEvent(job: Job | any, event: ScrapeQueueEvent["event"]) {
-    try {
-      await this.insert(((job as any).id ? (job as any).id : job) as string, {
-        type: "queue",
-        event,
-        worker: process.env.FLY_MACHINE_ID,
-      });
-    } catch (error) {
-      logger.error(`Error logging job event: ${error}`);
-    }
-  }
-}
+//   static async logJobEvent(job: Job | any, event: ScrapeQueueEvent["event"]) {
+//     try {
+//       await this.insert(((job as any).id ? (job as any).id : job) as string, {
+//         type: "queue",
+//         event,
+//         worker: process.env.FLY_MACHINE_ID,
+//       });
+//     } catch (error) {
+//       logger.error(`Error logging job event: ${error}`);
+//     }
+//   }
+// }
diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts
index 8e287180..2f9b6495 100644
--- a/apps/api/src/main/runWebScraper.ts
+++ b/apps/api/src/main/runWebScraper.ts
@@ -8,7 +8,6 @@ import { billTeam } from "../services/billing/credit_billing";
 import { Document } from "../controllers/v1/types";
 import { supabase_service } from "../services/supabase";
 import { logger as _logger } from "../lib/logger";
-import { ScrapeEvents } from "../lib/scrape-events";
 import { configDotenv } from "dotenv";
 import {
   EngineResultsTracker,
@@ -146,35 +145,35 @@ export async function runWebScraper({
     }
   }
 
-  const engineOrder = Object.entries(engines)
-    .sort((a, b) => a[1].startedAt - b[1].startedAt)
-    .map((x) => x[0]) as Engine[];
+  // const engineOrder = Object.entries(engines)
+  //   .sort((a, b) => a[1].startedAt - b[1].startedAt)
+  //   .map((x) => x[0]) as Engine[];
 
-  for (const engine of engineOrder) {
-    const result = engines[engine] as Exclude<
-      EngineResultsTracker[Engine],
-      undefined
-    >;
-    ScrapeEvents.insert(bull_job_id, {
-      type: "scrape",
-      url,
-      method: engine,
-      result: {
-        success: result.state === "success",
-        response_code:
-          result.state === "success" ? result.result.statusCode : undefined,
-        response_size:
-          result.state === "success" ? result.result.html.length : undefined,
-        error:
-          result.state === "error"
-            ? result.error
-            : result.state === "timeout"
-              ? "Timed out"
-              : undefined,
-        time_taken: result.finishedAt - result.startedAt,
-      },
-    });
-  }
+  // for (const engine of engineOrder) {
+  //   const result = engines[engine] as Exclude<
+  //     EngineResultsTracker[Engine],
+  //     undefined
+  //   >;
+  //   ScrapeEvents.insert(bull_job_id, {
+  //     type: "scrape",
+  //     url,
+  //     method: engine,
+  //     result: {
+  //       success: result.state === "success",
+  //       response_code:
+  //         result.state === "success" ? result.result.statusCode : undefined,
+  //       response_size:
+  //         result.state === "success" ? result.result.html.length : undefined,
+  //       error:
+  //         result.state === "error"
+  //           ? result.error
+  //           : result.state === "timeout"
+  //             ? "Timed out"
+  //             : undefined,
+  //       time_taken: result.finishedAt - result.startedAt,
+  //     },
+  //   });
+  // }
 
   if (error === undefined && response?.success) {
     return response;
@@ -228,7 +227,7 @@ const saveJob = async (
       //   // I think the job won't exist here anymore
      // }
     }
-    ScrapeEvents.logJobEvent(job, "completed");
+    // ScrapeEvents.logJobEvent(job, "completed");
   } catch (error) {
     _logger.error(`🐂 Failed to update job status`, {
       module: "runWebScraper",