Nick: rm scrape events

Nicolas 2025-04-23 14:19:25 -04:00
parent feda4dede7
commit 22f7efed35
2 changed files with 128 additions and 129 deletions

View File

@@ -1,109 +1,109 @@
-import { Job } from "bullmq";
-import { supabase_service as supabase } from "../services/supabase";
-import { logger } from "./logger";
-import { configDotenv } from "dotenv";
-import { Engine } from "../scraper/scrapeURL/engines";
-configDotenv();
+// import { Job } from "bullmq";
+// import { supabase_service as supabase } from "../services/supabase";
+// import { logger } from "./logger";
+// import { configDotenv } from "dotenv";
+// import { Engine } from "../scraper/scrapeURL/engines";
+// configDotenv();

-export type ScrapeErrorEvent = {
-  type: "error";
-  message: string;
-  stack?: string;
-};
+// export type ScrapeErrorEvent = {
+//   type: "error";
+//   message: string;
+//   stack?: string;
+// };

-export type ScrapeScrapeEvent = {
-  type: "scrape";
-  url: string;
-  worker?: string;
-  method: Engine;
-  result: null | {
-    success: boolean;
-    response_code?: number;
-    response_size?: number;
-    error?: string | object;
-    // proxy?: string,
-    time_taken: number;
-  };
-};
+// export type ScrapeScrapeEvent = {
+//   type: "scrape";
+//   url: string;
+//   worker?: string;
+//   method: Engine;
+//   result: null | {
+//     success: boolean;
+//     response_code?: number;
+//     response_size?: number;
+//     error?: string | object;
+//     // proxy?: string,
+//     time_taken: number;
+//   };
+// };

-export type ScrapeQueueEvent = {
-  type: "queue";
-  event:
-    | "waiting"
-    | "active"
-    | "completed"
-    | "paused"
-    | "resumed"
-    | "removed"
-    | "failed";
-  worker?: string;
-};
+// export type ScrapeQueueEvent = {
+//   type: "queue";
+//   event:
+//     | "waiting"
+//     | "active"
+//     | "completed"
+//     | "paused"
+//     | "resumed"
+//     | "removed"
+//     | "failed";
+//   worker?: string;
+// };

-export type ScrapeEvent =
-  | ScrapeErrorEvent
-  | ScrapeScrapeEvent
-  | ScrapeQueueEvent;
+// export type ScrapeEvent =
+//   | ScrapeErrorEvent
+//   | ScrapeScrapeEvent
+//   | ScrapeQueueEvent;

-export class ScrapeEvents {
-  static async insert(jobId: string, content: ScrapeEvent) {
-    if (jobId === "TEST") return null;
+// export class ScrapeEvents {
+//   static async insert(jobId: string, content: ScrapeEvent) {
+//     if (jobId === "TEST") return null;

-    const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
-    if (useDbAuthentication) {
-      try {
-        const result = await supabase
-          .from("scrape_events")
-          .insert({
-            job_id: jobId,
-            type: content.type,
-            content: content,
-            // created_at
-          })
-          .select()
-          .single();
-        return (result.data as any).id;
-      } catch (error) {
-        // logger.error(`Error inserting scrape event: ${error}`);
-        return null;
-      }
-    }
+//     const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
+//     if (useDbAuthentication) {
+//       try {
+//         const result = await supabase
+//           .from("scrape_events")
+//           .insert({
+//             job_id: jobId,
+//             type: content.type,
+//             content: content,
+//             // created_at
+//           })
+//           .select()
+//           .single();
+//         return (result.data as any).id;
+//       } catch (error) {
+//         // logger.error(`Error inserting scrape event: ${error}`);
+//         return null;
+//       }
+//     }

-    return null;
-  }
+//     return null;
+//   }

-  static async updateScrapeResult(
-    logId: number | null,
-    result: ScrapeScrapeEvent["result"],
-  ) {
-    if (logId === null) return;
+//   static async updateScrapeResult(
+//     logId: number | null,
+//     result: ScrapeScrapeEvent["result"],
+//   ) {
+//     if (logId === null) return;

-    try {
-      const previousLog = (
-        await supabase.from("scrape_events").select().eq("id", logId).single()
-      ).data as any;
-      await supabase
-        .from("scrape_events")
-        .update({
-          content: {
-            ...previousLog.content,
-            result,
-          },
-        })
-        .eq("id", logId);
-    } catch (error) {
-      logger.error(`Error updating scrape result: ${error}`);
-    }
-  }
+//     try {
+//       const previousLog = (
+//         await supabase.from("scrape_events").select().eq("id", logId).single()
+//       ).data as any;
+//       await supabase
+//         .from("scrape_events")
+//         .update({
+//           content: {
+//             ...previousLog.content,
+//             result,
+//           },
+//         })
+//         .eq("id", logId);
+//     } catch (error) {
+//       logger.error(`Error updating scrape result: ${error}`);
+//     }
+//   }

-  static async logJobEvent(job: Job | any, event: ScrapeQueueEvent["event"]) {
-    try {
-      await this.insert(((job as any).id ? (job as any).id : job) as string, {
-        type: "queue",
-        event,
-        worker: process.env.FLY_MACHINE_ID,
-      });
-    } catch (error) {
-      logger.error(`Error logging job event: ${error}`);
-    }
-  }
-}
+//   static async logJobEvent(job: Job | any, event: ScrapeQueueEvent["event"]) {
+//     try {
+//       await this.insert(((job as any).id ? (job as any).id : job) as string, {
+//         type: "queue",
+//         event,
+//         worker: process.env.FLY_MACHINE_ID,
+//       });
+//     } catch (error) {
+//       logger.error(`Error logging job event: ${error}`);
+//     }
+//   }
+// }
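Note: in the rewritten file above every line is a comment, so the module's exports vanish along with the behavior; that is why the next file's diff also drops the matching import and call sites. As a sketch of an alternative shape for the same removal (hypothetical, not part of this commit), a no-op stub would keep the ScrapeEvents API surface compiling for any remaining importers. The type and method signatures below are copied from the deleted code:

// Hypothetical no-op stub, not in this commit: preserves the deleted module's
// exports while dropping every write to the scrape_events table.
import { Job } from "bullmq";
import { Engine } from "../scraper/scrapeURL/engines";

export type ScrapeErrorEvent = {
  type: "error";
  message: string;
  stack?: string;
};

export type ScrapeScrapeEvent = {
  type: "scrape";
  url: string;
  worker?: string;
  method: Engine;
  result: null | {
    success: boolean;
    response_code?: number;
    response_size?: number;
    error?: string | object;
    time_taken: number;
  };
};

export type ScrapeQueueEvent = {
  type: "queue";
  event:
    | "waiting"
    | "active"
    | "completed"
    | "paused"
    | "resumed"
    | "removed"
    | "failed";
  worker?: string;
};

export type ScrapeEvent =
  | ScrapeErrorEvent
  | ScrapeScrapeEvent
  | ScrapeQueueEvent;

export class ScrapeEvents {
  // insert() used to return the new scrape_events row id; callers already
  // treat null as "nothing was logged", so a constant null keeps them working.
  static async insert(_jobId: string, _content: ScrapeEvent): Promise<null> {
    return null;
  }

  // updateScrapeResult() used to merge the result into the stored event row.
  static async updateScrapeResult(
    _logId: number | null,
    _result: ScrapeScrapeEvent["result"],
  ): Promise<void> {}

  // logJobEvent() used to record queue state transitions per job.
  static async logJobEvent(
    _job: Job | any,
    _event: ScrapeQueueEvent["event"],
  ): Promise<void> {}
}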

View File

@@ -8,7 +8,6 @@ import { billTeam } from "../services/billing/credit_billing";
 import { Document } from "../controllers/v1/types";
 import { supabase_service } from "../services/supabase";
 import { logger as _logger } from "../lib/logger";
-import { ScrapeEvents } from "../lib/scrape-events";
 import { configDotenv } from "dotenv";
 import {
   EngineResultsTracker,
@@ -146,35 +145,35 @@ export async function runWebScraper({
     }
   }

-  const engineOrder = Object.entries(engines)
-    .sort((a, b) => a[1].startedAt - b[1].startedAt)
-    .map((x) => x[0]) as Engine[];
+  // const engineOrder = Object.entries(engines)
+  //   .sort((a, b) => a[1].startedAt - b[1].startedAt)
+  //   .map((x) => x[0]) as Engine[];

-  for (const engine of engineOrder) {
-    const result = engines[engine] as Exclude<
-      EngineResultsTracker[Engine],
-      undefined
-    >;
-    ScrapeEvents.insert(bull_job_id, {
-      type: "scrape",
-      url,
-      method: engine,
-      result: {
-        success: result.state === "success",
-        response_code:
-          result.state === "success" ? result.result.statusCode : undefined,
-        response_size:
-          result.state === "success" ? result.result.html.length : undefined,
-        error:
-          result.state === "error"
-            ? result.error
-            : result.state === "timeout"
-              ? "Timed out"
-              : undefined,
-        time_taken: result.finishedAt - result.startedAt,
-      },
-    });
-  }
+  // for (const engine of engineOrder) {
+  //   const result = engines[engine] as Exclude<
+  //     EngineResultsTracker[Engine],
+  //     undefined
+  //   >;
+  //   ScrapeEvents.insert(bull_job_id, {
+  //     type: "scrape",
+  //     url,
+  //     method: engine,
+  //     result: {
+  //       success: result.state === "success",
+  //       response_code:
+  //         result.state === "success" ? result.result.statusCode : undefined,
+  //       response_size:
+  //         result.state === "success" ? result.result.html.length : undefined,
+  //       error:
+  //         result.state === "error"
+  //           ? result.error
+  //           : result.state === "timeout"
+  //             ? "Timed out"
+  //             : undefined,
+  //       time_taken: result.finishedAt - result.startedAt,
+  //     },
+  //   });
+  // }

   if (error === undefined && response?.success) {
     return response;
@@ -228,7 +227,7 @@ const saveJob = async (
       // // I think the job won't exist here anymore
      // }
    }
-    ScrapeEvents.logJobEvent(job, "completed");
+    // ScrapeEvents.logJobEvent(job, "completed");
  } catch (error) {
    _logger.error(`🐂 Failed to update job status`, {
      module: "runWebScraper",