Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl (synced 2025-08-11 18:48:59 +08:00)
Nick: rm scrape events

parent: feda4dede7
commit: 22f7efed35
@@ -1,109 +1,109 @@
-import { Job } from "bullmq";
-import { supabase_service as supabase } from "../services/supabase";
-import { logger } from "./logger";
-import { configDotenv } from "dotenv";
-import { Engine } from "../scraper/scrapeURL/engines";
-configDotenv();
+// import { Job } from "bullmq";
+// import { supabase_service as supabase } from "../services/supabase";
+// import { logger } from "./logger";
+// import { configDotenv } from "dotenv";
+// import { Engine } from "../scraper/scrapeURL/engines";
+// configDotenv();

-export type ScrapeErrorEvent = {
-  type: "error";
-  message: string;
-  stack?: string;
-};
+// export type ScrapeErrorEvent = {
+//   type: "error";
+//   message: string;
+//   stack?: string;
+// };

-export type ScrapeScrapeEvent = {
-  type: "scrape";
-  url: string;
-  worker?: string;
-  method: Engine;
-  result: null | {
-    success: boolean;
-    response_code?: number;
-    response_size?: number;
-    error?: string | object;
-    // proxy?: string,
-    time_taken: number;
-  };
-};
+// export type ScrapeScrapeEvent = {
+//   type: "scrape";
+//   url: string;
+//   worker?: string;
+//   method: Engine;
+//   result: null | {
+//     success: boolean;
+//     response_code?: number;
+//     response_size?: number;
+//     error?: string | object;
+//     // proxy?: string,
+//     time_taken: number;
+//   };
+// };

-export type ScrapeQueueEvent = {
-  type: "queue";
-  event:
-    | "waiting"
-    | "active"
-    | "completed"
-    | "paused"
-    | "resumed"
-    | "removed"
-    | "failed";
-  worker?: string;
-};
+// export type ScrapeQueueEvent = {
+//   type: "queue";
+//   event:
+//     | "waiting"
+//     | "active"
+//     | "completed"
+//     | "paused"
+//     | "resumed"
+//     | "removed"
+//     | "failed";
+//   worker?: string;
+// };

-export type ScrapeEvent =
-  | ScrapeErrorEvent
-  | ScrapeScrapeEvent
-  | ScrapeQueueEvent;
+// export type ScrapeEvent =
+//   | ScrapeErrorEvent
+//   | ScrapeScrapeEvent
+//   | ScrapeQueueEvent;

-export class ScrapeEvents {
-  static async insert(jobId: string, content: ScrapeEvent) {
-    if (jobId === "TEST") return null;
+// export class ScrapeEvents {
+//   static async insert(jobId: string, content: ScrapeEvent) {
+//     if (jobId === "TEST") return null;

-    const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
-    if (useDbAuthentication) {
-      try {
-        const result = await supabase
-          .from("scrape_events")
-          .insert({
-            job_id: jobId,
-            type: content.type,
-            content: content,
-            // created_at
-          })
-          .select()
-          .single();
-        return (result.data as any).id;
-      } catch (error) {
-        // logger.error(`Error inserting scrape event: ${error}`);
-        return null;
-      }
-    }
+//     const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
+//     if (useDbAuthentication) {
+//       try {
+//         const result = await supabase
+//           .from("scrape_events")
+//           .insert({
+//             job_id: jobId,
+//             type: content.type,
+//             content: content,
+//             // created_at
+//           })
+//           .select()
+//           .single();
+//         return (result.data as any).id;
+//       } catch (error) {
+//         // logger.error(`Error inserting scrape event: ${error}`);
+//         return null;
+//       }
+//     }

-    return null;
-  }
+//     return null;
+//   }

-  static async updateScrapeResult(
-    logId: number | null,
-    result: ScrapeScrapeEvent["result"],
-  ) {
-    if (logId === null) return;
+//   static async updateScrapeResult(
+//     logId: number | null,
+//     result: ScrapeScrapeEvent["result"],
+//   ) {
+//     if (logId === null) return;

-    try {
-      const previousLog = (
-        await supabase.from("scrape_events").select().eq("id", logId).single()
-      ).data as any;
-      await supabase
-        .from("scrape_events")
-        .update({
-          content: {
-            ...previousLog.content,
-            result,
-          },
-        })
-        .eq("id", logId);
-    } catch (error) {
-      logger.error(`Error updating scrape result: ${error}`);
-    }
-  }
+//     try {
+//       const previousLog = (
+//         await supabase.from("scrape_events").select().eq("id", logId).single()
+//       ).data as any;
+//       await supabase
+//         .from("scrape_events")
+//         .update({
+//           content: {
+//             ...previousLog.content,
+//             result,
+//           },
+//         })
+//         .eq("id", logId);
+//     } catch (error) {
+//       logger.error(`Error updating scrape result: ${error}`);
+//     }
+//   }

-  static async logJobEvent(job: Job | any, event: ScrapeQueueEvent["event"]) {
-    try {
-      await this.insert(((job as any).id ? (job as any).id : job) as string, {
-        type: "queue",
-        event,
-        worker: process.env.FLY_MACHINE_ID,
-      });
-    } catch (error) {
-      logger.error(`Error logging job event: ${error}`);
-    }
-  }
-}
+//   static async logJobEvent(job: Job | any, event: ScrapeQueueEvent["event"]) {
+//     try {
+//       await this.insert(((job as any).id ? (job as any).id : job) as string, {
+//         type: "queue",
+//         event,
+//         worker: process.env.FLY_MACHINE_ID,
+//       });
+//     } catch (error) {
+//       logger.error(`Error logging job event: ${error}`);
+//     }
+//   }
+// }
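For context, a minimal usage sketch of the logger being commented out above, as it could have been driven before this change. It is not taken from the repo: the relative import path, job id, URL, and result values are illustrative assumptions, and the Engine value is cast because the engine union is defined elsewhere.

import { ScrapeEvents } from "./scrape-events"; // assumed relative path

async function demoScrapeEventTrail() {
  // Record a scrape attempt; insert() resolves to the new scrape_events row id,
  // or null when DB auth is disabled or jobId === "TEST".
  const logId = await ScrapeEvents.insert("job-123", {
    type: "scrape",
    url: "https://example.com",
    method: "fetch" as any, // placeholder Engine value
    result: null,
  });

  // Attach the outcome to the same row once the attempt finishes.
  await ScrapeEvents.updateScrapeResult(logId, {
    success: true,
    response_code: 200,
    response_size: 48210,
    time_taken: 350,
  });

  // Queue lifecycle events are stored through the same insert() path.
  await ScrapeEvents.logJobEvent("job-123", "completed");
}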
@@ -8,7 +8,6 @@ import { billTeam } from "../services/billing/credit_billing";
 import { Document } from "../controllers/v1/types";
 import { supabase_service } from "../services/supabase";
 import { logger as _logger } from "../lib/logger";
-import { ScrapeEvents } from "../lib/scrape-events";
 import { configDotenv } from "dotenv";
 import {
   EngineResultsTracker,
@@ -146,35 +145,35 @@ export async function runWebScraper({
     }
   }

-  const engineOrder = Object.entries(engines)
-    .sort((a, b) => a[1].startedAt - b[1].startedAt)
-    .map((x) => x[0]) as Engine[];
+  // const engineOrder = Object.entries(engines)
+  //   .sort((a, b) => a[1].startedAt - b[1].startedAt)
+  //   .map((x) => x[0]) as Engine[];

-  for (const engine of engineOrder) {
-    const result = engines[engine] as Exclude<
-      EngineResultsTracker[Engine],
-      undefined
-    >;
-    ScrapeEvents.insert(bull_job_id, {
-      type: "scrape",
-      url,
-      method: engine,
-      result: {
-        success: result.state === "success",
-        response_code:
-          result.state === "success" ? result.result.statusCode : undefined,
-        response_size:
-          result.state === "success" ? result.result.html.length : undefined,
-        error:
-          result.state === "error"
-            ? result.error
-            : result.state === "timeout"
-              ? "Timed out"
-              : undefined,
-        time_taken: result.finishedAt - result.startedAt,
-      },
-    });
-  }
+  // for (const engine of engineOrder) {
+  //   const result = engines[engine] as Exclude<
+  //     EngineResultsTracker[Engine],
+  //     undefined
+  //   >;
+  //   ScrapeEvents.insert(bull_job_id, {
+  //     type: "scrape",
+  //     url,
+  //     method: engine,
+  //     result: {
+  //       success: result.state === "success",
+  //       response_code:
+  //         result.state === "success" ? result.result.statusCode : undefined,
+  //       response_size:
+  //         result.state === "success" ? result.result.html.length : undefined,
+  //       error:
+  //         result.state === "error"
+  //           ? result.error
+  //           : result.state === "timeout"
+  //             ? "Timed out"
+  //             : undefined,
+  //       time_taken: result.finishedAt - result.startedAt,
+  //     },
+  //   });
+  // }

   if (error === undefined && response?.success) {
     return response;
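As a reading aid: the per-engine tracker shape the removed loop above appears to assume, inferred only from the field accesses in this hunk; the repo's actual EngineResultsTracker definition lives elsewhere and may differ.

// Inferred, illustrative shape; not the repo's actual definition.
type EngineAttemptSketch =
  | {
      state: "success";
      startedAt: number;
      finishedAt: number;
      result: { statusCode: number; html: string };
    }
  | { state: "error"; startedAt: number; finishedAt: number; error: unknown }
  | { state: "timeout"; startedAt: number; finishedAt: number };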
@@ -228,7 +227,7 @@ const saveJob = async (
       // // I think the job won't exist here anymore
       // }
     }
-    ScrapeEvents.logJobEvent(job, "completed");
+    // ScrapeEvents.logJobEvent(job, "completed");
   } catch (error) {
     _logger.error(`🐂 Failed to update job status`, {
       module: "runWebScraper",
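Since this commit only comments the writers out, rows already in the scrape_events table remain readable. A minimal supabase-js sketch for pulling one job's event trail follows; the client setup and environment variable names are assumptions, the created_at column is assumed to be a default timestamp, and the other columns come from the insert() payload above.

import { createClient } from "@supabase/supabase-js";

// Assumed env var names; substitute your own project credentials.
const supabase = createClient(
  process.env.SUPABASE_URL!,
  process.env.SUPABASE_SERVICE_TOKEN!,
);

async function readScrapeEventTrail(jobId: string) {
  // Columns mirror the insert() payload: job_id, type, content (+ created_at).
  const { data, error } = await supabase
    .from("scrape_events")
    .select("id, job_id, type, content, created_at")
    .eq("job_id", jobId)
    .order("created_at", { ascending: true });
  if (error) throw error;
  return data;
}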