fix: fix posthog, add dummy crawl DB items

This commit is contained in:
Gergő Móricz 2024-08-15 18:55:18 +02:00
parent 8a5cad72f6
commit 846610681b
4 changed files with 42 additions and 2 deletions

View File

@@ -107,6 +107,7 @@ export async function crawlController(req: Request, res: Response) {
pageOptions,
team_id,
robots,
createdAt: Date.now(),
};
await saveCrawl(id, sc);

View File

@@ -8,6 +8,7 @@ export type StoredCrawl = {
team_id: string;
robots?: string;
cancelled?: boolean;
createdAt: number;
};
export async function saveCrawl(id: string, crawl: StoredCrawl) {

View File

@@ -44,7 +44,7 @@ export async function logJob(job: FirecrawlJob) {
},
]);
if (process.env.POSTHOG_API_KEY) {
if (process.env.POSTHOG_API_KEY && !job.crawl_id) {
let phLog = {
distinctId: "from-api", //* To identify this on the group level, setting distinctid to a static string per posthog docs: https://posthog.com/docs/product-analytics/group-analytics#advanced-server-side-only-capturing-group-events-without-a-user
...(job.team_id !== "preview" && {
@@ -65,7 +65,6 @@ export async function logJob(job: FirecrawlJob) {
extractor_options: job.extractor_options,
num_tokens: job.num_tokens,
retry: job.retry,
crawl_id: job.crawl_id,
},
};
posthog.capture(phLog);

View File

@@ -230,6 +230,21 @@ async function processJob(job: Job, token: string) {
const fullDocs = jobs.map(x => Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue);
await logJob({
job_id: job.data.crawl_id,
success: jobStatus === "completed",
message: message,
num_docs: fullDocs.length,
docs: [],
time_taken: (Date.now() - sc.createdAt) / 1000,
team_id: job.data.team_id,
mode: "crawl",
url: sc.originUrl,
crawlerOptions: sc.crawlerOptions,
pageOptions: sc.pageOptions,
origin: job.data.origin,
});
const data = {
success: jobStatus !== "failed",
result: {
@@ -281,9 +296,11 @@ async function processJob(job: Job, token: string) {
error:
"Something went wrong... Contact help@mendable.ai or try again." /* etc... */,
};
if (job.data.mode === "crawl" || job.data.crawl_id) {
await callWebhook(job.data.team_id, job.data.crawl_id ?? job.id as string, data);
}
await logJob({
job_id: job.id as string,
success: false,
@@ -302,6 +319,28 @@ async function processJob(job: Job, token: string) {
origin: job.data.origin,
crawl_id: job.data.crawl_id,
});
if (job.data.crawl_id) {
const sc = await getCrawl(job.data.crawl_id);
await logJob({
job_id: job.data.crawl_id,
success: false,
message:
typeof error === "string"
? error
: error.message ?? "Something went wrong... Contact help@mendable.ai",
num_docs: 0,
docs: [],
time_taken: 0,
team_id: job.data.team_id,
mode: "crawl",
url: sc ? sc.originUrl : job.data.url,
crawlerOptions: sc ? sc.crawlerOptions : job.data.crawlerOptions,
pageOptions: sc ? sc.pageOptions : job.data.pageOptions,
origin: job.data.origin,
});
}
// done(null, data);
return data;
}