mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 02:18:59 +08:00
fix: fix posthog, add dummy crawl DB items
This commit is contained in:
parent
8a5cad72f6
commit
846610681b
@ -107,6 +107,7 @@ export async function crawlController(req: Request, res: Response) {
|
|||||||
pageOptions,
|
pageOptions,
|
||||||
team_id,
|
team_id,
|
||||||
robots,
|
robots,
|
||||||
|
createdAt: Date.now(),
|
||||||
};
|
};
|
||||||
|
|
||||||
await saveCrawl(id, sc);
|
await saveCrawl(id, sc);
|
||||||
|
@ -8,6 +8,7 @@ export type StoredCrawl = {
|
|||||||
team_id: string;
|
team_id: string;
|
||||||
robots?: string;
|
robots?: string;
|
||||||
cancelled?: boolean;
|
cancelled?: boolean;
|
||||||
|
createdAt: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
export async function saveCrawl(id: string, crawl: StoredCrawl) {
|
export async function saveCrawl(id: string, crawl: StoredCrawl) {
|
||||||
|
@ -44,7 +44,7 @@ export async function logJob(job: FirecrawlJob) {
|
|||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
|
|
||||||
if (process.env.POSTHOG_API_KEY) {
|
if (process.env.POSTHOG_API_KEY && !job.crawl_id) {
|
||||||
let phLog = {
|
let phLog = {
|
||||||
distinctId: "from-api", //* To identify this on the group level, setting distinctid to a static string per posthog docs: https://posthog.com/docs/product-analytics/group-analytics#advanced-server-side-only-capturing-group-events-without-a-user
|
distinctId: "from-api", //* To identify this on the group level, setting distinctid to a static string per posthog docs: https://posthog.com/docs/product-analytics/group-analytics#advanced-server-side-only-capturing-group-events-without-a-user
|
||||||
...(job.team_id !== "preview" && {
|
...(job.team_id !== "preview" && {
|
||||||
@ -65,7 +65,6 @@ export async function logJob(job: FirecrawlJob) {
|
|||||||
extractor_options: job.extractor_options,
|
extractor_options: job.extractor_options,
|
||||||
num_tokens: job.num_tokens,
|
num_tokens: job.num_tokens,
|
||||||
retry: job.retry,
|
retry: job.retry,
|
||||||
crawl_id: job.crawl_id,
|
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
posthog.capture(phLog);
|
posthog.capture(phLog);
|
||||||
|
@ -230,6 +230,21 @@ async function processJob(job: Job, token: string) {
|
|||||||
|
|
||||||
const fullDocs = jobs.map(x => Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue);
|
const fullDocs = jobs.map(x => Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue);
|
||||||
|
|
||||||
|
await logJob({
|
||||||
|
job_id: job.data.crawl_id,
|
||||||
|
success: jobStatus === "completed",
|
||||||
|
message: message,
|
||||||
|
num_docs: fullDocs.length,
|
||||||
|
docs: [],
|
||||||
|
time_taken: (Date.now() - sc.createdAt) / 1000,
|
||||||
|
team_id: job.data.team_id,
|
||||||
|
mode: "crawl",
|
||||||
|
url: sc.originUrl,
|
||||||
|
crawlerOptions: sc.crawlerOptions,
|
||||||
|
pageOptions: sc.pageOptions,
|
||||||
|
origin: job.data.origin,
|
||||||
|
});
|
||||||
|
|
||||||
const data = {
|
const data = {
|
||||||
success: jobStatus !== "failed",
|
success: jobStatus !== "failed",
|
||||||
result: {
|
result: {
|
||||||
@ -281,9 +296,11 @@ async function processJob(job: Job, token: string) {
|
|||||||
error:
|
error:
|
||||||
"Something went wrong... Contact help@mendable.ai or try again." /* etc... */,
|
"Something went wrong... Contact help@mendable.ai or try again." /* etc... */,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (job.data.mode === "crawl" || job.data.crawl_id) {
|
if (job.data.mode === "crawl" || job.data.crawl_id) {
|
||||||
await callWebhook(job.data.team_id, job.data.crawl_id ?? job.id as string, data);
|
await callWebhook(job.data.team_id, job.data.crawl_id ?? job.id as string, data);
|
||||||
}
|
}
|
||||||
|
|
||||||
await logJob({
|
await logJob({
|
||||||
job_id: job.id as string,
|
job_id: job.id as string,
|
||||||
success: false,
|
success: false,
|
||||||
@ -302,6 +319,28 @@ async function processJob(job: Job, token: string) {
|
|||||||
origin: job.data.origin,
|
origin: job.data.origin,
|
||||||
crawl_id: job.data.crawl_id,
|
crawl_id: job.data.crawl_id,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (job.data.crawl_id) {
|
||||||
|
const sc = await getCrawl(job.data.crawl_id);
|
||||||
|
|
||||||
|
await logJob({
|
||||||
|
job_id: job.data.crawl_id,
|
||||||
|
success: false,
|
||||||
|
message:
|
||||||
|
typeof error === "string"
|
||||||
|
? error
|
||||||
|
: error.message ?? "Something went wrong... Contact help@mendable.ai",
|
||||||
|
num_docs: 0,
|
||||||
|
docs: [],
|
||||||
|
time_taken: 0,
|
||||||
|
team_id: job.data.team_id,
|
||||||
|
mode: "crawl",
|
||||||
|
url: sc ? sc.originUrl : job.data.url,
|
||||||
|
crawlerOptions: sc ? sc.crawlerOptions : job.data.crawlerOptions,
|
||||||
|
pageOptions: sc ? sc.pageOptions : job.data.pageOptions,
|
||||||
|
origin: job.data.origin,
|
||||||
|
});
|
||||||
|
}
|
||||||
// done(null, data);
|
// done(null, data);
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user