mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-11 22:39:01 +08:00
fix: fix posthog, add dummy crawl DB items
This commit is contained in:
parent
8a5cad72f6
commit
846610681b
@ -107,6 +107,7 @@ export async function crawlController(req: Request, res: Response) {
|
||||
pageOptions,
|
||||
team_id,
|
||||
robots,
|
||||
createdAt: Date.now(),
|
||||
};
|
||||
|
||||
await saveCrawl(id, sc);
|
||||
|
@ -8,6 +8,7 @@ export type StoredCrawl = {
|
||||
team_id: string;
|
||||
robots?: string;
|
||||
cancelled?: boolean;
|
||||
createdAt: number;
|
||||
};
|
||||
|
||||
export async function saveCrawl(id: string, crawl: StoredCrawl) {
|
||||
|
@ -44,7 +44,7 @@ export async function logJob(job: FirecrawlJob) {
|
||||
},
|
||||
]);
|
||||
|
||||
- if (process.env.POSTHOG_API_KEY) {
|
||||
+ if (process.env.POSTHOG_API_KEY && !job.crawl_id) {
|
||||
let phLog = {
|
||||
distinctId: "from-api", //* To identify this on the group level, setting distinctid to a static string per posthog docs: https://posthog.com/docs/product-analytics/group-analytics#advanced-server-side-only-capturing-group-events-without-a-user
|
||||
...(job.team_id !== "preview" && {
|
||||
@ -65,7 +65,6 @@ export async function logJob(job: FirecrawlJob) {
|
||||
extractor_options: job.extractor_options,
|
||||
num_tokens: job.num_tokens,
|
||||
retry: job.retry,
|
||||
crawl_id: job.crawl_id,
|
||||
},
|
||||
};
|
||||
posthog.capture(phLog);
|
||||
|
@ -230,6 +230,21 @@ async function processJob(job: Job, token: string) {
|
||||
|
||||
const fullDocs = jobs.map(x => Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue);
|
||||
|
||||
await logJob({
|
||||
job_id: job.data.crawl_id,
|
||||
success: jobStatus === "completed",
|
||||
message: message,
|
||||
num_docs: fullDocs.length,
|
||||
docs: [],
|
||||
time_taken: (Date.now() - sc.createdAt) / 1000,
|
||||
team_id: job.data.team_id,
|
||||
mode: "crawl",
|
||||
url: sc.originUrl,
|
||||
crawlerOptions: sc.crawlerOptions,
|
||||
pageOptions: sc.pageOptions,
|
||||
origin: job.data.origin,
|
||||
});
|
||||
|
||||
const data = {
|
||||
success: jobStatus !== "failed",
|
||||
result: {
|
||||
@ -281,9 +296,11 @@ async function processJob(job: Job, token: string) {
|
||||
error:
|
||||
"Something went wrong... Contact help@mendable.ai or try again." /* etc... */,
|
||||
};
|
||||
|
||||
if (job.data.mode === "crawl" || job.data.crawl_id) {
|
||||
await callWebhook(job.data.team_id, job.data.crawl_id ?? job.id as string, data);
|
||||
}
|
||||
|
||||
await logJob({
|
||||
job_id: job.id as string,
|
||||
success: false,
|
||||
@ -302,6 +319,28 @@ async function processJob(job: Job, token: string) {
|
||||
origin: job.data.origin,
|
||||
crawl_id: job.data.crawl_id,
|
||||
});
|
||||
|
||||
if (job.data.crawl_id) {
|
||||
const sc = await getCrawl(job.data.crawl_id);
|
||||
|
||||
await logJob({
|
||||
job_id: job.data.crawl_id,
|
||||
success: false,
|
||||
message:
|
||||
typeof error === "string"
|
||||
? error
|
||||
: error.message ?? "Something went wrong... Contact help@mendable.ai",
|
||||
num_docs: 0,
|
||||
docs: [],
|
||||
time_taken: 0,
|
||||
team_id: job.data.team_id,
|
||||
mode: "crawl",
|
||||
url: sc ? sc.originUrl : job.data.url,
|
||||
crawlerOptions: sc ? sc.crawlerOptions : job.data.crawlerOptions,
|
||||
pageOptions: sc ? sc.pageOptions : job.data.pageOptions,
|
||||
origin: job.data.origin,
|
||||
});
|
||||
}
|
||||
// done(null, data);
|
||||
return data;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user