mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-16 06:25:56 +08:00
Nick:
This commit is contained in:
parent
5ecd9cb6f5
commit
32849b017f
@ -117,7 +117,7 @@ export async function scrapWithFireEngine({
|
||||
} finally {
|
||||
const endTime = Date.now();
|
||||
logParams.time_taken_seconds = (endTime - logParams.startTime) / 1000;
|
||||
await logScrape(logParams);
|
||||
await logScrape(logParams, pageOptions);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
import { ExtractorOptions } from './../../lib/entities';
|
||||
import { ExtractorOptions } from "./../../lib/entities";
|
||||
import { supabase_service } from "../supabase";
|
||||
import { FirecrawlJob } from "../../types";
|
||||
import { posthog } from "../posthog";
|
||||
@ -11,6 +11,16 @@ export async function logJob(job: FirecrawlJob) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Redact any pages that have an authorization header
|
||||
if (
|
||||
job.pageOptions &&
|
||||
job.pageOptions.headers &&
|
||||
job.pageOptions.headers["Authorization"]
|
||||
) {
|
||||
job.pageOptions.headers["Authorization"] = "REDACTED";
|
||||
job.docs = [{ content: "REDACTED DUE TO AUTHORIZATION HEADER", html: "REDACTED DUE TO AUTHORIZATION HEADER" }];
|
||||
}
|
||||
|
||||
const { data, error } = await supabase_service
|
||||
.from("firecrawl_jobs")
|
||||
.insert([
|
||||
@ -27,16 +37,15 @@ export async function logJob(job: FirecrawlJob) {
|
||||
page_options: job.pageOptions,
|
||||
origin: job.origin,
|
||||
extractor_options: job.extractor_options,
|
||||
num_tokens: job.num_tokens
|
||||
num_tokens: job.num_tokens,
|
||||
},
|
||||
]);
|
||||
|
||||
if (process.env.POSTHOG_API_KEY) {
|
||||
|
||||
let phLog = {
|
||||
distinctId: "from-api", //* To identify this on the group level, setting distinctid to a static string per posthog docs: https://posthog.com/docs/product-analytics/group-analytics#advanced-server-side-only-capturing-group-events-without-a-user
|
||||
...(job.team_id !== "preview" && {
|
||||
groups: { team: job.team_id }
|
||||
groups: { team: job.team_id },
|
||||
}), //* Identifying event on this team
|
||||
event: "job-logged",
|
||||
properties: {
|
||||
@ -51,9 +60,9 @@ export async function logJob(job: FirecrawlJob) {
|
||||
page_options: job.pageOptions,
|
||||
origin: job.origin,
|
||||
extractor_options: job.extractor_options,
|
||||
num_tokens: job.num_tokens
|
||||
num_tokens: job.num_tokens,
|
||||
},
|
||||
}
|
||||
};
|
||||
posthog.capture(phLog);
|
||||
}
|
||||
if (error) {
|
||||
|
@ -1,17 +1,27 @@
|
||||
import "dotenv/config";
|
||||
import { ScrapeLog } from "../../types";
|
||||
import { supabase_service } from "../supabase";
|
||||
import { PageOptions } from "../../lib/entities";
|
||||
|
||||
export async function logScrape(scrapeLog: ScrapeLog) {
|
||||
export async function logScrape(
|
||||
scrapeLog: ScrapeLog,
|
||||
pageOptions?: PageOptions
|
||||
) {
|
||||
try {
|
||||
// Only log jobs in production
|
||||
// if (process.env.ENV !== "production") {
|
||||
// return;
|
||||
// }
|
||||
// Redact any pages that have an authorization header
|
||||
if (
|
||||
pageOptions &&
|
||||
pageOptions.headers &&
|
||||
pageOptions.headers["Authorization"]
|
||||
) {
|
||||
scrapeLog.html = "REDACTED DUE TO AUTHORIZATION HEADER";
|
||||
}
|
||||
|
||||
const { data, error } = await supabase_service
|
||||
.from("scrape_logs")
|
||||
.insert([
|
||||
const { data, error } = await supabase_service.from("scrape_logs").insert([
|
||||
{
|
||||
url: scrapeLog.url,
|
||||
scraper: scrapeLog.scraper,
|
||||
|
Loading…
x
Reference in New Issue
Block a user