mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-20 19:59:12 +08:00
Merge branch 'main' of https://github.com/mendableai/firecrawl
This commit is contained in:
commit
cf17479626
@ -315,5 +315,33 @@ describe("HTML Transformer", () => {
|
|||||||
expect(result).toContain("á é í ó ú ñ");
|
expect(result).toContain("á é í ó ú ñ");
|
||||||
expect(result).toContain("🎉 👍 🚀");
|
expect(result).toContain("🎉 👍 🚀");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("should make all URLs absolute", async () => {
|
||||||
|
const options = {
|
||||||
|
html: `
|
||||||
|
<div>
|
||||||
|
<a href="https://example.com/fullurl">hi</a>
|
||||||
|
<a href="http://example.net/fullurl">hi</a>
|
||||||
|
<a href="/pathurl">hi</a>
|
||||||
|
<a href="//example.net/proturl">hi</a>
|
||||||
|
<a href="?queryurl">hi</a>
|
||||||
|
<a href="#hashurl">hi</a>
|
||||||
|
</div>
|
||||||
|
`,
|
||||||
|
url: "https://example.com",
|
||||||
|
include_tags: [],
|
||||||
|
exclude_tags: [],
|
||||||
|
only_main_content: true,
|
||||||
|
};
|
||||||
|
|
||||||
|
const result = await transformHtml(options);
|
||||||
|
console.log(result)
|
||||||
|
expect(result).toContain("https://example.com/fullurl");
|
||||||
|
expect(result).toContain("http://example.net/fullurl");
|
||||||
|
expect(result).toContain("https://example.com/pathurl");
|
||||||
|
expect(result).toContain("https://example.net/proturl");
|
||||||
|
expect(result).toContain("https://example.com/?queryurl");
|
||||||
|
expect(result).toContain("https://example.com/#hashurl");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
@ -48,7 +48,7 @@ export async function supaBillTeam(
|
|||||||
credits,
|
credits,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (team_id === "preview") {
|
if (team_id === "preview" || team_id.startsWith("preview_")) {
|
||||||
return { success: true, message: "Preview team, no credits used" };
|
return { success: true, message: "Preview team, no credits used" };
|
||||||
}
|
}
|
||||||
_logger.info(`Billing team ${team_id} for ${credits} credits`);
|
_logger.info(`Billing team ${team_id} for ${credits} credits`);
|
||||||
@ -109,7 +109,7 @@ export async function supaCheckTeamCredits(
|
|||||||
credits: number,
|
credits: number,
|
||||||
): Promise<CheckTeamCreditsResponse> {
|
): Promise<CheckTeamCreditsResponse> {
|
||||||
// WARNING: chunk will be null if team_id is preview -- do not perform operations on it under ANY circumstances - mogery
|
// WARNING: chunk will be null if team_id is preview -- do not perform operations on it under ANY circumstances - mogery
|
||||||
if (team_id === "preview") {
|
if (team_id === "preview" || team_id.startsWith("preview_")) {
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
message: "Preview team, no credits used",
|
message: "Preview team, no credits used",
|
||||||
|
@ -50,7 +50,7 @@ export async function logJob(job: FirecrawlJob, force: boolean = false) {
|
|||||||
num_docs: job.num_docs,
|
num_docs: job.num_docs,
|
||||||
docs: cleanOfNull(job.docs),
|
docs: cleanOfNull(job.docs),
|
||||||
time_taken: job.time_taken,
|
time_taken: job.time_taken,
|
||||||
team_id: job.team_id === "preview" ? null : job.team_id,
|
team_id: (job.team_id === "preview" || job.team_id?.startsWith("preview_"))? null : job.team_id,
|
||||||
mode: job.mode,
|
mode: job.mode,
|
||||||
url: job.url,
|
url: job.url,
|
||||||
crawler_options: job.crawlerOptions,
|
crawler_options: job.crawlerOptions,
|
||||||
@ -112,7 +112,7 @@ export async function logJob(job: FirecrawlJob, force: boolean = false) {
|
|||||||
if (process.env.POSTHOG_API_KEY && !job.crawl_id) {
|
if (process.env.POSTHOG_API_KEY && !job.crawl_id) {
|
||||||
let phLog = {
|
let phLog = {
|
||||||
distinctId: "from-api", //* To identify this on the group level, setting distinctid to a static string per posthog docs: https://posthog.com/docs/product-analytics/group-analytics#advanced-server-side-only-capturing-group-events-without-a-user
|
distinctId: "from-api", //* To identify this on the group level, setting distinctid to a static string per posthog docs: https://posthog.com/docs/product-analytics/group-analytics#advanced-server-side-only-capturing-group-events-without-a-user
|
||||||
...(job.team_id !== "preview" && {
|
...((job.team_id !== "preview" && !job.team_id?.startsWith("preview_")) && {
|
||||||
groups: { team: job.team_id },
|
groups: { team: job.team_id },
|
||||||
}), //* Identifying event on this team
|
}), //* Identifying event on this team
|
||||||
event: "job-logged",
|
event: "job-logged",
|
||||||
@ -121,7 +121,7 @@ export async function logJob(job: FirecrawlJob, force: boolean = false) {
|
|||||||
message: job.message,
|
message: job.message,
|
||||||
num_docs: job.num_docs,
|
num_docs: job.num_docs,
|
||||||
time_taken: job.time_taken,
|
time_taken: job.time_taken,
|
||||||
team_id: job.team_id === "preview" ? null : job.team_id,
|
team_id: (job.team_id === "preview" || job.team_id?.startsWith("preview_"))? null : job.team_id,
|
||||||
mode: job.mode,
|
mode: job.mode,
|
||||||
url: job.url,
|
url: job.url,
|
||||||
crawler_options: job.crawlerOptions,
|
crawler_options: job.crawlerOptions,
|
||||||
|
@ -86,7 +86,7 @@ export async function sendNotificationInternal(
|
|||||||
chunk: AuthCreditUsageChunk,
|
chunk: AuthCreditUsageChunk,
|
||||||
bypassRecentChecks: boolean = false,
|
bypassRecentChecks: boolean = false,
|
||||||
): Promise<{ success: boolean }> {
|
): Promise<{ success: boolean }> {
|
||||||
if (team_id === "preview") {
|
if (team_id === "preview" || team_id.startsWith("preview_")) {
|
||||||
return { success: true };
|
return { success: true };
|
||||||
}
|
}
|
||||||
return await redlock.using(
|
return await redlock.using(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user