mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 06:28:59 +08:00
Testing improvements (FIR-2209) (#1623)
* yeet ad blocking tests until further notice
* feat: re-enable billing tests
* more timeout
* cache issues with billing test
* weird thing
* fix(api/tests/scrape/status): propagation time
* stupid
* no log
* sws
This commit is contained in:
parent
e297cf8a0d
commit
406d696667
@ -1,197 +1,211 @@
|
|||||||
// import { batchScrape, crawl, creditUsage, extract, map, scrape, search, tokenUsage } from "./lib";
|
import { batchScrape, crawl, creditUsage, extract, map, scrape, search, tokenUsage } from "./lib";
|
||||||
|
|
||||||
// const sleep = (ms: number) => new Promise(x => setTimeout(() => x(true), ms));
|
// Promise-based delay: the returned promise resolves with `true` once the
// `ms`-millisecond timer fires.
const sleep = (ms: number) =>
  new Promise(resolve => {
    setTimeout(() => resolve(true), ms);
  });
|
||||||
// const sleepForBatchBilling = () => sleep(20000);
|
// Fixed 40 000 ms pause; the billing tests await it between running billable
// operations and re-reading the remaining credit/token balance.
const sleepForBatchBilling = () => {
  return sleep(40000);
};
|
||||||
|
|
||||||
// beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
// // Wait for previous test runs to stop billing processing
|
// Wait for previous test runs to stop billing processing
|
||||||
// if (!process.env.TEST_SUITE_SELF_HOSTED) {
|
if (!process.env.TEST_SUITE_SELF_HOSTED) {
|
||||||
// await sleep(40000);
|
await sleep(40000);
|
||||||
// }
|
}
|
||||||
// }, 50000);
|
}, 50000);
|
||||||
|
|
||||||
// describe("Billing tests", () => {
|
describe("Billing tests", () => {
|
||||||
// if (process.env.TEST_SUITE_SELF_HOSTED) {
|
if (process.env.TEST_SUITE_SELF_HOSTED) {
|
||||||
// it("dummy", () => {
|
it("dummy", () => {
|
||||||
// expect(true).toBe(true);
|
expect(true).toBe(true);
|
||||||
// });
|
});
|
||||||
// } else {
|
} else {
|
||||||
// it("bills scrape correctly", async () => {
|
it("bills scrape correctly", async () => {
|
||||||
// const rc1 = (await creditUsage()).remaining_credits;
|
const rc1 = (await creditUsage()).remaining_credits;
|
||||||
|
|
||||||
// // Run all scrape operations in parallel with Promise.all
|
// Run all scrape operations in parallel with Promise.all
|
||||||
// await Promise.all([
|
await Promise.all([
|
||||||
// // scrape 1: regular fc.dev scrape (1 credit)
|
// scrape 1: regular fc.dev scrape (1 credit)
|
||||||
// scrape({
|
scrape({
|
||||||
// url: "https://firecrawl.dev"
|
url: "https://firecrawl.dev"
|
||||||
// }),
|
}),
|
||||||
|
|
||||||
// // scrape 1.1: regular fc.dev scrape (1 credit)
|
// scrape 1.1: regular fc.dev scrape (1 credit)
|
||||||
// scrape({
|
scrape({
|
||||||
// url: "https://firecrawl.dev"
|
url: "https://firecrawl.dev"
|
||||||
// }),
|
}),
|
||||||
|
|
||||||
// // scrape 2: fc.dev with json (5 credits)
|
// scrape 2: fc.dev with json (5 credits)
|
||||||
// scrape({
|
scrape({
|
||||||
// url: "https://firecrawl.dev",
|
url: "https://firecrawl.dev",
|
||||||
// formats: ["json"],
|
formats: ["json"],
|
||||||
// jsonOptions: {
|
jsonOptions: {
|
||||||
// schema: {
|
schema: {
|
||||||
// type: "object",
|
type: "object",
|
||||||
// properties: {
|
properties: {
|
||||||
// is_open_source: { type: "boolean" },
|
is_open_source: { type: "boolean" },
|
||||||
// },
|
},
|
||||||
// required: ["is_open_source"],
|
required: ["is_open_source"],
|
||||||
// },
|
},
|
||||||
// },
|
},
|
||||||
// })
|
})
|
||||||
// ]);
|
]);
|
||||||
|
|
||||||
// // sum: 7 credits
|
// sum: 7 credits
|
||||||
|
|
||||||
// await sleepForBatchBilling();
|
await sleepForBatchBilling();
|
||||||
|
|
||||||
// const rc2 = (await creditUsage()).remaining_credits;
|
const rc2 = (await creditUsage()).remaining_credits;
|
||||||
|
|
||||||
// expect(rc1 - rc2).toBe(7);
|
expect(rc1 - rc2).toBe(7);
|
||||||
// }, 120000);
|
}, 120000);
|
||||||
|
|
||||||
// it("bills batch scrape correctly", async () => {
|
it("bills batch scrape correctly", async () => {
|
||||||
// const rc1 = (await creditUsage()).remaining_credits;
|
const rc1 = (await creditUsage()).remaining_credits;
|
||||||
|
|
||||||
// // Run both scrape operations in parallel with Promise.all
|
// Run both scrape operations in parallel with Promise.all
|
||||||
// const [scrape1, scrape2] = await Promise.all([
|
const [scrape1, scrape2] = await Promise.all([
|
||||||
// // scrape 1: regular batch scrape with failing domain (2 credits)
|
// scrape 1: regular batch scrape with failing domain (2 credits)
|
||||||
// batchScrape({
|
batchScrape({
|
||||||
// urls: [
|
urls: [
|
||||||
// "https://firecrawl.dev",
|
"https://firecrawl.dev",
|
||||||
// "https://mendable.ai",
|
"https://mendable.ai",
|
||||||
// "https://thisdomaindoesnotexistandwillfail.fcr",
|
"https://thisdomaindoesnotexistandwillfail.fcr",
|
||||||
// ],
|
],
|
||||||
// }),
|
}),
|
||||||
|
|
||||||
// // scrape 2: batch scrape with json (10 credits)
|
// scrape 2: batch scrape with json (10 credits)
|
||||||
// batchScrape({
|
batchScrape({
|
||||||
// urls: [
|
urls: [
|
||||||
// "https://firecrawl.dev",
|
"https://firecrawl.dev",
|
||||||
// "https://mendable.ai",
|
"https://mendable.ai",
|
||||||
// "https://thisdomaindoesnotexistandwillfail.fcr",
|
"https://thisdomaindoesnotexistandwillfail.fcr",
|
||||||
// ],
|
],
|
||||||
// formats: ["json"],
|
formats: ["json"],
|
||||||
// jsonOptions: {
|
jsonOptions: {
|
||||||
// schema: {
|
schema: {
|
||||||
// type: "object",
|
type: "object",
|
||||||
// properties: {
|
properties: {
|
||||||
// four_word_summary: { type: "string" },
|
four_word_summary: { type: "string" },
|
||||||
// },
|
},
|
||||||
// required: ["four_word_summary"],
|
required: ["four_word_summary"],
|
||||||
// },
|
},
|
||||||
// },
|
},
|
||||||
// })
|
})
|
||||||
// ]);
|
]);
|
||||||
|
|
||||||
// // sum: 12 credits
|
// sum: 12 credits
|
||||||
|
|
||||||
// await sleepForBatchBilling();
|
await sleepForBatchBilling();
|
||||||
|
|
||||||
// const rc2 = (await creditUsage()).remaining_credits;
|
const rc2 = (await creditUsage()).remaining_credits;
|
||||||
|
|
||||||
// expect(rc1 - rc2).toBe(12);
|
expect(rc1 - rc2).toBe(12);
|
||||||
// }, 600000);
|
}, 600000);
|
||||||
|
|
||||||
// it("bills crawl correctly", async () => {
|
it("bills crawl correctly", async () => {
|
||||||
// const rc1 = (await creditUsage()).remaining_credits;
|
const rc1 = (await creditUsage()).remaining_credits;
|
||||||
|
|
||||||
// // Run both crawl operations in parallel with Promise.all
|
// Run both crawl operations in parallel with Promise.all
|
||||||
// const [crawl1, crawl2] = await Promise.all([
|
const [crawl1, crawl2] = await Promise.all([
|
||||||
// // crawl 1: regular fc.dev crawl (x credits)
|
// crawl 1: regular fc.dev crawl (x credits)
|
||||||
// crawl({
|
crawl({
|
||||||
// url: "https://firecrawl.dev",
|
url: "https://firecrawl.dev",
|
||||||
// }),
|
limit: 10,
|
||||||
|
}),
|
||||||
|
|
||||||
// // crawl 2: fc.dev crawl with json (5y credits)
|
// crawl 2: fc.dev crawl with json (5y credits)
|
||||||
// crawl({
|
crawl({
|
||||||
// url: "https://firecrawl.dev",
|
url: "https://firecrawl.dev",
|
||||||
// scrapeOptions: {
|
scrapeOptions: {
|
||||||
// formats: ["json"],
|
formats: ["json"],
|
||||||
// jsonOptions: {
|
jsonOptions: {
|
||||||
// schema: {
|
schema: {
|
||||||
// type: "object",
|
type: "object",
|
||||||
// properties: {
|
properties: {
|
||||||
// four_word_summary: { type: "string" },
|
four_word_summary: { type: "string" },
|
||||||
// },
|
},
|
||||||
// required: ["four_word_summary"],
|
required: ["four_word_summary"],
|
||||||
// },
|
},
|
||||||
// },
|
},
|
||||||
// }
|
},
|
||||||
// })
|
limit: 10,
|
||||||
// ]);
|
})
|
||||||
|
]);
|
||||||
|
|
||||||
// expect(crawl1.success).toBe(true);
|
expect(crawl1.success).toBe(true);
|
||||||
// expect(crawl2.success).toBe(true);
|
expect(crawl2.success).toBe(true);
|
||||||
|
|
||||||
// // sum: x+5y credits
|
// sum: x+5y credits
|
||||||
|
|
||||||
// await sleepForBatchBilling();
|
await sleepForBatchBilling();
|
||||||
|
|
||||||
// const rc2 = (await creditUsage()).remaining_credits;
|
const rc2 = (await creditUsage()).remaining_credits;
|
||||||
|
|
||||||
// if (crawl1.success && crawl2.success) {
|
if (crawl1.success && crawl2.success) {
|
||||||
// expect(rc1 - rc2).toBe(crawl1.completed + crawl2.completed * 5);
|
expect(rc1 - rc2).toBe(crawl1.completed + crawl2.completed * 5);
|
||||||
// }
|
}
|
||||||
// }, 600000);
|
}, 600000);
|
||||||
|
|
||||||
// it("bills map correctly", async () => {
|
it("bills map correctly", async () => {
|
||||||
// const rc1 = (await creditUsage()).remaining_credits;
|
const rc1 = (await creditUsage()).remaining_credits;
|
||||||
// await map({ url: "https://firecrawl.dev" });
|
await map({ url: "https://firecrawl.dev" });
|
||||||
// await sleepForBatchBilling();
|
await sleepForBatchBilling();
|
||||||
// const rc2 = (await creditUsage()).remaining_credits;
|
const rc2 = (await creditUsage()).remaining_credits;
|
||||||
// expect(rc1 - rc2).toBe(1);
|
expect(rc1 - rc2).toBe(1);
|
||||||
// }, 60000);
|
}, 60000);
|
||||||
|
|
||||||
// it("bills search correctly", async () => {
|
it("bills search correctly", async () => {
|
||||||
// const rc1 = (await creditUsage()).remaining_credits;
|
const rc1 = (await creditUsage()).remaining_credits;
|
||||||
|
|
||||||
// const results = await search({
|
const results = await search({
|
||||||
// query: "firecrawl"
|
query: "firecrawl"
|
||||||
// });
|
});
|
||||||
|
|
||||||
// await sleepForBatchBilling();
|
await sleepForBatchBilling();
|
||||||
|
|
||||||
// const rc2 = (await creditUsage()).remaining_credits;
|
const rc2 = (await creditUsage()).remaining_credits;
|
||||||
|
|
||||||
// expect(rc1 - rc2).toBe(results.length);
|
expect(rc1 - rc2).toBe(results.length);
|
||||||
// }, 60000);
|
}, 60000);
|
||||||
|
|
||||||
// it("bills extract correctly", async () => {
|
it("bills search with scrape correctly", async () => {
|
||||||
// const rc1 = (await tokenUsage()).remaining_tokens;
|
const rc1 = (await creditUsage()).remaining_credits;
|
||||||
|
|
||||||
// await extract({
|
const results = await search({
|
||||||
// urls: ["https://firecrawl.dev"],
|
query: "firecrawl",
|
||||||
// schema: {
|
scrapeOptions: {
|
||||||
// "type": "object",
|
formats: ["markdown"],
|
||||||
// "properties": {
|
},
|
||||||
// "is_open_source": {
|
});
|
||||||
// "type": "boolean"
|
|
||||||
// }
|
|
||||||
// },
|
|
||||||
// "required": [
|
|
||||||
// "is_open_source"
|
|
||||||
// ]
|
|
||||||
// },
|
|
||||||
// origin: "api-sdk",
|
|
||||||
// });
|
|
||||||
|
|
||||||
// await sleepForBatchBilling();
|
await sleepForBatchBilling();
|
||||||
|
|
||||||
// const rc2 = (await tokenUsage()).remaining_tokens;
|
const rc2 = (await creditUsage()).remaining_credits;
|
||||||
|
|
||||||
// expect(rc1 - rc2).toBe(305);
|
expect(rc1 - rc2).toBe(results.length);
|
||||||
// }, 300000);
|
}, 600000);
|
||||||
// }
|
|
||||||
// });
|
|
||||||
|
|
||||||
// temporarily disabled
|
it("bills extract correctly", async () => {
|
||||||
it("is mocked", () => {
|
const rc1 = (await tokenUsage()).remaining_tokens;
|
||||||
expect(true).toBe(true);
|
|
||||||
|
await extract({
|
||||||
|
urls: ["https://firecrawl.dev"],
|
||||||
|
schema: {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"is_open_source": {
|
||||||
|
"type": "boolean"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"is_open_source"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
origin: "api-sdk",
|
||||||
|
});
|
||||||
|
|
||||||
|
await sleepForBatchBilling();
|
||||||
|
|
||||||
|
const rc2 = (await tokenUsage()).remaining_tokens;
|
||||||
|
|
||||||
|
expect(rc1 - rc2).toBe(305);
|
||||||
|
}, 300000);
|
||||||
|
}
|
||||||
});
|
});
|
@ -73,28 +73,31 @@ describe("Scrape tests", () => {
|
|||||||
|
|
||||||
expect(response.markdown).toContain("Firecrawl");
|
expect(response.markdown).toContain("Firecrawl");
|
||||||
|
|
||||||
|
// Give time to propagate to read replica
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||||
|
|
||||||
const status = await scrapeStatus(response.metadata.scrapeId!);
|
const status = await scrapeStatus(response.metadata.scrapeId!);
|
||||||
expect(JSON.stringify(status)).toBe(JSON.stringify(response));
|
expect(JSON.stringify(status)).toBe(JSON.stringify(response));
|
||||||
}, 60000);
|
}, 60000);
|
||||||
|
|
||||||
describe("Ad blocking (f-e dependant)", () => {
|
// describe("Ad blocking (f-e dependant)", () => {
|
||||||
it.concurrent("blocks ads by default", async () => {
|
// it.concurrent("blocks ads by default", async () => {
|
||||||
const response = await scrape({
|
// const response = await scrape({
|
||||||
url: "https://www.allrecipes.com/recipe/18185/yum/",
|
// url: "https://www.allrecipes.com/recipe/18185/yum/",
|
||||||
});
|
// });
|
||||||
|
|
||||||
expect(response.markdown).not.toContain(".g.doubleclick.net/");
|
// expect(response.markdown).not.toContain(".g.doubleclick.net/");
|
||||||
}, 30000);
|
// }, 30000);
|
||||||
|
|
||||||
it.concurrent("doesn't block ads if explicitly disabled", async () => {
|
// it.concurrent("doesn't block ads if explicitly disabled", async () => {
|
||||||
const response = await scrape({
|
// const response = await scrape({
|
||||||
url: "https://www.allrecipes.com/recipe/18185/yum/",
|
// url: "https://www.allrecipes.com/recipe/18185/yum/",
|
||||||
blockAds: false,
|
// blockAds: false,
|
||||||
});
|
// });
|
||||||
|
|
||||||
expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//);
|
// expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//);
|
||||||
}, 30000);
|
// }, 30000);
|
||||||
});
|
// });
|
||||||
|
|
||||||
describe("Change Tracking format", () => {
|
describe("Change Tracking format", () => {
|
||||||
it.concurrent("works", async () => {
|
it.concurrent("works", async () => {
|
||||||
|
@ -2,6 +2,7 @@ import { Request, Response } from "express";
|
|||||||
import { RequestWithAuth } from "./types";
|
import { RequestWithAuth } from "./types";
|
||||||
import { getACUCTeam } from "../auth";
|
import { getACUCTeam } from "../auth";
|
||||||
import { logger } from "../../lib/logger";
|
import { logger } from "../../lib/logger";
|
||||||
|
import { RateLimiterMode } from "../../types";
|
||||||
|
|
||||||
export async function creditUsageController(
|
export async function creditUsageController(
|
||||||
req: RequestWithAuth,
|
req: RequestWithAuth,
|
||||||
@ -20,7 +21,7 @@ export async function creditUsageController(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise fetch fresh data
|
// Otherwise fetch fresh data
|
||||||
const chunk = await getACUCTeam(req.auth.team_id);
|
const chunk = await getACUCTeam(req.auth.team_id, false, false, RateLimiterMode.Scrape);
|
||||||
if (!chunk) {
|
if (!chunk) {
|
||||||
res.status(404).json({
|
res.status(404).json({
|
||||||
success: false,
|
success: false,
|
||||||
|
@ -42,7 +42,7 @@ export async function scrapeController(
|
|||||||
});
|
});
|
||||||
//
|
//
|
||||||
|
|
||||||
const isDirectToBullMQ = process.env.SEARCH_PREVIEW_TOKEN === req.body.__searchPreviewToken;
|
const isDirectToBullMQ = process.env.SEARCH_PREVIEW_TOKEN !== undefined && process.env.SEARCH_PREVIEW_TOKEN === req.body.__searchPreviewToken;
|
||||||
|
|
||||||
await addScrapeJob(
|
await addScrapeJob(
|
||||||
{
|
{
|
||||||
|
@ -172,8 +172,7 @@ export async function searchController(
|
|||||||
};
|
};
|
||||||
const startTime = new Date().getTime();
|
const startTime = new Date().getTime();
|
||||||
const costTracking = new CostTracking();
|
const costTracking = new CostTracking();
|
||||||
const isSearchPreview =
|
const isSearchPreview = process.env.SEARCH_PREVIEW_TOKEN !== undefined && process.env.SEARCH_PREVIEW_TOKEN === req.body.__searchPreviewToken;
|
||||||
process.env.SEARCH_PREVIEW_TOKEN === req.body.__searchPreviewToken;
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
req.body = searchRequestSchema.parse(req.body);
|
req.body = searchRequestSchema.parse(req.body);
|
||||||
|
@ -21,7 +21,7 @@ export async function tokenUsageController(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise fetch fresh data
|
// Otherwise fetch fresh data
|
||||||
const chunk = await getACUCTeam(req.auth.team_id, false, true, RateLimiterMode.Extract);
|
const chunk = await getACUCTeam(req.auth.team_id, false, false, RateLimiterMode.Extract);
|
||||||
if (!chunk) {
|
if (!chunk) {
|
||||||
res.status(404).json({
|
res.status(404).json({
|
||||||
success: false,
|
success: false,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user