mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 02:28:59 +08:00
Testing improvements (FIR-2209) (#1623)
* yeet ad blocking tests until further notice
* feat: re-enable billing tests
* more timeout
* cache issues with billing test
* weird thing
* fix(api/tests/scrape/status): propagation time
* stupid
* no log
* sws
This commit is contained in:
parent
e297cf8a0d
commit
406d696667
@ -1,197 +1,211 @@
|
||||
// import { batchScrape, crawl, creditUsage, extract, map, scrape, search, tokenUsage } from "./lib";
|
||||
import { batchScrape, crawl, creditUsage, extract, map, scrape, search, tokenUsage } from "./lib";
|
||||
|
||||
// const sleep = (ms: number) => new Promise(x => setTimeout(() => x(true), ms));
|
||||
// const sleepForBatchBilling = () => sleep(20000);
|
||||
// Returns a promise that resolves with `true` once `ms` milliseconds have elapsed.
const sleep = (ms: number) =>
  new Promise(resolve => {
    setTimeout(() => {
      resolve(true);
    }, ms);
  });
|
||||
// Fixed 40-second pause; the billing tests await it between running an
// operation and re-reading the remaining credits/tokens.
const sleepForBatchBilling = () => {
  return sleep(40000);
};
|
||||
|
||||
// beforeAll(async () => {
|
||||
// // Wait for previous test runs to stop billing processing
|
||||
// if (!process.env.TEST_SUITE_SELF_HOSTED) {
|
||||
// await sleep(40000);
|
||||
// }
|
||||
// }, 50000);
|
||||
beforeAll(async () => {
|
||||
// Wait for previous test runs to stop billing processing
|
||||
if (!process.env.TEST_SUITE_SELF_HOSTED) {
|
||||
await sleep(40000);
|
||||
}
|
||||
}, 50000);
|
||||
|
||||
// describe("Billing tests", () => {
|
||||
// if (process.env.TEST_SUITE_SELF_HOSTED) {
|
||||
// it("dummy", () => {
|
||||
// expect(true).toBe(true);
|
||||
// });
|
||||
// } else {
|
||||
// it("bills scrape correctly", async () => {
|
||||
// const rc1 = (await creditUsage()).remaining_credits;
|
||||
describe("Billing tests", () => {
|
||||
if (process.env.TEST_SUITE_SELF_HOSTED) {
|
||||
it("dummy", () => {
|
||||
expect(true).toBe(true);
|
||||
});
|
||||
} else {
|
||||
it("bills scrape correctly", async () => {
|
||||
const rc1 = (await creditUsage()).remaining_credits;
|
||||
|
||||
// // Run all scrape operations in parallel with Promise.all
|
||||
// await Promise.all([
|
||||
// // scrape 1: regular fc.dev scrape (1 credit)
|
||||
// scrape({
|
||||
// url: "https://firecrawl.dev"
|
||||
// }),
|
||||
// Run all scrape operations in parallel with Promise.all
|
||||
await Promise.all([
|
||||
// scrape 1: regular fc.dev scrape (1 credit)
|
||||
scrape({
|
||||
url: "https://firecrawl.dev"
|
||||
}),
|
||||
|
||||
// // scrape 1.1: regular fc.dev scrape (1 credit)
|
||||
// scrape({
|
||||
// url: "https://firecrawl.dev"
|
||||
// }),
|
||||
// scrape 1.1: regular fc.dev scrape (1 credit)
|
||||
scrape({
|
||||
url: "https://firecrawl.dev"
|
||||
}),
|
||||
|
||||
// // scrape 2: fc.dev with json (5 credits)
|
||||
// scrape({
|
||||
// url: "https://firecrawl.dev",
|
||||
// formats: ["json"],
|
||||
// jsonOptions: {
|
||||
// schema: {
|
||||
// type: "object",
|
||||
// properties: {
|
||||
// is_open_source: { type: "boolean" },
|
||||
// },
|
||||
// required: ["is_open_source"],
|
||||
// },
|
||||
// },
|
||||
// })
|
||||
// ]);
|
||||
// scrape 2: fc.dev with json (5 credits)
|
||||
scrape({
|
||||
url: "https://firecrawl.dev",
|
||||
formats: ["json"],
|
||||
jsonOptions: {
|
||||
schema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
is_open_source: { type: "boolean" },
|
||||
},
|
||||
required: ["is_open_source"],
|
||||
},
|
||||
},
|
||||
})
|
||||
]);
|
||||
|
||||
// // sum: 7 credits
|
||||
// sum: 7 credits
|
||||
|
||||
// await sleepForBatchBilling();
|
||||
await sleepForBatchBilling();
|
||||
|
||||
// const rc2 = (await creditUsage()).remaining_credits;
|
||||
const rc2 = (await creditUsage()).remaining_credits;
|
||||
|
||||
// expect(rc1 - rc2).toBe(7);
|
||||
// }, 120000);
|
||||
expect(rc1 - rc2).toBe(7);
|
||||
}, 120000);
|
||||
|
||||
// it("bills batch scrape correctly", async () => {
|
||||
// const rc1 = (await creditUsage()).remaining_credits;
|
||||
it("bills batch scrape correctly", async () => {
|
||||
const rc1 = (await creditUsage()).remaining_credits;
|
||||
|
||||
// // Run both scrape operations in parallel with Promise.all
|
||||
// const [scrape1, scrape2] = await Promise.all([
|
||||
// // scrape 1: regular batch scrape with failing domain (2 credits)
|
||||
// batchScrape({
|
||||
// urls: [
|
||||
// "https://firecrawl.dev",
|
||||
// "https://mendable.ai",
|
||||
// "https://thisdomaindoesnotexistandwillfail.fcr",
|
||||
// ],
|
||||
// }),
|
||||
// Run both scrape operations in parallel with Promise.all
|
||||
const [scrape1, scrape2] = await Promise.all([
|
||||
// scrape 1: regular batch scrape with failing domain (2 credits)
|
||||
batchScrape({
|
||||
urls: [
|
||||
"https://firecrawl.dev",
|
||||
"https://mendable.ai",
|
||||
"https://thisdomaindoesnotexistandwillfail.fcr",
|
||||
],
|
||||
}),
|
||||
|
||||
// // scrape 2: batch scrape with json (10 credits)
|
||||
// batchScrape({
|
||||
// urls: [
|
||||
// "https://firecrawl.dev",
|
||||
// "https://mendable.ai",
|
||||
// "https://thisdomaindoesnotexistandwillfail.fcr",
|
||||
// ],
|
||||
// formats: ["json"],
|
||||
// jsonOptions: {
|
||||
// schema: {
|
||||
// type: "object",
|
||||
// properties: {
|
||||
// four_word_summary: { type: "string" },
|
||||
// },
|
||||
// required: ["four_word_summary"],
|
||||
// },
|
||||
// },
|
||||
// })
|
||||
// ]);
|
||||
// scrape 2: batch scrape with json (10 credits)
|
||||
batchScrape({
|
||||
urls: [
|
||||
"https://firecrawl.dev",
|
||||
"https://mendable.ai",
|
||||
"https://thisdomaindoesnotexistandwillfail.fcr",
|
||||
],
|
||||
formats: ["json"],
|
||||
jsonOptions: {
|
||||
schema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
four_word_summary: { type: "string" },
|
||||
},
|
||||
required: ["four_word_summary"],
|
||||
},
|
||||
},
|
||||
})
|
||||
]);
|
||||
|
||||
// // sum: 12 credits
|
||||
// sum: 12 credits
|
||||
|
||||
// await sleepForBatchBilling();
|
||||
await sleepForBatchBilling();
|
||||
|
||||
// const rc2 = (await creditUsage()).remaining_credits;
|
||||
const rc2 = (await creditUsage()).remaining_credits;
|
||||
|
||||
// expect(rc1 - rc2).toBe(12);
|
||||
// }, 600000);
|
||||
expect(rc1 - rc2).toBe(12);
|
||||
}, 600000);
|
||||
|
||||
// it("bills crawl correctly", async () => {
|
||||
// const rc1 = (await creditUsage()).remaining_credits;
|
||||
it("bills crawl correctly", async () => {
|
||||
const rc1 = (await creditUsage()).remaining_credits;
|
||||
|
||||
// // Run both crawl operations in parallel with Promise.all
|
||||
// const [crawl1, crawl2] = await Promise.all([
|
||||
// // crawl 1: regular fc.dev crawl (x credits)
|
||||
// crawl({
|
||||
// url: "https://firecrawl.dev",
|
||||
// }),
|
||||
// Run both crawl operations in parallel with Promise.all
|
||||
const [crawl1, crawl2] = await Promise.all([
|
||||
// crawl 1: regular fc.dev crawl (x credits)
|
||||
crawl({
|
||||
url: "https://firecrawl.dev",
|
||||
limit: 10,
|
||||
}),
|
||||
|
||||
// // crawl 2: fc.dev crawl with json (5y credits)
|
||||
// crawl({
|
||||
// url: "https://firecrawl.dev",
|
||||
// scrapeOptions: {
|
||||
// formats: ["json"],
|
||||
// jsonOptions: {
|
||||
// schema: {
|
||||
// type: "object",
|
||||
// properties: {
|
||||
// four_word_summary: { type: "string" },
|
||||
// },
|
||||
// required: ["four_word_summary"],
|
||||
// },
|
||||
// },
|
||||
// }
|
||||
// })
|
||||
// ]);
|
||||
// crawl 2: fc.dev crawl with json (5y credits)
|
||||
crawl({
|
||||
url: "https://firecrawl.dev",
|
||||
scrapeOptions: {
|
||||
formats: ["json"],
|
||||
jsonOptions: {
|
||||
schema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
four_word_summary: { type: "string" },
|
||||
},
|
||||
required: ["four_word_summary"],
|
||||
},
|
||||
},
|
||||
},
|
||||
limit: 10,
|
||||
})
|
||||
]);
|
||||
|
||||
// expect(crawl1.success).toBe(true);
|
||||
// expect(crawl2.success).toBe(true);
|
||||
expect(crawl1.success).toBe(true);
|
||||
expect(crawl2.success).toBe(true);
|
||||
|
||||
// // sum: x+5y credits
|
||||
// sum: x+5y credits
|
||||
|
||||
// await sleepForBatchBilling();
|
||||
await sleepForBatchBilling();
|
||||
|
||||
// const rc2 = (await creditUsage()).remaining_credits;
|
||||
const rc2 = (await creditUsage()).remaining_credits;
|
||||
|
||||
// if (crawl1.success && crawl2.success) {
|
||||
// expect(rc1 - rc2).toBe(crawl1.completed + crawl2.completed * 5);
|
||||
// }
|
||||
// }, 600000);
|
||||
if (crawl1.success && crawl2.success) {
|
||||
expect(rc1 - rc2).toBe(crawl1.completed + crawl2.completed * 5);
|
||||
}
|
||||
}, 600000);
|
||||
|
||||
// it("bills map correctly", async () => {
|
||||
// const rc1 = (await creditUsage()).remaining_credits;
|
||||
// await map({ url: "https://firecrawl.dev" });
|
||||
// await sleepForBatchBilling();
|
||||
// const rc2 = (await creditUsage()).remaining_credits;
|
||||
// expect(rc1 - rc2).toBe(1);
|
||||
// }, 60000);
|
||||
it("bills map correctly", async () => {
|
||||
const rc1 = (await creditUsage()).remaining_credits;
|
||||
await map({ url: "https://firecrawl.dev" });
|
||||
await sleepForBatchBilling();
|
||||
const rc2 = (await creditUsage()).remaining_credits;
|
||||
expect(rc1 - rc2).toBe(1);
|
||||
}, 60000);
|
||||
|
||||
// it("bills search correctly", async () => {
|
||||
// const rc1 = (await creditUsage()).remaining_credits;
|
||||
it("bills search correctly", async () => {
|
||||
const rc1 = (await creditUsage()).remaining_credits;
|
||||
|
||||
// const results = await search({
|
||||
// query: "firecrawl"
|
||||
// });
|
||||
const results = await search({
|
||||
query: "firecrawl"
|
||||
});
|
||||
|
||||
// await sleepForBatchBilling();
|
||||
await sleepForBatchBilling();
|
||||
|
||||
// const rc2 = (await creditUsage()).remaining_credits;
|
||||
const rc2 = (await creditUsage()).remaining_credits;
|
||||
|
||||
// expect(rc1 - rc2).toBe(results.length);
|
||||
// }, 60000);
|
||||
expect(rc1 - rc2).toBe(results.length);
|
||||
}, 60000);
|
||||
|
||||
// it("bills extract correctly", async () => {
|
||||
// const rc1 = (await tokenUsage()).remaining_tokens;
|
||||
it("bills search with scrape correctly", async () => {
|
||||
const rc1 = (await creditUsage()).remaining_credits;
|
||||
|
||||
const results = await search({
|
||||
query: "firecrawl",
|
||||
scrapeOptions: {
|
||||
formats: ["markdown"],
|
||||
},
|
||||
});
|
||||
|
||||
await sleepForBatchBilling();
|
||||
|
||||
const rc2 = (await creditUsage()).remaining_credits;
|
||||
|
||||
expect(rc1 - rc2).toBe(results.length);
|
||||
}, 600000);
|
||||
|
||||
it("bills extract correctly", async () => {
|
||||
const rc1 = (await tokenUsage()).remaining_tokens;
|
||||
|
||||
// await extract({
|
||||
// urls: ["https://firecrawl.dev"],
|
||||
// schema: {
|
||||
// "type": "object",
|
||||
// "properties": {
|
||||
// "is_open_source": {
|
||||
// "type": "boolean"
|
||||
// }
|
||||
// },
|
||||
// "required": [
|
||||
// "is_open_source"
|
||||
// ]
|
||||
// },
|
||||
// origin: "api-sdk",
|
||||
// });
|
||||
await extract({
|
||||
urls: ["https://firecrawl.dev"],
|
||||
schema: {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"is_open_source": {
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"is_open_source"
|
||||
]
|
||||
},
|
||||
origin: "api-sdk",
|
||||
});
|
||||
|
||||
// await sleepForBatchBilling();
|
||||
await sleepForBatchBilling();
|
||||
|
||||
// const rc2 = (await tokenUsage()).remaining_tokens;
|
||||
const rc2 = (await tokenUsage()).remaining_tokens;
|
||||
|
||||
// expect(rc1 - rc2).toBe(305);
|
||||
// }, 300000);
|
||||
// }
|
||||
// });
|
||||
|
||||
// temporarily disabled
|
||||
it("is mocked", () => {
|
||||
expect(true).toBe(true);
|
||||
});
|
||||
expect(rc1 - rc2).toBe(305);
|
||||
}, 300000);
|
||||
}
|
||||
});
|
||||
|
@ -72,29 +72,32 @@ describe("Scrape tests", () => {
|
||||
});
|
||||
|
||||
expect(response.markdown).toContain("Firecrawl");
|
||||
|
||||
// Give time to propagate to read replica
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
|
||||
const status = await scrapeStatus(response.metadata.scrapeId!);
|
||||
expect(JSON.stringify(status)).toBe(JSON.stringify(response));
|
||||
}, 60000);
|
||||
|
||||
describe("Ad blocking (f-e dependant)", () => {
|
||||
it.concurrent("blocks ads by default", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://www.allrecipes.com/recipe/18185/yum/",
|
||||
});
|
||||
// describe("Ad blocking (f-e dependant)", () => {
|
||||
// it.concurrent("blocks ads by default", async () => {
|
||||
// const response = await scrape({
|
||||
// url: "https://www.allrecipes.com/recipe/18185/yum/",
|
||||
// });
|
||||
|
||||
expect(response.markdown).not.toContain(".g.doubleclick.net/");
|
||||
}, 30000);
|
||||
// expect(response.markdown).not.toContain(".g.doubleclick.net/");
|
||||
// }, 30000);
|
||||
|
||||
it.concurrent("doesn't block ads if explicitly disabled", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://www.allrecipes.com/recipe/18185/yum/",
|
||||
blockAds: false,
|
||||
});
|
||||
// it.concurrent("doesn't block ads if explicitly disabled", async () => {
|
||||
// const response = await scrape({
|
||||
// url: "https://www.allrecipes.com/recipe/18185/yum/",
|
||||
// blockAds: false,
|
||||
// });
|
||||
|
||||
expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//);
|
||||
}, 30000);
|
||||
});
|
||||
// expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//);
|
||||
// }, 30000);
|
||||
// });
|
||||
|
||||
describe("Change Tracking format", () => {
|
||||
it.concurrent("works", async () => {
|
||||
|
@ -2,6 +2,7 @@ import { Request, Response } from "express";
|
||||
import { RequestWithAuth } from "./types";
|
||||
import { getACUCTeam } from "../auth";
|
||||
import { logger } from "../../lib/logger";
|
||||
import { RateLimiterMode } from "../../types";
|
||||
|
||||
export async function creditUsageController(
|
||||
req: RequestWithAuth,
|
||||
@ -20,7 +21,7 @@ export async function creditUsageController(
|
||||
}
|
||||
|
||||
// Otherwise fetch fresh data
|
||||
const chunk = await getACUCTeam(req.auth.team_id);
|
||||
const chunk = await getACUCTeam(req.auth.team_id, false, false, RateLimiterMode.Scrape);
|
||||
if (!chunk) {
|
||||
res.status(404).json({
|
||||
success: false,
|
||||
|
@ -42,7 +42,7 @@ export async function scrapeController(
|
||||
});
|
||||
//
|
||||
|
||||
const isDirectToBullMQ = process.env.SEARCH_PREVIEW_TOKEN === req.body.__searchPreviewToken;
|
||||
const isDirectToBullMQ = process.env.SEARCH_PREVIEW_TOKEN !== undefined && process.env.SEARCH_PREVIEW_TOKEN === req.body.__searchPreviewToken;
|
||||
|
||||
await addScrapeJob(
|
||||
{
|
||||
|
@ -172,8 +172,7 @@ export async function searchController(
|
||||
};
|
||||
const startTime = new Date().getTime();
|
||||
const costTracking = new CostTracking();
|
||||
const isSearchPreview =
|
||||
process.env.SEARCH_PREVIEW_TOKEN === req.body.__searchPreviewToken;
|
||||
const isSearchPreview = process.env.SEARCH_PREVIEW_TOKEN !== undefined && process.env.SEARCH_PREVIEW_TOKEN === req.body.__searchPreviewToken;
|
||||
|
||||
try {
|
||||
req.body = searchRequestSchema.parse(req.body);
|
||||
|
@ -21,7 +21,7 @@ export async function tokenUsageController(
|
||||
}
|
||||
|
||||
// Otherwise fetch fresh data
|
||||
const chunk = await getACUCTeam(req.auth.team_id, false, true, RateLimiterMode.Extract);
|
||||
const chunk = await getACUCTeam(req.auth.team_id, false, false, RateLimiterMode.Extract);
|
||||
if (!chunk) {
|
||||
res.status(404).json({
|
||||
success: false,
|
||||
|
Loading…
x
Reference in New Issue
Block a user