mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-04 20:20:40 +08:00
137 lines
4.0 KiB
TypeScript
137 lines
4.0 KiB
TypeScript
import request from "supertest";
|
|
import { configDotenv } from "dotenv";
|
|
import { ScrapeRequestInput } from "../../controllers/v1/types";
|
|
|
|
// Load environment variables via dotenv before any test runs; the request
// helper in this file reads process.env.TEST_API_KEY for its Authorization header.
configDotenv();
|
|
// Base URL handed to supertest for every request made by this file.
const TEST_URL = "http://127.0.0.1:3002";
|
|
|
|
/**
 * Posts a scrape request to the API at `TEST_URL` and resolves with the HTTP
 * response.
 *
 * The request is authenticated with a bearer token built from
 * `process.env.TEST_API_KEY` and is sent as JSON.
 *
 * @param body - Payload sent to `POST /v1/scrape`.
 * @returns The supertest response for the request.
 */
async function scrape(body: ScrapeRequestInput) {
  const authorization = `Bearer ${process.env.TEST_API_KEY}`;

  const pendingRequest = request(TEST_URL)
    .post("/v1/scrape")
    .set("Authorization", authorization)
    .set("Content-Type", "application/json")
    .send(body);

  return await pendingRequest;
}
|
|
|
|
/**
 * Asserts that a scrape response reports success.
 *
 * Checks, in order: the HTTP status is 200, `body.success` is `true`, and
 * `typeof body.data` is `"object"`.
 *
 * @param response - Response returned by the `scrape` helper.
 */
function expectScrapeToSucceed(response: Awaited<ReturnType<typeof scrape>>) {
  const { statusCode, body } = response;

  expect(statusCode).toBe(200);
  expect(body.success).toBe(true);
  expect(typeof body.data).toBe("object");
}
|
|
|
|
/**
 * End-to-end tests for the `/v1/scrape` endpoint.
 *
 * Every case sends a request through the `scrape` helper and validates the
 * response with `expectScrapeToSucceed` before any feature-specific assertion.
 * The trailing number on each `it` is its Jest timeout in milliseconds.
 */
describe("Scrape tests", () => {
  it("mocking works properly", async () => {
    // depends on falsified mock mocking-works-properly
    // this test will fail if mock is bypassed with real data -- firecrawl.dev will never have
    // that as its actual markdown output

    const response = await scrape({
      url: "http://firecrawl.dev",
      useMock: "mocking-works-properly",
    });

    expectScrapeToSucceed(response);
    expect(response.body.data.markdown).toBe(
      "this is fake data coming from the mocking system!",
    );
  }, 10000);

  describe("Ad blocking (f-e dependant)", () => {
    // The same page is scraped with and without ad blocking; the presence of a
    // doubleclick URL in the markdown is used as the signal that ads got through.
    it.concurrent("blocks ads by default", async () => {
      const response = await scrape({
        url: "https://canyoublockit.com/testing/",
      });

      expectScrapeToSucceed(response);
      expect(response.body.data.markdown).not.toContain(".g.doubleclick.net/");
    }, 10000);

    it.concurrent("doesn't block ads if explicitly disabled", async () => {
      const response = await scrape({
        url: "https://canyoublockit.com/testing/",
        blockAds: false,
      });

      expectScrapeToSucceed(response);
      expect(response.body.data.markdown).toContain(".g.doubleclick.net/");
    }, 10000);
  });

  describe("Location API (f-e dependant)", () => {
    it.concurrent("works without specifying an explicit location", async () => {
      const response = await scrape({
        url: "https://iplocation.com",
      });

      expectScrapeToSucceed(response);
    }, 10000);

    it.concurrent("works with country US", async () => {
      const response = await scrape({
        url: "https://iplocation.com",
        location: { country: "US" },
      });

      expectScrapeToSucceed(response);
      // The scraped page is expected to render the detected country as a markdown table row.
      expect(response.body.data.markdown).toContain("| Country | United States |");
    }, 10000);
  });

  describe("JSON scrape support", () => {
    it.concurrent("returns parseable JSON", async () => {
      const response = await scrape({
        url: "https://jsonplaceholder.typicode.com/todos/1",
        formats: ["rawHtml"],
      });

      expectScrapeToSucceed(response);
      // The target URL serves a JSON document, so the raw body must parse as JSON.
      const obj = JSON.parse(response.body.data.rawHtml);
      expect(obj.id).toBe(1);
    }, 25000); // TODO: mock and shorten
  });

  describe("Screenshot", () => {
    it.concurrent("screenshot format works", async () => {
      const response = await scrape({
        url: "http://firecrawl.dev",
        formats: ["screenshot"],
      });

      expectScrapeToSucceed(response);
      expect(typeof response.body.data.screenshot).toBe("string");
    }, 15000);

    it.concurrent("screenshot@fullPage format works", async () => {
      const response = await scrape({
        url: "http://firecrawl.dev",
        formats: ["screenshot@fullPage"],
      });

      expectScrapeToSucceed(response);
      expect(typeof response.body.data.screenshot).toBe("string");
    }, 15000);
  });

  describe("Proxy API (f-e dependant)", () => {
    // Each case checks the response explicitly: awaiting the request alone does
    // not fail the test when the API answers with an error status, so without
    // these assertions a failed scrape would still be reported as passing.
    it.concurrent("undefined works", async () => {
      const response = await scrape({
        url: "http://firecrawl.dev",
      });

      expectScrapeToSucceed(response);
    }, 15000);

    it.concurrent("basic works", async () => {
      const response = await scrape({
        url: "http://firecrawl.dev",
        proxy: "basic",
      });

      expectScrapeToSucceed(response);
    }, 15000);

    it.concurrent("stealth works", async () => {
      const response = await scrape({
        url: "http://firecrawl.dev",
        proxy: "stealth",
      });

      expectScrapeToSucceed(response);
    }, 15000);
  });
});
|