mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-17 01:15:58 +08:00
Nick: all tests passing
This commit is contained in:
parent
5205c5f005
commit
ba5279eafc
@@ -1,6 +1,10 @@
 import request from "supertest";
 import dotenv from "dotenv";
-import { FirecrawlCrawlResponse, FirecrawlCrawlStatusResponse, FirecrawlScrapeResponse } from "../../types";
+import {
+  FirecrawlCrawlResponse,
+  FirecrawlCrawlStatusResponse,
+  FirecrawlScrapeResponse,
+} from "../../types";

 dotenv.config();

 const TEST_URL = "http://127.0.0.1:3002";
@@ -24,20 +28,27 @@ describe("E2E Tests for v0 API Routes", () => {

   describe("POST /v0/scrape", () => {
     it.concurrent("should require authorization", async () => {
-      const response: FirecrawlScrapeResponse = await request(TEST_URL).post("/v0/scrape");
+      const response: FirecrawlScrapeResponse = await request(TEST_URL).post(
+        "/v0/scrape"
+      );
       expect(response.statusCode).toBe(401);
     });

-    it.concurrent("should return an error response with an invalid API key", async () => {
+    it.concurrent(
+      "should return an error response with an invalid API key",
+      async () => {
         const response: FirecrawlScrapeResponse = await request(TEST_URL)
           .post("/v0/scrape")
           .set("Authorization", `Bearer invalid-api-key`)
           .set("Content-Type", "application/json")
           .send({ url: "https://firecrawl.dev" });
         expect(response.statusCode).toBe(401);
-    });
+      }
+    );

-    it.concurrent("should return a successful response with a valid API key", async () => {
+    it.concurrent(
+      "should return a successful response with a valid API key",
+      async () => {
         const response: FirecrawlScrapeResponse = await request(TEST_URL)
           .post("/v0/scrape")
           .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -52,21 +63,36 @@ describe("E2E Tests for v0 API Routes", () => {
         expect(response.body.data.content).toContain("_Roast_");
         expect(response.body.data.metadata.pageError).toBeUndefined();
         expect(response.body.data.metadata.title).toBe("Roast My Website");
-      expect(response.body.data.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
-      expect(response.body.data.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
+        expect(response.body.data.metadata.description).toBe(
+          "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️"
+        );
+        expect(response.body.data.metadata.keywords).toBe(
+          "Roast My Website,Roast,Website,GitHub,Firecrawl"
+        );
         expect(response.body.data.metadata.robots).toBe("follow, index");
         expect(response.body.data.metadata.ogTitle).toBe("Roast My Website");
-      expect(response.body.data.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
-      expect(response.body.data.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
-      expect(response.body.data.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
+        expect(response.body.data.metadata.ogDescription).toBe(
+          "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️"
+        );
+        expect(response.body.data.metadata.ogUrl).toBe(
+          "https://www.roastmywebsite.ai"
+        );
+        expect(response.body.data.metadata.ogImage).toBe(
+          "https://www.roastmywebsite.ai/og.png"
+        );
         expect(response.body.data.metadata.ogLocaleAlternate).toStrictEqual([]);
         expect(response.body.data.metadata.ogSiteName).toBe("Roast My Website");
-      expect(response.body.data.metadata.sourceURL).toBe("https://roastmywebsite.ai");
+        expect(response.body.data.metadata.sourceURL).toBe(
+          "https://roastmywebsite.ai"
+        );
         expect(response.body.data.metadata.pageStatusCode).toBe(200);
-    }, 30000); // 30 seconds timeout
+      },
+      30000
+    ); // 30 seconds timeout


-    it.concurrent("should return a successful response with a valid API key and includeHtml set to true", async () => {
+    it.concurrent(
+      "should return a successful response with a valid API key and includeHtml set to true",
+      async () => {
         const response: FirecrawlScrapeResponse = await request(TEST_URL)
           .post("/v0/scrape")
           .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -86,44 +112,61 @@ describe("E2E Tests for v0 API Routes", () => {
         expect(response.body.data.html).toContain("<h1");
         expect(response.body.data.metadata.pageStatusCode).toBe(200);
         expect(response.body.data.metadata.pageError).toBeUndefined();
-    }, 30000); // 30 seconds timeout
+      },
+      30000
+    ); // 30 seconds timeout

-    it.concurrent('should return a successful response for a valid scrape with PDF file', async () => {
+    it.concurrent(
+      "should return a successful response for a valid scrape with PDF file",
+      async () => {
         const response: FirecrawlScrapeResponse = await request(TEST_URL)
-      .post('/v0/scrape')
-      .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
-      .set('Content-Type', 'application/json')
-      .send({ url: 'https://arxiv.org/pdf/astro-ph/9301001.pdf' });
+          .post("/v0/scrape")
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Content-Type", "application/json")
+          .send({ url: "https://arxiv.org/pdf/astro-ph/9301001.pdf" });
         await new Promise((r) => setTimeout(r, 6000));

         expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
-      expect(response.body.data).toHaveProperty('content');
-      expect(response.body.data).toHaveProperty('metadata');
-      expect(response.body.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
+        expect(response.body).toHaveProperty("data");
+        expect(response.body.data).toHaveProperty("content");
+        expect(response.body.data).toHaveProperty("metadata");
+        expect(response.body.data.content).toContain(
+          "We present spectrophotometric observations of the Broad Line Radio Galaxy"
+        );
         expect(response.body.data.metadata.pageStatusCode).toBe(200);
         expect(response.body.data.metadata.pageError).toBeUndefined();
-    }, 60000); // 60 seconds
+      },
+      60000
+    ); // 60 seconds

-    it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
+    it.concurrent(
+      "should return a successful response for a valid scrape with PDF file without explicit .pdf extension",
+      async () => {
         const response: FirecrawlScrapeResponse = await request(TEST_URL)
-      .post('/v0/scrape')
-      .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
-      .set('Content-Type', 'application/json')
-      .send({ url: 'https://arxiv.org/pdf/astro-ph/9301001' });
+          .post("/v0/scrape")
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Content-Type", "application/json")
+          .send({ url: "https://arxiv.org/pdf/astro-ph/9301001" });
         await new Promise((r) => setTimeout(r, 6000));

         expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
-      expect(response.body.data).toHaveProperty('content');
-      expect(response.body.data).toHaveProperty('metadata');
-      expect(response.body.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
+        expect(response.body).toHaveProperty("data");
+        expect(response.body.data).toHaveProperty("content");
+        expect(response.body.data).toHaveProperty("metadata");
+        expect(response.body.data.content).toContain(
+          "We present spectrophotometric observations of the Broad Line Radio Galaxy"
+        );
         expect(response.body.data.metadata.pageStatusCode).toBe(200);
         expect(response.body.data.metadata.pageError).toBeUndefined();
-    }, 60000); // 60 seconds
+      },
+      60000
+    ); // 60 seconds

-    it.concurrent("should return a successful response with a valid API key with removeTags option", async () => {
-      const responseWithoutRemoveTags: FirecrawlScrapeResponse = await request(TEST_URL)
+    it.concurrent(
+      "should return a successful response with a valid API key with removeTags option",
+      async () => {
+        const responseWithoutRemoveTags: FirecrawlScrapeResponse =
+          await request(TEST_URL)
             .post("/v0/scrape")
             .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
             .set("Content-Type", "application/json")
@@ -134,16 +177,27 @@ describe("E2E Tests for v0 API Routes", () => {
         expect(responseWithoutRemoveTags.body.data).toHaveProperty("markdown");
         expect(responseWithoutRemoveTags.body.data).toHaveProperty("metadata");
         expect(responseWithoutRemoveTags.body.data).not.toHaveProperty("html");
-      expect(responseWithoutRemoveTags.body.data.content).toContain("Scrape This Site");
-      expect(responseWithoutRemoveTags.body.data.content).toContain("Lessons and Videos"); // #footer
-      expect(responseWithoutRemoveTags.body.data.content).toContain("[Sandbox]("); // .nav
-      expect(responseWithoutRemoveTags.body.data.content).toContain("web scraping"); // strong
+        expect(responseWithoutRemoveTags.body.data.content).toContain(
+          "Scrape This Site"
+        );
+        expect(responseWithoutRemoveTags.body.data.content).toContain(
+          "Lessons and Videos"
+        ); // #footer
+        expect(responseWithoutRemoveTags.body.data.content).toContain(
+          "[Sandbox]("
+        ); // .nav
+        expect(responseWithoutRemoveTags.body.data.content).toContain(
+          "web scraping"
+        ); // strong

         const response: FirecrawlScrapeResponse = await request(TEST_URL)
           .post("/v0/scrape")
           .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
           .set("Content-Type", "application/json")
-      .send({ url: "https://www.scrapethissite.com/", pageOptions: { removeTags: ['.nav', '#footer', 'strong'] } });
+          .send({
+            url: "https://www.scrapethissite.com/",
+            pageOptions: { removeTags: [".nav", "#footer", "strong"] },
+          });
         expect(response.statusCode).toBe(200);
         expect(response.body).toHaveProperty("data");
         expect(response.body.data).toHaveProperty("content");
@@ -154,118 +208,157 @@ describe("E2E Tests for v0 API Routes", () => {
         expect(response.body.data.content).not.toContain("Lessons and Videos"); // #footer
         expect(response.body.data.content).not.toContain("[Sandbox]("); // .nav
         expect(response.body.data.content).not.toContain("web scraping"); // strong
-    }, 30000); // 30 seconds timeout
+      },
+      30000
+    ); // 30 seconds timeout

-    it.concurrent('should return a successful response for a scrape with 400 page', async () => {
+    it.concurrent(
+      "should return a successful response for a scrape with 400 page",
+      async () => {
         const response: FirecrawlScrapeResponse = await request(TEST_URL)
-      .post('/v0/scrape')
-      .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
-      .set('Content-Type', 'application/json')
-      .send({ url: 'https://httpstat.us/400' });
+          .post("/v0/scrape")
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Content-Type", "application/json")
+          .send({ url: "https://httpstat.us/400" });
         await new Promise((r) => setTimeout(r, 5000));

         expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
-      expect(response.body.data).toHaveProperty('content');
-      expect(response.body.data).toHaveProperty('metadata');
+        expect(response.body).toHaveProperty("data");
+        expect(response.body.data).toHaveProperty("content");
+        expect(response.body.data).toHaveProperty("metadata");
         expect(response.body.data.metadata.pageStatusCode).toBe(400);
-      expect(response.body.data.metadata.pageError.toLowerCase()).toContain("bad request");
-    }, 60000); // 60 seconds
+        expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
+          "bad request"
+        );
+      },
+      60000
+    ); // 60 seconds

-    it.concurrent('should return a successful response for a scrape with 401 page', async () => {
+    it.concurrent(
+      "should return a successful response for a scrape with 401 page",
+      async () => {
         const response: FirecrawlScrapeResponse = await request(TEST_URL)
-      .post('/v0/scrape')
-      .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
-      .set('Content-Type', 'application/json')
-      .send({ url: 'https://httpstat.us/401' });
+          .post("/v0/scrape")
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Content-Type", "application/json")
+          .send({ url: "https://httpstat.us/401" });
         await new Promise((r) => setTimeout(r, 5000));

         expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
-      expect(response.body.data).toHaveProperty('content');
-      expect(response.body.data).toHaveProperty('metadata');
+        expect(response.body).toHaveProperty("data");
+        expect(response.body.data).toHaveProperty("content");
+        expect(response.body.data).toHaveProperty("metadata");
         expect(response.body.data.metadata.pageStatusCode).toBe(401);
-      expect(response.body.data.metadata.pageError.toLowerCase()).toContain("unauthorized");
-    }, 60000); // 60 seconds
+        expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
+          "unauthorized"
+        );
+      },
+      60000
+    ); // 60 seconds

-    it.concurrent("should return a successful response for a scrape with 403 page", async () => {
+    it.concurrent(
+      "should return a successful response for a scrape with 403 page",
+      async () => {
         const response: FirecrawlScrapeResponse = await request(TEST_URL)
-      .post('/v0/scrape')
-      .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
-      .set('Content-Type', 'application/json')
-      .send({ url: 'https://httpstat.us/403' });
+          .post("/v0/scrape")
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Content-Type", "application/json")
+          .send({ url: "https://httpstat.us/403" });

         await new Promise((r) => setTimeout(r, 5000));
         expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
-      expect(response.body.data).toHaveProperty('content');
-      expect(response.body.data).toHaveProperty('metadata');
+        expect(response.body).toHaveProperty("data");
+        expect(response.body.data).toHaveProperty("content");
+        expect(response.body.data).toHaveProperty("metadata");
         expect(response.body.data.metadata.pageStatusCode).toBe(403);
-      expect(response.body.data.metadata.pageError.toLowerCase()).toContain("forbidden");
-    }, 60000); // 60 seconds
+        expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
+          "forbidden"
+        );
+      },
+      60000
+    ); // 60 seconds

-    it.concurrent('should return a successful response for a scrape with 404 page', async () => {
+    it.concurrent(
+      "should return a successful response for a scrape with 404 page",
+      async () => {
         const response: FirecrawlScrapeResponse = await request(TEST_URL)
-      .post('/v0/scrape')
-      .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
-      .set('Content-Type', 'application/json')
-      .send({ url: 'https://httpstat.us/404' });
+          .post("/v0/scrape")
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Content-Type", "application/json")
+          .send({ url: "https://httpstat.us/404" });
         await new Promise((r) => setTimeout(r, 5000));

         expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
-      expect(response.body.data).toHaveProperty('content');
-      expect(response.body.data).toHaveProperty('metadata');
+        expect(response.body).toHaveProperty("data");
+        expect(response.body.data).toHaveProperty("content");
+        expect(response.body.data).toHaveProperty("metadata");
         expect(response.body.data.metadata.pageStatusCode).toBe(404);
-    }, 60000); // 60 seconds
+      },
+      60000
+    ); // 60 seconds

-    it.concurrent('should return a successful response for a scrape with 405 page', async () => {
+    it.concurrent(
+      "should return a successful response for a scrape with 405 page",
+      async () => {
         const response = await request(TEST_URL)
-      .post('/v0/scrape')
-      .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
-      .set('Content-Type', 'application/json')
-      .send({ url: 'https://httpstat.us/405' });
+          .post("/v0/scrape")
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Content-Type", "application/json")
+          .send({ url: "https://httpstat.us/405" });
         await new Promise((r) => setTimeout(r, 5000));

         expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
-      expect(response.body.data).toHaveProperty('content');
-      expect(response.body.data).toHaveProperty('metadata');
+        expect(response.body).toHaveProperty("data");
+        expect(response.body.data).toHaveProperty("content");
+        expect(response.body.data).toHaveProperty("metadata");
         expect(response.body.data.metadata.pageStatusCode).toBe(405);
-    }, 60000); // 60 seconds
+      },
+      60000
+    ); // 60 seconds

-    it.concurrent('should return a successful response for a scrape with 500 page', async () => {
+    it.concurrent(
+      "should return a successful response for a scrape with 500 page",
+      async () => {
         const response: FirecrawlScrapeResponse = await request(TEST_URL)
-      .post('/v0/scrape')
-      .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
-      .set('Content-Type', 'application/json')
-      .send({ url: 'https://httpstat.us/500' });
+          .post("/v0/scrape")
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Content-Type", "application/json")
+          .send({ url: "https://httpstat.us/500" });
         await new Promise((r) => setTimeout(r, 5000));

         expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
-      expect(response.body.data).toHaveProperty('content');
-      expect(response.body.data).toHaveProperty('metadata');
+        expect(response.body).toHaveProperty("data");
+        expect(response.body.data).toHaveProperty("content");
+        expect(response.body.data).toHaveProperty("metadata");
         expect(response.body.data.metadata.pageStatusCode).toBe(500);
-    }, 60000); // 60 seconds
+      },
+      60000
+    ); // 60 seconds
   });

   describe("POST /v0/crawl", () => {
     it.concurrent("should require authorization", async () => {
-    const response: FirecrawlCrawlResponse = await request(TEST_URL).post("/v0/crawl");
+      const response: FirecrawlCrawlResponse = await request(TEST_URL).post(
+        "/v0/crawl"
+      );
       expect(response.statusCode).toBe(401);
     });

-    it.concurrent("should return an error response with an invalid API key", async () => {
+    it.concurrent(
+      "should return an error response with an invalid API key",
+      async () => {
         const response: FirecrawlCrawlResponse = await request(TEST_URL)
           .post("/v0/crawl")
           .set("Authorization", `Bearer invalid-api-key`)
           .set("Content-Type", "application/json")
           .send({ url: "https://firecrawl.dev" });
         expect(response.statusCode).toBe(401);
-    });
+      }
+    );

-    it.concurrent("should return a successful response with a valid API key for crawl", async () => {
+    it.concurrent(
+      "should return a successful response with a valid API key for crawl",
+      async () => {
         const response: FirecrawlCrawlResponse = await request(TEST_URL)
           .post("/v0/crawl")
           .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -276,9 +369,12 @@ describe("E2E Tests for v0 API Routes", () => {
         expect(response.body.jobId).toMatch(
           /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
         );
-    });
+      }
+    );

-    it.concurrent("should return a successful response with a valid API key and valid includes option", async () => {
+    it.concurrent(
+      "should return a successful response with a valid API key and valid includes option",
+      async () => {
         const crawlResponse: FirecrawlCrawlResponse = await request(TEST_URL)
           .post("/v0/crawl")
           .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -329,11 +425,19 @@ describe("E2E Tests for v0 API Routes", () => {
         expect(completedResponse.body.data[0]).toHaveProperty("markdown");
         expect(completedResponse.body.data[0]).toHaveProperty("metadata");
         expect(completedResponse.body.data[0].content).toContain("Mendable");
-      expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(200);
-      expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
-    }, 180000); // 180 seconds
+        expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
+          200
+        );
+        expect(
+          completedResponse.body.data[0].metadata.pageError
+        ).toBeUndefined();
+      },
+      180000
+    ); // 180 seconds

-    it.concurrent("should return a successful response with a valid API key and valid excludes option", async () => {
+    it.concurrent(
+      "should return a successful response with a valid API key and valid excludes option",
+      async () => {
         const crawlResponse: FirecrawlCrawlResponse = await request(TEST_URL)
           .post("/v0/crawl")
           .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -364,7 +468,9 @@ describe("E2E Tests for v0 API Routes", () => {
         }

         await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database
-      const completedResponse: FirecrawlCrawlStatusResponse = await request(TEST_URL)
+        const completedResponse: FirecrawlCrawlStatusResponse = await request(
+          TEST_URL
+        )
           .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
           .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);

@@ -375,9 +481,13 @@ describe("E2E Tests for v0 API Routes", () => {
         urls.forEach((url: string) => {
           expect(url.startsWith("https://wwww.mendable.ai/blog/")).toBeFalsy();
         });
-    }, 90000); // 90 seconds
+      },
+      90000
+    ); // 90 seconds

-    it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
+    it.concurrent(
+      "should return a successful response with max depth option for a valid crawl job",
+      async () => {
         const crawlResponse: FirecrawlCrawlResponse = await request(TEST_URL)
           .post("/v0/crawl")
           .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -406,7 +516,9 @@ describe("E2E Tests for v0 API Routes", () => {
           await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
         }
         }
-      const completedResponse: FirecrawlCrawlStatusResponse = await request(TEST_URL)
+        const completedResponse: FirecrawlCrawlStatusResponse = await request(
+          TEST_URL
+        )
           .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
           .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);

@@ -417,8 +529,12 @@ describe("E2E Tests for v0 API Routes", () => {
         expect(completedResponse.body.data[0]).toHaveProperty("content");
         expect(completedResponse.body.data[0]).toHaveProperty("markdown");
         expect(completedResponse.body.data[0]).toHaveProperty("metadata");
-      expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(200);
-      expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
+        expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
+          200
+        );
+        expect(
+          completedResponse.body.data[0].metadata.pageError
+        ).toBeUndefined();
         const urls = completedResponse.body.data.map(
           (item: any) => item.metadata?.sourceURL
         );
@@ -426,29 +542,43 @@ describe("E2E Tests for v0 API Routes", () => {

         // Check if all URLs have a maximum depth of 1
         urls.forEach((url: string) => {
-        const pathSplits = new URL(url).pathname.split('/');
-        const depth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0);
+          const pathSplits = new URL(url).pathname.split("/");
+          const depth =
+            pathSplits.length -
+            (pathSplits[0].length === 0 &&
+            pathSplits[pathSplits.length - 1].length === 0
+              ? 1
+              : 0);
           expect(depth).toBeLessThanOrEqual(2);
         });
-    }, 180000);
+      },
+      180000
+    );
   });
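
The max-depth assertion reflowed above packs the whole rule into one expression. Restated as a standalone helper (illustrative only, not part of the diff), it counts pathname segments and drops one when the path both starts and ends with a slash:

function pathDepth(url: string): number {
  // "/" -> ["", ""], "/blog/" -> ["", "blog", ""], "/blog" -> ["", "blog"]
  const pathSplits = new URL(url).pathname.split("/");
  const surroundedBySlashes =
    pathSplits[0].length === 0 &&
    pathSplits[pathSplits.length - 1].length === 0;
  return pathSplits.length - (surroundedBySlashes ? 1 : 0);
}

// pathDepth("https://mendable.ai/")      -> 1
// pathDepth("https://mendable.ai/blog/") -> 2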

  describe("POST /v0/crawlWebsitePreview", () => {
    it.concurrent("should require authorization", async () => {
-    const response: FirecrawlCrawlResponse = await request(TEST_URL).post("/v0/crawlWebsitePreview");
+      const response: FirecrawlCrawlResponse = await request(TEST_URL).post(
+        "/v0/crawlWebsitePreview"
+      );
      expect(response.statusCode).toBe(401);
    });

-    it.concurrent("should return an error response with an invalid API key", async () => {
+    it.concurrent(
+      "should return an error response with an invalid API key",
+      async () => {
        const response: FirecrawlCrawlResponse = await request(TEST_URL)
          .post("/v0/crawlWebsitePreview")
          .set("Authorization", `Bearer invalid-api-key`)
          .set("Content-Type", "application/json")
          .send({ url: "https://firecrawl.dev" });
        expect(response.statusCode).toBe(401);
-    });
+      }
+    );

-    it.concurrent("should return a timeout error when scraping takes longer than the specified timeout", async () => {
+    it.concurrent(
+      "should return a timeout error when scraping takes longer than the specified timeout",
+      async () => {
        const response: FirecrawlCrawlResponse = await request(TEST_URL)
          .post("/v0/scrape")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -456,7 +586,9 @@ describe("E2E Tests for v0 API Routes", () => {
          .send({ url: "https://firecrawl.dev", timeout: 1000 });

        expect(response.statusCode).toBe(408);
-    }, 3000);
+      },
+      3000
+    );
  });

  describe("POST /v0/search", () => {
@@ -465,16 +597,21 @@ describe("E2E Tests for v0 API Routes", () => {
      expect(response.statusCode).toBe(401);
    });

-    it.concurrent("should return an error response with an invalid API key", async () => {
+    it.concurrent(
+      "should return an error response with an invalid API key",
+      async () => {
        const response = await request(TEST_URL)
          .post("/v0/search")
          .set("Authorization", `Bearer invalid-api-key`)
          .set("Content-Type", "application/json")
          .send({ query: "test" });
        expect(response.statusCode).toBe(401);
-    });
+      }
+    );

-    it.concurrent("should return a successful response with a valid API key for search", async () => {
+    it.concurrent(
+      "should return a successful response with a valid API key for search",
+      async () => {
        const response = await request(TEST_URL)
          .post("/v0/search")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -484,7 +621,9 @@ describe("E2E Tests for v0 API Routes", () => {
        expect(response.body).toHaveProperty("success");
        expect(response.body.success).toBe(true);
        expect(response.body).toHaveProperty("data");
-    }, 60000); // 60 seconds timeout
+      },
+      60000
+    ); // 60 seconds timeout
  });

  describe("GET /v0/crawl/status/:jobId", () => {
@@ -493,21 +632,29 @@ describe("E2E Tests for v0 API Routes", () => {
      expect(response.statusCode).toBe(401);
    });

-    it.concurrent("should return an error response with an invalid API key", async () => {
+    it.concurrent(
+      "should return an error response with an invalid API key",
+      async () => {
        const response = await request(TEST_URL)
          .get("/v0/crawl/status/123")
          .set("Authorization", `Bearer invalid-api-key`);
        expect(response.statusCode).toBe(401);
-    });
+      }
+    );

-    it.concurrent("should return Job not found for invalid job ID", async () => {
+    it.concurrent(
+      "should return Job not found for invalid job ID",
+      async () => {
        const response = await request(TEST_URL)
          .get("/v0/crawl/status/invalidJobId")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
        expect(response.statusCode).toBe(404);
-    });
+      }
+    );

-    it.concurrent("should return a successful crawl status response for a valid crawl job", async () => {
+    it.concurrent(
+      "should return a successful crawl status response for a valid crawl job",
+      async () => {
        const crawlResponse = await request(TEST_URL)
          .post("/v0/crawl")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -543,15 +690,24 @@ describe("E2E Tests for v0 API Routes", () => {
        expect(completedResponse.body.data[0]).toHaveProperty("markdown");
        expect(completedResponse.body.data[0]).toHaveProperty("metadata");
        expect(completedResponse.body.data[0].content).toContain("Mendable");
-      expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(200);
-      expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
+        expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
+          200
+        );
+        expect(
+          completedResponse.body.data[0].metadata.pageError
+        ).toBeUndefined();

-      const childrenLinks = completedResponse.body.data.filter(doc =>
-        doc.metadata && doc.metadata.sourceURL && doc.metadata.sourceURL.includes("mendable.ai/blog")
+        const childrenLinks = completedResponse.body.data.filter(
+          (doc) =>
+            doc.metadata &&
+            doc.metadata.sourceURL &&
+            doc.metadata.sourceURL.includes("mendable.ai/blog")
        );

        expect(childrenLinks.length).toBe(completedResponse.body.data.length);
-    }, 180000); // 120 seconds
+      },
+      180000
+    ); // 120 seconds

    // TODO: review the test below
    // it.concurrent('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension ', async () => {
@@ -599,7 +755,9 @@ describe("E2E Tests for v0 API Routes", () => {
    // expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
    // }, 180000); // 120 seconds

-    it.concurrent("If someone cancels a crawl job, it should turn into failed status", async () => {
+    it.concurrent(
+      "If someone cancels a crawl job, it should turn into failed status",
+      async () => {
        const crawlResponse = await request(TEST_URL)
          .post("/v0/crawl")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -628,22 +786,39 @@ describe("E2E Tests for v0 API Routes", () => {
        expect(completedResponse.body).toHaveProperty("data");

        let isNullOrEmptyArray = false;
-      if (completedResponse.body.data === null || completedResponse.body.data.length === 0) {
+        if (
+          completedResponse.body.data === null ||
+          completedResponse.body.data.length === 0
+        ) {
          isNullOrEmptyArray = true;
        }
        expect(isNullOrEmptyArray).toBe(true);
        expect(completedResponse.body.data).toEqual(expect.arrayContaining([]));
        expect(completedResponse.body).toHaveProperty("partial_data");
-      expect(completedResponse.body.partial_data[0]).toHaveProperty("content");
-      expect(completedResponse.body.partial_data[0]).toHaveProperty("markdown");
-      expect(completedResponse.body.partial_data[0]).toHaveProperty("metadata");
-      expect(completedResponse.body.partial_data[0].metadata.pageStatusCode).toBe(200);
-      expect(completedResponse.body.partial_data[0].metadata.pageError).toBeUndefined();
-    }, 60000); // 60 seconds
+        expect(completedResponse.body.partial_data[0]).toHaveProperty(
+          "content"
+        );
+        expect(completedResponse.body.partial_data[0]).toHaveProperty(
+          "markdown"
+        );
+        expect(completedResponse.body.partial_data[0]).toHaveProperty(
+          "metadata"
+        );
+        expect(
+          completedResponse.body.partial_data[0].metadata.pageStatusCode
+        ).toBe(200);
+        expect(
+          completedResponse.body.partial_data[0].metadata.pageError
+        ).toBeUndefined();
+      },
+      60000
+    ); // 60 seconds
  });

  describe("POST /v0/scrape with LLM Extraction", () => {
-    it.concurrent("should extract data using LLM extraction mode", async () => {
+    it.concurrent(
+      "should extract data using LLM extraction mode",
+      async () => {
        const response = await request(TEST_URL)
          .post("/v0/scrape")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -690,6 +865,100 @@ describe("E2E Tests for v0 API Routes", () => {
        expect(llmExtraction).toHaveProperty("is_open_source");
        expect(llmExtraction.is_open_source).toBe(false);
        expect(typeof llmExtraction.is_open_source).toBe("boolean");
-    }, 60000); // 60 secs
+      },
+      60000
+    ); // 60 secs
  });

+  describe("POST /v0/map", () => {
+    it.concurrent(
+      "should return a list of links for mendable.ai without subdomains included",
+      async () => {
+        const response = await request(TEST_URL)
+          .post("/v1/map")
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Content-Type", "application/json")
+          .send({
+            url: "https://mendable.ai",
+          });
+
+        expect(response.statusCode).toBe(200);
+        expect(response.body).toHaveProperty("success", true);
+        expect(response.body).toHaveProperty("links");
+        expect(response.body.links).not.toContain("https://docs.mendable.ai");
+        expect(Array.isArray(response.body.links)).toBe(true);
+        expect(response.body.links.length).toBeGreaterThan(0);
+      },
+      60000
+    ); // 60 secs
+
+    it.concurrent(
+      "should return a list of links for a given URL with subdomains included",
+      async () => {
+        const response = await request(TEST_URL)
+          .post("/v1/map")
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Content-Type", "application/json")
+          .send({
+            url: "https://python.langchain.com",
+            includeSubdomains: true,
+          });
+
+        expect(response.statusCode).toBe(200);
+        expect(response.body).toHaveProperty("success", true);
+        expect(response.body).toHaveProperty("links");
+        expect(Array.isArray(response.body.links)).toBe(true);
+        expect(response.body.links.length).toBeGreaterThan(0);
+      },
+      60000
+    ); // 60 secs
+
+    it.concurrent(
+      "should return a list of links for a given URL with subdomains and search",
+      async () => {
+        const response = await request(TEST_URL)
+          .post("/v1/map")
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Content-Type", "application/json")
+          .send({
+            url: "https://python.langchain.com",
+            includeSubdomains: true,
+            search: "agents",
+          });
+
+        expect(response.statusCode).toBe(200);
+        expect(response.body).toHaveProperty("success", true);
+        expect(response.body).toHaveProperty("links");
+        expect(response.body.links).toContain(
+          "https://api.python.langchain.com/en/latest/_modules/langchain/agents/openai_functions_agent/base.html"
+        );
+        expect(Array.isArray(response.body.links)).toBe(true);
+        expect(response.body.links.length).toBeGreaterThan(0);
+        response.body.links.forEach((link) => {
+          expect(link).toContain("python.langchain.com");
+        });
+      },
+      60000
+    ); // 60 secs
+
+    it.concurrent(
+      "should handle invalid URL input gracefully",
+      async () => {
+        const response = await request(TEST_URL)
+          .post("/v1/map")
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Content-Type", "application/json")
+          .send({
+            url: "invalid-url",
+            includeSubdomains: true,
+            search: "agents",
+          });
+
+        expect(response.statusCode).toBe(400);
+        expect(response.body).toHaveProperty("success", false);
+        expect(response.body).toHaveProperty("details");
+      },
+      60000
+    ); // 60 secs
+  });
 });
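
Aside from the new POST /v0/map suite, nearly every hunk above is mechanical reformatting: single quotes become double quotes and long call expressions are rewrapped at roughly 80 columns, which matches Prettier's defaults. A minimal sketch of such a formatting pass follows; the config values are assumptions inferred from the diff, not taken from the repository:

import { readFile, writeFile } from "fs/promises";
import * as prettier from "prettier";

// Reformat one TypeScript file in place with the style this diff exhibits.
async function formatFile(path: string): Promise<void> {
  const source = await readFile(path, "utf8");
  const formatted = await prettier.format(source, {
    parser: "typescript",
    printWidth: 80, // assumed: the width the rewrapped calls respect
    singleQuote: false, // assumed: the diff converts '...' to "..."
  });
  await writeFile(path, formatted);
}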
@@ -41,7 +41,8 @@ export async function mapController(

  const crawler = crawlToCrawler(id, sc);

-  const sitemap = sc.crawlerOptions.ignoreSitemap
+  const sitemap =
+    sc.crawlerOptions.ignoreSitemap || req.body.search
      ? null
      : await crawler.tryGetSitemap();

@@ -51,13 +52,23 @@ export async function mapController(
    });
  }

-  const mapResults = await fireEngineMap(`site:${req.body.url}`, {
+  let mapUrl = req.body.search
+    ? `"${req.body.search}" site:${req.body.url}`
+    : `site:${req.body.url}`;
+  console.log(mapUrl);
+  // www. seems to exclude subdomains in some cases
+  const mapResults = await fireEngineMap(mapUrl, {
    numResults: 50,
  });
+  console.log(mapResults);

  if (mapResults.length > 0) {
    mapResults.map((x) => {
+      if (req.body.search) {
+        links.unshift(x.url);
+      } else {
        links.push(x.url);
+      }
    });
  }
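
The controller change threads an optional search term into the site-restricted query handed to fireEngineMap, and promotes search hits to the front of the link list. Pulled out as a standalone function for clarity (a sketch; the controller inlines this logic):

// `search` and `url` mirror req.body.search and req.body.url above.
function buildMapQuery(url: string, search?: string): string {
  return search ? `"${search}" site:${url}` : `site:${url}`;
}

// buildMapQuery("https://mendable.ai", "agents")
//   -> '"agents" site:https://mendable.ai'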
@@ -3,9 +3,16 @@ import { z } from "zod";
 import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
 import { PageOptions } from "../../lib/entities";

-export type Format = "markdown" | "html" | "rawHtml" | "links" | "screenshot" | "screenshot@fullPage";
+export type Format =
+  | "markdown"
+  | "html"
+  | "rawHtml"
+  | "links"
+  | "screenshot"
+  | "screenshot@fullPage";

-const url = z.preprocess(x => {
+const url = z.preprocess(
+  (x) => {
    if (typeof x === "string" && !/^([^.:]+:\/\/)/.test(x)) {
      if (x.startsWith("://")) {
        return "http" + x;
@@ -15,10 +22,27 @@ const url = z.preprocess(x => {
    } else {
      return x;
    }
-}, z.string().url().regex(/^https?:\/\//, "URL uses unsupported protocol").refine(x => !isUrlBlocked(x), "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."));
+  },
+  z
+    .string()
+    .url()
+    .regex(/^https?:\/\//, "URL uses unsupported protocol")
+    .refine(
+      (x) => !isUrlBlocked(x),
+      "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+    )
+);
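
Expected behavior of the reflowed url schema, assuming the branch elided by the hunk prepends "http://" to scheme-less input (only the "://"-prefix case is visible above):

url.parse("https://firecrawl.dev"); // passes validation unchanged
url.parse("firecrawl.dev"); // -> "http://firecrawl.dev" after preprocess (assumed branch)
url.parse("ftp://example.com"); // throws: "URL uses unsupported protocol"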

 export const scrapeOptions = z.object({
-  formats: z.enum(["markdown", "html", "rawHtml", "links", "screenshot", "screenshot@fullPage"])
+  formats: z
+    .enum([
+      "markdown",
+      "html",
+      "rawHtml",
+      "links",
+      "screenshot",
+      "screenshot@fullPage",
+    ])
    .array()
    .optional()
    .default(["markdown"]),
@@ -34,7 +58,7 @@ export const scrapeOptions = z.object({
 export type ScrapeOptions = z.infer<typeof scrapeOptions>;

 export const scrapeRequestSchema = scrapeOptions.extend({
-  url: z.string().url(),
+  url,
  origin: z.string().optional().default("api"),
 });

@@ -90,10 +114,10 @@ export const crawlRequestSchema = z.object({
 export type CrawlRequest = z.infer<typeof crawlRequestSchema>;

 export const mapRequestSchema = crawlerOptions.extend({
-  url,
+  url: z.string().url(),
  origin: z.string().optional().default("api"),
  includeSubdomains: z.boolean().default(false),
-  searchEngine: z.string().optional(),
+  search: z.string().optional(),
 });

 // export type MapRequest = {
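
A hedged usage sketch for mapRequestSchema; it assumes the crawlerOptions fields being extended are optional or defaulted, which this diff does not show:

const mapRequest = mapRequestSchema.parse({
  url: "https://python.langchain.com",
  includeSubdomains: true,
  search: "agents",
});
// mapRequest.origin defaults to "api"; `search` stays optional.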
@@ -104,11 +128,11 @@ export const mapRequestSchema = crawlerOptions.extend({
 export type MapRequest = z.infer<typeof mapRequestSchema>;

 export type Document = {
-  markdown?: string,
-  html?: string,
-  rawHtml?: string,
-  links?: string[],
-  screenshot?: string,
+  markdown?: string;
+  html?: string;
+  rawHtml?: string;
+  links?: string[];
+  screenshot?: string;
  metadata: {
    title?: string;
    description?: string;
@@ -142,8 +166,8 @@ export type Document = {
    sourceURL?: string;
    statusCode?: number;
    error?: string;
-  },
-}
+  };
+};

 export type ErrorResponse = {
  success: false;
@@ -151,11 +175,13 @@ export type ErrorResponse = {
  details?: any;
 };

-export type ScrapeResponse = ErrorResponse | {
+export type ScrapeResponse =
+  | ErrorResponse
+  | {
      success: true;
      warning?: string;
      data: Document;
-};
+    };
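
Rewritten as an explicit union, ScrapeResponse narrows on the success discriminant; a small illustrative consumer (not part of the diff):

function unwrapScrape(res: ScrapeResponse): Document {
  if (res.success) {
    return res.data; // narrowed to the success branch
  }
  // narrowed to ErrorResponse here
  throw new Error("scrape failed");
}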

 export interface ScrapeResponseRequestTest {
  statusCode: number;
@@ -163,40 +189,54 @@ export interface ScrapeResponseRequestTest {
  error?: string;
 }

-export type CrawlResponse = ErrorResponse | {
+export type CrawlResponse =
+  | ErrorResponse
+  | {
      success: true;
      id: string;
      url: string;
-}
+    };

-export type MapResponse = ErrorResponse | {
+export type MapResponse =
+  | ErrorResponse
+  | {
      success: true;
      links: string[];
-}
+    };

 export type CrawlStatusParams = {
  jobId: string;
-}
+};

-export type CrawlStatusResponse = ErrorResponse | {
-  status: "scraping" | "completed" | "failed" | "cancelled",
+export type CrawlStatusResponse =
+  | ErrorResponse
+  | {
+      status: "scraping" | "completed" | "failed" | "cancelled";
      totalCount: number;
      creditsUsed: number;
      expiresAt: string;
      next?: string;
      data: Document[];
-}
+    };

 type AuthObject = {
  team_id: string;
  plan: string;
-}
+};

-export interface RequestWithMaybeAuth<ReqParams = {}, ReqBody = undefined, ResBody = undefined> extends Request<ReqParams, ReqBody, ResBody> {
+export interface RequestWithMaybeAuth<
+  ReqParams = {},
+  ReqBody = undefined,
+  ResBody = undefined
+> extends Request<ReqParams, ReqBody, ResBody> {
  auth?: AuthObject;
 }

-export interface RequestWithAuth<ReqParams = {}, ReqBody = undefined, ResBody = undefined> extends Request<ReqParams, ReqBody, ResBody> {
+export interface RequestWithAuth<
+  ReqParams = {},
+  ReqBody = undefined,
+  ResBody = undefined
+> extends Request<ReqParams, ReqBody, ResBody> {
  auth: AuthObject;
 }

@@ -225,7 +265,7 @@ export function legacyScrapeOptions(x: ScrapeOptions): PageOptions {
    includeLinks: x.formats.includes("links"),
    screenshot: x.formats.includes("screenshot"),
    fullPageScreenshot: x.formats.includes("screenshot@fullPage"),
-    parsePDF: x.parsePDF
+    parsePDF: x.parsePDF,
  };
 }

@@ -243,5 +283,5 @@ export function legacyDocumentConverter(doc: any): Document {
    error: doc.metadata.pageError,
    statusCode: doc.metadata.pageStatusCode,
  },
-  }
+  };
 }
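
Taken together with legacyScrapeOptions above, the v1 formats array drives the older boolean PageOptions flags; an illustrative call, assuming the scrapeOptions fields not shown in this diff are optional:

const page = legacyScrapeOptions(
  scrapeOptions.parse({ formats: ["markdown", "links", "screenshot"] })
);
// page.includeLinks === true, page.screenshot === true,
// page.fullPageScreenshot === false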