diff --git a/apps/api/.prettierrc b/apps/api/.prettierrc
index d93a7f24..5d50a9cd 100644
--- a/apps/api/.prettierrc
+++ b/apps/api/.prettierrc
@@ -1,3 +1,3 @@
{
- "trailingComma": "none"
+ "trailingComma": "all"
}
\ No newline at end of file
diff --git a/apps/api/src/__tests__/e2e_extract/index.test.ts b/apps/api/src/__tests__/e2e_extract/index.test.ts
index 117cbab1..e1e4d1ce 100644
--- a/apps/api/src/__tests__/e2e_extract/index.test.ts
+++ b/apps/api/src/__tests__/e2e_extract/index.test.ts
@@ -3,7 +3,7 @@ import dotenv from "dotenv";
import {
FirecrawlCrawlResponse,
FirecrawlCrawlStatusResponse,
- FirecrawlScrapeResponse
+ FirecrawlScrapeResponse,
} from "../../types";
dotenv.config();
@@ -23,9 +23,9 @@ describe("E2E Tests for Extract API Routes", () => {
schema: {
type: "object",
properties: {
- authors: { type: "array", items: { type: "string" } }
- }
- }
+ authors: { type: "array", items: { type: "string" } },
+ },
+ },
});
console.log(response.body);
@@ -45,7 +45,7 @@ describe("E2E Tests for Extract API Routes", () => {
expect(gotItRight).toBeGreaterThan(1);
},
- 60000
+ 60000,
);
it.concurrent(
@@ -62,9 +62,9 @@ describe("E2E Tests for Extract API Routes", () => {
schema: {
type: "object",
properties: {
- founders: { type: "array", items: { type: "string" } }
- }
- }
+ founders: { type: "array", items: { type: "string" } },
+ },
+ },
});
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
@@ -83,7 +83,7 @@ describe("E2E Tests for Extract API Routes", () => {
expect(gotItRight).toBeGreaterThanOrEqual(2);
},
- 60000
+ 60000,
);
it.concurrent(
@@ -100,10 +100,10 @@ describe("E2E Tests for Extract API Routes", () => {
schema: {
type: "array",
items: {
- type: "string"
+ type: "string",
},
- required: ["items"]
- }
+ required: ["items"],
+ },
});
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
@@ -118,7 +118,7 @@ describe("E2E Tests for Extract API Routes", () => {
expect(gotItRight).toBeGreaterThan(2);
},
- 60000
+ 60000,
);
it.concurrent(
@@ -135,15 +135,15 @@ describe("E2E Tests for Extract API Routes", () => {
schema: {
type: "object",
properties: {
- pciDssCompliance: { type: "boolean" }
- }
- }
+ pciDssCompliance: { type: "boolean" },
+ },
+ },
});
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
expect(response.body.data?.pciDssCompliance).toBe(true);
},
- 60000
+ 60000,
);
it.concurrent(
@@ -163,10 +163,10 @@ describe("E2E Tests for Extract API Routes", () => {
properties: {
connector: { type: "string" },
description: { type: "string" },
- supportsCaptureDelete: { type: "boolean" }
- }
- }
- }
+ supportsCaptureDelete: { type: "boolean" },
+ },
+ },
+ },
});
console.log(response.body);
@@ -174,7 +174,7 @@ describe("E2E Tests for Extract API Routes", () => {
// expect(response.body).toHaveProperty("data");
// expect(response.body.data?.pciDssCompliance).toBe(true);
},
- 60000
+ 60000,
);
it.concurrent(
@@ -186,17 +186,17 @@ describe("E2E Tests for Extract API Routes", () => {
.set("Content-Type", "application/json")
.send({
urls: [
- "https://careers.abnormalsecurity.com/jobs/6119456003?gh_jid=6119456003"
+ "https://careers.abnormalsecurity.com/jobs/6119456003?gh_jid=6119456003",
],
prompt: "what applicant tracking system is this company using?",
schema: {
type: "object",
properties: {
isGreenhouseATS: { type: "boolean" },
- answer: { type: "string" }
- }
+ answer: { type: "string" },
+ },
},
- allowExternalLinks: true
+ allowExternalLinks: true,
});
console.log(response.body);
@@ -204,7 +204,7 @@ describe("E2E Tests for Extract API Routes", () => {
expect(response.body).toHaveProperty("data");
expect(response.body.data?.isGreenhouseATS).toBe(true);
},
- 60000
+ 60000,
);
it.concurrent(
@@ -222,12 +222,12 @@ describe("E2E Tests for Extract API Routes", () => {
items: {
type: "object",
properties: {
- component: { type: "string" }
- }
+ component: { type: "string" },
+ },
},
- required: ["items"]
+ required: ["items"],
},
- allowExternalLinks: true
+ allowExternalLinks: true,
});
console.log(response.body.data?.items);
@@ -248,7 +248,7 @@ describe("E2E Tests for Extract API Routes", () => {
}
expect(gotItRight).toBeGreaterThan(2);
},
- 60000
+ 60000,
);
it.concurrent(
@@ -267,11 +267,11 @@ describe("E2E Tests for Extract API Routes", () => {
properties: {
name: { type: "string" },
work: { type: "string" },
- education: { type: "string" }
+ education: { type: "string" },
},
- required: ["name", "work", "education"]
+ required: ["name", "work", "education"],
},
- allowExternalLinks: true
+ allowExternalLinks: true,
});
console.log(response.body.data);
@@ -281,7 +281,7 @@ describe("E2E Tests for Extract API Routes", () => {
expect(response.body.data?.work).toBeDefined();
expect(response.body.data?.education).toBeDefined();
},
- 60000
+ 60000,
);
it.concurrent(
@@ -293,7 +293,7 @@ describe("E2E Tests for Extract API Routes", () => {
.set("Content-Type", "application/json")
.send({
urls: ["https://docs.firecrawl.dev"],
- prompt: "What is the title and description of the page?"
+ prompt: "What is the title and description of the page?",
});
console.log(response.body.data);
@@ -302,6 +302,6 @@ describe("E2E Tests for Extract API Routes", () => {
expect(typeof response.body.data).toBe("object");
expect(Object.keys(response.body.data).length).toBeGreaterThan(0);
},
- 60000
+ 60000,
);
});
diff --git a/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts
index a8841aab..45b3c31e 100644
--- a/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts
@@ -47,7 +47,7 @@ describe("E2E Tests for API Routes", () => {
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
- }
+ },
);
it.concurrent("should return an error for a blocklisted URL", async () => {
@@ -59,7 +59,7 @@ describe("E2E Tests for API Routes", () => {
.send({ url: blocklistedUrl });
expect(response.statusCode).toBe(403);
expect(response.body.error).toContain(
- "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
);
});
@@ -103,30 +103,30 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data.metadata.pageError).toBeUndefined();
expect(response.body.data.metadata.title).toBe("Roast My Website");
expect(response.body.data.metadata.description).toBe(
- "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️"
+ "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️",
);
expect(response.body.data.metadata.keywords).toBe(
- "Roast My Website,Roast,Website,GitHub,Firecrawl"
+ "Roast My Website,Roast,Website,GitHub,Firecrawl",
);
expect(response.body.data.metadata.robots).toBe("follow, index");
expect(response.body.data.metadata.ogTitle).toBe("Roast My Website");
expect(response.body.data.metadata.ogDescription).toBe(
- "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️"
+ "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️",
);
expect(response.body.data.metadata.ogUrl).toBe(
- "https://www.roastmywebsite.ai"
+ "https://www.roastmywebsite.ai",
);
expect(response.body.data.metadata.ogImage).toBe(
- "https://www.roastmywebsite.ai/og.png"
+ "https://www.roastmywebsite.ai/og.png",
);
expect(response.body.data.metadata.ogLocaleAlternate).toStrictEqual([]);
expect(response.body.data.metadata.ogSiteName).toBe("Roast My Website");
expect(response.body.data.metadata.sourceURL).toBe(
- "https://roastmywebsite.ai"
+ "https://roastmywebsite.ai",
);
expect(response.body.data.metadata.pageStatusCode).toBe(200);
},
- 30000
+ 30000,
); // 30 seconds timeout
it.concurrent(
@@ -138,7 +138,7 @@ describe("E2E Tests for API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://roastmywebsite.ai",
- pageOptions: { includeHtml: true }
+ pageOptions: { includeHtml: true },
});
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
@@ -152,7 +152,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data.metadata.pageStatusCode).toBe(200);
expect(response.body.data.metadata.pageError).toBeUndefined();
},
- 30000
+ 30000,
); // 30 seconds timeout
it.concurrent(
@@ -164,7 +164,7 @@ describe("E2E Tests for API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://roastmywebsite.ai",
- pageOptions: { includeRawHtml: true }
+ pageOptions: { includeRawHtml: true },
});
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
@@ -178,7 +178,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data.metadata.pageStatusCode).toBe(200);
expect(response.body.data.metadata.pageError).toBeUndefined();
},
- 30000
+ 30000,
); // 30 seconds timeout
it.concurrent(
@@ -196,12 +196,12 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data).toHaveProperty("content");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.content).toContain(
- "We present spectrophotometric observations of the Broad Line Radio Galaxy"
+ "We present spectrophotometric observations of the Broad Line Radio Galaxy",
);
expect(response.body.data.metadata.pageStatusCode).toBe(200);
expect(response.body.data.metadata.pageError).toBeUndefined();
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -219,12 +219,12 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data).toHaveProperty("content");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.content).toContain(
- "We present spectrophotometric observations of the Broad Line Radio Galaxy"
+ "We present spectrophotometric observations of the Broad Line Radio Galaxy",
);
expect(response.body.data.metadata.pageStatusCode).toBe(200);
expect(response.body.data.metadata.pageError).toBeUndefined();
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -236,7 +236,7 @@ describe("E2E Tests for API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://arxiv.org/pdf/astro-ph/9301001.pdf",
- pageOptions: { parsePDF: false }
+ pageOptions: { parsePDF: false },
});
await new Promise((r) => setTimeout(r, 6000));
@@ -245,10 +245,10 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data).toHaveProperty("content");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.content).toContain(
- "/Title(arXiv:astro-ph/9301001v1 7 Jan 1993)>>endobj"
+ "/Title(arXiv:astro-ph/9301001v1 7 Jan 1993)>>endobj",
);
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -266,16 +266,16 @@ describe("E2E Tests for API Routes", () => {
expect(responseWithoutRemoveTags.body.data).toHaveProperty("metadata");
expect(responseWithoutRemoveTags.body.data).not.toHaveProperty("html");
expect(responseWithoutRemoveTags.body.data.content).toContain(
- "Scrape This Site"
+ "Scrape This Site",
);
expect(responseWithoutRemoveTags.body.data.content).toContain(
- "Lessons and Videos"
+ "Lessons and Videos",
); // #footer
expect(responseWithoutRemoveTags.body.data.content).toContain(
- "[Sandbox]("
+ "[Sandbox](",
); // .nav
expect(responseWithoutRemoveTags.body.data.content).toContain(
- "web scraping"
+ "web scraping",
); // strong
const response = await request(TEST_URL)
@@ -284,7 +284,7 @@ describe("E2E Tests for API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://www.scrapethissite.com/",
- pageOptions: { removeTags: [".nav", "#footer", "strong"] }
+ pageOptions: { removeTags: [".nav", "#footer", "strong"] },
});
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
@@ -297,7 +297,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data.content).not.toContain("[Sandbox]("); // .nav
expect(response.body.data.content).not.toContain("web scraping"); // strong
},
- 30000
+ 30000,
); // 30 seconds timeout
// TODO: add this test back once we nail the waitFor option to be more deterministic
@@ -337,10 +337,10 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.pageStatusCode).toBe(400);
expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
- "bad request"
+ "bad request",
);
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -359,10 +359,10 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.pageStatusCode).toBe(401);
expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
- "unauthorized"
+ "unauthorized",
);
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -381,10 +381,10 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.pageStatusCode).toBe(403);
expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
- "forbidden"
+ "forbidden",
);
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -403,10 +403,10 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.pageStatusCode).toBe(404);
expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
- "not found"
+ "not found",
);
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -425,10 +425,10 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.pageStatusCode).toBe(405);
expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
- "method not allowed"
+ "method not allowed",
);
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -447,10 +447,10 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.pageStatusCode).toBe(500);
expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
- "internal server error"
+ "internal server error",
);
},
- 60000
+ 60000,
); // 60 seconds
});
@@ -469,7 +469,7 @@ describe("E2E Tests for API Routes", () => {
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
- }
+ },
);
it.concurrent("should return an error for a blocklisted URL", async () => {
@@ -481,7 +481,7 @@ describe("E2E Tests for API Routes", () => {
.send({ url: blocklistedUrl });
expect(response.statusCode).toBe(403);
expect(response.body.error).toContain(
- "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
);
});
@@ -496,9 +496,9 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("jobId");
expect(response.body.jobId).toMatch(
- /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
+ /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/,
);
- }
+ },
);
it.concurrent(
"should prevent duplicate requests using the same idempotency key",
@@ -525,7 +525,7 @@ describe("E2E Tests for API Routes", () => {
expect(secondResponse.statusCode).toBe(409);
expect(secondResponse.body.error).toBe("Idempotency key already used");
- }
+ },
);
it.concurrent(
@@ -539,8 +539,8 @@ describe("E2E Tests for API Routes", () => {
url: "https://mendable.ai",
limit: 10,
crawlerOptions: {
- includes: ["blog/*"]
- }
+ includes: ["blog/*"],
+ },
});
let response;
@@ -563,7 +563,7 @@ describe("E2E Tests for API Routes", () => {
const completedResponse = response;
const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
+ (item: any) => item.metadata?.sourceURL,
);
expect(urls.length).toBeGreaterThan(5);
urls.forEach((url: string) => {
@@ -579,13 +579,13 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].content).toContain("Mendable");
expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
- 200
+ 200,
);
expect(
- completedResponse.body.data[0].metadata.pageError
+ completedResponse.body.data[0].metadata.pageError,
).toBeUndefined();
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -599,8 +599,8 @@ describe("E2E Tests for API Routes", () => {
url: "https://mendable.ai",
limit: 10,
crawlerOptions: {
- excludes: ["blog/*"]
- }
+ excludes: ["blog/*"],
+ },
});
let isFinished = false;
@@ -623,14 +623,14 @@ describe("E2E Tests for API Routes", () => {
const completedResponse = response;
const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
+ (item: any) => item.metadata?.sourceURL,
);
expect(urls.length).toBeGreaterThan(5);
urls.forEach((url: string) => {
expect(url.startsWith("https://wwww.mendable.ai/blog/")).toBeFalsy();
});
},
- 90000
+ 90000,
); // 90 seconds
it.concurrent(
@@ -642,7 +642,7 @@ describe("E2E Tests for API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://mendable.ai",
- crawlerOptions: { limit: 3 }
+ crawlerOptions: { limit: 3 },
});
let isFinished = false;
@@ -674,13 +674,13 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].content).toContain("Mendable");
expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
- 200
+ 200,
);
expect(
- completedResponse.body.data[0].metadata.pageError
+ completedResponse.body.data[0].metadata.pageError,
).toBeUndefined();
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -692,7 +692,7 @@ describe("E2E Tests for API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://www.scrapethissite.com",
- crawlerOptions: { maxDepth: 1 }
+ crawlerOptions: { maxDepth: 1 },
});
expect(crawlResponse.statusCode).toBe(200);
@@ -726,13 +726,13 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
- 200
+ 200,
);
expect(
- completedResponse.body.data[0].metadata.pageError
+ completedResponse.body.data[0].metadata.pageError,
).toBeUndefined();
const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
+ (item: any) => item.metadata?.sourceURL,
);
expect(urls.length).toBeGreaterThan(1);
@@ -748,7 +748,7 @@ describe("E2E Tests for API Routes", () => {
expect(depth).toBeLessThanOrEqual(2);
});
},
- 180000
+ 180000,
);
it.concurrent(
@@ -760,7 +760,7 @@ describe("E2E Tests for API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://www.scrapethissite.com/pages/",
- crawlerOptions: { maxDepth: 1 }
+ crawlerOptions: { maxDepth: 1 },
});
expect(crawlResponse.statusCode).toBe(200);
@@ -794,7 +794,7 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
+ (item: any) => item.metadata?.sourceURL,
);
expect(urls.length).toBeGreaterThan(1);
@@ -810,7 +810,7 @@ describe("E2E Tests for API Routes", () => {
expect(depth).toBeLessThanOrEqual(3);
});
},
- 180000
+ 180000,
);
it.concurrent(
@@ -822,7 +822,7 @@ describe("E2E Tests for API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://www.mendable.ai",
- crawlerOptions: { maxDepth: 0 }
+ crawlerOptions: { maxDepth: 0 },
});
expect(crawlResponse.statusCode).toBe(200);
@@ -849,7 +849,7 @@ describe("E2E Tests for API Routes", () => {
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
const testurls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
+ (item: any) => item.metadata?.sourceURL,
);
//console.log(testurls)
@@ -861,7 +861,7 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
+ (item: any) => item.metadata?.sourceURL,
);
expect(urls.length).toBeGreaterThanOrEqual(1);
@@ -877,7 +877,7 @@ describe("E2E Tests for API Routes", () => {
expect(depth).toBeLessThanOrEqual(1);
});
},
- 180000
+ 180000,
);
// it.concurrent("should return a successful response with a valid API key and valid limit option", async () => {
@@ -934,7 +934,7 @@ describe("E2E Tests for API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://roastmywebsite.ai",
- pageOptions: { includeHtml: true }
+ pageOptions: { includeHtml: true },
});
expect(crawlResponse.statusCode).toBe(200);
@@ -969,10 +969,10 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
- 200
+ 200,
);
expect(
- completedResponse.body.data[0].metadata.pageError
+ completedResponse.body.data[0].metadata.pageError,
).toBeUndefined();
// 120 seconds
@@ -983,13 +983,13 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0].html).toContain("
{
allowExternalContentLinks: true,
ignoreSitemap: true,
returnOnlyUrls: true,
- limit: 50
- }
+ limit: 50,
+ },
});
expect(crawlInitResponse.statusCode).toBe(200);
@@ -1031,19 +1031,19 @@ describe("E2E Tests for API Routes", () => {
expect.arrayContaining([
expect.objectContaining({
url: expect.stringContaining(
- "https://firecrawl.dev/?ref=mendable+banner"
- )
+ "https://firecrawl.dev/?ref=mendable+banner",
+ ),
}),
expect.objectContaining({
- url: expect.stringContaining("https://mendable.ai/pricing")
+ url: expect.stringContaining("https://mendable.ai/pricing"),
}),
expect.objectContaining({
- url: expect.stringContaining("https://x.com/CalebPeffer")
- })
- ])
+ url: expect.stringContaining("https://x.com/CalebPeffer"),
+ }),
+ ]),
);
},
- 180000
+ 180000,
); // 3 minutes timeout
});
@@ -1062,7 +1062,7 @@ describe("E2E Tests for API Routes", () => {
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
- }
+ },
);
// it.concurrent("should return an error for a blocklisted URL", async () => {
@@ -1088,7 +1088,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(408);
},
- 3000
+ 3000,
);
// it.concurrent("should return a successful response with a valid API key for crawlWebsitePreview", async () => {
@@ -1120,7 +1120,7 @@ describe("E2E Tests for API Routes", () => {
.set("Content-Type", "application/json")
.send({ query: "test" });
expect(response.statusCode).toBe(401);
- }
+ },
);
it.concurrent(
@@ -1136,7 +1136,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.body.success).toBe(true);
expect(response.body).toHaveProperty("data");
},
- 30000
+ 30000,
); // 30 seconds timeout
});
@@ -1153,7 +1153,7 @@ describe("E2E Tests for API Routes", () => {
.get("/v0/crawl/status/123")
.set("Authorization", `Bearer invalid-api-key`);
expect(response.statusCode).toBe(401);
- }
+ },
);
it.concurrent(
@@ -1163,7 +1163,7 @@ describe("E2E Tests for API Routes", () => {
.get("/v0/crawl/status/invalidJobId")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(404);
- }
+ },
);
it.concurrent(
@@ -1201,22 +1201,22 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].content).toContain("Mendable");
expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
- 200
+ 200,
);
expect(
- completedResponse.body.data[0].metadata.pageError
+ completedResponse.body.data[0].metadata.pageError,
).toBeUndefined();
const childrenLinks = completedResponse.body.data.filter(
(doc) =>
doc.metadata &&
doc.metadata.sourceURL &&
- doc.metadata.sourceURL.includes("mendable.ai/blog")
+ doc.metadata.sourceURL.includes("mendable.ai/blog"),
);
expect(childrenLinks.length).toBe(completedResponse.body.data.length);
},
- 180000
+ 180000,
); // 120 seconds
it.concurrent(
@@ -1236,9 +1236,9 @@ describe("E2E Tests for API Routes", () => {
"abs/*",
"static/*",
"about/*",
- "archive/*"
- ]
- }
+ "archive/*",
+ ],
+ },
});
expect(crawlResponse.statusCode).toBe(200);
@@ -1266,21 +1266,21 @@ describe("E2E Tests for API Routes", () => {
expect.arrayContaining([
expect.objectContaining({
content: expect.stringContaining(
- "asymmetries might represent, for instance, preferred source orientations to our line of sight."
- )
- })
- ])
+ "asymmetries might represent, for instance, preferred source orientations to our line of sight.",
+ ),
+ }),
+ ]),
);
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
- 200
+ 200,
);
expect(
- completedResponse.body.data[0].metadata.pageError
+ completedResponse.body.data[0].metadata.pageError,
).toBeUndefined();
},
- 180000
+ 180000,
); // 120 seconds
it.concurrent(
@@ -1292,7 +1292,7 @@ describe("E2E Tests for API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://roastmywebsite.ai",
- pageOptions: { includeHtml: true }
+ pageOptions: { includeHtml: true },
});
expect(crawlResponse.statusCode).toBe(200);
@@ -1333,13 +1333,13 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
expect(completedResponse.body.data[0].html).toContain(" {
.send({
url: "https://mendable.ai/blog",
pageOptions: { includeHtml: true },
- crawlerOptions: { allowBackwardCrawling: true }
+ crawlerOptions: { allowBackwardCrawling: true },
});
expect(crawlResponse.statusCode).toBe(200);
@@ -1397,10 +1397,10 @@ describe("E2E Tests for API Routes", () => {
});
expect(completedResponse.body.data.length).toBeGreaterThan(
- onlyChildrenLinks.length
+ onlyChildrenLinks.length,
);
},
- 60000
+ 60000,
);
it.concurrent(
@@ -1438,13 +1438,13 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.partial_data[0]).toHaveProperty("markdown");
expect(completedResponse.body.partial_data[0]).toHaveProperty("metadata");
expect(
- completedResponse.body.partial_data[0].metadata.pageStatusCode
+ completedResponse.body.partial_data[0].metadata.pageStatusCode,
).toBe(200);
expect(
- completedResponse.body.partial_data[0].metadata.pageError
+ completedResponse.body.partial_data[0].metadata.pageError,
).toBeUndefined();
},
- 60000
+ 60000,
); // 60 seconds
describe("POST /v0/scrape with LLM Extraction", () => {
@@ -1458,7 +1458,7 @@ describe("E2E Tests for API Routes", () => {
.send({
url: "https://mendable.ai",
pageOptions: {
- onlyMainContent: true
+ onlyMainContent: true,
},
extractorOptions: {
mode: "llm-extraction",
@@ -1468,18 +1468,18 @@ describe("E2E Tests for API Routes", () => {
type: "object",
properties: {
company_mission: {
- type: "string"
+ type: "string",
},
supports_sso: {
- type: "boolean"
+ type: "boolean",
},
is_open_source: {
- type: "boolean"
- }
+ type: "boolean",
+ },
},
- required: ["company_mission", "supports_sso", "is_open_source"]
- }
- }
+ required: ["company_mission", "supports_sso", "is_open_source"],
+ },
+ },
});
// Ensure that the job was successfully created before proceeding with LLM extraction
@@ -1498,7 +1498,7 @@ describe("E2E Tests for API Routes", () => {
expect(llmExtraction.is_open_source).toBe(false);
expect(typeof llmExtraction.is_open_source).toBe("boolean");
},
- 60000
+ 60000,
); // 60 secs
it.concurrent(
@@ -1519,15 +1519,15 @@ describe("E2E Tests for API Routes", () => {
type: "object",
properties: {
primary_cta: {
- type: "string"
+ type: "string",
},
secondary_cta: {
- type: "string"
- }
+ type: "string",
+ },
},
- required: ["primary_cta", "secondary_cta"]
- }
- }
+ required: ["primary_cta", "secondary_cta"],
+ },
+ },
});
// Ensure that the job was successfully created before proceeding with LLM extraction
@@ -1542,7 +1542,7 @@ describe("E2E Tests for API Routes", () => {
expect(llmExtraction).toHaveProperty("secondary_cta");
expect(typeof llmExtraction.secondary_cta).toBe("string");
},
- 60000
+ 60000,
); // 60 secs
});
@@ -1617,8 +1617,8 @@ describe("E2E Tests for API Routes", () => {
.send({
url: "https://flutterbricks.com",
crawlerOptions: {
- mode: "fast"
- }
+ mode: "fast",
+ },
});
expect(crawlResponse.statusCode).toBe(200);
@@ -1660,7 +1660,7 @@ describe("E2E Tests for API Routes", () => {
expect(results.length).toBeGreaterThanOrEqual(10);
expect(results.length).toBeLessThanOrEqual(15);
},
- 20000
+ 20000,
);
// it.concurrent("should complete the crawl in more than 10 seconds", async () => {
@@ -1741,7 +1741,7 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(429);
},
- 90000
+ 90000,
);
});
diff --git a/apps/api/src/__tests__/e2e_map/index.test.ts b/apps/api/src/__tests__/e2e_map/index.test.ts
index 948f097e..30ec6776 100644
--- a/apps/api/src/__tests__/e2e_map/index.test.ts
+++ b/apps/api/src/__tests__/e2e_map/index.test.ts
@@ -15,7 +15,7 @@ describe("E2E Tests for Map API Routes", () => {
.send({
url: "https://firecrawl.dev",
sitemapOnly: false,
- search: "smart-crawl"
+ search: "smart-crawl",
});
console.log(response.body);
@@ -24,7 +24,7 @@ describe("E2E Tests for Map API Routes", () => {
expect(response.body.links.length).toBeGreaterThan(0);
expect(response.body.links[0]).toContain("firecrawl.dev/smart-crawl");
},
- 60000
+ 60000,
);
it.concurrent(
@@ -37,7 +37,7 @@ describe("E2E Tests for Map API Routes", () => {
.send({
url: "https://firecrawl.dev",
sitemapOnly: false,
- includeSubdomains: true
+ includeSubdomains: true,
});
console.log(response.body);
@@ -45,10 +45,10 @@ describe("E2E Tests for Map API Routes", () => {
expect(response.body).toHaveProperty("links");
expect(response.body.links.length).toBeGreaterThan(0);
expect(response.body.links[response.body.links.length - 1]).toContain(
- "docs.firecrawl.dev"
+ "docs.firecrawl.dev",
);
},
- 60000
+ 60000,
);
it.concurrent(
@@ -60,7 +60,7 @@ describe("E2E Tests for Map API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://firecrawl.dev",
- sitemapOnly: true
+ sitemapOnly: true,
});
console.log(response.body);
@@ -68,10 +68,10 @@ describe("E2E Tests for Map API Routes", () => {
expect(response.body).toHaveProperty("links");
expect(response.body.links.length).toBeGreaterThan(0);
expect(response.body.links[response.body.links.length - 1]).not.toContain(
- "docs.firecrawl.dev"
+ "docs.firecrawl.dev",
);
},
- 60000
+ 60000,
);
it.concurrent(
@@ -84,7 +84,7 @@ describe("E2E Tests for Map API Routes", () => {
.send({
url: "https://firecrawl.dev",
sitemapOnly: false,
- limit: 10
+ limit: 10,
});
console.log(response.body);
@@ -92,7 +92,7 @@ describe("E2E Tests for Map API Routes", () => {
expect(response.body).toHaveProperty("links");
expect(response.body.links.length).toBeLessThanOrEqual(10);
},
- 60000
+ 60000,
);
it.concurrent(
@@ -104,7 +104,7 @@ describe("E2E Tests for Map API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://geekflare.com/sitemap_index.xml",
- sitemapOnly: true
+ sitemapOnly: true,
});
console.log(response.body);
@@ -112,6 +112,6 @@ describe("E2E Tests for Map API Routes", () => {
expect(response.body).toHaveProperty("links");
expect(response.body.links.length).toBeGreaterThan(1900);
},
- 60000
+ 60000,
);
});
diff --git a/apps/api/src/__tests__/e2e_noAuth/index.test.ts b/apps/api/src/__tests__/e2e_noAuth/index.test.ts
index 9c3ddf33..e30352a5 100644
--- a/apps/api/src/__tests__/e2e_noAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_noAuth/index.test.ts
@@ -62,7 +62,7 @@ describe("E2E Tests for API Routes with No Authentication", () => {
.send({ url: blocklistedUrl });
expect(response.statusCode).toBe(403);
expect(response.body.error).toContain(
- "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
);
});
@@ -89,7 +89,7 @@ describe("E2E Tests for API Routes with No Authentication", () => {
.send({ url: blocklistedUrl });
expect(response.statusCode).toBe(403);
expect(response.body.error).toContain(
- "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
);
});
@@ -101,7 +101,7 @@ describe("E2E Tests for API Routes with No Authentication", () => {
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("jobId");
expect(response.body.jobId).toMatch(
- /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
+ /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/,
);
});
});
@@ -120,7 +120,7 @@ describe("E2E Tests for API Routes with No Authentication", () => {
.send({ url: blocklistedUrl });
expect(response.statusCode).toBe(403);
expect(response.body.error).toContain(
- "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
);
});
@@ -132,7 +132,7 @@ describe("E2E Tests for API Routes with No Authentication", () => {
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("jobId");
expect(response.body.jobId).toMatch(
- /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
+ /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/,
);
});
});
@@ -172,7 +172,7 @@ describe("E2E Tests for API Routes with No Authentication", () => {
it("should return Job not found for invalid job ID", async () => {
const response = await request(TEST_URL).get(
- "/v0/crawl/status/invalidJobId"
+ "/v0/crawl/status/invalidJobId",
);
expect(response.statusCode).toBe(404);
});
@@ -185,7 +185,7 @@ describe("E2E Tests for API Routes with No Authentication", () => {
expect(crawlResponse.statusCode).toBe(200);
const response = await request(TEST_URL).get(
- `/v0/crawl/status/${crawlResponse.body.jobId}`
+ `/v0/crawl/status/${crawlResponse.body.jobId}`,
);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("status");
@@ -195,7 +195,7 @@ describe("E2E Tests for API Routes with No Authentication", () => {
await new Promise((r) => setTimeout(r, 30000));
const completedResponse = await request(TEST_URL).get(
- `/v0/crawl/status/${crawlResponse.body.jobId}`
+ `/v0/crawl/status/${crawlResponse.body.jobId}`,
);
expect(completedResponse.statusCode).toBe(200);
expect(completedResponse.body).toHaveProperty("status");
diff --git a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts
index 33e3be5d..35ee2d89 100644
--- a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts
@@ -2,7 +2,7 @@ import request from "supertest";
import { configDotenv } from "dotenv";
import {
ScrapeRequestInput,
- ScrapeResponseRequestTest
+ ScrapeResponseRequestTest,
} from "../../controllers/v1/types";
configDotenv();
@@ -24,7 +24,7 @@ describe("E2E Tests for v1 API Routes", () => {
console.log(
"process.env.USE_DB_AUTHENTICATION",
- process.env.USE_DB_AUTHENTICATION
+ process.env.USE_DB_AUTHENTICATION,
);
console.log("?", process.env.USE_DB_AUTHENTICATION === "true");
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
@@ -47,7 +47,7 @@ describe("E2E Tests for v1 API Routes", () => {
it.concurrent("should throw error for blocklisted URL", async () => {
const scrapeRequest: ScrapeRequestInput = {
- url: "https://facebook.com/fake-test"
+ url: "https://facebook.com/fake-test",
};
const response = await request(TEST_URL)
@@ -58,7 +58,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.statusCode).toBe(403);
expect(response.body.error).toBe(
- "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions."
+ "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.",
);
});
@@ -71,14 +71,14 @@ describe("E2E Tests for v1 API Routes", () => {
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
- }
+ },
);
it.concurrent(
"should return a successful response with a valid API key",
async () => {
const scrapeRequest: ScrapeRequestInput = {
- url: "https://roastmywebsite.ai"
+ url: "https://roastmywebsite.ai",
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -100,37 +100,37 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data.metadata.error).toBeUndefined();
expect(response.body.data.metadata.title).toBe("Roast My Website");
expect(response.body.data.metadata.description).toBe(
- "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️"
+ "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️",
);
expect(response.body.data.metadata.keywords).toBe(
- "Roast My Website,Roast,Website,GitHub,Firecrawl"
+ "Roast My Website,Roast,Website,GitHub,Firecrawl",
);
expect(response.body.data.metadata.robots).toBe("follow, index");
expect(response.body.data.metadata.ogTitle).toBe("Roast My Website");
expect(response.body.data.metadata.ogDescription).toBe(
- "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️"
+ "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️",
);
expect(response.body.data.metadata.ogUrl).toBe(
- "https://www.roastmywebsite.ai"
+ "https://www.roastmywebsite.ai",
);
expect(response.body.data.metadata.ogImage).toBe(
- "https://www.roastmywebsite.ai/og.png"
+ "https://www.roastmywebsite.ai/og.png",
);
expect(response.body.data.metadata.ogLocaleAlternate).toStrictEqual([]);
expect(response.body.data.metadata.ogSiteName).toBe("Roast My Website");
expect(response.body.data.metadata.sourceURL).toBe(
- "https://roastmywebsite.ai"
+ "https://roastmywebsite.ai",
);
expect(response.body.data.metadata.statusCode).toBe(200);
},
- 30000
+ 30000,
); // 30 seconds timeout
it.concurrent(
"should return a successful response with a valid API key",
async () => {
const scrapeRequest: ScrapeRequestInput = {
- url: "https://arxiv.org/abs/2410.04840"
+ url: "https://arxiv.org/abs/2410.04840",
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -151,43 +151,43 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data.markdown).toContain("Strong Model Collapse");
expect(response.body.data.metadata.error).toBeUndefined();
expect(response.body.data.metadata.description).toContain(
- "Abstract page for arXiv paper 2410.04840: Strong Model Collapse"
+ "Abstract page for arXiv paper 2410.04840: Strong Model Collapse",
);
expect(response.body.data.metadata.citation_title).toBe(
- "Strong Model Collapse"
+ "Strong Model Collapse",
);
expect(response.body.data.metadata.citation_author).toEqual([
"Dohmatob, Elvis",
"Feng, Yunzhen",
"Subramonian, Arjun",
- "Kempe, Julia"
+ "Kempe, Julia",
]);
expect(response.body.data.metadata.citation_date).toBe("2024/10/07");
expect(response.body.data.metadata.citation_online_date).toBe(
- "2024/10/08"
+ "2024/10/08",
);
expect(response.body.data.metadata.citation_pdf_url).toBe(
- "http://arxiv.org/pdf/2410.04840"
+ "http://arxiv.org/pdf/2410.04840",
);
expect(response.body.data.metadata.citation_arxiv_id).toBe(
- "2410.04840"
+ "2410.04840",
);
expect(response.body.data.metadata.citation_abstract).toContain(
- "Within the scaling laws paradigm"
+ "Within the scaling laws paradigm",
);
expect(response.body.data.metadata.sourceURL).toBe(
- "https://arxiv.org/abs/2410.04840"
+ "https://arxiv.org/abs/2410.04840",
);
expect(response.body.data.metadata.statusCode).toBe(200);
},
- 30000
+ 30000,
);
it.concurrent(
"should return a successful response with a valid API key and includeHtml set to true",
async () => {
const scrapeRequest: ScrapeRequestInput = {
url: "https://roastmywebsite.ai",
- formats: ["markdown", "html"]
+ formats: ["markdown", "html"],
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -209,13 +209,13 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data.metadata.statusCode).toBe(200);
expect(response.body.data.metadata.error).toBeUndefined();
},
- 30000
+ 30000,
);
it.concurrent(
"should return a successful response for a valid scrape with PDF file",
async () => {
const scrapeRequest: ScrapeRequestInput = {
- url: "https://arxiv.org/pdf/astro-ph/9301001.pdf"
+ url: "https://arxiv.org/pdf/astro-ph/9301001.pdf",
// formats: ["markdown", "html"],
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -232,19 +232,19 @@ describe("E2E Tests for v1 API Routes", () => {
}
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.markdown).toContain(
- "Broad Line Radio Galaxy"
+ "Broad Line Radio Galaxy",
);
expect(response.body.data.metadata.statusCode).toBe(200);
expect(response.body.data.metadata.error).toBeUndefined();
},
- 60000
+ 60000,
);
it.concurrent(
"should return a successful response for a valid scrape with PDF file without explicit .pdf extension",
async () => {
const scrapeRequest: ScrapeRequestInput = {
- url: "https://arxiv.org/pdf/astro-ph/9301001"
+ url: "https://arxiv.org/pdf/astro-ph/9301001",
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/scrape")
@@ -261,12 +261,12 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data).toHaveProperty("markdown");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.markdown).toContain(
- "Broad Line Radio Galaxy"
+ "Broad Line Radio Galaxy",
);
expect(response.body.data.metadata.statusCode).toBe(200);
expect(response.body.data.metadata.error).toBeUndefined();
},
- 60000
+ 60000,
);
it.concurrent(
@@ -274,7 +274,7 @@ describe("E2E Tests for v1 API Routes", () => {
async () => {
const scrapeRequest: ScrapeRequestInput = {
url: "https://www.scrapethissite.com/",
- onlyMainContent: false // default is true
+ onlyMainContent: false, // default is true
};
const responseWithoutRemoveTags: ScrapeResponseRequestTest =
await request(TEST_URL)
@@ -292,16 +292,16 @@ describe("E2E Tests for v1 API Routes", () => {
expect(responseWithoutRemoveTags.body.data).toHaveProperty("metadata");
expect(responseWithoutRemoveTags.body.data).not.toHaveProperty("html");
expect(responseWithoutRemoveTags.body.data.markdown).toContain(
- "[FAQ](/faq/)"
+ "[FAQ](/faq/)",
); // .nav
expect(responseWithoutRemoveTags.body.data.markdown).toContain(
- "Hartley Brody 2023"
+ "Hartley Brody 2023",
); // #footer
const scrapeRequestWithRemoveTags: ScrapeRequestInput = {
url: "https://www.scrapethissite.com/",
excludeTags: [".nav", "#footer", "strong"],
- onlyMainContent: false // default is true
+ onlyMainContent: false, // default is true
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/scrape")
@@ -320,7 +320,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data.markdown).not.toContain("Hartley Brody 2023");
expect(response.body.data.markdown).not.toContain("[FAQ](/faq/)"); //
},
- 30000
+ 30000,
);
it.concurrent(
@@ -342,7 +342,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.statusCode).toBe(400);
},
- 60000
+ 60000,
);
it.concurrent(
@@ -364,7 +364,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.statusCode).toBe(401);
},
- 60000
+ 60000,
);
// Removed it as we want to retry fallback to the next scraper
@@ -405,7 +405,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.statusCode).toBe(404);
},
- 60000
+ 60000,
);
// it.concurrent('should return a successful response for a scrape with 405 page', async () => {
@@ -455,7 +455,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.statusCode).toBe(408);
},
- 3000
+ 3000,
);
it.concurrent(
@@ -463,7 +463,7 @@ describe("E2E Tests for v1 API Routes", () => {
async () => {
const scrapeRequest: ScrapeRequestInput = {
url: "https://roastmywebsite.ai",
- formats: ["html", "rawHtml"]
+ formats: ["html", "rawHtml"],
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -486,7 +486,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data.metadata.statusCode).toBe(200);
expect(response.body.data.metadata.error).toBeUndefined();
},
- 30000
+ 30000,
);
it.concurrent(
@@ -495,7 +495,7 @@ describe("E2E Tests for v1 API Routes", () => {
const scrapeRequest: ScrapeRequestInput = {
url: "https://ycombinator.com/companies",
formats: ["markdown"],
- waitFor: 8000
+ waitFor: 8000,
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -518,7 +518,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data.metadata.statusCode).toBe(200);
expect(response.body.data.metadata.error).toBeUndefined();
},
- 30000
+ 30000,
);
it.concurrent(
@@ -526,7 +526,7 @@ describe("E2E Tests for v1 API Routes", () => {
async () => {
const scrapeRequest: ScrapeRequestInput = {
url: "https://roastmywebsite.ai",
- formats: ["links"]
+ formats: ["links"],
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -548,7 +548,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data.metadata.statusCode).toBe(200);
expect(response.body.data.metadata.error).toBeUndefined();
},
- 30000
+ 30000,
);
});
@@ -569,14 +569,14 @@ describe("E2E Tests for v1 API Routes", () => {
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
- }
+ },
);
it.concurrent(
"should return a successful response with a valid API key",
async () => {
const mapRequest = {
- url: "https://roastmywebsite.ai"
+ url: "https://roastmywebsite.ai",
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -594,7 +594,7 @@ describe("E2E Tests for v1 API Routes", () => {
const links = response.body.links as unknown[];
expect(Array.isArray(links)).toBe(true);
expect(links.length).toBeGreaterThan(0);
- }
+ },
);
it.concurrent(
@@ -602,7 +602,7 @@ describe("E2E Tests for v1 API Routes", () => {
async () => {
const mapRequest = {
url: "https://usemotion.com",
- search: "pricing"
+ search: "pricing",
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -621,7 +621,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(Array.isArray(links)).toBe(true);
expect(links.length).toBeGreaterThan(0);
expect(links[0]).toContain("usemotion.com/pricing");
- }
+ },
);
it.concurrent(
@@ -630,7 +630,7 @@ describe("E2E Tests for v1 API Routes", () => {
const mapRequest = {
url: "https://firecrawl.dev",
search: "docs",
- includeSubdomains: true
+ includeSubdomains: true,
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -650,10 +650,10 @@ describe("E2E Tests for v1 API Routes", () => {
expect(links.length).toBeGreaterThan(0);
const containsDocsFirecrawlDev = links.some((link: string) =>
- link.includes("docs.firecrawl.dev")
+ link.includes("docs.firecrawl.dev"),
);
expect(containsDocsFirecrawlDev).toBe(true);
- }
+ },
);
it.concurrent(
@@ -662,7 +662,7 @@ describe("E2E Tests for v1 API Routes", () => {
const mapRequest = {
url: "https://www.firecrawl.dev",
search: "docs",
- includeSubdomains: true
+ includeSubdomains: true,
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -682,11 +682,11 @@ describe("E2E Tests for v1 API Routes", () => {
expect(links.length).toBeGreaterThan(0);
const containsDocsFirecrawlDev = links.some((link: string) =>
- link.includes("docs.firecrawl.dev")
+ link.includes("docs.firecrawl.dev"),
);
expect(containsDocsFirecrawlDev).toBe(true);
},
- 10000
+ 10000,
);
it.concurrent(
@@ -695,7 +695,7 @@ describe("E2E Tests for v1 API Routes", () => {
const mapRequest = {
url: "https://www.firecrawl.dev",
search: "docs",
- includeSubdomains: false
+ includeSubdomains: false,
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -714,14 +714,14 @@ describe("E2E Tests for v1 API Routes", () => {
expect(Array.isArray(links)).toBe(true);
expect(links.length).toBeGreaterThan(0);
expect(links[0]).not.toContain("docs.firecrawl.dev");
- }
+ },
);
it.concurrent("should return an error for invalid URL", async () => {
const mapRequest = {
url: "invalid-url",
includeSubdomains: true,
- search: "test"
+ search: "test",
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -746,7 +746,7 @@ describe("E2E Tests for v1 API Routes", () => {
it.concurrent("should throw error for blocklisted URL", async () => {
const scrapeRequest: ScrapeRequestInput = {
- url: "https://facebook.com/fake-test"
+ url: "https://facebook.com/fake-test",
};
const response = await request(TEST_URL)
@@ -757,7 +757,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.statusCode).toBe(403);
expect(response.body.error).toBe(
- "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions."
+ "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.",
);
});
@@ -770,7 +770,7 @@ describe("E2E Tests for v1 API Routes", () => {
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
- }
+ },
);
it.concurrent("should return a successful response", async () => {
@@ -783,7 +783,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("id");
expect(response.body.id).toMatch(
- /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
+ /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/,
);
expect(response.body).toHaveProperty("success", true);
expect(response.body).toHaveProperty("url");
@@ -800,7 +800,7 @@ describe("E2E Tests for v1 API Routes", () => {
.send({
url: "https://firecrawl.dev",
limit: 40,
- includePaths: ["blog/*"]
+ includePaths: ["blog/*"],
});
let response;
@@ -826,7 +826,7 @@ describe("E2E Tests for v1 API Routes", () => {
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
+ (item: any) => item.metadata?.sourceURL,
);
expect(urls.length).toBeGreaterThan(5);
urls.forEach((url: string) => {
@@ -843,7 +843,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
},
- 180000
+ 180000,
); // 180 seconds
it.concurrent(
@@ -856,7 +856,7 @@ describe("E2E Tests for v1 API Routes", () => {
.send({
url: "https://firecrawl.dev",
limit: 40,
- excludePaths: ["blog/*"]
+ excludePaths: ["blog/*"],
});
let isFinished = false;
@@ -882,14 +882,14 @@ describe("E2E Tests for v1 API Routes", () => {
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
+ (item: any) => item.metadata?.sourceURL,
);
expect(urls.length).toBeGreaterThan(3);
urls.forEach((url: string) => {
expect(url.startsWith("https://www.firecrawl.dev/blog/")).toBeFalsy();
});
},
- 90000
+ 90000,
); // 90 seconds
it.concurrent(
@@ -901,7 +901,7 @@ describe("E2E Tests for v1 API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://www.scrapethissite.com",
- maxDepth: 1
+ maxDepth: 1,
});
expect(crawlResponse.statusCode).toBe(200);
@@ -911,7 +911,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("status");
expect(["active", "waiting", "completed", "scraping"]).toContain(
- response.body.status
+ response.body.status,
);
// wait for 60 seconds
let isCompleted = false;
@@ -939,7 +939,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
+ (item: any) => item.metadata?.sourceURL,
);
expect(urls.length).toBeGreaterThan(1);
@@ -955,7 +955,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(depth).toBeLessThanOrEqual(2);
});
},
- 180000
+ 180000,
);
});
@@ -972,7 +972,7 @@ describe("E2E Tests for v1 API Routes", () => {
.get("/v1/crawl/123")
.set("Authorization", `Bearer invalid-api-key`);
expect(response.statusCode).toBe(401);
- }
+ },
);
it.concurrent(
@@ -982,7 +982,7 @@ describe("E2E Tests for v1 API Routes", () => {
.get("/v1/crawl/invalidJobId")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(404);
- }
+ },
);
it.concurrent(
@@ -1026,12 +1026,12 @@ describe("E2E Tests for v1 API Routes", () => {
expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
const childrenLinks = completedResponse.body.data.filter(
- (doc) => doc.metadata && doc.metadata.sourceURL
+ (doc) => doc.metadata && doc.metadata.sourceURL,
);
expect(childrenLinks.length).toBe(completedResponse.body.data.length);
},
- 180000
+ 180000,
); // 120 seconds
it.concurrent(
@@ -1068,7 +1068,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
},
- 60000
+ 60000,
); // 60 seconds
});
});
diff --git a/apps/api/src/__tests__/e2e_v1_withAuth_all_params/index.test.ts b/apps/api/src/__tests__/e2e_v1_withAuth_all_params/index.test.ts
index e297f7c8..313b7357 100644
--- a/apps/api/src/__tests__/e2e_v1_withAuth_all_params/index.test.ts
+++ b/apps/api/src/__tests__/e2e_v1_withAuth_all_params/index.test.ts
@@ -2,7 +2,7 @@ import request from "supertest";
import { configDotenv } from "dotenv";
import {
ScrapeRequest,
- ScrapeResponseRequestTest
+ ScrapeResponseRequestTest,
} from "../../controllers/v1/types";
configDotenv();
@@ -14,7 +14,7 @@ describe("E2E Tests for v1 API Routes", () => {
"should return a successful response for a scrape with 403 page",
async () => {
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -30,18 +30,18 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.statusCode).toBe(403);
},
- 30000
+ 30000,
);
it.concurrent(
"should handle 'formats:markdown (default)' parameter correctly",
async () => {
const scrapeRequest = {
- url: E2E_TEST_SERVER_URL
+ url: E2E_TEST_SERVER_URL,
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -57,26 +57,26 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data).toHaveProperty("markdown");
expect(response.body.data.markdown).toContain(
- "This page is used for end-to-end (e2e) testing with Firecrawl."
+ "This page is used for end-to-end (e2e) testing with Firecrawl.",
);
expect(response.body.data.markdown).toContain(
- "Content with id #content-1"
+ "Content with id #content-1",
);
// expect(response.body.data.markdown).toContain("Loading...");
expect(response.body.data.markdown).toContain("Click me!");
expect(response.body.data.markdown).toContain(
- "Power your AI apps with clean data crawled from any website. It's also open-source."
+ "Power your AI apps with clean data crawled from any website. It's also open-source.",
); // firecrawl.dev inside an iframe
expect(response.body.data.markdown).toContain(
- "This content loads only when you see it. Don't blink! 👼"
+ "This content loads only when you see it. Don't blink! 👼",
); // the browser always scroll to the bottom
expect(response.body.data.markdown).not.toContain("Header"); // Only main content is returned by default
expect(response.body.data.markdown).not.toContain("footer"); // Only main content is returned by default
expect(response.body.data.markdown).not.toContain(
- "This content is only visible on mobile"
+ "This content is only visible on mobile",
);
},
- 30000
+ 30000,
);
it.concurrent(
@@ -84,11 +84,11 @@ describe("E2E Tests for v1 API Routes", () => {
async () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
- formats: ["html"]
+ formats: ["html"],
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -105,13 +105,13 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data).toHaveProperty("html");
expect(response.body.data.html).not.toContain(
- ''
+ '',
);
expect(response.body.data.html).toContain(
- 'This page is used for end-to-end (e2e) testing with Firecrawl.'
+ 'This page is used for end-to-end (e2e) testing with Firecrawl.',
);
},
- 30000
+ 30000,
);
it.concurrent(
@@ -119,11 +119,11 @@ describe("E2E Tests for v1 API Routes", () => {
async () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
- formats: ["rawHtml"]
+ formats: ["rawHtml"],
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -140,11 +140,11 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data).toHaveProperty("rawHtml");
expect(response.body.data.rawHtml).toContain(
- ">This page is used for end-to-end (e2e) testing with Firecrawl."
+ ">This page is used for end-to-end (e2e) testing with Firecrawl.",
);
expect(response.body.data.rawHtml).toContain(">Header");
},
- 30000
+ 30000,
);
// - TODO: tests for links
@@ -157,11 +157,11 @@ describe("E2E Tests for v1 API Routes", () => {
// @ts-ignore
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
- headers: { "e2e-header-test": "firecrawl" }
+ headers: { "e2e-header-test": "firecrawl" },
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -175,10 +175,10 @@ describe("E2E Tests for v1 API Routes", () => {
}
expect(response.body.data.markdown).toContain(
- "e2e-header-test: firecrawl"
+ "e2e-header-test: firecrawl",
);
},
- 30000
+ 30000,
);
it.concurrent(
@@ -186,11 +186,11 @@ describe("E2E Tests for v1 API Routes", () => {
async () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
- includeTags: ["#content-1"]
+ includeTags: ["#content-1"],
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -204,13 +204,13 @@ describe("E2E Tests for v1 API Routes", () => {
}
expect(response.body.data.markdown).not.toContain(
- "This page is used for end-to-end (e2e) testing with Firecrawl.
"
+ "This page is used for end-to-end (e2e) testing with Firecrawl.
",
);
expect(response.body.data.markdown).toContain(
- "Content with id #content-1"
+ "Content with id #content-1",
);
},
- 30000
+ 30000,
);
it.concurrent(
@@ -218,11 +218,11 @@ describe("E2E Tests for v1 API Routes", () => {
async () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
- excludeTags: ["#content-1"]
+ excludeTags: ["#content-1"],
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -236,13 +236,13 @@ describe("E2E Tests for v1 API Routes", () => {
}
expect(response.body.data.markdown).toContain(
- "This page is used for end-to-end (e2e) testing with Firecrawl."
+ "This page is used for end-to-end (e2e) testing with Firecrawl.",
);
expect(response.body.data.markdown).not.toContain(
- "Content with id #content-1"
+ "Content with id #content-1",
);
},
- 30000
+ 30000,
);
it.concurrent(
@@ -251,11 +251,11 @@ describe("E2E Tests for v1 API Routes", () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
formats: ["html", "markdown"],
- onlyMainContent: false
+ onlyMainContent: false,
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -269,13 +269,13 @@ describe("E2E Tests for v1 API Routes", () => {
}
expect(response.body.data.markdown).toContain(
- "This page is used for end-to-end (e2e) testing with Firecrawl."
+ "This page is used for end-to-end (e2e) testing with Firecrawl.",
);
expect(response.body.data.html).toContain(
- ''
+ '',
);
},
- 30000
+ 30000,
);
it.concurrent(
@@ -283,11 +283,11 @@ describe("E2E Tests for v1 API Routes", () => {
async () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
- timeout: 500
+ timeout: 500,
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -302,7 +302,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.error).toBe("Request timed out");
expect(response.body.success).toBe(false);
},
- 30000
+ 30000,
);
it.concurrent(
@@ -310,11 +310,11 @@ describe("E2E Tests for v1 API Routes", () => {
async () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
- mobile: true
+ mobile: true,
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -327,17 +327,17 @@ describe("E2E Tests for v1 API Routes", () => {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data.markdown).toContain(
- "This content is only visible on mobile"
+ "This content is only visible on mobile",
);
},
- 30000
+ 30000,
);
it.concurrent(
"should handle 'parsePDF' parameter correctly",
async () => {
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -352,21 +352,21 @@ describe("E2E Tests for v1 API Routes", () => {
}
expect(response.body.data.markdown).toContain(
- "arXiv:astro-ph/9301001v1 7 Jan 1993"
+ "arXiv:astro-ph/9301001v1 7 Jan 1993",
);
expect(response.body.data.markdown).not.toContain(
- "h7uKu14adDL6yGfnGf2qycY5uq8kC3OKCWkPxm"
+ "h7uKu14adDL6yGfnGf2qycY5uq8kC3OKCWkPxm",
);
const responseNoParsePDF: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({
url: "https://arxiv.org/pdf/astro-ph/9301001.pdf",
- parsePDF: false
+ parsePDF: false,
});
await new Promise((r) => setTimeout(r, 6000));
@@ -376,10 +376,10 @@ describe("E2E Tests for v1 API Routes", () => {
throw new Error("Expected response body to have 'data' property");
}
expect(responseNoParsePDF.body.data.markdown).toContain(
- "h7uKu14adDL6yGfnGf2qycY5uq8kC3OKCWkPxm"
+ "h7uKu14adDL6yGfnGf2qycY5uq8kC3OKCWkPxm",
);
},
- 30000
+ 30000,
);
// it.concurrent("should handle 'location' parameter correctly",
@@ -408,11 +408,11 @@ describe("E2E Tests for v1 API Routes", () => {
async () => {
const scrapeRequest = {
url: "https://expired.badssl.com/",
- timeout: 120000
+ timeout: 120000,
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -430,7 +430,7 @@ describe("E2E Tests for v1 API Routes", () => {
const scrapeRequestWithSkipTlsVerification = {
url: "https://expired.badssl.com/",
skipTlsVerification: true,
- timeout: 120000
+ timeout: 120000,
} as ScrapeRequest;
const responseWithSkipTlsVerification: ScrapeResponseRequestTest =
@@ -448,10 +448,10 @@ describe("E2E Tests for v1 API Routes", () => {
}
// console.log(responseWithSkipTlsVerification.body.data)
expect(responseWithSkipTlsVerification.body.data.markdown).toContain(
- "badssl.com"
+ "badssl.com",
);
},
- 60000
+ 60000,
);
it.concurrent(
@@ -459,11 +459,11 @@ describe("E2E Tests for v1 API Routes", () => {
async () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
- removeBase64Images: true
+ removeBase64Images: true,
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -478,7 +478,7 @@ describe("E2E Tests for v1 API Routes", () => {
// - TODO: not working for every image
// expect(response.body.data.markdown).toContain("Image-Removed");
},
- 30000
+ 30000,
);
it.concurrent(
@@ -489,13 +489,13 @@ describe("E2E Tests for v1 API Routes", () => {
actions: [
{
type: "wait",
- milliseconds: 10000
- }
- ]
+ milliseconds: 10000,
+ },
+ ],
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -508,10 +508,10 @@ describe("E2E Tests for v1 API Routes", () => {
}
expect(response.body.data.markdown).not.toContain("Loading...");
expect(response.body.data.markdown).toContain(
- "Content loaded after 5 seconds!"
+ "Content loaded after 5 seconds!",
);
},
- 30000
+ 30000,
);
// screenshot
@@ -522,13 +522,13 @@ describe("E2E Tests for v1 API Routes", () => {
url: E2E_TEST_SERVER_URL,
actions: [
{
- type: "screenshot"
- }
- ]
+ type: "screenshot",
+ },
+ ],
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -543,15 +543,15 @@ describe("E2E Tests for v1 API Routes", () => {
throw new Error("Expected response body to have screenshots array");
}
expect(response.body.data.actions.screenshots[0].length).toBeGreaterThan(
- 0
+ 0,
);
expect(response.body.data.actions.screenshots[0]).toContain(
- "https://service.firecrawl.dev/storage/v1/object/public/media/screenshot-"
+ "https://service.firecrawl.dev/storage/v1/object/public/media/screenshot-",
);
// TODO compare screenshot with expected screenshot
},
- 30000
+ 30000,
);
it.concurrent(
@@ -562,16 +562,16 @@ describe("E2E Tests for v1 API Routes", () => {
actions: [
{
type: "screenshot",
- fullPage: true
+ fullPage: true,
},
{
- type: "scrape"
- }
- ]
+ type: "scrape",
+ },
+ ],
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -587,24 +587,24 @@ describe("E2E Tests for v1 API Routes", () => {
throw new Error("Expected response body to have screenshots array");
}
expect(response.body.data.actions.screenshots[0].length).toBeGreaterThan(
- 0
+ 0,
);
expect(response.body.data.actions.screenshots[0]).toContain(
- "https://service.firecrawl.dev/storage/v1/object/public/media/screenshot-"
+ "https://service.firecrawl.dev/storage/v1/object/public/media/screenshot-",
);
if (!response.body.data.actions?.scrapes) {
throw new Error("Expected response body to have scrapes array");
}
expect(response.body.data.actions.scrapes[0].url).toBe(
- "https://firecrawl-e2e-test.vercel.app/"
+ "https://firecrawl-e2e-test.vercel.app/",
);
expect(response.body.data.actions.scrapes[0].html).toContain(
- "This page is used for end-to-end (e2e) testing with Firecrawl."
+ "This page is used for end-to-end (e2e) testing with Firecrawl.",
);
// TODO compare screenshot with expected full page screenshot
},
- 30000
+ 30000,
);
it.concurrent(
@@ -615,13 +615,13 @@ describe("E2E Tests for v1 API Routes", () => {
actions: [
{
type: "click",
- selector: "#click-me"
- }
- ]
+ selector: "#click-me",
+ },
+ ],
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -634,10 +634,10 @@ describe("E2E Tests for v1 API Routes", () => {
}
expect(response.body.data.markdown).not.toContain("Click me!");
expect(response.body.data.markdown).toContain(
- "Text changed after click!"
+ "Text changed after click!",
);
},
- 30000
+ 30000,
);
it.concurrent(
@@ -649,17 +649,17 @@ describe("E2E Tests for v1 API Routes", () => {
actions: [
{
type: "click",
- selector: "#input-1"
+ selector: "#input-1",
},
{
type: "write",
- text: "Hello, world!"
- }
- ]
+ text: "Hello, world!",
+ },
+ ],
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -675,7 +675,7 @@ describe("E2E Tests for v1 API Routes", () => {
// uncomment the following line:
// expect(response.body.data.html).toContain("");
},
- 30000
+ 30000,
);
// TODO: fix this test (need to fix fire-engine first)
@@ -688,13 +688,13 @@ describe("E2E Tests for v1 API Routes", () => {
actions: [
{
type: "press",
- key: "ArrowDown"
- }
- ]
+ key: "ArrowDown",
+ },
+ ],
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -709,7 +709,7 @@ describe("E2E Tests for v1 API Routes", () => {
// }
// expect(response.body.data.markdown).toContain("Last Key Clicked: ArrowDown")
},
- 30000
+ 30000,
);
// TODO: fix this test (need to fix fire-engine first)
@@ -722,18 +722,18 @@ describe("E2E Tests for v1 API Routes", () => {
actions: [
{
type: "click",
- selector: "#scroll-bottom-loader"
+ selector: "#scroll-bottom-loader",
},
{
type: "scroll",
direction: "down",
- amount: 2000
- }
- ]
+ amount: 2000,
+ },
+ ],
} as ScrapeRequest;
const response: ScrapeResponseRequestTest = await request(
- FIRECRAWL_API_URL
+ FIRECRAWL_API_URL,
)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@@ -748,7 +748,7 @@ describe("E2E Tests for v1 API Routes", () => {
//
// expect(response.body.data.markdown).toContain("You have reached the bottom!")
},
- 30000
+ 30000,
);
// TODO: test scrape action
diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts
index e026eef0..46668e64 100644
--- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts
@@ -3,7 +3,7 @@ import dotenv from "dotenv";
import {
FirecrawlCrawlResponse,
FirecrawlCrawlStatusResponse,
- FirecrawlScrapeResponse
+ FirecrawlScrapeResponse,
} from "../../types";
dotenv.config();
@@ -42,7 +42,7 @@ describe("E2E Tests for v0 API Routes", () => {
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
- }
+ },
);
it.concurrent(
@@ -63,30 +63,30 @@ describe("E2E Tests for v0 API Routes", () => {
expect(response.body.data.metadata.pageError).toBeUndefined();
expect(response.body.data.metadata.title).toBe("Roast My Website");
expect(response.body.data.metadata.description).toBe(
- "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️"
+ "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️",
);
expect(response.body.data.metadata.keywords).toBe(
- "Roast My Website,Roast,Website,GitHub,Firecrawl"
+ "Roast My Website,Roast,Website,GitHub,Firecrawl",
);
expect(response.body.data.metadata.robots).toBe("follow, index");
expect(response.body.data.metadata.ogTitle).toBe("Roast My Website");
expect(response.body.data.metadata.ogDescription).toBe(
- "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️"
+ "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️",
);
expect(response.body.data.metadata.ogUrl).toBe(
- "https://www.roastmywebsite.ai"
+ "https://www.roastmywebsite.ai",
);
expect(response.body.data.metadata.ogImage).toBe(
- "https://www.roastmywebsite.ai/og.png"
+ "https://www.roastmywebsite.ai/og.png",
);
expect(response.body.data.metadata.ogLocaleAlternate).toStrictEqual([]);
expect(response.body.data.metadata.ogSiteName).toBe("Roast My Website");
expect(response.body.data.metadata.sourceURL).toBe(
- "https://roastmywebsite.ai"
+ "https://roastmywebsite.ai",
);
expect(response.body.data.metadata.pageStatusCode).toBe(200);
},
- 30000
+ 30000,
); // 30 seconds timeout
it.concurrent(
@@ -98,7 +98,7 @@ describe("E2E Tests for v0 API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://roastmywebsite.ai",
- pageOptions: { includeHtml: true }
+ pageOptions: { includeHtml: true },
});
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
@@ -112,7 +112,7 @@ describe("E2E Tests for v0 API Routes", () => {
expect(response.body.data.metadata.pageStatusCode).toBe(200);
expect(response.body.data.metadata.pageError).toBeUndefined();
},
- 30000
+ 30000,
); // 30 seconds timeout
it.concurrent(
@@ -130,12 +130,12 @@ describe("E2E Tests for v0 API Routes", () => {
expect(response.body.data).toHaveProperty("content");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.content).toContain(
- "We present spectrophotometric observations of the Broad Line Radio Galaxy"
+ "We present spectrophotometric observations of the Broad Line Radio Galaxy",
);
expect(response.body.data.metadata.pageStatusCode).toBe(200);
expect(response.body.data.metadata.pageError).toBeUndefined();
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -153,12 +153,12 @@ describe("E2E Tests for v0 API Routes", () => {
expect(response.body.data).toHaveProperty("content");
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.content).toContain(
- "We present spectrophotometric observations of the Broad Line Radio Galaxy"
+ "We present spectrophotometric observations of the Broad Line Radio Galaxy",
);
expect(response.body.data.metadata.pageStatusCode).toBe(200);
expect(response.body.data.metadata.pageError).toBeUndefined();
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -177,16 +177,16 @@ describe("E2E Tests for v0 API Routes", () => {
expect(responseWithoutRemoveTags.body.data).toHaveProperty("metadata");
expect(responseWithoutRemoveTags.body.data).not.toHaveProperty("html");
expect(responseWithoutRemoveTags.body.data.content).toContain(
- "Scrape This Site"
+ "Scrape This Site",
);
expect(responseWithoutRemoveTags.body.data.content).toContain(
- "Lessons and Videos"
+ "Lessons and Videos",
); // #footer
expect(responseWithoutRemoveTags.body.data.content).toContain(
- "[Sandbox]("
+ "[Sandbox](",
); // .nav
expect(responseWithoutRemoveTags.body.data.content).toContain(
- "web scraping"
+ "web scraping",
); // strong
const response: FirecrawlScrapeResponse = await request(TEST_URL)
@@ -195,7 +195,7 @@ describe("E2E Tests for v0 API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://www.scrapethissite.com/",
- pageOptions: { removeTags: [".nav", "#footer", "strong"] }
+ pageOptions: { removeTags: [".nav", "#footer", "strong"] },
});
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
@@ -208,7 +208,7 @@ describe("E2E Tests for v0 API Routes", () => {
expect(response.body.data.content).not.toContain("[Sandbox]("); // .nav
expect(response.body.data.content).not.toContain("web scraping"); // strong
},
- 30000
+ 30000,
); // 30 seconds timeout
it.concurrent(
@@ -227,10 +227,10 @@ describe("E2E Tests for v0 API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.pageStatusCode).toBe(400);
expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
- "bad request"
+ "bad request",
);
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -249,10 +249,10 @@ describe("E2E Tests for v0 API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.pageStatusCode).toBe(401);
expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
- "unauthorized"
+ "unauthorized",
);
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -271,10 +271,10 @@ describe("E2E Tests for v0 API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.pageStatusCode).toBe(403);
expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
- "forbidden"
+ "forbidden",
);
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -293,7 +293,7 @@ describe("E2E Tests for v0 API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.pageStatusCode).toBe(404);
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -312,7 +312,7 @@ describe("E2E Tests for v0 API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.pageStatusCode).toBe(405);
},
- 60000
+ 60000,
); // 60 seconds
it.concurrent(
@@ -331,7 +331,7 @@ describe("E2E Tests for v0 API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.pageStatusCode).toBe(500);
},
- 60000
+ 60000,
); // 60 seconds
});
@@ -351,7 +351,7 @@ describe("E2E Tests for v0 API Routes", () => {
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
- }
+ },
);
it.concurrent(
@@ -365,9 +365,9 @@ describe("E2E Tests for v0 API Routes", () => {
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("jobId");
expect(response.body.jobId).toMatch(
- /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
+ /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/,
);
- }
+ },
);
it.concurrent(
@@ -381,8 +381,8 @@ describe("E2E Tests for v0 API Routes", () => {
url: "https://mendable.ai",
limit: 10,
crawlerOptions: {
- includes: ["blog/*"]
- }
+ includes: ["blog/*"],
+ },
});
let response: FirecrawlCrawlStatusResponse;
@@ -408,7 +408,7 @@ describe("E2E Tests for v0 API Routes", () => {
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
+ (item: any) => item.metadata?.sourceURL,
);
expect(urls.length).toBeGreaterThan(5);
urls.forEach((url: string) => {
@@ -424,13 +424,13 @@ describe("E2E Tests for v0 API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].content).toContain("Mendable");
expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
- 200
+ 200,
);
expect(
- completedResponse.body.data[0].metadata.pageError
+ completedResponse.body.data[0].metadata.pageError,
).toBeUndefined();
},
- 180000
+ 180000,
); // 180 seconds
it.concurrent(
@@ -444,8 +444,8 @@ describe("E2E Tests for v0 API Routes", () => {
url: "https://mendable.ai",
limit: 10,
crawlerOptions: {
- excludes: ["blog/*"]
- }
+ excludes: ["blog/*"],
+ },
});
let isFinished = false;
@@ -467,20 +467,20 @@ describe("E2E Tests for v0 API Routes", () => {
await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database
const completedResponse: FirecrawlCrawlStatusResponse = await request(
- TEST_URL
+ TEST_URL,
)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
+ (item: any) => item.metadata?.sourceURL,
);
expect(urls.length).toBeGreaterThan(5);
urls.forEach((url: string) => {
expect(url.startsWith("https://wwww.mendable.ai/blog/")).toBeFalsy();
});
},
- 90000
+ 90000,
); // 90 seconds
it.concurrent(
@@ -492,7 +492,7 @@ describe("E2E Tests for v0 API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://www.scrapethissite.com",
- crawlerOptions: { maxDepth: 1 }
+ crawlerOptions: { maxDepth: 1 },
});
expect(crawlResponse.statusCode).toBe(200);
@@ -515,7 +515,7 @@ describe("E2E Tests for v0 API Routes", () => {
}
}
const completedResponse: FirecrawlCrawlStatusResponse = await request(
- TEST_URL
+ TEST_URL,
)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
@@ -528,13 +528,13 @@ describe("E2E Tests for v0 API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
- 200
+ 200,
);
expect(
- completedResponse.body.data[0].metadata.pageError
+ completedResponse.body.data[0].metadata.pageError,
).toBeUndefined();
const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
+ (item: any) => item.metadata?.sourceURL,
);
expect(urls.length).toBeGreaterThan(1);
@@ -550,14 +550,14 @@ describe("E2E Tests for v0 API Routes", () => {
expect(depth).toBeLessThanOrEqual(2);
});
},
- 180000
+ 180000,
);
});
describe("POST /v0/crawlWebsitePreview", () => {
it.concurrent("should require authorization", async () => {
const response: FirecrawlCrawlResponse = await request(TEST_URL).post(
- "/v0/crawlWebsitePreview"
+ "/v0/crawlWebsitePreview",
);
expect(response.statusCode).toBe(401);
});
@@ -571,7 +571,7 @@ describe("E2E Tests for v0 API Routes", () => {
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
- }
+ },
);
it.concurrent(
@@ -585,7 +585,7 @@ describe("E2E Tests for v0 API Routes", () => {
expect(response.statusCode).toBe(408);
},
- 3000
+ 3000,
);
});
@@ -604,7 +604,7 @@ describe("E2E Tests for v0 API Routes", () => {
.set("Content-Type", "application/json")
.send({ query: "test" });
expect(response.statusCode).toBe(401);
- }
+ },
);
it.concurrent(
@@ -620,7 +620,7 @@ describe("E2E Tests for v0 API Routes", () => {
expect(response.body.success).toBe(true);
expect(response.body).toHaveProperty("data");
},
- 60000
+ 60000,
); // 60 seconds timeout
});
@@ -637,7 +637,7 @@ describe("E2E Tests for v0 API Routes", () => {
.get("/v0/crawl/status/123")
.set("Authorization", `Bearer invalid-api-key`);
expect(response.statusCode).toBe(401);
- }
+ },
);
it.concurrent(
@@ -647,7 +647,7 @@ describe("E2E Tests for v0 API Routes", () => {
.get("/v0/crawl/status/invalidJobId")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(404);
- }
+ },
);
it.concurrent(
@@ -689,22 +689,22 @@ describe("E2E Tests for v0 API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0].content).toContain("Firecrawl");
expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
- 200
+ 200,
);
expect(
- completedResponse.body.data[0].metadata.pageError
+ completedResponse.body.data[0].metadata.pageError,
).toBeUndefined();
const childrenLinks = completedResponse.body.data.filter(
(doc) =>
doc.metadata &&
doc.metadata.sourceURL &&
- doc.metadata.sourceURL.includes("firecrawl.dev/blog")
+ doc.metadata.sourceURL.includes("firecrawl.dev/blog"),
);
expect(childrenLinks.length).toBe(completedResponse.body.data.length);
},
- 180000
+ 180000,
); // 120 seconds
// TODO: review the test below
@@ -762,7 +762,7 @@ describe("E2E Tests for v0 API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://docs.tatum.io",
- crawlerOptions: { limit: 200 }
+ crawlerOptions: { limit: 200 },
});
expect(crawlResponse.statusCode).toBe(200);
@@ -798,22 +798,22 @@ describe("E2E Tests for v0 API Routes", () => {
expect(completedResponse.body.data).toEqual(expect.arrayContaining([]));
expect(completedResponse.body).toHaveProperty("partial_data");
expect(completedResponse.body.partial_data[0]).toHaveProperty(
- "content"
+ "content",
);
expect(completedResponse.body.partial_data[0]).toHaveProperty(
- "markdown"
+ "markdown",
);
expect(completedResponse.body.partial_data[0]).toHaveProperty(
- "metadata"
+ "metadata",
);
expect(
- completedResponse.body.partial_data[0].metadata.pageStatusCode
+ completedResponse.body.partial_data[0].metadata.pageStatusCode,
).toBe(200);
expect(
- completedResponse.body.partial_data[0].metadata.pageError
+ completedResponse.body.partial_data[0].metadata.pageError,
).toBeUndefined();
},
- 60000
+ 60000,
); // 60 seconds
});
@@ -828,7 +828,7 @@ describe("E2E Tests for v0 API Routes", () => {
.send({
url: "https://mendable.ai",
pageOptions: {
- onlyMainContent: true
+ onlyMainContent: true,
},
extractorOptions: {
mode: "llm-extraction",
@@ -838,18 +838,18 @@ describe("E2E Tests for v0 API Routes", () => {
type: "object",
properties: {
company_mission: {
- type: "string"
+ type: "string",
},
supports_sso: {
- type: "boolean"
+ type: "boolean",
},
is_open_source: {
- type: "boolean"
- }
+ type: "boolean",
+ },
},
- required: ["company_mission", "supports_sso", "is_open_source"]
- }
- }
+ required: ["company_mission", "supports_sso", "is_open_source"],
+ },
+ },
});
// Ensure that the job was successfully created before proceeding with LLM extraction
@@ -868,7 +868,7 @@ describe("E2E Tests for v0 API Routes", () => {
expect(llmExtraction.is_open_source).toBe(false);
expect(typeof llmExtraction.is_open_source).toBe("boolean");
},
- 60000
+ 60000,
); // 60 secs
});
});
diff --git a/apps/api/src/controllers/__tests__/crawl.test.ts b/apps/api/src/controllers/__tests__/crawl.test.ts
index 81fa2e5d..a004ee3c 100644
--- a/apps/api/src/controllers/__tests__/crawl.test.ts
+++ b/apps/api/src/controllers/__tests__/crawl.test.ts
@@ -10,9 +10,9 @@ jest.mock("../auth", () => ({
success: true,
team_id: "team123",
error: null,
- status: 200
+ status: 200,
}),
- reduce: jest.fn()
+ reduce: jest.fn(),
}));
jest.mock("../../services/idempotency/validate");
@@ -21,15 +21,15 @@ describe("crawlController", () => {
const req = {
headers: {
"x-idempotency-key": await uuidv4(),
- Authorization: `Bearer ${process.env.TEST_API_KEY}`
+ Authorization: `Bearer ${process.env.TEST_API_KEY}`,
},
body: {
- url: "https://mendable.ai"
- }
+ url: "https://mendable.ai",
+ },
} as unknown as Request;
const res = {
status: jest.fn().mockReturnThis(),
- json: jest.fn()
+ json: jest.fn(),
} as unknown as Response;
// Mock the idempotency key validation to return false for the second call
@@ -45,7 +45,7 @@ describe("crawlController", () => {
await crawlController(req, res);
expect(res.status).toHaveBeenCalledWith(409);
expect(res.json).toHaveBeenCalledWith({
- error: "Idempotency key already used"
+ error: "Idempotency key already used",
});
});
});
diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts
index 947c2784..f865984a 100644
--- a/apps/api/src/controllers/auth.ts
+++ b/apps/api/src/controllers/auth.ts
@@ -4,7 +4,7 @@ import {
AuthResponse,
NotificationType,
PlanType,
- RateLimiterMode
+ RateLimiterMode,
} from "../types";
import { supabase_service } from "../services/supabase";
import { withAuth } from "../lib/withAuth";
@@ -41,7 +41,7 @@ export async function setCachedACUC(
acuc:
| AuthCreditUsageChunk
| null
- | ((acuc: AuthCreditUsageChunk) => AuthCreditUsageChunk | null)
+ | ((acuc: AuthCreditUsageChunk) => AuthCreditUsageChunk | null),
) {
const cacheKeyACUC = `acuc_${api_key}`;
const redLockKey = `lock_${cacheKeyACUC}`;
@@ -76,7 +76,7 @@ export async function setCachedACUC(
export async function getACUC(
api_key: string,
cacheOnly = false,
- useCache = true
+ useCache = true,
): Promise {
const cacheKeyACUC = `acuc_${api_key}`;
@@ -97,7 +97,7 @@ export async function getACUC(
({ data, error } = await supabase_service.rpc(
"auth_credit_usage_chunk_test_21_credit_pack",
{ input_key: api_key },
- { get: true }
+ { get: true },
));
if (!error) {
@@ -105,13 +105,13 @@ export async function getACUC(
}
logger.warn(
- `Failed to retrieve authentication and credit usage data after ${retries}, trying again...`
+ `Failed to retrieve authentication and credit usage data after ${retries}, trying again...`,
);
retries++;
if (retries === maxRetries) {
throw new Error(
"Failed to retrieve authentication and credit usage data after 3 attempts: " +
- JSON.stringify(error)
+ JSON.stringify(error),
);
}
@@ -143,19 +143,19 @@ export async function clearACUC(api_key: string): Promise {
export async function authenticateUser(
req,
res,
- mode?: RateLimiterMode
+ mode?: RateLimiterMode,
): Promise {
return withAuth(supaAuthenticateUser, {
success: true,
chunk: null,
- team_id: "bypass"
+ team_id: "bypass",
})(req, res, mode);
}
export async function supaAuthenticateUser(
req,
res,
- mode?: RateLimiterMode
+ mode?: RateLimiterMode,
): Promise {
const authHeader =
req.headers.authorization ??
@@ -170,7 +170,7 @@ export async function supaAuthenticateUser(
return {
success: false,
error: "Unauthorized: Token missing",
- status: 401
+ status: 401,
};
}
@@ -199,7 +199,7 @@ export async function supaAuthenticateUser(
return {
success: false,
error: "Unauthorized: Invalid token",
- status: 401
+ status: 401,
};
}
@@ -209,7 +209,7 @@ export async function supaAuthenticateUser(
return {
success: false,
error: "Unauthorized: Invalid token",
- status: 401
+ status: 401,
};
}
@@ -219,14 +219,14 @@ export async function supaAuthenticateUser(
const plan = getPlanByPriceId(priceId);
subscriptionData = {
team_id: teamId,
- plan
+ plan,
};
switch (mode) {
case RateLimiterMode.Crawl:
rateLimiter = getRateLimiter(
RateLimiterMode.Crawl,
token,
- subscriptionData.plan
+ subscriptionData.plan,
);
break;
case RateLimiterMode.Scrape:
@@ -234,21 +234,21 @@ export async function supaAuthenticateUser(
RateLimiterMode.Scrape,
token,
subscriptionData.plan,
- teamId
+ teamId,
);
break;
case RateLimiterMode.Search:
rateLimiter = getRateLimiter(
RateLimiterMode.Search,
token,
- subscriptionData.plan
+ subscriptionData.plan,
);
break;
case RateLimiterMode.Map:
rateLimiter = getRateLimiter(
RateLimiterMode.Map,
token,
- subscriptionData.plan
+ subscriptionData.plan,
);
break;
case RateLimiterMode.CrawlStatus:
@@ -278,7 +278,7 @@ export async function supaAuthenticateUser(
priceId,
plan: subscriptionData?.plan,
mode,
- rateLimiterRes
+ rateLimiterRes,
});
const secs = Math.round(rateLimiterRes.msBeforeNext / 1000) || 1;
const retryDate = new Date(Date.now() + rateLimiterRes.msBeforeNext);
@@ -293,7 +293,7 @@ export async function supaAuthenticateUser(
return {
success: false,
error: `Rate limit exceeded. Consumed (req/min): ${rateLimiterRes.consumedPoints}, Remaining (req/min): ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`,
- status: 429
+ status: 429,
};
}
@@ -323,7 +323,7 @@ export async function supaAuthenticateUser(
success: true,
team_id: teamId ?? undefined,
plan: (subscriptionData?.plan ?? "") as PlanType,
- chunk
+ chunk,
};
}
function getPlanByPriceId(price_id: string | null): PlanType {
diff --git a/apps/api/src/controllers/v0/admin/queue.ts b/apps/api/src/controllers/v0/admin/queue.ts
index 6cc1c6e0..d7d9c089 100644
--- a/apps/api/src/controllers/v0/admin/queue.ts
+++ b/apps/api/src/controllers/v0/admin/queue.ts
@@ -8,7 +8,7 @@ import { sendSlackWebhook } from "../../../services/alerts/slack";
export async function cleanBefore24hCompleteJobsController(
req: Request,
- res: Response
+ res: Response,
) {
logger.info("🐂 Cleaning jobs older than 24h");
try {
@@ -22,8 +22,8 @@ export async function cleanBefore24hCompleteJobsController(
["completed"],
i * batchSize,
i * batchSize + batchSize,
- true
- )
+ true,
+ ),
);
}
const completedJobs: Job[] = (
@@ -33,7 +33,7 @@ export async function cleanBefore24hCompleteJobsController(
completedJobs.filter(
(job) =>
job.finishedOn !== undefined &&
- job.finishedOn < Date.now() - 24 * 60 * 60 * 1000
+ job.finishedOn < Date.now() - 24 * 60 * 60 * 1000,
) || [];
let count = 0;
@@ -73,14 +73,14 @@ export async function queuesController(req: Request, res: Response) {
const scrapeQueue = getScrapeQueue();
const [webScraperActive] = await Promise.all([
- scrapeQueue.getActiveCount()
+ scrapeQueue.getActiveCount(),
]);
const noActiveJobs = webScraperActive === 0;
// 200 if no active jobs, 503 if there are active jobs
return res.status(noActiveJobs ? 200 : 500).json({
webScraperActive,
- noActiveJobs
+ noActiveJobs,
});
} catch (error) {
logger.error(error);
@@ -99,7 +99,7 @@ export async function autoscalerController(req: Request, res: Response) {
await Promise.all([
scrapeQueue.getActiveCount(),
scrapeQueue.getWaitingCount(),
- scrapeQueue.getPrioritizedCount()
+ scrapeQueue.getPrioritizedCount(),
]);
let waitingAndPriorityCount = webScraperWaiting + webScraperPriority;
@@ -109,9 +109,9 @@ export async function autoscalerController(req: Request, res: Response) {
"https://api.machines.dev/v1/apps/firecrawl-scraper-js/machines",
{
headers: {
- Authorization: `Bearer ${process.env.FLY_API_TOKEN}`
- }
- }
+ Authorization: `Bearer ${process.env.FLY_API_TOKEN}`,
+ },
+ },
);
const machines = await request.json();
@@ -121,7 +121,7 @@ export async function autoscalerController(req: Request, res: Response) {
(machine.state === "started" ||
machine.state === "starting" ||
machine.state === "replacing") &&
- machine.config.env["FLY_PROCESS_GROUP"] === "worker"
+ machine.config.env["FLY_PROCESS_GROUP"] === "worker",
).length;
let targetMachineCount = activeMachines;
@@ -134,17 +134,17 @@ export async function autoscalerController(req: Request, res: Response) {
if (webScraperActive > 9000 || waitingAndPriorityCount > 2000) {
targetMachineCount = Math.min(
maxNumberOfMachines,
- activeMachines + baseScaleUp * 3
+ activeMachines + baseScaleUp * 3,
);
} else if (webScraperActive > 5000 || waitingAndPriorityCount > 1000) {
targetMachineCount = Math.min(
maxNumberOfMachines,
- activeMachines + baseScaleUp * 2
+ activeMachines + baseScaleUp * 2,
);
} else if (webScraperActive > 1000 || waitingAndPriorityCount > 500) {
targetMachineCount = Math.min(
maxNumberOfMachines,
- activeMachines + baseScaleUp
+ activeMachines + baseScaleUp,
);
}
@@ -152,47 +152,47 @@ export async function autoscalerController(req: Request, res: Response) {
if (webScraperActive < 100 && waitingAndPriorityCount < 50) {
targetMachineCount = Math.max(
minNumberOfMachines,
- activeMachines - baseScaleDown * 3
+ activeMachines - baseScaleDown * 3,
);
} else if (webScraperActive < 500 && waitingAndPriorityCount < 200) {
targetMachineCount = Math.max(
minNumberOfMachines,
- activeMachines - baseScaleDown * 2
+ activeMachines - baseScaleDown * 2,
);
} else if (webScraperActive < 1000 && waitingAndPriorityCount < 500) {
targetMachineCount = Math.max(
minNumberOfMachines,
- activeMachines - baseScaleDown
+ activeMachines - baseScaleDown,
);
}
if (targetMachineCount !== activeMachines) {
logger.info(
- `🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting`
+ `🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting`,
);
if (targetMachineCount > activeMachines) {
sendSlackWebhook(
`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting - Current DateTime: ${new Date().toISOString()}`,
false,
- process.env.SLACK_AUTOSCALER ?? ""
+ process.env.SLACK_AUTOSCALER ?? "",
);
} else {
sendSlackWebhook(
`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting - Current DateTime: ${new Date().toISOString()}`,
false,
- process.env.SLACK_AUTOSCALER ?? ""
+ process.env.SLACK_AUTOSCALER ?? "",
);
}
return res.status(200).json({
mode: "scale-descale",
- count: targetMachineCount
+ count: targetMachineCount,
});
}
return res.status(200).json({
mode: "normal",
- count: activeMachines
+ count: activeMachines,
});
} catch (error) {
logger.error(error);
diff --git a/apps/api/src/controllers/v0/admin/redis-health.ts b/apps/api/src/controllers/v0/admin/redis-health.ts
index 963755ef..b3256edf 100644
--- a/apps/api/src/controllers/v0/admin/redis-health.ts
+++ b/apps/api/src/controllers/v0/admin/redis-health.ts
@@ -38,7 +38,7 @@ export async function redisHealthController(req: Request, res: Response) {
try {
await retryOperation(() => redisRateLimitClient.set(testKey, testValue));
redisRateLimitHealth = await retryOperation(() =>
- redisRateLimitClient.get(testKey)
+ redisRateLimitClient.get(testKey),
);
await retryOperation(() => redisRateLimitClient.del(testKey));
} catch (error) {
@@ -49,7 +49,7 @@ export async function redisHealthController(req: Request, res: Response) {
const healthStatus = {
queueRedis: queueRedisHealth === testValue ? "healthy" : "unhealthy",
redisRateLimitClient:
- redisRateLimitHealth === testValue ? "healthy" : "unhealthy"
+ redisRateLimitHealth === testValue ? "healthy" : "unhealthy",
};
if (
@@ -60,7 +60,7 @@ export async function redisHealthController(req: Request, res: Response) {
return res.status(200).json({ status: "healthy", details: healthStatus });
} else {
logger.info(
- `Redis instances health check: ${JSON.stringify(healthStatus)}`
+ `Redis instances health check: ${JSON.stringify(healthStatus)}`,
);
// await sendSlackWebhook(
// `[REDIS DOWN] Redis instances health check: ${JSON.stringify(
diff --git a/apps/api/src/controllers/v0/crawl-cancel.ts b/apps/api/src/controllers/v0/crawl-cancel.ts
index b445978c..db834230 100644
--- a/apps/api/src/controllers/v0/crawl-cancel.ts
+++ b/apps/api/src/controllers/v0/crawl-cancel.ts
@@ -48,7 +48,7 @@ export async function crawlCancelController(req: Request, res: Response) {
}
res.json({
- status: "cancelled"
+ status: "cancelled",
});
} catch (error) {
Sentry.captureException(error);
diff --git a/apps/api/src/controllers/v0/crawl-status.ts b/apps/api/src/controllers/v0/crawl-status.ts
index 756fca44..60ca0e7f 100644
--- a/apps/api/src/controllers/v0/crawl-status.ts
+++ b/apps/api/src/controllers/v0/crawl-status.ts
@@ -60,12 +60,12 @@ export async function crawlStatusController(req: Request, res: Response) {
// Combine jobs and jobStatuses into a single array of objects
let jobsWithStatuses = jobs.map((job, index) => ({
job,
- status: jobStatuses[index]
+ status: jobStatuses[index],
}));
// Filter out failed jobs
jobsWithStatuses = jobsWithStatuses.filter(
- (x) => x.status !== "failed" && x.status !== "unknown"
+ (x) => x.status !== "failed" && x.status !== "unknown",
);
// Sort jobs by timestamp
@@ -84,10 +84,10 @@ export async function crawlStatusController(req: Request, res: Response) {
const data = jobs
.filter(
(x) =>
- x.failedReason !== "Concurreny limit hit" && x.returnvalue !== null
+ x.failedReason !== "Concurreny limit hit" && x.returnvalue !== null,
)
.map((x) =>
- Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue
+ Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue,
);
if (
@@ -117,7 +117,7 @@ export async function crawlStatusController(req: Request, res: Response) {
? []
: data
.filter((x) => x !== null)
- .map((x) => toLegacyDocument(x, sc.internalOptions))
+ .map((x) => toLegacyDocument(x, sc.internalOptions)),
});
} catch (error) {
Sentry.captureException(error);
diff --git a/apps/api/src/controllers/v0/crawl.ts b/apps/api/src/controllers/v0/crawl.ts
index bb9ba363..36b8309f 100644
--- a/apps/api/src/controllers/v0/crawl.ts
+++ b/apps/api/src/controllers/v0/crawl.ts
@@ -10,7 +10,7 @@ import { createIdempotencyKey } from "../../../src/services/idempotency/create";
import {
defaultCrawlPageOptions,
defaultCrawlerOptions,
- defaultOrigin
+ defaultOrigin,
} from "../../../src/lib/default-values";
import { v4 as uuidv4 } from "uuid";
import { logger } from "../../../src/lib/logger";
@@ -21,7 +21,7 @@ import {
lockURL,
lockURLs,
saveCrawl,
- StoredCrawl
+ StoredCrawl,
} from "../../../src/lib/crawl-redis";
import { getScrapeQueue } from "../../../src/services/queue-service";
import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
@@ -54,7 +54,7 @@ export async function crawlController(req: Request, res: Response) {
const crawlerOptions = {
...defaultCrawlerOptions,
- ...req.body.crawlerOptions
+ ...req.body.crawlerOptions,
};
const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions };
@@ -82,13 +82,13 @@ export async function crawlController(req: Request, res: Response) {
const {
success: creditsCheckSuccess,
message: creditsCheckMessage,
- remainingCredits
+ remainingCredits,
} = await checkTeamCredits(chunk, team_id, limitCheck);
if (!creditsCheckSuccess) {
return res.status(402).json({
error:
- "Insufficient credits. You may be requesting with a higher limit than the amount of credits you have left. If not, upgrade your plan at https://firecrawl.dev/pricing or contact us at help@firecrawl.com"
+ "Insufficient credits. You may be requesting with a higher limit than the amount of credits you have left. If not, upgrade your plan at https://firecrawl.dev/pricing or contact us at help@firecrawl.com",
});
}
@@ -113,7 +113,7 @@ export async function crawlController(req: Request, res: Response) {
if (isUrlBlocked(url)) {
return res.status(403).json({
error:
- "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
});
}
@@ -153,7 +153,7 @@ export async function crawlController(req: Request, res: Response) {
const { scrapeOptions, internalOptions } = fromLegacyScrapeOptions(
pageOptions,
undefined,
- undefined
+ undefined,
);
internalOptions.disableSmartWaitCache = true; // NOTE: smart wait disabled for crawls to ensure contentful scrape, speed does not matter
@@ -166,7 +166,7 @@ export async function crawlController(req: Request, res: Response) {
internalOptions,
team_id,
plan,
- createdAt: Date.now()
+ createdAt: Date.now(),
};
const crawler = crawlToCrawler(id, sc);
@@ -204,23 +204,23 @@ export async function crawlController(req: Request, res: Response) {
plan,
origin: req.body.origin ?? defaultOrigin,
crawl_id: id,
- sitemapped: true
+ sitemapped: true,
},
opts: {
jobId: uuid,
- priority: jobPriority
- }
+ priority: jobPriority,
+ },
};
});
await lockURLs(
id,
sc,
- jobs.map((x) => x.data.url)
+ jobs.map((x) => x.data.url),
);
await addCrawlJobs(
id,
- jobs.map((x) => x.opts.jobId)
+ jobs.map((x) => x.opts.jobId),
);
for (const job of jobs) {
// add with sentry instrumentation
@@ -243,12 +243,12 @@ export async function crawlController(req: Request, res: Response) {
team_id,
plan: plan!,
origin: req.body.origin ?? defaultOrigin,
- crawl_id: id
+ crawl_id: id,
},
{
- priority: 15 // prioritize request 0 of crawl jobs same as scrape jobs
+ priority: 15, // prioritize request 0 of crawl jobs same as scrape jobs
},
- jobId
+ jobId,
);
await addCrawlJob(id, jobId);
}
@@ -258,7 +258,7 @@ export async function crawlController(req: Request, res: Response) {
Sentry.captureException(error);
logger.error(error);
return res.status(500).json({
- error: error instanceof ZodError ? "Invalid URL" : error.message
+ error: error instanceof ZodError ? "Invalid URL" : error.message,
});
}
}
diff --git a/apps/api/src/controllers/v0/crawlPreview.ts b/apps/api/src/controllers/v0/crawlPreview.ts
index 3b47bfaa..405e49c2 100644
--- a/apps/api/src/controllers/v0/crawlPreview.ts
+++ b/apps/api/src/controllers/v0/crawlPreview.ts
@@ -9,7 +9,7 @@ import {
crawlToCrawler,
lockURL,
saveCrawl,
- StoredCrawl
+ StoredCrawl,
} from "../../../src/lib/crawl-redis";
import { addScrapeJob } from "../../../src/services/queue-jobs";
import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
@@ -43,7 +43,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
if (isUrlBlocked(url)) {
return res.status(403).json({
error:
- "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
});
}
@@ -51,7 +51,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
const pageOptions = req.body.pageOptions ?? {
onlyMainContent: false,
includeHtml: false,
- removeTags: []
+ removeTags: [],
};
// if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this?
@@ -94,7 +94,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
const { scrapeOptions, internalOptions } = fromLegacyScrapeOptions(
pageOptions,
undefined,
- undefined
+ undefined,
);
const sc: StoredCrawl = {
@@ -105,7 +105,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
team_id,
plan,
robots,
- createdAt: Date.now()
+ createdAt: Date.now(),
};
await saveCrawl(id, sc);
@@ -131,10 +131,10 @@ export async function crawlPreviewController(req: Request, res: Response) {
internalOptions,
origin: "website-preview",
crawl_id: id,
- sitemapped: true
+ sitemapped: true,
},
{},
- jobId
+ jobId,
);
await addCrawlJob(id, jobId);
}
@@ -151,10 +151,10 @@ export async function crawlPreviewController(req: Request, res: Response) {
scrapeOptions,
internalOptions,
origin: "website-preview",
- crawl_id: id
+ crawl_id: id,
},
{},
- jobId
+ jobId,
);
await addCrawlJob(id, jobId);
}
diff --git a/apps/api/src/controllers/v0/scrape.ts b/apps/api/src/controllers/v0/scrape.ts
index 4a761ea3..8501e502 100644
--- a/apps/api/src/controllers/v0/scrape.ts
+++ b/apps/api/src/controllers/v0/scrape.ts
@@ -2,7 +2,7 @@ import { ExtractorOptions, PageOptions } from "./../../lib/entities";
import { Request, Response } from "express";
import {
billTeam,
- checkTeamCredits
+ checkTeamCredits,
} from "../../services/billing/credit_billing";
import { authenticateUser } from "../auth";
import { PlanType, RateLimiterMode } from "../../types";
@@ -11,7 +11,7 @@ import {
Document,
fromLegacyCombo,
toLegacyDocument,
- url as urlSchema
+ url as urlSchema,
} from "../v1/types";
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
import { numTokensFromString } from "../../lib/LLM-extraction/helpers";
@@ -19,7 +19,7 @@ import {
defaultPageOptions,
defaultExtractorOptions,
defaultTimeout,
- defaultOrigin
+ defaultOrigin,
} from "../../lib/default-values";
import { addScrapeJob, waitForJob } from "../../services/queue-jobs";
import { getScrapeQueue } from "../../services/queue-service";
@@ -38,7 +38,7 @@ export async function scrapeHelper(
pageOptions: PageOptions,
extractorOptions: ExtractorOptions,
timeout: number,
- plan?: PlanType
+ plan?: PlanType,
): Promise<{
success: boolean;
error?: string;
@@ -55,7 +55,7 @@ export async function scrapeHelper(
success: false,
error:
"Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
- returnCode: 403
+ returnCode: 403,
};
}
@@ -65,7 +65,7 @@ export async function scrapeHelper(
pageOptions,
extractorOptions,
timeout,
- crawlerOptions
+ crawlerOptions,
);
await addScrapeJob(
@@ -77,11 +77,11 @@ export async function scrapeHelper(
internalOptions,
plan: plan!,
origin: req.body.origin ?? defaultOrigin,
- is_scrape: true
+ is_scrape: true,
},
{},
jobId,
- jobPriority
+ jobPriority,
);
let doc;
@@ -90,7 +90,7 @@ export async function scrapeHelper(
{
name: "Wait for job to finish",
op: "bullmq.wait",
- attributes: { job: jobId }
+ attributes: { job: jobId },
},
async (span) => {
try {
@@ -104,20 +104,20 @@ export async function scrapeHelper(
return {
success: false,
error: "Request timed out",
- returnCode: 408
+ returnCode: 408,
};
} else if (
typeof e === "string" &&
(e.includes("Error generating completions: ") ||
e.includes("Invalid schema for function") ||
e.includes(
- "LLM extraction did not match the extraction schema you provided."
+ "LLM extraction did not match the extraction schema you provided.",
))
) {
return {
success: false,
error: e,
- returnCode: 500
+ returnCode: 500,
};
} else {
throw e;
@@ -125,7 +125,7 @@ export async function scrapeHelper(
}
span.setAttribute("result", JSON.stringify(doc));
return null;
- }
+ },
);
if (err !== null) {
@@ -140,7 +140,7 @@ export async function scrapeHelper(
success: true,
error: "No page found",
returnCode: 200,
- data: doc
+ data: doc,
};
}
@@ -166,7 +166,7 @@ export async function scrapeHelper(
return {
success: true,
data: toLegacyDocument(doc, internalOptions),
- returnCode: 200
+ returnCode: 200,
};
}
@@ -185,7 +185,7 @@ export async function scrapeController(req: Request, res: Response) {
const pageOptions = { ...defaultPageOptions, ...req.body.pageOptions };
const extractorOptions = {
...defaultExtractorOptions,
- ...req.body.extractorOptions
+ ...req.body.extractorOptions,
};
const origin = req.body.origin ?? defaultOrigin;
let timeout = req.body.timeout ?? defaultTimeout;
@@ -197,7 +197,7 @@ export async function scrapeController(req: Request, res: Response) {
) {
return res.status(400).json({
error:
- "extractorOptions.extractionSchema must be an object if llm-extraction mode is specified"
+ "extractorOptions.extractionSchema must be an object if llm-extraction mode is specified",
});
}
@@ -213,7 +213,7 @@ export async function scrapeController(req: Request, res: Response) {
earlyReturn = true;
return res.status(402).json({
error:
- "Insufficient credits. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing"
+ "Insufficient credits. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing",
});
}
} catch (error) {
@@ -221,7 +221,7 @@ export async function scrapeController(req: Request, res: Response) {
earlyReturn = true;
return res.status(500).json({
error:
- "Error checking team credits. Please contact help@firecrawl.com for help."
+ "Error checking team credits. Please contact help@firecrawl.com for help.",
});
}
@@ -236,7 +236,7 @@ export async function scrapeController(req: Request, res: Response) {
pageOptions,
extractorOptions,
timeout,
- plan
+ plan,
);
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
@@ -244,7 +244,7 @@ export async function scrapeController(req: Request, res: Response) {
result.data && (result.data as Document).markdown
? numTokensFromString(
(result.data as Document).markdown!,
- "gpt-3.5-turbo"
+ "gpt-3.5-turbo",
)
: 0;
@@ -267,7 +267,7 @@ export async function scrapeController(req: Request, res: Response) {
// billing for doc done on queue end, bill only for llm extraction
billTeam(team_id, chunk?.sub_id, creditsToBeBilled).catch((error) => {
logger.error(
- `Failed to bill team ${team_id} for ${creditsToBeBilled} credits: ${error}`
+ `Failed to bill team ${team_id} for ${creditsToBeBilled} credits: ${error}`,
);
// Optionally, you could notify an admin or add to a retry queue here
});
@@ -290,7 +290,7 @@ export async function scrapeController(req: Request, res: Response) {
const { scrapeOptions } = fromLegacyScrapeOptions(
pageOptions,
extractorOptions,
- timeout
+ timeout,
);
logJob({
@@ -306,7 +306,7 @@ export async function scrapeController(req: Request, res: Response) {
crawlerOptions: crawlerOptions,
scrapeOptions,
origin: origin,
- num_tokens: numTokens
+ num_tokens: numTokens,
});
return res.status(result.returnCode).json(result);
@@ -319,7 +319,7 @@ export async function scrapeController(req: Request, res: Response) {
? "Invalid URL"
: typeof error === "string"
? error
- : (error?.message ?? "Internal Server Error")
+ : (error?.message ?? "Internal Server Error"),
});
}
}
diff --git a/apps/api/src/controllers/v0/search.ts b/apps/api/src/controllers/v0/search.ts
index 4950ea5f..6a3513df 100644
--- a/apps/api/src/controllers/v0/search.ts
+++ b/apps/api/src/controllers/v0/search.ts
@@ -1,7 +1,7 @@
import { Request, Response } from "express";
import {
billTeam,
- checkTeamCredits
+ checkTeamCredits,
} from "../../services/billing/credit_billing";
import { authenticateUser } from "../auth";
import { PlanType, RateLimiterMode } from "../../types";
@@ -20,7 +20,7 @@ import {
Document,
fromLegacyCombo,
fromLegacyScrapeOptions,
- toLegacyDocument
+ toLegacyDocument,
} from "../v1/types";
export async function searchHelper(
@@ -31,7 +31,7 @@ export async function searchHelper(
crawlerOptions: any,
pageOptions: PageOptions,
searchOptions: SearchOptions,
- plan: PlanType | undefined
+ plan: PlanType | undefined,
): Promise<{
success: boolean;
error?: string;
@@ -62,7 +62,7 @@ export async function searchHelper(
filter: filter,
lang: searchOptions.lang ?? "en",
country: searchOptions.country ?? "us",
- location: searchOptions.location
+ location: searchOptions.location,
});
let justSearch = pageOptions.fetchPageContent === false;
@@ -71,13 +71,13 @@ export async function searchHelper(
pageOptions,
undefined,
60000,
- crawlerOptions
+ crawlerOptions,
);
if (justSearch) {
billTeam(team_id, subscription_id, res.length).catch((error) => {
logger.error(
- `Failed to bill team ${team_id} for ${res.length} credits: ${error}`
+ `Failed to bill team ${team_id} for ${res.length} credits: ${error}`,
);
// Optionally, you could notify an admin or add to a retry queue here
});
@@ -107,12 +107,12 @@ export async function searchHelper(
mode: "single_urls",
team_id: team_id,
scrapeOptions,
- internalOptions
+ internalOptions,
},
opts: {
jobId: uuid,
- priority: jobPriority
- }
+ priority: jobPriority,
+ },
};
});
@@ -123,7 +123,7 @@ export async function searchHelper(
const docs = (
await Promise.all(
- jobDatas.map((x) => waitForJob(x.opts.jobId, 60000))
+ jobDatas.map((x) => waitForJob(x.opts.jobId, 60000)),
)
).map((x) => toLegacyDocument(x, internalOptions));
@@ -136,7 +136,7 @@ export async function searchHelper(
// make sure doc.content is not empty
const filteredDocs = docs.filter(
- (doc: any) => doc && doc.content && doc.content.trim().length > 0
+ (doc: any) => doc && doc.content && doc.content.trim().length > 0,
);
if (filteredDocs.length === 0) {
@@ -144,14 +144,14 @@ export async function searchHelper(
success: true,
error: "No page found",
returnCode: 200,
- data: docs
+ data: docs,
};
}
return {
success: true,
data: filteredDocs,
- returnCode: 200
+ returnCode: 200,
};
}
@@ -169,7 +169,7 @@ export async function searchController(req: Request, res: Response) {
onlyMainContent: req.body.pageOptions?.onlyMainContent ?? false,
fetchPageContent: req.body.pageOptions?.fetchPageContent ?? true,
removeTags: req.body.pageOptions?.removeTags ?? [],
- fallback: req.body.pageOptions?.fallback ?? false
+ fallback: req.body.pageOptions?.fallback ?? false,
};
const origin = req.body.origin ?? "api";
@@ -197,7 +197,7 @@ export async function searchController(req: Request, res: Response) {
crawlerOptions,
pageOptions,
searchOptions,
- plan
+ plan,
);
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
@@ -212,7 +212,7 @@ export async function searchController(req: Request, res: Response) {
mode: "search",
url: req.body.query,
crawlerOptions: crawlerOptions,
- origin: origin
+ origin: origin,
});
return res.status(result.returnCode).json(result);
} catch (error) {
diff --git a/apps/api/src/controllers/v0/status.ts b/apps/api/src/controllers/v0/status.ts
index 73bfa159..c68579ea 100644
--- a/apps/api/src/controllers/v0/status.ts
+++ b/apps/api/src/controllers/v0/status.ts
@@ -6,7 +6,7 @@ import * as Sentry from "@sentry/node";
export async function crawlJobStatusPreviewController(
req: Request,
- res: Response
+ res: Response,
) {
try {
const sc = await getCrawl(req.params.jobId);
@@ -26,7 +26,7 @@ export async function crawlJobStatusPreviewController(
// }
const jobs = (await getJobs(req.params.jobId, jobIDs)).sort(
- (a, b) => a.timestamp - b.timestamp
+ (a, b) => a.timestamp - b.timestamp,
);
const jobStatuses = await Promise.all(jobs.map((x) => x.getState()));
const jobStatus = sc.cancelled
@@ -38,7 +38,7 @@ export async function crawlJobStatusPreviewController(
: "active";
const data = jobs.map((x) =>
- Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue
+ Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue,
);
res.json({
@@ -48,7 +48,7 @@ export async function crawlJobStatusPreviewController(
total: jobs.length,
data: jobStatus === "completed" ? data : null,
partial_data:
- jobStatus === "completed" ? [] : data.filter((x) => x !== null)
+ jobStatus === "completed" ? [] : data.filter((x) => x !== null),
});
} catch (error) {
Sentry.captureException(error);
diff --git a/apps/api/src/controllers/v1/__tests__/urlValidation.test.ts b/apps/api/src/controllers/v1/__tests__/urlValidation.test.ts
index 1ce058a0..b455e5ab 100644
--- a/apps/api/src/controllers/v1/__tests__/urlValidation.test.ts
+++ b/apps/api/src/controllers/v1/__tests__/urlValidation.test.ts
@@ -25,13 +25,13 @@ describe("URL Schema Validation", () => {
it("should reject URLs without a valid top-level domain", () => {
expect(() => url.parse("http://example")).toThrow(
- "URL must have a valid top-level domain or be a valid path"
+ "URL must have a valid top-level domain or be a valid path",
);
});
it("should reject blocked URLs", () => {
expect(() => url.parse("https://facebook.com")).toThrow(
- "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
);
});
@@ -47,28 +47,28 @@ describe("URL Schema Validation", () => {
it("should handle URLs with subdomains that are blocked", () => {
expect(() => url.parse("https://sub.facebook.com")).toThrow(
- "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
);
});
it("should handle URLs with paths that are blocked", () => {
expect(() => url.parse("http://facebook.com/path")).toThrow(
- "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
);
expect(() => url.parse("https://facebook.com/another/path")).toThrow(
- "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
);
});
it("should reject malformed URLs starting with 'http://http'", () => {
expect(() => url.parse("http://http://example.com")).toThrow(
- "Invalid URL. Invalid protocol."
+ "Invalid URL. Invalid protocol.",
);
});
it("should reject malformed URLs containing multiple 'http://'", () => {
expect(() =>
- url.parse("http://example.com/http://example.com")
+ url.parse("http://example.com/http://example.com"),
).not.toThrow();
});
diff --git a/apps/api/src/controllers/v1/batch-scrape.ts b/apps/api/src/controllers/v1/batch-scrape.ts
index a78264e3..89fa6741 100644
--- a/apps/api/src/controllers/v1/batch-scrape.ts
+++ b/apps/api/src/controllers/v1/batch-scrape.ts
@@ -5,14 +5,14 @@ import {
batchScrapeRequestSchema,
CrawlResponse,
RequestWithAuth,
- ScrapeOptions
+ ScrapeOptions,
} from "./types";
import {
addCrawlJobs,
getCrawl,
lockURLs,
saveCrawl,
- StoredCrawl
+ StoredCrawl,
} from "../../lib/crawl-redis";
import { logCrawl } from "../../services/logging/crawl_log";
import { getJobPriority } from "../../lib/job-priority";
@@ -22,7 +22,7 @@ import { logger as _logger } from "../../lib/logger";
export async function batchScrapeController(
req: RequestWithAuth<{}, CrawlResponse, BatchScrapeRequest>,
- res: Response
+ res: Response,
) {
req.body = batchScrapeRequestSchema.parse(req.body);
@@ -33,12 +33,12 @@ export async function batchScrapeController(
module: "api/v1",
method: "batchScrapeController",
teamId: req.auth.team_id,
- plan: req.auth.plan
+ plan: req.auth.plan,
});
logger.debug("Batch scrape " + id + " starting", {
urlsLength: req.body.urls,
appendToId: req.body.appendToId,
- account: req.account
+ account: req.account,
});
if (!req.body.appendToId) {
@@ -59,7 +59,7 @@ export async function batchScrapeController(
internalOptions: { disableSmartWaitCache: true }, // NOTE: smart wait disabled for batch scrapes to ensure contentful scrape, speed does not matter
team_id: req.auth.team_id,
createdAt: Date.now(),
- plan: req.auth.plan
+ plan: req.auth.plan,
};
if (!req.body.appendToId) {
@@ -75,7 +75,7 @@ export async function batchScrapeController(
jobPriority = await getJobPriority({
plan: req.auth.plan,
team_id: req.auth.team_id,
- basePriority: 21
+ basePriority: 21,
});
}
logger.debug("Using job priority " + jobPriority, { jobPriority });
@@ -97,12 +97,12 @@ export async function batchScrapeController(
crawl_id: id,
sitemapped: true,
v1: true,
- webhook: req.body.webhook
+ webhook: req.body.webhook,
},
opts: {
jobId: uuidv4(),
- priority: 20
- }
+ priority: 20,
+ },
};
});
@@ -110,19 +110,19 @@ export async function batchScrapeController(
await lockURLs(
id,
sc,
- jobs.map((x) => x.data.url)
+ jobs.map((x) => x.data.url),
);
logger.debug("Adding scrape jobs to Redis...");
await addCrawlJobs(
id,
- jobs.map((x) => x.opts.jobId)
+ jobs.map((x) => x.opts.jobId),
);
logger.debug("Adding scrape jobs to BullMQ...");
await addScrapeJobs(jobs);
if (req.body.webhook) {
logger.debug("Calling webhook with batch_scrape.started...", {
- webhook: req.body.webhook
+ webhook: req.body.webhook,
});
await callWebhook(
req.auth.team_id,
@@ -130,7 +130,7 @@ export async function batchScrapeController(
null,
req.body.webhook,
true,
- "batch_scrape.started"
+ "batch_scrape.started",
);
}
@@ -139,6 +139,6 @@ export async function batchScrapeController(
return res.status(200).json({
success: true,
id,
- url: `${protocol}://${req.get("host")}/v1/batch/scrape/${id}`
+ url: `${protocol}://${req.get("host")}/v1/batch/scrape/${id}`,
});
}
diff --git a/apps/api/src/controllers/v1/concurrency-check.ts b/apps/api/src/controllers/v1/concurrency-check.ts
index bd25c73b..5ed569f5 100644
--- a/apps/api/src/controllers/v1/concurrency-check.ts
+++ b/apps/api/src/controllers/v1/concurrency-check.ts
@@ -2,7 +2,7 @@ import { authenticateUser } from "../auth";
import {
ConcurrencyCheckParams,
ConcurrencyCheckResponse,
- RequestWithAuth
+ RequestWithAuth,
} from "./types";
import { RateLimiterMode } from "../../types";
import { Response } from "express";
@@ -10,14 +10,14 @@ import { redisConnection } from "../../services/queue-service";
// Basically just middleware and error wrapping
export async function concurrencyCheckController(
req: RequestWithAuth,
- res: Response
+ res: Response,
) {
const concurrencyLimiterKey = "concurrency-limiter:" + req.auth.team_id;
const now = Date.now();
const activeJobsOfTeam = await redisConnection.zrangebyscore(
concurrencyLimiterKey,
now,
- Infinity
+ Infinity,
);
return res
.status(200)
diff --git a/apps/api/src/controllers/v1/crawl-cancel.ts b/apps/api/src/controllers/v1/crawl-cancel.ts
index 986ff104..00af8b31 100644
--- a/apps/api/src/controllers/v1/crawl-cancel.ts
+++ b/apps/api/src/controllers/v1/crawl-cancel.ts
@@ -9,7 +9,7 @@ configDotenv();
export async function crawlCancelController(
req: RequestWithAuth<{ jobId: string }>,
- res: Response
+ res: Response,
) {
try {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
@@ -43,7 +43,7 @@ export async function crawlCancelController(
}
res.json({
- status: "cancelled"
+ status: "cancelled",
});
} catch (error) {
Sentry.captureException(error);
diff --git a/apps/api/src/controllers/v1/crawl-status-ws.ts b/apps/api/src/controllers/v1/crawl-status-ws.ts
index d9994d97..817dc184 100644
--- a/apps/api/src/controllers/v1/crawl-status-ws.ts
+++ b/apps/api/src/controllers/v1/crawl-status-ws.ts
@@ -6,7 +6,7 @@ import {
CrawlStatusResponse,
Document,
ErrorResponse,
- RequestWithAuth
+ RequestWithAuth,
} from "./types";
import { WebSocket } from "ws";
import { v4 as uuidv4 } from "uuid";
@@ -19,7 +19,7 @@ import {
getDoneJobsOrderedLength,
getThrottledJobs,
isCrawlFinished,
- isCrawlFinishedLocked
+ isCrawlFinishedLocked,
} from "../../lib/crawl-redis";
import { getScrapeQueue } from "../../services/queue-service";
import { getJob, getJobs } from "./crawl-status";
@@ -64,7 +64,7 @@ function close(ws: WebSocket, code: number, msg: Message) {
async function crawlStatusWS(
ws: WebSocket,
- req: RequestWithAuth
+ req: RequestWithAuth,
) {
const sc = await getCrawl(req.params.jobId);
if (!sc) {
@@ -89,7 +89,10 @@ async function crawlStatusWS(
const notDoneJobIDs = jobIDs.filter((x) => !doneJobIDs.includes(x));
const jobStatuses = await Promise.all(
- notDoneJobIDs.map(async (x) => [x, await getScrapeQueue().getJobState(x)])
+ notDoneJobIDs.map(async (x) => [
+ x,
+ await getScrapeQueue().getJobState(x),
+ ]),
);
const newlyDoneJobIDs: string[] = jobStatuses
.filter((x) => x[1] === "completed" || x[1] === "failed")
@@ -102,7 +105,7 @@ async function crawlStatusWS(
if (job.returnvalue) {
send(ws, {
type: "document",
- data: job.returnvalue
+ data: job.returnvalue,
});
} else {
return close(ws, 3000, { type: "error", error: job.failedReason });
@@ -120,7 +123,9 @@ async function crawlStatusWS(
let jobIDs = await getCrawlJobs(req.params.jobId);
let jobStatuses = await Promise.all(
- jobIDs.map(async (x) => [x, await getScrapeQueue().getJobState(x)] as const)
+ jobIDs.map(
+ async (x) => [x, await getScrapeQueue().getJobState(x)] as const,
+ ),
);
const throttledJobs = new Set(...(await getThrottledJobs(req.auth.team_id)));
@@ -161,8 +166,8 @@ async function crawlStatusWS(
completed: doneJobIDs.length,
creditsUsed: jobIDs.length,
expiresAt: (await getCrawlExpiry(req.params.jobId)).toISOString(),
- data: data
- }
+ data: data,
+ },
});
if (status !== "scraping") {
@@ -174,7 +179,7 @@ async function crawlStatusWS(
// Basically just middleware and error wrapping
export async function crawlStatusWSController(
ws: WebSocket,
- req: RequestWithAuth
+ req: RequestWithAuth,
) {
try {
const auth = await authenticateUser(req, null, RateLimiterMode.CrawlStatus);
@@ -182,7 +187,7 @@ export async function crawlStatusWSController(
if (!auth.success) {
return close(ws, 3000, {
type: "error",
- error: auth.error
+ error: auth.error,
});
}
@@ -201,7 +206,7 @@ export async function crawlStatusWSController(
verbose = JSON.stringify({
message: err.message,
name: err.name,
- stack: err.stack
+ stack: err.stack,
});
}
}
@@ -212,13 +217,13 @@ export async function crawlStatusWSController(
") -- ID " +
id +
" -- " +
- verbose
+ verbose,
);
return close(ws, 1011, {
type: "error",
error:
"An unexpected error occurred. Please contact help@firecrawl.com for help. Your exception ID is " +
- id
+ id,
});
}
}
diff --git a/apps/api/src/controllers/v1/crawl-status.ts b/apps/api/src/controllers/v1/crawl-status.ts
index d88d26fb..59db16d8 100644
--- a/apps/api/src/controllers/v1/crawl-status.ts
+++ b/apps/api/src/controllers/v1/crawl-status.ts
@@ -3,7 +3,7 @@ import {
CrawlStatusParams,
CrawlStatusResponse,
ErrorResponse,
- RequestWithAuth
+ RequestWithAuth,
} from "./types";
import {
getCrawl,
@@ -11,12 +11,12 @@ import {
getCrawlJobs,
getDoneJobsOrdered,
getDoneJobsOrderedLength,
- getThrottledJobs
+ getThrottledJobs,
} from "../../lib/crawl-redis";
import { getScrapeQueue } from "../../services/queue-service";
import {
supabaseGetJobById,
- supabaseGetJobsById
+ supabaseGetJobsById,
} from "../../lib/supabase-jobs";
import { configDotenv } from "dotenv";
import { Job, JobState } from "bullmq";
@@ -70,7 +70,7 @@ export async function getJobs(ids: string[]) {
export async function crawlStatusController(
req: RequestWithAuth,
res: Response,
- isBatch = false
+ isBatch = false,
) {
const sc = await getCrawl(req.params.jobId);
if (!sc) {
@@ -90,7 +90,9 @@ export async function crawlStatusController(
let jobIDs = await getCrawlJobs(req.params.jobId);
let jobStatuses = await Promise.all(
- jobIDs.map(async (x) => [x, await getScrapeQueue().getJobState(x)] as const)
+ jobIDs.map(
+ async (x) => [x, await getScrapeQueue().getJobState(x)] as const,
+ ),
);
const throttledJobs = new Set(...(await getThrottledJobs(req.auth.team_id)));
@@ -124,7 +126,7 @@ export async function crawlStatusController(
const doneJobsOrder = await getDoneJobsOrdered(
req.params.jobId,
start,
- end ?? -1
+ end ?? -1,
);
let doneJobs: Job[] = [];
@@ -158,7 +160,7 @@ export async function crawlStatusController(
if (job.returnvalue === undefined) {
logger.warn(
"Job was considered done, but returnvalue is undefined!",
- { jobId: job.id, state }
+ { jobId: job.id, state },
);
continue;
}
@@ -175,8 +177,8 @@ export async function crawlStatusController(
doneJobs = (
await Promise.all(
(await getJobs(doneJobsOrder)).map(async (x) =>
- (await x.getState()) === "failed" ? null : x
- )
+ (await x.getState()) === "failed" ? null : x,
+ ),
)
).filter((x) => x !== null) as Job[];
}
@@ -185,7 +187,7 @@ export async function crawlStatusController(
const protocol = process.env.ENV === "local" ? req.protocol : "https";
const nextURL = new URL(
- `${protocol}://${req.get("host")}/v1/${isBatch ? "batch/scrape" : "crawl"}/${req.params.jobId}`
+ `${protocol}://${req.get("host")}/v1/${isBatch ? "batch/scrape" : "crawl"}/${req.params.jobId}`,
);
nextURL.searchParams.set("skip", (start + data.length).toString());
@@ -215,6 +217,6 @@ export async function crawlStatusController(
status !== "scraping" && start + data.length === doneJobsLength // if there's not gonna be any documents after this
? undefined
: nextURL.href,
- data: data
+ data: data,
});
}
diff --git a/apps/api/src/controllers/v1/crawl.ts b/apps/api/src/controllers/v1/crawl.ts
index dac1b735..1fb470f9 100644
--- a/apps/api/src/controllers/v1/crawl.ts
+++ b/apps/api/src/controllers/v1/crawl.ts
@@ -5,7 +5,7 @@ import {
crawlRequestSchema,
CrawlResponse,
RequestWithAuth,
- toLegacyCrawlerOptions
+ toLegacyCrawlerOptions,
} from "./types";
import {
addCrawlJob,
@@ -14,7 +14,7 @@ import {
lockURL,
lockURLs,
saveCrawl,
- StoredCrawl
+ StoredCrawl,
} from "../../lib/crawl-redis";
import { logCrawl } from "../../services/logging/crawl_log";
import { getScrapeQueue } from "../../services/queue-service";
@@ -26,7 +26,7 @@ import { scrapeOptions as scrapeOptionsSchema } from "./types";
export async function crawlController(
req: RequestWithAuth<{}, CrawlResponse, CrawlRequest>,
- res: Response
+ res: Response,
) {
const preNormalizedBody = req.body;
req.body = crawlRequestSchema.parse(req.body);
@@ -37,12 +37,12 @@ export async function crawlController(
module: "api/v1",
method: "crawlController",
teamId: req.auth.team_id,
- plan: req.auth.plan
+ plan: req.auth.plan,
});
logger.debug("Crawl " + id + " starting", {
request: req.body,
originalRequest: preNormalizedBody,
- account: req.account
+ account: req.account,
});
await logCrawl(id, req.auth.team_id);
@@ -56,7 +56,7 @@ export async function crawlController(
const crawlerOptions = {
...req.body,
url: undefined,
- scrapeOptions: undefined
+ scrapeOptions: undefined,
};
const scrapeOptions = req.body.scrapeOptions;
@@ -86,7 +86,7 @@ export async function crawlController(
logger.debug("Determined limit: " + crawlerOptions.limit, {
remainingCredits,
bodyLimit: originalLimit,
- originalBodyLimit: preNormalizedBody.limit
+ originalBodyLimit: preNormalizedBody.limit,
});
const sc: StoredCrawl = {
@@ -96,7 +96,7 @@ export async function crawlController(
internalOptions: { disableSmartWaitCache: true }, // NOTE: smart wait disabled for crawls to ensure contentful scrape, speed does not matter
team_id: req.auth.team_id,
createdAt: Date.now(),
- plan: req.auth.plan
+ plan: req.auth.plan,
};
const crawler = crawlToCrawler(id, sc);
@@ -105,7 +105,7 @@ export async function crawlController(
sc.robots = await crawler.getRobotsTxt(scrapeOptions.skipTlsVerification);
} catch (e) {
logger.debug("Failed to get robots.txt (this is probably fine!)", {
- error: e
+ error: e,
});
}
@@ -117,7 +117,7 @@ export async function crawlController(
if (sitemap !== null && sitemap.length > 0) {
logger.debug("Using sitemap of length " + sitemap.length, {
- sitemapLength: sitemap.length
+ sitemapLength: sitemap.length,
});
let jobPriority = 20;
// If it is over 1000, we need to get the job priority,
@@ -127,7 +127,7 @@ export async function crawlController(
jobPriority = await getJobPriority({
plan: req.auth.plan,
team_id: req.auth.team_id,
- basePriority: 21
+ basePriority: 21,
});
}
logger.debug("Using job priority " + jobPriority, { jobPriority });
@@ -149,12 +149,12 @@ export async function crawlController(
crawl_id: id,
sitemapped: true,
webhook: req.body.webhook,
- v1: true
+ v1: true,
},
opts: {
jobId: uuid,
- priority: 20
- }
+ priority: 20,
+ },
};
});
@@ -162,18 +162,18 @@ export async function crawlController(
await lockURLs(
id,
sc,
- jobs.map((x) => x.data.url)
+ jobs.map((x) => x.data.url),
);
logger.debug("Adding scrape jobs to Redis...");
await addCrawlJobs(
id,
- jobs.map((x) => x.opts.jobId)
+ jobs.map((x) => x.opts.jobId),
);
logger.debug("Adding scrape jobs to BullMQ...");
await getScrapeQueue().addBulk(jobs);
} else {
logger.debug("Sitemap not found or ignored.", {
- ignoreSitemap: sc.crawlerOptions.ignoreSitemap
+ ignoreSitemap: sc.crawlerOptions.ignoreSitemap,
});
logger.debug("Locking URL...");
@@ -192,12 +192,12 @@ export async function crawlController(
origin: "api",
crawl_id: id,
webhook: req.body.webhook,
- v1: true
+ v1: true,
},
{
- priority: 15
+ priority: 15,
},
- jobId
+ jobId,
);
logger.debug("Adding scrape job to BullMQ...", { jobId });
await addCrawlJob(id, jobId);
@@ -206,7 +206,7 @@ export async function crawlController(
if (req.body.webhook) {
logger.debug("Calling webhook with crawl.started...", {
- webhook: req.body.webhook
+ webhook: req.body.webhook,
});
await callWebhook(
req.auth.team_id,
@@ -214,7 +214,7 @@ export async function crawlController(
null,
req.body.webhook,
true,
- "crawl.started"
+ "crawl.started",
);
}
@@ -223,6 +223,6 @@ export async function crawlController(
return res.status(200).json({
success: true,
id,
- url: `${protocol}://${req.get("host")}/v1/crawl/${id}`
+ url: `${protocol}://${req.get("host")}/v1/crawl/${id}`,
});
}
diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts
index 74b188e7..0c286253 100644
--- a/apps/api/src/controllers/v1/extract.ts
+++ b/apps/api/src/controllers/v1/extract.ts
@@ -6,7 +6,7 @@ import {
extractRequestSchema,
ExtractResponse,
MapDocument,
- scrapeOptions
+ scrapeOptions,
} from "./types";
import { Document } from "../../lib/entities";
import Redis from "ioredis";
@@ -43,7 +43,7 @@ const MIN_REQUIRED_LINKS = 1;
*/
export async function extractController(
req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>,
- res: Response
+ res: Response,
) {
const selfHosted = process.env.USE_DB_AUTHENTICATION !== "true";
@@ -81,7 +81,7 @@ export async function extractController(
// If we're self-hosted, we don't want to ignore the sitemap, due to our fire-engine mapping
ignoreSitemap: !selfHosted ? true : false,
includeMetadata: true,
- includeSubdomains: req.body.includeSubdomains
+ includeSubdomains: req.body.includeSubdomains,
});
let mappedLinks = mapResults.links as MapDocument[];
@@ -89,7 +89,8 @@ export async function extractController(
mappedLinks = mappedLinks.slice(0, MAX_EXTRACT_LIMIT);
let mappedLinksRerank = mappedLinks.map(
- (x) => `url: ${x.url}, title: ${x.title}, description: ${x.description}`
+ (x) =>
+ `url: ${x.url}, title: ${x.title}, description: ${x.description}`,
);
// Filter by path prefix if present
@@ -103,31 +104,31 @@ export async function extractController(
const linksAndScores = await performRanking(
mappedLinksRerank,
mappedLinks.map((l) => l.url),
- mapUrl
+ mapUrl,
);
// First try with high threshold
let filteredLinks = filterAndProcessLinks(
mappedLinks,
linksAndScores,
- INITIAL_SCORE_THRESHOLD
+ INITIAL_SCORE_THRESHOLD,
);
// If we don't have enough high-quality links, try with lower threshold
if (filteredLinks.length < MIN_REQUIRED_LINKS) {
logger.info(
- `Only found ${filteredLinks.length} links with score > ${INITIAL_SCORE_THRESHOLD}. Trying lower threshold...`
+ `Only found ${filteredLinks.length} links with score > ${INITIAL_SCORE_THRESHOLD}. Trying lower threshold...`,
);
filteredLinks = filterAndProcessLinks(
mappedLinks,
linksAndScores,
- FALLBACK_SCORE_THRESHOLD
+ FALLBACK_SCORE_THRESHOLD,
);
if (filteredLinks.length === 0) {
// If still no results, take top N results regardless of score
logger.warn(
- `No links found with score > ${FALLBACK_SCORE_THRESHOLD}. Taking top ${MIN_REQUIRED_LINKS} results.`
+ `No links found with score > ${FALLBACK_SCORE_THRESHOLD}. Taking top ${MIN_REQUIRED_LINKS} results.`,
);
filteredLinks = linksAndScores
.sort((a, b) => b.score - a.score)
@@ -135,7 +136,9 @@ export async function extractController(
.map((x) => mappedLinks.find((link) => link.url === x.link))
.filter(
(x): x is MapDocument =>
- x !== undefined && x.url !== undefined && !isUrlBlocked(x.url)
+ x !== undefined &&
+ x.url !== undefined &&
+ !isUrlBlocked(x.url),
);
}
}
@@ -161,7 +164,7 @@ export async function extractController(
return res.status(400).json({
success: false,
error:
- "No valid URLs found to scrape. Try adjusting your search criteria or including more URLs."
+ "No valid URLs found to scrape. Try adjusting your search criteria or including more URLs.",
});
}
@@ -174,7 +177,7 @@ export async function extractController(
const jobPriority = await getJobPriority({
plan: req.auth.plan as PlanType,
team_id: req.auth.team_id,
- basePriority: 10
+ basePriority: 10,
});
await addScrapeJob(
@@ -186,11 +189,11 @@ export async function extractController(
internalOptions: {},
plan: req.auth.plan!,
origin,
- is_scrape: true
+ is_scrape: true,
},
{},
jobId,
- jobPriority
+ jobPriority,
);
try {
@@ -208,12 +211,12 @@ export async function extractController(
) {
throw {
status: 408,
- error: "Request timed out"
+ error: "Request timed out",
};
} else {
throw {
status: 500,
- error: `(Internal server error) - ${e && e.message ? e.message : e}`
+ error: `(Internal server error) - ${e && e.message ? e.message : e}`,
};
}
}
@@ -225,7 +228,7 @@ export async function extractController(
} catch (e) {
return res.status(e.status).json({
success: false,
- error: e.error
+ error: e.error,
});
}
@@ -237,11 +240,11 @@ export async function extractController(
"Always prioritize using the provided content to answer the question. Do not make up an answer. Be concise and follow the schema if provided. Here are the urls the user provided of which he wants to extract information from: " +
links.join(", "),
prompt: req.body.prompt,
- schema: req.body.schema
+ schema: req.body.schema,
},
docs.map((x) => buildDocument(x)).join("\n"),
undefined,
- true // isExtractEndpoint
+ true, // isExtractEndpoint
);
// TODO: change this later
@@ -249,9 +252,9 @@ export async function extractController(
billTeam(req.auth.team_id, req.acuc?.sub_id, links.length * 5).catch(
(error) => {
logger.error(
- `Failed to bill team ${req.auth.team_id} for ${links.length * 5} credits: ${error}`
+ `Failed to bill team ${req.auth.team_id} for ${links.length * 5} credits: ${error}`,
);
- }
+ },
);
let data = completions.extract ?? {};
@@ -269,14 +272,14 @@ export async function extractController(
url: req.body.urls.join(", "),
scrapeOptions: req.body,
origin: req.body.origin ?? "api",
- num_tokens: completions.numTokens ?? 0
+ num_tokens: completions.numTokens ?? 0,
});
return res.status(200).json({
success: true,
data: data,
scrape_id: id,
- warning: warning
+ warning: warning,
});
}
@@ -295,13 +298,13 @@ function filterAndProcessLinks(
score: number;
originalIndex: number;
}[],
- threshold: number
+ threshold: number,
): MapDocument[] {
return linksAndScores
.filter((x) => x.score > threshold)
.map((x) => mappedLinks.find((link) => link.url === x.link))
.filter(
(x): x is MapDocument =>
- x !== undefined && x.url !== undefined && !isUrlBlocked(x.url)
+ x !== undefined && x.url !== undefined && !isUrlBlocked(x.url),
);
}
diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts
index 7ddd7b78..cd302708 100644
--- a/apps/api/src/controllers/v1/map.ts
+++ b/apps/api/src/controllers/v1/map.ts
@@ -4,7 +4,7 @@ import {
MapDocument,
mapRequestSchema,
RequestWithAuth,
- scrapeOptions
+ scrapeOptions,
} from "./types";
import { crawlToCrawler, StoredCrawl } from "../../lib/crawl-redis";
import { MapResponse, MapRequest } from "./types";
@@ -13,7 +13,7 @@ import {
checkAndUpdateURLForMap,
isSameDomain,
isSameSubdomain,
- removeDuplicateUrls
+ removeDuplicateUrls,
} from "../../lib/validateUrl";
import { fireEngineMap } from "../../search/fireEngine";
import { billTeam } from "../../services/billing/credit_billing";
@@ -49,7 +49,7 @@ export async function getMapResults({
plan,
origin,
includeMetadata = false,
- allowExternalLinks
+ allowExternalLinks,
}: {
url: string;
search?: string;
@@ -72,13 +72,13 @@ export async function getMapResults({
crawlerOptions: {
...crawlerOptions,
limit: crawlerOptions.sitemapOnly ? 10000000 : limit,
- scrapeOptions: undefined
+ scrapeOptions: undefined,
},
scrapeOptions: scrapeOptions.parse({}),
internalOptions: {},
team_id: teamId,
createdAt: Date.now(),
- plan: plan
+ plan: plan,
};
const crawler = crawlToCrawler(id, sc);
@@ -114,7 +114,7 @@ export async function getMapResults({
const resultsPerPage = 100;
const maxPages = Math.ceil(
- Math.min(MAX_FIRE_ENGINE_RESULTS, limit) / resultsPerPage
+ Math.min(MAX_FIRE_ENGINE_RESULTS, limit) / resultsPerPage,
);
const cacheKey = `fireEngineMap:${mapUrl}`;
@@ -129,12 +129,12 @@ export async function getMapResults({
const fetchPage = async (page: number) => {
return fireEngineMap(mapUrl, {
numResults: resultsPerPage,
- page: page
+ page: page,
});
};
pagePromises = Array.from({ length: maxPages }, (_, i) =>
- fetchPage(i + 1)
+ fetchPage(i + 1),
);
allResults = await Promise.all(pagePromises);
@@ -144,7 +144,7 @@ export async function getMapResults({
// Parallelize sitemap fetch with serper search
const [sitemap, ...searchResults] = await Promise.all([
ignoreSitemap ? null : crawler.tryGetSitemap(true),
- ...(cachedResult ? [] : pagePromises)
+ ...(cachedResult ? [] : pagePromises),
]);
if (!cachedResult) {
@@ -172,7 +172,7 @@ export async function getMapResults({
links = [
mapResults[0].url,
...mapResults.slice(1).map((x) => x.url),
- ...links
+ ...links,
];
} else {
mapResults.map((x) => {
@@ -218,13 +218,13 @@ export async function getMapResults({
links: includeMetadata ? mapResults : linksToReturn,
scrape_id: origin?.includes("website") ? id : undefined,
job_id: id,
- time_taken: (new Date().getTime() - Date.now()) / 1000
+ time_taken: (new Date().getTime() - Date.now()) / 1000,
};
}
export async function mapController(
req: RequestWithAuth<{}, MapResponse, MapRequest>,
- res: Response
+ res: Response,
) {
req.body = mapRequestSchema.parse(req.body);
@@ -237,13 +237,13 @@ export async function mapController(
crawlerOptions: req.body,
origin: req.body.origin,
teamId: req.auth.team_id,
- plan: req.auth.plan
+ plan: req.auth.plan,
});
// Bill the team
billTeam(req.auth.team_id, req.acuc?.sub_id, 1).catch((error) => {
logger.error(
- `Failed to bill team ${req.auth.team_id} for 1 credit: ${error}`
+ `Failed to bill team ${req.auth.team_id} for 1 credit: ${error}`,
);
});
@@ -261,13 +261,13 @@ export async function mapController(
crawlerOptions: {},
scrapeOptions: {},
origin: req.body.origin ?? "api",
- num_tokens: 0
+ num_tokens: 0,
});
const response = {
success: true as const,
links: result.links,
- scrape_id: result.scrape_id
+ scrape_id: result.scrape_id,
};
return res.status(200).json(response);
diff --git a/apps/api/src/controllers/v1/scrape-status.ts b/apps/api/src/controllers/v1/scrape-status.ts
index b366b79e..7fec74a1 100644
--- a/apps/api/src/controllers/v1/scrape-status.ts
+++ b/apps/api/src/controllers/v1/scrape-status.ts
@@ -13,29 +13,29 @@ export async function scrapeStatusController(req: any, res: any) {
const job = await supabaseGetJobByIdOnlyData(req.params.jobId);
const allowedTeams = [
"41bdbfe1-0579-4d9b-b6d5-809f16be12f5",
- "511544f2-2fce-4183-9c59-6c29b02c69b5"
+ "511544f2-2fce-4183-9c59-6c29b02c69b5",
];
if (!allowedTeams.includes(job?.team_id)) {
return res.status(403).json({
success: false,
- error: "You are not allowed to access this resource."
+ error: "You are not allowed to access this resource.",
});
}
return res.status(200).json({
success: true,
- data: job?.docs[0]
+ data: job?.docs[0],
});
} catch (error) {
if (error instanceof Error && error.message == "Too Many Requests") {
return res.status(429).json({
success: false,
- error: "Rate limit exceeded. Please try again later."
+ error: "Rate limit exceeded. Please try again later.",
});
} else {
return res.status(500).json({
success: false,
- error: "An unexpected error occurred."
+ error: "An unexpected error occurred.",
});
}
}
diff --git a/apps/api/src/controllers/v1/scrape.ts b/apps/api/src/controllers/v1/scrape.ts
index 05cc68e3..ddd5da74 100644
--- a/apps/api/src/controllers/v1/scrape.ts
+++ b/apps/api/src/controllers/v1/scrape.ts
@@ -5,7 +5,7 @@ import {
RequestWithAuth,
ScrapeRequest,
scrapeRequestSchema,
- ScrapeResponse
+ ScrapeResponse,
} from "./types";
import { billTeam } from "../../services/billing/credit_billing";
import { v4 as uuidv4 } from "uuid";
@@ -17,7 +17,7 @@ import { getScrapeQueue } from "../../services/queue-service";
export async function scrapeController(
req: RequestWithAuth<{}, ScrapeResponse, ScrapeRequest>,
- res: Response
+ res: Response,
) {
req.body = scrapeRequestSchema.parse(req.body);
let earlyReturn = false;
@@ -30,7 +30,7 @@ export async function scrapeController(
const jobPriority = await getJobPriority({
plan: req.auth.plan as PlanType,
team_id: req.auth.team_id,
- basePriority: 10
+ basePriority: 10,
});
await addScrapeJob(
@@ -42,18 +42,18 @@ export async function scrapeController(
internalOptions: {},
plan: req.auth.plan!,
origin: req.body.origin,
- is_scrape: true
+ is_scrape: true,
},
{},
jobId,
- jobPriority
+ jobPriority,
);
const totalWait =
(req.body.waitFor ?? 0) +
(req.body.actions ?? []).reduce(
(a, x) => (x.type === "wait" ? (x.milliseconds ?? 0) : 0) + a,
- 0
+ 0,
);
let doc: Document;
@@ -67,12 +67,12 @@ export async function scrapeController(
) {
return res.status(408).json({
success: false,
- error: "Request timed out"
+ error: "Request timed out",
});
} else {
return res.status(500).json({
success: false,
- error: `(Internal server error) - ${e && e.message ? e.message : e}`
+ error: `(Internal server error) - ${e && e.message ? e.message : e}`,
});
}
}
@@ -99,10 +99,10 @@ export async function scrapeController(
billTeam(req.auth.team_id, req.acuc?.sub_id, creditsToBeBilled).catch(
(error) => {
logger.error(
- `Failed to bill team ${req.auth.team_id} for ${creditsToBeBilled} credits: ${error}`
+ `Failed to bill team ${req.auth.team_id} for ${creditsToBeBilled} credits: ${error}`,
);
// Optionally, you could notify an admin or add to a retry queue here
- }
+ },
);
if (!req.body.formats.includes("rawHtml")) {
@@ -123,12 +123,12 @@ export async function scrapeController(
url: req.body.url,
scrapeOptions: req.body,
origin: origin,
- num_tokens: numTokens
+ num_tokens: numTokens,
});
return res.status(200).json({
success: true,
data: doc,
- scrape_id: origin?.includes("website") ? jobId : undefined
+ scrape_id: origin?.includes("website") ? jobId : undefined,
});
}
diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts
index f9fa2392..57e208b4 100644
--- a/apps/api/src/controllers/v1/types.ts
+++ b/apps/api/src/controllers/v1/types.ts
@@ -8,7 +8,7 @@ import {
ExtractorOptions,
PageOptions,
ScrapeActionContent,
- Document as V0Document
+ Document as V0Document,
} from "../../lib/entities";
import { InternalOptions } from "../../scraper/scrapeURL";
@@ -34,7 +34,7 @@ export const url = z.preprocess(
.regex(/^https?:\/\//, "URL uses unsupported protocol")
.refine(
(x) => /\.[a-z]{2,}([\/?#]|$)/i.test(x),
- "URL must have a valid top-level domain or be a valid path"
+ "URL must have a valid top-level domain or be a valid path",
)
.refine((x) => {
try {
@@ -46,8 +46,8 @@ export const url = z.preprocess(
}, "Invalid URL")
.refine(
(x) => !isUrlBlocked(x as string),
- "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
- )
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
+ ),
);
const strictMessage =
@@ -60,9 +60,9 @@ export const extractOptions = z
systemPrompt: z
.string()
.default(
- "Based on the information on the page, extract all the information from the schema in JSON format. Try to extract all the fields even those that might not be marked as required."
+ "Based on the information on the page, extract all the information from the schema in JSON format. Try to extract all the fields even those that might not be marked as required.",
),
- prompt: z.string().optional()
+ prompt: z.string().optional(),
})
.strict(strictMessage);
@@ -74,7 +74,7 @@ export const actionsSchema = z.array(
.object({
type: z.literal("wait"),
milliseconds: z.number().int().positive().finite().optional(),
- selector: z.string().optional()
+ selector: z.string().optional(),
})
.refine(
(data) =>
@@ -82,38 +82,38 @@ export const actionsSchema = z.array(
!(data.milliseconds !== undefined && data.selector !== undefined),
{
message:
- "Either 'milliseconds' or 'selector' must be provided, but not both."
- }
+ "Either 'milliseconds' or 'selector' must be provided, but not both.",
+ },
),
z.object({
type: z.literal("click"),
- selector: z.string()
+ selector: z.string(),
}),
z.object({
type: z.literal("screenshot"),
- fullPage: z.boolean().default(false)
+ fullPage: z.boolean().default(false),
}),
z.object({
type: z.literal("write"),
- text: z.string()
+ text: z.string(),
}),
z.object({
type: z.literal("press"),
- key: z.string()
+ key: z.string(),
}),
z.object({
type: z.literal("scroll"),
direction: z.enum(["up", "down"]).optional().default("down"),
- selector: z.string().optional()
+ selector: z.string().optional(),
}),
z.object({
- type: z.literal("scrape")
+ type: z.literal("scrape"),
}),
z.object({
type: z.literal("executeJavascript"),
- script: z.string()
- })
- ])
+ script: z.string(),
+ }),
+ ]),
);
export const scrapeOptions = z
@@ -126,14 +126,14 @@ export const scrapeOptions = z
"links",
"screenshot",
"screenshot@fullPage",
- "extract"
+ "extract",
])
.array()
.optional()
.default(["markdown"])
.refine(
(x) => !(x.includes("screenshot") && x.includes("screenshot@fullPage")),
- "You may only specify either screenshot or screenshot@fullPage"
+ "You may only specify either screenshot or screenshot@fullPage",
),
headers: z.record(z.string(), z.string()).optional(),
includeTags: z.string().array().optional(),
@@ -155,11 +155,11 @@ export const scrapeOptions = z
(val) => !val || Object.keys(countries).includes(val.toUpperCase()),
{
message:
- "Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code."
- }
+ "Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
+ },
)
.transform((val) => (val ? val.toUpperCase() : "US")),
- languages: z.string().array().optional()
+ languages: z.string().array().optional(),
})
.optional(),
@@ -173,15 +173,15 @@ export const scrapeOptions = z
(val) => !val || Object.keys(countries).includes(val.toUpperCase()),
{
message:
- "Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code."
- }
+ "Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
+ },
)
.transform((val) => (val ? val.toUpperCase() : "US")),
- languages: z.string().array().optional()
+ languages: z.string().array().optional(),
})
.optional(),
skipTlsVerification: z.boolean().default(false),
- removeBase64Images: z.boolean().default(true)
+ removeBase64Images: z.boolean().default(true),
})
.strict(strictMessage);
@@ -199,7 +199,7 @@ export const extractV1Options = z
includeSubdomains: z.boolean().default(true),
allowExternalLinks: z.boolean().default(false),
origin: z.string().optional().default("api"),
- timeout: z.number().int().positive().finite().safe().default(60000)
+ timeout: z.number().int().positive().finite().safe().default(60000),
})
.strict(strictMessage);
@@ -212,7 +212,7 @@ export const scrapeRequestSchema = scrapeOptions
.extend({
url,
origin: z.string().optional().default("api"),
- timeout: z.number().int().positive().finite().safe().default(30000)
+ timeout: z.number().int().positive().finite().safe().default(30000),
})
.strict(strictMessage)
.refine(
@@ -226,8 +226,8 @@ export const scrapeRequestSchema = scrapeOptions
},
{
message:
- "When 'extract' format is specified, 'extract' options must be provided, and vice versa"
- }
+ "When 'extract' format is specified, 'extract' options must be provided, and vice versa",
+ },
)
.transform((obj) => {
if ((obj.formats?.includes("extract") || obj.extract) && !obj.timeout) {
@@ -250,9 +250,9 @@ export const webhookSchema = z.preprocess(
z
.object({
url: z.string().url(),
- headers: z.record(z.string(), z.string()).default({})
+ headers: z.record(z.string(), z.string()).default({}),
})
- .strict(strictMessage)
+ .strict(strictMessage),
);
export const batchScrapeRequestSchema = scrapeOptions
@@ -260,7 +260,7 @@ export const batchScrapeRequestSchema = scrapeOptions
urls: url.array(),
origin: z.string().optional().default("api"),
webhook: webhookSchema.optional(),
- appendToId: z.string().uuid().optional()
+ appendToId: z.string().uuid().optional(),
})
.strict(strictMessage)
.refine(
@@ -274,8 +274,8 @@ export const batchScrapeRequestSchema = scrapeOptions
},
{
message:
- "When 'extract' format is specified, 'extract' options must be provided, and vice versa"
- }
+ "When 'extract' format is specified, 'extract' options must be provided, and vice versa",
+ },
);
export type BatchScrapeRequest = z.infer<typeof batchScrapeRequestSchema>;
@@ -292,7 +292,7 @@ const crawlerOptions = z
ignoreRobotsTxt: z.boolean().default(false),
ignoreSitemap: z.boolean().default(false),
deduplicateSimilarURLs: z.boolean().default(true),
- ignoreQueryParameters: z.boolean().default(false)
+ ignoreQueryParameters: z.boolean().default(false),
})
.strict(strictMessage);
@@ -314,7 +314,7 @@ export const crawlRequestSchema = crawlerOptions
origin: z.string().optional().default("api"),
scrapeOptions: scrapeOptions.default({}),
webhook: webhookSchema.optional(),
- limit: z.number().default(10000)
+ limit: z.number().default(10000),
})
.strict(strictMessage);
@@ -340,7 +340,7 @@ export const mapRequestSchema = crawlerOptions
search: z.string().optional(),
ignoreSitemap: z.boolean().default(false),
sitemapOnly: z.boolean().default(false),
- limit: z.number().min(1).max(5000).default(5000)
+ limit: z.number().min(1).max(5000).default(5000),
})
.strict(strictMessage);
@@ -510,7 +510,7 @@ export type AuthCreditUsageChunk = {
export interface RequestWithMaybeACUC<
ReqParams = {},
ReqBody = undefined,
- ResBody = undefined
+ ResBody = undefined,
> extends Request {
acuc?: AuthCreditUsageChunk;
}
@@ -518,7 +518,7 @@ export interface RequestWithMaybeACUC<
export interface RequestWithACUC<
ReqParams = {},
ReqBody = undefined,
- ResBody = undefined
+ ResBody = undefined,
> extends Request {
acuc: AuthCreditUsageChunk;
}
@@ -526,7 +526,7 @@ export interface RequestWithACUC<
export interface RequestWithAuth<
ReqParams = {},
ReqBody = undefined,
- ResBody = undefined
+ ResBody = undefined,
> extends Request {
auth: AuthObject;
account?: Account;
@@ -535,7 +535,7 @@ export interface RequestWithAuth<
export interface RequestWithMaybeAuth<
ReqParams = {},
ReqBody = undefined,
- ResBody = undefined
+ ResBody = undefined,
> extends RequestWithMaybeACUC {
auth?: AuthObject;
account?: Account;
@@ -544,7 +544,7 @@ export interface RequestWithMaybeAuth<
export interface RequestWithAuth<
ReqParams = {},
ReqBody = undefined,
- ResBody = undefined
+ ResBody = undefined,
> extends RequestWithACUC {
auth: AuthObject;
account?: Account;
@@ -569,7 +569,7 @@ export function toLegacyCrawlerOptions(x: CrawlerOptions) {
ignoreRobotsTxt: x.ignoreRobotsTxt,
ignoreSitemap: x.ignoreSitemap,
deduplicateSimilarURLs: x.deduplicateSimilarURLs,
- ignoreQueryParameters: x.ignoreQueryParameters
+ ignoreQueryParameters: x.ignoreQueryParameters,
};
}
@@ -589,11 +589,11 @@ export function fromLegacyCrawlerOptions(x: any): {
ignoreRobotsTxt: x.ignoreRobotsTxt,
ignoreSitemap: x.ignoreSitemap,
deduplicateSimilarURLs: x.deduplicateSimilarURLs,
- ignoreQueryParameters: x.ignoreQueryParameters
+ ignoreQueryParameters: x.ignoreQueryParameters,
}),
internalOptions: {
- v0CrawlOnlyUrls: x.returnOnlyUrls
- }
+ v0CrawlOnlyUrls: x.returnOnlyUrls,
+ },
};
}
@@ -605,7 +605,7 @@ export interface MapDocument {
export function fromLegacyScrapeOptions(
pageOptions: PageOptions,
extractorOptions: ExtractorOptions | undefined,
- timeout: number | undefined
+ timeout: number | undefined,
): { scrapeOptions: ScrapeOptions; internalOptions: InternalOptions } {
return {
scrapeOptions: scrapeOptions.parse({
@@ -621,7 +621,7 @@ export function fromLegacyScrapeOptions(
extractorOptions.mode.includes("llm-extraction")
? ("extract" as const)
: null,
- "links"
+ "links",
].filter((x) => x !== null),
waitFor: pageOptions.waitFor,
headers: pageOptions.headers,
@@ -646,16 +646,16 @@ export function fromLegacyScrapeOptions(
? {
systemPrompt: extractorOptions.extractionPrompt,
prompt: extractorOptions.userPrompt,
- schema: extractorOptions.extractionSchema
+ schema: extractorOptions.extractionSchema,
}
: undefined,
- mobile: pageOptions.mobile
+ mobile: pageOptions.mobile,
}),
internalOptions: {
atsv: pageOptions.atsv,
v0DisableJsDom: pageOptions.disableJsDom,
- v0UseFastMode: pageOptions.useFastMode
- }
+ v0UseFastMode: pageOptions.useFastMode,
+ },
// TODO: fallback, fetchPageContent, replaceAllPathsWithAbsolutePaths, includeLinks
};
}
@@ -664,12 +664,12 @@ export function fromLegacyCombo(
pageOptions: PageOptions,
extractorOptions: ExtractorOptions | undefined,
timeout: number | undefined,
- crawlerOptions: any
+ crawlerOptions: any,
): { scrapeOptions: ScrapeOptions; internalOptions: InternalOptions } {
const { scrapeOptions, internalOptions: i1 } = fromLegacyScrapeOptions(
pageOptions,
extractorOptions,
- timeout
+ timeout,
);
const { internalOptions: i2 } = fromLegacyCrawlerOptions(crawlerOptions);
return { scrapeOptions, internalOptions: Object.assign(i1, i2) };
@@ -677,7 +677,7 @@ export function fromLegacyCombo(
export function toLegacyDocument(
document: Document,
- internalOptions: InternalOptions
+ internalOptions: InternalOptions,
): V0Document | { url: string } {
if (internalOptions.v0CrawlOnlyUrls) {
return { url: document.metadata.sourceURL! };
@@ -696,9 +696,9 @@ export function toLegacyDocument(
statusCode: undefined,
pageError: document.metadata.error,
pageStatusCode: document.metadata.statusCode,
- screenshot: document.screenshot
+ screenshot: document.screenshot,
},
actions: document.actions,
- warning: document.warning
+ warning: document.warning,
};
}
diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts
index a4f4445b..adc080f2 100644
--- a/apps/api/src/index.ts
+++ b/apps/api/src/index.ts
@@ -46,12 +46,12 @@ serverAdapter.setBasePath(`/admin/${process.env.BULL_AUTH_KEY}/queues`);
const { addQueue, removeQueue, setQueues, replaceQueues } = createBullBoard({
queues: [new BullAdapter(getScrapeQueue())],
- serverAdapter: serverAdapter
+ serverAdapter: serverAdapter,
});
app.use(
`/admin/${process.env.BULL_AUTH_KEY}/queues`,
- serverAdapter.getRouter()
+ serverAdapter.getRouter(),
);
app.get("/", (req, res) => {
@@ -75,7 +75,7 @@ function startServer(port = DEFAULT_PORT) {
const server = app.listen(Number(port), HOST, () => {
logger.info(`Worker ${process.pid} listening on port ${port}`);
logger.info(
- `For the Queue UI, open: http://${HOST}:${port}/admin/${process.env.BULL_AUTH_KEY}/queues`
+ `For the Queue UI, open: http://${HOST}:${port}/admin/${process.env.BULL_AUTH_KEY}/queues`,
);
});
@@ -103,7 +103,7 @@ app.get(`/serverHealthCheck`, async (req, res) => {
const noWaitingJobs = waitingJobs === 0;
// 200 if no active jobs, 503 if there are active jobs
return res.status(noWaitingJobs ? 200 : 500).json({
- waitingJobs
+ waitingJobs,
});
} catch (error) {
Sentry.captureException(error);
@@ -120,7 +120,7 @@ app.get("/serverHealthCheck/notify", async (req, res) => {
const getWaitingJobsCount = async () => {
const scrapeQueue = getScrapeQueue();
const [waitingJobsCount] = await Promise.all([
- scrapeQueue.getWaitingCount()
+ scrapeQueue.getWaitingCount(),
]);
return waitingJobsCount;
@@ -140,15 +140,15 @@ app.get("/serverHealthCheck/notify", async (req, res) => {
const message = {
text: `⚠️ Warning: The number of active jobs (${waitingJobsCount}) has exceeded the threshold (${treshold}) for more than ${
timeout / 60000
- } minute(s).`
+ } minute(s).`,
};
const response = await fetch(slackWebhookUrl, {
method: "POST",
headers: {
- "Content-Type": "application/json"
+ "Content-Type": "application/json",
},
- body: JSON.stringify(message)
+ body: JSON.stringify(message),
});
if (!response.ok) {
@@ -176,7 +176,7 @@ app.use(
err: unknown,
req: Request<{}, ErrorResponse, undefined>,
res: Response,
- next: NextFunction
+ next: NextFunction,
) => {
if (err instanceof ZodError) {
if (
@@ -192,7 +192,7 @@ app.use(
} else {
next(err);
}
- }
+ },
);
Sentry.setupExpressErrorHandler(app);
@@ -202,7 +202,7 @@ app.use(
err: unknown,
req: Request<{}, ErrorResponse, undefined>,
res: ResponseWithSentry,
- next: NextFunction
+ next: NextFunction,
) => {
if (
err instanceof SyntaxError &&
@@ -222,7 +222,7 @@ app.use(
verbose = JSON.stringify({
message: err.message,
name: err.name,
- stack: err.stack
+ stack: err.stack,
});
}
}
@@ -233,15 +233,15 @@ app.use(
") -- ID " +
id +
" -- " +
- verbose
+ verbose,
);
res.status(500).json({
success: false,
error:
"An unexpected error occurred. Please contact help@firecrawl.com for help. Your exception ID is " +
- id
+ id,
});
- }
+ },
);
logger.info(`Worker ${process.pid} started`);
diff --git a/apps/api/src/lib/LLM-extraction/index.ts b/apps/api/src/lib/LLM-extraction/index.ts
index 47ecaf18..de7017ea 100644
--- a/apps/api/src/lib/LLM-extraction/index.ts
+++ b/apps/api/src/lib/LLM-extraction/index.ts
@@ -10,7 +10,7 @@ import { logger } from "../logger";
export async function generateCompletions(
documents: Document[],
extractionOptions: ExtractorOptions | undefined,
- mode: "markdown" | "raw-html"
+ mode: "markdown" | "raw-html",
): Promise<Document[]> {
// const schema = zodToJsonSchema(options.schema)
@@ -32,7 +32,7 @@ export async function generateCompletions(
schema: schema,
prompt: prompt,
systemPrompt: systemPrompt,
- mode: mode
+ mode: mode,
});
// Validate the JSON output against the schema using AJV
if (schema) {
@@ -43,8 +43,8 @@ export async function generateCompletions(
`JSON parsing error(s): ${validate.errors
?.map((err) => err.message)
.join(
- ", "
- )}\n\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support.`
+ ", ",
+ )}\n\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support.`,
);
}
}
@@ -57,7 +57,7 @@ export async function generateCompletions(
default:
throw new Error("Invalid client");
}
- })
+ }),
);
return completions;
diff --git a/apps/api/src/lib/LLM-extraction/models.ts b/apps/api/src/lib/LLM-extraction/models.ts
index 563863c0..cc1355de 100644
--- a/apps/api/src/lib/LLM-extraction/models.ts
+++ b/apps/api/src/lib/LLM-extraction/models.ts
@@ -14,7 +14,7 @@ const defaultPrompt =
function prepareOpenAIDoc(
document: Document,
- mode: "markdown" | "raw-html"
+ mode: "markdown" | "raw-html",
): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] | null {
let markdown = document.markdown;
@@ -50,7 +50,7 @@ export async function generateOpenAICompletions({
systemPrompt = defaultPrompt,
prompt,
temperature,
- mode
+ mode,
}: {
client: OpenAI;
model?: string;
@@ -68,7 +68,7 @@ export async function generateOpenAICompletions({
return {
...document,
warning:
- "LLM extraction was not performed since the document's content is empty or missing."
+ "LLM extraction was not performed since the document's content is empty or missing.",
};
}
const [content, numTokens] = preparedDoc;
@@ -81,21 +81,21 @@ export async function generateOpenAICompletions({
messages: [
{
role: "system",
- content: systemPrompt
+ content: systemPrompt,
},
{ role: "user", content },
{
role: "user",
- content: `Transform the above content into structured json output based on the following user request: ${prompt}`
- }
+ content: `Transform the above content into structured json output based on the following user request: ${prompt}`,
+ },
],
response_format: { type: "json_object" },
- temperature
+ temperature,
});
try {
llmExtraction = JSON.parse(
- (jsonCompletion.choices[0].message.content ?? "").trim()
+ (jsonCompletion.choices[0].message.content ?? "").trim(),
);
} catch (e) {
throw new Error("Invalid JSON");
@@ -106,9 +106,9 @@ export async function generateOpenAICompletions({
messages: [
{
role: "system",
- content: systemPrompt
+ content: systemPrompt,
},
- { role: "user", content }
+ { role: "user", content },
],
tools: [
{
@@ -116,12 +116,12 @@ export async function generateOpenAICompletions({
function: {
name: "extract_content",
description: "Extracts the content from the given webpage(s)",
- parameters: schema
- }
- }
+ parameters: schema,
+ },
+ },
],
tool_choice: { type: "function", function: { name: "extract_content" } },
- temperature
+ temperature,
});
const c = completion.choices[0].message.tool_calls[0].function.arguments;
@@ -140,6 +140,6 @@ export async function generateOpenAICompletions({
warning:
numTokens > maxTokens
? `Page was trimmed to fit the maximum token limit defined by the LLM model (Max: ${maxTokens} tokens, Attemped: ${numTokens} tokens). If results are not good, email us at help@mendable.ai so we can help you.`
- : undefined
+ : undefined,
};
}
diff --git a/apps/api/src/lib/__tests__/html-to-markdown.test.ts b/apps/api/src/lib/__tests__/html-to-markdown.test.ts
index f69c2949..d35e2cce 100644
--- a/apps/api/src/lib/__tests__/html-to-markdown.test.ts
+++ b/apps/api/src/lib/__tests__/html-to-markdown.test.ts
@@ -31,16 +31,16 @@ describe("parseMarkdown", () => {
{ html: "Unclosed tag", expected: "Unclosed tag" },
{
html: "
Missing closing div",
- expected: "Missing closing div"
+ expected: "Missing closing div",
},
{
html: "Wrong nesting
",
- expected: "**Wrong nesting**"
+ expected: "**Wrong nesting**",
},
{
html: '<a href="http://example.com">Link without closing tag',
- expected: "[Link without closing tag](http://example.com)"
- }
+ expected: "[Link without closing tag](http://example.com)",
+ },
];
for (const { html, expected } of invalidHtmls) {
diff --git a/apps/api/src/lib/__tests__/job-priority.test.ts b/apps/api/src/lib/__tests__/job-priority.test.ts
index 4bd5fda9..1a7550ef 100644
--- a/apps/api/src/lib/__tests__/job-priority.test.ts
+++ b/apps/api/src/lib/__tests__/job-priority.test.ts
@@ -1,7 +1,7 @@
import {
getJobPriority,
addJobPriority,
- deleteJobPriority
+ deleteJobPriority,
} from "../job-priority";
import { redisConnection } from "../../services/queue-service";
import { PlanType } from "../../types";
@@ -11,8 +11,8 @@ jest.mock("../../services/queue-service", () => ({
sadd: jest.fn(),
srem: jest.fn(),
scard: jest.fn(),
- expire: jest.fn()
- }
+ expire: jest.fn(),
+ },
}));
describe("Job Priority Tests", () => {
@@ -26,11 +26,11 @@ describe("Job Priority Tests", () => {
await addJobPriority(team_id, job_id);
expect(redisConnection.sadd).toHaveBeenCalledWith(
`limit_team_id:${team_id}`,
- job_id
+ job_id,
);
expect(redisConnection.expire).toHaveBeenCalledWith(
`limit_team_id:${team_id}`,
- 60
+ 60,
);
});
@@ -40,7 +40,7 @@ describe("Job Priority Tests", () => {
await deleteJobPriority(team_id, job_id);
expect(redisConnection.srem).toHaveBeenCalledWith(
`limit_team_id:${team_id}`,
- job_id
+ job_id,
);
});
@@ -89,7 +89,7 @@ describe("Job Priority Tests", () => {
await addJobPriority(team_id, job_id1);
expect(redisConnection.expire).toHaveBeenCalledWith(
`limit_team_id:${team_id}`,
- 60
+ 60,
);
// Clear the mock calls
@@ -99,7 +99,7 @@ describe("Job Priority Tests", () => {
await addJobPriority(team_id, job_id2);
expect(redisConnection.expire).toHaveBeenCalledWith(
`limit_team_id:${team_id}`,
- 60
+ 60,
);
});
@@ -112,7 +112,7 @@ describe("Job Priority Tests", () => {
await addJobPriority(team_id, job_id);
expect(redisConnection.expire).toHaveBeenCalledWith(
`limit_team_id:${team_id}`,
- 60
+ 60,
);
// Fast-forward time by 59 seconds
diff --git a/apps/api/src/lib/batch-process.ts b/apps/api/src/lib/batch-process.ts
index 20bb4ab6..1e4ac7be 100644
--- a/apps/api/src/lib/batch-process.ts
+++ b/apps/api/src/lib/batch-process.ts
@@ -1,7 +1,7 @@
export async function batchProcess<T>(
array: T[],
batchSize: number,
- asyncFunction: (item: T, index: number) => Promise<void>
+ asyncFunction: (item: T, index: number) => Promise<void>,
): Promise<void> {
const batches: T[][] = [];
for (let i = 0; i < array.length; i += batchSize) {
diff --git a/apps/api/src/lib/cache.ts b/apps/api/src/lib/cache.ts
index 30c9f0b4..7dcbf88b 100644
--- a/apps/api/src/lib/cache.ts
+++ b/apps/api/src/lib/cache.ts
@@ -6,14 +6,14 @@ const logger = _logger.child({ module: "cache" });
export const cacheRedis = process.env.CACHE_REDIS_URL
? new IORedis(process.env.CACHE_REDIS_URL, {
- maxRetriesPerRequest: null
+ maxRetriesPerRequest: null,
})
: null;
export function cacheKey(
url: string,
scrapeOptions: ScrapeOptions,
- internalOptions: InternalOptions
+ internalOptions: InternalOptions,
): string | null {
if (!cacheRedis) return null;
@@ -49,7 +49,7 @@ export async function saveEntryToCache(key: string, entry: CacheEntry) {
}
export async function getEntryFromCache(
- key: string
+ key: string,
): Promise<CacheEntry | null> {
if (!cacheRedis) return null;
diff --git a/apps/api/src/lib/concurrency-limit.ts b/apps/api/src/lib/concurrency-limit.ts
index aba1fd3a..8205113f 100644
--- a/apps/api/src/lib/concurrency-limit.ts
+++ b/apps/api/src/lib/concurrency-limit.ts
@@ -14,37 +14,37 @@ export function getConcurrencyLimitMax(plan: string): number {
export async function cleanOldConcurrencyLimitEntries(
team_id: string,
- now: number = Date.now()
+ now: number = Date.now(),
) {
await redisConnection.zremrangebyscore(constructKey(team_id), -Infinity, now);
}
export async function getConcurrencyLimitActiveJobs(
team_id: string,
- now: number = Date.now()
+ now: number = Date.now(),
): Promise<string[]> {
return await redisConnection.zrangebyscore(
constructKey(team_id),
now,
- Infinity
+ Infinity,
);
}
export async function pushConcurrencyLimitActiveJob(
team_id: string,
id: string,
- now: number = Date.now()
+ now: number = Date.now(),
) {
await redisConnection.zadd(
constructKey(team_id),
now + stalledJobTimeoutMs,
- id
+ id,
);
}
export async function removeConcurrencyLimitActiveJob(
team_id: string,
- id: string
+ id: string,
) {
await redisConnection.zrem(constructKey(team_id), id);
}
@@ -57,7 +57,7 @@ export type ConcurrencyLimitedJob = {
};
export async function takeConcurrencyLimitedJob(
- team_id: string
+ team_id: string,
): Promise<ConcurrencyLimitedJob | null> {
const res = await redisConnection.zmpop(1, constructQueueKey(team_id), "MIN");
if (res === null || res === undefined) {
@@ -69,11 +69,11 @@ export async function takeConcurrencyLimitedJob(
export async function pushConcurrencyLimitedJob(
team_id: string,
- job: ConcurrencyLimitedJob
+ job: ConcurrencyLimitedJob,
) {
await redisConnection.zadd(
constructQueueKey(team_id),
job.priority ?? 1,
- JSON.stringify(job)
+ JSON.stringify(job),
);
}
diff --git a/apps/api/src/lib/crawl-redis.test.ts b/apps/api/src/lib/crawl-redis.test.ts
index ef2dabee..65d4e13a 100644
--- a/apps/api/src/lib/crawl-redis.test.ts
+++ b/apps/api/src/lib/crawl-redis.test.ts
@@ -3,7 +3,7 @@ import { generateURLPermutations } from "./crawl-redis";
describe("generateURLPermutations", () => {
it("generates permutations correctly", () => {
const bareHttps = generateURLPermutations("https://firecrawl.dev").map(
- (x) => x.href
+ (x) => x.href,
);
expect(bareHttps.length).toBe(4);
expect(bareHttps.includes("https://firecrawl.dev/")).toBe(true);
@@ -12,7 +12,7 @@ describe("generateURLPermutations", () => {
expect(bareHttps.includes("http://www.firecrawl.dev/")).toBe(true);
const bareHttp = generateURLPermutations("http://firecrawl.dev").map(
- (x) => x.href
+ (x) => x.href,
);
expect(bareHttp.length).toBe(4);
expect(bareHttp.includes("https://firecrawl.dev/")).toBe(true);
@@ -21,7 +21,7 @@ describe("generateURLPermutations", () => {
expect(bareHttp.includes("http://www.firecrawl.dev/")).toBe(true);
const wwwHttps = generateURLPermutations("https://www.firecrawl.dev").map(
- (x) => x.href
+ (x) => x.href,
);
expect(wwwHttps.length).toBe(4);
expect(wwwHttps.includes("https://firecrawl.dev/")).toBe(true);
@@ -30,7 +30,7 @@ describe("generateURLPermutations", () => {
expect(wwwHttps.includes("http://www.firecrawl.dev/")).toBe(true);
const wwwHttp = generateURLPermutations("http://www.firecrawl.dev").map(
- (x) => x.href
+ (x) => x.href,
);
expect(wwwHttp.length).toBe(4);
expect(wwwHttp.includes("https://firecrawl.dev/")).toBe(true);
diff --git a/apps/api/src/lib/crawl-redis.ts b/apps/api/src/lib/crawl-redis.ts
index ab1a238d..6ccb9436 100644
--- a/apps/api/src/lib/crawl-redis.ts
+++ b/apps/api/src/lib/crawl-redis.ts
@@ -24,7 +24,7 @@ export async function saveCrawl(id: string, crawl: StoredCrawl) {
method: "saveCrawl",
crawlId: id,
teamId: crawl.team_id,
- plan: crawl.plan
+ plan: crawl.plan,
});
await redisConnection.set("crawl:" + id, JSON.stringify(crawl));
await redisConnection.expire("crawl:" + id, 24 * 60 * 60, "NX");
@@ -53,7 +53,7 @@ export async function addCrawlJob(id: string, job_id: string) {
jobId: job_id,
module: "crawl-redis",
method: "addCrawlJob",
- crawlId: id
+ crawlId: id,
});
await redisConnection.sadd("crawl:" + id + ":jobs", job_id);
await redisConnection.expire("crawl:" + id + ":jobs", 24 * 60 * 60, "NX");
@@ -64,7 +64,7 @@ export async function addCrawlJobs(id: string, job_ids: string[]) {
jobIds: job_ids,
module: "crawl-redis",
method: "addCrawlJobs",
- crawlId: id
+ crawlId: id,
});
await redisConnection.sadd("crawl:" + id + ":jobs", ...job_ids);
await redisConnection.expire("crawl:" + id + ":jobs", 24 * 60 * 60, "NX");
@@ -73,19 +73,19 @@ export async function addCrawlJobs(id: string, job_ids: string[]) {
export async function addCrawlJobDone(
id: string,
job_id: string,
- success: boolean
+ success: boolean,
) {
_logger.debug("Adding done crawl job to Redis...", {
jobId: job_id,
module: "crawl-redis",
method: "addCrawlJobDone",
- crawlId: id
+ crawlId: id,
});
await redisConnection.sadd("crawl:" + id + ":jobs_done", job_id);
await redisConnection.expire(
"crawl:" + id + ":jobs_done",
24 * 60 * 60,
- "NX"
+ "NX",
);
if (success) {
@@ -93,7 +93,7 @@ export async function addCrawlJobDone(
await redisConnection.expire(
"crawl:" + id + ":jobs_done_ordered",
24 * 60 * 60,
- "NX"
+ "NX",
);
}
}
@@ -105,12 +105,12 @@ export async function getDoneJobsOrderedLength(id: string): Promise<number> {
export async function getDoneJobsOrdered(
id: string,
start = 0,
- end = -1
+ end = -1,
): Promise<string[]> {
return await redisConnection.lrange(
"crawl:" + id + ":jobs_done_ordered",
start,
- end
+ end,
);
}
@@ -130,7 +130,7 @@ export async function finishCrawl(id: string) {
_logger.debug("Marking crawl as finished.", {
module: "crawl-redis",
method: "finishCrawl",
- crawlId: id
+ crawlId: id,
});
const set = await redisConnection.setnx("crawl:" + id + ":finish", "yes");
if (set === 1) {
@@ -141,7 +141,7 @@ export async function finishCrawl(id: string) {
_logger.debug("Crawl can not be finished yet, not marking as finished.", {
module: "crawl-redis",
method: "finishCrawl",
- crawlId: id
+ crawlId: id,
});
}
}
@@ -154,7 +154,7 @@ export async function getThrottledJobs(teamId: string): Promise<string[]> {
return await redisConnection.zrangebyscore(
"concurrency-limiter:" + teamId + ":throttled",
Date.now(),
- Infinity
+ Infinity,
);
}
@@ -201,7 +201,7 @@ export function generateURLPermutations(url: string | URL): URL[] {
export async function lockURL(
id: string,
sc: StoredCrawl,
- url: string
+ url: string,
): Promise<boolean> {
let logger = _logger.child({
crawlId: id,
@@ -209,7 +209,7 @@ export async function lockURL(
method: "lockURL",
preNormalizedURL: url,
teamId: sc.team_id,
- plan: sc.plan
+ plan: sc.plan,
});
if (typeof sc.crawlerOptions?.limit === "number") {
@@ -218,7 +218,7 @@ export async function lockURL(
sc.crawlerOptions.limit
) {
logger.debug(
- "Crawl has already hit visited_unique limit, not locking URL."
+ "Crawl has already hit visited_unique limit, not locking URL.",
);
return false;
}
@@ -231,7 +231,7 @@ export async function lockURL(
await redisConnection.expire(
"crawl:" + id + ":visited_unique",
24 * 60 * 60,
- "NX"
+ "NX",
);
let res: boolean;
@@ -242,7 +242,7 @@ export async function lockURL(
// logger.debug("Adding URL permutations for URL " + JSON.stringify(url) + "...", { permutations });
const x = await redisConnection.sadd(
"crawl:" + id + ":visited",
- ...permutations
+ ...permutations,
);
res = x === permutations.length;
}
@@ -250,7 +250,7 @@ export async function lockURL(
await redisConnection.expire("crawl:" + id + ":visited", 24 * 60 * 60, "NX");
logger.debug("Locking URL " + JSON.stringify(url) + "... result: " + res, {
- res
+ res,
});
return res;
}
@@ -259,7 +259,7 @@ export async function lockURL(
export async function lockURLs(
id: string,
sc: StoredCrawl,
- urls: string[]
+ urls: string[],
): Promise<boolean> {
urls = urls.map((url) => normalizeURL(url, sc));
const logger = _logger.child({
@@ -267,7 +267,7 @@ export async function lockURLs(
module: "crawl-redis",
method: "lockURL",
teamId: sc.team_id,
- plan: sc.plan
+ plan: sc.plan,
});
// Add to visited_unique set
@@ -276,7 +276,7 @@ export async function lockURLs(
await redisConnection.expire(
"crawl:" + id + ":visited_unique",
24 * 60 * 60,
- "NX"
+ "NX",
);
let res: boolean;
@@ -285,12 +285,12 @@ export async function lockURLs(
res = x === urls.length;
} else {
const allPermutations = urls.flatMap((url) =>
- generateURLPermutations(url).map((x) => x.href)
+ generateURLPermutations(url).map((x) => x.href),
);
logger.debug("Adding " + allPermutations.length + " URL permutations...");
const x = await redisConnection.sadd(
"crawl:" + id + ":visited",
- ...allPermutations
+ ...allPermutations,
);
res = x === allPermutations.length;
}
@@ -304,7 +304,7 @@ export async function lockURLs(
export function crawlToCrawler(
id: string,
sc: StoredCrawl,
- newBase?: string
+ newBase?: string,
): WebCrawler {
const crawler = new WebCrawler({
jobId: id,
@@ -315,7 +315,7 @@ export function crawlToCrawler(
maxCrawledLinks: sc.crawlerOptions?.maxCrawledLinks ?? 1000,
maxCrawledDepth: getAdjustedMaxDepth(
sc.originUrl!,
- sc.crawlerOptions?.maxDepth ?? 10
+ sc.crawlerOptions?.maxDepth ?? 10,
),
limit: sc.crawlerOptions?.limit ?? 10000,
generateImgAltText: sc.crawlerOptions?.generateImgAltText ?? false,
@@ -323,7 +323,7 @@ export function crawlToCrawler(
allowExternalContentLinks:
sc.crawlerOptions?.allowExternalContentLinks ?? false,
allowSubdomains: sc.crawlerOptions?.allowSubdomains ?? false,
- ignoreRobotsTxt: sc.crawlerOptions?.ignoreRobotsTxt ?? false
+ ignoreRobotsTxt: sc.crawlerOptions?.ignoreRobotsTxt ?? false,
});
if (sc.robots !== undefined) {
diff --git a/apps/api/src/lib/custom-error.ts b/apps/api/src/lib/custom-error.ts
index 25502a8e..20a01cb6 100644
--- a/apps/api/src/lib/custom-error.ts
+++ b/apps/api/src/lib/custom-error.ts
@@ -8,7 +8,7 @@ export class CustomError extends Error {
statusCode: number,
status: string,
message: string = "",
- dataIngestionJob?: any
+ dataIngestionJob?: any,
) {
super(message);
this.statusCode = statusCode;
diff --git a/apps/api/src/lib/default-values.ts b/apps/api/src/lib/default-values.ts
index ceca176c..2754b7cd 100644
--- a/apps/api/src/lib/default-values.ts
+++ b/apps/api/src/lib/default-values.ts
@@ -8,21 +8,21 @@ export const defaultPageOptions = {
waitFor: 0,
screenshot: false,
fullPageScreenshot: false,
- parsePDF: true
+ parsePDF: true,
};
export const defaultCrawlerOptions = {
allowBackwardCrawling: false,
- limit: 10000
+ limit: 10000,
};
export const defaultCrawlPageOptions = {
onlyMainContent: false,
includeHtml: false,
removeTags: [],
- parsePDF: true
+ parsePDF: true,
};
export const defaultExtractorOptions = {
- mode: "markdown"
+ mode: "markdown",
};
diff --git a/apps/api/src/lib/extract/reranker.ts b/apps/api/src/lib/extract/reranker.ts
index 044f71a4..26e7ac06 100644
--- a/apps/api/src/lib/extract/reranker.ts
+++ b/apps/api/src/lib/extract/reranker.ts
@@ -1,21 +1,21 @@
import { CohereClient } from "cohere-ai";
import { MapDocument } from "../../controllers/v1/types";
const cohere = new CohereClient({
- token: process.env.COHERE_API_KEY
+ token: process.env.COHERE_API_KEY,
});
export async function rerankDocuments(
documents: (string | Record<string, any>)[],
query: string,
topN = 3,
- model = "rerank-english-v3.0"
+ model = "rerank-english-v3.0",
) {
const rerank = await cohere.v2.rerank({
documents,
query,
topN,
model,
- returnDocuments: true
+ returnDocuments: true,
});
return rerank.results
@@ -23,6 +23,6 @@ export async function rerankDocuments(
.map((x) => ({
document: x.document,
index: x.index,
- relevanceScore: x.relevanceScore
+ relevanceScore: x.relevanceScore,
}));
}
diff --git a/apps/api/src/lib/html-to-markdown.ts b/apps/api/src/lib/html-to-markdown.ts
index 7a0020d1..cba1a80b 100644
--- a/apps/api/src/lib/html-to-markdown.ts
+++ b/apps/api/src/lib/html-to-markdown.ts
@@ -13,7 +13,7 @@ const goExecutablePath = join(
process.cwd(),
"sharedLibs",
"go-html-to-md",
- "html-to-markdown.so"
+ "html-to-markdown.so",
);
class GoMarkdownConverter {
@@ -51,7 +51,7 @@ class GoMarkdownConverter {
}
export async function parseMarkdown(
- html: string | null | undefined
+ html: string | null | undefined,
): Promise<string> {
if (!html) {
return "";
@@ -74,12 +74,12 @@ export async function parseMarkdown(
) {
Sentry.captureException(error);
logger.error(
- `Error converting HTML to Markdown with Go parser: ${error}`
+ `Error converting HTML to Markdown with Go parser: ${error}`,
);
} else {
logger.warn(
"Tried to use Go parser, but it doesn't exist in the file system.",
- { goExecutablePath }
+ { goExecutablePath },
);
}
}
@@ -101,7 +101,7 @@ export async function parseMarkdown(
var href = node.getAttribute("href").trim();
var title = node.title ? ' "' + node.title + '"' : "";
return "[" + content.trim() + "](" + href + title + ")\n";
- }
+ },
});
var gfm = turndownPluginGfm.gfm;
turndownService.use(gfm);
@@ -145,7 +145,7 @@ function removeSkipToContentLinks(markdownContent: string): string {
// Remove [Skip to Content](#page) and [Skip to content](#skip)
const newMarkdownContent = markdownContent.replace(
/\[Skip to Content\]\(#[^\)]*\)/gi,
- ""
+ "",
);
return newMarkdownContent;
}
diff --git a/apps/api/src/lib/job-priority.ts b/apps/api/src/lib/job-priority.ts
index 2bafc3e6..7e2d44de 100644
--- a/apps/api/src/lib/job-priority.ts
+++ b/apps/api/src/lib/job-priority.ts
@@ -31,7 +31,7 @@ export async function deleteJobPriority(team_id, job_id) {
export async function getJobPriority({
plan,
team_id,
- basePriority = 10
+ basePriority = 10,
}: {
plan: PlanType | undefined;
team_id: string;
@@ -91,12 +91,12 @@ export async function getJobPriority({
} else {
// If not, we keep base priority + planModifier
return Math.ceil(
- basePriority + Math.ceil((setLength - bucketLimit) * planModifier)
+ basePriority + Math.ceil((setLength - bucketLimit) * planModifier),
);
}
} catch (e) {
logger.error(
- `Get job priority failed: ${team_id}, ${plan}, ${basePriority}`
+ `Get job priority failed: ${team_id}, ${plan}, ${basePriority}`,
);
return basePriority;
}
diff --git a/apps/api/src/lib/logger.ts b/apps/api/src/lib/logger.ts
index 6996ffd4..3cc04a11 100644
--- a/apps/api/src/lib/logger.ts
+++ b/apps/api/src/lib/logger.ts
@@ -14,14 +14,14 @@ const logFormat = winston.format.printf(
name: value.name,
message: value.message,
stack: value.stack,
- cause: value.cause
+ cause: value.cause,
};
} else {
return value;
}
})
: ""
- }`
+ }`,
);
export const logger = winston.createLogger({
@@ -34,26 +34,26 @@ export const logger = winston.createLogger({
name: value.name,
message: value.message,
stack: value.stack,
- cause: value.cause
+ cause: value.cause,
};
} else {
return value;
}
- }
+ },
}),
transports: [
new winston.transports.Console({
format: winston.format.combine(
winston.format.timestamp({ format: "YYYY-MM-DD HH:mm:ss" }),
winston.format.metadata({
- fillExcept: ["message", "level", "timestamp"]
+ fillExcept: ["message", "level", "timestamp"],
}),
...((process.env.ENV === "production" &&
process.env.SENTRY_ENVIRONMENT === "dev") ||
process.env.ENV !== "production"
? [winston.format.colorize(), logFormat]
- : [])
- )
- })
- ]
+ : []),
+ ),
+ }),
+ ],
});
diff --git a/apps/api/src/lib/map-cosine.ts b/apps/api/src/lib/map-cosine.ts
index 2a089548..a6c06e27 100644
--- a/apps/api/src/lib/map-cosine.ts
+++ b/apps/api/src/lib/map-cosine.ts
@@ -6,10 +6,10 @@ export function performCosineSimilarity(links: string[], searchQuery: string) {
const cosineSimilarity = (vec1: number[], vec2: number[]): number => {
const dotProduct = vec1.reduce((sum, val, i) => sum + val * vec2[i], 0);
const magnitude1 = Math.sqrt(
- vec1.reduce((sum, val) => sum + val * val, 0)
+ vec1.reduce((sum, val) => sum + val * val, 0),
);
const magnitude2 = Math.sqrt(
- vec2.reduce((sum, val) => sum + val * val, 0)
+ vec2.reduce((sum, val) => sum + val * val, 0),
);
if (magnitude1 === 0 || magnitude2 === 0) return 0;
return dotProduct / (magnitude1 * magnitude2);
diff --git a/apps/api/src/lib/ranker.test.ts b/apps/api/src/lib/ranker.test.ts
index 2b30de19..b884c2fb 100644
--- a/apps/api/src/lib/ranker.test.ts
+++ b/apps/api/src/lib/ranker.test.ts
@@ -5,13 +5,13 @@ describe("performRanking", () => {
const linksWithContext = [
"url: https://example.com/dogs, title: All about dogs, description: Learn about different dog breeds",
"url: https://example.com/cats, title: Cat care guide, description: Everything about cats",
- "url: https://example.com/pets, title: General pet care, description: Care for all types of pets"
+ "url: https://example.com/pets, title: General pet care, description: Care for all types of pets",
];
const links = [
"https://example.com/dogs",
"https://example.com/cats",
- "https://example.com/pets"
+ "https://example.com/pets",
];
const searchQuery = "cats training";
@@ -50,7 +50,7 @@ describe("performRanking", () => {
it("should maintain original order for equal scores", async () => {
const linksWithContext = [
"url: https://example.com/1, title: Similar content A, description: test",
- "url: https://example.com/2, title: Similar content B, description: test"
+ "url: https://example.com/2, title: Similar content B, description: test",
];
const links = ["https://example.com/1", "https://example.com/2"];
diff --git a/apps/api/src/lib/ranker.ts b/apps/api/src/lib/ranker.ts
index 2f06d76d..bffbc9c2 100644
--- a/apps/api/src/lib/ranker.ts
+++ b/apps/api/src/lib/ranker.ts
@@ -5,14 +5,14 @@ import OpenAI from "openai";
configDotenv();
const openai = new OpenAI({
- apiKey: process.env.OPENAI_API_KEY
+ apiKey: process.env.OPENAI_API_KEY,
});
async function getEmbedding(text: string) {
const embedding = await openai.embeddings.create({
model: "text-embedding-ada-002",
input: text,
- encoding_format: "float"
+ encoding_format: "float",
});
return embedding.data[0].embedding;
@@ -39,7 +39,7 @@ const textToVector = (searchQuery: string, text: string): number[] => {
async function performRanking(
linksWithContext: string[],
links: string[],
- searchQuery: string
+ searchQuery: string,
) {
try {
// Handle invalid inputs
@@ -64,7 +64,7 @@ async function performRanking(
link: links[index],
linkWithContext,
score,
- originalIndex: index
+ originalIndex: index,
};
} catch (err) {
// If embedding fails for a link, return with score 0
@@ -72,10 +72,10 @@ async function performRanking(
link: links[index],
linkWithContext,
score: 0,
- originalIndex: index
+ originalIndex: index,
};
}
- })
+ }),
);
// Sort links based on similarity scores while preserving original order for equal scores
diff --git a/apps/api/src/lib/scrape-events.ts b/apps/api/src/lib/scrape-events.ts
index 6c39c722..97e2cecc 100644
--- a/apps/api/src/lib/scrape-events.ts
+++ b/apps/api/src/lib/scrape-events.ts
@@ -56,7 +56,7 @@ export class ScrapeEvents {
.insert({
job_id: jobId,
type: content.type,
- content: content
+ content: content,
// created_at
})
.select()
@@ -73,7 +73,7 @@ export class ScrapeEvents {
static async updateScrapeResult(
logId: number | null,
- result: ScrapeScrapeEvent["result"]
+ result: ScrapeScrapeEvent["result"],
) {
if (logId === null) return;
@@ -86,8 +86,8 @@ export class ScrapeEvents {
.update({
content: {
...previousLog.content,
- result
- }
+ result,
+ },
})
.eq("id", logId);
} catch (error) {
@@ -100,7 +100,7 @@ export class ScrapeEvents {
await this.insert(((job as any).id ? (job as any).id : job) as string, {
type: "queue",
event,
- worker: process.env.FLY_MACHINE_ID
+ worker: process.env.FLY_MACHINE_ID,
});
} catch (error) {
logger.error(`Error logging job event: ${error}`);
diff --git a/apps/api/src/lib/validate-country.ts b/apps/api/src/lib/validate-country.ts
index 797ea542..bff1c25c 100644
--- a/apps/api/src/lib/validate-country.ts
+++ b/apps/api/src/lib/validate-country.ts
@@ -6,7 +6,7 @@ export const countries = {
continent: "EU",
capital: "Andorra la Vella",
currency: ["EUR"],
- languages: ["ca"]
+ languages: ["ca"],
},
AE: {
name: "United Arab Emirates",
@@ -15,7 +15,7 @@ export const countries = {
continent: "AS",
capital: "Abu Dhabi",
currency: ["AED"],
- languages: ["ar"]
+ languages: ["ar"],
},
AF: {
name: "Afghanistan",
@@ -24,7 +24,7 @@ export const countries = {
continent: "AS",
capital: "Kabul",
currency: ["AFN"],
- languages: ["ps", "uz", "tk"]
+ languages: ["ps", "uz", "tk"],
},
AG: {
name: "Antigua and Barbuda",
@@ -33,7 +33,7 @@ export const countries = {
continent: "NA",
capital: "Saint John's",
currency: ["XCD"],
- languages: ["en"]
+ languages: ["en"],
},
AI: {
name: "Anguilla",
@@ -42,7 +42,7 @@ export const countries = {
continent: "NA",
capital: "The Valley",
currency: ["XCD"],
- languages: ["en"]
+ languages: ["en"],
},
AL: {
name: "Albania",
@@ -51,7 +51,7 @@ export const countries = {
continent: "EU",
capital: "Tirana",
currency: ["ALL"],
- languages: ["sq"]
+ languages: ["sq"],
},
AM: {
name: "Armenia",
@@ -60,7 +60,7 @@ export const countries = {
continent: "AS",
capital: "Yerevan",
currency: ["AMD"],
- languages: ["hy", "ru"]
+ languages: ["hy", "ru"],
},
AO: {
name: "Angola",
@@ -69,7 +69,7 @@ export const countries = {
continent: "AF",
capital: "Luanda",
currency: ["AOA"],
- languages: ["pt"]
+ languages: ["pt"],
},
AQ: {
name: "Antarctica",
@@ -78,7 +78,7 @@ export const countries = {
continent: "AN",
capital: "",
currency: [],
- languages: []
+ languages: [],
},
AR: {
name: "Argentina",
@@ -87,7 +87,7 @@ export const countries = {
continent: "SA",
capital: "Buenos Aires",
currency: ["ARS"],
- languages: ["es", "gn"]
+ languages: ["es", "gn"],
},
AS: {
name: "American Samoa",
@@ -96,7 +96,7 @@ export const countries = {
continent: "OC",
capital: "Pago Pago",
currency: ["USD"],
- languages: ["en", "sm"]
+ languages: ["en", "sm"],
},
AT: {
name: "Austria",
@@ -105,7 +105,7 @@ export const countries = {
continent: "EU",
capital: "Vienna",
currency: ["EUR"],
- languages: ["de"]
+ languages: ["de"],
},
AU: {
name: "Australia",
@@ -114,7 +114,7 @@ export const countries = {
continent: "OC",
capital: "Canberra",
currency: ["AUD"],
- languages: ["en"]
+ languages: ["en"],
},
AW: {
name: "Aruba",
@@ -123,7 +123,7 @@ export const countries = {
continent: "NA",
capital: "Oranjestad",
currency: ["AWG"],
- languages: ["nl", "pa"]
+ languages: ["nl", "pa"],
},
AX: {
name: "Aland",
@@ -133,7 +133,7 @@ export const countries = {
capital: "Mariehamn",
currency: ["EUR"],
languages: ["sv"],
- partOf: "FI"
+ partOf: "FI",
},
AZ: {
name: "Azerbaijan",
@@ -143,7 +143,7 @@ export const countries = {
continents: ["AS", "EU"],
capital: "Baku",
currency: ["AZN"],
- languages: ["az"]
+ languages: ["az"],
},
BA: {
name: "Bosnia and Herzegovina",
@@ -152,7 +152,7 @@ export const countries = {
continent: "EU",
capital: "Sarajevo",
currency: ["BAM"],
- languages: ["bs", "hr", "sr"]
+ languages: ["bs", "hr", "sr"],
},
BB: {
name: "Barbados",
@@ -161,7 +161,7 @@ export const countries = {
continent: "NA",
capital: "Bridgetown",
currency: ["BBD"],
- languages: ["en"]
+ languages: ["en"],
},
BD: {
name: "Bangladesh",
@@ -170,7 +170,7 @@ export const countries = {
continent: "AS",
capital: "Dhaka",
currency: ["BDT"],
- languages: ["bn"]
+ languages: ["bn"],
},
BE: {
name: "Belgium",
@@ -179,7 +179,7 @@ export const countries = {
continent: "EU",
capital: "Brussels",
currency: ["EUR"],
- languages: ["nl", "fr", "de"]
+ languages: ["nl", "fr", "de"],
},
BF: {
name: "Burkina Faso",
@@ -188,7 +188,7 @@ export const countries = {
continent: "AF",
capital: "Ouagadougou",
currency: ["XOF"],
- languages: ["fr", "ff"]
+ languages: ["fr", "ff"],
},
BG: {
name: "Bulgaria",
@@ -197,7 +197,7 @@ export const countries = {
continent: "EU",
capital: "Sofia",
currency: ["BGN"],
- languages: ["bg"]
+ languages: ["bg"],
},
BH: {
name: "Bahrain",
@@ -206,7 +206,7 @@ export const countries = {
continent: "AS",
capital: "Manama",
currency: ["BHD"],
- languages: ["ar"]
+ languages: ["ar"],
},
BI: {
name: "Burundi",
@@ -215,7 +215,7 @@ export const countries = {
continent: "AF",
capital: "Bujumbura",
currency: ["BIF"],
- languages: ["fr", "rn"]
+ languages: ["fr", "rn"],
},
BJ: {
name: "Benin",
@@ -224,7 +224,7 @@ export const countries = {
continent: "AF",
capital: "Porto-Novo",
currency: ["XOF"],
- languages: ["fr"]
+ languages: ["fr"],
},
BL: {
name: "Saint Barthelemy",
@@ -233,7 +233,7 @@ export const countries = {
continent: "NA",
capital: "Gustavia",
currency: ["EUR"],
- languages: ["fr"]
+ languages: ["fr"],
},
BM: {
name: "Bermuda",
@@ -242,7 +242,7 @@ export const countries = {
continent: "NA",
capital: "Hamilton",
currency: ["BMD"],
- languages: ["en"]
+ languages: ["en"],
},
BN: {
name: "Brunei",
@@ -251,7 +251,7 @@ export const countries = {
continent: "AS",
capital: "Bandar Seri Begawan",
currency: ["BND"],
- languages: ["ms"]
+ languages: ["ms"],
},
BO: {
name: "Bolivia",
@@ -260,7 +260,7 @@ export const countries = {
continent: "SA",
capital: "Sucre",
currency: ["BOB", "BOV"],
- languages: ["es", "ay", "qu"]
+ languages: ["es", "ay", "qu"],
},
BQ: {
name: "Bonaire",
@@ -269,7 +269,7 @@ export const countries = {
continent: "NA",
capital: "Kralendijk",
currency: ["USD"],
- languages: ["nl"]
+ languages: ["nl"],
},
BR: {
name: "Brazil",
@@ -278,7 +278,7 @@ export const countries = {
continent: "SA",
capital: "Brasília",
currency: ["BRL"],
- languages: ["pt"]
+ languages: ["pt"],
},
BS: {
name: "Bahamas",
@@ -287,7 +287,7 @@ export const countries = {
continent: "NA",
capital: "Nassau",
currency: ["BSD"],
- languages: ["en"]
+ languages: ["en"],
},
BT: {
name: "Bhutan",
@@ -296,7 +296,7 @@ export const countries = {
continent: "AS",
capital: "Thimphu",
currency: ["BTN", "INR"],
- languages: ["dz"]
+ languages: ["dz"],
},
BV: {
name: "Bouvet Island",
@@ -305,7 +305,7 @@ export const countries = {
continent: "AN",
capital: "",
currency: ["NOK"],
- languages: ["no", "nb", "nn"]
+ languages: ["no", "nb", "nn"],
},
BW: {
name: "Botswana",
@@ -314,7 +314,7 @@ export const countries = {
continent: "AF",
capital: "Gaborone",
currency: ["BWP"],
- languages: ["en", "tn"]
+ languages: ["en", "tn"],
},
BY: {
name: "Belarus",
@@ -323,7 +323,7 @@ export const countries = {
continent: "EU",
capital: "Minsk",
currency: ["BYN"],
- languages: ["be", "ru"]
+ languages: ["be", "ru"],
},
BZ: {
name: "Belize",
@@ -332,7 +332,7 @@ export const countries = {
continent: "NA",
capital: "Belmopan",
currency: ["BZD"],
- languages: ["en", "es"]
+ languages: ["en", "es"],
},
CA: {
name: "Canada",
@@ -341,7 +341,7 @@ export const countries = {
continent: "NA",
capital: "Ottawa",
currency: ["CAD"],
- languages: ["en", "fr"]
+ languages: ["en", "fr"],
},
CC: {
name: "Cocos (Keeling) Islands",
@@ -350,7 +350,7 @@ export const countries = {
continent: "AS",
capital: "West Island",
currency: ["AUD"],
- languages: ["en"]
+ languages: ["en"],
},
CD: {
name: "Democratic Republic of the Congo",
@@ -359,7 +359,7 @@ export const countries = {
continent: "AF",
capital: "Kinshasa",
currency: ["CDF"],
- languages: ["fr", "ln", "kg", "sw", "lu"]
+ languages: ["fr", "ln", "kg", "sw", "lu"],
},
CF: {
name: "Central African Republic",
@@ -368,7 +368,7 @@ export const countries = {
continent: "AF",
capital: "Bangui",
currency: ["XAF"],
- languages: ["fr", "sg"]
+ languages: ["fr", "sg"],
},
CG: {
name: "Republic of the Congo",
@@ -377,7 +377,7 @@ export const countries = {
continent: "AF",
capital: "Brazzaville",
currency: ["XAF"],
- languages: ["fr", "ln"]
+ languages: ["fr", "ln"],
},
CH: {
name: "Switzerland",
@@ -386,7 +386,7 @@ export const countries = {
continent: "EU",
capital: "Bern",
currency: ["CHE", "CHF", "CHW"],
- languages: ["de", "fr", "it"]
+ languages: ["de", "fr", "it"],
},
CI: {
name: "Ivory Coast",
@@ -395,7 +395,7 @@ export const countries = {
continent: "AF",
capital: "Yamoussoukro",
currency: ["XOF"],
- languages: ["fr"]
+ languages: ["fr"],
},
CK: {
name: "Cook Islands",
@@ -404,7 +404,7 @@ export const countries = {
continent: "OC",
capital: "Avarua",
currency: ["NZD"],
- languages: ["en"]
+ languages: ["en"],
},
CL: {
name: "Chile",
@@ -413,7 +413,7 @@ export const countries = {
continent: "SA",
capital: "Santiago",
currency: ["CLF", "CLP"],
- languages: ["es"]
+ languages: ["es"],
},
CM: {
name: "Cameroon",
@@ -422,7 +422,7 @@ export const countries = {
continent: "AF",
capital: "Yaoundé",
currency: ["XAF"],
- languages: ["en", "fr"]
+ languages: ["en", "fr"],
},
CN: {
name: "China",
@@ -431,7 +431,7 @@ export const countries = {
continent: "AS",
capital: "Beijing",
currency: ["CNY"],
- languages: ["zh"]
+ languages: ["zh"],
},
CO: {
name: "Colombia",
@@ -440,7 +440,7 @@ export const countries = {
continent: "SA",
capital: "Bogotá",
currency: ["COP"],
- languages: ["es"]
+ languages: ["es"],
},
CR: {
name: "Costa Rica",
@@ -449,7 +449,7 @@ export const countries = {
continent: "NA",
capital: "San José",
currency: ["CRC"],
- languages: ["es"]
+ languages: ["es"],
},
CU: {
name: "Cuba",
@@ -458,7 +458,7 @@ export const countries = {
continent: "NA",
capital: "Havana",
currency: ["CUC", "CUP"],
- languages: ["es"]
+ languages: ["es"],
},
CV: {
name: "Cape Verde",
@@ -467,7 +467,7 @@ export const countries = {
continent: "AF",
capital: "Praia",
currency: ["CVE"],
- languages: ["pt"]
+ languages: ["pt"],
},
CW: {
name: "Curacao",
@@ -476,7 +476,7 @@ export const countries = {
continent: "NA",
capital: "Willemstad",
currency: ["ANG"],
- languages: ["nl", "pa", "en"]
+ languages: ["nl", "pa", "en"],
},
CX: {
name: "Christmas Island",
@@ -485,7 +485,7 @@ export const countries = {
continent: "AS",
capital: "Flying Fish Cove",
currency: ["AUD"],
- languages: ["en"]
+ languages: ["en"],
},
CY: {
name: "Cyprus",
@@ -494,7 +494,7 @@ export const countries = {
continent: "EU",
capital: "Nicosia",
currency: ["EUR"],
- languages: ["el", "tr", "hy"]
+ languages: ["el", "tr", "hy"],
},
CZ: {
name: "Czech Republic",
@@ -503,7 +503,7 @@ export const countries = {
continent: "EU",
capital: "Prague",
currency: ["CZK"],
- languages: ["cs"]
+ languages: ["cs"],
},
DE: {
name: "Germany",
@@ -512,7 +512,7 @@ export const countries = {
continent: "EU",
capital: "Berlin",
currency: ["EUR"],
- languages: ["de"]
+ languages: ["de"],
},
DJ: {
name: "Djibouti",
@@ -521,7 +521,7 @@ export const countries = {
continent: "AF",
capital: "Djibouti",
currency: ["DJF"],
- languages: ["fr", "ar"]
+ languages: ["fr", "ar"],
},
DK: {
name: "Denmark",
@@ -531,7 +531,7 @@ export const countries = {
continents: ["EU", "NA"],
capital: "Copenhagen",
currency: ["DKK"],
- languages: ["da"]
+ languages: ["da"],
},
DM: {
name: "Dominica",
@@ -540,7 +540,7 @@ export const countries = {
continent: "NA",
capital: "Roseau",
currency: ["XCD"],
- languages: ["en"]
+ languages: ["en"],
},
DO: {
name: "Dominican Republic",
@@ -549,7 +549,7 @@ export const countries = {
continent: "NA",
capital: "Santo Domingo",
currency: ["DOP"],
- languages: ["es"]
+ languages: ["es"],
},
DZ: {
name: "Algeria",
@@ -558,7 +558,7 @@ export const countries = {
continent: "AF",
capital: "Algiers",
currency: ["DZD"],
- languages: ["ar"]
+ languages: ["ar"],
},
EC: {
name: "Ecuador",
@@ -567,7 +567,7 @@ export const countries = {
continent: "SA",
capital: "Quito",
currency: ["USD"],
- languages: ["es"]
+ languages: ["es"],
},
EE: {
name: "Estonia",
@@ -576,7 +576,7 @@ export const countries = {
continent: "EU",
capital: "Tallinn",
currency: ["EUR"],
- languages: ["et"]
+ languages: ["et"],
},
EG: {
name: "Egypt",
@@ -586,7 +586,7 @@ export const countries = {
continents: ["AF", "AS"],
capital: "Cairo",
currency: ["EGP"],
- languages: ["ar"]
+ languages: ["ar"],
},
EH: {
name: "Western Sahara",
@@ -595,7 +595,7 @@ export const countries = {
continent: "AF",
capital: "El Aaiún",
currency: ["MAD", "DZD", "MRU"],
- languages: ["es"]
+ languages: ["es"],
},
ER: {
name: "Eritrea",
@@ -604,7 +604,7 @@ export const countries = {
continent: "AF",
capital: "Asmara",
currency: ["ERN"],
- languages: ["ti", "ar", "en"]
+ languages: ["ti", "ar", "en"],
},
ES: {
name: "Spain",
@@ -613,7 +613,7 @@ export const countries = {
continent: "EU",
capital: "Madrid",
currency: ["EUR"],
- languages: ["es", "eu", "ca", "gl", "oc"]
+ languages: ["es", "eu", "ca", "gl", "oc"],
},
ET: {
name: "Ethiopia",
@@ -622,7 +622,7 @@ export const countries = {
continent: "AF",
capital: "Addis Ababa",
currency: ["ETB"],
- languages: ["am"]
+ languages: ["am"],
},
FI: {
name: "Finland",
@@ -631,7 +631,7 @@ export const countries = {
continent: "EU",
capital: "Helsinki",
currency: ["EUR"],
- languages: ["fi", "sv"]
+ languages: ["fi", "sv"],
},
FJ: {
name: "Fiji",
@@ -640,7 +640,7 @@ export const countries = {
continent: "OC",
capital: "Suva",
currency: ["FJD"],
- languages: ["en", "fj", "hi", "ur"]
+ languages: ["en", "fj", "hi", "ur"],
},
FK: {
name: "Falkland Islands",
@@ -649,7 +649,7 @@ export const countries = {
continent: "SA",
capital: "Stanley",
currency: ["FKP"],
- languages: ["en"]
+ languages: ["en"],
},
FM: {
name: "Micronesia",
@@ -658,7 +658,7 @@ export const countries = {
continent: "OC",
capital: "Palikir",
currency: ["USD"],
- languages: ["en"]
+ languages: ["en"],
},
FO: {
name: "Faroe Islands",
@@ -667,7 +667,7 @@ export const countries = {
continent: "EU",
capital: "Tórshavn",
currency: ["DKK"],
- languages: ["fo"]
+ languages: ["fo"],
},
FR: {
name: "France",
@@ -676,7 +676,7 @@ export const countries = {
continent: "EU",
capital: "Paris",
currency: ["EUR"],
- languages: ["fr"]
+ languages: ["fr"],
},
GA: {
name: "Gabon",
@@ -685,7 +685,7 @@ export const countries = {
continent: "AF",
capital: "Libreville",
currency: ["XAF"],
- languages: ["fr"]
+ languages: ["fr"],
},
GB: {
name: "United Kingdom",
@@ -694,7 +694,7 @@ export const countries = {
continent: "EU",
capital: "London",
currency: ["GBP"],
- languages: ["en"]
+ languages: ["en"],
},
GD: {
name: "Grenada",
@@ -703,7 +703,7 @@ export const countries = {
continent: "NA",
capital: "St. George's",
currency: ["XCD"],
- languages: ["en"]
+ languages: ["en"],
},
GE: {
name: "Georgia",
@@ -713,7 +713,7 @@ export const countries = {
continents: ["AS", "EU"],
capital: "Tbilisi",
currency: ["GEL"],
- languages: ["ka"]
+ languages: ["ka"],
},
GF: {
name: "French Guiana",
@@ -722,7 +722,7 @@ export const countries = {
continent: "SA",
capital: "Cayenne",
currency: ["EUR"],
- languages: ["fr"]
+ languages: ["fr"],
},
GG: {
name: "Guernsey",
@@ -731,7 +731,7 @@ export const countries = {
continent: "EU",
capital: "St. Peter Port",
currency: ["GBP"],
- languages: ["en", "fr"]
+ languages: ["en", "fr"],
},
GH: {
name: "Ghana",
@@ -740,7 +740,7 @@ export const countries = {
continent: "AF",
capital: "Accra",
currency: ["GHS"],
- languages: ["en"]
+ languages: ["en"],
},
GI: {
name: "Gibraltar",
@@ -749,7 +749,7 @@ export const countries = {
continent: "EU",
capital: "Gibraltar",
currency: ["GIP"],
- languages: ["en"]
+ languages: ["en"],
},
GL: {
name: "Greenland",
@@ -758,7 +758,7 @@ export const countries = {
continent: "NA",
capital: "Nuuk",
currency: ["DKK"],
- languages: ["kl"]
+ languages: ["kl"],
},
GM: {
name: "Gambia",
@@ -767,7 +767,7 @@ export const countries = {
continent: "AF",
capital: "Banjul",
currency: ["GMD"],
- languages: ["en"]
+ languages: ["en"],
},
GN: {
name: "Guinea",
@@ -776,7 +776,7 @@ export const countries = {
continent: "AF",
capital: "Conakry",
currency: ["GNF"],
- languages: ["fr", "ff"]
+ languages: ["fr", "ff"],
},
GP: {
name: "Guadeloupe",
@@ -785,7 +785,7 @@ export const countries = {
continent: "NA",
capital: "Basse-Terre",
currency: ["EUR"],
- languages: ["fr"]
+ languages: ["fr"],
},
GQ: {
name: "Equatorial Guinea",
@@ -794,7 +794,7 @@ export const countries = {
continent: "AF",
capital: "Malabo",
currency: ["XAF"],
- languages: ["es", "fr"]
+ languages: ["es", "fr"],
},
GR: {
name: "Greece",
@@ -803,7 +803,7 @@ export const countries = {
continent: "EU",
capital: "Athens",
currency: ["EUR"],
- languages: ["el"]
+ languages: ["el"],
},
GS: {
name: "South Georgia and the South Sandwich Islands",
@@ -812,7 +812,7 @@ export const countries = {
continent: "AN",
capital: "King Edward Point",
currency: ["GBP"],
- languages: ["en"]
+ languages: ["en"],
},
GT: {
name: "Guatemala",
@@ -821,7 +821,7 @@ export const countries = {
continent: "NA",
capital: "Guatemala City",
currency: ["GTQ"],
- languages: ["es"]
+ languages: ["es"],
},
GU: {
name: "Guam",
@@ -830,7 +830,7 @@ export const countries = {
continent: "OC",
capital: "Hagåtña",
currency: ["USD"],
- languages: ["en", "ch", "es"]
+ languages: ["en", "ch", "es"],
},
GW: {
name: "Guinea-Bissau",
@@ -839,7 +839,7 @@ export const countries = {
continent: "AF",
capital: "Bissau",
currency: ["XOF"],
- languages: ["pt"]
+ languages: ["pt"],
},
GY: {
name: "Guyana",
@@ -848,7 +848,7 @@ export const countries = {
continent: "SA",
capital: "Georgetown",
currency: ["GYD"],
- languages: ["en"]
+ languages: ["en"],
},
HK: {
name: "Hong Kong",
@@ -857,7 +857,7 @@ export const countries = {
continent: "AS",
capital: "City of Victoria",
currency: ["HKD"],
- languages: ["zh", "en"]
+ languages: ["zh", "en"],
},
HM: {
name: "Heard Island and McDonald Islands",
@@ -866,7 +866,7 @@ export const countries = {
continent: "AN",
capital: "",
currency: ["AUD"],
- languages: ["en"]
+ languages: ["en"],
},
HN: {
name: "Honduras",
@@ -875,7 +875,7 @@ export const countries = {
continent: "NA",
capital: "Tegucigalpa",
currency: ["HNL"],
- languages: ["es"]
+ languages: ["es"],
},
HR: {
name: "Croatia",
@@ -884,7 +884,7 @@ export const countries = {
continent: "EU",
capital: "Zagreb",
currency: ["EUR"],
- languages: ["hr"]
+ languages: ["hr"],
},
HT: {
name: "Haiti",
@@ -893,7 +893,7 @@ export const countries = {
continent: "NA",
capital: "Port-au-Prince",
currency: ["HTG", "USD"],
- languages: ["fr", "ht"]
+ languages: ["fr", "ht"],
},
HU: {
name: "Hungary",
@@ -902,7 +902,7 @@ export const countries = {
continent: "EU",
capital: "Budapest",
currency: ["HUF"],
- languages: ["hu"]
+ languages: ["hu"],
},
ID: {
name: "Indonesia",
@@ -911,7 +911,7 @@ export const countries = {
continent: "AS",
capital: "Jakarta",
currency: ["IDR"],
- languages: ["id"]
+ languages: ["id"],
},
IE: {
name: "Ireland",
@@ -920,7 +920,7 @@ export const countries = {
continent: "EU",
capital: "Dublin",
currency: ["EUR"],
- languages: ["ga", "en"]
+ languages: ["ga", "en"],
},
IL: {
name: "Israel",
@@ -929,7 +929,7 @@ export const countries = {
continent: "AS",
capital: "Jerusalem",
currency: ["ILS"],
- languages: ["he", "ar"]
+ languages: ["he", "ar"],
},
IM: {
name: "Isle of Man",
@@ -938,7 +938,7 @@ export const countries = {
continent: "EU",
capital: "Douglas",
currency: ["GBP"],
- languages: ["en", "gv"]
+ languages: ["en", "gv"],
},
IN: {
name: "India",
@@ -947,7 +947,7 @@ export const countries = {
continent: "AS",
capital: "New Delhi",
currency: ["INR"],
- languages: ["hi", "en"]
+ languages: ["hi", "en"],
},
IO: {
name: "British Indian Ocean Territory",
@@ -956,7 +956,7 @@ export const countries = {
continent: "AS",
capital: "Diego Garcia",
currency: ["USD"],
- languages: ["en"]
+ languages: ["en"],
},
IQ: {
name: "Iraq",
@@ -965,7 +965,7 @@ export const countries = {
continent: "AS",
capital: "Baghdad",
currency: ["IQD"],
- languages: ["ar", "ku"]
+ languages: ["ar", "ku"],
},
IR: {
name: "Iran",
@@ -974,7 +974,7 @@ export const countries = {
continent: "AS",
capital: "Tehran",
currency: ["IRR"],
- languages: ["fa"]
+ languages: ["fa"],
},
IS: {
name: "Iceland",
@@ -983,7 +983,7 @@ export const countries = {
continent: "EU",
capital: "Reykjavik",
currency: ["ISK"],
- languages: ["is"]
+ languages: ["is"],
},
IT: {
name: "Italy",
@@ -992,7 +992,7 @@ export const countries = {
continent: "EU",
capital: "Rome",
currency: ["EUR"],
- languages: ["it"]
+ languages: ["it"],
},
JE: {
name: "Jersey",
@@ -1001,7 +1001,7 @@ export const countries = {
continent: "EU",
capital: "Saint Helier",
currency: ["GBP"],
- languages: ["en", "fr"]
+ languages: ["en", "fr"],
},
JM: {
name: "Jamaica",
@@ -1010,7 +1010,7 @@ export const countries = {
continent: "NA",
capital: "Kingston",
currency: ["JMD"],
- languages: ["en"]
+ languages: ["en"],
},
JO: {
name: "Jordan",
@@ -1019,7 +1019,7 @@ export const countries = {
continent: "AS",
capital: "Amman",
currency: ["JOD"],
- languages: ["ar"]
+ languages: ["ar"],
},
JP: {
name: "Japan",
@@ -1028,7 +1028,7 @@ export const countries = {
continent: "AS",
capital: "Tokyo",
currency: ["JPY"],
- languages: ["ja"]
+ languages: ["ja"],
},
KE: {
name: "Kenya",
@@ -1037,7 +1037,7 @@ export const countries = {
continent: "AF",
capital: "Nairobi",
currency: ["KES"],
- languages: ["en", "sw"]
+ languages: ["en", "sw"],
},
KG: {
name: "Kyrgyzstan",
@@ -1046,7 +1046,7 @@ export const countries = {
continent: "AS",
capital: "Bishkek",
currency: ["KGS"],
- languages: ["ky", "ru"]
+ languages: ["ky", "ru"],
},
KH: {
name: "Cambodia",
@@ -1055,7 +1055,7 @@ export const countries = {
continent: "AS",
capital: "Phnom Penh",
currency: ["KHR"],
- languages: ["km"]
+ languages: ["km"],
},
KI: {
name: "Kiribati",
@@ -1064,7 +1064,7 @@ export const countries = {
continent: "OC",
capital: "South Tarawa",
currency: ["AUD"],
- languages: ["en"]
+ languages: ["en"],
},
KM: {
name: "Comoros",
@@ -1073,7 +1073,7 @@ export const countries = {
continent: "AF",
capital: "Moroni",
currency: ["KMF"],
- languages: ["ar", "fr"]
+ languages: ["ar", "fr"],
},
KN: {
name: "Saint Kitts and Nevis",
@@ -1082,7 +1082,7 @@ export const countries = {
continent: "NA",
capital: "Basseterre",
currency: ["XCD"],
- languages: ["en"]
+ languages: ["en"],
},
KP: {
name: "North Korea",
@@ -1091,7 +1091,7 @@ export const countries = {
continent: "AS",
capital: "Pyongyang",
currency: ["KPW"],
- languages: ["ko"]
+ languages: ["ko"],
},
KR: {
name: "South Korea",
@@ -1100,7 +1100,7 @@ export const countries = {
continent: "AS",
capital: "Seoul",
currency: ["KRW"],
- languages: ["ko"]
+ languages: ["ko"],
},
KW: {
name: "Kuwait",
@@ -1109,7 +1109,7 @@ export const countries = {
continent: "AS",
capital: "Kuwait City",
currency: ["KWD"],
- languages: ["ar"]
+ languages: ["ar"],
},
KY: {
name: "Cayman Islands",
@@ -1118,7 +1118,7 @@ export const countries = {
continent: "NA",
capital: "George Town",
currency: ["KYD"],
- languages: ["en"]
+ languages: ["en"],
},
KZ: {
name: "Kazakhstan",
@@ -1128,7 +1128,7 @@ export const countries = {
continents: ["AS", "EU"],
capital: "Astana",
currency: ["KZT"],
- languages: ["kk", "ru"]
+ languages: ["kk", "ru"],
},
LA: {
name: "Laos",
@@ -1137,7 +1137,7 @@ export const countries = {
continent: "AS",
capital: "Vientiane",
currency: ["LAK"],
- languages: ["lo"]
+ languages: ["lo"],
},
LB: {
name: "Lebanon",
@@ -1146,7 +1146,7 @@ export const countries = {
continent: "AS",
capital: "Beirut",
currency: ["LBP"],
- languages: ["ar", "fr"]
+ languages: ["ar", "fr"],
},
LC: {
name: "Saint Lucia",
@@ -1155,7 +1155,7 @@ export const countries = {
continent: "NA",
capital: "Castries",
currency: ["XCD"],
- languages: ["en"]
+ languages: ["en"],
},
LI: {
name: "Liechtenstein",
@@ -1164,7 +1164,7 @@ export const countries = {
continent: "EU",
capital: "Vaduz",
currency: ["CHF"],
- languages: ["de"]
+ languages: ["de"],
},
LK: {
name: "Sri Lanka",
@@ -1173,7 +1173,7 @@ export const countries = {
continent: "AS",
capital: "Colombo",
currency: ["LKR"],
- languages: ["si", "ta"]
+ languages: ["si", "ta"],
},
LR: {
name: "Liberia",
@@ -1182,7 +1182,7 @@ export const countries = {
continent: "AF",
capital: "Monrovia",
currency: ["LRD"],
- languages: ["en"]
+ languages: ["en"],
},
LS: {
name: "Lesotho",
@@ -1191,7 +1191,7 @@ export const countries = {
continent: "AF",
capital: "Maseru",
currency: ["LSL", "ZAR"],
- languages: ["en", "st"]
+ languages: ["en", "st"],
},
LT: {
name: "Lithuania",
@@ -1200,7 +1200,7 @@ export const countries = {
continent: "EU",
capital: "Vilnius",
currency: ["EUR"],
- languages: ["lt"]
+ languages: ["lt"],
},
LU: {
name: "Luxembourg",
@@ -1209,7 +1209,7 @@ export const countries = {
continent: "EU",
capital: "Luxembourg",
currency: ["EUR"],
- languages: ["fr", "de", "lb"]
+ languages: ["fr", "de", "lb"],
},
LV: {
name: "Latvia",
@@ -1218,7 +1218,7 @@ export const countries = {
continent: "EU",
capital: "Riga",
currency: ["EUR"],
- languages: ["lv"]
+ languages: ["lv"],
},
LY: {
name: "Libya",
@@ -1227,7 +1227,7 @@ export const countries = {
continent: "AF",
capital: "Tripoli",
currency: ["LYD"],
- languages: ["ar"]
+ languages: ["ar"],
},
MA: {
name: "Morocco",
@@ -1236,7 +1236,7 @@ export const countries = {
continent: "AF",
capital: "Rabat",
currency: ["MAD"],
- languages: ["ar"]
+ languages: ["ar"],
},
MC: {
name: "Monaco",
@@ -1245,7 +1245,7 @@ export const countries = {
continent: "EU",
capital: "Monaco",
currency: ["EUR"],
- languages: ["fr"]
+ languages: ["fr"],
},
MD: {
name: "Moldova",
@@ -1254,7 +1254,7 @@ export const countries = {
continent: "EU",
capital: "Chișinău",
currency: ["MDL"],
- languages: ["ro"]
+ languages: ["ro"],
},
ME: {
name: "Montenegro",
@@ -1263,7 +1263,7 @@ export const countries = {
continent: "EU",
capital: "Podgorica",
currency: ["EUR"],
- languages: ["sr", "bs", "sq", "hr"]
+ languages: ["sr", "bs", "sq", "hr"],
},
MF: {
name: "Saint Martin",
@@ -1272,7 +1272,7 @@ export const countries = {
continent: "NA",
capital: "Marigot",
currency: ["EUR"],
- languages: ["en", "fr", "nl"]
+ languages: ["en", "fr", "nl"],
},
MG: {
name: "Madagascar",
@@ -1281,7 +1281,7 @@ export const countries = {
continent: "AF",
capital: "Antananarivo",
currency: ["MGA"],
- languages: ["fr", "mg"]
+ languages: ["fr", "mg"],
},
MH: {
name: "Marshall Islands",
@@ -1290,7 +1290,7 @@ export const countries = {
continent: "OC",
capital: "Majuro",
currency: ["USD"],
- languages: ["en", "mh"]
+ languages: ["en", "mh"],
},
MK: {
name: "North Macedonia",
@@ -1299,7 +1299,7 @@ export const countries = {
continent: "EU",
capital: "Skopje",
currency: ["MKD"],
- languages: ["mk"]
+ languages: ["mk"],
},
ML: {
name: "Mali",
@@ -1308,7 +1308,7 @@ export const countries = {
continent: "AF",
capital: "Bamako",
currency: ["XOF"],
- languages: ["fr"]
+ languages: ["fr"],
},
MM: {
name: "Myanmar (Burma)",
@@ -1317,7 +1317,7 @@ export const countries = {
continent: "AS",
capital: "Naypyidaw",
currency: ["MMK"],
- languages: ["my"]
+ languages: ["my"],
},
MN: {
name: "Mongolia",
@@ -1326,7 +1326,7 @@ export const countries = {
continent: "AS",
capital: "Ulan Bator",
currency: ["MNT"],
- languages: ["mn"]
+ languages: ["mn"],
},
MO: {
name: "Macao",
@@ -1335,7 +1335,7 @@ export const countries = {
continent: "AS",
capital: "",
currency: ["MOP"],
- languages: ["zh", "pt"]
+ languages: ["zh", "pt"],
},
MP: {
name: "Northern Mariana Islands",
@@ -1344,7 +1344,7 @@ export const countries = {
continent: "OC",
capital: "Saipan",
currency: ["USD"],
- languages: ["en", "ch"]
+ languages: ["en", "ch"],
},
MQ: {
name: "Martinique",
@@ -1353,7 +1353,7 @@ export const countries = {
continent: "NA",
capital: "Fort-de-France",
currency: ["EUR"],
- languages: ["fr"]
+ languages: ["fr"],
},
MR: {
name: "Mauritania",
@@ -1362,7 +1362,7 @@ export const countries = {
continent: "AF",
capital: "Nouakchott",
currency: ["MRU"],
- languages: ["ar"]
+ languages: ["ar"],
},
MS: {
name: "Montserrat",
@@ -1371,7 +1371,7 @@ export const countries = {
continent: "NA",
capital: "Plymouth",
currency: ["XCD"],
- languages: ["en"]
+ languages: ["en"],
},
MT: {
name: "Malta",
@@ -1380,7 +1380,7 @@ export const countries = {
continent: "EU",
capital: "Valletta",
currency: ["EUR"],
- languages: ["mt", "en"]
+ languages: ["mt", "en"],
},
MU: {
name: "Mauritius",
@@ -1389,7 +1389,7 @@ export const countries = {
continent: "AF",
capital: "Port Louis",
currency: ["MUR"],
- languages: ["en"]
+ languages: ["en"],
},
MV: {
name: "Maldives",
@@ -1398,7 +1398,7 @@ export const countries = {
continent: "AS",
capital: "Malé",
currency: ["MVR"],
- languages: ["dv"]
+ languages: ["dv"],
},
MW: {
name: "Malawi",
@@ -1407,7 +1407,7 @@ export const countries = {
continent: "AF",
capital: "Lilongwe",
currency: ["MWK"],
- languages: ["en", "ny"]
+ languages: ["en", "ny"],
},
MX: {
name: "Mexico",
@@ -1416,7 +1416,7 @@ export const countries = {
continent: "NA",
capital: "Mexico City",
currency: ["MXN"],
- languages: ["es"]
+ languages: ["es"],
},
MY: {
name: "Malaysia",
@@ -1425,7 +1425,7 @@ export const countries = {
continent: "AS",
capital: "Kuala Lumpur",
currency: ["MYR"],
- languages: ["ms"]
+ languages: ["ms"],
},
MZ: {
name: "Mozambique",
@@ -1434,7 +1434,7 @@ export const countries = {
continent: "AF",
capital: "Maputo",
currency: ["MZN"],
- languages: ["pt"]
+ languages: ["pt"],
},
NA: {
name: "Namibia",
@@ -1443,7 +1443,7 @@ export const countries = {
continent: "AF",
capital: "Windhoek",
currency: ["NAD", "ZAR"],
- languages: ["en", "af"]
+ languages: ["en", "af"],
},
NC: {
name: "New Caledonia",
@@ -1452,7 +1452,7 @@ export const countries = {
continent: "OC",
capital: "Nouméa",
currency: ["XPF"],
- languages: ["fr"]
+ languages: ["fr"],
},
NE: {
name: "Niger",
@@ -1461,7 +1461,7 @@ export const countries = {
continent: "AF",
capital: "Niamey",
currency: ["XOF"],
- languages: ["fr"]
+ languages: ["fr"],
},
NF: {
name: "Norfolk Island",
@@ -1470,7 +1470,7 @@ export const countries = {
continent: "OC",
capital: "Kingston",
currency: ["AUD"],
- languages: ["en"]
+ languages: ["en"],
},
NG: {
name: "Nigeria",
@@ -1479,7 +1479,7 @@ export const countries = {
continent: "AF",
capital: "Abuja",
currency: ["NGN"],
- languages: ["en"]
+ languages: ["en"],
},
NI: {
name: "Nicaragua",
@@ -1488,7 +1488,7 @@ export const countries = {
continent: "NA",
capital: "Managua",
currency: ["NIO"],
- languages: ["es"]
+ languages: ["es"],
},
NL: {
name: "Netherlands",
@@ -1497,7 +1497,7 @@ export const countries = {
continent: "EU",
capital: "Amsterdam",
currency: ["EUR"],
- languages: ["nl"]
+ languages: ["nl"],
},
NO: {
name: "Norway",
@@ -1506,7 +1506,7 @@ export const countries = {
continent: "EU",
capital: "Oslo",
currency: ["NOK"],
- languages: ["no", "nb", "nn"]
+ languages: ["no", "nb", "nn"],
},
NP: {
name: "Nepal",
@@ -1515,7 +1515,7 @@ export const countries = {
continent: "AS",
capital: "Kathmandu",
currency: ["NPR"],
- languages: ["ne"]
+ languages: ["ne"],
},
NR: {
name: "Nauru",
@@ -1524,7 +1524,7 @@ export const countries = {
continent: "OC",
capital: "Yaren",
currency: ["AUD"],
- languages: ["en", "na"]
+ languages: ["en", "na"],
},
NU: {
name: "Niue",
@@ -1533,7 +1533,7 @@ export const countries = {
continent: "OC",
capital: "Alofi",
currency: ["NZD"],
- languages: ["en"]
+ languages: ["en"],
},
NZ: {
name: "New Zealand",
@@ -1542,7 +1542,7 @@ export const countries = {
continent: "OC",
capital: "Wellington",
currency: ["NZD"],
- languages: ["en", "mi"]
+ languages: ["en", "mi"],
},
OM: {
name: "Oman",
@@ -1551,7 +1551,7 @@ export const countries = {
continent: "AS",
capital: "Muscat",
currency: ["OMR"],
- languages: ["ar"]
+ languages: ["ar"],
},
PA: {
name: "Panama",
@@ -1560,7 +1560,7 @@ export const countries = {
continent: "NA",
capital: "Panama City",
currency: ["PAB", "USD"],
- languages: ["es"]
+ languages: ["es"],
},
PE: {
name: "Peru",
@@ -1569,7 +1569,7 @@ export const countries = {
continent: "SA",
capital: "Lima",
currency: ["PEN"],
- languages: ["es"]
+ languages: ["es"],
},
PF: {
name: "French Polynesia",
@@ -1578,7 +1578,7 @@ export const countries = {
continent: "OC",
capital: "Papeetē",
currency: ["XPF"],
- languages: ["fr"]
+ languages: ["fr"],
},
PG: {
name: "Papua New Guinea",
@@ -1587,7 +1587,7 @@ export const countries = {
continent: "OC",
capital: "Port Moresby",
currency: ["PGK"],
- languages: ["en"]
+ languages: ["en"],
},
PH: {
name: "Philippines",
@@ -1596,7 +1596,7 @@ export const countries = {
continent: "AS",
capital: "Manila",
currency: ["PHP"],
- languages: ["en"]
+ languages: ["en"],
},
PK: {
name: "Pakistan",
@@ -1605,7 +1605,7 @@ export const countries = {
continent: "AS",
capital: "Islamabad",
currency: ["PKR"],
- languages: ["en", "ur"]
+ languages: ["en", "ur"],
},
PL: {
name: "Poland",
@@ -1614,7 +1614,7 @@ export const countries = {
continent: "EU",
capital: "Warsaw",
currency: ["PLN"],
- languages: ["pl"]
+ languages: ["pl"],
},
PM: {
name: "Saint Pierre and Miquelon",
@@ -1623,7 +1623,7 @@ export const countries = {
continent: "NA",
capital: "Saint-Pierre",
currency: ["EUR"],
- languages: ["fr"]
+ languages: ["fr"],
},
PN: {
name: "Pitcairn Islands",
@@ -1632,7 +1632,7 @@ export const countries = {
continent: "OC",
capital: "Adamstown",
currency: ["NZD"],
- languages: ["en"]
+ languages: ["en"],
},
PR: {
name: "Puerto Rico",
@@ -1641,7 +1641,7 @@ export const countries = {
continent: "NA",
capital: "San Juan",
currency: ["USD"],
- languages: ["es", "en"]
+ languages: ["es", "en"],
},
PS: {
name: "Palestine",
@@ -1650,7 +1650,7 @@ export const countries = {
continent: "AS",
capital: "Ramallah",
currency: ["ILS"],
- languages: ["ar"]
+ languages: ["ar"],
},
PT: {
name: "Portugal",
@@ -1659,7 +1659,7 @@ export const countries = {
continent: "EU",
capital: "Lisbon",
currency: ["EUR"],
- languages: ["pt"]
+ languages: ["pt"],
},
PW: {
name: "Palau",
@@ -1668,7 +1668,7 @@ export const countries = {
continent: "OC",
capital: "Ngerulmud",
currency: ["USD"],
- languages: ["en"]
+ languages: ["en"],
},
PY: {
name: "Paraguay",
@@ -1677,7 +1677,7 @@ export const countries = {
continent: "SA",
capital: "Asunción",
currency: ["PYG"],
- languages: ["es", "gn"]
+ languages: ["es", "gn"],
},
QA: {
name: "Qatar",
@@ -1686,7 +1686,7 @@ export const countries = {
continent: "AS",
capital: "Doha",
currency: ["QAR"],
- languages: ["ar"]
+ languages: ["ar"],
},
RE: {
name: "Reunion",
@@ -1695,7 +1695,7 @@ export const countries = {
continent: "AF",
capital: "Saint-Denis",
currency: ["EUR"],
- languages: ["fr"]
+ languages: ["fr"],
},
RO: {
name: "Romania",
@@ -1704,7 +1704,7 @@ export const countries = {
continent: "EU",
capital: "Bucharest",
currency: ["RON"],
- languages: ["ro"]
+ languages: ["ro"],
},
RS: {
name: "Serbia",
@@ -1713,7 +1713,7 @@ export const countries = {
continent: "EU",
capital: "Belgrade",
currency: ["RSD"],
- languages: ["sr"]
+ languages: ["sr"],
},
RU: {
name: "Russia",
@@ -1723,7 +1723,7 @@ export const countries = {
continents: ["AS", "EU"],
capital: "Moscow",
currency: ["RUB"],
- languages: ["ru"]
+ languages: ["ru"],
},
RW: {
name: "Rwanda",
@@ -1732,7 +1732,7 @@ export const countries = {
continent: "AF",
capital: "Kigali",
currency: ["RWF"],
- languages: ["rw", "en", "fr"]
+ languages: ["rw", "en", "fr"],
},
SA: {
name: "Saudi Arabia",
@@ -1741,7 +1741,7 @@ export const countries = {
continent: "AS",
capital: "Riyadh",
currency: ["SAR"],
- languages: ["ar"]
+ languages: ["ar"],
},
SB: {
name: "Solomon Islands",
@@ -1750,7 +1750,7 @@ export const countries = {
continent: "OC",
capital: "Honiara",
currency: ["SBD"],
- languages: ["en"]
+ languages: ["en"],
},
SC: {
name: "Seychelles",
@@ -1759,7 +1759,7 @@ export const countries = {
continent: "AF",
capital: "Victoria",
currency: ["SCR"],
- languages: ["fr", "en"]
+ languages: ["fr", "en"],
},
SD: {
name: "Sudan",
@@ -1768,7 +1768,7 @@ export const countries = {
continent: "AF",
capital: "Khartoum",
currency: ["SDG"],
- languages: ["ar", "en"]
+ languages: ["ar", "en"],
},
SE: {
name: "Sweden",
@@ -1777,7 +1777,7 @@ export const countries = {
continent: "EU",
capital: "Stockholm",
currency: ["SEK"],
- languages: ["sv"]
+ languages: ["sv"],
},
SG: {
name: "Singapore",
@@ -1786,7 +1786,7 @@ export const countries = {
continent: "AS",
capital: "Singapore",
currency: ["SGD"],
- languages: ["en", "ms", "ta", "zh"]
+ languages: ["en", "ms", "ta", "zh"],
},
SH: {
name: "Saint Helena",
@@ -1795,7 +1795,7 @@ export const countries = {
continent: "AF",
capital: "Jamestown",
currency: ["SHP"],
- languages: ["en"]
+ languages: ["en"],
},
SI: {
name: "Slovenia",
@@ -1804,7 +1804,7 @@ export const countries = {
continent: "EU",
capital: "Ljubljana",
currency: ["EUR"],
- languages: ["sl"]
+ languages: ["sl"],
},
SJ: {
name: "Svalbard and Jan Mayen",
@@ -1813,7 +1813,7 @@ export const countries = {
continent: "EU",
capital: "Longyearbyen",
currency: ["NOK"],
- languages: ["no"]
+ languages: ["no"],
},
SK: {
name: "Slovakia",
@@ -1822,7 +1822,7 @@ export const countries = {
continent: "EU",
capital: "Bratislava",
currency: ["EUR"],
- languages: ["sk"]
+ languages: ["sk"],
},
SL: {
name: "Sierra Leone",
@@ -1831,7 +1831,7 @@ export const countries = {
continent: "AF",
capital: "Freetown",
currency: ["SLL"],
- languages: ["en"]
+ languages: ["en"],
},
SM: {
name: "San Marino",
@@ -1840,7 +1840,7 @@ export const countries = {
continent: "EU",
capital: "City of San Marino",
currency: ["EUR"],
- languages: ["it"]
+ languages: ["it"],
},
SN: {
name: "Senegal",
@@ -1849,7 +1849,7 @@ export const countries = {
continent: "AF",
capital: "Dakar",
currency: ["XOF"],
- languages: ["fr"]
+ languages: ["fr"],
},
SO: {
name: "Somalia",
@@ -1858,7 +1858,7 @@ export const countries = {
continent: "AF",
capital: "Mogadishu",
currency: ["SOS"],
- languages: ["so", "ar"]
+ languages: ["so", "ar"],
},
SR: {
name: "Suriname",
@@ -1867,7 +1867,7 @@ export const countries = {
continent: "SA",
capital: "Paramaribo",
currency: ["SRD"],
- languages: ["nl"]
+ languages: ["nl"],
},
SS: {
name: "South Sudan",
@@ -1876,7 +1876,7 @@ export const countries = {
continent: "AF",
capital: "Juba",
currency: ["SSP"],
- languages: ["en"]
+ languages: ["en"],
},
ST: {
name: "Sao Tome and Principe",
@@ -1885,7 +1885,7 @@ export const countries = {
continent: "AF",
capital: "São Tomé",
currency: ["STN"],
- languages: ["pt"]
+ languages: ["pt"],
},
SV: {
name: "El Salvador",
@@ -1894,7 +1894,7 @@ export const countries = {
continent: "NA",
capital: "San Salvador",
currency: ["SVC", "USD"],
- languages: ["es"]
+ languages: ["es"],
},
SX: {
name: "Sint Maarten",
@@ -1903,7 +1903,7 @@ export const countries = {
continent: "NA",
capital: "Philipsburg",
currency: ["ANG"],
- languages: ["nl", "en"]
+ languages: ["nl", "en"],
},
SY: {
name: "Syria",
@@ -1912,7 +1912,7 @@ export const countries = {
continent: "AS",
capital: "Damascus",
currency: ["SYP"],
- languages: ["ar"]
+ languages: ["ar"],
},
SZ: {
name: "Eswatini",
@@ -1921,7 +1921,7 @@ export const countries = {
continent: "AF",
capital: "Lobamba",
currency: ["SZL"],
- languages: ["en", "ss"]
+ languages: ["en", "ss"],
},
TC: {
name: "Turks and Caicos Islands",
@@ -1930,7 +1930,7 @@ export const countries = {
continent: "NA",
capital: "Cockburn Town",
currency: ["USD"],
- languages: ["en"]
+ languages: ["en"],
},
TD: {
name: "Chad",
@@ -1939,7 +1939,7 @@ export const countries = {
continent: "AF",
capital: "N'Djamena",
currency: ["XAF"],
- languages: ["fr", "ar"]
+ languages: ["fr", "ar"],
},
TF: {
name: "French Southern Territories",
@@ -1948,7 +1948,7 @@ export const countries = {
continent: "AN",
capital: "Port-aux-Français",
currency: ["EUR"],
- languages: ["fr"]
+ languages: ["fr"],
},
TG: {
name: "Togo",
@@ -1957,7 +1957,7 @@ export const countries = {
continent: "AF",
capital: "Lomé",
currency: ["XOF"],
- languages: ["fr"]
+ languages: ["fr"],
},
TH: {
name: "Thailand",
@@ -1966,7 +1966,7 @@ export const countries = {
continent: "AS",
capital: "Bangkok",
currency: ["THB"],
- languages: ["th"]
+ languages: ["th"],
},
TJ: {
name: "Tajikistan",
@@ -1975,7 +1975,7 @@ export const countries = {
continent: "AS",
capital: "Dushanbe",
currency: ["TJS"],
- languages: ["tg", "ru"]
+ languages: ["tg", "ru"],
},
TK: {
name: "Tokelau",
@@ -1984,7 +1984,7 @@ export const countries = {
continent: "OC",
capital: "Fakaofo",
currency: ["NZD"],
- languages: ["en"]
+ languages: ["en"],
},
TL: {
name: "East Timor",
@@ -1993,7 +1993,7 @@ export const countries = {
continent: "OC",
capital: "Dili",
currency: ["USD"],
- languages: ["pt"]
+ languages: ["pt"],
},
TM: {
name: "Turkmenistan",
@@ -2002,7 +2002,7 @@ export const countries = {
continent: "AS",
capital: "Ashgabat",
currency: ["TMT"],
- languages: ["tk", "ru"]
+ languages: ["tk", "ru"],
},
TN: {
name: "Tunisia",
@@ -2011,7 +2011,7 @@ export const countries = {
continent: "AF",
capital: "Tunis",
currency: ["TND"],
- languages: ["ar"]
+ languages: ["ar"],
},
TO: {
name: "Tonga",
@@ -2020,7 +2020,7 @@ export const countries = {
continent: "OC",
capital: "Nuku'alofa",
currency: ["TOP"],
- languages: ["en", "to"]
+ languages: ["en", "to"],
},
TR: {
name: "Turkey",
@@ -2030,7 +2030,7 @@ export const countries = {
continents: ["AS", "EU"],
capital: "Ankara",
currency: ["TRY"],
- languages: ["tr"]
+ languages: ["tr"],
},
TT: {
name: "Trinidad and Tobago",
@@ -2039,7 +2039,7 @@ export const countries = {
continent: "NA",
capital: "Port of Spain",
currency: ["TTD"],
- languages: ["en"]
+ languages: ["en"],
},
TV: {
name: "Tuvalu",
@@ -2048,7 +2048,7 @@ export const countries = {
continent: "OC",
capital: "Funafuti",
currency: ["AUD"],
- languages: ["en"]
+ languages: ["en"],
},
TW: {
name: "Taiwan",
@@ -2057,7 +2057,7 @@ export const countries = {
continent: "AS",
capital: "Taipei",
currency: ["TWD"],
- languages: ["zh"]
+ languages: ["zh"],
},
TZ: {
name: "Tanzania",
@@ -2066,7 +2066,7 @@ export const countries = {
continent: "AF",
capital: "Dodoma",
currency: ["TZS"],
- languages: ["sw", "en"]
+ languages: ["sw", "en"],
},
UA: {
name: "Ukraine",
@@ -2075,7 +2075,7 @@ export const countries = {
continent: "EU",
capital: "Kyiv",
currency: ["UAH"],
- languages: ["uk"]
+ languages: ["uk"],
},
UG: {
name: "Uganda",
@@ -2084,7 +2084,7 @@ export const countries = {
continent: "AF",
capital: "Kampala",
currency: ["UGX"],
- languages: ["en", "sw"]
+ languages: ["en", "sw"],
},
UM: {
name: "U.S. Minor Outlying Islands",
@@ -2093,7 +2093,7 @@ export const countries = {
continent: "OC",
capital: "",
currency: ["USD"],
- languages: ["en"]
+ languages: ["en"],
},
US: {
name: "United States",
@@ -2102,7 +2102,7 @@ export const countries = {
continent: "NA",
capital: "Washington D.C.",
currency: ["USD", "USN", "USS"],
- languages: ["en"]
+ languages: ["en"],
},
UY: {
name: "Uruguay",
@@ -2111,7 +2111,7 @@ export const countries = {
continent: "SA",
capital: "Montevideo",
currency: ["UYI", "UYU"],
- languages: ["es"]
+ languages: ["es"],
},
UZ: {
name: "Uzbekistan",
@@ -2120,7 +2120,7 @@ export const countries = {
continent: "AS",
capital: "Tashkent",
currency: ["UZS"],
- languages: ["uz", "ru"]
+ languages: ["uz", "ru"],
},
VA: {
name: "Vatican City",
@@ -2129,7 +2129,7 @@ export const countries = {
continent: "EU",
capital: "Vatican City",
currency: ["EUR"],
- languages: ["it", "la"]
+ languages: ["it", "la"],
},
VC: {
name: "Saint Vincent and the Grenadines",
@@ -2138,7 +2138,7 @@ export const countries = {
continent: "NA",
capital: "Kingstown",
currency: ["XCD"],
- languages: ["en"]
+ languages: ["en"],
},
VE: {
name: "Venezuela",
@@ -2147,7 +2147,7 @@ export const countries = {
continent: "SA",
capital: "Caracas",
currency: ["VES"],
- languages: ["es"]
+ languages: ["es"],
},
VG: {
name: "British Virgin Islands",
@@ -2156,7 +2156,7 @@ export const countries = {
continent: "NA",
capital: "Road Town",
currency: ["USD"],
- languages: ["en"]
+ languages: ["en"],
},
VI: {
name: "U.S. Virgin Islands",
@@ -2165,7 +2165,7 @@ export const countries = {
continent: "NA",
capital: "Charlotte Amalie",
currency: ["USD"],
- languages: ["en"]
+ languages: ["en"],
},
VN: {
name: "Vietnam",
@@ -2174,7 +2174,7 @@ export const countries = {
continent: "AS",
capital: "Hanoi",
currency: ["VND"],
- languages: ["vi"]
+ languages: ["vi"],
},
VU: {
name: "Vanuatu",
@@ -2183,7 +2183,7 @@ export const countries = {
continent: "OC",
capital: "Port Vila",
currency: ["VUV"],
- languages: ["bi", "en", "fr"]
+ languages: ["bi", "en", "fr"],
},
WF: {
name: "Wallis and Futuna",
@@ -2192,7 +2192,7 @@ export const countries = {
continent: "OC",
capital: "Mata-Utu",
currency: ["XPF"],
- languages: ["fr"]
+ languages: ["fr"],
},
WS: {
name: "Samoa",
@@ -2201,7 +2201,7 @@ export const countries = {
continent: "OC",
capital: "Apia",
currency: ["WST"],
- languages: ["sm", "en"]
+ languages: ["sm", "en"],
},
XK: {
name: "Kosovo",
@@ -2211,7 +2211,7 @@ export const countries = {
capital: "Pristina",
currency: ["EUR"],
languages: ["sq", "sr"],
- userAssigned: true
+ userAssigned: true,
},
YE: {
name: "Yemen",
@@ -2220,7 +2220,7 @@ export const countries = {
continent: "AS",
capital: "Sana'a",
currency: ["YER"],
- languages: ["ar"]
+ languages: ["ar"],
},
YT: {
name: "Mayotte",
@@ -2229,7 +2229,7 @@ export const countries = {
continent: "AF",
capital: "Mamoudzou",
currency: ["EUR"],
- languages: ["fr"]
+ languages: ["fr"],
},
ZA: {
name: "South Africa",
@@ -2238,7 +2238,7 @@ export const countries = {
continent: "AF",
capital: "Pretoria",
currency: ["ZAR"],
- languages: ["af", "en", "nr", "st", "ss", "tn", "ts", "ve", "xh", "zu"]
+ languages: ["af", "en", "nr", "st", "ss", "tn", "ts", "ve", "xh", "zu"],
},
ZM: {
name: "Zambia",
@@ -2247,7 +2247,7 @@ export const countries = {
continent: "AF",
capital: "Lusaka",
currency: ["ZMW"],
- languages: ["en"]
+ languages: ["en"],
},
ZW: {
name: "Zimbabwe",
@@ -2256,6 +2256,6 @@ export const countries = {
continent: "AF",
capital: "Harare",
currency: ["USD", "ZAR", "BWP", "GBP", "AUD", "CNY", "INR", "JPY"],
- languages: ["en", "sn", "nd"]
- }
+ languages: ["en", "sn", "nd"],
+ },
};
diff --git a/apps/api/src/lib/validateUrl.test.ts b/apps/api/src/lib/validateUrl.test.ts
index 81c150fb..e417b444 100644
--- a/apps/api/src/lib/validateUrl.test.ts
+++ b/apps/api/src/lib/validateUrl.test.ts
@@ -20,7 +20,7 @@ describe("isSameDomain", () => {
it("should return true for a subdomain with different protocols", () => {
const result = isSameDomain(
"https://sub.example.com",
- "http://example.com"
+ "http://example.com",
);
expect(result).toBe(true);
});
@@ -35,7 +35,7 @@ describe("isSameDomain", () => {
it("should return true for a subdomain with www prefix", () => {
const result = isSameDomain(
"http://www.sub.example.com",
- "http://example.com"
+ "http://example.com",
);
expect(result).toBe(true);
});
@@ -43,7 +43,7 @@ describe("isSameDomain", () => {
it("should return true for the same domain with www prefix", () => {
const result = isSameDomain(
"http://docs.s.s.example.com",
- "http://example.com"
+ "http://example.com",
);
expect(result).toBe(true);
});
@@ -53,7 +53,7 @@ describe("isSameSubdomain", () => {
it("should return false for a subdomain", () => {
const result = isSameSubdomain(
"http://example.com",
- "http://docs.example.com"
+ "http://docs.example.com",
);
expect(result).toBe(false);
});
@@ -61,7 +61,7 @@ describe("isSameSubdomain", () => {
it("should return true for the same subdomain", () => {
const result = isSameSubdomain(
"http://docs.example.com",
- "http://docs.example.com"
+ "http://docs.example.com",
);
expect(result).toBe(true);
});
@@ -69,7 +69,7 @@ describe("isSameSubdomain", () => {
it("should return false for different subdomains", () => {
const result = isSameSubdomain(
"http://docs.example.com",
- "http://blog.example.com"
+ "http://blog.example.com",
);
expect(result).toBe(false);
});
@@ -89,7 +89,7 @@ describe("isSameSubdomain", () => {
it("should return true for the same subdomain with different protocols", () => {
const result = isSameSubdomain(
"https://docs.example.com",
- "http://docs.example.com"
+ "http://docs.example.com",
);
expect(result).toBe(true);
});
@@ -97,7 +97,7 @@ describe("isSameSubdomain", () => {
it("should return true for the same subdomain with www prefix", () => {
const result = isSameSubdomain(
"http://www.docs.example.com",
- "http://docs.example.com"
+ "http://docs.example.com",
);
expect(result).toBe(true);
});
@@ -105,7 +105,7 @@ describe("isSameSubdomain", () => {
it("should return false for a subdomain with www prefix and different subdomain", () => {
const result = isSameSubdomain(
"http://www.docs.example.com",
- "http://blog.example.com"
+ "http://blog.example.com",
);
expect(result).toBe(false);
});
@@ -117,7 +117,7 @@ describe("removeDuplicateUrls", () => {
"http://example.com",
"https://example.com",
"http://www.example.com",
- "https://www.example.com"
+ "https://www.example.com",
];
const result = removeDuplicateUrls(urls);
expect(result).toEqual(["https://example.com"]);
@@ -128,14 +128,14 @@ describe("removeDuplicateUrls", () => {
"https://example.com/page1",
"https://example.com/page2",
"https://example.com/page1?param=1",
- "https://example.com/page1#section1"
+ "https://example.com/page1#section1",
];
const result = removeDuplicateUrls(urls);
expect(result).toEqual([
"https://example.com/page1",
"https://example.com/page2",
"https://example.com/page1?param=1",
- "https://example.com/page1#section1"
+ "https://example.com/page1#section1",
]);
});
diff --git a/apps/api/src/lib/withAuth.ts b/apps/api/src/lib/withAuth.ts
index ab3f4d4b..a585fe0a 100644
--- a/apps/api/src/lib/withAuth.ts
+++ b/apps/api/src/lib/withAuth.ts
@@ -8,7 +8,7 @@ let warningCount = 0;
export function withAuth<T, U extends any[]>(
originalFunction: (...args: U) => Promise<T>,
- mockSuccess: T
+ mockSuccess: T,
) {
return async function (...args: U): Promise<T> {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts
index 981189ab..dc907371 100644
--- a/apps/api/src/main/runWebScraper.ts
+++ b/apps/api/src/main/runWebScraper.ts
@@ -2,7 +2,7 @@ import { Job } from "bullmq";
import {
WebScraperOptions,
RunWebScraperParams,
- RunWebScraperResult
+ RunWebScraperResult,
} from "../types";
import { billTeam } from "../services/billing/credit_billing";
import { Document } from "../controllers/v1/types";
@@ -13,14 +13,14 @@ import { configDotenv } from "dotenv";
import {
EngineResultsTracker,
scrapeURL,
- ScrapeUrlResponse
+ ScrapeUrlResponse,
} from "../scraper/scrapeURL";
import { Engine } from "../scraper/scrapeURL/engines";
configDotenv();
export async function startWebScraperPipeline({
job,
- token
+ token,
}: {
job: Job<WebScraperOptions> & { id: string };
token: string;
@@ -32,9 +32,9 @@ export async function startWebScraperPipeline({
...job.data.scrapeOptions,
...(job.data.crawl_id
? {
- formats: job.data.scrapeOptions.formats.concat(["rawHtml"])
+ formats: job.data.scrapeOptions.formats.concat(["rawHtml"]),
}
- : {})
+ : {}),
},
internalOptions: job.data.internalOptions,
// onSuccess: (result, mode) => {
@@ -48,7 +48,7 @@ export async function startWebScraperPipeline({
team_id: job.data.team_id,
bull_job_id: job.id.toString(),
priority: job.opts.priority,
- is_scrape: job.data.is_scrape ?? false
+ is_scrape: job.data.is_scrape ?? false,
});
}
@@ -62,14 +62,14 @@ export async function runWebScraper({
team_id,
bull_job_id,
priority,
- is_scrape = false
+ is_scrape = false,
}: RunWebScraperParams): Promise<ScrapeUrlResponse> {
let response: ScrapeUrlResponse | undefined = undefined;
let engines: EngineResultsTracker = {};
try {
response = await scrapeURL(bull_job_id, url, scrapeOptions, {
priority,
- ...internalOptions
+ ...internalOptions,
});
if (!response.success) {
if (response.error instanceof Error) {
@@ -81,7 +81,7 @@ export async function runWebScraper({
? JSON.stringify(response.error)
: typeof response.error === "object"
? JSON.stringify({ ...response.error })
- : response.error)
+ : response.error),
);
}
}
@@ -94,7 +94,7 @@ export async function runWebScraper({
billTeam(team_id, undefined, creditsToBeBilled).catch((error) => {
logger.error(
- `Failed to bill team ${team_id} for ${creditsToBeBilled} credits: ${error}`
+ `Failed to bill team ${team_id} for ${creditsToBeBilled} credits: ${error}`,
);
// Optionally, you could notify an admin or add to a retry queue here
});
@@ -117,14 +117,14 @@ export async function runWebScraper({
return {
...response,
success: false,
- error
+ error,
};
} else {
return {
success: false,
error,
logs: ["no logs -- error coming from runWebScraper"],
- engines
+ engines,
};
}
// onError(error);
@@ -154,8 +154,8 @@ export async function runWebScraper({
: result.state === "timeout"
? "Timed out"
: undefined,
- time_taken: result.finishedAt - result.startedAt
- }
+ time_taken: result.finishedAt - result.startedAt,
+ },
});
}
}
@@ -166,7 +166,7 @@ const saveJob = async (
result: any,
token: string,
mode: string,
- engines?: EngineResultsTracker
+ engines?: EngineResultsTracker,
) => {
try {
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
diff --git a/apps/api/src/routes/admin.ts b/apps/api/src/routes/admin.ts
index 861ae9fc..ec9967b8 100644
--- a/apps/api/src/routes/admin.ts
+++ b/apps/api/src/routes/admin.ts
@@ -4,7 +4,7 @@ import {
autoscalerController,
checkQueuesController,
cleanBefore24hCompleteJobsController,
- queuesController
+ queuesController,
} from "../controllers/v0/admin/queue";
import { wrap } from "./v1";
import { acucCacheClearController } from "../controllers/v0/admin/acuc-cache-clear";
@@ -13,27 +13,27 @@ export const adminRouter = express.Router();
adminRouter.get(
`/admin/${process.env.BULL_AUTH_KEY}/redis-health`,
- redisHealthController
+ redisHealthController,
);
adminRouter.get(
`/admin/${process.env.BULL_AUTH_KEY}/clean-before-24h-complete-jobs`,
- cleanBefore24hCompleteJobsController
+ cleanBefore24hCompleteJobsController,
);
adminRouter.get(
`/admin/${process.env.BULL_AUTH_KEY}/check-queues`,
- checkQueuesController
+ checkQueuesController,
);
adminRouter.get(`/admin/${process.env.BULL_AUTH_KEY}/queues`, queuesController);
adminRouter.get(
`/admin/${process.env.BULL_AUTH_KEY}/autoscaler`,
- autoscalerController
+ autoscalerController,
);
adminRouter.post(
`/admin/${process.env.BULL_AUTH_KEY}/acuc-cache-clear`,
- wrap(acucCacheClearController)
+ wrap(acucCacheClearController),
);
diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts
index a9727e00..5daa077b 100644
--- a/apps/api/src/routes/v1.ts
+++ b/apps/api/src/routes/v1.ts
@@ -8,7 +8,7 @@ import {
ErrorResponse,
RequestWithACUC,
RequestWithAuth,
- RequestWithMaybeAuth
+ RequestWithMaybeAuth,
} from "../controllers/v1/types";
import { RateLimiterMode } from "../types";
import { authenticateUser } from "../controllers/auth";
@@ -33,7 +33,7 @@ import { extractController } from "../controllers/v1/extract";
// import { readinessController } from "../controllers/v1/readiness";
function checkCreditsMiddleware(
- minimum?: number
+ minimum?: number,
): (req: RequestWithAuth, res: Response, next: NextFunction) => void {
return (req, res, next) => {
(async () => {
@@ -44,20 +44,20 @@ function checkCreditsMiddleware(
const { success, remainingCredits, chunk } = await checkTeamCredits(
req.acuc,
req.auth.team_id,
- minimum ?? 1
+ minimum ?? 1,
);
if (chunk) {
req.acuc = chunk;
}
if (!success) {
logger.error(
- `Insufficient credits: ${JSON.stringify({ team_id: req.auth.team_id, minimum, remainingCredits })}`
+ `Insufficient credits: ${JSON.stringify({ team_id: req.auth.team_id, minimum, remainingCredits })}`,
);
if (!res.headersSent) {
return res.status(402).json({
success: false,
error:
- "Insufficient credits to perform this request. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing or try changing the request limit to a lower value."
+ "Insufficient credits to perform this request. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing or try changing the request limit to a lower value.",
});
}
}
@@ -68,7 +68,7 @@ function checkCreditsMiddleware(
}
export function authMiddleware(
- rateLimiterMode: RateLimiterMode
+ rateLimiterMode: RateLimiterMode,
): (req: RequestWithMaybeAuth, res: Response, next: NextFunction) => void {
return (req, res, next) => {
(async () => {
@@ -99,7 +99,7 @@ export function authMiddleware(
function idempotencyMiddleware(
req: Request,
res: Response,
- next: NextFunction
+ next: NextFunction,
) {
(async () => {
if (req.headers["x-idempotency-key"]) {
@@ -123,7 +123,7 @@ function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
return res.status(403).json({
success: false,
error:
- "URL is blocked intentionally. Firecrawl currently does not support social media scraping due to policy restrictions."
+ "URL is blocked intentionally. Firecrawl currently does not support social media scraping due to policy restrictions.",
});
}
}
@@ -131,7 +131,7 @@ function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
}
export function wrap(
- controller: (req: Request, res: Response) => Promise<any>
+ controller: (req: Request, res: Response) => Promise<any>,
): (req: Request, res: Response, next: NextFunction) => any {
return (req, res, next) => {
controller(req, res).catch((err) => next(err));
@@ -147,7 +147,7 @@ v1Router.post(
authMiddleware(RateLimiterMode.Scrape),
checkCreditsMiddleware(1),
blocklistMiddleware,
- wrap(scrapeController)
+ wrap(scrapeController),
);
v1Router.post(
@@ -156,7 +156,7 @@ v1Router.post(
checkCreditsMiddleware(),
blocklistMiddleware,
idempotencyMiddleware,
- wrap(crawlController)
+ wrap(crawlController),
);
v1Router.post(
@@ -165,7 +165,7 @@ v1Router.post(
checkCreditsMiddleware(),
blocklistMiddleware,
idempotencyMiddleware,
- wrap(batchScrapeController)
+ wrap(batchScrapeController),
);
v1Router.post(
@@ -173,20 +173,20 @@ v1Router.post(
authMiddleware(RateLimiterMode.Map),
checkCreditsMiddleware(1),
blocklistMiddleware,
- wrap(mapController)
+ wrap(mapController),
);
v1Router.get(
"/crawl/:jobId",
authMiddleware(RateLimiterMode.CrawlStatus),
- wrap(crawlStatusController)
+ wrap(crawlStatusController),
);
v1Router.get(
"/batch/scrape/:jobId",
authMiddleware(RateLimiterMode.CrawlStatus),
// Yes, it uses the same controller as the normal crawl status controller
- wrap((req: any, res): any => crawlStatusController(req, res, true))
+ wrap((req: any, res): any => crawlStatusController(req, res, true)),
);
v1Router.get("/scrape/:jobId", wrap(scrapeStatusController));
@@ -194,7 +194,7 @@ v1Router.get("/scrape/:jobId", wrap(scrapeStatusController));
v1Router.get(
"/concurrency-check",
authMiddleware(RateLimiterMode.CrawlStatus),
- wrap(concurrencyCheckController)
+ wrap(concurrencyCheckController),
);
v1Router.ws("/crawl/:jobId", crawlStatusWSController);
@@ -203,7 +203,7 @@ v1Router.post(
"/extract",
authMiddleware(RateLimiterMode.Scrape),
checkCreditsMiddleware(1),
- wrap(extractController)
+ wrap(extractController),
);
// v1Router.post("/crawlWebsitePreview", crawlPreviewController);
@@ -211,7 +211,7 @@ v1Router.post(
v1Router.delete(
"/crawl/:jobId",
authMiddleware(RateLimiterMode.CrawlStatus),
- crawlCancelController
+ crawlCancelController,
);
// v1Router.get("/checkJobStatus/:jobId", crawlJobStatusPreviewController);
diff --git a/apps/api/src/run-req.ts b/apps/api/src/run-req.ts
index 61ee61bd..a7f4694a 100644
--- a/apps/api/src/run-req.ts
+++ b/apps/api/src/run-req.ts
@@ -18,20 +18,20 @@ async function sendCrawl(result: Result): Promise {
{
url: url,
crawlerOptions: {
- limit: 75
+ limit: 75,
},
pageOptions: {
includeHtml: true,
replaceAllPathsWithAbsolutePaths: true,
- waitFor: 1000
- }
+ waitFor: 1000,
+ },
},
{
headers: {
"Content-Type": "application/json",
- Authorization: `Bearer `
- }
- }
+ Authorization: `Bearer `,
+ },
+ },
);
result.idempotency_key = idempotencyKey;
return response.data.jobId;
@@ -51,9 +51,9 @@ async function getContent(result: Result): Promise {
{
headers: {
"Content-Type": "application/json",
- Authorization: `Bearer `
- }
- }
+ Authorization: `Bearer `,
+ },
+ },
);
if (response.data.status === "completed") {
result.result_data_jsonb = response.data.data;
@@ -97,11 +97,11 @@ async function processResults(results: Result[]): Promise {
// Save job id along with the start_url
const resultWithJobId = results.map((r) => ({
start_url: r.start_url,
- job_id: r.job_id
+ job_id: r.job_id,
}));
await fs.writeFile(
"results_with_job_id_4000_6000.json",
- JSON.stringify(resultWithJobId, null, 4)
+ JSON.stringify(resultWithJobId, null, 4),
);
} catch (error) {
console.error("Error writing to results_with_content.json:", error);
diff --git a/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts b/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts
index da2b7d61..897ea46c 100644
--- a/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts
+++ b/apps/api/src/scraper/WebScraper/__tests__/crawler.test.ts
@@ -32,7 +32,7 @@ describe("WebCrawler", () => {
getMatchingLineNumber: jest.fn().mockReturnValue(0),
getCrawlDelay: jest.fn().mockReturnValue(0),
getSitemaps: jest.fn().mockReturnValue([]),
- getPreferredHost: jest.fn().mockReturnValue("example.com")
+ getPreferredHost: jest.fn().mockReturnValue("example.com"),
});
});
@@ -46,7 +46,7 @@ describe("WebCrawler", () => {
includes: [],
excludes: [],
limit: limit, // Apply the limit
- maxCrawledDepth: 10
+ maxCrawledDepth: 10,
});
// Mock sitemap fetching function to return more links than the limit
@@ -56,7 +56,7 @@ describe("WebCrawler", () => {
initialUrl,
initialUrl + "/page1",
initialUrl + "/page2",
- initialUrl + "/page3"
+ initialUrl + "/page3",
]);
const filteredLinks = crawler["filterLinks"](
@@ -64,10 +64,10 @@ describe("WebCrawler", () => {
initialUrl,
initialUrl + "/page1",
initialUrl + "/page2",
- initialUrl + "/page3"
+ initialUrl + "/page3",
],
limit,
- 10
+ 10,
);
expect(filteredLinks.length).toBe(limit); // Check if the number of results respects the limit
diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts
index be3cdf72..19b0b5b4 100644
--- a/apps/api/src/scraper/WebScraper/crawler.ts
+++ b/apps/api/src/scraper/WebScraper/crawler.ts
@@ -40,7 +40,7 @@ export class WebCrawler {
allowBackwardCrawling = false,
allowExternalContentLinks = false,
allowSubdomains = false,
- ignoreRobotsTxt = false
+ ignoreRobotsTxt = false,
}: {
jobId: string;
initialUrl: string;
@@ -79,7 +79,7 @@ export class WebCrawler {
sitemapLinks: string[],
limit: number,
maxDepth: number,
- fromMap: boolean = false
+ fromMap: boolean = false,
): string[] {
// If the initial URL is a sitemap.xml, skip filtering
if (this.initialUrl.endsWith("sitemap.xml") && fromMap) {
@@ -95,7 +95,7 @@ export class WebCrawler {
this.logger.debug(`Error processing link: ${link}`, {
link,
error,
- method: "filterLinks"
+ method: "filterLinks",
});
return false;
}
@@ -112,7 +112,7 @@ export class WebCrawler {
if (this.excludes.length > 0 && this.excludes[0] !== "") {
if (
this.excludes.some((excludePattern) =>
- new RegExp(excludePattern).test(path)
+ new RegExp(excludePattern).test(path),
)
) {
return false;
@@ -123,7 +123,7 @@ export class WebCrawler {
if (this.includes.length > 0 && this.includes[0] !== "") {
if (
!this.includes.some((includePattern) =>
- new RegExp(includePattern).test(path)
+ new RegExp(includePattern).test(path),
)
) {
return false;
@@ -140,7 +140,7 @@ export class WebCrawler {
}
const initialHostname = normalizedInitialUrl.hostname.replace(
/^www\./,
- ""
+ "",
);
const linkHostname = normalizedLink.hostname.replace(/^www\./, "");
@@ -165,7 +165,7 @@ export class WebCrawler {
if (!isAllowed) {
this.logger.debug(`Link disallowed by robots.txt: ${link}`, {
method: "filterLinks",
- link
+ link,
});
return false;
}
@@ -183,12 +183,12 @@ export class WebCrawler {
let extraArgs = {};
if (skipTlsVerification) {
extraArgs["httpsAgent"] = new https.Agent({
- rejectUnauthorized: false
+ rejectUnauthorized: false,
});
}
const response = await axios.get(this.robotsTxtUrl, {
timeout: axiosTimeout,
- ...extraArgs
+ ...extraArgs,
});
return response.data;
}
@@ -199,10 +199,10 @@ export class WebCrawler {
public async tryGetSitemap(
fromMap: boolean = false,
- onlySitemap: boolean = false
+ onlySitemap: boolean = false,
): Promise<{ url: string; html: string }[] | null> {
this.logger.debug(`Fetching sitemap links from ${this.initialUrl}`, {
- method: "tryGetSitemap"
+ method: "tryGetSitemap",
});
const sitemapLinks = await this.tryFetchSitemapLinks(this.initialUrl);
if (fromMap && onlySitemap) {
@@ -213,7 +213,7 @@ export class WebCrawler {
sitemapLinks,
this.limit,
this.maxCrawledDepth,
- fromMap
+ fromMap,
);
return filteredLinks.map((link) => ({ url: link, html: "" }));
}
@@ -303,7 +303,7 @@ export class WebCrawler {
private isRobotsAllowed(
url: string,
- ignoreRobotsTxt: boolean = false
+ ignoreRobotsTxt: boolean = false,
): boolean {
return ignoreRobotsTxt
? true
@@ -352,7 +352,7 @@ export class WebCrawler {
url
.split("/")
.slice(3)
- .filter((subArray) => subArray.length > 0).length
+ .filter((subArray) => subArray.length > 0).length,
);
}
@@ -373,7 +373,7 @@ export class WebCrawler {
private isSubdomain(link: string): boolean {
return new URL(link, this.baseUrl).hostname.endsWith(
- "." + new URL(this.baseUrl).hostname.split(".").slice(-2).join(".")
+ "." + new URL(this.baseUrl).hostname.split(".").slice(-2).join("."),
);
}
@@ -405,7 +405,7 @@ export class WebCrawler {
".ttf",
".woff2",
".webp",
- ".inc"
+ ".inc",
];
try {
@@ -414,7 +414,7 @@ export class WebCrawler {
} catch (error) {
this.logger.error(`Error processing URL in isFile`, {
method: "isFile",
- error
+ error,
});
return false;
}
@@ -431,7 +431,7 @@ export class WebCrawler {
"github.com",
"calendly.com",
"discord.gg",
- "discord.com"
+ "discord.com",
];
return socialMediaOrEmail.some((ext) => url.includes(ext));
}
@@ -457,14 +457,14 @@ export class WebCrawler {
} catch (error) {
this.logger.debug(
`Failed to fetch sitemap with axios from ${sitemapUrl}`,
- { method: "tryFetchSitemapLinks", sitemapUrl, error }
+ { method: "tryFetchSitemapLinks", sitemapUrl, error },
);
if (error instanceof AxiosError && error.response?.status === 404) {
// ignore 404
} else {
const response = await getLinksFromSitemap(
{ sitemapUrl, mode: "fire-engine" },
- this.logger
+ this.logger,
);
if (response) {
sitemapLinks = response;
@@ -476,26 +476,26 @@ export class WebCrawler {
const baseUrlSitemap = `${this.baseUrl}/sitemap.xml`;
try {
const response = await axios.get(baseUrlSitemap, {
- timeout: axiosTimeout
+ timeout: axiosTimeout,
});
if (response.status === 200) {
sitemapLinks = await getLinksFromSitemap(
{ sitemapUrl: baseUrlSitemap, mode: "fire-engine" },
- this.logger
+ this.logger,
);
}
} catch (error) {
this.logger.debug(`Failed to fetch sitemap from ${baseUrlSitemap}`, {
method: "tryFetchSitemapLinks",
sitemapUrl: baseUrlSitemap,
- error
+ error,
});
if (error instanceof AxiosError && error.response?.status === 404) {
// ignore 404
} else {
sitemapLinks = await getLinksFromSitemap(
{ sitemapUrl: baseUrlSitemap, mode: "fire-engine" },
- this.logger
+ this.logger,
);
}
}
@@ -503,7 +503,7 @@ export class WebCrawler {
const normalizedUrl = normalizeUrl(url);
const normalizedSitemapLinks = sitemapLinks.map((link) =>
- normalizeUrl(link)
+ normalizeUrl(link),
);
// has to be greater than 0 to avoid adding the initial URL to the sitemap links, and preventing crawler to crawl
if (
diff --git a/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts b/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts
index ba77b78b..01c40de9 100644
--- a/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts
+++ b/apps/api/src/scraper/WebScraper/custom/handleCustomScraping.ts
@@ -2,7 +2,7 @@ import { logger } from "../../../lib/logger";
export async function handleCustomScraping(
text: string,
- url: string
+ url: string,
): Promise<{
scraper: string;
url: string;
@@ -15,7 +15,7 @@ export async function handleCustomScraping(
!url.includes("developers.notion.com")
) {
logger.debug(
- `Special use case detected for ${url}, using Fire Engine with wait time 1000ms`
+ `Special use case detected for ${url}, using Fire Engine with wait time 1000ms`,
);
return {
scraper: "fire-engine",
@@ -23,21 +23,21 @@ export async function handleCustomScraping(
waitAfterLoad: 1000,
pageOptions: {
scrollXPaths: [
- '//*[@id="ReferencePlayground"]/section[3]/div/pre/div/div/div[5]'
- ]
- }
+ '//*[@id="ReferencePlayground"]/section[3]/div/pre/div/div/div[5]',
+ ],
+ },
};
}
// Check for Vanta security portals
if (text.includes(' {
try {
let content: string = "";
@@ -29,7 +29,7 @@ export async function getLinksFromSitemap(
"sitemap",
sitemapUrl,
scrapeOptions.parse({ formats: ["rawHtml"] }),
- { forceEngine: "fire-engine;tlsclient", v0DisableJsDom: true }
+ { forceEngine: "fire-engine;tlsclient", v0DisableJsDom: true },
);
if (!response.success) {
throw response.error;
@@ -41,7 +41,7 @@ export async function getLinksFromSitemap(
method: "getLinksFromSitemap",
mode,
sitemapUrl,
- error
+ error,
});
return allUrls;
@@ -56,8 +56,8 @@ export async function getLinksFromSitemap(
.map((sitemap) =>
getLinksFromSitemap(
{ sitemapUrl: sitemap.loc[0], allUrls, mode },
- logger
- )
+ logger,
+ ),
);
await Promise.all(sitemapPromises);
} else if (root && root.url) {
@@ -66,7 +66,7 @@ export async function getLinksFromSitemap(
(url) =>
url.loc &&
url.loc.length > 0 &&
- !WebCrawler.prototype.isFile(url.loc[0])
+ !WebCrawler.prototype.isFile(url.loc[0]),
)
.map((url) => url.loc[0]);
allUrls.push(...validUrls);
@@ -76,7 +76,7 @@ export async function getLinksFromSitemap(
method: "getLinksFromSitemap",
mode,
sitemapUrl,
- error
+ error,
});
}
@@ -85,12 +85,12 @@ export async function getLinksFromSitemap(
export const fetchSitemapData = async (
url: string,
- timeout?: number
+ timeout?: number,
): Promise => {
const sitemapUrl = url.endsWith("/sitemap.xml") ? url : `${url}/sitemap.xml`;
try {
const response = await axios.get(sitemapUrl, {
- timeout: timeout || axiosTimeout
+ timeout: timeout || axiosTimeout,
});
if (response.status === 200) {
const xml = response.data;
diff --git a/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts b/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts
index d256aa44..d3963685 100644
--- a/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts
+++ b/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts
@@ -15,7 +15,7 @@ describe("Blocklist Functionality", () => {
"https://flickr.com/photos/johndoe",
"https://whatsapp.com/download",
"https://wechat.com/features",
- "https://telegram.org/apps"
+ "https://telegram.org/apps",
])("should return true for blocklisted URL %s", (url) => {
expect(isUrlBlocked(url)).toBe(true);
});
@@ -33,7 +33,7 @@ describe("Blocklist Functionality", () => {
"https://flickr.com/help/terms",
"https://whatsapp.com/legal",
"https://wechat.com/en/privacy-policy",
- "https://telegram.org/tos"
+ "https://telegram.org/tos",
])("should return false for allowed URLs with keywords %s", (url) => {
expect(isUrlBlocked(url)).toBe(false);
});
@@ -54,35 +54,35 @@ describe("Blocklist Functionality", () => {
"https://facebook.com.someotherdomain.com",
"https://www.facebook.com/profile",
"https://api.twitter.com/info",
- "https://instagram.com/accounts/login"
+ "https://instagram.com/accounts/login",
])(
"should return true for URLs with blocklisted domains in subdomains or paths %s",
(url) => {
expect(isUrlBlocked(url)).toBe(true);
- }
+ },
);
test.each([
"https://example.com/facebook.com",
"https://example.com/redirect?url=https://twitter.com",
- "https://facebook.com.policy.example.com"
+ "https://facebook.com.policy.example.com",
])(
"should return false for URLs where blocklisted domain is part of another domain or path %s",
(url) => {
expect(isUrlBlocked(url)).toBe(false);
- }
+ },
);
test.each(["https://FACEBOOK.com", "https://INSTAGRAM.com/@something"])(
"should handle case variations %s",
(url) => {
expect(isUrlBlocked(url)).toBe(true);
- }
+ },
);
test.each([
"https://facebook.com?redirect=https://example.com",
- "https://twitter.com?query=something"
+ "https://twitter.com?query=something",
])("should handle query parameters %s", (url) => {
expect(isUrlBlocked(url)).toBe(true);
});
diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts
index e60943e6..58fcade4 100644
--- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts
+++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts
@@ -18,7 +18,7 @@ const socialMediaBlocklist = [
"youtube.com",
"corterix.com",
"southwest.com",
- "ryanair.com"
+ "ryanair.com",
];
const allowedKeywords = [
@@ -41,7 +41,7 @@ const allowedKeywords = [
"://library.tiktok.com",
"://ads.tiktok.com",
"://tiktok.com/business",
- "://developers.facebook.com"
+ "://developers.facebook.com",
];
export function isUrlBlocked(url: string): boolean {
@@ -50,7 +50,7 @@ export function isUrlBlocked(url: string): boolean {
// Check if the URL contains any allowed keywords as whole words
if (
allowedKeywords.some((keyword) =>
- new RegExp(`\\b${keyword}\\b`, "i").test(lowerCaseUrl)
+ new RegExp(`\\b${keyword}\\b`, "i").test(lowerCaseUrl),
)
) {
return false;
@@ -68,7 +68,7 @@ export function isUrlBlocked(url: string): boolean {
const isBlocked = socialMediaBlocklist.some((domain) => {
const domainPattern = new RegExp(
`(^|\\.)${domain.replace(".", "\\.")}(\\.|$)`,
- "i"
+ "i",
);
return domainPattern.test(hostname);
});
diff --git a/apps/api/src/scraper/WebScraper/utils/maxDepthUtils.ts b/apps/api/src/scraper/WebScraper/utils/maxDepthUtils.ts
index 3db7c5c1..a58f9c4e 100644
--- a/apps/api/src/scraper/WebScraper/utils/maxDepthUtils.ts
+++ b/apps/api/src/scraper/WebScraper/utils/maxDepthUtils.ts
@@ -1,6 +1,6 @@
export function getAdjustedMaxDepth(
url: string,
- maxCrawlDepth: number
+ maxCrawlDepth: number,
): number {
const baseURLDepth = getURLDepth(url);
const adjustedMaxDepth = maxCrawlDepth + baseURLDepth;
diff --git a/apps/api/src/scraper/scrapeURL/engines/cache/index.ts b/apps/api/src/scraper/scrapeURL/engines/cache/index.ts
index f6ffcb13..f48806fd 100644
--- a/apps/api/src/scraper/scrapeURL/engines/cache/index.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/cache/index.ts
@@ -14,6 +14,6 @@ export async function scrapeCache(meta: Meta): Promise<EngineScrapeResult> {
url: entry.url,
html: entry.html,
statusCode: entry.statusCode,
- error: entry.error
+ error: entry.error,
};
}
diff --git a/apps/api/src/scraper/scrapeURL/engines/docx/index.ts b/apps/api/src/scraper/scrapeURL/engines/docx/index.ts
index 02ed0c3f..933d4d74 100644
--- a/apps/api/src/scraper/scrapeURL/engines/docx/index.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/docx/index.ts
@@ -10,6 +10,6 @@ export async function scrapeDOCX(meta: Meta): Promise<EngineScrapeResult> {
url: response.url,
statusCode: response.status,
- html: (await mammoth.convertToHtml({ path: tempFilePath })).value
+ html: (await mammoth.convertToHtml({ path: tempFilePath })).value,
};
}
diff --git a/apps/api/src/scraper/scrapeURL/engines/fetch/index.ts b/apps/api/src/scraper/scrapeURL/engines/fetch/index.ts
index 92f2d451..af6f57c0 100644
--- a/apps/api/src/scraper/scrapeURL/engines/fetch/index.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/fetch/index.ts
@@ -4,33 +4,33 @@ import { TimeoutError } from "../../error";
import { specialtyScrapeCheck } from "../utils/specialtyHandler";
export async function scrapeURLWithFetch(
- meta: Meta
+ meta: Meta,
): Promise<EngineScrapeResult> {
const timeout = 20000;
const response = await Promise.race([
fetch(meta.url, {
redirect: "follow",
- headers: meta.options.headers
+ headers: meta.options.headers,
}),
(async () => {
await new Promise((resolve) => setTimeout(() => resolve(null), timeout));
throw new TimeoutError(
"Fetch was unable to scrape the page before timing out",
- { cause: { timeout } }
+ { cause: { timeout } },
);
- })()
+ })(),
]);
specialtyScrapeCheck(
meta.logger.child({ method: "scrapeURLWithFetch/specialtyScrapeCheck" }),
- Object.fromEntries(response.headers as any)
+ Object.fromEntries(response.headers as any),
);
return {
url: response.url,
html: await response.text(),
- statusCode: response.status
+ statusCode: response.status,
// TODO: error?
};
}
diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts
index c3742d26..328931ba 100644
--- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts
@@ -31,10 +31,10 @@ const successSchema = z.object({
actionContent: z
.object({
url: z.string(),
- html: z.string()
+ html: z.string(),
})
.array()
- .optional()
+ .optional(),
});
export type FireEngineCheckStatusSuccess = z.infer<typeof successSchema>;
@@ -47,16 +47,16 @@ const processingSchema = z.object({
"waiting",
"waiting-children",
"unknown",
- "prioritized"
+ "prioritized",
]),
- processing: z.boolean()
+ processing: z.boolean(),
});
const failedSchema = z.object({
jobId: z.string(),
state: z.literal("failed"),
processing: z.literal(false),
- error: z.string()
+ error: z.string(),
});
export class StillProcessingError extends Error {
@@ -67,7 +67,7 @@ export class StillProcessingError extends Error {
export async function fireEngineCheckStatus(
logger: Logger,
- jobId: string
+ jobId: string,
): Promise<FireEngineCheckStatusSuccess> {
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
@@ -75,8 +75,8 @@ export async function fireEngineCheckStatus(
{
name: "fire-engine: Check status",
attributes: {
- jobId
- }
+ jobId,
+ },
},
async (span) => {
return await robustFetch({
@@ -87,12 +87,12 @@ export async function fireEngineCheckStatus(
...(Sentry.isInitialized()
? {
"sentry-trace": Sentry.spanToTraceHeader(span),
- baggage: Sentry.spanToBaggageHeader(span)
+ baggage: Sentry.spanToBaggageHeader(span),
}
- : {})
- }
+ : {}),
+ },
});
- }
+ },
);
const successParse = successSchema.safeParse(status);
@@ -115,23 +115,23 @@ export async function fireEngineCheckStatus(
throw new EngineError("Scrape job failed", {
cause: {
status,
- jobId
- }
+ jobId,
+ },
});
}
} else {
logger.debug("Check status returned response not matched by any schema", {
status,
- jobId
+ jobId,
});
throw new Error(
"Check status returned response not matched by any schema",
{
cause: {
status,
- jobId
- }
- }
+ jobId,
+ },
+ },
);
}
}
diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/delete.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/delete.ts
index 96d73390..d5fe58cb 100644
--- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/delete.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/delete.ts
@@ -10,8 +10,8 @@ export async function fireEngineDelete(logger: Logger, jobId: string) {
{
name: "fire-engine: Delete scrape",
attributes: {
- jobId
- }
+ jobId,
+ },
},
async (span) => {
await robustFetch({
@@ -21,15 +21,15 @@ export async function fireEngineDelete(logger: Logger, jobId: string) {
...(Sentry.isInitialized()
? {
"sentry-trace": Sentry.spanToTraceHeader(span),
- baggage: Sentry.spanToBaggageHeader(span)
+ baggage: Sentry.spanToBaggageHeader(span),
}
- : {})
+ : {}),
},
ignoreResponse: true,
ignoreFailure: true,
- logger: logger.child({ method: "fireEngineDelete/robustFetch", jobId })
+ logger: logger.child({ method: "fireEngineDelete/robustFetch", jobId }),
});
- }
+ },
);
// We do not care whether this fails or not.
diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts
index 851b8faf..3fc32835 100644
--- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts
@@ -5,13 +5,13 @@ import {
FireEngineScrapeRequestChromeCDP,
FireEngineScrapeRequestCommon,
FireEngineScrapeRequestPlaywright,
- FireEngineScrapeRequestTLSClient
+ FireEngineScrapeRequestTLSClient,
} from "./scrape";
import { EngineScrapeResult } from "..";
import {
fireEngineCheckStatus,
FireEngineCheckStatusSuccess,
- StillProcessingError
+ StillProcessingError,
} from "./checkStatus";
import { EngineError, SiteError, TimeoutError } from "../../error";
import * as Sentry from "@sentry/node";
@@ -27,15 +27,15 @@ async function performFireEngineScrape<
Engine extends
| FireEngineScrapeRequestChromeCDP
| FireEngineScrapeRequestPlaywright
- | FireEngineScrapeRequestTLSClient
+ | FireEngineScrapeRequestTLSClient,
>(
logger: Logger,
request: FireEngineScrapeRequestCommon & Engine,
- timeout = defaultTimeout
+ timeout = defaultTimeout,
): Promise<FireEngineCheckStatusSuccess> {
const scrape = await fireEngineScrape(
logger.child({ method: "fireEngineScrape" }),
- request
+ request,
);
const startTime = Date.now();
@@ -47,25 +47,25 @@ async function performFireEngineScrape<
if (errors.length >= errorLimit) {
logger.error("Error limit hit.", { errors });
throw new Error("Error limit hit. See e.cause.errors for errors.", {
- cause: { errors }
+ cause: { errors },
});
}
if (Date.now() - startTime > timeout) {
logger.info(
"Fire-engine was unable to scrape the page before timing out.",
- { errors, timeout }
+ { errors, timeout },
);
throw new TimeoutError(
"Fire-engine was unable to scrape the page before timing out",
- { cause: { errors, timeout } }
+ { cause: { errors, timeout } },
);
}
try {
status = await fireEngineCheckStatus(
logger.child({ method: "fireEngineCheckStatus" }),
- scrape.jobId
+ scrape.jobId,
);
} catch (error) {
if (error instanceof StillProcessingError) {
@@ -73,7 +73,7 @@ async function performFireEngineScrape<
} else if (error instanceof EngineError || error instanceof SiteError) {
logger.debug("Fire-engine scrape job failed.", {
error,
- jobId: scrape.jobId
+ jobId: scrape.jobId,
});
throw error;
} else {
@@ -81,7 +81,7 @@ async function performFireEngineScrape<
errors.push(error);
logger.debug(
`An unexpeceted error occurred while calling checkStatus. Error counter is now at ${errors.length}.`,
- { error, jobId: scrape.jobId }
+ { error, jobId: scrape.jobId },
);
}
}
@@ -93,7 +93,7 @@ async function performFireEngineScrape<
}
export async function scrapeURLWithFireEngineChromeCDP(
- meta: Meta
+ meta: Meta,
): Promise<EngineScrapeResult> {
const actions: Action[] = [
// Transform waitFor option into an action (unsupported by chrome-cdp)
@@ -101,8 +101,8 @@ export async function scrapeURLWithFireEngineChromeCDP(
? [
{
type: "wait" as const,
- milliseconds: meta.options.waitFor
- }
+ milliseconds: meta.options.waitFor,
+ },
]
: []),
@@ -112,13 +112,13 @@ export async function scrapeURLWithFireEngineChromeCDP(
? [
{
type: "screenshot" as const,
- fullPage: meta.options.formats.includes("screenshot@fullPage")
- }
+ fullPage: meta.options.formats.includes("screenshot@fullPage"),
+ },
]
: []),
// Include specified actions
- ...(meta.options.actions ?? [])
+ ...(meta.options.actions ?? []),
];
const request: FireEngineScrapeRequestCommon &
@@ -130,36 +130,36 @@ export async function scrapeURLWithFireEngineChromeCDP(
headers: meta.options.headers,
...(actions.length > 0
? {
- actions
+ actions,
}
: {}),
priority: meta.internalOptions.priority,
geolocation: meta.options.geolocation,
mobile: meta.options.mobile,
timeout: meta.options.timeout === undefined ? 300000 : undefined, // TODO: better timeout logic
- disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache
+ disableSmartWaitCache: meta.internalOptions.disableSmartWaitCache,
// TODO: scrollXPaths
};
const totalWait = actions.reduce(
(a, x) => (x.type === "wait" ? (x.milliseconds ?? 1000) + a : a),
- 0
+ 0,
);
let response = await performFireEngineScrape(
meta.logger.child({
method: "scrapeURLWithFireEngineChromeCDP/callFireEngine",
- request
+ request,
}),
request,
- meta.options.timeout !== undefined ? defaultTimeout + totalWait : Infinity // TODO: better timeout handling
+ meta.options.timeout !== undefined ? defaultTimeout + totalWait : Infinity, // TODO: better timeout handling
);
specialtyScrapeCheck(
meta.logger.child({
- method: "scrapeURLWithFireEngineChromeCDP/specialtyScrapeCheck"
+ method: "scrapeURLWithFireEngineChromeCDP/specialtyScrapeCheck",
}),
- response.responseHeaders
+ response.responseHeaders,
);
if (
@@ -168,20 +168,20 @@ export async function scrapeURLWithFireEngineChromeCDP(
) {
meta.logger.debug(
"Transforming screenshots from actions into screenshot field",
- { screenshots: response.screenshots }
+ { screenshots: response.screenshots },
);
response.screenshot = (response.screenshots ?? [])[0];
(response.screenshots ?? []).splice(0, 1);
meta.logger.debug("Screenshot transformation done", {
screenshots: response.screenshots,
- screenshot: response.screenshot
+ screenshot: response.screenshot,
});
}
if (!response.url) {
meta.logger.warn("Fire-engine did not return the response's URL", {
response,
- sourceURL: meta.url
+ sourceURL: meta.url,
});
}
@@ -197,15 +197,15 @@ export async function scrapeURLWithFireEngineChromeCDP(
? {
actions: {
screenshots: response.screenshots ?? [],
- scrapes: response.actionContent ?? []
- }
+ scrapes: response.actionContent ?? [],
+ },
}
- : {})
+ : {}),
};
}
export async function scrapeURLWithFireEnginePlaywright(
- meta: Meta
+ meta: Meta,
): Promise<EngineScrapeResult> {
const request: FireEngineScrapeRequestCommon &
FireEngineScrapeRequestPlaywright = {
@@ -220,31 +220,31 @@ export async function scrapeURLWithFireEnginePlaywright(
wait: meta.options.waitFor,
geolocation: meta.options.geolocation,
- timeout: meta.options.timeout === undefined ? 300000 : undefined // TODO: better timeout logic
+ timeout: meta.options.timeout === undefined ? 300000 : undefined, // TODO: better timeout logic
};
let response = await performFireEngineScrape(
meta.logger.child({
method: "scrapeURLWithFireEngineChromeCDP/callFireEngine",
- request
+ request,
}),
request,
meta.options.timeout !== undefined
? defaultTimeout + meta.options.waitFor
- : Infinity // TODO: better timeout handling
+ : Infinity, // TODO: better timeout handling
);
specialtyScrapeCheck(
meta.logger.child({
- method: "scrapeURLWithFireEnginePlaywright/specialtyScrapeCheck"
+ method: "scrapeURLWithFireEnginePlaywright/specialtyScrapeCheck",
}),
- response.responseHeaders
+ response.responseHeaders,
);
if (!response.url) {
meta.logger.warn("Fire-engine did not return the response's URL", {
response,
- sourceURL: meta.url
+ sourceURL: meta.url,
});
}
@@ -257,14 +257,14 @@ export async function scrapeURLWithFireEnginePlaywright(
...(response.screenshots !== undefined && response.screenshots.length > 0
? {
- screenshot: response.screenshots[0]
+ screenshot: response.screenshots[0],
}
- : {})
+ : {}),
};
}
export async function scrapeURLWithFireEngineTLSClient(
- meta: Meta
+ meta: Meta,
): Promise<EngineScrapeResult> {
const request: FireEngineScrapeRequestCommon &
FireEngineScrapeRequestTLSClient = {
@@ -279,29 +279,29 @@ export async function scrapeURLWithFireEngineTLSClient(
geolocation: meta.options.geolocation,
disableJsDom: meta.internalOptions.v0DisableJsDom,
- timeout: meta.options.timeout === undefined ? 300000 : undefined // TODO: better timeout logic
+ timeout: meta.options.timeout === undefined ? 300000 : undefined, // TODO: better timeout logic
};
let response = await performFireEngineScrape(
meta.logger.child({
method: "scrapeURLWithFireEngineChromeCDP/callFireEngine",
- request
+ request,
}),
request,
- meta.options.timeout !== undefined ? defaultTimeout : Infinity // TODO: better timeout handling
+ meta.options.timeout !== undefined ? defaultTimeout : Infinity, // TODO: better timeout handling
);
specialtyScrapeCheck(
meta.logger.child({
- method: "scrapeURLWithFireEngineTLSClient/specialtyScrapeCheck"
+ method: "scrapeURLWithFireEngineTLSClient/specialtyScrapeCheck",
}),
- response.responseHeaders
+ response.responseHeaders,
);
if (!response.url) {
meta.logger.warn("Fire-engine did not return the response's URL", {
response,
- sourceURL: meta.url
+ sourceURL: meta.url,
});
}
@@ -310,6 +310,6 @@ export async function scrapeURLWithFireEngineTLSClient(
html: response.content,
error: response.pageError,
- statusCode: response.pageStatusCode
+ statusCode: response.pageStatusCode,
};
}
diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts
index ffca4b41..de6ac3f4 100644
--- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts
@@ -58,17 +58,17 @@ export type FireEngineScrapeRequestTLSClient = {
const schema = z.object({
jobId: z.string(),
- processing: z.boolean()
+ processing: z.boolean(),
});
export async function fireEngineScrape<
Engine extends
| FireEngineScrapeRequestChromeCDP
| FireEngineScrapeRequestPlaywright
- | FireEngineScrapeRequestTLSClient
+ | FireEngineScrapeRequestTLSClient,
>(
logger: Logger,
- request: FireEngineScrapeRequestCommon & Engine
+ request: FireEngineScrapeRequestCommon & Engine,
): Promise<z.infer<typeof schema>> {
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
@@ -78,8 +78,8 @@ export async function fireEngineScrape<
{
name: "fire-engine: Scrape",
attributes: {
- url: request.url
- }
+ url: request.url,
+ },
},
async (span) => {
return await robustFetch({
@@ -89,16 +89,16 @@ export async function fireEngineScrape<
...(Sentry.isInitialized()
? {
"sentry-trace": Sentry.spanToTraceHeader(span),
- baggage: Sentry.spanToBaggageHeader(span)
+ baggage: Sentry.spanToBaggageHeader(span),
}
- : {})
+ : {}),
},
body: request,
logger: logger.child({ method: "fireEngineScrape/robustFetch" }),
schema,
- tryCount: 3
+ tryCount: 3,
});
- }
+ },
);
return scrapeRequest;
diff --git a/apps/api/src/scraper/scrapeURL/engines/index.ts b/apps/api/src/scraper/scrapeURL/engines/index.ts
index 1d9db249..01ac0be9 100644
--- a/apps/api/src/scraper/scrapeURL/engines/index.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/index.ts
@@ -4,7 +4,7 @@ import { scrapeDOCX } from "./docx";
import {
scrapeURLWithFireEngineChromeCDP,
scrapeURLWithFireEnginePlaywright,
- scrapeURLWithFireEngineTLSClient
+ scrapeURLWithFireEngineTLSClient,
} from "./fire-engine";
import { scrapePDF } from "./pdf";
import { scrapeURLWithScrapingBee } from "./scrapingbee";
@@ -43,7 +43,7 @@ export const engines: Engine[] = [
? [
"fire-engine;chrome-cdp" as const,
"fire-engine;playwright" as const,
- "fire-engine;tlsclient" as const
+ "fire-engine;tlsclient" as const,
]
: []),
...(useScrapingBee
@@ -52,7 +52,7 @@ export const engines: Engine[] = [
...(usePlaywright ? ["playwright" as const] : []),
"fetch",
"pdf",
- "docx"
+ "docx",
];
export const featureFlags = [
@@ -66,7 +66,7 @@ export const featureFlags = [
"location",
"mobile",
"skipTlsVerification",
- "useFastMode"
+ "useFastMode",
] as const;
export type FeatureFlag = (typeof featureFlags)[number];
@@ -86,7 +86,7 @@ export const featureFlagOptions: {
useFastMode: { priority: 90 },
location: { priority: 10 },
mobile: { priority: 10 },
- skipTlsVerification: { priority: 10 }
+ skipTlsVerification: { priority: 10 },
} as const;
export type EngineScrapeResult = {
@@ -116,7 +116,7 @@ const engineHandlers: {
playwright: scrapeURLWithPlaywright,
fetch: scrapeURLWithFetch,
pdf: scrapePDF,
- docx: scrapeDOCX
+ docx: scrapeDOCX,
};
export const engineOptions: {
@@ -141,9 +141,9 @@ export const engineOptions: {
location: false,
mobile: false,
skipTlsVerification: false,
- useFastMode: false
+ useFastMode: false,
},
- quality: 1000 // cache should always be tried first
+ quality: 1000, // cache should always be tried first
},
"fire-engine;chrome-cdp": {
features: {
@@ -157,9 +157,9 @@ export const engineOptions: {
location: true,
mobile: true,
skipTlsVerification: true,
- useFastMode: false
+ useFastMode: false,
},
- quality: 50
+ quality: 50,
},
"fire-engine;playwright": {
features: {
@@ -173,9 +173,9 @@ export const engineOptions: {
location: false,
mobile: false,
skipTlsVerification: false,
- useFastMode: false
+ useFastMode: false,
},
- quality: 40
+ quality: 40,
},
scrapingbee: {
features: {
@@ -189,9 +189,9 @@ export const engineOptions: {
location: false,
mobile: false,
skipTlsVerification: false,
- useFastMode: false
+ useFastMode: false,
},
- quality: 30
+ quality: 30,
},
scrapingbeeLoad: {
features: {
@@ -205,9 +205,9 @@ export const engineOptions: {
location: false,
mobile: false,
skipTlsVerification: false,
- useFastMode: false
+ useFastMode: false,
},
- quality: 29
+ quality: 29,
},
playwright: {
features: {
@@ -221,9 +221,9 @@ export const engineOptions: {
location: false,
mobile: false,
skipTlsVerification: false,
- useFastMode: false
+ useFastMode: false,
},
- quality: 20
+ quality: 20,
},
"fire-engine;tlsclient": {
features: {
@@ -237,9 +237,9 @@ export const engineOptions: {
location: true,
mobile: false,
skipTlsVerification: false,
- useFastMode: true
+ useFastMode: true,
},
- quality: 10
+ quality: 10,
},
fetch: {
features: {
@@ -253,9 +253,9 @@ export const engineOptions: {
location: false,
mobile: false,
skipTlsVerification: false,
- useFastMode: true
+ useFastMode: true,
},
- quality: 5
+ quality: 5,
},
pdf: {
features: {
@@ -269,9 +269,9 @@ export const engineOptions: {
location: false,
mobile: false,
skipTlsVerification: false,
- useFastMode: true
+ useFastMode: true,
},
- quality: -10
+ quality: -10,
},
docx: {
features: {
@@ -285,10 +285,10 @@ export const engineOptions: {
location: false,
mobile: false,
skipTlsVerification: false,
- useFastMode: true
+ useFastMode: true,
},
- quality: -10
- }
+ quality: -10,
+ },
};
export function buildFallbackList(meta: Meta): {
@@ -297,7 +297,7 @@ export function buildFallbackList(meta: Meta): {
}[] {
const prioritySum = [...meta.featureFlags].reduce(
(a, x) => a + featureFlagOptions[x].priority,
- 0
+ 0,
);
const priorityThreshold = Math.floor(prioritySum / 2);
let selectedEngines: {
@@ -315,13 +315,13 @@ export function buildFallbackList(meta: Meta): {
const supportedFlags = new Set([
...Object.entries(engineOptions[engine].features)
.filter(
- ([k, v]) => meta.featureFlags.has(k as FeatureFlag) && v === true
+ ([k, v]) => meta.featureFlags.has(k as FeatureFlag) && v === true,
)
- .map(([k, _]) => k)
+ .map(([k, _]) => k),
]);
const supportScore = [...supportedFlags].reduce(
(a, x) => a + featureFlagOptions[x].priority,
- 0
+ 0,
);
const unsupportedFeatures = new Set([...meta.featureFlags]);
@@ -338,7 +338,7 @@ export function buildFallbackList(meta: Meta): {
prioritySum,
priorityThreshold,
featureFlags: [...meta.featureFlags],
- unsupportedFeatures
+ unsupportedFeatures,
});
} else {
meta.logger.debug(
@@ -348,22 +348,22 @@ export function buildFallbackList(meta: Meta): {
prioritySum,
priorityThreshold,
featureFlags: [...meta.featureFlags],
- unsupportedFeatures
- }
+ unsupportedFeatures,
+ },
);
}
}
if (selectedEngines.some((x) => engineOptions[x.engine].quality > 0)) {
selectedEngines = selectedEngines.filter(
- (x) => engineOptions[x.engine].quality > 0
+ (x) => engineOptions[x.engine].quality > 0,
);
}
selectedEngines.sort(
(a, b) =>
b.supportScore - a.supportScore ||
- engineOptions[b.engine].quality - engineOptions[a.engine].quality
+ engineOptions[b.engine].quality - engineOptions[a.engine].quality,
);
return selectedEngines;
@@ -371,16 +371,16 @@ export function buildFallbackList(meta: Meta): {
export async function scrapeURLWithEngine(
meta: Meta,
- engine: Engine
+ engine: Engine,
): Promise<EngineScrapeResult> {
const fn = engineHandlers[engine];
const logger = meta.logger.child({
method: fn.name ?? "scrapeURLWithEngine",
- engine
+ engine,
});
const _meta = {
...meta,
- logger
+ logger,
};
return await fn(_meta);
diff --git a/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts b/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts
index 62313a71..341a4f1a 100644
--- a/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts
@@ -14,10 +14,10 @@ type PDFProcessorResult = { html: string; markdown?: string };
async function scrapePDFWithLlamaParse(
meta: Meta,
- tempFilePath: string
+ tempFilePath: string,
): Promise<PDFProcessorResult> {
meta.logger.debug("Processing PDF document with LlamaIndex", {
- tempFilePath
+ tempFilePath,
});
const uploadForm = new FormData();
@@ -28,7 +28,7 @@ async function scrapePDFWithLlamaParse(
name: tempFilePath,
stream() {
return createReadStream(
- tempFilePath
+ tempFilePath,
) as unknown as ReadableStream;
},
arrayBuffer() {
@@ -41,22 +41,22 @@ async function scrapePDFWithLlamaParse(
slice(start, end, contentType) {
throw Error("Unimplemented in mock Blob: slice");
},
- type: "application/pdf"
+ type: "application/pdf",
} as Blob);
const upload = await robustFetch({
url: "https://api.cloud.llamaindex.ai/api/parsing/upload",
method: "POST",
headers: {
- Authorization: `Bearer ${process.env.LLAMAPARSE_API_KEY}`
+ Authorization: `Bearer ${process.env.LLAMAPARSE_API_KEY}`,
},
body: uploadForm,
logger: meta.logger.child({
- method: "scrapePDFWithLlamaParse/upload/robustFetch"
+ method: "scrapePDFWithLlamaParse/upload/robustFetch",
}),
schema: z.object({
- id: z.string()
- })
+ id: z.string(),
+ }),
});
const jobId = upload.id;
@@ -70,18 +70,18 @@ async function scrapePDFWithLlamaParse(
url: `https://api.cloud.llamaindex.ai/api/parsing/job/${jobId}/result/markdown`,
method: "GET",
headers: {
- Authorization: `Bearer ${process.env.LLAMAPARSE_API_KEY}`
+ Authorization: `Bearer ${process.env.LLAMAPARSE_API_KEY}`,
},
logger: meta.logger.child({
- method: "scrapePDFWithLlamaParse/result/robustFetch"
+ method: "scrapePDFWithLlamaParse/result/robustFetch",
}),
schema: z.object({
- markdown: z.string()
- })
+ markdown: z.string(),
+ }),
});
return {
markdown: result.markdown,
- html: await marked.parse(result.markdown, { async: true })
+ html: await marked.parse(result.markdown, { async: true }),
};
} catch (e) {
if (e instanceof Error && e.message === "Request sent failure status") {
@@ -93,7 +93,7 @@ async function scrapePDFWithLlamaParse(
throw new RemoveFeatureError(["pdf"]);
} else {
throw new Error("LlamaParse threw an error", {
- cause: e.cause
+ cause: e.cause,
});
}
} else {
@@ -109,7 +109,7 @@ async function scrapePDFWithLlamaParse(
async function scrapePDFWithParsePDF(
meta: Meta,
- tempFilePath: string
+ tempFilePath: string,
): Promise<PDFProcessorResult> {
meta.logger.debug("Processing PDF document with parse-pdf", { tempFilePath });
@@ -118,7 +118,7 @@ async function scrapePDFWithParsePDF(
return {
markdown: escaped,
- html: escaped
+ html: escaped,
};
}
@@ -131,7 +131,7 @@ export async function scrapePDF(meta: Meta): Promise<EngineScrapeResult> {
statusCode: file.response.status,
html: content,
- markdown: content
+ markdown: content,
};
}
@@ -144,22 +144,22 @@ export async function scrapePDF(meta: Meta): Promise<EngineScrapeResult> {
{
...meta,
logger: meta.logger.child({
- method: "scrapePDF/scrapePDFWithLlamaParse"
- })
+ method: "scrapePDF/scrapePDFWithLlamaParse",
+ }),
},
- tempFilePath
+ tempFilePath,
);
} catch (error) {
if (error instanceof Error && error.message === "LlamaParse timed out") {
meta.logger.warn("LlamaParse timed out -- falling back to parse-pdf", {
- error
+ error,
});
} else if (error instanceof RemoveFeatureError) {
throw error;
} else {
meta.logger.warn(
"LlamaParse failed to parse PDF -- falling back to parse-pdf",
- { error }
+ { error },
);
Sentry.captureException(error);
}
@@ -170,9 +170,11 @@ export async function scrapePDF(meta: Meta): Promise<EngineScrapeResult> {
result = await scrapePDFWithParsePDF(
{
...meta,
- logger: meta.logger.child({ method: "scrapePDF/scrapePDFWithParsePDF" })
+ logger: meta.logger.child({
+ method: "scrapePDF/scrapePDFWithParsePDF",
+ }),
},
- tempFilePath
+ tempFilePath,
);
}
@@ -183,6 +185,6 @@ export async function scrapePDF(meta: Meta): Promise<EngineScrapeResult> {
statusCode: response.status,
html: result.html,
- markdown: result.markdown
+ markdown: result.markdown,
};
}
diff --git a/apps/api/src/scraper/scrapeURL/engines/playwright/index.ts b/apps/api/src/scraper/scrapeURL/engines/playwright/index.ts
index a8c16045..c92b1d90 100644
--- a/apps/api/src/scraper/scrapeURL/engines/playwright/index.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/playwright/index.ts
@@ -5,7 +5,7 @@ import { TimeoutError } from "../../error";
import { robustFetch } from "../../lib/fetch";
export async function scrapeURLWithPlaywright(
- meta: Meta
+ meta: Meta,
): Promise<EngineScrapeResult> {
const timeout = 20000 + meta.options.waitFor;
@@ -13,35 +13,35 @@ export async function scrapeURLWithPlaywright(
await robustFetch({
url: process.env.PLAYWRIGHT_MICROSERVICE_URL!,
headers: {
- "Content-Type": "application/json"
+ "Content-Type": "application/json",
},
body: {
url: meta.url,
wait_after_load: meta.options.waitFor,
timeout,
- headers: meta.options.headers
+ headers: meta.options.headers,
},
method: "POST",
logger: meta.logger.child("scrapeURLWithPlaywright/robustFetch"),
schema: z.object({
content: z.string(),
pageStatusCode: z.number(),
- pageError: z.string().optional()
- })
+ pageError: z.string().optional(),
+ }),
}),
(async () => {
await new Promise((resolve) => setTimeout(() => resolve(null), 20000));
throw new TimeoutError(
"Playwright was unable to scrape the page before timing out",
- { cause: { timeout } }
+ { cause: { timeout } },
);
- })()
+ })(),
]);
return {
url: meta.url, // TODO: impove redirect following
html: response.content,
statusCode: response.pageStatusCode,
- error: response.pageError
+ error: response.pageError,
};
}
diff --git a/apps/api/src/scraper/scrapeURL/engines/scrapingbee/index.ts b/apps/api/src/scraper/scrapeURL/engines/scrapingbee/index.ts
index 8388016a..50ac502b 100644
--- a/apps/api/src/scraper/scrapeURL/engines/scrapingbee/index.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/scrapingbee/index.ts
@@ -8,7 +8,7 @@ import { EngineError } from "../../error";
const client = new ScrapingBeeClient(process.env.SCRAPING_BEE_API_KEY!);
export function scrapeURLWithScrapingBee(
- wait_browser: "domcontentloaded" | "networkidle2"
+ wait_browser: "domcontentloaded" | "networkidle2",
): (meta: Meta) => Promise<EngineScrapeResult> {
return async (meta: Meta): Promise<EngineScrapeResult> => {
let response: AxiosResponse;
@@ -23,12 +23,12 @@ export function scrapeURLWithScrapingBee(
json_response: true,
screenshot: meta.options.formats.includes("screenshot"),
screenshot_full_page: meta.options.formats.includes(
- "screenshot@fullPage"
- )
+ "screenshot@fullPage",
+ ),
},
headers: {
- "ScrapingService-Request": "TRUE" // this is sent to the page, not to ScrapingBee - mogery
- }
+ "ScrapingService-Request": "TRUE", // this is sent to the page, not to ScrapingBee - mogery
+ },
});
} catch (error) {
if (error instanceof AxiosError && error.response !== undefined) {
@@ -51,25 +51,25 @@ export function scrapeURLWithScrapingBee(
if (body.errors || body.body?.error || isHiddenEngineError) {
meta.logger.error("ScrapingBee threw an error", {
- body: body.body?.error ?? body.errors ?? body.body ?? body
+ body: body.body?.error ?? body.errors ?? body.body ?? body,
});
throw new EngineError("Engine error #34", {
- cause: { body, statusCode: response.status }
+ cause: { body, statusCode: response.status },
});
}
if (typeof body.body !== "string") {
meta.logger.error("ScrapingBee: Body is not string??", { body });
throw new EngineError("Engine error #35", {
- cause: { body, statusCode: response.status }
+ cause: { body, statusCode: response.status },
});
}
specialtyScrapeCheck(
meta.logger.child({
- method: "scrapeURLWithScrapingBee/specialtyScrapeCheck"
+ method: "scrapeURLWithScrapingBee/specialtyScrapeCheck",
}),
- body.headers
+ body.headers,
);
return {
@@ -80,9 +80,9 @@ export function scrapeURLWithScrapingBee(
statusCode: response.status,
...(body.screenshot
? {
- screenshot: `data:image/png;base64,${body.screenshot}`
+ screenshot: `data:image/png;base64,${body.screenshot}`,
}
- : {})
+ : {}),
};
};
}
diff --git a/apps/api/src/scraper/scrapeURL/engines/utils/downloadFile.ts b/apps/api/src/scraper/scrapeURL/engines/utils/downloadFile.ts
index 84a52425..e2e3ee6f 100644
--- a/apps/api/src/scraper/scrapeURL/engines/utils/downloadFile.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/utils/downloadFile.ts
@@ -13,13 +13,13 @@ export async function fetchFileToBuffer(url: string): Promise<{
const response = await fetch(url); // TODO: maybe we could use tlsclient for this? for proxying
return {
response,
- buffer: Buffer.from(await response.arrayBuffer())
+ buffer: Buffer.from(await response.arrayBuffer()),
};
}
export async function downloadFile(
id: string,
- url: string
+ url: string,
): Promise<{
response: undici.Response;
tempFilePath: string;
@@ -32,9 +32,9 @@ export async function downloadFile(
const response = await undici.fetch(url, {
dispatcher: new undici.Agent({
connect: {
- rejectUnauthorized: false
- }
- })
+ rejectUnauthorized: false,
+ },
+ }),
});
// This should never happen in the current state of JS (2024), but let's check anyways.
@@ -47,13 +47,13 @@ export async function downloadFile(
tempFileWrite.on("finish", () => resolve(null));
tempFileWrite.on("error", (error) => {
reject(
- new EngineError("Failed to write to temp file", { cause: { error } })
+ new EngineError("Failed to write to temp file", { cause: { error } }),
);
});
});
return {
response,
- tempFilePath
+ tempFilePath,
};
}
diff --git a/apps/api/src/scraper/scrapeURL/engines/utils/specialtyHandler.ts b/apps/api/src/scraper/scrapeURL/engines/utils/specialtyHandler.ts
index 4f497e52..352f6a7e 100644
--- a/apps/api/src/scraper/scrapeURL/engines/utils/specialtyHandler.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/utils/specialtyHandler.ts
@@ -3,15 +3,15 @@ import { AddFeatureError } from "../../error";
export function specialtyScrapeCheck(
logger: Logger,
- headers: Record<string, string> | undefined
+ headers: Record<string, string> | undefined,
) {
const contentType = (Object.entries(headers ?? {}).find(
- (x) => x[0].toLowerCase() === "content-type"
+ (x) => x[0].toLowerCase() === "content-type",
) ?? [])[1];
if (contentType === undefined) {
logger.warn("Failed to check contentType -- was not present in headers", {
- headers
+ headers,
});
} else if (
contentType === "application/pdf" ||
@@ -23,7 +23,7 @@ export function specialtyScrapeCheck(
contentType ===
"application/vnd.openxmlformats-officedocument.wordprocessingml.document" ||
contentType.startsWith(
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document;"
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document;",
)
) {
// .docx
diff --git a/apps/api/src/scraper/scrapeURL/error.ts b/apps/api/src/scraper/scrapeURL/error.ts
index c6eb45e3..ec044745 100644
--- a/apps/api/src/scraper/scrapeURL/error.ts
+++ b/apps/api/src/scraper/scrapeURL/error.ts
@@ -19,7 +19,7 @@ export class NoEnginesLeftError extends Error {
constructor(fallbackList: Engine[], results: EngineResultsTracker) {
super(
- "All scraping engines failed! -- Double check the URL to make sure it's not broken. If the issue persists, contact us at help@firecrawl.com."
+ "All scraping engines failed! -- Double check the URL to make sure it's not broken. If the issue persists, contact us at help@firecrawl.com.",
);
this.fallbackList = fallbackList;
this.results = results;
@@ -40,7 +40,8 @@ export class RemoveFeatureError extends Error {
constructor(featureFlags: FeatureFlag[]) {
super(
- "Incorrect feature flags have been discovered: " + featureFlags.join(", ")
+ "Incorrect feature flags have been discovered: " +
+ featureFlags.join(", "),
);
this.featureFlags = featureFlags;
}
@@ -50,7 +51,7 @@ export class SiteError extends Error {
public code: string;
constructor(code: string) {
super(
- "Specified URL is failing to load in the browser. Error code: " + code
+ "Specified URL is failing to load in the browser. Error code: " + code,
);
this.code = code;
}
diff --git a/apps/api/src/scraper/scrapeURL/index.ts b/apps/api/src/scraper/scrapeURL/index.ts
index 0a0b6c92..a3eb6f1e 100644
--- a/apps/api/src/scraper/scrapeURL/index.ts
+++ b/apps/api/src/scraper/scrapeURL/index.ts
@@ -8,7 +8,7 @@ import {
Engine,
EngineScrapeResult,
FeatureFlag,
- scrapeURLWithEngine
+ scrapeURLWithEngine,
} from "./engines";
import { parseMarkdown } from "../../lib/html-to-markdown";
import {
@@ -17,7 +17,7 @@ import {
NoEnginesLeftError,
RemoveFeatureError,
SiteError,
- TimeoutError
+ TimeoutError,
} from "./error";
import { executeTransformers } from "./transformers";
import { LLMRefusalError } from "./transformers/llmExtract";
@@ -50,7 +50,7 @@ export type Meta = {
function buildFeatureFlags(
url: string,
options: ScrapeOptions,
- internalOptions: InternalOptions
+ internalOptions: InternalOptions,
): Set<FeatureFlag> {
const flags: Set<FeatureFlag> = new Set();
@@ -112,7 +112,7 @@ function buildMetaObject(
id: string,
url: string,
options: ScrapeOptions,
- internalOptions: InternalOptions
+ internalOptions: InternalOptions,
): Meta {
const specParams =
urlSpecificParams[new URL(url).hostname.replace(/^www\./, "")];
@@ -120,14 +120,14 @@ function buildMetaObject(
options = Object.assign(options, specParams.scrapeOptions);
internalOptions = Object.assign(
internalOptions,
- specParams.internalOptions
+ specParams.internalOptions,
);
}
const _logger = logger.child({
module: "ScrapeURL",
scrapeId: id,
- scrapeURL: url
+ scrapeURL: url,
});
const logs: any[] = [];
@@ -138,7 +138,7 @@ function buildMetaObject(
internalOptions,
logger: _logger,
logs,
- featureFlags: buildFeatureFlags(url, options, internalOptions)
+ featureFlags: buildFeatureFlags(url, options, internalOptions),
};
}
@@ -229,7 +229,7 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
factors: { isLongEnough, isGoodStatusCode, hasNoPageError },
unsupportedFeatures,
startedAt,
- finishedAt: Date.now()
+ finishedAt: Date.now(),
};
// NOTE: TODO: what to do when status code is bad is tough...
@@ -237,35 +237,35 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
// should we just use all the fallbacks and pick the one with the longest text? - mogery
if (isLongEnough || !isGoodStatusCode) {
meta.logger.info("Scrape via " + engine + " deemed successful.", {
- factors: { isLongEnough, isGoodStatusCode, hasNoPageError }
+ factors: { isLongEnough, isGoodStatusCode, hasNoPageError },
});
result = {
engine,
unsupportedFeatures,
- result: engineResult as EngineScrapeResult & { markdown: string }
+ result: engineResult as EngineScrapeResult & { markdown: string },
};
break;
}
} catch (error) {
if (error instanceof EngineError) {
meta.logger.info("Engine " + engine + " could not scrape the page.", {
- error
+ error,
});
results[engine] = {
state: "error",
error: safeguardCircularError(error),
unexpected: false,
startedAt,
- finishedAt: Date.now()
+ finishedAt: Date.now(),
};
} else if (error instanceof TimeoutError) {
meta.logger.info("Engine " + engine + " timed out while scraping.", {
- error
+ error,
});
results[engine] = {
state: "timeout",
startedAt,
- finishedAt: Date.now()
+ finishedAt: Date.now(),
};
} else if (
error instanceof AddFeatureError ||
@@ -278,7 +278,7 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
error: safeguardCircularError(error),
unexpected: true,
startedAt,
- finishedAt: Date.now()
+ finishedAt: Date.now(),
};
error.results = results;
meta.logger.warn("LLM refusal encountered", { error });
@@ -289,14 +289,14 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
Sentry.captureException(error);
meta.logger.info(
"An unexpected error happened while scraping with " + engine + ".",
- { error }
+ { error },
);
results[engine] = {
state: "error",
error: safeguardCircularError(error),
unexpected: true,
startedAt,
- finishedAt: Date.now()
+ finishedAt: Date.now(),
};
}
}
@@ -305,7 +305,7 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
if (result === null) {
throw new NoEnginesLeftError(
fallbackList.map((x) => x.engine),
- results
+ results,
);
}
@@ -318,15 +318,15 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
sourceURL: meta.url,
url: result.result.url,
statusCode: result.result.statusCode,
- error: result.result.error
- }
+ error: result.result.error,
+ },
};
if (result.unsupportedFeatures.size > 0) {
const warning = `The engine used does not support the following features: ${[...result.unsupportedFeatures].join(", ")} -- your scrape may be partial.`;
meta.logger.warn(warning, {
engine: result.engine,
- unsupportedFeatures: result.unsupportedFeatures
+ unsupportedFeatures: result.unsupportedFeatures,
});
document.warning =
document.warning !== undefined
@@ -340,7 +340,7 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
success: true,
document,
logs: meta.logs,
- engines: results
+ engines: results,
};
}
@@ -348,7 +348,7 @@ export async function scrapeURL(
id: string,
url: string,
options: ScrapeOptions,
- internalOptions: InternalOptions = {}
+ internalOptions: InternalOptions = {},
): Promise<ScrapeUrlResponse> {
const meta = buildMetaObject(id, url, options, internalOptions);
try {
@@ -363,10 +363,10 @@ export async function scrapeURL(
meta.logger.debug(
"More feature flags requested by scraper: adding " +
error.featureFlags.join(", "),
- { error, existingFlags: meta.featureFlags }
+ { error, existingFlags: meta.featureFlags },
);
meta.featureFlags = new Set(
- [...meta.featureFlags].concat(error.featureFlags)
+ [...meta.featureFlags].concat(error.featureFlags),
);
} else if (
error instanceof RemoveFeatureError &&
@@ -375,12 +375,12 @@ export async function scrapeURL(
meta.logger.debug(
"Incorrect feature flags reported by scraper: removing " +
error.featureFlags.join(","),
- { error, existingFlags: meta.featureFlags }
+ { error, existingFlags: meta.featureFlags },
);
meta.featureFlags = new Set(
[...meta.featureFlags].filter(
- (x) => !error.featureFlags.includes(x)
- )
+ (x) => !error.featureFlags.includes(x),
+ ),
);
} else {
throw error;
@@ -415,7 +415,7 @@ export async function scrapeURL(
success: false,
error,
logs: meta.logs,
- engines: results
+ engines: results,
};
}
}
diff --git a/apps/api/src/scraper/scrapeURL/lib/extractLinks.ts b/apps/api/src/scraper/scrapeURL/lib/extractLinks.ts
index 6d71c036..7d612875 100644
--- a/apps/api/src/scraper/scrapeURL/lib/extractLinks.ts
+++ b/apps/api/src/scraper/scrapeURL/lib/extractLinks.ts
@@ -27,7 +27,7 @@ export function extractLinks(html: string, baseUrl: string): string[] {
} catch (error) {
logger.error(
`Failed to construct URL for href: ${href} with base: ${baseUrl}`,
- { error }
+ { error },
);
}
}
diff --git a/apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts b/apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts
index 0f581373..040bf0ee 100644
--- a/apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts
+++ b/apps/api/src/scraper/scrapeURL/lib/extractMetadata.ts
@@ -4,7 +4,7 @@ import { Meta } from "..";
export function extractMetadata(
meta: Meta,
- html: string
+ html: string,
): Document["metadata"] {
let title: string | undefined = undefined;
let description: string | undefined = undefined;
@@ -148,6 +148,6 @@ export function extractMetadata(
publishedTime,
articleTag,
articleSection,
- ...customMetadata
+ ...customMetadata,
};
}
diff --git a/apps/api/src/scraper/scrapeURL/lib/fetch.ts b/apps/api/src/scraper/scrapeURL/lib/fetch.ts
index 400c23a7..897587a9 100644
--- a/apps/api/src/scraper/scrapeURL/lib/fetch.ts
+++ b/apps/api/src/scraper/scrapeURL/lib/fetch.ts
@@ -20,7 +20,7 @@ export type RobustFetchParams<Schema extends z.Schema<any>> = {
export async function robustFetch<
Schema extends z.Schema,
- Output = z.infer<Schema>
+ Output = z.infer<Schema>,
>({
url,
logger,
@@ -32,7 +32,7 @@ export async function robustFetch<
ignoreFailure = false,
requestId = uuid(),
tryCount = 1,
- tryCooldown
+ tryCooldown,
}: RobustFetchParams<Schema>): Promise