mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-05 15:20:53 +08:00
Merge pull request #478 from mendableai/nsc/cacheable-lookup
DNS Cacheable Lookup to avoid DNS blocking ops
This commit is contained in:
commit
8660df0eb0
@ -24,8 +24,8 @@ kill_timeout = '30s'
|
|||||||
|
|
||||||
[http_service.concurrency]
|
[http_service.concurrency]
|
||||||
type = "requests"
|
type = "requests"
|
||||||
hard_limit = 100
|
hard_limit = 200
|
||||||
soft_limit = 50
|
soft_limit = 75
|
||||||
|
|
||||||
[[http_service.checks]]
|
[[http_service.checks]]
|
||||||
grace_period = "20s"
|
grace_period = "20s"
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@flydotio/dockerfile": "^0.4.10",
|
"@flydotio/dockerfile": "^0.4.10",
|
||||||
|
"@jest/globals": "^29.7.0",
|
||||||
"@tsconfig/recommended": "^1.0.3",
|
"@tsconfig/recommended": "^1.0.3",
|
||||||
"@types/body-parser": "^1.19.2",
|
"@types/body-parser": "^1.19.2",
|
||||||
"@types/bull": "^4.10.0",
|
"@types/bull": "^4.10.0",
|
||||||
@ -63,6 +64,7 @@
|
|||||||
"axios": "^1.3.4",
|
"axios": "^1.3.4",
|
||||||
"bottleneck": "^2.19.5",
|
"bottleneck": "^2.19.5",
|
||||||
"bull": "^4.15.0",
|
"bull": "^4.15.0",
|
||||||
|
"cacheable-lookup": "^6.1.0",
|
||||||
"cheerio": "^1.0.0-rc.12",
|
"cheerio": "^1.0.0-rc.12",
|
||||||
"cohere": "^1.1.1",
|
"cohere": "^1.1.1",
|
||||||
"cors": "^2.8.5",
|
"cors": "^2.8.5",
|
||||||
|
20
apps/api/pnpm-lock.yaml
generated
20
apps/api/pnpm-lock.yaml
generated
@ -59,6 +59,9 @@ importers:
|
|||||||
bull:
|
bull:
|
||||||
specifier: ^4.15.0
|
specifier: ^4.15.0
|
||||||
version: 4.15.0
|
version: 4.15.0
|
||||||
|
cacheable-lookup:
|
||||||
|
specifier: ^6.1.0
|
||||||
|
version: 6.1.0
|
||||||
cheerio:
|
cheerio:
|
||||||
specifier: ^1.0.0-rc.12
|
specifier: ^1.0.0-rc.12
|
||||||
version: 1.0.0-rc.12
|
version: 1.0.0-rc.12
|
||||||
@ -189,6 +192,9 @@ importers:
|
|||||||
'@flydotio/dockerfile':
|
'@flydotio/dockerfile':
|
||||||
specifier: ^0.4.10
|
specifier: ^0.4.10
|
||||||
version: 0.4.11
|
version: 0.4.11
|
||||||
|
'@jest/globals':
|
||||||
|
specifier: ^29.7.0
|
||||||
|
version: 29.7.0
|
||||||
'@tsconfig/recommended':
|
'@tsconfig/recommended':
|
||||||
specifier: ^1.0.3
|
specifier: ^1.0.3
|
||||||
version: 1.0.6
|
version: 1.0.6
|
||||||
@ -1937,6 +1943,10 @@ packages:
|
|||||||
resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==}
|
resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==}
|
||||||
engines: {node: '>= 0.8'}
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
cacheable-lookup@6.1.0:
|
||||||
|
resolution: {integrity: sha512-KJ/Dmo1lDDhmW2XDPMo+9oiy/CeqosPguPCrgcVzKyZrL6pM1gU2GmPY/xo6OQPTUaA/c0kwHuywB4E6nmT9ww==}
|
||||||
|
engines: {node: '>=10.6.0'}
|
||||||
|
|
||||||
call-bind@1.0.7:
|
call-bind@1.0.7:
|
||||||
resolution: {integrity: sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==}
|
resolution: {integrity: sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==}
|
||||||
engines: {node: '>= 0.4'}
|
engines: {node: '>= 0.4'}
|
||||||
@ -4369,8 +4379,8 @@ packages:
|
|||||||
engines: {node: '>=14.17'}
|
engines: {node: '>=14.17'}
|
||||||
hasBin: true
|
hasBin: true
|
||||||
|
|
||||||
typescript@5.5.3:
|
typescript@5.5.4:
|
||||||
resolution: {integrity: sha512-/hreyEujaB0w76zKo6717l3L0o/qEUtRgdvUBvlkhoWeOVMjMuHNHk0BRBzikzuGDqNmPQbg5ifMEqsHLiIUcQ==}
|
resolution: {integrity: sha512-Mtq29sKDAEYP7aljRgtPOpTvOfbwRWlS6dPRzwjdE+C0R4brX/GUyhHSecbHMFLNBLcJIPt9nl9yG5TZ1weH+Q==}
|
||||||
engines: {node: '>=14.17'}
|
engines: {node: '>=14.17'}
|
||||||
hasBin: true
|
hasBin: true
|
||||||
|
|
||||||
@ -6917,6 +6927,8 @@ snapshots:
|
|||||||
|
|
||||||
bytes@3.1.2: {}
|
bytes@3.1.2: {}
|
||||||
|
|
||||||
|
cacheable-lookup@6.1.0: {}
|
||||||
|
|
||||||
call-bind@1.0.7:
|
call-bind@1.0.7:
|
||||||
dependencies:
|
dependencies:
|
||||||
es-define-property: 1.0.0
|
es-define-property: 1.0.0
|
||||||
@ -8927,7 +8939,7 @@ snapshots:
|
|||||||
csv-parse: 5.5.6
|
csv-parse: 5.5.6
|
||||||
gpt3-tokenizer: 1.1.5
|
gpt3-tokenizer: 1.1.5
|
||||||
openai: 3.3.0
|
openai: 3.3.0
|
||||||
typescript: 5.5.3
|
typescript: 5.5.4
|
||||||
uuid: 9.0.1
|
uuid: 9.0.1
|
||||||
zod: 3.23.8
|
zod: 3.23.8
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
@ -9519,7 +9531,7 @@ snapshots:
|
|||||||
|
|
||||||
typescript@5.4.5: {}
|
typescript@5.4.5: {}
|
||||||
|
|
||||||
typescript@5.5.3: {}
|
typescript@5.5.4: {}
|
||||||
|
|
||||||
typesense@1.8.2(@babel/runtime@7.24.6):
|
typesense@1.8.2(@babel/runtime@7.24.6):
|
||||||
dependencies:
|
dependencies:
|
||||||
|
@ -88,7 +88,10 @@ export async function supaAuthenticateUser(
|
|||||||
// console.log('Key and Price ID:', data);
|
// console.log('Key and Price ID:', data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (error || !data || data.length === 0) {
|
if (error || !data || data.length === 0) {
|
||||||
|
// This branch is also taken when `error` is falsy but `data` is missing or
// empty, so read the message with optional chaining instead of dereferencing
// `error` directly (which would throw before the Unauthorized result is returned).
Logger.warn(`Error fetching api key: ${error?.message} or data is empty`);
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
error: "Unauthorized: Invalid token",
|
error: "Unauthorized: Invalid token",
|
||||||
@ -178,7 +181,10 @@ export async function supaAuthenticateUser(
|
|||||||
.select("*")
|
.select("*")
|
||||||
.eq("key", normalizedApi);
|
.eq("key", normalizedApi);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (error || !data || data.length === 0) {
|
if (error || !data || data.length === 0) {
|
||||||
|
// The surrounding condition is true for a falsy `error` with missing/empty
// `data`, so `error.message` would throw here; optional chaining keeps the
// warning from crashing the request before the Unauthorized result is returned.
Logger.warn(`Error fetching api key: ${error?.message} or data is empty`);
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
error: "Unauthorized: Invalid token",
|
error: "Unauthorized: Invalid token",
|
||||||
|
@ -10,6 +10,9 @@ import os from "os";
|
|||||||
import { Logger } from "./lib/logger";
|
import { Logger } from "./lib/logger";
|
||||||
import { adminRouter } from "./routes/admin";
|
import { adminRouter } from "./routes/admin";
|
||||||
import { ScrapeEvents } from "./lib/scrape-events";
|
import { ScrapeEvents } from "./lib/scrape-events";
|
||||||
|
import http from 'node:http';
|
||||||
|
import https from 'node:https';
|
||||||
|
import CacheableLookup from 'cacheable-lookup';
|
||||||
|
|
||||||
const { createBullBoard } = require("@bull-board/api");
|
const { createBullBoard } = require("@bull-board/api");
|
||||||
const { BullAdapter } = require("@bull-board/api/bullAdapter");
|
const { BullAdapter } = require("@bull-board/api/bullAdapter");
|
||||||
@ -18,6 +21,14 @@ const { ExpressAdapter } = require("@bull-board/express");
|
|||||||
const numCPUs = process.env.ENV === "local" ? 2 : os.cpus().length;
|
const numCPUs = process.env.ENV === "local" ? 2 : os.cpus().length;
|
||||||
Logger.info(`Number of CPUs: ${numCPUs} available`);
|
Logger.info(`Number of CPUs: ${numCPUs} available`);
|
||||||
|
|
||||||
|
// DNS lookup cache installed on the default HTTP and HTTPS agents.
const cacheable = new CacheableLookup({
  // `lookup: false` is important to avoid querying local hostnames; see the
  // README at https://github.com/szmarczak/cacheable-lookup
  lookup: false,
});

// Install on the plain-HTTP agent first, then the HTTPS agent.
for (const agent of [http.globalAgent, https.globalAgent]) {
  cacheable.install(agent);
}
|
||||||
|
|
||||||
if (cluster.isMaster) {
|
if (cluster.isMaster) {
|
||||||
Logger.info(`Master ${process.pid} is running`);
|
Logger.info(`Master ${process.pid} is running`);
|
||||||
|
|
||||||
|
15
apps/api/src/scraper/WebScraper/__tests__/dns.test.ts
Normal file
15
apps/api/src/scraper/WebScraper/__tests__/dns.test.ts
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
import CacheableLookup from 'cacheable-lookup';
|
||||||
|
import https from 'node:https';
|
||||||
|
import axios from "axios";
|
||||||
|
|
||||||
|
// Checks that an HTTPS request made through axios goes through the
// CacheableLookup instance installed on the global HTTPS agent.
// NOTE: this test performs a real request to https://example.com, so it needs
// network access.
describe("DNS", () => {
  it("cached dns", async () => {
    const cachedDns = new CacheableLookup();
    cachedDns.install(https.globalAgent);

    try {
      jest.spyOn(cachedDns, "lookupAsync");

      const res = await axios.get("https://example.com");
      expect(res.status).toBe(200);
      expect(cachedDns.lookupAsync).toHaveBeenCalled();
    } finally {
      // install() modifies the shared global agent; undo it even when an
      // assertion or the request fails so the patch does not leak past this test.
      cachedDns.uninstall(https.globalAgent);
    }
  });
});
|
Loading…
x
Reference in New Issue
Block a user