From bd9673e104caeb6082dfccd0d69ef6cfecae0e09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?=
Date: Fri, 16 May 2025 15:44:52 +0200
Subject: [PATCH] Mog/cachable lookup (#1560)

* feat(scrapeURL): use cacheableLookup

* feat(queue-worker): add cacheablelookup

* fix(cacheable-lookup): make it work with tailscale on local

* add devenv

* try again

* allow querying all

* log

* fixes

* asd

* fix:

* fix(lookup):

* lookup
---
 .github/workflows/test-server.yml                         | 2 ++
 apps/api/src/index.ts                                     | 8 +++-----
 apps/api/src/scraper/scrapeURL/engines/utils/safeFetch.ts | 3 ++-
 apps/api/src/scraper/scrapeURL/lib/cacheableLookup.ts     | 4 ++++
 apps/api/src/scraper/scrapeURL/lib/fetch.ts               | 4 ++++
 apps/api/src/services/queue-worker.ts                     | 7 +++++++
 6 files changed, 22 insertions(+), 6 deletions(-)
 create mode 100644 apps/api/src/scraper/scrapeURL/lib/cacheableLookup.ts

diff --git a/.github/workflows/test-server.yml b/.github/workflows/test-server.yml
index febad61a..cdde350b 100644
--- a/.github/workflows/test-server.yml
+++ b/.github/workflows/test-server.yml
@@ -35,6 +35,7 @@ env:
   ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
   VERTEX_CREDENTIALS: ${{ secrets.VERTEX_CREDENTIALS }}
   USE_GO_MARKDOWN_PARSER: true
+  SENTRY_ENVIRONMENT: dev
 
 jobs:
   test:
@@ -53,6 +54,7 @@ jobs:
           oauth-client-id: ${{ secrets.TS_OAUTH_CLIENT_ID }}
           oauth-secret: ${{ secrets.TS_OAUTH_SECRET }}
           tags: tag:ci
+          use-cache: 'true'
       - name: Install pnpm
         uses: pnpm/action-setup@v4
         with:
diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts
index efca0479..933a980f 100644
--- a/apps/api/src/index.ts
+++ b/apps/api/src/index.ts
@@ -18,7 +18,6 @@ import { logger } from "./lib/logger";
 import { adminRouter } from "./routes/admin";
 import http from "node:http";
 import https from "node:https";
-import CacheableLookup from "cacheable-lookup";
 import { v1Router } from "./routes/v1";
 import expressWs from "express-ws";
 import { ErrorResponse, ResponseWithSentry } from "./controllers/v1/types";
@@ -26,6 +25,7 @@ import { ZodError } from "zod";
 import { v4 as uuidv4 } from "uuid";
 import { RateLimiterMode } from "./types";
 import { attachWsProxy } from "./services/agentLivecastWS";
+import { cacheableLookup } from "./scraper/scrapeURL/lib/cacheableLookup";
 
 const { createBullBoard } = require("@bull-board/api");
 const { BullAdapter } = require("@bull-board/api/bullAdapter");
@@ -34,11 +34,9 @@ const { ExpressAdapter } = require("@bull-board/express");
 const numCPUs = process.env.ENV === "local" ? 2 : os.cpus().length;
 logger.info(`Number of CPUs: ${numCPUs} available`);
 
-const cacheable = new CacheableLookup();
-
 // Install cacheable lookup for all other requests
-cacheable.install(http.globalAgent);
-cacheable.install(https.globalAgent);
+cacheableLookup.install(http.globalAgent);
+cacheableLookup.install(https.globalAgent);
 
 // Initialize Express with WebSocket support
 const expressApp = express();
diff --git a/apps/api/src/scraper/scrapeURL/engines/utils/safeFetch.ts b/apps/api/src/scraper/scrapeURL/engines/utils/safeFetch.ts
index 351a7742..1a959224 100644
--- a/apps/api/src/scraper/scrapeURL/engines/utils/safeFetch.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/utils/safeFetch.ts
@@ -2,6 +2,7 @@ import type { Socket } from "net";
 import type { TLSSocket } from "tls";
 import * as undici from "undici";
 import { Address6 } from "ip-address";
+import { cacheableLookup } from "../../lib/cacheableLookup";
 
 export class InsecureConnectionError extends Error {
   constructor() {
@@ -46,7 +47,7 @@ export function makeSecureDispatcher(
   const agentOpts: undici.Agent.Options = {
     connect: {
       rejectUnauthorized: false, // bypass SSL failures -- this is fine
-      // lookup: secureLookup,
+      lookup: cacheableLookup.lookup,
     },
     maxRedirections: 5000,
     ...options,
diff --git a/apps/api/src/scraper/scrapeURL/lib/cacheableLookup.ts b/apps/api/src/scraper/scrapeURL/lib/cacheableLookup.ts
new file mode 100644
index 00000000..b561967c
--- /dev/null
+++ b/apps/api/src/scraper/scrapeURL/lib/cacheableLookup.ts
@@ -0,0 +1,4 @@
+import CacheableLookup from 'cacheable-lookup';
+import dns from 'dns';
+
+export const cacheableLookup = (process.env.SENTRY_ENVIRONMENT === "dev" ? { lookup: dns.lookup, install: () => {} } : new CacheableLookup({}));
diff --git a/apps/api/src/scraper/scrapeURL/lib/fetch.ts b/apps/api/src/scraper/scrapeURL/lib/fetch.ts
index c64e7953..24822f41 100644
--- a/apps/api/src/scraper/scrapeURL/lib/fetch.ts
+++ b/apps/api/src/scraper/scrapeURL/lib/fetch.ts
@@ -5,6 +5,7 @@ import { MockState, saveMock } from "./mock";
 import { TimeoutSignal } from "../../../controllers/v1/types";
 import { fireEngineURL } from "../engines/fire-engine/scrape";
 import { fetch, RequestInit, Response, FormData, Agent } from "undici";
+import { cacheableLookup } from "./cacheableLookup";
 
 export type RobustFetchParams<Schema extends z.Schema<any>> = {
   url: string;
@@ -82,6 +83,9 @@ export async function robustFetch<
     dispatcher: new Agent({
       headersTimeout: 0,
       bodyTimeout: 0,
+      connect: {
+        lookup: cacheableLookup.lookup,
+      },
     }),
     ...(body instanceof FormData
       ? {
diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts
index 04181c9e..553b944f 100644
--- a/apps/api/src/services/queue-worker.ts
+++ b/apps/api/src/services/queue-worker.ts
@@ -82,6 +82,9 @@ import { performExtraction_F0 } from "../lib/extract/fire-0/extraction-service-f
 import { CostTracking } from "../lib/extract/extraction-service";
 import { getACUCTeam } from "../controllers/auth";
 import Express from "express";
+import http from "http";
+import https from "https";
+import { cacheableLookup } from "../scraper/scrapeURL/lib/cacheableLookup";
 
 configDotenv();
 
@@ -109,6 +112,10 @@ const gotJobInterval = Number(process.env.CONNECTION_MONITOR_INTERVAL) || 20;
 
 const runningJobs: Set<string> = new Set();
 
+// Install cacheable lookup for all other requests
+cacheableLookup.install(http.globalAgent);
+cacheableLookup.install(https.globalAgent);
+
 async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
   const logger = _logger.child({
     module: "queue-worker",
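
Note (not part of the patch): a minimal standalone sketch of the DNS-caching setup this change wires in, assuming the published cacheable-lookup and undici APIs. The file name, demo URL, and main() wrapper are illustrative only, and the dev fallback's install signature is widened slightly so the sketch type-checks.

// dns-cache-sketch.ts -- illustrative; mirrors the new cacheableLookup.ts
// module and the call sites this patch adds in index.ts, queue-worker.ts,
// fetch.ts, and safeFetch.ts.
import CacheableLookup from "cacheable-lookup";
import dns from "dns";
import http from "node:http";
import https from "node:https";
import { Agent, fetch } from "undici";

// Same shape as the patch: in the dev environment (Tailscale on local/CI,
// per the commit messages) fall back to plain dns.lookup; otherwise cache
// DNS answers in-process.
const cacheableLookup =
  process.env.SENTRY_ENVIRONMENT === "dev"
    ? { lookup: dns.lookup, install: (_agent?: unknown) => {} } // patch uses `install: () => {}`
    : new CacheableLookup({});

// Node's built-in http/https global agents pick up the cached resolver via
// install(), as index.ts and queue-worker.ts now do.
cacheableLookup.install(http.globalAgent);
cacheableLookup.install(https.globalAgent);

// undici keeps its own connection layer, so the resolver is passed explicitly
// through the Agent's connect options, as robustFetch and makeSecureDispatcher
// now do.
const dispatcher = new Agent({
  connect: { lookup: cacheableLookup.lookup },
});

async function main() {
  // The first request resolves the hostname over DNS; later requests made
  // within the record's TTL reuse the cached address instead of re-resolving.
  const res = await fetch("https://example.com", { dispatcher });
  console.log(res.status);
}

main().catch(console.error);

Setting SENTRY_ENVIRONMENT: dev in the CI workflow routes test runs onto the plain dns.lookup branch, which, per the commit messages, is what keeps lookups working with Tailscale locally.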