Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl, synced 2025-06-01 09:27:04 +08:00
Mog/cachable lookup (#1560)
* feat(scrapeURL): use cacheableLookup
* feat(queue-worker): add cacheablelookup
* fix(cacheable-lookup): make it work with tailscale on local
* add devenv
* try again
* allow querying all
* log
* fixes
* asd
* fix:
* fix(lookup):
* lookup
This commit is contained in:
parent d46ba95924
commit bd9673e104
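For orientation, a minimal sketch of the pattern this PR wires in (illustrative only, not the exact application code): a single cacheable-lookup instance is installed on Node's global http/https agents so hostname resolution is served from an in-memory DNS cache instead of hitting the resolver on every request.

import http from "node:http";
import https from "node:https";
import CacheableLookup from "cacheable-lookup";

// One shared instance so every outgoing request reuses the same DNS cache.
const cacheable = new CacheableLookup();

// Patch the global agents: anything going through node:http / node:https
// now resolves hostnames via the cache.
cacheable.install(http.globalAgent);
cacheable.install(https.globalAgent);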
2  .github/workflows/test-server.yml  vendored
@@ -35,6 +35,7 @@ env:
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  VERTEX_CREDENTIALS: ${{ secrets.VERTEX_CREDENTIALS }}
  USE_GO_MARKDOWN_PARSER: true
  SENTRY_ENVIRONMENT: dev

jobs:
  test:
@@ -53,6 +54,7 @@ jobs:
          oauth-client-id: ${{ secrets.TS_OAUTH_CLIENT_ID }}
          oauth-secret: ${{ secrets.TS_OAUTH_SECRET }}
          tags: tag:ci
          use-cache: 'true'
      - name: Install pnpm
        uses: pnpm/action-setup@v4
        with:
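Note: the SENTRY_ENVIRONMENT: dev entry in this env block is what the new cacheableLookup.ts module further down keys off. In the dev environment the module falls back to plain dns.lookup, which presumably is the "make it work with tailscale on local" fix from the commit message: Tailscale MagicDNS names resolve through the OS resolver (getaddrinfo, i.e. dns.lookup), while cacheable-lookup prefers direct DNS queries.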
@@ -18,7 +18,6 @@ import { logger } from "./lib/logger";
import { adminRouter } from "./routes/admin";
import http from "node:http";
import https from "node:https";
import CacheableLookup from "cacheable-lookup";
import { v1Router } from "./routes/v1";
import expressWs from "express-ws";
import { ErrorResponse, ResponseWithSentry } from "./controllers/v1/types";
@@ -26,6 +25,7 @@ import { ZodError } from "zod";
import { v4 as uuidv4 } from "uuid";
import { RateLimiterMode } from "./types";
import { attachWsProxy } from "./services/agentLivecastWS";
import { cacheableLookup } from "./scraper/scrapeURL/lib/cacheableLookup";

const { createBullBoard } = require("@bull-board/api");
const { BullAdapter } = require("@bull-board/api/bullAdapter");
@@ -34,11 +34,9 @@ const { ExpressAdapter } = require("@bull-board/express");
const numCPUs = process.env.ENV === "local" ? 2 : os.cpus().length;
logger.info(`Number of CPUs: ${numCPUs} available`);

const cacheable = new CacheableLookup();

// Install cacheable lookup for all other requests
cacheable.install(http.globalAgent);
cacheable.install(https.globalAgent);
cacheableLookup.install(http.globalAgent);
cacheableLookup.install(https.globalAgent);

// Initialize Express with WebSocket support
const expressApp = express();
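Installing on http.globalAgent / https.globalAgent only affects requests that go through Node's core agents; undici (which the scrapeURL code uses for fetch and its custom dispatchers) has its own connection layer and ignores the global agents. That is why the changes below also pass cacheableLookup.lookup to undici explicitly.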
@@ -2,6 +2,7 @@ import type { Socket } from "net";
import type { TLSSocket } from "tls";
import * as undici from "undici";
import { Address6 } from "ip-address";
import { cacheableLookup } from "../../lib/cacheableLookup";

export class InsecureConnectionError extends Error {
  constructor() {
@@ -46,7 +47,7 @@ export function makeSecureDispatcher(
  const agentOpts: undici.Agent.Options = {
    connect: {
      rejectUnauthorized: false, // bypass SSL failures -- this is fine
      // lookup: secureLookup,
      lookup: cacheableLookup.lookup,
    },
    maxRedirections: 5000,
    ...options,
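For reference, a standalone sketch (assumed names, not firecrawl code) of handing a DNS-caching lookup to an undici Agent: undici forwards the connect options to net.connect / tls.connect, both of which accept a dns.lookup-compatible lookup function, so the bound cacheable-lookup lookup can be dropped in directly.

import { Agent, fetch } from "undici";
import CacheableLookup from "cacheable-lookup";

const cacheable = new CacheableLookup();

// The connect options end up in net.connect / tls.connect, which accept
// a custom lookup with the same signature as dns.lookup.
const dispatcher = new Agent({
  connect: {
    lookup: cacheable.lookup,
  },
});

// Requests through this dispatcher resolve hostnames via the shared cache.
const res = await fetch("https://example.com", { dispatcher });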
4  apps/api/src/scraper/scrapeURL/lib/cacheableLookup.ts  Normal file
@@ -0,0 +1,4 @@
import CacheableLookup from 'cacheable-lookup';
import dns from 'dns';

export const cacheableLookup = (process.env.SENTRY_ENVIRONMENT === "dev" ? { lookup: dns.lookup, install: () => {} } : new CacheableLookup({}));
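A note on the dev fallback's shape: the object with lookup: dns.lookup and a no-op install mirrors the two members of CacheableLookup that the rest of the changes touch, so call sites stay identical in both environments. Roughly (assumed consumer code):

import http from "node:http";
import { cacheableLookup } from "./cacheableLookup";

// Dev: no-op. Prod: patches the agent to resolve through the DNS cache.
cacheableLookup.install(http.globalAgent);

// Dev: plain dns.lookup (OS resolver). Prod: the caching lookup.
const lookup = cacheableLookup.lookup;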
@@ -5,6 +5,7 @@ import { MockState, saveMock } from "./mock";
import { TimeoutSignal } from "../../../controllers/v1/types";
import { fireEngineURL } from "../engines/fire-engine/scrape";
import { fetch, RequestInit, Response, FormData, Agent } from "undici";
import { cacheableLookup } from "./cacheableLookup";

export type RobustFetchParams<Schema extends z.Schema<any>> = {
  url: string;
@@ -82,6 +83,9 @@ export async function robustFetch<
    dispatcher: new Agent({
      headersTimeout: 0,
      bodyTimeout: 0,
      connect: {
        lookup: cacheableLookup.lookup,
      },
    }),
    ...(body instanceof FormData
      ? {
@@ -82,6 +82,9 @@ import { performExtraction_F0 } from "../lib/extract/fire-0/extraction-service-f
import { CostTracking } from "../lib/extract/extraction-service";
import { getACUCTeam } from "../controllers/auth";
import Express from "express";
import http from "http";
import https from "https";
import { cacheableLookup } from "../scraper/scrapeURL/lib/cacheableLookup";

configDotenv();

@@ -109,6 +112,10 @@ const gotJobInterval = Number(process.env.CONNECTION_MONITOR_INTERVAL) || 20;

const runningJobs: Set<string> = new Set();

// Install cacheable lookup for all other requests
cacheableLookup.install(http.globalAgent);
cacheableLookup.install(https.globalAgent);

async function finishCrawlIfNeeded(job: Job & { id: string }, sc: StoredCrawl) {
  const logger = _logger.child({
    module: "queue-worker",