feat: new snips test framework (FIR-414) (#1033)

* feat: new snips test framework

* Update mock.ts

---------

Co-authored-by: Nicolas <nicolascamara29@gmail.com>
This commit is contained in:
Gergő Móricz 2025-01-13 20:50:47 +01:00 committed by GitHub
parent 9a13c1dede
commit 5c62bb1195
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 393 additions and 86 deletions

View File

@ -15,6 +15,7 @@
"test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
"test:full": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_withAuth)'",
"test:prod": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_full_withAuth)'",
"test:snips": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false src/__tests__/snips/*.test.ts",
"workers": "nodemon --exec ts-node src/services/queue-worker.ts",
"worker:production": "node dist/src/services/queue-worker.js",
"mongo-docker": "docker run -d -p 2717:27017 -v ./mongo-data:/data/db --name mongodb mongo:latest",
@ -37,6 +38,7 @@
"@types/jest": "^29.5.12",
"@types/node": "^20.14.1",
"@types/pdf-parse": "^1.1.4",
"@types/supertest": "^6.0.2",
"body-parser": "^1.20.1",
"express": "^4.18.2",
"jest": "^29.6.3",

View File

@ -267,6 +267,9 @@ importers:
'@types/pdf-parse':
specifier: ^1.1.4
version: 1.1.4
'@types/supertest':
specifier: ^6.0.2
version: 6.0.2
body-parser:
specifier: ^1.20.1
version: 1.20.2
@ -1516,6 +1519,9 @@ packages:
'@types/connect@3.4.38':
resolution: {integrity: sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug==}
'@types/cookiejar@2.1.5':
resolution: {integrity: sha512-he+DHOWReW0nghN24E1WUqM0efK4kI9oTqDm6XmK8ZPe2djZ90BSNdGnIyCLzCPw7/pogPlGbzI2wHGGmi4O/Q==}
'@types/cors@2.8.17':
resolution: {integrity: sha512-8CGDvrBj1zgo2qE+oS3pOCyYNqCPryMWY2bGfwA0dcfopWGgxs+78df0Rs3rc9THP4JkOhLsAa+15VdpAqkcUA==}
@ -1549,6 +1555,9 @@ packages:
'@types/jest@29.5.12':
resolution: {integrity: sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==}
'@types/methods@1.1.4':
resolution: {integrity: sha512-ymXWVrDiCxTBE3+RIrrP533E70eA+9qu7zdWoHuOmGujkYtzf4HQF96b8nwHLqhuf4ykX61IGRIB38CC6/sImQ==}
'@types/mime@1.3.5':
resolution: {integrity: sha512-/pyBZWSLD2n0dcHE3hq8s8ZvcETHtEuF+3E7XVt0Ig2nvsVQXdghHVcEkIWjy9A0wKfTn97a/PSDYohKIlnP/w==}
@ -1597,6 +1606,12 @@ packages:
'@types/stack-utils@2.0.3':
resolution: {integrity: sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==}
'@types/superagent@8.1.9':
resolution: {integrity: sha512-pTVjI73witn+9ILmoJdajHGW2jkSaOzhiFYF1Rd3EQ94kymLqB9PjD9ISg7WaALC7+dCHT0FGe9T2LktLq/3GQ==}
'@types/supertest@6.0.2':
resolution: {integrity: sha512-137ypx2lk/wTQbW6An6safu9hXmajAifU/s7szAHLN/FeIm5w7yR0Wkl9fdJMRSHwOn4HLAI0DaB2TOORuhPDg==}
'@types/triple-beam@1.3.5':
resolution: {integrity: sha512-6WaYesThRMCl19iryMYP7/x2OVgCtbIVflDGFpWnb9irXI3UjYE4AzmYuiUKY1AJstGijoY+MgUszMgRxIYTYw==}
@ -6566,6 +6581,8 @@ snapshots:
dependencies:
'@types/node': 20.14.1
'@types/cookiejar@2.1.5': {}
'@types/cors@2.8.17':
dependencies:
'@types/node': 20.14.1
@ -6613,6 +6630,8 @@ snapshots:
expect: 29.7.0
pretty-format: 29.7.0
'@types/methods@1.1.4': {}
'@types/mime@1.3.5': {}
'@types/mysql@2.15.22':
@ -6667,6 +6686,18 @@ snapshots:
'@types/stack-utils@2.0.3': {}
'@types/superagent@8.1.9':
dependencies:
'@types/cookiejar': 2.1.5
'@types/methods': 1.1.4
'@types/node': 20.14.1
form-data: 4.0.0
'@types/supertest@6.0.2':
dependencies:
'@types/methods': 1.1.4
'@types/superagent': 8.1.9
'@types/triple-beam@1.3.5': {}
'@types/uuid@9.0.8': {}

View File

@ -0,0 +1,107 @@
[
{
"time": 1735911273239,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape",
"method": "POST",
"body": {
"url": "http://firecrawl.dev",
"engine": "chrome-cdp",
"instantReturn": true,
"skipTlsVerification": false,
"priority": 10,
"mobile": false,
"timeout": 15000
},
"headers": {},
"ignoreResponse": false,
"ignoreFailure": false,
"tryCount": 3
},
"result": {
"status": 200,
"headers": {},
"body": "{\"jobId\":\"ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp\",\"processing\":true}"
}
},
{
"time": 1735911273354,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,
"ignoreFailure": false,
"tryCount": 1
},
"result": {
"status": 200,
"headers": {},
"body": "{\"jobId\":\"ede37286-90db-4f60-8efb-76217dfcfa35\",\"state\":\"prioritized\",\"processing\":true}"
}
},
{
"time": 1735911273720,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,
"ignoreFailure": false,
"tryCount": 1
},
"result": {
"status": 200,
"headers": {},
"body": "{\"jobId\":\"ede37286-90db-4f60-8efb-76217dfcfa35\",\"state\":\"active\",\"processing\":true}"
}
},
{
"time": 1735911274092,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,
"ignoreFailure": false,
"tryCount": 1
},
"result": {
"status": 200,
"headers": {},
"body": "{\"jobId\":\"ede37286-90db-4f60-8efb-76217dfcfa35\",\"state\":\"active\",\"processing\":true}"
}
},
{
"time": 1735911274467,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,
"ignoreFailure": false,
"tryCount": 1
},
"result": {
"status": 200,
"headers": {},
"body": "{\"jobId\":\"ede37286-90db-4f60-8efb-76217dfcfa35\",\"state\":\"active\",\"processing\":true}"
}
},
{
"time": 1735911274947,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,
"ignoreFailure": false,
"tryCount": 1
},
"result": {
"status": 200,
"headers": {},
"body": "{\"jobId\":\"ede37286-90db-4f60-8efb-76217dfcfa35\",\"state\":\"completed\",\"processing\":false,\"timeTaken\":1.204,\"content\":\"<!DOCTYPE html><html lang=\\\"en\\\"><body><p>this is fake data coming from the mocking system!</p></body></html>\",\"url\":\"https://www.firecrawl.dev/\",\"screenshots\":[],\"actionContent\":[],\"pageStatusCode\":200,\"responseHeaders\":{\"X-DNS-Prefetch-Control\":\"off\",\"age\":\"0\",\"cache-control\":\"private, no-cache, no-store, max-age=0, must-revalidate\",\"content-encoding\":\"br\",\"content-type\":\"text/html; charset=utf-8\",\"date\":\"Fri, 03 Jan 2025 13:34:34 GMT\",\"link\":\"</_next/static/media/171883e03d2067b6-s.p.woff2>; rel=preload; as=\\\"font\\\"; crossorigin=\\\"\\\"; type=\\\"font/woff2\\\", </_next/static/media/a34f9d1faa5f3315-s.p.woff2>; rel=preload; as=\\\"font\\\"; crossorigin=\\\"\\\"; type=\\\"font/woff2\\\", </_next/static/media/c4c7b0ec92b72e30-s.p.woff2>; rel=preload; as=\\\"font\\\"; crossorigin=\\\"\\\"; type=\\\"font/woff2\\\"\",\"permissions-policy\":\"keyboard-map=(), attribution-reporting=(), run-ad-auction=(), private-state-token-redemption=(), private-state-token-issuance=(), join-ad-interest-group=(), idle-detection=(), compute-pressure=(), browsing-topics=()\",\"server\":\"Vercel\",\"strict-transport-security\":\"max-age=63072000\",\"vary\":\"RSC, Next-Router-State-Tree, Next-Router-Prefetch\",\"x-matched-path\":\"/\",\"x-powered-by\":\"Next.js\",\"x-vercel-cache\":\"MISS\",\"x-vercel-id\":\"iad1::iad1::bs88l-1735911273932-1f7bba7a8b45\"},\"invalidTlsCert\":false,\"file\":null}"
}
}
]

View File

@ -0,0 +1,36 @@
import request from "supertest";
import { configDotenv } from "dotenv";
import { ScrapeRequestInput } from "../../controllers/v1/types";
// Load environment variables via dotenv; scrape() below reads TEST_API_KEY from process.env.
configDotenv();
// Base URL that scrape() sends its requests to.
const TEST_URL = "http://127.0.0.1:3002";
/**
 * Posts `body` as JSON to `/v1/scrape` on the server at TEST_URL, authenticating
 * with the bearer token taken from the TEST_API_KEY environment variable.
 *
 * @param body - Scrape request payload sent as the request body.
 * @returns The response produced by the request.
 */
async function scrape(body: ScrapeRequestInput) {
  const pendingRequest = request(TEST_URL)
    .post("/v1/scrape")
    .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
    .set("Content-Type", "application/json")
    .send(body);

  const response = await pendingRequest;
  return response;
}
/**
 * Asserts that a scrape response is a success: HTTP 200, `success === true`
 * in the body, and a `data` field whose `typeof` is "object".
 *
 * @param response - Response returned by `scrape`.
 */
function expectScrapeToSucceed(response: Awaited<ReturnType<typeof scrape>>) {
  const { statusCode, body } = response;

  expect(statusCode).toBe(200);
  expect(body.success).toBe(true);
  expect(typeof body.data).toBe("object");
}
describe("Scrape tests", () => {
  it("mocking works properly", async () => {
    // Relies on the falsified mock named "mocking-works-properly".
    // If the mock were bypassed and real data returned, this test would fail:
    // firecrawl.dev will never produce this text as its actual markdown output.
    const mockName = "mocking-works-properly";
    const expectedMarkdown = "this is fake data coming from the mocking system!";

    const response = await scrape({
      url: "http://firecrawl.dev",
      useMock: mockName,
    });

    expectScrapeToSucceed(response);
    expect(response.body.data.markdown).toBe(expectedMarkdown);
  });
});

View File

@ -0,0 +1,12 @@
// Merges every recorded mock file from the scrapeURL mocks directory into one
// JSON array and writes it to ../mocks/<name>.json (relative to this script),
// where <name> is the first command-line argument.
const path = require("path");
const fs = require("fs");

const mockName = process.argv[2];
if (!mockName) {
  // Without a name the output would silently be written to "undefined.json".
  console.error("Usage: node " + path.basename(__filename) + " <mock-name>");
  process.exit(1);
}

const mocksDirPath = path.join(__dirname, "../../../scraper/scrapeURL/mocks");

// Only *.json files are merged; any other file in the directory would make
// JSON.parse throw and abort the whole run.
const files = fs.readdirSync(mocksDirPath).filter((x) => x.endsWith(".json"));
const contents = files.map((x) =>
  JSON.parse(fs.readFileSync(path.join(mocksDirPath, x), "utf8")),
);

fs.writeFileSync(
  path.join(__dirname, "../mocks/" + mockName + ".json"),
  JSON.stringify(contents, undefined, 4),
);

View File

@ -181,6 +181,7 @@ export const scrapeOptions = z
skipTlsVerification: z.boolean().default(false),
removeBase64Images: z.boolean().default(true),
fastMode: z.boolean().default(false),
useMock: z.string().optional(),
})
.strict(strictMessage);

View File

@ -0,0 +1 @@
/mocks

View File

@ -3,12 +3,8 @@ import * as Sentry from "@sentry/node";
import { z } from "zod";
import { robustFetch } from "../../lib/fetch";
import {
ActionError,
EngineError,
SiteError,
UnsupportedFileError,
} from "../../error";
import { ActionError, EngineError, SiteError, UnsupportedFileError } from "../../error";
import { MockState } from "../../lib/mock";
const successSchema = z.object({
jobId: z.string(),
@ -82,6 +78,7 @@ export class StillProcessingError extends Error {
export async function fireEngineCheckStatus(
logger: Logger,
jobId: string,
mock: MockState | null,
): Promise<FireEngineCheckStatusSuccess> {
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
@ -105,6 +102,7 @@ export async function fireEngineCheckStatus(
}
: {}),
},
mock,
});
},
);

View File

@ -2,8 +2,9 @@ import { Logger } from "winston";
import * as Sentry from "@sentry/node";
import { robustFetch } from "../../lib/fetch";
import { MockState } from "../../lib/mock";
export async function fireEngineDelete(logger: Logger, jobId: string) {
export async function fireEngineDelete(logger: Logger, jobId: string, mock: MockState | null) {
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
await Sentry.startSpan(
@ -28,6 +29,7 @@ export async function fireEngineDelete(logger: Logger, jobId: string) {
ignoreResponse: true,
ignoreFailure: true,
logger: logger.child({ method: "fireEngineDelete/robustFetch", jobId }),
mock,
});
},
);

View File

@ -24,6 +24,7 @@ import * as Sentry from "@sentry/node";
import { Action } from "../../../../lib/entities";
import { specialtyScrapeCheck } from "../utils/specialtyHandler";
import { fireEngineDelete } from "./delete";
import { MockState, saveMock } from "../../lib/mock";
// This function does not take `Meta` on purpose. It may not access any
// meta values to construct the request -- that must be done by the
@ -37,10 +38,12 @@ async function performFireEngineScrape<
logger: Logger,
request: FireEngineScrapeRequestCommon & Engine,
timeout: number,
mock: MockState | null,
): Promise<FireEngineCheckStatusSuccess> {
const scrape = await fireEngineScrape(
logger.child({ method: "fireEngineScrape" }),
request,
mock,
);
const startTime = Date.now();
@ -57,6 +60,7 @@ async function performFireEngineScrape<
afterErrors: errors,
}),
scrape.jobId,
mock,
);
throw new Error("Error limit hit. See e.cause.errors for errors.", {
cause: { errors },
@ -78,6 +82,7 @@ async function performFireEngineScrape<
status = await fireEngineCheckStatus(
logger.child({ method: "fireEngineCheckStatus" }),
scrape.jobId,
mock,
);
} catch (error) {
if (error instanceof StillProcessingError) {
@ -94,6 +99,7 @@ async function performFireEngineScrape<
afterError: error,
}),
scrape.jobId,
mock,
);
logger.debug("Fire-engine scrape job failed.", {
error,
@ -131,6 +137,7 @@ async function performFireEngineScrape<
method: "performFireEngineScrape/fireEngineDelete",
}),
scrape.jobId,
mock,
);
return status;
@ -200,6 +207,7 @@ export async function scrapeURLWithFireEngineChromeCDP(
}),
request,
timeout,
meta.mock,
);
if (
@ -274,6 +282,7 @@ export async function scrapeURLWithFireEnginePlaywright(
}),
request,
timeout,
meta.mock,
);
if (!response.url) {
@ -327,6 +336,7 @@ export async function scrapeURLWithFireEngineTLSClient(
}),
request,
timeout,
meta.mock,
);
if (!response.url) {

View File

@ -4,6 +4,7 @@ import { z } from "zod";
import { Action } from "../../../../lib/entities";
import { robustFetch } from "../../lib/fetch";
import { MockState } from "../../lib/mock";
export type FireEngineScrapeRequestCommon = {
url: string;
@ -69,6 +70,7 @@ export async function fireEngineScrape<
>(
logger: Logger,
request: FireEngineScrapeRequestCommon & Engine,
mock: MockState | null,
): Promise<z.infer<typeof schema>> {
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
@ -97,6 +99,7 @@ export async function fireEngineScrape<
logger: logger.child({ method: "fireEngineScrape/robustFetch" }),
schema,
tryCount: 3,
mock,
});
},
);

View File

@ -46,6 +46,7 @@ async function scrapePDFWithRunPodMU(
markdown: z.string(),
}),
}),
mock: meta.mock,
});
return {

View File

@ -29,6 +29,7 @@ export async function scrapeURLWithPlaywright(
pageStatusCode: z.number(),
pageError: z.string().optional(),
}),
mock: meta.mock,
}),
(async () => {
await new Promise((resolve) => setTimeout(() => resolve(null), timeout));

View File

@ -2,7 +2,7 @@ import { Logger } from "winston";
import * as Sentry from "@sentry/node";
import { Document, ScrapeOptions } from "../../controllers/v1/types";
import { logger } from "../../lib/logger";
import { logger as _logger } from "../../lib/logger";
import {
buildFallbackList,
Engine,
@ -24,6 +24,7 @@ import {
import { executeTransformers } from "./transformers";
import { LLMRefusalError } from "./transformers/llmExtract";
import { urlSpecificParams } from "./lib/urlSpecificParams";
import { loadMock, MockState } from "./lib/mock";
export type ScrapeUrlResponse = (
| {
@ -47,6 +48,7 @@ export type Meta = {
logger: Logger;
logs: any[];
featureFlags: Set<FeatureFlag>;
mock: MockState | null;
};
function buildFeatureFlags(
@ -110,12 +112,12 @@ function buildFeatureFlags(
// The meta object is usually immutable, except for the logs array, and in edge cases (e.g. a new feature is suddenly required)
// Having a meta object that is treated as immutable helps the code stay clean and easily traceable,
// while also retaining the benefits that WebScraper had from its OOP design.
function buildMetaObject(
async function buildMetaObject(
id: string,
url: string,
options: ScrapeOptions,
internalOptions: InternalOptions,
): Meta {
): Promise<Meta> {
const specParams =
urlSpecificParams[new URL(url).hostname.replace(/^www\./, "")];
if (specParams !== undefined) {
@ -126,7 +128,7 @@ function buildMetaObject(
);
}
const _logger = logger.child({
const logger = _logger.child({
module: "ScrapeURL",
scrapeId: id,
scrapeURL: url,
@ -138,9 +140,10 @@ function buildMetaObject(
url,
options,
internalOptions,
logger: _logger,
logger,
logs,
featureFlags: buildFeatureFlags(url, options, internalOptions),
mock: options.useMock !== undefined ? await loadMock(options.useMock, _logger) : null,
};
}
@ -299,7 +302,7 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
throw error;
} else {
Sentry.captureException(error);
meta.logger.info(
meta.logger.warn(
"An unexpected error happened while scraping with " + engine + ".",
{ error },
);
@ -362,7 +365,7 @@ export async function scrapeURL(
options: ScrapeOptions,
internalOptions: InternalOptions = {},
): Promise<ScrapeUrlResponse> {
const meta = buildMetaObject(id, url, options, internalOptions);
const meta = await buildMetaObject(id, url, options, internalOptions);
try {
while (true) {
try {

View File

@ -1,7 +1,7 @@
import { Logger } from "winston";
import { z, ZodError } from "zod";
import { v4 as uuid } from "uuid";
import * as Sentry from "@sentry/node";
import { MockState, saveMock } from "./mock";
export type RobustFetchParams<Schema extends z.Schema<any>> = {
url: string;
@ -16,6 +16,7 @@ export type RobustFetchParams<Schema extends z.Schema<any>> = {
requestId?: string;
tryCount?: number;
tryCooldown?: number;
mock: MockState | null;
};
export async function robustFetch<
@ -30,9 +31,10 @@ export async function robustFetch<
schema,
ignoreResponse = false,
ignoreFailure = false,
requestId = uuid(),
requestId = crypto.randomUUID(),
tryCount = 1,
tryCooldown,
mock
}: RobustFetchParams<Schema>): Promise<Output> {
const params = {
url,
@ -47,73 +49,108 @@ export async function robustFetch<
tryCooldown,
};
let request: Response;
try {
request = await fetch(url, {
method,
headers: {
...(body instanceof FormData
? {}
: body !== undefined
? {
"Content-Type": "application/json",
}
: {}),
...(headers !== undefined ? headers : {}),
},
...(body instanceof FormData
? {
body,
}
: body !== undefined
? {
body: JSON.stringify(body),
}
: {}),
});
} catch (error) {
if (!ignoreFailure) {
Sentry.captureException(error);
if (tryCount > 1) {
logger.debug(
"Request failed, trying " + (tryCount - 1) + " more times",
{ params, error, requestId },
);
return await robustFetch({
...params,
requestId,
tryCount: tryCount - 1,
});
} else {
logger.debug("Request failed", { params, error, requestId });
throw new Error("Request failed", {
cause: {
params,
requestId,
error,
},
});
}
} else {
return null as Output;
}
}
if (ignoreResponse === true) {
return null as Output;
}
const response = {
status: request.status,
headers: request.headers,
body: await request.text(), // NOTE: can this throw an exception?
let response: {
status: number;
headers: Headers,
body: string,
};
if (request.status >= 300) {
if (mock === null) {
let request: Response;
try {
request = await fetch(url, {
method,
headers: {
...(body instanceof FormData
? {}
: body !== undefined
? {
"Content-Type": "application/json",
}
: {}),
...(headers !== undefined ? headers : {}),
},
...(body instanceof FormData
? {
body,
}
: body !== undefined
? {
body: JSON.stringify(body),
}
: {}),
});
} catch (error) {
if (!ignoreFailure) {
Sentry.captureException(error);
if (tryCount > 1) {
logger.debug(
"Request failed, trying " + (tryCount - 1) + " more times",
{ params, error, requestId },
);
return await robustFetch({
...params,
requestId,
tryCount: tryCount - 1,
mock,
});
} else {
logger.debug("Request failed", { params, error, requestId });
throw new Error("Request failed", {
cause: {
params,
requestId,
error,
},
});
}
} else {
return null as Output;
}
}
if (ignoreResponse === true) {
return null as Output;
}
response = {
status: request.status,
headers: request.headers,
body: await request.text(), // NOTE: can this throw an exception?
};
} else {
if (ignoreResponse === true) {
return null as Output;
}
const makeRequestTypeId = (request: typeof mock["requests"][number]["options"]) => {
let out = request.url + ";" + request.method;
if (process.env.FIRE_ENGINE_BETA_URL && url.startsWith(process.env.FIRE_ENGINE_BETA_URL) && request.method === "POST") {
out += "f-e;" + request.body?.engine + ";" + request.body?.url;
}
return out;
}
const thisId = makeRequestTypeId(params);
const matchingMocks = mock.requests.filter(x => makeRequestTypeId(x.options) === thisId).sort((a,b) => a.time - b.time);
const nextI = mock.tracker[thisId] ?? 0;
mock.tracker[thisId] = nextI + 1;
if (!matchingMocks[nextI]) {
throw new Error("Failed to mock request -- no mock targets found.");
}
response = {
...(matchingMocks[nextI].result),
headers: new Headers(matchingMocks[nextI].result.headers),
};
}
if (response.status >= 300) {
if (tryCount > 1) {
logger.debug(
"Request sent failure status, trying " + (tryCount - 1) + " more times",
{ params, request, response, requestId },
{ params, response, requestId },
);
if (tryCooldown !== undefined) {
await new Promise((resolve) =>
@ -124,18 +161,17 @@ export async function robustFetch<
...params,
requestId,
tryCount: tryCount - 1,
mock,
});
} else {
logger.debug("Request sent failure status", {
params,
request,
response,
requestId,
});
throw new Error("Request sent failure status", {
cause: {
params,
request,
response,
requestId,
},
@ -143,20 +179,27 @@ export async function robustFetch<
}
}
if (mock === null) {
await saveMock({
...params,
logger: undefined,
schema: undefined,
headers: undefined,
}, response);
}
let data: Output;
try {
data = JSON.parse(response.body);
} catch (error) {
logger.debug("Request sent malformed JSON", {
params,
request,
response,
requestId,
});
throw new Error("Request sent malformed JSON", {
cause: {
params,
request,
response,
requestId,
},
@ -170,7 +213,6 @@ export async function robustFetch<
if (error instanceof ZodError) {
logger.debug("Response does not match provided schema", {
params,
request,
response,
requestId,
error,
@ -179,7 +221,6 @@ export async function robustFetch<
throw new Error("Response does not match provided schema", {
cause: {
params,
request,
response,
requestId,
error,
@ -189,7 +230,6 @@ export async function robustFetch<
} else {
logger.debug("Parsing response with provided schema failed", {
params,
request,
response,
requestId,
error,
@ -198,7 +238,6 @@ export async function robustFetch<
throw new Error("Parsing response with provided schema failed", {
cause: {
params,
request,
response,
requestId,
error,

View File

@ -0,0 +1,60 @@
import * as fs from "fs/promises";
import * as path from "path";
import { logger as _logger } from "../../../lib/logger";
import { Logger } from "winston";
// Directory that saveMock writes recorded request/response files into.
// NOTE(review): the replace() removes the first "dist/" occurrence from the path, presumably so that
// a compiled build saves into the source tree rather than the build output -- confirm.
const saveMocksDirPath = path.join(__dirname, "../mocks/").replace("dist/", "");
// Directory that loadMock reads named mock files (<name>.json) from.
const loadMocksDirPath = path.join(__dirname, "../../../__tests__/snips/mocks");
/**
 * Records one request/response pair as a JSON file in the save-mocks directory.
 *
 * Does nothing unless the FIRECRAWL_SAVE_MOCKS environment variable is set to a
 * non-empty value. The directory is created on demand, and the path of the
 * written file is printed to stdout.
 *
 * @param options - The request parameters to record.
 * @param result - The response to record.
 */
export async function saveMock(options: unknown, result: unknown) {
  if (process.env.FIRECRAWL_SAVE_MOCKS) {
    await fs.mkdir(saveMocksDirPath, { recursive: true });

    // Timestamp plus a random UUID gives every recording its own file name.
    const target = path.join(
      saveMocksDirPath,
      Date.now() + "-" + crypto.randomUUID() + ".json",
    );
    console.log(target);

    const record = {
      time: Date.now(),
      options,
      result,
    };
    await fs.writeFile(target, JSON.stringify(record, undefined, 4));
  }
}
/**
 * In-memory state of a loaded mock: the recorded requests plus a counter of
 * how many recordings of each request kind have already been handed out.
 */
export type MockState = {
// Recorded request/response pairs; loadMock fills this with the parsed contents of the mock file.
requests: {
// Timestamp written by saveMock (Date.now()) when the pair was recorded.
time: number,
// The request parameters as they were recorded.
options: {
url: string,
method: string,
body?: any,
ignoreResponse: boolean,
ignoreFailure: boolean,
tryCount: number,
tryCooldown?: number,
},
// The recorded response.
// NOTE(review): the recorded fixtures use { status, headers, body } -- confirm before tightening this type.
result: any,
}[],
// Per-request-kind counter, keyed by a string identifying the request kind.
tracker: Record<string, number>,
}
/**
 * Loads the mock file `<name>.json` from the mocks directory and wraps it in a
 * fresh MockState (empty tracker).
 *
 * Never throws: every failure is logged as a warning and reported as `null`.
 *
 * @param name - Mock name, without the `.json` extension.
 * @param logger - Logger used for warnings; defaults to the module-level logger.
 * @returns The loaded mock state, or `null` when the name resolves outside the
 *   mocks directory, the file cannot be read or parsed, or its content is not
 *   a JSON array.
 */
export async function loadMock(name: string, logger: Logger = _logger): Promise<MockState | null> {
  try {
    const mockPath = path.join(loadMocksDirPath, name + ".json");
    const relative = path.relative(loadMocksDirPath, mockPath);
    if (!relative || relative.startsWith("..") || path.isAbsolute(relative)) {
      // The name resolves outside the mocks directory (path traversal) -- refuse to read it.
      logger.warn("Refusing to load mock from outside the mocks directory!", {
        name,
        module: "scrapeURL:mock",
        method: "loadMock",
      });
      return null;
    }

    const load: unknown = JSON.parse(await fs.readFile(mockPath, "utf8"));
    if (!Array.isArray(load)) {
      // Consumers iterate over `requests`; anything but an array would crash
      // later, far away from the actual cause.
      throw new Error("Mock file does not contain an array of requests");
    }

    return {
      requests: load,
      tracker: {},
    };
  } catch (error) {
    logger.warn("Failed to load mock file!", { name, module: "scrapeURL:mock", method: "loadMock", error });
    return null;
  }
}