diff --git a/apps/api/package.json b/apps/api/package.json
index 670dfc7a..059659c0 100644
--- a/apps/api/package.json
+++ b/apps/api/package.json
@@ -15,6 +15,7 @@
"test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
"test:full": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_withAuth)'",
"test:prod": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_full_withAuth)'",
+ "test:snips": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false src/__tests__/snips/*.test.ts",
"workers": "nodemon --exec ts-node src/services/queue-worker.ts",
"worker:production": "node dist/src/services/queue-worker.js",
"mongo-docker": "docker run -d -p 2717:27017 -v ./mongo-data:/data/db --name mongodb mongo:latest",
@@ -37,6 +38,7 @@
"@types/jest": "^29.5.12",
"@types/node": "^20.14.1",
"@types/pdf-parse": "^1.1.4",
+ "@types/supertest": "^6.0.2",
"body-parser": "^1.20.1",
"express": "^4.18.2",
"jest": "^29.6.3",
diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml
index 082a200f..dbf4f35a 100644
--- a/apps/api/pnpm-lock.yaml
+++ b/apps/api/pnpm-lock.yaml
@@ -267,6 +267,9 @@ importers:
'@types/pdf-parse':
specifier: ^1.1.4
version: 1.1.4
+ '@types/supertest':
+ specifier: ^6.0.2
+ version: 6.0.2
body-parser:
specifier: ^1.20.1
version: 1.20.2
@@ -1516,6 +1519,9 @@ packages:
'@types/connect@3.4.38':
resolution: {integrity: sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug==}
+ '@types/cookiejar@2.1.5':
+ resolution: {integrity: sha512-he+DHOWReW0nghN24E1WUqM0efK4kI9oTqDm6XmK8ZPe2djZ90BSNdGnIyCLzCPw7/pogPlGbzI2wHGGmi4O/Q==}
+
'@types/cors@2.8.17':
resolution: {integrity: sha512-8CGDvrBj1zgo2qE+oS3pOCyYNqCPryMWY2bGfwA0dcfopWGgxs+78df0Rs3rc9THP4JkOhLsAa+15VdpAqkcUA==}
@@ -1549,6 +1555,9 @@ packages:
'@types/jest@29.5.12':
resolution: {integrity: sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==}
+ '@types/methods@1.1.4':
+ resolution: {integrity: sha512-ymXWVrDiCxTBE3+RIrrP533E70eA+9qu7zdWoHuOmGujkYtzf4HQF96b8nwHLqhuf4ykX61IGRIB38CC6/sImQ==}
+
'@types/mime@1.3.5':
resolution: {integrity: sha512-/pyBZWSLD2n0dcHE3hq8s8ZvcETHtEuF+3E7XVt0Ig2nvsVQXdghHVcEkIWjy9A0wKfTn97a/PSDYohKIlnP/w==}
@@ -1597,6 +1606,12 @@ packages:
'@types/stack-utils@2.0.3':
resolution: {integrity: sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==}
+ '@types/superagent@8.1.9':
+ resolution: {integrity: sha512-pTVjI73witn+9ILmoJdajHGW2jkSaOzhiFYF1Rd3EQ94kymLqB9PjD9ISg7WaALC7+dCHT0FGe9T2LktLq/3GQ==}
+
+ '@types/supertest@6.0.2':
+ resolution: {integrity: sha512-137ypx2lk/wTQbW6An6safu9hXmajAifU/s7szAHLN/FeIm5w7yR0Wkl9fdJMRSHwOn4HLAI0DaB2TOORuhPDg==}
+
'@types/triple-beam@1.3.5':
resolution: {integrity: sha512-6WaYesThRMCl19iryMYP7/x2OVgCtbIVflDGFpWnb9irXI3UjYE4AzmYuiUKY1AJstGijoY+MgUszMgRxIYTYw==}
@@ -6566,6 +6581,8 @@ snapshots:
dependencies:
'@types/node': 20.14.1
+ '@types/cookiejar@2.1.5': {}
+
'@types/cors@2.8.17':
dependencies:
'@types/node': 20.14.1
@@ -6613,6 +6630,8 @@ snapshots:
expect: 29.7.0
pretty-format: 29.7.0
+ '@types/methods@1.1.4': {}
+
'@types/mime@1.3.5': {}
'@types/mysql@2.15.22':
@@ -6667,6 +6686,18 @@ snapshots:
'@types/stack-utils@2.0.3': {}
+ '@types/superagent@8.1.9':
+ dependencies:
+ '@types/cookiejar': 2.1.5
+ '@types/methods': 1.1.4
+ '@types/node': 20.14.1
+ form-data: 4.0.0
+
+ '@types/supertest@6.0.2':
+ dependencies:
+ '@types/methods': 1.1.4
+ '@types/superagent': 8.1.9
+
'@types/triple-beam@1.3.5': {}
'@types/uuid@9.0.8': {}
diff --git a/apps/api/src/__tests__/snips/mocks/mocking-works-properly.json b/apps/api/src/__tests__/snips/mocks/mocking-works-properly.json
new file mode 100644
index 00000000..5609e6c2
--- /dev/null
+++ b/apps/api/src/__tests__/snips/mocks/mocking-works-properly.json
@@ -0,0 +1,107 @@
+[
+ {
+ "time": 1735911273239,
+ "options": {
+ "url": "http://default-fire-engine-api-service:8080/scrape",
+ "method": "POST",
+ "body": {
+ "url": "http://firecrawl.dev",
+ "engine": "chrome-cdp",
+ "instantReturn": true,
+ "skipTlsVerification": false,
+ "priority": 10,
+ "mobile": false,
+ "timeout": 15000
+ },
+ "headers": {},
+ "ignoreResponse": false,
+ "ignoreFailure": false,
+ "tryCount": 3
+ },
+ "result": {
+ "status": 200,
+ "headers": {},
+ "body": "{\"jobId\":\"ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp\",\"processing\":true}"
+ }
+ },
+ {
+ "time": 1735911273354,
+ "options": {
+ "url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
+ "method": "GET",
+ "headers": {},
+ "ignoreResponse": false,
+ "ignoreFailure": false,
+ "tryCount": 1
+ },
+ "result": {
+ "status": 200,
+ "headers": {},
+ "body": "{\"jobId\":\"ede37286-90db-4f60-8efb-76217dfcfa35\",\"state\":\"prioritized\",\"processing\":true}"
+ }
+ },
+ {
+ "time": 1735911273720,
+ "options": {
+ "url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
+ "method": "GET",
+ "headers": {},
+ "ignoreResponse": false,
+ "ignoreFailure": false,
+ "tryCount": 1
+ },
+ "result": {
+ "status": 200,
+ "headers": {},
+ "body": "{\"jobId\":\"ede37286-90db-4f60-8efb-76217dfcfa35\",\"state\":\"active\",\"processing\":true}"
+ }
+ },
+ {
+ "time": 1735911274092,
+ "options": {
+ "url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
+ "method": "GET",
+ "headers": {},
+ "ignoreResponse": false,
+ "ignoreFailure": false,
+ "tryCount": 1
+ },
+ "result": {
+ "status": 200,
+ "headers": {},
+ "body": "{\"jobId\":\"ede37286-90db-4f60-8efb-76217dfcfa35\",\"state\":\"active\",\"processing\":true}"
+ }
+ },
+ {
+ "time": 1735911274467,
+ "options": {
+ "url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
+ "method": "GET",
+ "headers": {},
+ "ignoreResponse": false,
+ "ignoreFailure": false,
+ "tryCount": 1
+ },
+ "result": {
+ "status": 200,
+ "headers": {},
+ "body": "{\"jobId\":\"ede37286-90db-4f60-8efb-76217dfcfa35\",\"state\":\"active\",\"processing\":true}"
+ }
+ },
+ {
+ "time": 1735911274947,
+ "options": {
+ "url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
+ "method": "GET",
+ "headers": {},
+ "ignoreResponse": false,
+ "ignoreFailure": false,
+ "tryCount": 1
+ },
+ "result": {
+ "status": 200,
+ "headers": {},
+ "body": "{\"jobId\":\"ede37286-90db-4f60-8efb-76217dfcfa35\",\"state\":\"completed\",\"processing\":false,\"timeTaken\":1.204,\"content\":\"this is fake data coming from the mocking system!\",\"url\":\"https://www.firecrawl.dev/\",\"screenshots\":[],\"actionContent\":[],\"pageStatusCode\":200,\"responseHeaders\":{\"X-DNS-Prefetch-Control\":\"off\",\"age\":\"0\",\"cache-control\":\"private, no-cache, no-store, max-age=0, must-revalidate\",\"content-encoding\":\"br\",\"content-type\":\"text/html; charset=utf-8\",\"date\":\"Fri, 03 Jan 2025 13:34:34 GMT\",\"link\":\"; rel=preload; as=\\\"font\\\"; crossorigin=\\\"\\\"; type=\\\"font/woff2\\\", ; rel=preload; as=\\\"font\\\"; crossorigin=\\\"\\\"; type=\\\"font/woff2\\\", ; rel=preload; as=\\\"font\\\"; crossorigin=\\\"\\\"; type=\\\"font/woff2\\\"\",\"permissions-policy\":\"keyboard-map=(), attribution-reporting=(), run-ad-auction=(), private-state-token-redemption=(), private-state-token-issuance=(), join-ad-interest-group=(), idle-detection=(), compute-pressure=(), browsing-topics=()\",\"server\":\"Vercel\",\"strict-transport-security\":\"max-age=63072000\",\"vary\":\"RSC, Next-Router-State-Tree, Next-Router-Prefetch\",\"x-matched-path\":\"/\",\"x-powered-by\":\"Next.js\",\"x-vercel-cache\":\"MISS\",\"x-vercel-id\":\"iad1::iad1::bs88l-1735911273932-1f7bba7a8b45\"},\"invalidTlsCert\":false,\"file\":null}"
+ }
+ }
+]
\ No newline at end of file
diff --git a/apps/api/src/__tests__/snips/scrape.test.ts b/apps/api/src/__tests__/snips/scrape.test.ts
new file mode 100644
index 00000000..c337f4f8
--- /dev/null
+++ b/apps/api/src/__tests__/snips/scrape.test.ts
@@ -0,0 +1,36 @@
+import request from "supertest";
+import { configDotenv } from "dotenv";
+import { ScrapeRequestInput } from "../../controllers/v1/types";
+
+configDotenv();
+const TEST_URL = "http://127.0.0.1:3002";
+
+/**
+ * Posts a scrape request to the API server at TEST_URL.
+ *
+ * @param body - JSON payload sent to the /v1/scrape endpoint.
+ * @returns The supertest response for the request.
+ */
+async function scrape(body: ScrapeRequestInput) {
+  // The bearer token comes from the environment loaded by configDotenv().
+  const bearerToken = `Bearer ${process.env.TEST_API_KEY}`;
+  const pendingRequest = request(TEST_URL)
+    .post("/v1/scrape")
+    .set("Authorization", bearerToken)
+    .set("Content-Type", "application/json");
+
+  return await pendingRequest.send(body);
+}
+
+/**
+ * Asserts that a scrape response succeeded: HTTP status 200, `success: true`
+ * in the body, and a `data` field whose type is "object".
+ *
+ * @param response - The resolved result of `scrape()`.
+ */
+function expectScrapeToSucceed(response: Awaited<ReturnType<typeof scrape>>) {
+  expect(response.statusCode).toBe(200);
+  expect(response.body.success).toBe(true);
+  expect(typeof response.body.data).toBe("object");
+}
+
+// Tests for the v1 scrape endpoint that run against mock fixtures.
+describe("Scrape tests", () => {
+  it("mocking works properly", async () => {
+    // Relies on the falsified fixture "mocking-works-properly". The expected
+    // text is deliberately fake: if the mock were bypassed and real data were
+    // returned, firecrawl.dev's actual markdown could never match it.
+    const expectedMarkdown = "this is fake data coming from the mocking system!";
+    const scrapeRequest = {
+      url: "http://firecrawl.dev",
+      useMock: "mocking-works-properly",
+    };
+
+    const res = await scrape(scrapeRequest);
+
+    expectScrapeToSucceed(res);
+    expect(res.body.data.markdown).toBe(expectedMarkdown);
+  });
+});
\ No newline at end of file
diff --git a/apps/api/src/__tests__/snips/utils/collect-mocks.js b/apps/api/src/__tests__/snips/utils/collect-mocks.js
new file mode 100644
index 00000000..3b879136
--- /dev/null
+++ b/apps/api/src/__tests__/snips/utils/collect-mocks.js
@@ -0,0 +1,12 @@
+const path = require("path");
+const fs = require("fs");
+
+const mocksDirPath = path.join(__dirname, "../../../scraper/scrapeURL/mocks");
+const files = fs.readdirSync(mocksDirPath);
+
+const contents = files.map(x => JSON.parse(fs.readFileSync(path.join(mocksDirPath, x), "utf8")));
+
+fs.writeFileSync(
+ path.join(__dirname, "../mocks/" + process.argv[2] + ".json"),
+ JSON.stringify(contents, undefined, 4),
+);
\ No newline at end of file
diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts
index 7aebd560..1160d871 100644
--- a/apps/api/src/controllers/v1/types.ts
+++ b/apps/api/src/controllers/v1/types.ts
@@ -181,6 +181,7 @@ export const scrapeOptions = z
skipTlsVerification: z.boolean().default(false),
removeBase64Images: z.boolean().default(true),
fastMode: z.boolean().default(false),
+ useMock: z.string().optional(),
})
.strict(strictMessage);
diff --git a/apps/api/src/scraper/scrapeURL/.gitignore b/apps/api/src/scraper/scrapeURL/.gitignore
new file mode 100644
index 00000000..3307228f
--- /dev/null
+++ b/apps/api/src/scraper/scrapeURL/.gitignore
@@ -0,0 +1 @@
+/mocks
\ No newline at end of file
diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts
index 8b7b86fb..b3af6103 100644
--- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts
@@ -3,12 +3,8 @@ import * as Sentry from "@sentry/node";
import { z } from "zod";
import { robustFetch } from "../../lib/fetch";
-import {
- ActionError,
- EngineError,
- SiteError,
- UnsupportedFileError,
-} from "../../error";
+import { ActionError, EngineError, SiteError, UnsupportedFileError } from "../../error";
+import { MockState } from "../../lib/mock";
const successSchema = z.object({
jobId: z.string(),
@@ -82,6 +78,7 @@ export class StillProcessingError extends Error {
export async function fireEngineCheckStatus(
logger: Logger,
jobId: string,
+ mock: MockState | null,
): Promise {
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
@@ -105,6 +102,7 @@ export async function fireEngineCheckStatus(
}
: {}),
},
+ mock,
});
},
);
diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/delete.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/delete.ts
index d5fe58cb..1d4464d9 100644
--- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/delete.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/delete.ts
@@ -2,8 +2,9 @@ import { Logger } from "winston";
import * as Sentry from "@sentry/node";
import { robustFetch } from "../../lib/fetch";
+import { MockState } from "../../lib/mock";
-export async function fireEngineDelete(logger: Logger, jobId: string) {
+export async function fireEngineDelete(logger: Logger, jobId: string, mock: MockState | null) {
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
await Sentry.startSpan(
@@ -28,6 +29,7 @@ export async function fireEngineDelete(logger: Logger, jobId: string) {
ignoreResponse: true,
ignoreFailure: true,
logger: logger.child({ method: "fireEngineDelete/robustFetch", jobId }),
+ mock,
});
},
);
diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts
index 43da361c..62a50f60 100644
--- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts
@@ -24,6 +24,7 @@ import * as Sentry from "@sentry/node";
import { Action } from "../../../../lib/entities";
import { specialtyScrapeCheck } from "../utils/specialtyHandler";
import { fireEngineDelete } from "./delete";
+import { MockState, saveMock } from "../../lib/mock";
// This function does not take `Meta` on purpose. It may not access any
// meta values to construct the request -- that must be done by the
@@ -37,10 +38,12 @@ async function performFireEngineScrape<
logger: Logger,
request: FireEngineScrapeRequestCommon & Engine,
timeout: number,
+ mock: MockState | null,
): Promise {
const scrape = await fireEngineScrape(
logger.child({ method: "fireEngineScrape" }),
request,
+ mock,
);
const startTime = Date.now();
@@ -57,6 +60,7 @@ async function performFireEngineScrape<
afterErrors: errors,
}),
scrape.jobId,
+ mock,
);
throw new Error("Error limit hit. See e.cause.errors for errors.", {
cause: { errors },
@@ -78,6 +82,7 @@ async function performFireEngineScrape<
status = await fireEngineCheckStatus(
logger.child({ method: "fireEngineCheckStatus" }),
scrape.jobId,
+ mock,
);
} catch (error) {
if (error instanceof StillProcessingError) {
@@ -94,6 +99,7 @@ async function performFireEngineScrape<
afterError: error,
}),
scrape.jobId,
+ mock,
);
logger.debug("Fire-engine scrape job failed.", {
error,
@@ -131,6 +137,7 @@ async function performFireEngineScrape<
method: "performFireEngineScrape/fireEngineDelete",
}),
scrape.jobId,
+ mock,
);
return status;
@@ -200,6 +207,7 @@ export async function scrapeURLWithFireEngineChromeCDP(
}),
request,
timeout,
+ meta.mock,
);
if (
@@ -274,6 +282,7 @@ export async function scrapeURLWithFireEnginePlaywright(
}),
request,
timeout,
+ meta.mock,
);
if (!response.url) {
@@ -327,6 +336,7 @@ export async function scrapeURLWithFireEngineTLSClient(
}),
request,
timeout,
+ meta.mock,
);
if (!response.url) {
diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts
index de6ac3f4..4248024a 100644
--- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/scrape.ts
@@ -4,6 +4,7 @@ import { z } from "zod";
import { Action } from "../../../../lib/entities";
import { robustFetch } from "../../lib/fetch";
+import { MockState } from "../../lib/mock";
export type FireEngineScrapeRequestCommon = {
url: string;
@@ -69,6 +70,7 @@ export async function fireEngineScrape<
>(
logger: Logger,
request: FireEngineScrapeRequestCommon & Engine,
+ mock: MockState | null,
): Promise> {
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
@@ -97,6 +99,7 @@ export async function fireEngineScrape<
logger: logger.child({ method: "fireEngineScrape/robustFetch" }),
schema,
tryCount: 3,
+ mock,
});
},
);
diff --git a/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts b/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts
index 101c9a53..a3678615 100644
--- a/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/pdf/index.ts
@@ -46,6 +46,7 @@ async function scrapePDFWithRunPodMU(
markdown: z.string(),
}),
}),
+ mock: meta.mock,
});
return {
diff --git a/apps/api/src/scraper/scrapeURL/engines/playwright/index.ts b/apps/api/src/scraper/scrapeURL/engines/playwright/index.ts
index edcd50c0..123a1c68 100644
--- a/apps/api/src/scraper/scrapeURL/engines/playwright/index.ts
+++ b/apps/api/src/scraper/scrapeURL/engines/playwright/index.ts
@@ -29,6 +29,7 @@ export async function scrapeURLWithPlaywright(
pageStatusCode: z.number(),
pageError: z.string().optional(),
}),
+ mock: meta.mock,
}),
(async () => {
await new Promise((resolve) => setTimeout(() => resolve(null), timeout));
diff --git a/apps/api/src/scraper/scrapeURL/index.ts b/apps/api/src/scraper/scrapeURL/index.ts
index 26577675..3df5020d 100644
--- a/apps/api/src/scraper/scrapeURL/index.ts
+++ b/apps/api/src/scraper/scrapeURL/index.ts
@@ -2,7 +2,7 @@ import { Logger } from "winston";
import * as Sentry from "@sentry/node";
import { Document, ScrapeOptions } from "../../controllers/v1/types";
-import { logger } from "../../lib/logger";
+import { logger as _logger } from "../../lib/logger";
import {
buildFallbackList,
Engine,
@@ -24,6 +24,7 @@ import {
import { executeTransformers } from "./transformers";
import { LLMRefusalError } from "./transformers/llmExtract";
import { urlSpecificParams } from "./lib/urlSpecificParams";
+import { loadMock, MockState } from "./lib/mock";
export type ScrapeUrlResponse = (
| {
@@ -47,6 +48,7 @@ export type Meta = {
logger: Logger;
logs: any[];
featureFlags: Set;
+ mock: MockState | null;
};
function buildFeatureFlags(
@@ -110,12 +112,12 @@ function buildFeatureFlags(
// The meta object is usually immutable, except for the logs array, and in edge cases (e.g. a new feature is suddenly required)
// Having a meta object that is treated as immutable helps the code stay clean and easily tracable,
// while also retaining the benefits that WebScraper had from its OOP design.
-function buildMetaObject(
+async function buildMetaObject(
id: string,
url: string,
options: ScrapeOptions,
internalOptions: InternalOptions,
-): Meta {
+): Promise<Meta> {
const specParams =
urlSpecificParams[new URL(url).hostname.replace(/^www\./, "")];
if (specParams !== undefined) {
@@ -126,7 +128,7 @@ function buildMetaObject(
);
}
- const _logger = logger.child({
+ const logger = _logger.child({
module: "ScrapeURL",
scrapeId: id,
scrapeURL: url,
@@ -138,9 +140,10 @@ function buildMetaObject(
url,
options,
internalOptions,
- logger: _logger,
+ logger,
logs,
featureFlags: buildFeatureFlags(url, options, internalOptions),
+ mock: options.useMock !== undefined ? await loadMock(options.useMock, _logger) : null,
};
}
@@ -299,7 +302,7 @@ async function scrapeURLLoop(meta: Meta): Promise {
throw error;
} else {
Sentry.captureException(error);
- meta.logger.info(
+ meta.logger.warn(
"An unexpected error happened while scraping with " + engine + ".",
{ error },
);
@@ -362,7 +365,7 @@ export async function scrapeURL(
options: ScrapeOptions,
internalOptions: InternalOptions = {},
): Promise {
- const meta = buildMetaObject(id, url, options, internalOptions);
+ const meta = await buildMetaObject(id, url, options, internalOptions);
try {
while (true) {
try {
diff --git a/apps/api/src/scraper/scrapeURL/lib/fetch.ts b/apps/api/src/scraper/scrapeURL/lib/fetch.ts
index 897587a9..56b91687 100644
--- a/apps/api/src/scraper/scrapeURL/lib/fetch.ts
+++ b/apps/api/src/scraper/scrapeURL/lib/fetch.ts
@@ -1,7 +1,7 @@
import { Logger } from "winston";
import { z, ZodError } from "zod";
-import { v4 as uuid } from "uuid";
import * as Sentry from "@sentry/node";
+import { MockState, saveMock } from "./mock";
export type RobustFetchParams> = {
url: string;
@@ -16,6 +16,7 @@ export type RobustFetchParams> = {
requestId?: string;
tryCount?: number;
tryCooldown?: number;
+ mock: MockState | null;
};
export async function robustFetch<
@@ -30,9 +31,10 @@ export async function robustFetch<
schema,
ignoreResponse = false,
ignoreFailure = false,
- requestId = uuid(),
+ requestId = crypto.randomUUID(),
tryCount = 1,
tryCooldown,
+ mock
}: RobustFetchParams): Promise