fix(scrapeURL/fire-engine): default to separate US-generic proxy list if no location is specified (FIR-728) (#1104)

* feat(location/country): default to us-generic

* add tests + fix mock
This commit is contained in:
Gergő Móricz 2025-01-29 08:23:36 +01:00 committed by GitHub
parent 5c1b67511c
commit 5733b82e9d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 36 additions and 12 deletions

View File

@ -2,7 +2,7 @@
{
"time": 1735911273239,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape",
"url": "<fire-engine>/scrape",
"method": "POST",
"body": {
"url": "http://firecrawl.dev",
@ -27,7 +27,7 @@
{
"time": 1735911273354,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,
@ -43,7 +43,7 @@
{
"time": 1735911273720,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,
@ -59,7 +59,7 @@
{
"time": 1735911274092,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,
@ -75,7 +75,7 @@
{
"time": 1735911274467,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,
@ -91,7 +91,7 @@
{
"time": 1735911274947,
"options": {
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
"method": "GET",
"headers": {},
"ignoreResponse": false,

View File

@ -35,4 +35,24 @@ describe("Scrape tests", () => {
"this is fake data coming from the mocking system!",
);
});
// Scrape requests issued with and without an explicit `location` option.
describe("Location API", () => {
  // No `location` is sent; the only check is expectScrapeToSucceed.
  it.concurrent("works without specifying an explicit location", async () => {
    const result = await scrape({ url: "https://iplocation.com" });

    expectScrapeToSucceed(result);
  });

  // Country "US" is requested explicitly; the returned markdown must
  // contain the "United States" country table row.
  it.concurrent("works with country US", async () => {
    const result = await scrape({
      url: "https://iplocation.com",
      location: { country: "US" },
    });

    expectScrapeToSucceed(result);
    expect(result.body.data.markdown).toContain(
      "| Country | United States |",
    );
  });
});
});

View File

@ -154,13 +154,13 @@ export const scrapeOptions = z
.string()
.optional()
.refine(
(val) => !val || Object.keys(countries).includes(val.toUpperCase()),
(val) => !val || Object.keys(countries).includes(val.toUpperCase()) || val === "US-generic",
{
message:
"Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
},
)
.transform((val) => (val ? val.toUpperCase() : "US")),
.transform((val) => (val ? val.toUpperCase() : "US-generic")),
languages: z.string().array().optional(),
})
.optional(),
@ -178,7 +178,7 @@ export const scrapeOptions = z
"Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
},
)
.transform((val) => (val ? val.toUpperCase() : "US")),
.transform((val) => (val ? val.toUpperCase() : "US-generic")),
languages: z.string().array().optional(),
})
.optional(),

View File

@ -126,10 +126,14 @@ export async function robustFetch<
const makeRequestTypeId = (
request: (typeof mock)["requests"][number]["options"],
) => {
let out = request.url + ";" + request.method;
let trueUrl = (process.env.FIRE_ENGINE_BETA_URL && request.url.startsWith(process.env.FIRE_ENGINE_BETA_URL))
? request.url.replace(process.env.FIRE_ENGINE_BETA_URL, "<fire-engine>")
: request.url;
let out = trueUrl + ";" + request.method;
if (
process.env.FIRE_ENGINE_BETA_URL &&
url.startsWith(process.env.FIRE_ENGINE_BETA_URL) &&
(trueUrl.startsWith("<fire-engine>")) &&
request.method === "POST"
) {
out += "f-e;" + request.body?.engine + ";" + request.body?.url;

View File

@ -3,7 +3,7 @@ import * as path from "path";
import { logger as _logger } from "../../../lib/logger";
import { Logger } from "winston";
const saveMocksDirPath = path.join(__dirname, "../mocks/").replace("dist/", "");
const loadMocksDirPath = path.join(__dirname, "../../../__tests__/snips/mocks");
const loadMocksDirPath = path.join(__dirname, "../../../__tests__/snips/mocks").replace("dist/", "");
export async function saveMock(options: unknown, result: unknown) {
if (process.env.FIRECRAWL_SAVE_MOCKS !== "true") return;