mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 12:39:05 +08:00
fix(scrapeURL/fire-engine): default to separate US-generic proxy list if no location is specified (FIR-728) (#1104)
* feat(location/country): default to us-generic
* add tests + fix mock
This commit is contained in:
parent
5c1b67511c
commit
5733b82e9d
@ -2,7 +2,7 @@
|
||||
{
|
||||
"time": 1735911273239,
|
||||
"options": {
|
||||
"url": "http://default-fire-engine-api-service:8080/scrape",
|
||||
"url": "<fire-engine>/scrape",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"url": "http://firecrawl.dev",
|
||||
@ -27,7 +27,7 @@
|
||||
{
|
||||
"time": 1735911273354,
|
||||
"options": {
|
||||
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||
"method": "GET",
|
||||
"headers": {},
|
||||
"ignoreResponse": false,
|
||||
@ -43,7 +43,7 @@
|
||||
{
|
||||
"time": 1735911273720,
|
||||
"options": {
|
||||
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||
"method": "GET",
|
||||
"headers": {},
|
||||
"ignoreResponse": false,
|
||||
@ -59,7 +59,7 @@
|
||||
{
|
||||
"time": 1735911274092,
|
||||
"options": {
|
||||
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||
"method": "GET",
|
||||
"headers": {},
|
||||
"ignoreResponse": false,
|
||||
@ -75,7 +75,7 @@
|
||||
{
|
||||
"time": 1735911274467,
|
||||
"options": {
|
||||
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||
"method": "GET",
|
||||
"headers": {},
|
||||
"ignoreResponse": false,
|
||||
@ -91,7 +91,7 @@
|
||||
{
|
||||
"time": 1735911274947,
|
||||
"options": {
|
||||
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||
"method": "GET",
|
||||
"headers": {},
|
||||
"ignoreResponse": false,
|
||||
|
@ -35,4 +35,24 @@ describe("Scrape tests", () => {
|
||||
"this is fake data coming from the mocking system!",
|
||||
);
|
||||
});
|
||||
|
||||
describe("Location API", () => {
|
||||
it.concurrent("works without specifying an explicit location", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://iplocation.com",
|
||||
});
|
||||
|
||||
expectScrapeToSucceed(response);
|
||||
});
|
||||
|
||||
it.concurrent("works with country US", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://iplocation.com",
|
||||
location: { country: "US" },
|
||||
});
|
||||
|
||||
expectScrapeToSucceed(response);
|
||||
expect(response.body.data.markdown).toContain("| Country | United States |");
|
||||
});
|
||||
})
|
||||
});
|
||||
|
@ -154,13 +154,13 @@ export const scrapeOptions = z
|
||||
.string()
|
||||
.optional()
|
||||
.refine(
|
||||
(val) => !val || Object.keys(countries).includes(val.toUpperCase()),
|
||||
(val) => !val || Object.keys(countries).includes(val.toUpperCase()) || val === "US-generic",
|
||||
{
|
||||
message:
|
||||
"Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
|
||||
},
|
||||
)
|
||||
.transform((val) => (val ? val.toUpperCase() : "US")),
|
||||
.transform((val) => (val ? val.toUpperCase() : "US-generic")),
|
||||
languages: z.string().array().optional(),
|
||||
})
|
||||
.optional(),
|
||||
@ -178,7 +178,7 @@ export const scrapeOptions = z
|
||||
"Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
|
||||
},
|
||||
)
|
||||
.transform((val) => (val ? val.toUpperCase() : "US")),
|
||||
.transform((val) => (val ? val.toUpperCase() : "US-generic")),
|
||||
languages: z.string().array().optional(),
|
||||
})
|
||||
.optional(),
|
||||
|
@ -126,10 +126,14 @@ export async function robustFetch<
|
||||
const makeRequestTypeId = (
|
||||
request: (typeof mock)["requests"][number]["options"],
|
||||
) => {
|
||||
let out = request.url + ";" + request.method;
|
||||
let trueUrl = (process.env.FIRE_ENGINE_BETA_URL && request.url.startsWith(process.env.FIRE_ENGINE_BETA_URL))
|
||||
? request.url.replace(process.env.FIRE_ENGINE_BETA_URL, "<fire-engine>")
|
||||
: request.url;
|
||||
|
||||
let out = trueUrl + ";" + request.method;
|
||||
if (
|
||||
process.env.FIRE_ENGINE_BETA_URL &&
|
||||
url.startsWith(process.env.FIRE_ENGINE_BETA_URL) &&
|
||||
(trueUrl.startsWith("<fire-engine>")) &&
|
||||
request.method === "POST"
|
||||
) {
|
||||
out += "f-e;" + request.body?.engine + ";" + request.body?.url;
|
||||
|
@ -3,7 +3,7 @@ import * as path from "path";
|
||||
import { logger as _logger } from "../../../lib/logger";
|
||||
import { Logger } from "winston";
|
||||
const saveMocksDirPath = path.join(__dirname, "../mocks/").replace("dist/", "");
|
||||
const loadMocksDirPath = path.join(__dirname, "../../../__tests__/snips/mocks");
|
||||
const loadMocksDirPath = path.join(__dirname, "../../../__tests__/snips/mocks").replace("dist/", "");
|
||||
|
||||
export async function saveMock(options: unknown, result: unknown) {
|
||||
if (process.env.FIRECRAWL_SAVE_MOCKS !== "true") return;
|
||||
|
Loading…
x
Reference in New Issue
Block a user