mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-13 02:59:03 +08:00
fix(scrapeURL/fire-engine): default to separate US-generic proxy list if no location is specified (FIR-728) (#1104)
* feat(location/country): default to us-generic
* add tests + fix mock
This commit is contained in:
parent
5c1b67511c
commit
5733b82e9d
@ -2,7 +2,7 @@
|
|||||||
{
|
{
|
||||||
"time": 1735911273239,
|
"time": 1735911273239,
|
||||||
"options": {
|
"options": {
|
||||||
"url": "http://default-fire-engine-api-service:8080/scrape",
|
"url": "<fire-engine>/scrape",
|
||||||
"method": "POST",
|
"method": "POST",
|
||||||
"body": {
|
"body": {
|
||||||
"url": "http://firecrawl.dev",
|
"url": "http://firecrawl.dev",
|
||||||
@ -27,7 +27,7 @@
|
|||||||
{
|
{
|
||||||
"time": 1735911273354,
|
"time": 1735911273354,
|
||||||
"options": {
|
"options": {
|
||||||
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"headers": {},
|
"headers": {},
|
||||||
"ignoreResponse": false,
|
"ignoreResponse": false,
|
||||||
@ -43,7 +43,7 @@
|
|||||||
{
|
{
|
||||||
"time": 1735911273720,
|
"time": 1735911273720,
|
||||||
"options": {
|
"options": {
|
||||||
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"headers": {},
|
"headers": {},
|
||||||
"ignoreResponse": false,
|
"ignoreResponse": false,
|
||||||
@ -59,7 +59,7 @@
|
|||||||
{
|
{
|
||||||
"time": 1735911274092,
|
"time": 1735911274092,
|
||||||
"options": {
|
"options": {
|
||||||
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"headers": {},
|
"headers": {},
|
||||||
"ignoreResponse": false,
|
"ignoreResponse": false,
|
||||||
@ -75,7 +75,7 @@
|
|||||||
{
|
{
|
||||||
"time": 1735911274467,
|
"time": 1735911274467,
|
||||||
"options": {
|
"options": {
|
||||||
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"headers": {},
|
"headers": {},
|
||||||
"ignoreResponse": false,
|
"ignoreResponse": false,
|
||||||
@ -91,7 +91,7 @@
|
|||||||
{
|
{
|
||||||
"time": 1735911274947,
|
"time": 1735911274947,
|
||||||
"options": {
|
"options": {
|
||||||
"url": "http://default-fire-engine-api-service:8080/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
"url": "<fire-engine>/scrape/ede37286-90db-4f60-8efb-76217dfcfa35!chrome-cdp",
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"headers": {},
|
"headers": {},
|
||||||
"ignoreResponse": false,
|
"ignoreResponse": false,
|
||||||
|
@ -35,4 +35,24 @@ describe("Scrape tests", () => {
|
|||||||
"this is fake data coming from the mocking system!",
|
"this is fake data coming from the mocking system!",
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("Location API", () => {
|
||||||
|
it.concurrent("works without specifying an explicit location", async () => {
|
||||||
|
const response = await scrape({
|
||||||
|
url: "https://iplocation.com",
|
||||||
|
});
|
||||||
|
|
||||||
|
expectScrapeToSucceed(response);
|
||||||
|
});
|
||||||
|
|
||||||
|
it.concurrent("works with country US", async () => {
|
||||||
|
const response = await scrape({
|
||||||
|
url: "https://iplocation.com",
|
||||||
|
location: { country: "US" },
|
||||||
|
});
|
||||||
|
|
||||||
|
expectScrapeToSucceed(response);
|
||||||
|
expect(response.body.data.markdown).toContain("| Country | United States |");
|
||||||
|
});
|
||||||
|
})
|
||||||
});
|
});
|
||||||
|
@ -154,13 +154,13 @@ export const scrapeOptions = z
|
|||||||
.string()
|
.string()
|
||||||
.optional()
|
.optional()
|
||||||
.refine(
|
.refine(
|
||||||
(val) => !val || Object.keys(countries).includes(val.toUpperCase()),
|
(val) => !val || Object.keys(countries).includes(val.toUpperCase()) || val === "US-generic",
|
||||||
{
|
{
|
||||||
message:
|
message:
|
||||||
"Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
|
"Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.transform((val) => (val ? val.toUpperCase() : "US")),
|
.transform((val) => (val ? val.toUpperCase() : "US-generic")),
|
||||||
languages: z.string().array().optional(),
|
languages: z.string().array().optional(),
|
||||||
})
|
})
|
||||||
.optional(),
|
.optional(),
|
||||||
@ -178,7 +178,7 @@ export const scrapeOptions = z
|
|||||||
"Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
|
"Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.transform((val) => (val ? val.toUpperCase() : "US")),
|
.transform((val) => (val ? val.toUpperCase() : "US-generic")),
|
||||||
languages: z.string().array().optional(),
|
languages: z.string().array().optional(),
|
||||||
})
|
})
|
||||||
.optional(),
|
.optional(),
|
||||||
|
@ -126,10 +126,14 @@ export async function robustFetch<
|
|||||||
const makeRequestTypeId = (
|
const makeRequestTypeId = (
|
||||||
request: (typeof mock)["requests"][number]["options"],
|
request: (typeof mock)["requests"][number]["options"],
|
||||||
) => {
|
) => {
|
||||||
let out = request.url + ";" + request.method;
|
let trueUrl = (process.env.FIRE_ENGINE_BETA_URL && request.url.startsWith(process.env.FIRE_ENGINE_BETA_URL))
|
||||||
|
? request.url.replace(process.env.FIRE_ENGINE_BETA_URL, "<fire-engine>")
|
||||||
|
: request.url;
|
||||||
|
|
||||||
|
let out = trueUrl + ";" + request.method;
|
||||||
if (
|
if (
|
||||||
process.env.FIRE_ENGINE_BETA_URL &&
|
process.env.FIRE_ENGINE_BETA_URL &&
|
||||||
url.startsWith(process.env.FIRE_ENGINE_BETA_URL) &&
|
(trueUrl.startsWith("<fire-engine>")) &&
|
||||||
request.method === "POST"
|
request.method === "POST"
|
||||||
) {
|
) {
|
||||||
out += "f-e;" + request.body?.engine + ";" + request.body?.url;
|
out += "f-e;" + request.body?.engine + ";" + request.body?.url;
|
||||||
|
@ -3,7 +3,7 @@ import * as path from "path";
|
|||||||
import { logger as _logger } from "../../../lib/logger";
|
import { logger as _logger } from "../../../lib/logger";
|
||||||
import { Logger } from "winston";
|
import { Logger } from "winston";
|
||||||
const saveMocksDirPath = path.join(__dirname, "../mocks/").replace("dist/", "");
|
const saveMocksDirPath = path.join(__dirname, "../mocks/").replace("dist/", "");
|
||||||
const loadMocksDirPath = path.join(__dirname, "../../../__tests__/snips/mocks");
|
const loadMocksDirPath = path.join(__dirname, "../../../__tests__/snips/mocks").replace("dist/", "");
|
||||||
|
|
||||||
export async function saveMock(options: unknown, result: unknown) {
|
export async function saveMock(options: unknown, result: unknown) {
|
||||||
if (process.env.FIRECRAWL_SAVE_MOCKS !== "true") return;
|
if (process.env.FIRECRAWL_SAVE_MOCKS !== "true") return;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user