mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 10:58:58 +08:00
Nick: fixed map search
This commit is contained in:
parent
27903247b6
commit
55dad82df1
@ -469,9 +469,7 @@ describe("POST /v1/map", () => {
|
|||||||
|
|
||||||
it.concurrent("should return a successful response with a valid API key", async () => {
|
it.concurrent("should return a successful response with a valid API key", async () => {
|
||||||
const mapRequest = {
|
const mapRequest = {
|
||||||
url: "https://roastmywebsite.ai",
|
url: "https://roastmywebsite.ai"
|
||||||
includeSubdomains: true,
|
|
||||||
search: "test",
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const response: ScrapeResponseRequestTest = await request(TEST_URL)
|
const response: ScrapeResponseRequestTest = await request(TEST_URL)
|
||||||
@ -491,6 +489,30 @@ describe("POST /v1/map", () => {
|
|||||||
expect(links.length).toBeGreaterThan(0);
|
expect(links.length).toBeGreaterThan(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it.concurrent("should return a successful response with a valid API key and search", async () => {
|
||||||
|
const mapRequest = {
|
||||||
|
url: "https://usemotion.com",
|
||||||
|
search: "pricing"
|
||||||
|
};
|
||||||
|
|
||||||
|
const response: ScrapeResponseRequestTest = await request(TEST_URL)
|
||||||
|
.post("/v1/map")
|
||||||
|
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
|
||||||
|
.set("Content-Type", "application/json")
|
||||||
|
.send(mapRequest);
|
||||||
|
|
||||||
|
expect(response.statusCode).toBe(200);
|
||||||
|
expect(response.body).toHaveProperty("success", true);
|
||||||
|
expect(response.body).toHaveProperty("links");
|
||||||
|
if (!("links" in response.body)) {
|
||||||
|
throw new Error("Expected response body to have 'links' property");
|
||||||
|
}
|
||||||
|
const links = response.body.links as unknown[];
|
||||||
|
expect(Array.isArray(links)).toBe(true);
|
||||||
|
expect(links.length).toBeGreaterThan(0);
|
||||||
|
expect(links[0]).toContain("usemotion.com/pricing");
|
||||||
|
});
|
||||||
|
|
||||||
it.concurrent("should return an error for invalid URL", async () => {
|
it.concurrent("should return an error for invalid URL", async () => {
|
||||||
const mapRequest = {
|
const mapRequest = {
|
||||||
url: "invalid-url",
|
url: "invalid-url",
|
||||||
|
@ -26,11 +26,10 @@ export async function mapController(
|
|||||||
const id = uuidv4();
|
const id = uuidv4();
|
||||||
let links: string[] = [req.body.url];
|
let links: string[] = [req.body.url];
|
||||||
|
|
||||||
const crawlerOptions = legacyCrawlerOptions(req.body);
|
|
||||||
|
|
||||||
const sc: StoredCrawl = {
|
const sc: StoredCrawl = {
|
||||||
originUrl: req.body.url,
|
originUrl: req.body.url,
|
||||||
crawlerOptions,
|
crawlerOptions: legacyCrawlerOptions(req.body),
|
||||||
pageOptions: {},
|
pageOptions: {},
|
||||||
team_id: req.auth.team_id,
|
team_id: req.auth.team_id,
|
||||||
createdAt: Date.now(),
|
createdAt: Date.now(),
|
||||||
@ -39,7 +38,7 @@ export async function mapController(
|
|||||||
const crawler = crawlToCrawler(id, sc);
|
const crawler = crawlToCrawler(id, sc);
|
||||||
|
|
||||||
const sitemap =
|
const sitemap =
|
||||||
sc.crawlerOptions.ignoreSitemap || req.body.search
|
req.body.ignoreSitemap
|
||||||
? null
|
? null
|
||||||
: await crawler.tryGetSitemap();
|
: await crawler.tryGetSitemap();
|
||||||
|
|
||||||
@ -58,16 +57,19 @@ export async function mapController(
|
|||||||
});
|
});
|
||||||
|
|
||||||
if (mapResults.length > 0) {
|
if (mapResults.length > 0) {
|
||||||
mapResults.map((x) => {
|
|
||||||
if (req.body.search) {
|
if (req.body.search) {
|
||||||
links.unshift(x.url);
|
// Ensure all map results are first, maintaining their order
|
||||||
|
links = [mapResults[0].url, ...mapResults.slice(1).map(x => x.url), ...links];
|
||||||
} else {
|
} else {
|
||||||
|
mapResults.map((x) => {
|
||||||
links.push(x.url);
|
links.push(x.url);
|
||||||
}
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
links = links.map((x) => checkAndUpdateURLForMap(x).url.trim());
|
||||||
|
|
||||||
|
|
||||||
links = links.map((x) => checkAndUpdateURLForMap(x).url);
|
|
||||||
|
|
||||||
// allows for subdomains to be included
|
// allows for subdomains to be included
|
||||||
links = links.filter((x) => isSameDomain(x, req.body.url));
|
links = links.filter((x) => isSameDomain(x, req.body.url));
|
||||||
|
@ -118,6 +118,7 @@ export const mapRequestSchema = crawlerOptions.extend({
|
|||||||
origin: z.string().optional().default("api"),
|
origin: z.string().optional().default("api"),
|
||||||
includeSubdomains: z.boolean().default(false),
|
includeSubdomains: z.boolean().default(false),
|
||||||
search: z.string().optional(),
|
search: z.string().optional(),
|
||||||
|
ignoreSitemap: z.boolean().default(false),
|
||||||
});
|
});
|
||||||
|
|
||||||
// export type MapRequest = {
|
// export type MapRequest = {
|
||||||
|
@ -113,7 +113,7 @@ export const checkAndUpdateURLForMap = (url: string) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// remove any query params
|
// remove any query params
|
||||||
url = url.split("?")[0];
|
url = url.split("?")[0].trim();
|
||||||
|
|
||||||
return { urlObj: typedUrlObj, url: url };
|
return { urlObj: typedUrlObj, url: url };
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user