Nick: fixed map search

commit 55dad82df1
parent 27903247b6
@@ -469,9 +469,7 @@ describe("POST /v1/map", () => {
   it.concurrent("should return a successful response with a valid API key", async () => {
     const mapRequest = {
-      url: "https://roastmywebsite.ai",
-      includeSubdomains: true,
-      search: "test",
+      url: "https://roastmywebsite.ai"
     };
 
     const response: ScrapeResponseRequestTest = await request(TEST_URL)

@@ -491,6 +489,30 @@ describe("POST /v1/map", () => {
     expect(links.length).toBeGreaterThan(0);
   });
 
+  it.concurrent("should return a successful response with a valid API key and search", async () => {
+    const mapRequest = {
+      url: "https://usemotion.com",
+      search: "pricing"
+    };
+
+    const response: ScrapeResponseRequestTest = await request(TEST_URL)
+      .post("/v1/map")
+      .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+      .set("Content-Type", "application/json")
+      .send(mapRequest);
+
+    expect(response.statusCode).toBe(200);
+    expect(response.body).toHaveProperty("success", true);
+    expect(response.body).toHaveProperty("links");
+    if (!("links" in response.body)) {
+      throw new Error("Expected response body to have 'links' property");
+    }
+    const links = response.body.links as unknown[];
+    expect(Array.isArray(links)).toBe(true);
+    expect(links.length).toBeGreaterThan(0);
+    expect(links[0]).toContain("usemotion.com/pricing");
+  });
+
   it.concurrent("should return an error for invalid URL", async () => {
     const mapRequest = {
       url: "invalid-url",

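The new test above exercises search-aware mapping end to end. For reference, a minimal sketch of the same request outside Jest, assuming a running Firecrawl API; FIRECRAWL_URL and FIRECRAWL_API_KEY are placeholder environment variable names, not part of this commit:

// Sketch only: POST /v1/map with a search term, mirroring the test above.
// FIRECRAWL_URL and FIRECRAWL_API_KEY are placeholder environment variables.
const res = await fetch(`${process.env.FIRECRAWL_URL}/v1/map`, {
  method: "POST",
  headers: {
    Authorization: `Bearer ${process.env.FIRECRAWL_API_KEY}`,
    "Content-Type": "application/json",
  },
  body: JSON.stringify({ url: "https://usemotion.com", search: "pricing" }),
});
const body = (await res.json()) as { success: boolean; links: string[] };
// With search set, matching pages are expected first, e.g. a usemotion.com/pricing URL.
console.log(body.links[0]);
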
@@ -26,11 +26,10 @@ export async function mapController(
   const id = uuidv4();
   let links: string[] = [req.body.url];
 
-  const crawlerOptions = legacyCrawlerOptions(req.body);
 
   const sc: StoredCrawl = {
     originUrl: req.body.url,
-    crawlerOptions,
+    crawlerOptions: legacyCrawlerOptions(req.body),
     pageOptions: {},
     team_id: req.auth.team_id,
     createdAt: Date.now(),

@@ -39,7 +38,7 @@ export async function mapController(
   const crawler = crawlToCrawler(id, sc);
 
   const sitemap =
-    sc.crawlerOptions.ignoreSitemap || req.body.search
+    req.body.ignoreSitemap
       ? null
       : await crawler.tryGetSitemap();
 
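As reconstructed above, the sitemap lookup now keys off req.body.ignoreSitemap alone, whereas the previous condition also skipped the sitemap whenever a search term was present. A hedged illustration with made-up request values (simplified; the old check read the flag from the stored crawler options):

// Illustration with sample values: does the sitemap fetch get skipped?
const reqBody = { url: "https://usemotion.com", search: "pricing", ignoreSitemap: false };

const oldSkip = Boolean(reqBody.ignoreSitemap || reqBody.search); // true: a search term used to bypass the sitemap
const newSkip = Boolean(reqBody.ignoreSitemap);                   // false: the sitemap is still fetched
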
@@ -58,16 +57,19 @@ export async function mapController(
   });
 
   if (mapResults.length > 0) {
-    mapResults.map((x) => {
-      if (req.body.search) {
-        links.unshift(x.url);
-      } else {
+    if (req.body.search) {
+      // Ensure all map results are first, maintaining their order
+      links = [mapResults[0].url, ...mapResults.slice(1).map(x => x.url), ...links];
+    } else {
+      mapResults.map((x) => {
         links.push(x.url);
-      }
-    });
+      });
+    }
   }
 
-  links = links.map((x) => checkAndUpdateURLForMap(x).url);
+  links = links.map((x) => checkAndUpdateURLForMap(x).url.trim());
+
+
 
   // allows for subdomains to be included
   links = links.filter((x) => isSameDomain(x, req.body.url));
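The block above is the heart of the fix: with a search term, all map results are placed ahead of the seed URL and any sitemap links in their original order, instead of being unshifted one by one, which prepended them in reverse order. A standalone sketch of the new rule with hypothetical data:

// Standalone sketch of the new ordering rule, using made-up values.
let links: string[] = ["https://usemotion.com"]; // seed URL plus any sitemap links
const mapResults = [
  { url: "https://usemotion.com/pricing" },
  { url: "https://usemotion.com/pricing/teams" },
];
const search: string | undefined = "pricing";

if (search) {
  // All map results go first, in their original order, followed by the existing links.
  links = [mapResults[0].url, ...mapResults.slice(1).map((x) => x.url), ...links];
} else {
  // Without a search term, results are simply appended.
  for (const x of mapResults) links.push(x.url);
}

console.log(links[0]); // "https://usemotion.com/pricing" — the property the new test asserts
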
@@ -118,6 +118,7 @@ export const mapRequestSchema = crawlerOptions.extend({
   origin: z.string().optional().default("api"),
   includeSubdomains: z.boolean().default(false),
   search: z.string().optional(),
+  ignoreSitemap: z.boolean().default(false),
 });
 
 // export type MapRequest = {

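With ignoreSitemap added next to search, a map request that omits the flag now parses to false by default, so req.body.ignoreSitemap is always defined in the controller. A minimal zod sketch of just these fields (an illustrative stand-in, not the real mapRequestSchema, which extends crawlerOptions):

import { z } from "zod";

// Stripped-down stand-in for mapRequestSchema, showing only the fields relevant here.
const mapRequestSketch = z.object({
  url: z.string().url(),
  search: z.string().optional(),
  ignoreSitemap: z.boolean().default(false),
});

const parsed = mapRequestSketch.parse({ url: "https://usemotion.com", search: "pricing" });
console.log(parsed.ignoreSitemap); // false — sitemap lookup stays on unless explicitly disabled
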
@@ -113,7 +113,7 @@ export const checkAndUpdateURLForMap = (url: string) => {
   }
 
   // remove any query params
-  url = url.split("?")[0];
+  url = url.split("?")[0].trim();
 
   return { urlObj: typedUrlObj, url: url };
 };
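The added .trim() means a URL that picks up stray whitespace still normalizes cleanly after its query string is dropped; the controller applies the same trim when mapping links. A quick sketch with a hypothetical helper name:

// Hypothetical helper mirroring only the two touched lines: drop the query string, then trim.
function stripQueryAndTrim(url: string): string {
  return url.split("?")[0].trim();
}

console.log(stripQueryAndTrim(" https://usemotion.com/pricing?ref=nav"));
// -> "https://usemotion.com/pricing"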