Nick: fixed map search

This commit is contained in:
Nicolas 2024-08-20 12:17:53 -03:00
parent 27903247b6
commit 55dad82df1
4 changed files with 39 additions and 14 deletions

View File

@ -469,9 +469,7 @@ describe("POST /v1/map", () => {
it.concurrent("should return a successful response with a valid API key", async () => {
const mapRequest = {
url: "https://roastmywebsite.ai",
includeSubdomains: true,
search: "test",
url: "https://roastmywebsite.ai"
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@ -491,6 +489,30 @@ describe("POST /v1/map", () => {
expect(links.length).toBeGreaterThan(0);
});
// Verifies that POST /v1/map honors the `search` parameter: with a valid API
// key and search term "pricing", the first returned link should point at the
// matching page on the requested domain.
it.concurrent("should return a successful response with a valid API key and search", async () => {
  const payload = { url: "https://usemotion.com", search: "pricing" };

  const res: ScrapeResponseRequestTest = await request(TEST_URL)
    .post("/v1/map")
    .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
    .set("Content-Type", "application/json")
    .send(payload);

  expect(res.statusCode).toBe(200);
  expect(res.body).toHaveProperty("success", true);
  expect(res.body).toHaveProperty("links");
  // Runtime guard so the `as unknown[]` cast below is backed by a real check.
  if (!("links" in res.body)) {
    throw new Error("Expected response body to have 'links' property");
  }

  const returnedLinks = res.body.links as unknown[];
  expect(Array.isArray(returnedLinks)).toBe(true);
  expect(returnedLinks.length).toBeGreaterThan(0);
  // Search results are expected to be ordered first, so the best match leads.
  expect(returnedLinks[0]).toContain("usemotion.com/pricing");
});
it.concurrent("should return an error for invalid URL", async () => {
const mapRequest = {
url: "invalid-url",

View File

@ -26,11 +26,10 @@ export async function mapController(
const id = uuidv4();
let links: string[] = [req.body.url];
const crawlerOptions = legacyCrawlerOptions(req.body);
const sc: StoredCrawl = {
originUrl: req.body.url,
crawlerOptions,
crawlerOptions: legacyCrawlerOptions(req.body),
pageOptions: {},
team_id: req.auth.team_id,
createdAt: Date.now(),
@ -39,7 +38,7 @@ export async function mapController(
const crawler = crawlToCrawler(id, sc);
const sitemap =
sc.crawlerOptions.ignoreSitemap || req.body.search
req.body.ignoreSitemap
? null
: await crawler.tryGetSitemap();
@ -58,16 +57,19 @@ export async function mapController(
});
if (mapResults.length > 0) {
mapResults.map((x) => {
if (req.body.search) {
links.unshift(x.url);
} else {
if (req.body.search) {
// Ensure all map results are first, maintaining their order
links = [mapResults[0].url, ...mapResults.slice(1).map(x => x.url), ...links];
} else {
mapResults.map((x) => {
links.push(x.url);
}
});
});
}
}
links = links.map((x) => checkAndUpdateURLForMap(x).url);
links = links.map((x) => checkAndUpdateURLForMap(x).url.trim());
// allows for subdomains to be included
links = links.filter((x) => isSameDomain(x, req.body.url));

View File

@ -118,6 +118,7 @@ export const mapRequestSchema = crawlerOptions.extend({
origin: z.string().optional().default("api"),
includeSubdomains: z.boolean().default(false),
search: z.string().optional(),
ignoreSitemap: z.boolean().default(false),
});
// export type MapRequest = {

View File

@ -113,7 +113,7 @@ export const checkAndUpdateURLForMap = (url: string) => {
}
// remove any query params
url = url.split("?")[0];
url = url.split("?")[0].trim();
return { urlObj: typedUrlObj, url: url };
};