Nick: fixed map search

This commit is contained in:
Nicolas 2024-08-20 12:17:53 -03:00
parent 27903247b6
commit 55dad82df1
4 changed files with 39 additions and 14 deletions

View File

@ -469,9 +469,7 @@ describe("POST /v1/map", () => {
it.concurrent("should return a successful response with a valid API key", async () => { it.concurrent("should return a successful response with a valid API key", async () => {
const mapRequest = { const mapRequest = {
url: "https://roastmywebsite.ai", url: "https://roastmywebsite.ai"
includeSubdomains: true,
search: "test",
}; };
const response: ScrapeResponseRequestTest = await request(TEST_URL) const response: ScrapeResponseRequestTest = await request(TEST_URL)
@ -491,6 +489,30 @@ describe("POST /v1/map", () => {
expect(links.length).toBeGreaterThan(0); expect(links.length).toBeGreaterThan(0);
}); });
// Map request that carries a `search` term: the call must succeed, return a
// non-empty `links` array, and the first link must point at the pricing page.
it.concurrent("should return a successful response with a valid API key and search", async () => {
  const payload = { url: "https://usemotion.com", search: "pricing" };
  const authHeader = `Bearer ${process.env.TEST_API_KEY}`;

  const response: ScrapeResponseRequestTest = await request(TEST_URL)
    .post("/v1/map")
    .set("Authorization", authHeader)
    .set("Content-Type", "application/json")
    .send(payload);

  // Transport-level and envelope checks first.
  expect(response.statusCode).toBe(200);
  expect(response.body).toHaveProperty("success", true);
  expect(response.body).toHaveProperty("links");

  // Explicit guard in addition to the matcher above; it also lets the `in`
  // check narrow the body before `links` is read.
  const hasLinks = "links" in response.body;
  if (!hasLinks) {
    throw new Error("Expected response body to have 'links' property");
  }

  const returnedLinks = response.body.links as unknown[];
  expect(Array.isArray(returnedLinks)).toBe(true);
  expect(returnedLinks.length).toBeGreaterThan(0);

  // The top-ranked link is expected to match the search term.
  const [firstLink] = returnedLinks;
  expect(firstLink).toContain("usemotion.com/pricing");
});
it.concurrent("should return an error for invalid URL", async () => { it.concurrent("should return an error for invalid URL", async () => {
const mapRequest = { const mapRequest = {
url: "invalid-url", url: "invalid-url",

View File

@ -26,11 +26,10 @@ export async function mapController(
const id = uuidv4(); const id = uuidv4();
let links: string[] = [req.body.url]; let links: string[] = [req.body.url];
const crawlerOptions = legacyCrawlerOptions(req.body);
const sc: StoredCrawl = { const sc: StoredCrawl = {
originUrl: req.body.url, originUrl: req.body.url,
crawlerOptions, crawlerOptions: legacyCrawlerOptions(req.body),
pageOptions: {}, pageOptions: {},
team_id: req.auth.team_id, team_id: req.auth.team_id,
createdAt: Date.now(), createdAt: Date.now(),
@ -39,7 +38,7 @@ export async function mapController(
const crawler = crawlToCrawler(id, sc); const crawler = crawlToCrawler(id, sc);
const sitemap = const sitemap =
sc.crawlerOptions.ignoreSitemap || req.body.search req.body.ignoreSitemap
? null ? null
: await crawler.tryGetSitemap(); : await crawler.tryGetSitemap();
@ -58,16 +57,19 @@ export async function mapController(
}); });
if (mapResults.length > 0) { if (mapResults.length > 0) {
mapResults.map((x) => {
if (req.body.search) { if (req.body.search) {
links.unshift(x.url); // Ensure all map results are first, maintaining their order
links = [mapResults[0].url, ...mapResults.slice(1).map(x => x.url), ...links];
} else { } else {
mapResults.map((x) => {
links.push(x.url); links.push(x.url);
}
}); });
} }
}
links = links.map((x) => checkAndUpdateURLForMap(x).url.trim());
links = links.map((x) => checkAndUpdateURLForMap(x).url);
// allows for subdomains to be included // allows for subdomains to be included
links = links.filter((x) => isSameDomain(x, req.body.url)); links = links.filter((x) => isSameDomain(x, req.body.url));

View File

@ -118,6 +118,7 @@ export const mapRequestSchema = crawlerOptions.extend({
origin: z.string().optional().default("api"), origin: z.string().optional().default("api"),
includeSubdomains: z.boolean().default(false), includeSubdomains: z.boolean().default(false),
search: z.string().optional(), search: z.string().optional(),
ignoreSitemap: z.boolean().default(false),
}); });
// export type MapRequest = { // export type MapRequest = {

View File

@ -113,7 +113,7 @@ export const checkAndUpdateURLForMap = (url: string) => {
} }
// remove any query params // remove any query params
url = url.split("?")[0]; url = url.split("?")[0].trim();
return { urlObj: typedUrlObj, url: url }; return { urlObj: typedUrlObj, url: url };
}; };