fix(map): make sitemapOnly simpler

This commit is contained in:
Gergő Móricz 2024-11-15 21:14:32 +01:00
parent a4d3dba865
commit 1b032b05fa

View File

@ -61,6 +61,15 @@ export async function mapController(
sitemap.forEach((x) => {
links.push(x.url);
});
links = links.slice(1)
.map((x) => {
try {
return checkAndUpdateURLForMap(x).url.trim();
} catch (_) {
return null;
}
})
.filter((x) => x !== null) as string[];
// links = links.slice(1, limit); // don't slice, unnecessary
}
} else {
@ -139,8 +148,6 @@ export async function mapController(
}
}
}
// Perform cosine similarity between the search query and the list of links
if (req.body.search) {
const searchQuery = req.body.search.toLowerCase();
@ -168,6 +175,8 @@ export async function mapController(
// remove duplicates that could be due to http/https or www
links = removeDuplicateUrls(links);
links.slice(0, limit);
}
billTeam(req.auth.team_id, req.acuc?.sub_id, 1).catch((error) => {
logger.error(
@ -179,14 +188,12 @@ export async function mapController(
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
const linksToReturn = links.slice(0, limit);
logJob({
job_id: id,
success: links.length > 0,
message: "Map completed",
num_docs: linksToReturn.length,
docs: linksToReturn,
num_docs: links.length,
docs: links,
time_taken: timeTakenInSeconds,
team_id: req.auth.team_id,
mode: "map",
@ -199,7 +206,7 @@ export async function mapController(
return res.status(200).json({
success: true,
links: linksToReturn,
links: links,
scrape_id: req.body.origin?.includes("website") ? id : undefined,
});
}