(fix/search) Search logs fix (#1491)

* Update search.ts

* Update search.ts
This commit is contained in:
Nicolas 2025-04-22 17:12:10 -04:00 committed by GitHub
parent e10d4c7b0c
commit e532a96b0c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -95,9 +95,10 @@ async function scrapeSearchResult(
mode: "single_urls" as Mode, mode: "single_urls" as Mode,
team_id: options.teamId, team_id: options.teamId,
scrapeOptions: options.scrapeOptions, scrapeOptions: options.scrapeOptions,
internalOptions: { teamId: options.teamId }, internalOptions: { teamId: options.teamId, useCache: true },
origin: options.origin, origin: options.origin,
is_scrape: true, is_scrape: true,
}, },
{}, {},
jobId, jobId,
@@ -157,6 +158,13 @@ export async function searchController(
method: "searchController", method: "searchController",
}); });
let responseData: SearchResponse = {
success: true,
data: [],
};
const startTime = new Date().getTime();
const costTracking = new CostTracking();
try { try {
req.body = searchRequestSchema.parse(req.body); req.body = searchRequestSchema.parse(req.body);
@@ -165,8 +173,6 @@ export async function searchController(
origin: req.body.origin, origin: req.body.origin,
}); });
const startTime = new Date().getTime();
let limit = req.body.limit; let limit = req.body.limit;
// Buffer results by 50% to account for filtered URLs // Buffer results by 50% to account for filtered URLs
@@ -196,90 +202,69 @@ export async function searchController(
if (searchResults.length === 0) { if (searchResults.length === 0) {
logger.info("No search results found"); logger.info("No search results found");
return res.status(200).json({ responseData.warning = "No search results found";
success: true, } else if (
data: [],
warning: "No search results found",
});
}
if (
!req.body.scrapeOptions.formats || !req.body.scrapeOptions.formats ||
req.body.scrapeOptions.formats.length === 0 req.body.scrapeOptions.formats.length === 0
) { ) {
billTeam(req.auth.team_id, req.acuc?.sub_id, searchResults.length).catch( responseData.data = searchResults.map((r) => ({
(error) => { url: r.url,
logger.error( title: r.title,
`Failed to bill team ${req.auth.team_id} for ${searchResults.length} credits: ${error}`, description: r.description,
); })) as Document[];
}, } else {
logger.info("Scraping search results");
const scrapePromises = searchResults.map((result) =>
scrapeSearchResult(result, {
teamId: req.auth.team_id,
origin: req.body.origin,
timeout: req.body.timeout,
scrapeOptions: req.body.scrapeOptions,
}, logger, costTracking),
); );
return res.status(200).json({
success: true, const docs = await Promise.all(scrapePromises);
data: searchResults.map((r) => ({ logger.info("Scraping completed", {
url: r.url, num_docs: docs.length,
title: r.title,
description: r.description,
})) as Document[],
}); });
const filteredDocs = docs.filter(
(doc) =>
doc.serpResults || (doc.markdown && doc.markdown.trim().length > 0),
);
logger.info("Filtering completed", {
num_docs: filteredDocs.length,
});
if (filteredDocs.length === 0) {
responseData.data = docs;
responseData.warning = "No content found in search results";
} else {
responseData.data = filteredDocs;
}
} }
const costTracking = new CostTracking(); // Bill team once for all successful results
billTeam(req.auth.team_id, req.acuc?.sub_id, responseData.data.length).catch((error) => {
// Scrape each non-blocked result, handling timeouts individually
logger.info("Scraping search results");
const scrapePromises = searchResults.map((result) =>
scrapeSearchResult(result, {
teamId: req.auth.team_id,
origin: req.body.origin,
timeout: req.body.timeout,
scrapeOptions: req.body.scrapeOptions,
}, logger, costTracking),
);
const docs = await Promise.all(scrapePromises);
logger.info("Scraping completed", {
num_docs: docs.length,
});
// Bill for successful scrapes only
billTeam(req.auth.team_id, req.acuc?.sub_id, docs.length).catch((error) => {
logger.error( logger.error(
`Failed to bill team ${req.auth.team_id} for ${docs.length} credits: ${error}`, `Failed to bill team ${req.auth.team_id} for ${responseData.data.length} credits: ${error}`,
); );
}); });
// Filter out empty content but keep docs with SERP results
const filteredDocs = docs.filter(
(doc) =>
doc.serpResults || (doc.markdown && doc.markdown.trim().length > 0),
);
logger.info("Filtering completed", {
num_docs: filteredDocs.length,
});
if (filteredDocs.length === 0) {
return res.status(200).json({
success: true,
data: docs,
warning: "No content found in search results",
});
}
const endTime = new Date().getTime(); const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000; const timeTakenInSeconds = (endTime - startTime) / 1000;
logger.info("Logging job", { logger.info("Logging job", {
num_docs: filteredDocs.length, num_docs: responseData.data.length,
time_taken: timeTakenInSeconds, time_taken: timeTakenInSeconds,
}); });
logJob({ logJob({
job_id: jobId, job_id: jobId,
success: true, success: true,
num_docs: filteredDocs.length, num_docs: responseData.data.length,
docs: filteredDocs, docs: responseData.data,
time_taken: timeTakenInSeconds, time_taken: timeTakenInSeconds,
team_id: req.auth.team_id, team_id: req.auth.team_id,
mode: "search", mode: "search",
@@ -288,10 +273,8 @@ export async function searchController(
cost_tracking: costTracking, cost_tracking: costTracking,
}); });
return res.status(200).json({ return res.status(200).json(responseData);
success: true,
data: filteredDocs,
});
} catch (error) { } catch (error) {
if ( if (
error instanceof Error && error instanceof Error &&