mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 18:48:59 +08:00
(fix/search) Search logs fix (#1491)
* Update search.ts * Update search.ts
This commit is contained in:
parent
e10d4c7b0c
commit
e532a96b0c
@ -95,9 +95,10 @@ async function scrapeSearchResult(
|
||||
mode: "single_urls" as Mode,
|
||||
team_id: options.teamId,
|
||||
scrapeOptions: options.scrapeOptions,
|
||||
internalOptions: { teamId: options.teamId },
|
||||
internalOptions: { teamId: options.teamId, useCache: true },
|
||||
origin: options.origin,
|
||||
is_scrape: true,
|
||||
|
||||
},
|
||||
{},
|
||||
jobId,
|
||||
@ -157,6 +158,13 @@ export async function searchController(
|
||||
method: "searchController",
|
||||
});
|
||||
|
||||
let responseData: SearchResponse = {
|
||||
success: true,
|
||||
data: [],
|
||||
};
|
||||
const startTime = new Date().getTime();
|
||||
const costTracking = new CostTracking();
|
||||
|
||||
try {
|
||||
req.body = searchRequestSchema.parse(req.body);
|
||||
|
||||
@ -165,8 +173,6 @@ export async function searchController(
|
||||
origin: req.body.origin,
|
||||
});
|
||||
|
||||
const startTime = new Date().getTime();
|
||||
|
||||
let limit = req.body.limit;
|
||||
|
||||
// Buffer results by 50% to account for filtered URLs
|
||||
@ -196,90 +202,69 @@ export async function searchController(
|
||||
|
||||
if (searchResults.length === 0) {
|
||||
logger.info("No search results found");
|
||||
return res.status(200).json({
|
||||
success: true,
|
||||
data: [],
|
||||
warning: "No search results found",
|
||||
});
|
||||
}
|
||||
|
||||
if (
|
||||
responseData.warning = "No search results found";
|
||||
} else if (
|
||||
!req.body.scrapeOptions.formats ||
|
||||
req.body.scrapeOptions.formats.length === 0
|
||||
) {
|
||||
billTeam(req.auth.team_id, req.acuc?.sub_id, searchResults.length).catch(
|
||||
(error) => {
|
||||
logger.error(
|
||||
`Failed to bill team ${req.auth.team_id} for ${searchResults.length} credits: ${error}`,
|
||||
);
|
||||
},
|
||||
responseData.data = searchResults.map((r) => ({
|
||||
url: r.url,
|
||||
title: r.title,
|
||||
description: r.description,
|
||||
})) as Document[];
|
||||
} else {
|
||||
logger.info("Scraping search results");
|
||||
const scrapePromises = searchResults.map((result) =>
|
||||
scrapeSearchResult(result, {
|
||||
teamId: req.auth.team_id,
|
||||
origin: req.body.origin,
|
||||
timeout: req.body.timeout,
|
||||
scrapeOptions: req.body.scrapeOptions,
|
||||
}, logger, costTracking),
|
||||
);
|
||||
return res.status(200).json({
|
||||
success: true,
|
||||
data: searchResults.map((r) => ({
|
||||
url: r.url,
|
||||
title: r.title,
|
||||
description: r.description,
|
||||
})) as Document[],
|
||||
|
||||
const docs = await Promise.all(scrapePromises);
|
||||
logger.info("Scraping completed", {
|
||||
num_docs: docs.length,
|
||||
});
|
||||
|
||||
const filteredDocs = docs.filter(
|
||||
(doc) =>
|
||||
doc.serpResults || (doc.markdown && doc.markdown.trim().length > 0),
|
||||
);
|
||||
|
||||
logger.info("Filtering completed", {
|
||||
num_docs: filteredDocs.length,
|
||||
});
|
||||
|
||||
if (filteredDocs.length === 0) {
|
||||
responseData.data = docs;
|
||||
responseData.warning = "No content found in search results";
|
||||
} else {
|
||||
responseData.data = filteredDocs;
|
||||
}
|
||||
}
|
||||
|
||||
const costTracking = new CostTracking();
|
||||
|
||||
// Scrape each non-blocked result, handling timeouts individually
|
||||
logger.info("Scraping search results");
|
||||
const scrapePromises = searchResults.map((result) =>
|
||||
scrapeSearchResult(result, {
|
||||
teamId: req.auth.team_id,
|
||||
origin: req.body.origin,
|
||||
timeout: req.body.timeout,
|
||||
scrapeOptions: req.body.scrapeOptions,
|
||||
}, logger, costTracking),
|
||||
);
|
||||
|
||||
const docs = await Promise.all(scrapePromises);
|
||||
logger.info("Scraping completed", {
|
||||
num_docs: docs.length,
|
||||
});
|
||||
|
||||
// Bill for successful scrapes only
|
||||
billTeam(req.auth.team_id, req.acuc?.sub_id, docs.length).catch((error) => {
|
||||
// Bill team once for all successful results
|
||||
billTeam(req.auth.team_id, req.acuc?.sub_id, responseData.data.length).catch((error) => {
|
||||
logger.error(
|
||||
`Failed to bill team ${req.auth.team_id} for ${docs.length} credits: ${error}`,
|
||||
`Failed to bill team ${req.auth.team_id} for ${responseData.data.length} credits: ${error}`,
|
||||
);
|
||||
});
|
||||
|
||||
// Filter out empty content but keep docs with SERP results
|
||||
const filteredDocs = docs.filter(
|
||||
(doc) =>
|
||||
doc.serpResults || (doc.markdown && doc.markdown.trim().length > 0),
|
||||
);
|
||||
|
||||
logger.info("Filtering completed", {
|
||||
num_docs: filteredDocs.length,
|
||||
});
|
||||
|
||||
if (filteredDocs.length === 0) {
|
||||
return res.status(200).json({
|
||||
success: true,
|
||||
data: docs,
|
||||
warning: "No content found in search results",
|
||||
});
|
||||
}
|
||||
|
||||
const endTime = new Date().getTime();
|
||||
const timeTakenInSeconds = (endTime - startTime) / 1000;
|
||||
|
||||
logger.info("Logging job", {
|
||||
num_docs: filteredDocs.length,
|
||||
num_docs: responseData.data.length,
|
||||
time_taken: timeTakenInSeconds,
|
||||
});
|
||||
|
||||
logJob({
|
||||
job_id: jobId,
|
||||
success: true,
|
||||
num_docs: filteredDocs.length,
|
||||
docs: filteredDocs,
|
||||
num_docs: responseData.data.length,
|
||||
docs: responseData.data,
|
||||
time_taken: timeTakenInSeconds,
|
||||
team_id: req.auth.team_id,
|
||||
mode: "search",
|
||||
@ -288,10 +273,8 @@ export async function searchController(
|
||||
cost_tracking: costTracking,
|
||||
});
|
||||
|
||||
return res.status(200).json({
|
||||
success: true,
|
||||
data: filteredDocs,
|
||||
});
|
||||
return res.status(200).json(responseData);
|
||||
|
||||
} catch (error) {
|
||||
if (
|
||||
error instanceof Error &&
|
||||
|
Loading…
x
Reference in New Issue
Block a user