mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-13 00:48:58 +08:00
(fix/search) Search logs fix (#1491)
* Update search.ts * Update search.ts
This commit is contained in:
parent
e10d4c7b0c
commit
e532a96b0c
@ -95,9 +95,10 @@ async function scrapeSearchResult(
|
|||||||
mode: "single_urls" as Mode,
|
mode: "single_urls" as Mode,
|
||||||
team_id: options.teamId,
|
team_id: options.teamId,
|
||||||
scrapeOptions: options.scrapeOptions,
|
scrapeOptions: options.scrapeOptions,
|
||||||
internalOptions: { teamId: options.teamId },
|
internalOptions: { teamId: options.teamId, useCache: true },
|
||||||
origin: options.origin,
|
origin: options.origin,
|
||||||
is_scrape: true,
|
is_scrape: true,
|
||||||
|
|
||||||
},
|
},
|
||||||
{},
|
{},
|
||||||
jobId,
|
jobId,
|
||||||
@ -157,6 +158,13 @@ export async function searchController(
|
|||||||
method: "searchController",
|
method: "searchController",
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let responseData: SearchResponse = {
|
||||||
|
success: true,
|
||||||
|
data: [],
|
||||||
|
};
|
||||||
|
const startTime = new Date().getTime();
|
||||||
|
const costTracking = new CostTracking();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
req.body = searchRequestSchema.parse(req.body);
|
req.body = searchRequestSchema.parse(req.body);
|
||||||
|
|
||||||
@ -165,8 +173,6 @@ export async function searchController(
|
|||||||
origin: req.body.origin,
|
origin: req.body.origin,
|
||||||
});
|
});
|
||||||
|
|
||||||
const startTime = new Date().getTime();
|
|
||||||
|
|
||||||
let limit = req.body.limit;
|
let limit = req.body.limit;
|
||||||
|
|
||||||
// Buffer results by 50% to account for filtered URLs
|
// Buffer results by 50% to account for filtered URLs
|
||||||
@ -196,90 +202,69 @@ export async function searchController(
|
|||||||
|
|
||||||
if (searchResults.length === 0) {
|
if (searchResults.length === 0) {
|
||||||
logger.info("No search results found");
|
logger.info("No search results found");
|
||||||
return res.status(200).json({
|
responseData.warning = "No search results found";
|
||||||
success: true,
|
} else if (
|
||||||
data: [],
|
|
||||||
warning: "No search results found",
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
if (
|
|
||||||
!req.body.scrapeOptions.formats ||
|
!req.body.scrapeOptions.formats ||
|
||||||
req.body.scrapeOptions.formats.length === 0
|
req.body.scrapeOptions.formats.length === 0
|
||||||
) {
|
) {
|
||||||
billTeam(req.auth.team_id, req.acuc?.sub_id, searchResults.length).catch(
|
responseData.data = searchResults.map((r) => ({
|
||||||
(error) => {
|
url: r.url,
|
||||||
logger.error(
|
title: r.title,
|
||||||
`Failed to bill team ${req.auth.team_id} for ${searchResults.length} credits: ${error}`,
|
description: r.description,
|
||||||
);
|
})) as Document[];
|
||||||
},
|
} else {
|
||||||
|
logger.info("Scraping search results");
|
||||||
|
const scrapePromises = searchResults.map((result) =>
|
||||||
|
scrapeSearchResult(result, {
|
||||||
|
teamId: req.auth.team_id,
|
||||||
|
origin: req.body.origin,
|
||||||
|
timeout: req.body.timeout,
|
||||||
|
scrapeOptions: req.body.scrapeOptions,
|
||||||
|
}, logger, costTracking),
|
||||||
);
|
);
|
||||||
return res.status(200).json({
|
|
||||||
success: true,
|
const docs = await Promise.all(scrapePromises);
|
||||||
data: searchResults.map((r) => ({
|
logger.info("Scraping completed", {
|
||||||
url: r.url,
|
num_docs: docs.length,
|
||||||
title: r.title,
|
|
||||||
description: r.description,
|
|
||||||
})) as Document[],
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const filteredDocs = docs.filter(
|
||||||
|
(doc) =>
|
||||||
|
doc.serpResults || (doc.markdown && doc.markdown.trim().length > 0),
|
||||||
|
);
|
||||||
|
|
||||||
|
logger.info("Filtering completed", {
|
||||||
|
num_docs: filteredDocs.length,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (filteredDocs.length === 0) {
|
||||||
|
responseData.data = docs;
|
||||||
|
responseData.warning = "No content found in search results";
|
||||||
|
} else {
|
||||||
|
responseData.data = filteredDocs;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const costTracking = new CostTracking();
|
// Bill team once for all successful results
|
||||||
|
billTeam(req.auth.team_id, req.acuc?.sub_id, responseData.data.length).catch((error) => {
|
||||||
// Scrape each non-blocked result, handling timeouts individually
|
|
||||||
logger.info("Scraping search results");
|
|
||||||
const scrapePromises = searchResults.map((result) =>
|
|
||||||
scrapeSearchResult(result, {
|
|
||||||
teamId: req.auth.team_id,
|
|
||||||
origin: req.body.origin,
|
|
||||||
timeout: req.body.timeout,
|
|
||||||
scrapeOptions: req.body.scrapeOptions,
|
|
||||||
}, logger, costTracking),
|
|
||||||
);
|
|
||||||
|
|
||||||
const docs = await Promise.all(scrapePromises);
|
|
||||||
logger.info("Scraping completed", {
|
|
||||||
num_docs: docs.length,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Bill for successful scrapes only
|
|
||||||
billTeam(req.auth.team_id, req.acuc?.sub_id, docs.length).catch((error) => {
|
|
||||||
logger.error(
|
logger.error(
|
||||||
`Failed to bill team ${req.auth.team_id} for ${docs.length} credits: ${error}`,
|
`Failed to bill team ${req.auth.team_id} for ${responseData.data.length} credits: ${error}`,
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Filter out empty content but keep docs with SERP results
|
|
||||||
const filteredDocs = docs.filter(
|
|
||||||
(doc) =>
|
|
||||||
doc.serpResults || (doc.markdown && doc.markdown.trim().length > 0),
|
|
||||||
);
|
|
||||||
|
|
||||||
logger.info("Filtering completed", {
|
|
||||||
num_docs: filteredDocs.length,
|
|
||||||
});
|
|
||||||
|
|
||||||
if (filteredDocs.length === 0) {
|
|
||||||
return res.status(200).json({
|
|
||||||
success: true,
|
|
||||||
data: docs,
|
|
||||||
warning: "No content found in search results",
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
const endTime = new Date().getTime();
|
const endTime = new Date().getTime();
|
||||||
const timeTakenInSeconds = (endTime - startTime) / 1000;
|
const timeTakenInSeconds = (endTime - startTime) / 1000;
|
||||||
|
|
||||||
logger.info("Logging job", {
|
logger.info("Logging job", {
|
||||||
num_docs: filteredDocs.length,
|
num_docs: responseData.data.length,
|
||||||
time_taken: timeTakenInSeconds,
|
time_taken: timeTakenInSeconds,
|
||||||
});
|
});
|
||||||
|
|
||||||
logJob({
|
logJob({
|
||||||
job_id: jobId,
|
job_id: jobId,
|
||||||
success: true,
|
success: true,
|
||||||
num_docs: filteredDocs.length,
|
num_docs: responseData.data.length,
|
||||||
docs: filteredDocs,
|
docs: responseData.data,
|
||||||
time_taken: timeTakenInSeconds,
|
time_taken: timeTakenInSeconds,
|
||||||
team_id: req.auth.team_id,
|
team_id: req.auth.team_id,
|
||||||
mode: "search",
|
mode: "search",
|
||||||
@ -288,10 +273,8 @@ export async function searchController(
|
|||||||
cost_tracking: costTracking,
|
cost_tracking: costTracking,
|
||||||
});
|
});
|
||||||
|
|
||||||
return res.status(200).json({
|
return res.status(200).json(responseData);
|
||||||
success: true,
|
|
||||||
data: filteredDocs,
|
|
||||||
});
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
if (
|
if (
|
||||||
error instanceof Error &&
|
error instanceof Error &&
|
||||||
|
Loading…
x
Reference in New Issue
Block a user