feat(extract): log failed extracts

This commit is contained in:
parent aa20246571
commit d82f44c93e
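
The pattern this commit applies throughout is: when an extract fails (no search results, a failed multi-entity transform step, or any uncaught error in performExtraction), emit a job record with success: false via logJob before returning or rethrowing. Below is a minimal sketch of that payload for context; the field names are copied from the diff, while the helper wrapper, parameter types, and the startedAt timestamp are illustrative assumptions rather than the actual Firecrawl source.

// Sketch of the failure-logging pattern added in this commit (not the actual source).
// The logJob field names mirror the calls in the diff; the types, the wrapper
// function, and the startedAt timestamp are assumptions made for illustration.
type FailedExtractContext = {
  extractId: string;
  teamId: string;
  request: { urls?: string[]; origin?: string };
  sources: Record<string, string[]>;
  costTracking: unknown;
};

async function logFailedExtract(
  logJob: (job: Record<string, unknown>) => Promise<unknown>,
  ctx: FailedExtractContext,
  error: unknown,
  startedAt: number,
): Promise<void> {
  await logJob({
    job_id: ctx.extractId,
    success: false,
    message:
      error instanceof Error ? error.message : "An unexpected error occurred",
    num_docs: 1,
    docs: [],
    time_taken: (Date.now() - startedAt) / 1000,
    team_id: ctx.teamId,
    mode: "extract",
    url: ctx.request.urls?.join(", ") || "",
    scrapeOptions: ctx.request,
    origin: ctx.request.origin ?? "api",
    num_tokens: 0,
    tokens_billed: 0,
    sources: ctx.sources,
    cost_tracking: ctx.costTracking,
  });
}

In the diff itself, this payload appears in the "No search results found" branch, in the catch around the multi-entity transform, and in the new outer catch that rethrows after logging.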
@@ -36,7 +36,7 @@ import { getCachedDocs, saveCachedDocs } from "./helpers/cached-docs";
import { normalizeUrl } from "../canonical-url";
import { search } from "../../search";
import { buildRephraseToSerpPrompt } from "./build-prompts";
import fs from "fs/promises";

interface ExtractServiceOptions {
  request: ExtractRequest;
  teamId: string;
@@ -109,6 +109,8 @@ export async function performExtraction(
    teamId,
  });

  try {

    // If no URLs are provided, generate URLs from the prompt
    if ((!request.urls || request.urls.length === 0) && request.prompt) {
      logger.debug("Generating URLs from prompt...", {
@@ -134,6 +136,23 @@ export async function performExtraction(
      logger.error("No search results found", {
        query: request.prompt,
      });
      logJob({
        job_id: extractId,
        success: false,
        message: "No search results found",
        num_docs: 1,
        docs: [],
        time_taken: (new Date().getTime() - Date.now()) / 1000,
        team_id: teamId,
        mode: "extract",
        url: request.urls?.join(", ") || "",
        scrapeOptions: request,
        origin: request.origin ?? "api",
        num_tokens: 0,
        tokens_billed: 0,
        sources,
        cost_tracking: costTracking,
      });
      return {
        success: false,
        error: "No search results found",
@@ -549,8 +568,6 @@ export async function performExtraction(
        );
        logger.debug("Successfully transformed results with sourceTracker");
      } catch (error) {
        const errorLog = `[${new Date().toISOString()}] Error in sourceTracker.transformResults: ${JSON.stringify(error, null, 2)}\n`;
        await fs.appendFile('logs/extraction-errors.log', errorLog);
        logger.error(`Error in sourceTracker.transformResults:`, { error });
        throw error;
      }
@@ -562,8 +579,6 @@ export async function performExtraction(
        );
        logger.debug("Successfully transformed array to object");
      } catch (error) {
        const errorLog = `[${new Date().toISOString()}] Error in transformArrayToObject: ${JSON.stringify(error, null, 2)}\n`;
        await fs.appendFile('logs/extraction-errors.log', errorLog);
        logger.error(`Error in transformArrayToObject:`, { error });
        throw error;
      }
@@ -573,8 +588,6 @@ export async function performExtraction(
        sourceTracker.trackPreDeduplicationSources(multiEntityResult);
        logger.debug("Successfully tracked pre-deduplication sources");
      } catch (error) {
        const errorLog = `[${new Date().toISOString()}] Error in trackPreDeduplicationSources: ${JSON.stringify(error, null, 2)}\n`;
        await fs.appendFile('logs/extraction-errors.log', errorLog);
        logger.error(`Error in trackPreDeduplicationSources:`, { error });
        throw error;
      }
@@ -584,8 +597,6 @@ export async function performExtraction(
        multiEntityResult = deduplicateObjectsArray(multiEntityResult);
        logger.debug("Successfully deduplicated objects array");
      } catch (error) {
        const errorLog = `[${new Date().toISOString()}] Error in deduplicateObjectsArray: ${JSON.stringify(error, null, 2)}\n`;
        await fs.appendFile('logs/extraction-errors.log', errorLog);
        logger.error(`Error in deduplicateObjectsArray:`, { error });
        throw error;
      }
@@ -594,8 +605,6 @@ export async function performExtraction(
        multiEntityResult = mergeNullValObjs(multiEntityResult);
        logger.debug("Successfully merged null value objects");
      } catch (error) {
        const errorLog = `[${new Date().toISOString()}] Error in mergeNullValObjs: ${JSON.stringify(error, null, 2)}\n`;
        await fs.appendFile('logs/extraction-errors.log', errorLog);
        logger.error(`Error in mergeNullValObjs:`, { error });
        throw error;
      }
@@ -609,14 +618,10 @@ export async function performExtraction(
        Object.assign(sources, multiEntitySources);
        logger.debug("Successfully mapped sources to final items");
      } catch (error) {
        const errorLog = `[${new Date().toISOString()}] Error in mapSourcesToFinalItems: ${JSON.stringify(error, null, 2)}\n`;
        await fs.appendFile('logs/extraction-errors.log', errorLog);
        logger.error(`Error in mapSourcesToFinalItems:`, { error });
        throw error;
      }
    } catch (error) {
      const errorLog = `[${new Date().toISOString()}] Failed to transform array to object\nError: ${JSON.stringify(error, null, 2)}\nStack: ${error.stack}\nMultiEntityResult: ${JSON.stringify(multiEntityResult, null, 2)}\nMultiEntityCompletions: ${JSON.stringify(multiEntityCompletions, null, 2)}\nMultiEntitySchema: ${JSON.stringify(multiEntitySchema, null, 2)}\n\n`;
      await fs.appendFile('logs/extraction-errors.log', errorLog);
      logger.error(`Failed to transform array to object`, {
        error,
        errorMessage: error.message,
@@ -625,6 +630,23 @@ export async function performExtraction(
        multiEntityCompletions: JSON.stringify(multiEntityCompletions),
        multiEntitySchema: JSON.stringify(multiEntitySchema)
      });
      logJob({
        job_id: extractId,
        success: false,
        message: (error instanceof Error ? error.message : "Failed to transform array to object"),
        num_docs: 1,
        docs: [],
        time_taken: (new Date().getTime() - Date.now()) / 1000,
        team_id: teamId,
        mode: "extract",
        url: request.urls?.join(", ") || "",
        scrapeOptions: request,
        origin: request.origin ?? "api",
        num_tokens: 0,
        tokens_billed: 0,
        sources,
        cost_tracking: costTracking,
      });
      return {
        success: false,
        error:
@@ -950,4 +972,24 @@ export async function performExtraction(
      totalUrlsScraped,
      sources,
    };
  } catch (error) {
    await logJob({
      job_id: extractId,
      success: false,
      message: (error instanceof Error ? error.message : typeof error === "string" ? error : "An unexpected error occurred"),
      num_docs: 1,
      docs: [],
      time_taken: (new Date().getTime() - Date.now()) / 1000,
      team_id: teamId,
      mode: "extract",
      url: request.urls?.join(", ") || "",
      scrapeOptions: request,
      origin: request.origin ?? "api",
      num_tokens: 0,
      tokens_billed: 0,
      sources,
      cost_tracking: costTracking,
    });
    throw error;
  }
}