feat(extract): log failed extracts

This commit is contained in:
Gergő Móricz 2025-04-16 03:13:02 -07:00
parent aa20246571
commit d82f44c93e

View File

@@ -36,7 +36,7 @@ import { getCachedDocs, saveCachedDocs } from "./helpers/cached-docs";
import { normalizeUrl } from "../canonical-url";
import { search } from "../../search";
import { buildRephraseToSerpPrompt } from "./build-prompts";
import fs from "fs/promises";
interface ExtractServiceOptions {
request: ExtractRequest;
teamId: string;
@@ -109,6 +109,8 @@ export async function performExtraction(
teamId,
});
try {
// If no URLs are provided, generate URLs from the prompt
if ((!request.urls || request.urls.length === 0) && request.prompt) {
logger.debug("Generating URLs from prompt...", {
@@ -134,6 +136,23 @@ export async function performExtraction(
logger.error("No search results found", {
query: request.prompt,
});
logJob({
job_id: extractId,
success: false,
message: "No search results found",
num_docs: 1,
docs: [],
time_taken: (new Date().getTime() - Date.now()) / 1000,
team_id: teamId,
mode: "extract",
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
num_tokens: 0,
tokens_billed: 0,
sources,
cost_tracking: costTracking,
});
return {
success: false,
error: "No search results found",
@@ -549,8 +568,6 @@ export async function performExtraction(
);
logger.debug("Successfully transformed results with sourceTracker");
} catch (error) {
const errorLog = `[${new Date().toISOString()}] Error in sourceTracker.transformResults: ${JSON.stringify(error, null, 2)}\n`;
await fs.appendFile('logs/extraction-errors.log', errorLog);
logger.error(`Error in sourceTracker.transformResults:`, { error });
throw error;
}
@@ -562,8 +579,6 @@ export async function performExtraction(
);
logger.debug("Successfully transformed array to object");
} catch (error) {
const errorLog = `[${new Date().toISOString()}] Error in transformArrayToObject: ${JSON.stringify(error, null, 2)}\n`;
await fs.appendFile('logs/extraction-errors.log', errorLog);
logger.error(`Error in transformArrayToObject:`, { error });
throw error;
}
@@ -573,8 +588,6 @@ export async function performExtraction(
sourceTracker.trackPreDeduplicationSources(multiEntityResult);
logger.debug("Successfully tracked pre-deduplication sources");
} catch (error) {
const errorLog = `[${new Date().toISOString()}] Error in trackPreDeduplicationSources: ${JSON.stringify(error, null, 2)}\n`;
await fs.appendFile('logs/extraction-errors.log', errorLog);
logger.error(`Error in trackPreDeduplicationSources:`, { error });
throw error;
}
@@ -584,8 +597,6 @@ export async function performExtraction(
multiEntityResult = deduplicateObjectsArray(multiEntityResult);
logger.debug("Successfully deduplicated objects array");
} catch (error) {
const errorLog = `[${new Date().toISOString()}] Error in deduplicateObjectsArray: ${JSON.stringify(error, null, 2)}\n`;
await fs.appendFile('logs/extraction-errors.log', errorLog);
logger.error(`Error in deduplicateObjectsArray:`, { error });
throw error;
}
@@ -594,8 +605,6 @@ export async function performExtraction(
multiEntityResult = mergeNullValObjs(multiEntityResult);
logger.debug("Successfully merged null value objects");
} catch (error) {
const errorLog = `[${new Date().toISOString()}] Error in mergeNullValObjs: ${JSON.stringify(error, null, 2)}\n`;
await fs.appendFile('logs/extraction-errors.log', errorLog);
logger.error(`Error in mergeNullValObjs:`, { error });
throw error;
}
@@ -609,14 +618,10 @@ export async function performExtraction(
Object.assign(sources, multiEntitySources);
logger.debug("Successfully mapped sources to final items");
} catch (error) {
const errorLog = `[${new Date().toISOString()}] Error in mapSourcesToFinalItems: ${JSON.stringify(error, null, 2)}\n`;
await fs.appendFile('logs/extraction-errors.log', errorLog);
logger.error(`Error in mapSourcesToFinalItems:`, { error });
throw error;
}
} catch (error) {
const errorLog = `[${new Date().toISOString()}] Failed to transform array to object\nError: ${JSON.stringify(error, null, 2)}\nStack: ${error.stack}\nMultiEntityResult: ${JSON.stringify(multiEntityResult, null, 2)}\nMultiEntityCompletions: ${JSON.stringify(multiEntityCompletions, null, 2)}\nMultiEntitySchema: ${JSON.stringify(multiEntitySchema, null, 2)}\n\n`;
await fs.appendFile('logs/extraction-errors.log', errorLog);
logger.error(`Failed to transform array to object`, {
error,
errorMessage: error.message,
@@ -625,6 +630,23 @@ export async function performExtraction(
multiEntityCompletions: JSON.stringify(multiEntityCompletions),
multiEntitySchema: JSON.stringify(multiEntitySchema)
});
logJob({
job_id: extractId,
success: false,
message: (error instanceof Error ? error.message : "Failed to transform array to object"),
num_docs: 1,
docs: [],
time_taken: (new Date().getTime() - Date.now()) / 1000,
team_id: teamId,
mode: "extract",
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
num_tokens: 0,
tokens_billed: 0,
sources,
cost_tracking: costTracking,
});
return {
success: false,
error:
@@ -950,4 +972,24 @@ export async function performExtraction(
totalUrlsScraped,
sources,
};
} catch (error) {
await logJob({
job_id: extractId,
success: false,
message: (error instanceof Error ? error.message : typeof error === "string" ? error : "An unexpected error occurred"),
num_docs: 1,
docs: [],
time_taken: (new Date().getTime() - Date.now()) / 1000,
team_id: teamId,
mode: "extract",
url: request.urls?.join(", ") || "",
scrapeOptions: request,
origin: request.origin ?? "api",
num_tokens: 0,
tokens_billed: 0,
sources,
cost_tracking: costTracking,
});
throw error;
}
}