mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-06-04 11:24:40 +08:00

feat(extract): log failed extracts

This commit is contained in:
parent aa20246571
commit d82f44c93e
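
In short, the change drops the ad-hoc fs.appendFile('logs/extraction-errors.log', ...) debug logging inside performExtraction and instead calls logJob on each failure path (no search results, a failed multi-entity transform, and any error that escapes the new top-level try/catch), so failed extracts are recorded the same way as completed ones. The sketch below mirrors the payload shape added on those failure paths; the buildFailedExtractLog helper and its input type are illustrative assumptions, not code from this commit.

// Illustrative only: this helper does not exist in the codebase; it just
// collects the fields that each logJob({...}) call added by this commit passes.
interface FailedExtractLogInput {
  extractId: string;
  teamId: string;
  message: string;
  urls?: string[];
  request: unknown;          // the original ExtractRequest
  origin?: string;
  sources: Record<string, string[]>;
  costTracking: unknown;     // the CostTracking instance for this extract
}

function buildFailedExtractLog(input: FailedExtractLogInput) {
  return {
    job_id: input.extractId,
    success: false,
    message: input.message,
    num_docs: 1,
    docs: [],
    // the commit computes (new Date().getTime() - Date.now()) / 1000 here
    time_taken: 0,
    team_id: input.teamId,
    mode: "extract",
    url: input.urls?.join(", ") || "",
    scrapeOptions: input.request,
    origin: input.origin ?? "api",
    num_tokens: 0,
    tokens_billed: 0,
    sources: input.sources,
    cost_tracking: input.costTracking,
  };
}

Each failure branch in the diff passes an object of this shape to logJob before returning an error result or rethrowing.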
@@ -36,7 +36,7 @@ import { getCachedDocs, saveCachedDocs } from "./helpers/cached-docs";
 import { normalizeUrl } from "../canonical-url";
 import { search } from "../../search";
 import { buildRephraseToSerpPrompt } from "./build-prompts";
-import fs from "fs/promises";
 interface ExtractServiceOptions {
   request: ExtractRequest;
   teamId: string;
@@ -109,6 +109,8 @@ export async function performExtraction(
     teamId,
   });
 
+  try {
+
   // If no URLs are provided, generate URLs from the prompt
   if ((!request.urls || request.urls.length === 0) && request.prompt) {
     logger.debug("Generating URLs from prompt...", {
@@ -134,6 +136,23 @@ export async function performExtraction(
       logger.error("No search results found", {
         query: request.prompt,
       });
+      logJob({
+        job_id: extractId,
+        success: false,
+        message: "No search results found",
+        num_docs: 1,
+        docs: [],
+        time_taken: (new Date().getTime() - Date.now()) / 1000,
+        team_id: teamId,
+        mode: "extract",
+        url: request.urls?.join(", ") || "",
+        scrapeOptions: request,
+        origin: request.origin ?? "api",
+        num_tokens: 0,
+        tokens_billed: 0,
+        sources,
+        cost_tracking: costTracking,
+      });
       return {
         success: false,
         error: "No search results found",
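
One detail of the added payloads worth flagging: time_taken: (new Date().getTime() - Date.now()) / 1000 subtracts two timestamps taken at essentially the same instant, so the logged value is roughly zero rather than the job's duration. A minimal sketch of how an elapsed time would normally be derived, assuming a hypothetical startTime captured when the extract begins (this commit does not add one):

// Hypothetical timing, not part of this commit.
const startTime = Date.now();        // captured at the start of the extract

// ... extraction work happens here ...

const timeTakenSeconds = (Date.now() - startTime) / 1000;  // elapsed seconds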
@@ -549,8 +568,6 @@ export async function performExtraction(
         );
         logger.debug("Successfully transformed results with sourceTracker");
       } catch (error) {
-        const errorLog = `[${new Date().toISOString()}] Error in sourceTracker.transformResults: ${JSON.stringify(error, null, 2)}\n`;
-        await fs.appendFile('logs/extraction-errors.log', errorLog);
         logger.error(`Error in sourceTracker.transformResults:`, { error });
         throw error;
       }
@@ -562,8 +579,6 @@ export async function performExtraction(
         );
         logger.debug("Successfully transformed array to object");
       } catch (error) {
-        const errorLog = `[${new Date().toISOString()}] Error in transformArrayToObject: ${JSON.stringify(error, null, 2)}\n`;
-        await fs.appendFile('logs/extraction-errors.log', errorLog);
         logger.error(`Error in transformArrayToObject:`, { error });
         throw error;
       }
@@ -573,8 +588,6 @@ export async function performExtraction(
         sourceTracker.trackPreDeduplicationSources(multiEntityResult);
         logger.debug("Successfully tracked pre-deduplication sources");
       } catch (error) {
-        const errorLog = `[${new Date().toISOString()}] Error in trackPreDeduplicationSources: ${JSON.stringify(error, null, 2)}\n`;
-        await fs.appendFile('logs/extraction-errors.log', errorLog);
         logger.error(`Error in trackPreDeduplicationSources:`, { error });
         throw error;
       }
@@ -584,8 +597,6 @@ export async function performExtraction(
         multiEntityResult = deduplicateObjectsArray(multiEntityResult);
         logger.debug("Successfully deduplicated objects array");
       } catch (error) {
-        const errorLog = `[${new Date().toISOString()}] Error in deduplicateObjectsArray: ${JSON.stringify(error, null, 2)}\n`;
-        await fs.appendFile('logs/extraction-errors.log', errorLog);
         logger.error(`Error in deduplicateObjectsArray:`, { error });
         throw error;
       }
@@ -594,8 +605,6 @@ export async function performExtraction(
         multiEntityResult = mergeNullValObjs(multiEntityResult);
         logger.debug("Successfully merged null value objects");
       } catch (error) {
-        const errorLog = `[${new Date().toISOString()}] Error in mergeNullValObjs: ${JSON.stringify(error, null, 2)}\n`;
-        await fs.appendFile('logs/extraction-errors.log', errorLog);
         logger.error(`Error in mergeNullValObjs:`, { error });
         throw error;
       }
@@ -609,14 +618,10 @@ export async function performExtraction(
         Object.assign(sources, multiEntitySources);
         logger.debug("Successfully mapped sources to final items");
       } catch (error) {
-        const errorLog = `[${new Date().toISOString()}] Error in mapSourcesToFinalItems: ${JSON.stringify(error, null, 2)}\n`;
-        await fs.appendFile('logs/extraction-errors.log', errorLog);
         logger.error(`Error in mapSourcesToFinalItems:`, { error });
         throw error;
       }
     } catch (error) {
-      const errorLog = `[${new Date().toISOString()}] Failed to transform array to object\nError: ${JSON.stringify(error, null, 2)}\nStack: ${error.stack}\nMultiEntityResult: ${JSON.stringify(multiEntityResult, null, 2)}\nMultiEntityCompletions: ${JSON.stringify(multiEntityCompletions, null, 2)}\nMultiEntitySchema: ${JSON.stringify(multiEntitySchema, null, 2)}\n\n`;
-      await fs.appendFile('logs/extraction-errors.log', errorLog);
       logger.error(`Failed to transform array to object`, {
         error,
         errorMessage: error.message,
@@ -625,6 +630,23 @@ export async function performExtraction(
         multiEntityCompletions: JSON.stringify(multiEntityCompletions),
         multiEntitySchema: JSON.stringify(multiEntitySchema)
       });
+      logJob({
+        job_id: extractId,
+        success: false,
+        message: (error instanceof Error ? error.message : "Failed to transform array to object"),
+        num_docs: 1,
+        docs: [],
+        time_taken: (new Date().getTime() - Date.now()) / 1000,
+        team_id: teamId,
+        mode: "extract",
+        url: request.urls?.join(", ") || "",
+        scrapeOptions: request,
+        origin: request.origin ?? "api",
+        num_tokens: 0,
+        tokens_billed: 0,
+        sources,
+        cost_tracking: costTracking,
+      });
       return {
         success: false,
         error:
@@ -950,4 +972,24 @@ export async function performExtraction(
     totalUrlsScraped,
     sources,
   };
+  } catch (error) {
+    await logJob({
+      job_id: extractId,
+      success: false,
+      message: (error instanceof Error ? error.message : typeof error === "string" ? error : "An unexpected error occurred"),
+      num_docs: 1,
+      docs: [],
+      time_taken: (new Date().getTime() - Date.now()) / 1000,
+      team_id: teamId,
+      mode: "extract",
+      url: request.urls?.join(", ") || "",
+      scrapeOptions: request,
+      origin: request.origin ?? "api",
+      num_tokens: 0,
+      tokens_billed: 0,
+      sources,
+      cost_tracking: costTracking,
+    });
+    throw error;
+  }
 }
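
This last hunk is the counterpart of the try { added near the top of performExtraction: the whole body is now wrapped so that any otherwise-unhandled error is logged via logJob and then rethrown, leaving the caller's error handling unchanged. A self-contained sketch of that pattern, with stand-in logJob and runExtract functions rather than the real service code:

// Stand-ins for illustration; the real logJob lives in the API's logging service.
type FailureLog = { job_id: string; success: false; message: string };

async function logJob(entry: FailureLog): Promise<void> {
  console.error("extract failed:", entry);
}

async function runExtract(jobId: string, work: () => Promise<string[]>): Promise<string[]> {
  try {
    return await work();
  } catch (error) {
    // Record the failure first, then rethrow so callers still see the original error.
    await logJob({
      job_id: jobId,
      success: false,
      message:
        error instanceof Error
          ? error.message
          : typeof error === "string"
            ? error
            : "An unexpected error occurred",
    });
    throw error;
  }
}

The message normalization mirrors the expression used in the commit's outermost catch.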