Revert "Nick: extract api reference"

This reverts commit 522c5b35da7d5cd997aa5ebe2002a38ede7ace93.
This commit is contained in:
Nicolas 2025-01-26 21:06:37 -03:00
parent 522c5b35da
commit 61d7ba76f7
5 changed files with 0 additions and 88 deletions

View File

@@ -30,7 +30,6 @@ export async function extractStatusController(
data = jobData[0].docs; data = jobData[0].docs;
} }
console.log(extract.sources);
return res.status(200).json({ return res.status(200).json({
success: extract.status === "failed" ? false : true, success: extract.status === "failed" ? false : true,
data: data, data: data,
@@ -39,6 +38,5 @@ export async function extractStatusController(
expiresAt: (await getExtractExpiry(req.params.jobId)).toISOString(), expiresAt: (await getExtractExpiry(req.params.jobId)).toISOString(),
steps: extract.showSteps ? extract.steps : undefined, steps: extract.showSteps ? extract.steps : undefined,
llmUsage: extract.showLLMUsage ? extract.llmUsage : undefined, llmUsage: extract.showLLMUsage ? extract.llmUsage : undefined,
sources: extract.sources,
}); });
} }

View File

@@ -537,7 +537,6 @@ export interface URLTrace {
}; };
relevanceScore?: number; relevanceScore?: number;
usedInCompletion?: boolean; usedInCompletion?: boolean;
extractedFields?: string[];
} }
export interface ExtractResponse { export interface ExtractResponse {
@@ -548,9 +547,6 @@ export interface ExtractResponse {
id?: string; id?: string;
warning?: string; warning?: string;
urlTrace?: URLTrace[]; urlTrace?: URLTrace[];
sources?: {
[key: string]: string[];
};
} }
export interface ExtractResponseRequestTest { export interface ExtractResponseRequestTest {

View File

@@ -32,9 +32,6 @@ export type StoredExtract = {
steps?: ExtractedStep[]; steps?: ExtractedStep[];
showLLMUsage?: boolean; showLLMUsage?: boolean;
llmUsage?: number; llmUsage?: number;
sources?: {
[key: string]: string[];
};
}; };
// Reduce TTL to 6 hours instead of 24 // Reduce TTL to 6 hours instead of 24

View File

@@ -56,9 +56,6 @@ interface ExtractResult {
tokenUsageBreakdown?: TokenUsage[]; tokenUsageBreakdown?: TokenUsage[];
llmUsage?: number; llmUsage?: number;
totalUrlsScraped?: number; totalUrlsScraped?: number;
sources?: {
[key: string]: string[];
};
} }
async function analyzeSchemaAndPrompt( async function analyzeSchemaAndPrompt(
@@ -182,45 +179,6 @@ function getRootDomain(url: string): string {
} }
} }
/**
 * Lists the dotted path of every populated field found in `data`.
 *
 * Walks `data` depth-first: each key whose value is neither `null` nor
 * `undefined` contributes its path, immediately followed by the paths of its
 * own populated descendants when the value is itself an object. Array
 * elements are keyed by their index (e.g. `items.0.name`).
 *
 * @param data - Value to inspect; anything that is not a non-null object yields `[]`.
 * @param url - Passed along unchanged to recursive calls; it does not affect the result.
 * @param parentPath - Dotted prefix for the keys of `data`; empty at the root.
 * @returns Paths in depth-first, parent-before-children order.
 */
function trackFieldSources(data: any, url: string, parentPath: string = ''): string[] {
  // Primitives, null and undefined have no fields to report.
  if (!data || typeof data !== 'object') {
    return [];
  }

  const paths: string[] = [];
  for (const [name, child] of Object.entries(data)) {
    // Only null/undefined count as "not extracted"; 0, '' and false are kept.
    if (child === null || child === undefined) {
      continue;
    }
    const path = parentPath ? `${parentPath}.${name}` : name;
    paths.push(path);
    if (typeof child === 'object') {
      paths.push(...trackFieldSources(child, url, path));
    }
  }
  return paths;
}
/**
 * Combines several field-to-URLs maps into a single map.
 *
 * For every field, the URLs from all input maps are concatenated in input
 * order and de-duplicated, keeping the first occurrence of each URL.
 *
 * Accumulation uses a `Map` rather than a plain object so that field names
 * which collide with `Object.prototype` members (`constructor`, `toString`,
 * `__proto__`, ...) are treated as ordinary keys instead of resolving to an
 * inherited value and failing on `.push`.
 *
 * @param sources - Maps from field path to the URLs that produced that field.
 * @returns A new plain object holding freshly allocated URL arrays; the inputs are not mutated.
 */
function mergeSources(sources: { [key: string]: string[] }[]): { [key: string]: string[] } {
  const urlsByField = new Map<string, Set<string>>();

  for (const sourceMap of sources) {
    for (const [field, urls] of Object.entries(sourceMap)) {
      let seen = urlsByField.get(field);
      if (!seen) {
        seen = new Set<string>();
        urlsByField.set(field, seen);
      }
      // A Set de-duplicates while preserving first-insertion order.
      for (const url of urls) {
        seen.add(url);
      }
    }
  }

  // Object.fromEntries defines own data properties, so even a "__proto__"
  // field becomes a regular key rather than altering the result's prototype.
  const entries = Array.from(
    urlsByField,
    ([field, urls]): [string, string[]] => [field, [...urls]],
  );
  return Object.fromEntries(entries);
}
export async function performExtraction( export async function performExtraction(
extractId: string, extractId: string,
options: ExtractServiceOptions, options: ExtractServiceOptions,
@@ -233,7 +191,6 @@ export async function performExtraction(
let multiEntityResult: any = {}; let multiEntityResult: any = {};
let singleAnswerResult: any = {}; let singleAnswerResult: any = {};
let totalUrlsScraped = 0; let totalUrlsScraped = 0;
let extractionSources: { [key: string]: string[] } = {};
const logger = _logger.child({ const logger = _logger.child({
module: "extract", module: "extract",
@@ -594,24 +551,6 @@ export async function performExtraction(
// return null; // return null;
// } // }
if (multiEntityCompletion?.extract) {
const extractedFields = trackFieldSources(multiEntityCompletion.extract, doc.metadata.url || doc.metadata.sourceURL!);
// Update URL trace with extracted fields
const trace = urlTraces.find(t => t.url === (doc.metadata.url || doc.metadata.sourceURL!));
if (trace) {
trace.extractedFields = extractedFields;
}
// Track sources for each field
extractedFields.forEach(field => {
if (!extractionSources[field]) {
extractionSources[field] = [];
}
extractionSources[field].push(doc.metadata.url || doc.metadata.sourceURL!);
});
}
return multiEntityCompletion.extract; return multiEntityCompletion.extract;
} catch (error) { } catch (error) {
logger.error(`Failed to process document.`, { error, url: doc.metadata.url ?? doc.metadata.sourceURL! }); logger.error(`Failed to process document.`, { error, url: doc.metadata.url ?? doc.metadata.sourceURL! });
@@ -788,21 +727,6 @@ export async function performExtraction(
// } // }
// }); // });
// } // }
if (singleAnswerCompletions?.extract) {
const singleAnswerSources: { [key: string]: string[] } = {};
const usedUrls = Array.from(docsMap.values())
.map(doc => doc.metadata.url || doc.metadata.sourceURL!)
.filter(Boolean);
const extractedFields = trackFieldSources(singleAnswerCompletions.extract, '');
extractedFields.forEach(field => {
singleAnswerSources[field] = usedUrls;
});
// Merge with multi-entity sources
extractionSources = mergeSources([extractionSources, singleAnswerSources]);
}
} }
let finalResult = reqSchema let finalResult = reqSchema
@@ -893,7 +817,6 @@ export async function performExtraction(
updateExtract(extractId, { updateExtract(extractId, {
status: "completed", status: "completed",
llmUsage, llmUsage,
sources: extractionSources
}).catch((error) => { }).catch((error) => {
logger.error( logger.error(
`Failed to update extract ${extractId} status to completed: ${error}`, `Failed to update extract ${extractId} status to completed: ${error}`,
@@ -911,6 +834,5 @@ export async function performExtraction(
urlTrace: request.urlTrace ? urlTraces : undefined, urlTrace: request.urlTrace ? urlTraces : undefined,
llmUsage, llmUsage,
totalUrlsScraped, totalUrlsScraped,
sources: extractionSources
}; };
} }

View File

@@ -227,7 +227,6 @@ export function getRateLimiterPoints(
const points: number = const points: number =
rateLimitConfig[makePlanKey(plan)] || rateLimitConfig.default; // 5 rateLimitConfig[makePlanKey(plan)] || rateLimitConfig.default; // 5
return points; return points;
} }