feat(extractSmartScrape): resolve refs in provided schema

This commit is contained in:
Gergő Móricz 2025-04-16 14:32:00 -07:00
parent 5ee2434c9d
commit ad7e3f62d1

View File

@ -181,6 +181,33 @@ export function prepareSmartScrapeSchema(
return { schemaToUse: wrappedSchema };
}
// Resolve all $defs references in the schema
const resolveRefs = (obj: any, defs: any): any => {
if (!obj || typeof obj !== 'object') return obj;
if (obj.$ref && typeof obj.$ref === 'string') {
// Handle $ref references
const refPath = obj.$ref.split('/');
if (refPath[0] === '#' && refPath[1] === '$defs') {
const defName = refPath[refPath.length - 1];
return resolveRefs({ ...defs[defName] }, defs);
}
}
// Handle arrays
if (Array.isArray(obj)) {
return obj.map(item => resolveRefs(item, defs));
}
// Handle objects
const resolved: any = {};
for (const [key, value] of Object.entries(obj)) {
if (key === '$defs') continue;
resolved[key] = resolveRefs(value, defs);
}
return resolved;
};
export async function extractData({
extractOptions,
urls,
@ -221,6 +248,15 @@ export async function extractData({
schema = genRes.extract;
}
if (schema) {
const defs = schema.$defs || {};
schema = resolveRefs(schema, defs);
delete schema.$defs;
logger.info("Resolved schema refs", {
schema,
});
}
const { schemaToUse } = prepareSmartScrapeSchema(schema, logger, isSingleUrl);
const extractOptionsNewSchema = {
...extractOptions,