mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-06-04 11:24:40 +08:00
feat(extractSmartScrape): resolve refs in provided schema
This commit is contained in:
parent
5ee2434c9d
commit
ad7e3f62d1
@ -181,6 +181,33 @@ export function prepareSmartScrapeSchema(
|
||||
return { schemaToUse: wrappedSchema };
|
||||
}
|
||||
|
||||
// Resolve all $defs references in the schema
|
||||
/**
 * Recursively inlines local `#/$defs/<name>` references in a JSON-schema-like
 * value and returns a new value; neither `obj` nor `defs` is mutated.
 *
 * Behaviour:
 * - Primitives and `null`/`undefined` are returned unchanged.
 * - Arrays are mapped element by element.
 * - A node whose `$ref` has the exact form `#/$defs/<name>` and whose name is
 *   an own key of `defs` is replaced by the resolved definition. Keywords
 *   written next to `$ref` (for example `description`) are resolved too and
 *   overlaid on the definition, so they take precedence over it.
 * - Any other `$ref` (different prefix, extra path segments, unknown name) is
 *   left in place untouched.
 * - A reference back to a definition that is currently being expanded (a
 *   recursive schema) is left as an unresolved `$ref` instead of being
 *   expanded forever.
 * - Every `$defs` key is dropped from the output.
 *
 * @param obj - Schema, or schema fragment, to process.
 * @param defs - Map from definition name to definition body.
 * @param activeRefs - Internal: names of the definitions being expanded on the
 *   current recursion path. Callers should omit it.
 * @returns A copy of `obj` with resolvable references inlined.
 */
const resolveRefs = (
  obj: any,
  defs: any,
  activeRefs: ReadonlySet<string> = new Set<string>(),
): any => {
  if (!obj || typeof obj !== 'object') return obj;

  if (Array.isArray(obj)) {
    return obj.map((item) => resolveRefs(item, defs, activeRefs));
  }

  if (typeof obj.$ref === 'string') {
    const segments: string[] = obj.$ref.split('/');
    // Only a direct `#/$defs/<name>` pointer is handled. Deeper pointers are
    // not lookups by their last segment, so they are left unresolved.
    if (segments.length === 3 && segments[0] === '#' && segments[1] === '$defs') {
      // Undo JSON Pointer escaping; `~1` must be decoded before `~0`.
      const defName = segments[2].replace(/~1/g, '/').replace(/~0/g, '~');
      const isKnown =
        defs !== null &&
        typeof defs === 'object' &&
        Object.prototype.hasOwnProperty.call(defs, defName);

      // Skip expansion when the definition is already being expanded further
      // up the call chain; otherwise a recursive schema never terminates.
      if (isKnown && !activeRefs.has(defName)) {
        const nestedRefs = new Set(activeRefs).add(defName);
        const target = resolveRefs(defs[defName], defs, nestedRefs);

        const siblings: Record<string, unknown> = {};
        for (const [key, value] of Object.entries(obj)) {
          if (key === '$ref' || key === '$defs') continue;
          siblings[key] = resolveRefs(value, defs, activeRefs);
        }

        // A definition that is not a plain object contributes no keywords.
        const base =
          target && typeof target === 'object' && !Array.isArray(target) ? target : {};
        return { ...base, ...siblings };
      }
    }
  }

  // Plain object (or a `$ref` that could not be resolved): copy key by key.
  const resolved: any = {};
  for (const [key, value] of Object.entries(obj)) {
    if (key === '$defs') continue;
    resolved[key] = resolveRefs(value, defs, activeRefs);
  }
  return resolved;
};
|
||||
|
||||
export async function extractData({
|
||||
extractOptions,
|
||||
urls,
|
||||
@ -221,6 +248,15 @@ export async function extractData({
|
||||
schema = genRes.extract;
|
||||
}
|
||||
|
||||
if (schema) {
|
||||
const defs = schema.$defs || {};
|
||||
schema = resolveRefs(schema, defs);
|
||||
delete schema.$defs;
|
||||
logger.info("Resolved schema refs", {
|
||||
schema,
|
||||
});
|
||||
}
|
||||
|
||||
const { schemaToUse } = prepareSmartScrapeSchema(schema, logger, isSingleUrl);
|
||||
const extractOptionsNewSchema = {
|
||||
...extractOptions,
|
||||
|
Loading…
x
Reference in New Issue
Block a user