mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 14:19:00 +08:00
Nick:
This commit is contained in:
parent
6383bf270a
commit
34b40f6a23
@ -33,6 +33,7 @@ export async function scrapeController(
|
|||||||
basePriority: 10,
|
basePriority: 10,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
await addScrapeJob(
|
await addScrapeJob(
|
||||||
{
|
{
|
||||||
url: req.body.url,
|
url: req.body.url,
|
||||||
@ -96,7 +97,7 @@ export async function scrapeController(
|
|||||||
// Don't bill if we're early returning
|
// Don't bill if we're early returning
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (req.body.extract && req.body.formats.includes("extract")) {
|
if (req.body.extract && req.body.formats.includes("extract") ) {
|
||||||
creditsToBeBilled = 5;
|
creditsToBeBilled = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -125,6 +125,7 @@ export const scrapeOptions = z
|
|||||||
"screenshot",
|
"screenshot",
|
||||||
"screenshot@fullPage",
|
"screenshot@fullPage",
|
||||||
"extract",
|
"extract",
|
||||||
|
"json"
|
||||||
])
|
])
|
||||||
.array()
|
.array()
|
||||||
.optional()
|
.optional()
|
||||||
@ -139,7 +140,10 @@ export const scrapeOptions = z
|
|||||||
onlyMainContent: z.boolean().default(true),
|
onlyMainContent: z.boolean().default(true),
|
||||||
timeout: z.number().int().positive().finite().safe().optional(),
|
timeout: z.number().int().positive().finite().safe().optional(),
|
||||||
waitFor: z.number().int().nonnegative().finite().safe().default(0),
|
waitFor: z.number().int().nonnegative().finite().safe().default(0),
|
||||||
|
// Deprecate this to jsonOptions
|
||||||
extract: extractOptions.optional(),
|
extract: extractOptions.optional(),
|
||||||
|
// New
|
||||||
|
jsonOptions: extractOptions.optional(),
|
||||||
mobile: z.boolean().default(false),
|
mobile: z.boolean().default(false),
|
||||||
parsePDF: z.boolean().default(true),
|
parsePDF: z.boolean().default(true),
|
||||||
actions: actionsSchema.optional(),
|
actions: actionsSchema.optional(),
|
||||||
@ -242,20 +246,43 @@ export const scrapeRequestSchema = scrapeOptions
|
|||||||
(obj) => {
|
(obj) => {
|
||||||
const hasExtractFormat = obj.formats?.includes("extract");
|
const hasExtractFormat = obj.formats?.includes("extract");
|
||||||
const hasExtractOptions = obj.extract !== undefined;
|
const hasExtractOptions = obj.extract !== undefined;
|
||||||
|
const hasJsonFormat = obj.formats?.includes("json");
|
||||||
|
const hasJsonOptions = obj.jsonOptions !== undefined;
|
||||||
return (
|
return (
|
||||||
(hasExtractFormat && hasExtractOptions) ||
|
(hasExtractFormat && hasExtractOptions) ||
|
||||||
(!hasExtractFormat && !hasExtractOptions)
|
(!hasExtractFormat && !hasExtractOptions) ||
|
||||||
|
(hasJsonFormat && hasJsonOptions) ||
|
||||||
|
(!hasJsonFormat && !hasJsonOptions)
|
||||||
);
|
);
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
message:
|
message:
|
||||||
"When 'extract' format is specified, 'extract' options must be provided, and vice versa",
|
"When 'extract' or 'json' format is specified, corresponding options must be provided, and vice versa",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.transform((obj) => {
|
.transform((obj) => {
|
||||||
if ((obj.formats?.includes("extract") || obj.extract) && !obj.timeout) {
|
// Handle timeout
|
||||||
return { ...obj, timeout: 60000 };
|
if ((obj.formats?.includes("extract") || obj.extract || obj.formats?.includes("json") || obj.jsonOptions) && !obj.timeout) {
|
||||||
|
obj = { ...obj, timeout: 60000 };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(obj.formats?.includes("json")) {
|
||||||
|
obj.formats.push("extract");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert JSON options to extract options if needed
|
||||||
|
if (obj.jsonOptions && !obj.extract) {
|
||||||
|
obj = {
|
||||||
|
...obj,
|
||||||
|
extract: {
|
||||||
|
prompt: obj.jsonOptions.prompt,
|
||||||
|
systemPrompt: obj.jsonOptions.systemPrompt,
|
||||||
|
schema: obj.jsonOptions.schema,
|
||||||
|
mode: "llm"
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
return obj;
|
return obj;
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -410,6 +437,7 @@ export type Document = {
|
|||||||
links?: string[];
|
links?: string[];
|
||||||
screenshot?: string;
|
screenshot?: string;
|
||||||
extract?: any;
|
extract?: any;
|
||||||
|
json?: any;
|
||||||
warning?: string;
|
warning?: string;
|
||||||
actions?: {
|
actions?: {
|
||||||
screenshots?: string[];
|
screenshots?: string[];
|
||||||
|
@ -233,7 +233,12 @@ export async function performLLMExtract(
|
|||||||
document.markdown,
|
document.markdown,
|
||||||
document.warning,
|
document.warning,
|
||||||
);
|
);
|
||||||
document.extract = extract;
|
|
||||||
|
if (meta.options.formats.includes("json")) {
|
||||||
|
document.json = extract;
|
||||||
|
} else {
|
||||||
|
document.extract = extract;
|
||||||
|
}
|
||||||
document.warning = warning;
|
document.warning = warning;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user