mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-11 23:58:59 +08:00
[FIR-796] feat(api/types): Add action and wait time validation for scrape requests
- Implement max actions limit (15 actions)
- Add total wait time calculation for actions
- Increase max waitFor time to 60 seconds
- Refactor scrape options schema to include validation
This commit is contained in:
parent
2b7b7400f6
commit
843cec971d
@ -67,6 +67,25 @@ export const extractOptions = z
|
||||
|
||||
export type ExtractOptions = z.infer<typeof extractOptions>;
|
||||
|
||||
// Cap, in seconds, on the combined wait time of a request; the schema refinements compare against ACTIONS_MAX_WAIT_TIME * 1000 milliseconds.
const ACTIONS_MAX_WAIT_TIME = 60;
|
||||
// Largest number of entries the actions array may contain before validation rejects it.
const MAX_ACTIONS = 15;
|
||||
/**
 * Sums the wait time, in milliseconds, implied by a list of actions plus an
 * initial `waitFor` delay.
 *
 * Only actions whose `type` is `"wait"` contribute:
 * - a wait with a positive numeric `milliseconds` adds that amount;
 * - otherwise, a wait with a truthy `selector` is counted as a flat 1000 ms.
 *
 * Elements are narrowed at runtime rather than trusted, because this helper
 * may be handed values that have not (fully) passed schema validation. A
 * non-numeric `milliseconds` would otherwise be string-concatenated into the
 * total, and a negative one would offset other waits and defeat the cap.
 *
 * @param actions - Action objects to inspect; non-object entries are ignored.
 * @param waitFor - Initial delay in milliseconds added to the action total.
 * @returns `waitFor` plus the accumulated wait time of all wait actions.
 */
function calculateTotalWaitTime(
  actions: readonly unknown[] = [],
  waitFor: number = 0,
): number {
  let actionWaitTime = 0;

  for (const action of actions) {
    if (typeof action !== "object" || action === null) {
      continue;
    }

    // Runtime-checked above; the fields themselves stay `unknown` until narrowed.
    const { type, milliseconds, selector } = action as {
      type?: unknown;
      milliseconds?: unknown;
      selector?: unknown;
    };

    if (type !== "wait") {
      continue;
    }

    // `> 0` deliberately lets Infinity through so an unbounded wait still
    // trips the limit, while NaN, zero and negatives are not added.
    if (typeof milliseconds === "number" && milliseconds > 0) {
      actionWaitTime += milliseconds;
      continue;
    }

    // Consider selector waits as 1 second
    if (selector) {
      actionWaitTime += 1000;
    }
  }

  return waitFor + actionWaitTime;
}
|
||||
|
||||
export const actionsSchema = z.array(
|
||||
z.union([
|
||||
z
|
||||
@ -113,9 +132,19 @@ export const actionsSchema = z.array(
|
||||
script: z.string(),
|
||||
}),
|
||||
]),
|
||||
).refine(
|
||||
(actions) => actions.length <= MAX_ACTIONS,
|
||||
{
|
||||
message: `Maximum of ${MAX_ACTIONS} actions allowed`,
|
||||
},
|
||||
).refine(
|
||||
(actions) => calculateTotalWaitTime(actions) <= ACTIONS_MAX_WAIT_TIME * 1000,
|
||||
{
|
||||
message: `Total wait time (waitFor + wait actions) cannot exceed ${ACTIONS_MAX_WAIT_TIME} seconds`,
|
||||
},
|
||||
);
|
||||
|
||||
export const scrapeOptions = z
|
||||
const baseScrapeOptions = z
|
||||
.object({
|
||||
formats: z
|
||||
.enum([
|
||||
@ -140,7 +169,7 @@ export const scrapeOptions = z
|
||||
excludeTags: z.string().array().optional(),
|
||||
onlyMainContent: z.boolean().default(true),
|
||||
timeout: z.number().int().positive().finite().safe().optional(),
|
||||
waitFor: z.number().int().nonnegative().finite().safe().max(30000).default(0),
|
||||
waitFor: z.number().int().nonnegative().finite().safe().max(60000).default(0),
|
||||
// Deprecate this to jsonOptions
|
||||
extract: extractOptions.optional(),
|
||||
// New
|
||||
@ -191,7 +220,17 @@ export const scrapeOptions = z
|
||||
})
|
||||
.strict(strictMessage);
|
||||
|
||||
export type ScrapeOptions = z.infer<typeof scrapeOptions>;
|
||||
/**
 * Scrape options with a cross-field check layered on top of the base object
 * schema: when `actions` is present, `waitFor` plus the wait time of those
 * actions must stay within ACTIONS_MAX_WAIT_TIME seconds. Options without
 * `actions` pass this check unconditionally.
 */
export const scrapeOptions = baseScrapeOptions.refine(
  ({ actions, waitFor }) =>
    !actions ||
    calculateTotalWaitTime(actions, waitFor) <= ACTIONS_MAX_WAIT_TIME * 1000,
  {
    message: `Total wait time (waitFor + wait actions) cannot exceed ${ACTIONS_MAX_WAIT_TIME} seconds`,
  },
);
|
||||
|
||||
export type ScrapeOptions = z.infer<typeof baseScrapeOptions>;
|
||||
|
||||
import Ajv from "ajv";
|
||||
|
||||
@ -246,7 +285,7 @@ export type ExtractV1Options = z.infer<typeof extractV1Options>;
|
||||
export const extractRequestSchema = extractV1Options;
|
||||
export type ExtractRequest = z.infer<typeof extractRequestSchema>;
|
||||
|
||||
export const scrapeRequestSchema = scrapeOptions
|
||||
export const scrapeRequestSchema = baseScrapeOptions
|
||||
.omit({ timeout: true })
|
||||
.extend({
|
||||
url,
|
||||
@ -325,7 +364,7 @@ export const webhookSchema = z.preprocess(
|
||||
.strict(strictMessage),
|
||||
);
|
||||
|
||||
export const batchScrapeRequestSchema = scrapeOptions
|
||||
export const batchScrapeRequestSchema = baseScrapeOptions
|
||||
.extend({
|
||||
urls: url.array(),
|
||||
origin: z.string().optional().default("api"),
|
||||
@ -349,7 +388,7 @@ export const batchScrapeRequestSchema = scrapeOptions
|
||||
},
|
||||
);
|
||||
|
||||
export const batchScrapeRequestSchemaNoURLValidation = scrapeOptions
|
||||
export const batchScrapeRequestSchemaNoURLValidation = baseScrapeOptions
|
||||
.extend({
|
||||
urls: z.string().array(),
|
||||
origin: z.string().optional().default("api"),
|
||||
@ -876,8 +915,7 @@ export const searchRequestSchema = z
|
||||
location: z.string().optional(),
|
||||
origin: z.string().optional().default("api"),
|
||||
timeout: z.number().int().positive().finite().safe().default(60000),
|
||||
scrapeOptions: scrapeOptions
|
||||
.extend({
|
||||
scrapeOptions: baseScrapeOptions.extend({
|
||||
formats: z
|
||||
.array(
|
||||
z.enum([
|
||||
|
@ -6,7 +6,8 @@
|
||||
"test:suite": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false",
|
||||
"test:load": "artillery run --output ./load-test-results/test-run-report.json load-test.yml",
|
||||
"test:scrape": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathPattern=tests/scrape.test.ts",
|
||||
"test:crawl": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathPattern=tests/crawl.test.ts"
|
||||
"test:crawl": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathPattern=tests/crawl.test.ts",
|
||||
"test:schema-validation": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathPattern=tests/schema-validation.test.ts"
|
||||
},
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
@ -22,9 +23,11 @@
|
||||
"ts-jest": "^29.1.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@types/jest": "^29.5.12",
|
||||
"@types/supertest": "^6.0.2",
|
||||
"artillery": "^2.0.19",
|
||||
"typescript": "^5.4.5"
|
||||
"typescript": "^5.4.5",
|
||||
"zod": "^3.24.1"
|
||||
}
|
||||
}
|
||||
|
11
apps/test-suite/pnpm-lock.yaml
generated
11
apps/test-suite/pnpm-lock.yaml
generated
@ -36,6 +36,9 @@ importers:
|
||||
specifier: ^29.1.2
|
||||
version: 29.1.5(@babel/core@7.24.5)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.5))(jest@29.7.0(@types/node@20.14.9)(ts-node@10.9.2(@types/node@20.14.9)(typescript@5.4.5)))(typescript@5.4.5)
|
||||
devDependencies:
|
||||
'@jest/globals':
|
||||
specifier: ^29.7.0
|
||||
version: 29.7.0
|
||||
'@types/jest':
|
||||
specifier: ^29.5.12
|
||||
version: 29.5.12
|
||||
@ -48,6 +51,9 @@ importers:
|
||||
typescript:
|
||||
specifier: ^5.4.5
|
||||
version: 5.4.5
|
||||
zod:
|
||||
specifier: ^3.24.1
|
||||
version: 3.24.1
|
||||
|
||||
packages:
|
||||
|
||||
@ -4315,6 +4321,9 @@ packages:
|
||||
resolution: {integrity: sha512-9qv4rlDiopXg4E69k+vMHjNN63YFMe9sZMrdlvKnCjlCRWeCBswPPMPUfx+ipsAWq1LXHe70RcbaHdJJpS6hyQ==}
|
||||
engines: {node: '>= 10'}
|
||||
|
||||
zod@3.24.1:
|
||||
resolution: {integrity: sha512-muH7gBL9sI1nciMZV67X5fTKKBLtwpZ5VBp1vsOQzj1MhrBZ4wlVCm3gedKZWLp0Oyel8sIGfeiz54Su+OVT+A==}
|
||||
|
||||
snapshots:
|
||||
|
||||
'@alcalzone/ansi-tokenize@0.1.3':
|
||||
@ -9997,3 +10006,5 @@ snapshots:
|
||||
archiver-utils: 3.0.4
|
||||
compress-commons: 4.1.2
|
||||
readable-stream: 3.6.2
|
||||
|
||||
zod@3.24.1: {}
|
||||
|
271
apps/test-suite/tests/schema-validation.test.ts
Normal file
271
apps/test-suite/tests/schema-validation.test.ts
Normal file
@ -0,0 +1,271 @@
|
||||
import {actionsSchema, scrapeOptions} from '../../api/src/controllers/v1/types';
|
||||
import {describe, it, expect} from '@jest/globals';
|
||||
|
||||
describe('Schema Validation Tests', () => {
|
||||
describe('Actions Schema Validation', () => {
|
||||
it('should allow valid actions within limits', () => {
|
||||
const validActions = [
|
||||
{type: 'wait', milliseconds: 1000},
|
||||
{type: 'click', selector: '#button'},
|
||||
{type: 'screenshot', fullPage: false},
|
||||
{type: 'write', text: 'olá - hello'},
|
||||
{type: 'press', key: 'Enter'},
|
||||
{type: 'scroll', direction: 'down'},
|
||||
{type: 'scrape'},
|
||||
{type: 'executeJavascript', script: 'console.log("test")'},
|
||||
];
|
||||
|
||||
const result = actionsSchema.safeParse(validActions);
|
||||
expect(result.success).toBe(true);
|
||||
});
|
||||
|
||||
it('should reject more than 15 actions', () => {
|
||||
const tooManyActions = Array(16).fill({type: 'click', selector: '#button'});
|
||||
|
||||
const result = actionsSchema.safeParse(tooManyActions);
|
||||
expect(result.success).toBe(false);
|
||||
if (!result.success) {
|
||||
expect(result.error.errors[0].message).toBe('Maximum of 15 actions allowed');
|
||||
}
|
||||
});
|
||||
|
||||
describe('Wait Action Validations', () => {
|
||||
it('should validate wait with milliseconds', () => {
|
||||
const validWait = [{type: 'wait', milliseconds: 1000}];
|
||||
expect(actionsSchema.safeParse(validWait).success).toBe(true);
|
||||
|
||||
const invalidWait = [{type: 'wait', milliseconds: -1000}];
|
||||
expect(actionsSchema.safeParse(invalidWait).success).toBe(false);
|
||||
});
|
||||
|
||||
it('should validate wait with selector', () => {
|
||||
const validWait = [{type: 'wait', selector: '#element'}];
|
||||
expect(actionsSchema.safeParse(validWait).success).toBe(true);
|
||||
});
|
||||
|
||||
it('should reject wait with both milliseconds and selector', () => {
|
||||
const invalidWait = [{type: 'wait', milliseconds: 1000, selector: '#element'}];
|
||||
const result = actionsSchema.safeParse(invalidWait);
|
||||
expect(result.success).toBe(false);
|
||||
if (!result.success) {
|
||||
expect(result.error.errors[0].message).toBe(
|
||||
"Either 'milliseconds' or 'selector' must be provided, but not both.",
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
it('should reject wait without either milliseconds or selector', () => {
|
||||
const invalidWait = [{type: 'wait'}];
|
||||
const result = actionsSchema.safeParse(invalidWait);
|
||||
expect(result.success).toBe(false);
|
||||
});
|
||||
|
||||
it('should reject when total wait time exceeds 60 seconds', () => {
|
||||
const longWaitActions = [
|
||||
{type: 'wait', milliseconds: 50000},
|
||||
{type: 'wait', milliseconds: 11000},
|
||||
];
|
||||
|
||||
const result = actionsSchema.safeParse(longWaitActions);
|
||||
expect(result.success).toBe(false);
|
||||
if (!result.success) {
|
||||
expect(result.error.errors[0].message).toBe(
|
||||
'Total wait time (waitFor + wait actions) cannot exceed 60 seconds',
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
it('should count selector waits as 1 second each', () => {
  // Only `wait` actions add to the total; a selector-based wait is counted
  // as a flat 1000 ms. 15 selector waits = 15 seconds total, should pass.
  const maxWaitSelectors = Array(15).fill({type: 'wait', selector: '#element'});
  const result = actionsSchema.safeParse(maxWaitSelectors);
  expect(result.success).toBe(true);

  // Exactly at the limit: 58 s + 2 selector waits = 60 s, still allowed.
  const atLimit = [
    {type: 'wait', milliseconds: 58000}, // 58 seconds
    {type: 'wait', selector: '#element'}, // 1 second
    {type: 'wait', selector: '#load-more-button'}, // 1 second
  ];
  expect(actionsSchema.safeParse(atLimit).success).toBe(true);

  // Test the time limit with mixed waits
  const mixedWaits = [
    {type: 'wait', milliseconds: 58000}, // 58 seconds
    {type: 'wait', selector: '#element'}, // 1 second
    {type: 'wait', selector: '#load-more-button'}, // 1 second
    {type: 'wait', selector: '#toomuch'}, // 1 second, exceeds 60 seconds total
  ];
  const timeFailResult = actionsSchema.safeParse(mixedWaits);
  expect(timeFailResult.success).toBe(false);
  if (!timeFailResult.success) {
    expect(timeFailResult.error.errors[0].message).toBe(
      'Total wait time (waitFor + wait actions) cannot exceed 60 seconds',
    );
  }
});
|
||||
});
|
||||
|
||||
describe('Other Action Type Validations', () => {
|
||||
it('should validate click action', () => {
|
||||
const validClick = [{type: 'click', selector: '#button'}];
|
||||
expect(actionsSchema.safeParse(validClick).success).toBe(true);
|
||||
|
||||
const invalidClick = [{type: 'click'}];
|
||||
expect(actionsSchema.safeParse(invalidClick).success).toBe(false);
|
||||
});
|
||||
|
||||
it('should validate screenshot action', () => {
|
||||
const validScreenshot = [{type: 'screenshot', fullPage: true}];
|
||||
expect(actionsSchema.safeParse(validScreenshot).success).toBe(true);
|
||||
|
||||
const defaultScreenshot = [{type: 'screenshot', fullPage: false}];
|
||||
const result = actionsSchema.safeParse(defaultScreenshot);
|
||||
expect(result.success).toBe(true);
|
||||
});
|
||||
|
||||
it('should validate write action', () => {
|
||||
const validWrite = [{type: 'write', text: 'hello'}];
|
||||
expect(actionsSchema.safeParse(validWrite).success).toBe(true);
|
||||
|
||||
const invalidWrite = [{type: 'write'}];
|
||||
expect(actionsSchema.safeParse(invalidWrite).success).toBe(false);
|
||||
});
|
||||
|
||||
it('should validate press action', () => {
|
||||
const validPress = [{type: 'press', key: 'Enter'}];
|
||||
expect(actionsSchema.safeParse(validPress).success).toBe(true);
|
||||
|
||||
const invalidPress = [{type: 'press'}];
|
||||
expect(actionsSchema.safeParse(invalidPress).success).toBe(false);
|
||||
});
|
||||
|
||||
it('should validate scroll action', () => {
|
||||
const validScroll = [{type: 'scroll', direction: 'up', selector: '#element'}];
|
||||
expect(actionsSchema.safeParse(validScroll).success).toBe(true);
|
||||
|
||||
const defaultScroll = [{type: 'scroll', direction: 'down'}];
|
||||
const result = actionsSchema.safeParse(defaultScroll);
|
||||
expect(result.success).toBe(true);
|
||||
|
||||
const invalidDirection = [{type: 'scroll', direction: 'left'}];
|
||||
expect(actionsSchema.safeParse(invalidDirection).success).toBe(false);
|
||||
});
|
||||
|
||||
it('should validate scrape action', () => {
|
||||
const validScrape = [{type: 'scrape'}];
|
||||
expect(actionsSchema.safeParse(validScrape).success).toBe(true);
|
||||
});
|
||||
|
||||
it('should validate executeJavascript action', () => {
|
||||
const validJs = [{type: 'executeJavascript', script: 'console.log("test")'}];
|
||||
expect(actionsSchema.safeParse(validJs).success).toBe(true);
|
||||
|
||||
const invalidJs = [{type: 'executeJavascript'}];
|
||||
expect(actionsSchema.safeParse(invalidJs).success).toBe(false);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('Scrape Options Schema Validation', () => {
|
||||
it('should validate waitFor limit', () => {
|
||||
const validOptions = {
|
||||
waitFor: 60000, // 60 seconds
|
||||
};
|
||||
expect(scrapeOptions.safeParse(validOptions).success).toBe(true);
|
||||
|
||||
const invalidOptions = {
|
||||
waitFor: 61000, // 61 seconds
|
||||
};
|
||||
expect(scrapeOptions.safeParse(invalidOptions).success).toBe(false);
|
||||
});
|
||||
|
||||
describe('Combined Wait Time Validations', () => {
|
||||
it('should validate combined waitFor and actions wait time', () => {
|
||||
// Test valid combination (at the limit)
|
||||
const validOptions = {
|
||||
waitFor: 30000, // 30 seconds
|
||||
actions: [
|
||||
{type: 'wait', milliseconds: 29000}, // 29 seconds
|
||||
{type: 'wait', selector: '#element'}, // 1 second
|
||||
],
|
||||
};
|
||||
expect(scrapeOptions.safeParse(validOptions).success).toBe(true);
|
||||
|
||||
// Test invalid combination (exceeds limit)
|
||||
const invalidOptions = {
|
||||
waitFor: 30000, // 30 seconds
|
||||
actions: [
|
||||
{type: 'wait', milliseconds: 29000}, // 29 seconds
|
||||
{type: 'wait', selector: '#element'}, // 1 second
|
||||
{type: 'wait', selector: '#another'}, // 1 second
|
||||
],
|
||||
};
|
||||
const failResult = scrapeOptions.safeParse(invalidOptions);
|
||||
expect(failResult.success).toBe(false);
|
||||
if (!failResult.success) {
|
||||
expect(failResult.error.errors[0].message).toBe(
|
||||
'Total wait time (waitFor + wait actions) cannot exceed 60 seconds',
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
it('should handle edge cases of combined wait times', () => {
|
||||
// Test with only waitFor at limit
|
||||
const maxWaitFor = {
|
||||
waitFor: 60000, // 60 seconds
|
||||
actions: [
|
||||
{type: 'write', text: 'Olá galera!'}, // non-wait action
|
||||
],
|
||||
};
|
||||
expect(scrapeOptions.safeParse(maxWaitFor).success).toBe(true);
|
||||
|
||||
// Test with only action waits at limit
|
||||
const maxActionWaits = {
|
||||
waitFor: 0,
|
||||
actions: [
|
||||
{type: 'wait', milliseconds: 59000}, // 59 seconds
|
||||
{type: 'wait', selector: '#element'}, // 1 second
|
||||
],
|
||||
};
|
||||
expect(scrapeOptions.safeParse(maxActionWaits).success).toBe(true);
|
||||
|
||||
// Test with mixed waits slightly over limit
|
||||
const slightlyOver = {
|
||||
waitFor: 30000, // 30 seconds
|
||||
actions: [
|
||||
{type: 'wait', milliseconds: 30000}, // 30 seconds
|
||||
{type: 'wait', selector: '#element'}, // 1 second
|
||||
],
|
||||
};
|
||||
const overResult = scrapeOptions.safeParse(slightlyOver);
|
||||
expect(overResult.success).toBe(false);
|
||||
if (!overResult.success) {
|
||||
expect(overResult.error.errors[0].message).toBe(
|
||||
'Total wait time (waitFor + wait actions) cannot exceed 60 seconds',
|
||||
);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('Format Validations', () => {
|
||||
it('should validate screenshot format combinations', () => {
|
||||
const validScreenshot = {
|
||||
formats: ['screenshot'],
|
||||
};
|
||||
expect(scrapeOptions.safeParse(validScreenshot).success).toBe(true);
|
||||
|
||||
const validFullPage = {
|
||||
formats: ['screenshot@fullPage'],
|
||||
};
|
||||
expect(scrapeOptions.safeParse(validFullPage).success).toBe(true);
|
||||
|
||||
const invalidBoth = {
|
||||
formats: ['screenshot', 'screenshot@fullPage'],
|
||||
};
|
||||
expect(scrapeOptions.safeParse(invalidBoth).success).toBe(false);
|
||||
});
|
||||
|
||||
it('should default to markdown format', () => {
|
||||
const noFormat = {};
|
||||
const result = scrapeOptions.safeParse(noFormat);
|
||||
expect(result.success).toBe(true);
|
||||
if (result.success) {
|
||||
expect(result.data.formats).toEqual(['markdown']);
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
Loading…
x
Reference in New Issue
Block a user