/** Upper bound, in seconds, on the combined wait budget (`waitFor` plus all `wait` actions). */
const ACTIONS_MAX_WAIT_TIME = 60;

/** Maximum number of entries allowed in an actions array. */
const MAX_ACTIONS = 15;

/**
 * Sums the time, in milliseconds, that a scrape is asked to spend waiting.
 *
 * Only actions whose `type` is `"wait"` contribute:
 *  - a wait with a non-zero numeric `milliseconds` contributes that amount;
 *  - otherwise, a wait with a truthy `selector` contributes a flat 1000 ms.
 * Every other action type contributes nothing.
 *
 * The parameter is typed `readonly unknown[]` rather than `any[]` so that the
 * body is type-checked, and each entry is narrowed at runtime. Entries that
 * are not objects, and `milliseconds` values that are not numbers, are ignored
 * instead of throwing or turning the running total into a string.
 *
 * @param actions - Action objects to inspect; defaults to an empty list.
 * @param waitFor - Initial page wait in milliseconds; defaults to 0.
 * @returns `waitFor` plus the accumulated wait-action time, in milliseconds.
 */
function calculateTotalWaitTime(
  actions: readonly unknown[] = [],
  waitFor: number = 0,
): number {
  // Flat cost charged for a selector-based wait, since its real duration is unknown up front.
  const selectorWaitMs = 1000;

  let actionWaitTime = 0;
  for (const action of actions) {
    if (typeof action !== "object" || action === null) {
      continue;
    }

    // Safe after the runtime check above: any non-null object can be read by key.
    const fields = action as Record<string, unknown>;
    if (fields.type !== "wait") {
      continue;
    }

    const milliseconds = fields.milliseconds;
    if (typeof milliseconds === "number" && milliseconds) {
      // Zero and NaN are falsy, so they fall through to the selector check.
      actionWaitTime += milliseconds;
    } else if (fields.selector) {
      actionWaitTime += selectorWaitMs;
    }
  }

  return waitFor + actionWaitTime;
}
calculateTotalWaitTime(obj.actions, obj.waitFor) <= ACTIONS_MAX_WAIT_TIME * 1000; + }, + { + message: `Total wait time (waitFor + wait actions) cannot exceed ${ACTIONS_MAX_WAIT_TIME} seconds`, + } +); + +export type ScrapeOptions = z.infer; import Ajv from "ajv"; @@ -246,7 +285,7 @@ export type ExtractV1Options = z.infer; export const extractRequestSchema = extractV1Options; export type ExtractRequest = z.infer; -export const scrapeRequestSchema = scrapeOptions +export const scrapeRequestSchema = baseScrapeOptions .omit({ timeout: true }) .extend({ url, @@ -325,7 +364,7 @@ export const webhookSchema = z.preprocess( .strict(strictMessage), ); -export const batchScrapeRequestSchema = scrapeOptions +export const batchScrapeRequestSchema = baseScrapeOptions .extend({ urls: url.array(), origin: z.string().optional().default("api"), @@ -349,7 +388,7 @@ export const batchScrapeRequestSchema = scrapeOptions }, ); -export const batchScrapeRequestSchemaNoURLValidation = scrapeOptions +export const batchScrapeRequestSchemaNoURLValidation = baseScrapeOptions .extend({ urls: z.string().array(), origin: z.string().optional().default("api"), @@ -876,8 +915,7 @@ export const searchRequestSchema = z location: z.string().optional(), origin: z.string().optional().default("api"), timeout: z.number().int().positive().finite().safe().default(60000), - scrapeOptions: scrapeOptions - .extend({ + scrapeOptions: baseScrapeOptions.extend({ formats: z .array( z.enum([ diff --git a/apps/test-suite/package.json b/apps/test-suite/package.json index 91c95801..c542b48e 100644 --- a/apps/test-suite/package.json +++ b/apps/test-suite/package.json @@ -6,7 +6,8 @@ "test:suite": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false", "test:load": "artillery run --output ./load-test-results/test-run-report.json load-test.yml", "test:scrape": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathPattern=tests/scrape.test.ts", - 
"test:crawl": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathPattern=tests/crawl.test.ts" + "test:crawl": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathPattern=tests/crawl.test.ts", + "test:schema-validation": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathPattern=tests/schema-validation.test.ts" }, "author": "", "license": "ISC", @@ -22,9 +23,11 @@ "ts-jest": "^29.1.2" }, "devDependencies": { + "@jest/globals": "^29.7.0", "@types/jest": "^29.5.12", "@types/supertest": "^6.0.2", "artillery": "^2.0.19", - "typescript": "^5.4.5" + "typescript": "^5.4.5", + "zod": "^3.24.1" } } diff --git a/apps/test-suite/pnpm-lock.yaml b/apps/test-suite/pnpm-lock.yaml index bf714b55..257cbe5b 100644 --- a/apps/test-suite/pnpm-lock.yaml +++ b/apps/test-suite/pnpm-lock.yaml @@ -36,6 +36,9 @@ importers: specifier: ^29.1.2 version: 29.1.5(@babel/core@7.24.5)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.5))(jest@29.7.0(@types/node@20.14.9)(ts-node@10.9.2(@types/node@20.14.9)(typescript@5.4.5)))(typescript@5.4.5) devDependencies: + '@jest/globals': + specifier: ^29.7.0 + version: 29.7.0 '@types/jest': specifier: ^29.5.12 version: 29.5.12 @@ -48,6 +51,9 @@ importers: typescript: specifier: ^5.4.5 version: 5.4.5 + zod: + specifier: ^3.24.1 + version: 3.24.1 packages: @@ -4315,6 +4321,9 @@ packages: resolution: {integrity: sha512-9qv4rlDiopXg4E69k+vMHjNN63YFMe9sZMrdlvKnCjlCRWeCBswPPMPUfx+ipsAWq1LXHe70RcbaHdJJpS6hyQ==} engines: {node: '>= 10'} + zod@3.24.1: + resolution: {integrity: sha512-muH7gBL9sI1nciMZV67X5fTKKBLtwpZ5VBp1vsOQzj1MhrBZ4wlVCm3gedKZWLp0Oyel8sIGfeiz54Su+OVT+A==} + snapshots: '@alcalzone/ansi-tokenize@0.1.3': @@ -9997,3 +10006,5 @@ snapshots: archiver-utils: 3.0.4 compress-commons: 4.1.2 readable-stream: 3.6.2 + + zod@3.24.1: {} diff --git a/apps/test-suite/tests/schema-validation.test.ts 
/**
 * Validation tests for the v1 `actionsSchema` and `scrapeOptions` zod schemas.
 *
 * Covers the action-count limit (15), the total wait budget (60 seconds across
 * `waitFor` and `wait` actions), per-action shape checks, and format rules.
 */
import {actionsSchema, scrapeOptions} from '../../api/src/controllers/v1/types';
import {describe, it, expect} from '@jest/globals';

describe('Schema Validation Tests', () => {
  describe('Actions Schema Validation', () => {
    it('should allow valid actions within limits', () => {
      const validActions = [
        {type: 'wait', milliseconds: 1000},
        {type: 'click', selector: '#button'},
        {type: 'screenshot', fullPage: false},
        {type: 'write', text: 'olá - hello'},
        {type: 'press', key: 'Enter'},
        {type: 'scroll', direction: 'down'},
        {type: 'scrape'},
        {type: 'executeJavascript', script: 'console.log("test")'},
      ];

      const result = actionsSchema.safeParse(validActions);
      expect(result.success).toBe(true);
    });

    it('should reject more than 15 actions', () => {
      const tooManyActions = Array(16).fill({type: 'click', selector: '#button'});

      const result = actionsSchema.safeParse(tooManyActions);
      expect(result.success).toBe(false);
      if (!result.success) {
        expect(result.error.errors[0].message).toBe('Maximum of 15 actions allowed');
      }
    });

    describe('Wait Action Validations', () => {
      it('should validate wait with milliseconds', () => {
        const validWait = [{type: 'wait', milliseconds: 1000}];
        expect(actionsSchema.safeParse(validWait).success).toBe(true);

        const invalidWait = [{type: 'wait', milliseconds: -1000}];
        expect(actionsSchema.safeParse(invalidWait).success).toBe(false);
      });

      it('should validate wait with selector', () => {
        const validWait = [{type: 'wait', selector: '#element'}];
        expect(actionsSchema.safeParse(validWait).success).toBe(true);
      });

      it('should reject wait with both milliseconds and selector', () => {
        const invalidWait = [{type: 'wait', milliseconds: 1000, selector: '#element'}];
        const result = actionsSchema.safeParse(invalidWait);
        expect(result.success).toBe(false);
        if (!result.success) {
          expect(result.error.errors[0].message).toBe(
            "Either 'milliseconds' or 'selector' must be provided, but not both.",
          );
        }
      });

      it('should reject wait without either milliseconds or selector', () => {
        const invalidWait = [{type: 'wait'}];
        const result = actionsSchema.safeParse(invalidWait);
        expect(result.success).toBe(false);
      });

      it('should reject when total wait time exceeds 60 seconds', () => {
        const longWaitActions = [
          {type: 'wait', milliseconds: 50000},
          {type: 'wait', milliseconds: 11000},
        ];

        const result = actionsSchema.safeParse(longWaitActions);
        expect(result.success).toBe(false);
        if (!result.success) {
          expect(result.error.errors[0].message).toBe(
            'Total wait time (waitFor + wait actions) cannot exceed 60 seconds',
          );
        }
      });

      it('should count selector waits as 1 second each', () => {
        // 15 selector waits = 15 seconds total, should pass.
        // These must be `wait` actions: other action types (e.g. `click`) add
        // nothing to the wait budget, so they would not exercise this rule.
        const maxWaitSelectors = Array(15).fill({type: 'wait', selector: '#element'});
        const result = actionsSchema.safeParse(maxWaitSelectors);
        expect(result.success).toBe(true);

        // Test the time limit with mixed waits
        const mixedWaits = [
          {type: 'wait', milliseconds: 58000}, // 58 seconds
          {type: 'wait', selector: '#element'}, // 1 second
          {type: 'wait', selector: '#load-more-button'}, // 1 second
          {type: 'wait', selector: '#toomuch'}, // 1 second, exceeds 60 seconds total
        ];
        const timeFailResult = actionsSchema.safeParse(mixedWaits);
        expect(timeFailResult.success).toBe(false);
        if (!timeFailResult.success) {
          expect(timeFailResult.error.errors[0].message).toBe(
            'Total wait time (waitFor + wait actions) cannot exceed 60 seconds',
          );
        }
      });
    });

    describe('Other Action Type Validations', () => {
      it('should validate click action', () => {
        const validClick = [{type: 'click', selector: '#button'}];
        expect(actionsSchema.safeParse(validClick).success).toBe(true);

        const invalidClick = [{type: 'click'}];
        expect(actionsSchema.safeParse(invalidClick).success).toBe(false);
      });

      it('should validate screenshot action', () => {
        const validScreenshot = [{type: 'screenshot', fullPage: true}];
        expect(actionsSchema.safeParse(validScreenshot).success).toBe(true);

        const defaultScreenshot = [{type: 'screenshot', fullPage: false}];
        const result = actionsSchema.safeParse(defaultScreenshot);
        expect(result.success).toBe(true);
      });

      it('should validate write action', () => {
        const validWrite = [{type: 'write', text: 'hello'}];
        expect(actionsSchema.safeParse(validWrite).success).toBe(true);

        const invalidWrite = [{type: 'write'}];
        expect(actionsSchema.safeParse(invalidWrite).success).toBe(false);
      });

      it('should validate press action', () => {
        const validPress = [{type: 'press', key: 'Enter'}];
        expect(actionsSchema.safeParse(validPress).success).toBe(true);

        const invalidPress = [{type: 'press'}];
        expect(actionsSchema.safeParse(invalidPress).success).toBe(false);
      });

      it('should validate scroll action', () => {
        const validScroll = [{type: 'scroll', direction: 'up', selector: '#element'}];
        expect(actionsSchema.safeParse(validScroll).success).toBe(true);

        const defaultScroll = [{type: 'scroll', direction: 'down'}];
        const result = actionsSchema.safeParse(defaultScroll);
        expect(result.success).toBe(true);

        const invalidDirection = [{type: 'scroll', direction: 'left'}];
        expect(actionsSchema.safeParse(invalidDirection).success).toBe(false);
      });

      it('should validate scrape action', () => {
        const validScrape = [{type: 'scrape'}];
        expect(actionsSchema.safeParse(validScrape).success).toBe(true);
      });

      it('should validate executeJavascript action', () => {
        const validJs = [{type: 'executeJavascript', script: 'console.log("test")'}];
        expect(actionsSchema.safeParse(validJs).success).toBe(true);

        const invalidJs = [{type: 'executeJavascript'}];
        expect(actionsSchema.safeParse(invalidJs).success).toBe(false);
      });
    });
  });

  describe('Scrape Options Schema Validation', () => {
    it('should validate waitFor limit', () => {
      const validOptions = {
        waitFor: 60000, // 60 seconds
      };
      expect(scrapeOptions.safeParse(validOptions).success).toBe(true);

      const invalidOptions = {
        waitFor: 61000, // 61 seconds
      };
      expect(scrapeOptions.safeParse(invalidOptions).success).toBe(false);
    });

    describe('Combined Wait Time Validations', () => {
      it('should validate combined waitFor and actions wait time', () => {
        // Test valid combination (at the limit)
        const validOptions = {
          waitFor: 30000, // 30 seconds
          actions: [
            {type: 'wait', milliseconds: 29000}, // 29 seconds
            {type: 'wait', selector: '#element'}, // 1 second
          ],
        };
        expect(scrapeOptions.safeParse(validOptions).success).toBe(true);

        // Test invalid combination (exceeds limit)
        const invalidOptions = {
          waitFor: 30000, // 30 seconds
          actions: [
            {type: 'wait', milliseconds: 29000}, // 29 seconds
            {type: 'wait', selector: '#element'}, // 1 second
            {type: 'wait', selector: '#another'}, // 1 second
          ],
        };
        const failResult = scrapeOptions.safeParse(invalidOptions);
        expect(failResult.success).toBe(false);
        if (!failResult.success) {
          expect(failResult.error.errors[0].message).toBe(
            'Total wait time (waitFor + wait actions) cannot exceed 60 seconds',
          );
        }
      });

      it('should handle edge cases of combined wait times', () => {
        // Test with only waitFor at limit
        const maxWaitFor = {
          waitFor: 60000, // 60 seconds
          actions: [
            {type: 'write', text: 'Olá galera!'}, // non-wait action
          ],
        };
        expect(scrapeOptions.safeParse(maxWaitFor).success).toBe(true);

        // Test with only action waits at limit
        const maxActionWaits = {
          waitFor: 0,
          actions: [
            {type: 'wait', milliseconds: 59000}, // 59 seconds
            {type: 'wait', selector: '#element'}, // 1 second
          ],
        };
        expect(scrapeOptions.safeParse(maxActionWaits).success).toBe(true);

        // Test with mixed waits slightly over limit
        const slightlyOver = {
          waitFor: 30000, // 30 seconds
          actions: [
            {type: 'wait', milliseconds: 30000}, // 30 seconds
            {type: 'wait', selector: '#element'}, // 1 second
          ],
        };
        const overResult = scrapeOptions.safeParse(slightlyOver);
        expect(overResult.success).toBe(false);
        if (!overResult.success) {
          expect(overResult.error.errors[0].message).toBe(
            'Total wait time (waitFor + wait actions) cannot exceed 60 seconds',
          );
        }
      });
    });

    describe('Format Validations', () => {
      it('should validate screenshot format combinations', () => {
        const validScreenshot = {
          formats: ['screenshot'],
        };
        expect(scrapeOptions.safeParse(validScreenshot).success).toBe(true);

        const validFullPage = {
          formats: ['screenshot@fullPage'],
        };
        expect(scrapeOptions.safeParse(validFullPage).success).toBe(true);

        const invalidBoth = {
          formats: ['screenshot', 'screenshot@fullPage'],
        };
        expect(scrapeOptions.safeParse(invalidBoth).success).toBe(false);
      });

      it('should default to markdown format', () => {
        const noFormat = {};
        const result = scrapeOptions.safeParse(noFormat);
        expect(result.success).toBe(true);
        if (result.success) {
          expect(result.data.formats).toEqual(['markdown']);
        }
      });
    });
  });
});