diff --git a/apps/api/package.json b/apps/api/package.json index 732472e2..12024b68 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -57,6 +57,8 @@ "@nangohq/node": "^0.40.8", "@sentry/node": "^8.13.0", "@supabase/supabase-js": "^2.44.2", + "@types/express-ws": "^3.0.4", + "@types/ws": "^8.5.12", "ajv": "^8.16.0", "async": "^3.2.5", "async-mutex": "^0.5.0", @@ -71,6 +73,7 @@ "date-fns": "^3.6.0", "dotenv": "^16.3.1", "express-rate-limit": "^7.3.1", + "express-ws": "^5.0.2", "form-data": "^4.0.0", "glob": "^10.4.2", "gpt3-tokenizer": "^1.1.5", @@ -105,6 +108,7 @@ "unstructured-client": "^0.11.3", "uuid": "^10.0.0", "wordpos": "^2.1.0", + "ws": "^8.18.0", "xml2js": "^0.6.2", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.1" diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index 7b2e07fa..93d0ed13 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -41,6 +41,12 @@ importers: '@supabase/supabase-js': specifier: ^2.44.2 version: 2.44.2 + '@types/express-ws': + specifier: ^3.0.4 + version: 3.0.4 + '@types/ws': + specifier: ^8.5.12 + version: 8.5.12 ajv: specifier: ^8.16.0 version: 8.16.0 @@ -83,6 +89,9 @@ importers: express-rate-limit: specifier: ^7.3.1 version: 7.3.1(express@4.19.2) + express-ws: + specifier: ^5.0.2 + version: 5.0.2(express@4.19.2) form-data: specifier: ^4.0.0 version: 4.0.0 @@ -106,7 +115,7 @@ importers: version: 0.0.28 langchain: specifier: ^0.2.8 - version: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1) + version: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) languagedetect: specifier: ^2.0.0 version: 2.0.0 @@ -185,6 +194,9 @@ importers: wordpos: specifier: ^2.1.0 version: 2.1.0 + ws: + specifier: ^8.18.0 + version: 8.18.0 xml2js: specifier: ^0.6.2 version: 0.6.2 @@ -1559,6 +1571,9 @@ packages: '@types/express-serve-static-core@4.19.3': resolution: {integrity: sha512-KOzM7MhcBFlmnlr/fzISFF5vGWVSvN6fTd4T+ExOt08bA/dA5kpSzY52nMsI1KDFmUREpJelPYyuslLRSjjgCg==} + '@types/express-ws@3.0.4': + resolution: {integrity: sha512-Yjj18CaivG5KndgcvzttWe8mPFinPCHJC2wvyQqVzA7hqeufM8EtWMj6mpp5omg3s8XALUexhOu8aXAyi/DyJQ==} + '@types/express@4.17.21': resolution: {integrity: sha512-ejlPM315qwLpaQlQDTjPdsUFSc6ZsP4AN6AlWnogPjQ7CVi7PYF3YVz+CY3jE2pwYf7E/7HlDAN0rV2GxTG0HQ==} @@ -1661,8 +1676,8 @@ packages: '@types/whatwg-url@11.0.5': resolution: {integrity: sha512-coYR071JRaHa+xoEvvYqvnIHaVqaYrLPbsufM9BF63HkwI5Lgmy2QR8Q5K/lYDYo5AK82wOvSOS0UsLTpTG7uQ==} - '@types/ws@8.5.10': - resolution: {integrity: sha512-vmQSUcfalpIq0R9q7uTo2lXs6eGIpt9wtnLdMv9LVpIjCA/+ufZRozlVoVelIYixx1ugCBKDhn89vnsEGOCx9A==} + '@types/ws@8.5.12': + resolution: {integrity: sha512-3tPRkv1EtkDpzlgyKyI8pGsGZAGPEaXeu0DOj5DI25Ja91bdAYddYHbADRYVrZMRbfW+1l5YwXVDKohDJNQxkQ==} '@types/yargs-parser@21.0.3': resolution: {integrity: sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==} @@ -2416,6 +2431,12 @@ packages: peerDependencies: express: 4 || 5 || ^5.0.0-beta.1 + express-ws@5.0.2: + resolution: {integrity: sha512-0uvmuk61O9HXgLhGl3QhNSEtRsQevtmbL94/eILaliEADZBHZOQUAiHFrGPrgsjikohyrmSG5g+sCfASTt0lkQ==} + engines: {node: '>=4.5.0'} + peerDependencies: + express: ^4.0.0 || ^5.0.0-alpha.1 + express@4.19.2: resolution: {integrity: sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==} engines: {node: '>= 0.10.0'} @@ -4547,8 +4568,20 @@ packages: resolution: {integrity: sha512-+QU2zd6OTD8XWIJCbffaiQeH9U73qIqafo1x6V1snCWYGJf6cVE0cDR4D8xRzcEnfI21IFrUPzPGtcPf8AC+Rw==} engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0} - ws@8.17.1: - resolution: {integrity: sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==} + ws@7.5.10: + resolution: {integrity: sha512-+dbF1tHwZpXcbOJdVOkzLDxZP1ailvSxM6ZweXTegylPny803bFhA+vqBYw4s31NSAk4S2Qz+AKXK9a4wkdjcQ==} + engines: {node: '>=8.3.0'} + peerDependencies: + bufferutil: ^4.0.1 + utf-8-validate: ^5.0.2 + peerDependenciesMeta: + bufferutil: + optional: true + utf-8-validate: + optional: true + + ws@8.18.0: + resolution: {integrity: sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==} engines: {node: '>=10.0.0'} peerDependencies: bufferutil: ^4.0.1 @@ -5185,13 +5218,13 @@ snapshots: '@js-sdsl/ordered-map@4.4.2': {} - '@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)': + '@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)': dependencies: ansi-styles: 5.2.0 camelcase: 6.3.0 decamelize: 1.2.0 js-tiktoken: 1.0.12 - langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2) + langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2) ml-distance: 4.0.1 mustache: 4.2.0 p-queue: 6.6.2 @@ -5203,9 +5236,9 @@ snapshots: - langchain - openai - '@langchain/openai@0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))': + '@langchain/openai@0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))': dependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2) + '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2) js-tiktoken: 1.0.12 openai: 4.52.2 zod: 3.23.8 @@ -5214,9 +5247,9 @@ snapshots: - encoding - langchain - '@langchain/textsplitters@0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)': + '@langchain/textsplitters@0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)': dependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2) + '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2) js-tiktoken: 1.0.12 transitivePeerDependencies: - langchain @@ -6374,8 +6407,8 @@ snapshots: dependencies: '@supabase/node-fetch': 2.6.15 '@types/phoenix': 1.6.5 - '@types/ws': 8.5.10 - ws: 8.17.1 + '@types/ws': 8.5.12 + ws: 8.18.0 transitivePeerDependencies: - bufferutil - utf-8-validate @@ -6472,6 +6505,12 @@ snapshots: '@types/range-parser': 1.2.7 '@types/send': 0.17.4 + '@types/express-ws@3.0.4': + dependencies: + '@types/express': 4.17.21 + '@types/express-serve-static-core': 4.19.3 + '@types/ws': 8.5.12 + '@types/express@4.17.21': dependencies: '@types/body-parser': 1.19.5 @@ -6595,7 +6634,7 @@ snapshots: dependencies: '@types/webidl-conversions': 7.0.3 - '@types/ws@8.5.10': + '@types/ws@8.5.12': dependencies: '@types/node': 20.14.1 @@ -7336,6 +7375,14 @@ snapshots: dependencies: express: 4.19.2 + express-ws@5.0.2(express@4.19.2): + dependencies: + express: 4.19.2 + ws: 7.5.10 + transitivePeerDependencies: + - bufferutil + - utf-8-validate + express@4.19.2: dependencies: accepts: 1.3.8 @@ -8248,17 +8295,17 @@ snapshots: kleur@3.0.3: {} - langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1): + langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0): dependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2) - '@langchain/openai': 0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1)) - '@langchain/textsplitters': 0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2) + '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2) + '@langchain/openai': 0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)) + '@langchain/textsplitters': 0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2) binary-extensions: 2.3.0 js-tiktoken: 1.0.12 js-yaml: 4.1.0 jsonpointer: 5.0.1 langchainhub: 0.0.11 - langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2) + langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2) ml-distance: 4.0.1 openapi-types: 12.1.3 p-retry: 4.6.2 @@ -8278,14 +8325,14 @@ snapshots: pdf-parse: 1.1.1 puppeteer: 22.12.1(typescript@5.4.5) redis: 4.6.14 - ws: 8.17.1 + ws: 8.18.0 transitivePeerDependencies: - encoding - openai langchainhub@0.0.11: {} - langsmith@0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2): + langsmith@0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2): dependencies: '@types/uuid': 9.0.8 commander: 10.0.1 @@ -8294,8 +8341,8 @@ snapshots: p-retry: 4.6.2 uuid: 9.0.1 optionalDependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2) - langchain: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1) + '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2) + langchain: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) openai: 4.52.2 languagedetect@2.0.0: {} @@ -8999,7 +9046,7 @@ snapshots: chromium-bidi: 0.5.24(devtools-protocol@0.0.1299070) debug: 4.3.5 devtools-protocol: 0.0.1299070 - ws: 8.17.1 + ws: 8.18.0 transitivePeerDependencies: - bufferutil - supports-color @@ -9681,7 +9728,9 @@ snapshots: imurmurhash: 0.1.4 signal-exit: 4.1.0 - ws@8.17.1: {} + ws@7.5.10: {} + + ws@8.18.0: {} xml2js@0.6.2: dependencies: diff --git a/apps/api/src/controllers/v1/crawl-status-ws.ts b/apps/api/src/controllers/v1/crawl-status-ws.ts new file mode 100644 index 00000000..6e2e2eaf --- /dev/null +++ b/apps/api/src/controllers/v1/crawl-status-ws.ts @@ -0,0 +1,148 @@ +import { authMiddleware } from "../../routes/v1"; +import { RateLimiterMode } from "../../types"; +import { authenticateUser } from "../v0/auth"; +import { CrawlStatusParams, CrawlStatusResponse, Document, ErrorResponse, legacyDocumentConverter, RequestWithAuth } from "./types"; +import { WebSocket } from "ws"; +import { v4 as uuidv4 } from "uuid"; +import { Logger } from "../../lib/logger"; +import { getCrawl, getCrawlExpiry, getCrawlJobs, getDoneJobsOrdered, getDoneJobsOrderedLength, isCrawlFinished, isCrawlFinishedLocked } from "../../lib/crawl-redis"; +import { getScrapeQueue, scrapeQueueEvents } from "../../services/queue-service"; +import { getJob, getJobs } from "./crawl-status"; + +type ErrorMessage = { + type: "error", + error: string, +} + +type CatchupMessage = { + type: "catchup", + data: CrawlStatusResponse, +} + +type DocumentMessage = { + type: "document", + data: Document, +} + +type DoneMessage = { type: "done" } + +type Message = ErrorMessage | CatchupMessage | DoneMessage | DocumentMessage; + +function send(ws: WebSocket, msg: Message) { + if (ws.readyState === 1) { + return new Promise((resolve, reject) => { + ws.send(JSON.stringify(msg), (err) => { + if (err) reject(err); + else resolve(null); + }); + }); + } +} + +function close(ws: WebSocket, code: number, msg: Message) { + if (ws.readyState <= 1) { + ws.close(code, JSON.stringify(msg)); + } +} + +async function crawlStatusWS(ws: WebSocket, req: RequestWithAuth) { + const sc = await getCrawl(req.params.jobId); + if (!sc) { + return close(ws, 1008, { type: "error", error: "Job not found" }); + } + + if (sc.team_id !== req.auth.team_id) { + return close(ws, 3003, { type: "error", error: "Forbidden" }); + } + + let doneJobIDs = []; + + const completedListener = async e => { + const job = await getScrapeQueue().getJob(e.jobId) + if (job.data.crawl_id === req.params.jobId) { + if (doneJobIDs.includes(job.id)) return; + const j = await getJob(job.id); + if (j.returnvalue) { + send(ws, { + type: "document", + data: legacyDocumentConverter(j.returnvalue), + }); + if (await isCrawlFinishedLocked(req.params.jobId)) { + await new Promise((resolve) => setTimeout(() => resolve(true), 5000)) // wait for last events to pour in + scrapeQueueEvents.removeListener("completed", completedListener); + close(ws, 1000, { type: "done" }) + } + } else { + // FAILED + } + } + }; + + // TODO: handle failed jobs + + scrapeQueueEvents.addListener("completed", completedListener); + + doneJobIDs = await getDoneJobsOrdered(req.params.jobId); + + const jobIDs = await getCrawlJobs(req.params.jobId); + const jobStatuses = await Promise.all(jobIDs.map(x => getScrapeQueue().getJobState(x))); + const status: Exclude["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "scraping"; + const doneJobs = await getJobs(doneJobIDs); + const data = doneJobs.map(x => x.returnvalue); + + send(ws, { + type: "catchup", + data: { + status, + totalCount: jobIDs.length, + creditsUsed: jobIDs.length, + expiresAt: (await getCrawlExpiry(req.params.jobId)).toISOString(), + data: data.map(x => legacyDocumentConverter(x)), + } + }); + + if (status !== "scraping") { + scrapeQueueEvents.removeListener("completed", completedListener); + return close(ws, 1000, { type: "done" }); + } +} + +// Basically just middleware and error wrapping +export async function crawlStatusWSController(ws: WebSocket, req: RequestWithAuth) { + try { + const { success, team_id, error, status, plan } = await authenticateUser( + req, + null, + RateLimiterMode.CrawlStatus, + ); + + if (!success) { + return close(ws, 3000, { + type: "error", + error, + }); + } + + req.auth = { team_id, plan }; + + await crawlStatusWS(ws, req); + } catch (err) { + const id = uuidv4(); + let verbose = JSON.stringify(err); + if (verbose === "{}") { + if (err instanceof Error) { + verbose = JSON.stringify({ + message: err.message, + name: err.name, + stack: err.stack, + }); + } + } + + Logger.error("Error occurred in WebSocket! (" + req.path + ") -- ID " + id + " -- " + verbose); + return close(ws, 1011, { + type: "error", + error: "An unexpected error occurred. Please contact hello@firecrawl.com for help. Your exception ID is " + id + }); + } +} diff --git a/apps/api/src/controllers/v1/crawl-status.ts b/apps/api/src/controllers/v1/crawl-status.ts index 4b8e1e54..3cbacea3 100644 --- a/apps/api/src/controllers/v1/crawl-status.ts +++ b/apps/api/src/controllers/v1/crawl-status.ts @@ -4,7 +4,24 @@ import { getCrawl, getCrawlExpiry, getCrawlJobs, getDoneJobsOrdered, getDoneJobs import { getScrapeQueue } from "../../services/queue-service"; import { supabaseGetJobById, supabaseGetJobsById } from "../../lib/supabase-jobs"; -async function getJobs(ids: string[]) { +export async function getJob(id: string) { + const job = await getScrapeQueue().getJob(id); + if (!job) return job; + + if (process.env.USE_DB_AUTHENTICATION === "true") { + const supabaseData = await supabaseGetJobById(id); + + if (supabaseData) { + job.returnvalue = supabaseData.docs; + } + } + + job.returnvalue = Array.isArray(job.returnvalue) ? job.returnvalue[0] : job.returnvalue; + + return job; +} + +export async function getJobs(ids: string[]) { const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x); if (process.env.USE_DB_AUTHENTICATION === "true") { diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index a5138a7a..2bf5e6d0 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -14,6 +14,8 @@ import http from 'node:http'; import https from 'node:https'; import CacheableLookup from 'cacheable-lookup'; import { v1Router } from "./routes/v1"; +import expressWs from "express-ws"; +import { crawlStatusWSController } from "./controllers/v1/crawl-status-ws"; const { createBullBoard } = require("@bull-board/api"); const { BullAdapter } = require("@bull-board/api/bullAdapter"); @@ -46,7 +48,8 @@ if (cluster.isMaster) { } }); } else { - const app = express(); + const ws = expressWs(express()); + const app = ws.app; global.isProduction = process.env.IS_PRODUCTION === "true"; @@ -79,7 +82,7 @@ if (cluster.isMaster) { // register router app.use(v0Router); - app.use(v1Router); + app.use("/v1", v1Router); app.use(adminRouter); const DEFAULT_PORT = process.env.PORT ?? 3002; diff --git a/apps/api/src/lib/crawl-redis.ts b/apps/api/src/lib/crawl-redis.ts index 3f326811..6640678d 100644 --- a/apps/api/src/lib/crawl-redis.ts +++ b/apps/api/src/lib/crawl-redis.ts @@ -63,6 +63,10 @@ export async function isCrawlFinished(id: string) { return (await redisConnection.scard("crawl:" + id + ":jobs_done")) === (await redisConnection.scard("crawl:" + id + ":jobs")); } +export async function isCrawlFinishedLocked(id: string) { + return (await redisConnection.exists("crawl:" + id + ":finish")); +} + export async function finishCrawl(id: string) { if (await isCrawlFinished(id)) { const set = await redisConnection.setnx("crawl:" + id + ":finish", "yes"); diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts index b2ef5958..ea555987 100644 --- a/apps/api/src/routes/v1.ts +++ b/apps/api/src/routes/v1.ts @@ -13,6 +13,8 @@ import { validateIdempotencyKey } from "../services/idempotency/validate"; import { ZodError } from "zod"; import { checkTeamCredits } from "../services/billing/credit_billing"; import { v4 as uuidv4 } from "uuid"; +import expressWs from "express-ws"; +import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws"; // import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview"; // import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status"; // import { searchController } from "../../src/controllers/v1/search"; @@ -33,7 +35,7 @@ function checkCreditsMiddleware(minimum: number): (req: RequestWithAuth, res: Re }; } -function authMiddleware(rateLimiterMode: RateLimiterMode): (req: RequestWithMaybeAuth, res: Response, next: NextFunction) => void { +export function authMiddleware(rateLimiterMode: RateLimiterMode): (req: RequestWithMaybeAuth, res: Response, next: NextFunction) => void { return (req, res, next) => { (async () => { const { success, team_id, error, status, plan } = await authenticateUser( @@ -74,17 +76,19 @@ function wrap(controller: (req: Request, res: Response) => Promise): (req: } } +expressWs(express()); + export const v1Router = express.Router(); v1Router.post( - "/v1/scrape", + "/scrape", authMiddleware(RateLimiterMode.Scrape), checkCreditsMiddleware(1), wrap(scrapeController) ); v1Router.post( - "/v1/crawl", + "/crawl", authMiddleware(RateLimiterMode.Crawl), idempotencyMiddleware, checkCreditsMiddleware(1), @@ -92,31 +96,36 @@ v1Router.post( ); v1Router.post( - "/v1/map", + "/map", authMiddleware(RateLimiterMode.Crawl), checkCreditsMiddleware(1), wrap(mapController) ); v1Router.get( - "/v1/crawl/:jobId", + "/crawl/:jobId", authMiddleware(RateLimiterMode.CrawlStatus), wrap(crawlStatusController) ); -// v1Router.post("/v1/crawlWebsitePreview", crawlPreviewController); -// v1Router.delete("/v1/crawl/:jobId", crawlCancelController); -// v1Router.get("/v1/checkJobStatus/:jobId", crawlJobStatusPreviewController); +v1Router.ws( + "/crawl/:jobId", + crawlStatusWSController +); + +// v1Router.post("/crawlWebsitePreview", crawlPreviewController); +// v1Router.delete("/crawl/:jobId", crawlCancelController); +// v1Router.get("/checkJobStatus/:jobId", crawlJobStatusPreviewController); // // Auth route for key based authentication -// v1Router.get("/v1/keyAuth", keyAuthController); +// v1Router.get("/keyAuth", keyAuthController); // // Search routes -// v0Router.post("/v1/search", searchController); +// v0Router.post("/search", searchController); // Health/Probe routes -// v1Router.get("/v1/health/liveness", livenessController); -// v1Router.get("/v1/health/readiness", readinessController); +// v1Router.get("/health/liveness", livenessController); +// v1Router.get("/health/readiness", readinessController); v1Router.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: Response, next: NextFunction) => { if (err instanceof ZodError) {