diff --git a/apps/api/package.json b/apps/api/package.json index 00c1bc0e..670dfc7a 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -58,6 +58,7 @@ "@devil7softwares/pos": "^1.0.2", "@dqbd/tiktoken": "^1.0.17", "@nangohq/node": "^0.40.8", + "@pinecone-database/pinecone": "^4.0.0", "@sentry/cli": "^2.33.1", "@sentry/node": "^8.26.0", "@sentry/profiling-node": "^8.26.0", diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index 17532d25..082a200f 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -10,7 +10,7 @@ importers: dependencies: '@anthropic-ai/sdk': specifier: ^0.24.3 - version: 0.24.3 + version: 0.24.3(encoding@0.1.13) '@brillout/import': specifier: ^0.2.2 version: 0.2.3 @@ -29,9 +29,12 @@ importers: '@nangohq/node': specifier: ^0.40.8 version: 0.40.8 + '@pinecone-database/pinecone': + specifier: ^4.0.0 + version: 4.0.0 '@sentry/cli': specifier: ^2.33.1 - version: 2.33.1 + version: 2.33.1(encoding@0.1.13) '@sentry/node': specifier: ^8.26.0 version: 8.26.0 @@ -79,7 +82,7 @@ importers: version: 1.1.1 cohere-ai: specifier: ^7.14.0 - version: 7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) + version: 7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(encoding@0.1.13) cors: specifier: ^2.8.5 version: 2.8.5 @@ -130,13 +133,13 @@ importers: version: 2.9.0 langchain: specifier: ^0.2.8 - version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) + version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) languagedetect: specifier: ^2.0.0 version: 2.0.0 logsnag: specifier: ^1.0.0 - version: 1.0.0 + version: 1.0.0(encoding@0.1.13) luxon: specifier: ^3.4.3 version: 3.4.4 @@ -157,7 +160,7 @@ importers: version: 7.0.7(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3) openai: specifier: ^4.57.0 - version: 4.57.0(zod@3.23.8) + version: 4.57.0(encoding@0.1.13)(zod@3.23.8) pdf-parse: specifier: ^1.1.1 version: 1.1.1 @@ -275,7 +278,7 @@ importers: version: 29.7.0(@types/node@20.14.1)(ts-node@10.9.2(@types/node@20.14.1)(typescript@5.4.5)) jest-fetch-mock: specifier: ^3.0.3 - version: 3.0.3 + version: 3.0.3(encoding@0.1.13) mammoth: specifier: ^1.7.2 version: 1.7.2 @@ -1006,6 +1009,10 @@ packages: '@pdf-lib/upng@1.0.1': resolution: {integrity: sha512-dQK2FUMQtowVP00mtIksrlZhdFXQZPC+taih1q4CvPZ5vqdxR/LKBaFg0oAfzd1GlHZXXSPdQfzQnt+ViGvEIQ==} + '@pinecone-database/pinecone@4.0.0': + resolution: {integrity: sha512-INYS+GBys9v5BRTyn0tv8srVsPTlSRvE3BPE4Wkc/lOEyAIyB9F7DEMXbeF19FOLEgRwCuHTLjzm1niENl+4FA==} + engines: {node: '>=18.0.0'} + '@pkgjs/parseargs@0.11.0': resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==} engines: {node: '>=14'} @@ -2279,6 +2286,9 @@ packages: resolution: {integrity: sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==} engines: {node: '>= 0.8'} + encoding@0.1.13: + resolution: {integrity: sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==} + end-of-stream@1.4.4: resolution: {integrity: sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==} @@ -4599,7 +4609,7 @@ snapshots: '@jridgewell/gen-mapping': 0.3.5 '@jridgewell/trace-mapping': 0.3.25 - '@anthropic-ai/sdk@0.24.3': + '@anthropic-ai/sdk@0.24.3(encoding@0.1.13)': dependencies: '@types/node': 18.19.39 '@types/node-fetch': 2.6.11 @@ -4607,7 +4617,7 @@ snapshots: agentkeepalive: 4.5.0 form-data-encoder: 1.7.2 formdata-node: 4.4.1 - node-fetch: 2.7.0 + node-fetch: 2.7.0(encoding@0.1.13) web-streams-polyfill: 3.3.3 transitivePeerDependencies: - encoding @@ -5577,13 +5587,13 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.4.15 - '@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))': + '@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))': dependencies: ansi-styles: 5.2.0 camelcase: 6.3.0 decamelize: 1.2.0 js-tiktoken: 1.0.12 - langsmith: 0.1.34(zyeavx4tfqw3smbbpiinhfxxeu) + langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4) ml-distance: 4.0.1 mustache: 4.2.0 p-queue: 6.6.2 @@ -5595,20 +5605,20 @@ snapshots: - langchain - openai - '@langchain/openai@0.2.1(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))': + '@langchain/openai@0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))': dependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8)) js-tiktoken: 1.0.12 - openai: 4.57.0(zod@3.23.8) + openai: 4.57.0(encoding@0.1.13)(zod@3.23.8) zod: 3.23.8 zod-to-json-schema: 3.23.1(zod@3.23.8) transitivePeerDependencies: - encoding - langchain - '@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))': + '@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))': dependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8)) js-tiktoken: 1.0.12 transitivePeerDependencies: - langchain @@ -5866,6 +5876,10 @@ snapshots: dependencies: pako: 1.0.11 + '@pinecone-database/pinecone@4.0.0': + dependencies: + encoding: 0.1.13 + '@pkgjs/parseargs@0.11.0': optional: true @@ -5950,10 +5964,10 @@ snapshots: '@sentry/cli-win32-x64@2.33.1': optional: true - '@sentry/cli@2.33.1': + '@sentry/cli@2.33.1(encoding@0.1.13)': dependencies: https-proxy-agent: 5.0.1 - node-fetch: 2.7.0 + node-fetch: 2.7.0(encoding@0.1.13) progress: 2.0.3 proxy-from-env: 1.1.0 which: 2.0.2 @@ -7088,7 +7102,7 @@ snapshots: co@4.6.0: {} - cohere-ai@7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)): + cohere-ai@7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(encoding@0.1.13): dependencies: '@aws-sdk/client-sagemaker': 3.679.0 '@aws-sdk/credential-providers': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) @@ -7098,7 +7112,7 @@ snapshots: form-data-encoder: 4.0.2 formdata-node: 6.0.3 js-base64: 3.7.2 - node-fetch: 2.7.0 + node-fetch: 2.7.0(encoding@0.1.13) qs: 6.11.2 readable-stream: 4.5.2 url-join: 4.0.1 @@ -7208,9 +7222,9 @@ snapshots: dependencies: luxon: 3.4.4 - cross-fetch@3.1.8: + cross-fetch@3.1.8(encoding@0.1.13): dependencies: - node-fetch: 2.7.0 + node-fetch: 2.7.0(encoding@0.1.13) transitivePeerDependencies: - encoding @@ -7365,6 +7379,10 @@ snapshots: encodeurl@1.0.2: {} + encoding@0.1.13: + dependencies: + iconv-lite: 0.6.3 + end-of-stream@1.4.4: dependencies: once: 1.4.0 @@ -7899,9 +7917,9 @@ snapshots: isexe@2.0.0: {} - isomorphic-fetch@3.0.0: + isomorphic-fetch@3.0.0(encoding@0.1.13): dependencies: - node-fetch: 2.7.0 + node-fetch: 2.7.0(encoding@0.1.13) whatwg-fetch: 3.6.20 transitivePeerDependencies: - encoding @@ -8070,9 +8088,9 @@ snapshots: jest-mock: 29.7.0 jest-util: 29.7.0 - jest-fetch-mock@3.0.3: + jest-fetch-mock@3.0.3(encoding@0.1.13): dependencies: - cross-fetch: 3.1.8 + cross-fetch: 3.1.8(encoding@0.1.13) promise-polyfill: 8.3.0 transitivePeerDependencies: - encoding @@ -8342,17 +8360,17 @@ snapshots: kuler@2.0.0: {} - langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0): + langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0): dependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) - '@langchain/openai': 0.2.1(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)) - '@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8)) + '@langchain/openai': 0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)) + '@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8)) binary-extensions: 2.3.0 js-tiktoken: 1.0.12 js-yaml: 4.1.0 jsonpointer: 5.0.1 langchainhub: 0.0.11 - langsmith: 0.1.34(zyeavx4tfqw3smbbpiinhfxxeu) + langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4) ml-distance: 4.0.1 openapi-types: 12.1.3 p-retry: 4.6.2 @@ -8362,6 +8380,7 @@ snapshots: zod-to-json-schema: 3.23.1(zod@3.23.8) optionalDependencies: '@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + '@pinecone-database/pinecone': 4.0.0 '@supabase/supabase-js': 2.44.2 axios: 1.7.2 cheerio: 1.0.0-rc.12 @@ -8381,7 +8400,7 @@ snapshots: langchainhub@0.0.11: {} - langsmith@0.1.34(zyeavx4tfqw3smbbpiinhfxxeu): + langsmith@0.1.34(npkyd6f7wyl3urgrzoxaktl5a4): dependencies: '@types/uuid': 9.0.8 commander: 10.0.1 @@ -8390,9 +8409,9 @@ snapshots: p-retry: 4.6.2 uuid: 9.0.1 optionalDependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) - langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) - openai: 4.57.0(zod@3.23.8) + '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8)) + langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) + openai: 4.57.0(encoding@0.1.13)(zod@3.23.8) languagedetect@2.0.0: {} @@ -8442,9 +8461,9 @@ snapshots: loglevel@1.9.1: {} - logsnag@1.0.0: + logsnag@1.0.0(encoding@0.1.13): dependencies: - isomorphic-fetch: 3.0.0 + isomorphic-fetch: 3.0.0(encoding@0.1.13) transitivePeerDependencies: - encoding @@ -8703,9 +8722,11 @@ snapshots: node-ensure@0.0.0: {} - node-fetch@2.7.0: + node-fetch@2.7.0(encoding@0.1.13): dependencies: whatwg-url: 5.0.0 + optionalDependencies: + encoding: 0.1.13 node-fetch@3.3.2: dependencies: @@ -8780,7 +8801,7 @@ snapshots: transitivePeerDependencies: - debug - openai@4.57.0(zod@3.23.8): + openai@4.57.0(encoding@0.1.13)(zod@3.23.8): dependencies: '@types/node': 18.19.39 '@types/node-fetch': 2.6.11 @@ -8789,7 +8810,7 @@ snapshots: agentkeepalive: 4.5.0 form-data-encoder: 1.7.2 formdata-node: 4.4.1 - node-fetch: 2.7.0 + node-fetch: 2.7.0(encoding@0.1.13) qs: 6.12.2 optionalDependencies: zod: 3.23.8 diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index adc080f2..d4769283 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -17,6 +17,7 @@ import expressWs from "express-ws"; import { ErrorResponse, ResponseWithSentry } from "./controllers/v1/types"; import { ZodError } from "zod"; import { v4 as uuidv4 } from "uuid"; +import { searchSimilarPages } from "./lib/extract/index/pinecone"; const { createBullBoard } = require("@bull-board/api"); const { BullAdapter } = require("@bull-board/api/bullAdapter"); diff --git a/apps/api/src/lib/extract/index/pinecone.ts b/apps/api/src/lib/extract/index/pinecone.ts new file mode 100644 index 00000000..603cd38f --- /dev/null +++ b/apps/api/src/lib/extract/index/pinecone.ts @@ -0,0 +1,141 @@ +import { Pinecone } from '@pinecone-database/pinecone'; +import { Document } from '../../../controllers/v1/types'; +import { logger } from '../../logger'; +import OpenAI from "openai"; + +const openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, +}); + +const pinecone = new Pinecone({ + apiKey: process.env.PINECONE_API_KEY!, +}); + +const INDEX_NAME = process.env.PINECONE_INDEX_NAME ?? ""; + +export interface PageMetadata { + url: string; + originUrl: string; + title?: string; + description?: string; + crawlId?: string; + teamId?: string; + timestamp: number; + markdown?: string; +} + +async function getEmbedding(text: string) { + const embedding = await openai.embeddings.create({ + model: "text-embedding-3-small", + input: text, + encoding_format: "float", + }); + + return embedding.data[0].embedding; +} + +function normalizeUrl(url: string) { + const urlO = new URL(url); + if (!urlO.hostname.startsWith("www.")) { + urlO.hostname = "www." + urlO.hostname; + } + return urlO.href; +} + +export async function indexPage( + document: Document, + originUrl: string, + crawlId?: string, + teamId?: string +) { + try { + const index = pinecone.index(INDEX_NAME); + + // Create text to embed + const textToEmbed = [ + document.metadata.title, + document.metadata.description, + document.markdown + ].filter(Boolean).join('\n\n'); + + // Get embedding from OpenAI + const embedding = await getEmbedding(textToEmbed); + + // Prepare metadata + const metadata: PageMetadata = { + url: normalizeUrl(document.metadata.sourceURL || document.metadata.url!), + originUrl: normalizeUrl(originUrl), + title: document.metadata.title, + description: document.metadata.description, + crawlId, + teamId, + markdown: document.markdown, + timestamp: Date.now() + }; + + // Upsert to Pinecone + await index.upsert([{ + id: document.metadata.sourceURL || document.metadata.url!, + values: embedding, + metadata: { + ...metadata, + [document.metadata.sourceURL || document.metadata.url!]: true + } + }]); + + logger.debug('Successfully indexed page in Pinecone', { + url: metadata.url, + crawlId + }); + + } catch (error) { + logger.error('Failed to index page in Pinecone', { + error, + url: document.metadata.sourceURL || document.metadata.url, + crawlId + }); + } +} + +export async function searchSimilarPages( + query: string, + originUrl?: string, + limit: number = 10 +) { + try { + const index = pinecone.index(INDEX_NAME); + + // Get query embedding from OpenAI + const queryEmbedding = await getEmbedding(query); + + const queryParams: any = { + vector: queryEmbedding, + topK: limit, + includeMetadata: true + }; + + // Add filter if originUrl is provided + if (originUrl) { + queryParams.filter = { + [originUrl]: { $contains: normalizeUrl(originUrl) } + }; + } + + const results = await index.query(queryParams); + return results.matches.map(match => ({ + url: match.metadata?.url, + title: match.metadata?.title, + description: match.metadata?.description, + score: match.score, + markdown: match.metadata?.markdown + })); + + } catch (error) { + logger.error('Failed to search similar pages in Pinecone', { + error, + query, + originUrl + }); + return []; + } +} diff --git a/apps/api/src/lib/ranker.ts b/apps/api/src/lib/ranker.ts index bffbc9c2..02d59457 100644 --- a/apps/api/src/lib/ranker.ts +++ b/apps/api/src/lib/ranker.ts @@ -10,7 +10,7 @@ const openai = new OpenAI({ async function getEmbedding(text: string) { const embedding = await openai.embeddings.create({ - model: "text-embedding-ada-002", + model: "text-embedding-3-small", input: text, encoding_format: "float", });