Nick: index exploration

This commit is contained in:
Nicolas 2024-12-30 19:37:48 -03:00
parent 0847a6038e
commit bf9d41d0b2
5 changed files with 205 additions and 41 deletions

View File

@ -58,6 +58,7 @@
"@devil7softwares/pos": "^1.0.2", "@devil7softwares/pos": "^1.0.2",
"@dqbd/tiktoken": "^1.0.17", "@dqbd/tiktoken": "^1.0.17",
"@nangohq/node": "^0.40.8", "@nangohq/node": "^0.40.8",
"@pinecone-database/pinecone": "^4.0.0",
"@sentry/cli": "^2.33.1", "@sentry/cli": "^2.33.1",
"@sentry/node": "^8.26.0", "@sentry/node": "^8.26.0",
"@sentry/profiling-node": "^8.26.0", "@sentry/profiling-node": "^8.26.0",

101
apps/api/pnpm-lock.yaml generated
View File

@ -10,7 +10,7 @@ importers:
dependencies: dependencies:
'@anthropic-ai/sdk': '@anthropic-ai/sdk':
specifier: ^0.24.3 specifier: ^0.24.3
version: 0.24.3 version: 0.24.3(encoding@0.1.13)
'@brillout/import': '@brillout/import':
specifier: ^0.2.2 specifier: ^0.2.2
version: 0.2.3 version: 0.2.3
@ -29,9 +29,12 @@ importers:
'@nangohq/node': '@nangohq/node':
specifier: ^0.40.8 specifier: ^0.40.8
version: 0.40.8 version: 0.40.8
'@pinecone-database/pinecone':
specifier: ^4.0.0
version: 4.0.0
'@sentry/cli': '@sentry/cli':
specifier: ^2.33.1 specifier: ^2.33.1
version: 2.33.1 version: 2.33.1(encoding@0.1.13)
'@sentry/node': '@sentry/node':
specifier: ^8.26.0 specifier: ^8.26.0
version: 8.26.0 version: 8.26.0
@ -79,7 +82,7 @@ importers:
version: 1.1.1 version: 1.1.1
cohere-ai: cohere-ai:
specifier: ^7.14.0 specifier: ^7.14.0
version: 7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) version: 7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(encoding@0.1.13)
cors: cors:
specifier: ^2.8.5 specifier: ^2.8.5
version: 2.8.5 version: 2.8.5
@ -130,13 +133,13 @@ importers:
version: 2.9.0 version: 2.9.0
langchain: langchain:
specifier: ^0.2.8 specifier: ^0.2.8
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
languagedetect: languagedetect:
specifier: ^2.0.0 specifier: ^2.0.0
version: 2.0.0 version: 2.0.0
logsnag: logsnag:
specifier: ^1.0.0 specifier: ^1.0.0
version: 1.0.0 version: 1.0.0(encoding@0.1.13)
luxon: luxon:
specifier: ^3.4.3 specifier: ^3.4.3
version: 3.4.4 version: 3.4.4
@ -157,7 +160,7 @@ importers:
version: 7.0.7(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3) version: 7.0.7(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3)
openai: openai:
specifier: ^4.57.0 specifier: ^4.57.0
version: 4.57.0(zod@3.23.8) version: 4.57.0(encoding@0.1.13)(zod@3.23.8)
pdf-parse: pdf-parse:
specifier: ^1.1.1 specifier: ^1.1.1
version: 1.1.1 version: 1.1.1
@ -275,7 +278,7 @@ importers:
version: 29.7.0(@types/node@20.14.1)(ts-node@10.9.2(@types/node@20.14.1)(typescript@5.4.5)) version: 29.7.0(@types/node@20.14.1)(ts-node@10.9.2(@types/node@20.14.1)(typescript@5.4.5))
jest-fetch-mock: jest-fetch-mock:
specifier: ^3.0.3 specifier: ^3.0.3
version: 3.0.3 version: 3.0.3(encoding@0.1.13)
mammoth: mammoth:
specifier: ^1.7.2 specifier: ^1.7.2
version: 1.7.2 version: 1.7.2
@ -1006,6 +1009,10 @@ packages:
'@pdf-lib/upng@1.0.1': '@pdf-lib/upng@1.0.1':
resolution: {integrity: sha512-dQK2FUMQtowVP00mtIksrlZhdFXQZPC+taih1q4CvPZ5vqdxR/LKBaFg0oAfzd1GlHZXXSPdQfzQnt+ViGvEIQ==} resolution: {integrity: sha512-dQK2FUMQtowVP00mtIksrlZhdFXQZPC+taih1q4CvPZ5vqdxR/LKBaFg0oAfzd1GlHZXXSPdQfzQnt+ViGvEIQ==}
'@pinecone-database/pinecone@4.0.0':
resolution: {integrity: sha512-INYS+GBys9v5BRTyn0tv8srVsPTlSRvE3BPE4Wkc/lOEyAIyB9F7DEMXbeF19FOLEgRwCuHTLjzm1niENl+4FA==}
engines: {node: '>=18.0.0'}
'@pkgjs/parseargs@0.11.0': '@pkgjs/parseargs@0.11.0':
resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==} resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==}
engines: {node: '>=14'} engines: {node: '>=14'}
@ -2279,6 +2286,9 @@ packages:
resolution: {integrity: sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==} resolution: {integrity: sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==}
engines: {node: '>= 0.8'} engines: {node: '>= 0.8'}
encoding@0.1.13:
resolution: {integrity: sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==}
end-of-stream@1.4.4: end-of-stream@1.4.4:
resolution: {integrity: sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==} resolution: {integrity: sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==}
@ -4599,7 +4609,7 @@ snapshots:
'@jridgewell/gen-mapping': 0.3.5 '@jridgewell/gen-mapping': 0.3.5
'@jridgewell/trace-mapping': 0.3.25 '@jridgewell/trace-mapping': 0.3.25
'@anthropic-ai/sdk@0.24.3': '@anthropic-ai/sdk@0.24.3(encoding@0.1.13)':
dependencies: dependencies:
'@types/node': 18.19.39 '@types/node': 18.19.39
'@types/node-fetch': 2.6.11 '@types/node-fetch': 2.6.11
@ -4607,7 +4617,7 @@ snapshots:
agentkeepalive: 4.5.0 agentkeepalive: 4.5.0
form-data-encoder: 1.7.2 form-data-encoder: 1.7.2
formdata-node: 4.4.1 formdata-node: 4.4.1
node-fetch: 2.7.0 node-fetch: 2.7.0(encoding@0.1.13)
web-streams-polyfill: 3.3.3 web-streams-polyfill: 3.3.3
transitivePeerDependencies: transitivePeerDependencies:
- encoding - encoding
@ -5577,13 +5587,13 @@ snapshots:
'@jridgewell/resolve-uri': 3.1.2 '@jridgewell/resolve-uri': 3.1.2
'@jridgewell/sourcemap-codec': 1.4.15 '@jridgewell/sourcemap-codec': 1.4.15
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))': '@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
dependencies: dependencies:
ansi-styles: 5.2.0 ansi-styles: 5.2.0
camelcase: 6.3.0 camelcase: 6.3.0
decamelize: 1.2.0 decamelize: 1.2.0
js-tiktoken: 1.0.12 js-tiktoken: 1.0.12
langsmith: 0.1.34(zyeavx4tfqw3smbbpiinhfxxeu) langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
ml-distance: 4.0.1 ml-distance: 4.0.1
mustache: 4.2.0 mustache: 4.2.0
p-queue: 6.6.2 p-queue: 6.6.2
@ -5595,20 +5605,20 @@ snapshots:
- langchain - langchain
- openai - openai
'@langchain/openai@0.2.1(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))': '@langchain/openai@0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
dependencies: dependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
js-tiktoken: 1.0.12 js-tiktoken: 1.0.12
openai: 4.57.0(zod@3.23.8) openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
zod: 3.23.8 zod: 3.23.8
zod-to-json-schema: 3.23.1(zod@3.23.8) zod-to-json-schema: 3.23.1(zod@3.23.8)
transitivePeerDependencies: transitivePeerDependencies:
- encoding - encoding
- langchain - langchain
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))': '@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
dependencies: dependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
js-tiktoken: 1.0.12 js-tiktoken: 1.0.12
transitivePeerDependencies: transitivePeerDependencies:
- langchain - langchain
@ -5866,6 +5876,10 @@ snapshots:
dependencies: dependencies:
pako: 1.0.11 pako: 1.0.11
'@pinecone-database/pinecone@4.0.0':
dependencies:
encoding: 0.1.13
'@pkgjs/parseargs@0.11.0': '@pkgjs/parseargs@0.11.0':
optional: true optional: true
@ -5950,10 +5964,10 @@ snapshots:
'@sentry/cli-win32-x64@2.33.1': '@sentry/cli-win32-x64@2.33.1':
optional: true optional: true
'@sentry/cli@2.33.1': '@sentry/cli@2.33.1(encoding@0.1.13)':
dependencies: dependencies:
https-proxy-agent: 5.0.1 https-proxy-agent: 5.0.1
node-fetch: 2.7.0 node-fetch: 2.7.0(encoding@0.1.13)
progress: 2.0.3 progress: 2.0.3
proxy-from-env: 1.1.0 proxy-from-env: 1.1.0
which: 2.0.2 which: 2.0.2
@ -7088,7 +7102,7 @@ snapshots:
co@4.6.0: {} co@4.6.0: {}
cohere-ai@7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)): cohere-ai@7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(encoding@0.1.13):
dependencies: dependencies:
'@aws-sdk/client-sagemaker': 3.679.0 '@aws-sdk/client-sagemaker': 3.679.0
'@aws-sdk/credential-providers': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) '@aws-sdk/credential-providers': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))
@ -7098,7 +7112,7 @@ snapshots:
form-data-encoder: 4.0.2 form-data-encoder: 4.0.2
formdata-node: 6.0.3 formdata-node: 6.0.3
js-base64: 3.7.2 js-base64: 3.7.2
node-fetch: 2.7.0 node-fetch: 2.7.0(encoding@0.1.13)
qs: 6.11.2 qs: 6.11.2
readable-stream: 4.5.2 readable-stream: 4.5.2
url-join: 4.0.1 url-join: 4.0.1
@ -7208,9 +7222,9 @@ snapshots:
dependencies: dependencies:
luxon: 3.4.4 luxon: 3.4.4
cross-fetch@3.1.8: cross-fetch@3.1.8(encoding@0.1.13):
dependencies: dependencies:
node-fetch: 2.7.0 node-fetch: 2.7.0(encoding@0.1.13)
transitivePeerDependencies: transitivePeerDependencies:
- encoding - encoding
@ -7365,6 +7379,10 @@ snapshots:
encodeurl@1.0.2: {} encodeurl@1.0.2: {}
encoding@0.1.13:
dependencies:
iconv-lite: 0.6.3
end-of-stream@1.4.4: end-of-stream@1.4.4:
dependencies: dependencies:
once: 1.4.0 once: 1.4.0
@ -7899,9 +7917,9 @@ snapshots:
isexe@2.0.0: {} isexe@2.0.0: {}
isomorphic-fetch@3.0.0: isomorphic-fetch@3.0.0(encoding@0.1.13):
dependencies: dependencies:
node-fetch: 2.7.0 node-fetch: 2.7.0(encoding@0.1.13)
whatwg-fetch: 3.6.20 whatwg-fetch: 3.6.20
transitivePeerDependencies: transitivePeerDependencies:
- encoding - encoding
@ -8070,9 +8088,9 @@ snapshots:
jest-mock: 29.7.0 jest-mock: 29.7.0
jest-util: 29.7.0 jest-util: 29.7.0
jest-fetch-mock@3.0.3: jest-fetch-mock@3.0.3(encoding@0.1.13):
dependencies: dependencies:
cross-fetch: 3.1.8 cross-fetch: 3.1.8(encoding@0.1.13)
promise-polyfill: 8.3.0 promise-polyfill: 8.3.0
transitivePeerDependencies: transitivePeerDependencies:
- encoding - encoding
@ -8342,17 +8360,17 @@ snapshots:
kuler@2.0.0: {} kuler@2.0.0: {}
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0): langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
dependencies: dependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
'@langchain/openai': 0.2.1(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)) '@langchain/openai': 0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) '@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
binary-extensions: 2.3.0 binary-extensions: 2.3.0
js-tiktoken: 1.0.12 js-tiktoken: 1.0.12
js-yaml: 4.1.0 js-yaml: 4.1.0
jsonpointer: 5.0.1 jsonpointer: 5.0.1
langchainhub: 0.0.11 langchainhub: 0.0.11
langsmith: 0.1.34(zyeavx4tfqw3smbbpiinhfxxeu) langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
ml-distance: 4.0.1 ml-distance: 4.0.1
openapi-types: 12.1.3 openapi-types: 12.1.3
p-retry: 4.6.2 p-retry: 4.6.2
@ -8362,6 +8380,7 @@ snapshots:
zod-to-json-schema: 3.23.1(zod@3.23.8) zod-to-json-schema: 3.23.1(zod@3.23.8)
optionalDependencies: optionalDependencies:
'@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) '@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0)
'@pinecone-database/pinecone': 4.0.0
'@supabase/supabase-js': 2.44.2 '@supabase/supabase-js': 2.44.2
axios: 1.7.2 axios: 1.7.2
cheerio: 1.0.0-rc.12 cheerio: 1.0.0-rc.12
@ -8381,7 +8400,7 @@ snapshots:
langchainhub@0.0.11: {} langchainhub@0.0.11: {}
langsmith@0.1.34(zyeavx4tfqw3smbbpiinhfxxeu): langsmith@0.1.34(npkyd6f7wyl3urgrzoxaktl5a4):
dependencies: dependencies:
'@types/uuid': 9.0.8 '@types/uuid': 9.0.8
commander: 10.0.1 commander: 10.0.1
@ -8390,9 +8409,9 @@ snapshots:
p-retry: 4.6.2 p-retry: 4.6.2
uuid: 9.0.1 uuid: 9.0.1
optionalDependencies: optionalDependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
openai: 4.57.0(zod@3.23.8) openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
languagedetect@2.0.0: {} languagedetect@2.0.0: {}
@ -8442,9 +8461,9 @@ snapshots:
loglevel@1.9.1: {} loglevel@1.9.1: {}
logsnag@1.0.0: logsnag@1.0.0(encoding@0.1.13):
dependencies: dependencies:
isomorphic-fetch: 3.0.0 isomorphic-fetch: 3.0.0(encoding@0.1.13)
transitivePeerDependencies: transitivePeerDependencies:
- encoding - encoding
@ -8703,9 +8722,11 @@ snapshots:
node-ensure@0.0.0: {} node-ensure@0.0.0: {}
node-fetch@2.7.0: node-fetch@2.7.0(encoding@0.1.13):
dependencies: dependencies:
whatwg-url: 5.0.0 whatwg-url: 5.0.0
optionalDependencies:
encoding: 0.1.13
node-fetch@3.3.2: node-fetch@3.3.2:
dependencies: dependencies:
@ -8780,7 +8801,7 @@ snapshots:
transitivePeerDependencies: transitivePeerDependencies:
- debug - debug
openai@4.57.0(zod@3.23.8): openai@4.57.0(encoding@0.1.13)(zod@3.23.8):
dependencies: dependencies:
'@types/node': 18.19.39 '@types/node': 18.19.39
'@types/node-fetch': 2.6.11 '@types/node-fetch': 2.6.11
@ -8789,7 +8810,7 @@ snapshots:
agentkeepalive: 4.5.0 agentkeepalive: 4.5.0
form-data-encoder: 1.7.2 form-data-encoder: 1.7.2
formdata-node: 4.4.1 formdata-node: 4.4.1
node-fetch: 2.7.0 node-fetch: 2.7.0(encoding@0.1.13)
qs: 6.12.2 qs: 6.12.2
optionalDependencies: optionalDependencies:
zod: 3.23.8 zod: 3.23.8

View File

@ -17,6 +17,7 @@ import expressWs from "express-ws";
import { ErrorResponse, ResponseWithSentry } from "./controllers/v1/types"; import { ErrorResponse, ResponseWithSentry } from "./controllers/v1/types";
import { ZodError } from "zod"; import { ZodError } from "zod";
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
import { searchSimilarPages } from "./lib/extract/index/pinecone";
const { createBullBoard } = require("@bull-board/api"); const { createBullBoard } = require("@bull-board/api");
const { BullAdapter } = require("@bull-board/api/bullAdapter"); const { BullAdapter } = require("@bull-board/api/bullAdapter");

View File

@ -0,0 +1,141 @@
import { Pinecone } from '@pinecone-database/pinecone';
import { Document } from '../../../controllers/v1/types';
import { logger } from '../../logger';
import OpenAI from "openai";
// OpenAI client used by getEmbedding() below. It is constructed once at module
// load with the key read from the OPENAI_API_KEY environment variable.
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
// Pinecone client shared by indexPage() and searchSimilarPages().
// NOTE(review): the non-null assertion only silences the type checker; if
// PINECONE_API_KEY is unset, `undefined` is passed to the constructor at import
// time. Confirm how the SDK reacts, since this module is imported by the API
// entry point.
const pinecone = new Pinecone({
apiKey: process.env.PINECONE_API_KEY!,
});
// Name of the Pinecone index to read from and write to. Falls back to an empty
// string when PINECONE_INDEX_NAME is unset.
// NOTE(review): an empty index name presumably fails on first use - confirm.
const INDEX_NAME = process.env.PINECONE_INDEX_NAME ?? "";
/**
 * Metadata stored next to each page vector in Pinecone.
 *
 * Only `url`, `originUrl` and `timestamp` are required; every other field is
 * optional and is copied from the scraped document when available.
 */
export interface PageMetadata {
  /** URL of the indexed page; `indexPage` stores it after `normalizeUrl`. */
  url: string;
  /** URL the page was reached from; `indexPage` stores it after `normalizeUrl`. */
  originUrl: string;
  /** Page title taken from the document metadata. */
  title?: string;
  /** Page description taken from the document metadata. */
  description?: string;
  /** Identifier of the crawl passed to `indexPage`, if any. */
  crawlId?: string;
  /** Identifier of the team passed to `indexPage`, if any. */
  teamId?: string;
  /** Value of `Date.now()` at indexing time (milliseconds since the Unix epoch). */
  timestamp: number;
  /** Markdown content of the page, as found on the document. */
  markdown?: string;
}
/**
 * Requests an embedding for `text` from OpenAI.
 *
 * Uses the `text-embedding-3-small` model with float encoding and returns the
 * embedding vector of the first entry in the response.
 *
 * @param text - Text to embed.
 * @returns The embedding vector for `text`.
 */
async function getEmbedding(text: string) {
  const { data } = await openai.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
    encoding_format: "float",
  });

  // A single input string was sent, so only the first entry is read.
  return data[0].embedding;
}
/**
 * Canonicalizes a URL so the same page always maps to the same string.
 *
 * The URL is parsed with the WHATWG `URL` class and the hostname is given a
 * `www.` prefix when it does not already start with one.
 *
 * @param url - Absolute URL to normalize.
 * @returns The serialized (`href`) form of the normalized URL.
 * @throws TypeError if `url` cannot be parsed as an absolute URL.
 */
function normalizeUrl(url: string) {
  const parsed = new URL(url);
  const host = parsed.hostname;

  // Already canonical: leave the hostname untouched.
  if (host.startsWith("www.")) {
    return parsed.href;
  }

  parsed.hostname = "www." + host;
  return parsed.href;
}
/**
 * Embeds a scraped document and upserts it into the Pinecone index.
 *
 * The text that gets embedded is the title, description and markdown of the
 * document (whichever are present), joined by blank lines. The record id is
 * the document's `sourceURL`, falling back to `url`.
 *
 * Failures are not propagated: any error is logged and the function resolves
 * normally.
 *
 * @param document - Scraped document to index.
 * @param originUrl - URL the page was reached from; stored normalized.
 * @param crawlId - Optional crawl identifier stored in the metadata.
 * @param teamId - Optional team identifier stored in the metadata.
 */
export async function indexPage(
  document: Document,
  originUrl: string,
  crawlId?: string,
  teamId?: string
) {
  try {
    const pineconeIndex = pinecone.index(INDEX_NAME);

    const { title, description } = document.metadata;
    const { markdown } = document;

    // Build the embedding input from whichever text fields are present.
    const parts = [title, description, markdown].filter(Boolean);
    const vector = await getEmbedding(parts.join('\n\n'));

    // Raw (un-normalized) page URL; used as the record id and as a flag key.
    const pageUrl = document.metadata.sourceURL || document.metadata.url!;

    const metadata: PageMetadata = {
      url: normalizeUrl(pageUrl),
      originUrl: normalizeUrl(originUrl),
      title,
      description,
      crawlId,
      teamId,
      markdown,
      timestamp: Date.now()
    };

    const record = {
      id: pageUrl,
      values: vector,
      metadata: {
        ...metadata,
        // Extra metadata entry keyed by the raw page URL itself.
        [pageUrl]: true
      }
    };
    await pineconeIndex.upsert([record]);

    logger.debug('Successfully indexed page in Pinecone', {
      url: metadata.url,
      crawlId
    });
  } catch (error) {
    // Best effort: an indexing failure is logged and not rethrown.
    logger.error('Failed to index page in Pinecone', {
      error,
      url: document.metadata.sourceURL || document.metadata.url,
      crawlId
    });
  }
}
/**
 * Finds indexed pages whose embedding is closest to the embedding of `query`.
 *
 * When `originUrl` is given, results are restricted to records whose
 * `originUrl` metadata field equals the normalized form of that URL, which is
 * the value `indexPage` stores. The previous filter used the raw URL as the
 * field name together with a `$contains` operator; `$contains` is not a
 * Pinecone metadata filter operator, so the filter could not select records.
 *
 * Failures are not propagated: any error (including an unparsable
 * `originUrl`) is logged and an empty array is returned.
 *
 * @param query - Free-text query to embed and search with.
 * @param originUrl - Optional origin URL to restrict the search to.
 * @param limit - Maximum number of matches to return (Pinecone `topK`).
 * @returns One entry per match with `url`, `title`, `description`, `score`
 *   and `markdown`; an empty array on failure.
 */
export async function searchSimilarPages(
  query: string,
  originUrl?: string,
  limit: number = 10
) {
  try {
    const index = pinecone.index(INDEX_NAME);

    // Get query embedding from OpenAI
    const queryEmbedding = await getEmbedding(query);

    const queryParams: {
      vector: number[];
      topK: number;
      includeMetadata: boolean;
      filter?: Record<string, unknown>;
    } = {
      vector: queryEmbedding,
      topK: limit,
      includeMetadata: true
    };

    // Restrict to pages indexed under this origin. The comparison value is
    // normalized the same way indexPage normalizes it before storing.
    if (originUrl) {
      queryParams.filter = {
        originUrl: { $eq: normalizeUrl(originUrl) }
      };
    }

    const results = await index.query(queryParams);

    return results.matches.map(match => ({
      url: match.metadata?.url,
      title: match.metadata?.title,
      description: match.metadata?.description,
      score: match.score,
      markdown: match.metadata?.markdown
    }));
  } catch (error) {
    // Best effort: a search failure is logged and reported as "no results".
    logger.error('Failed to search similar pages in Pinecone', {
      error,
      query,
      originUrl
    });
    return [];
  }
}

View File

@ -10,7 +10,7 @@ const openai = new OpenAI({
async function getEmbedding(text: string) { async function getEmbedding(text: string) {
const embedding = await openai.embeddings.create({ const embedding = await openai.embeddings.create({
model: "text-embedding-ada-002", model: "text-embedding-3-small",
input: text, input: text,
encoding_format: "float", encoding_format: "float",
}); });