Nick: index exploration

This commit is contained in:
Nicolas 2024-12-30 19:37:48 -03:00
parent 0847a6038e
commit bf9d41d0b2
5 changed files with 205 additions and 41 deletions

View File

@ -58,6 +58,7 @@
"@devil7softwares/pos": "^1.0.2",
"@dqbd/tiktoken": "^1.0.17",
"@nangohq/node": "^0.40.8",
"@pinecone-database/pinecone": "^4.0.0",
"@sentry/cli": "^2.33.1",
"@sentry/node": "^8.26.0",
"@sentry/profiling-node": "^8.26.0",

101
apps/api/pnpm-lock.yaml generated
View File

@ -10,7 +10,7 @@ importers:
dependencies:
'@anthropic-ai/sdk':
specifier: ^0.24.3
version: 0.24.3
version: 0.24.3(encoding@0.1.13)
'@brillout/import':
specifier: ^0.2.2
version: 0.2.3
@ -29,9 +29,12 @@ importers:
'@nangohq/node':
specifier: ^0.40.8
version: 0.40.8
'@pinecone-database/pinecone':
specifier: ^4.0.0
version: 4.0.0
'@sentry/cli':
specifier: ^2.33.1
version: 2.33.1
version: 2.33.1(encoding@0.1.13)
'@sentry/node':
specifier: ^8.26.0
version: 8.26.0
@ -79,7 +82,7 @@ importers:
version: 1.1.1
cohere-ai:
specifier: ^7.14.0
version: 7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))
version: 7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(encoding@0.1.13)
cors:
specifier: ^2.8.5
version: 2.8.5
@ -130,13 +133,13 @@ importers:
version: 2.9.0
langchain:
specifier: ^0.2.8
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
languagedetect:
specifier: ^2.0.0
version: 2.0.0
logsnag:
specifier: ^1.0.0
version: 1.0.0
version: 1.0.0(encoding@0.1.13)
luxon:
specifier: ^3.4.3
version: 3.4.4
@ -157,7 +160,7 @@ importers:
version: 7.0.7(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3)
openai:
specifier: ^4.57.0
version: 4.57.0(zod@3.23.8)
version: 4.57.0(encoding@0.1.13)(zod@3.23.8)
pdf-parse:
specifier: ^1.1.1
version: 1.1.1
@ -275,7 +278,7 @@ importers:
version: 29.7.0(@types/node@20.14.1)(ts-node@10.9.2(@types/node@20.14.1)(typescript@5.4.5))
jest-fetch-mock:
specifier: ^3.0.3
version: 3.0.3
version: 3.0.3(encoding@0.1.13)
mammoth:
specifier: ^1.7.2
version: 1.7.2
@ -1006,6 +1009,10 @@ packages:
'@pdf-lib/upng@1.0.1':
resolution: {integrity: sha512-dQK2FUMQtowVP00mtIksrlZhdFXQZPC+taih1q4CvPZ5vqdxR/LKBaFg0oAfzd1GlHZXXSPdQfzQnt+ViGvEIQ==}
'@pinecone-database/pinecone@4.0.0':
resolution: {integrity: sha512-INYS+GBys9v5BRTyn0tv8srVsPTlSRvE3BPE4Wkc/lOEyAIyB9F7DEMXbeF19FOLEgRwCuHTLjzm1niENl+4FA==}
engines: {node: '>=18.0.0'}
'@pkgjs/parseargs@0.11.0':
resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==}
engines: {node: '>=14'}
@ -2279,6 +2286,9 @@ packages:
resolution: {integrity: sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==}
engines: {node: '>= 0.8'}
encoding@0.1.13:
resolution: {integrity: sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==}
end-of-stream@1.4.4:
resolution: {integrity: sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==}
@ -4599,7 +4609,7 @@ snapshots:
'@jridgewell/gen-mapping': 0.3.5
'@jridgewell/trace-mapping': 0.3.25
'@anthropic-ai/sdk@0.24.3':
'@anthropic-ai/sdk@0.24.3(encoding@0.1.13)':
dependencies:
'@types/node': 18.19.39
'@types/node-fetch': 2.6.11
@ -4607,7 +4617,7 @@ snapshots:
agentkeepalive: 4.5.0
form-data-encoder: 1.7.2
formdata-node: 4.4.1
node-fetch: 2.7.0
node-fetch: 2.7.0(encoding@0.1.13)
web-streams-polyfill: 3.3.3
transitivePeerDependencies:
- encoding
@ -5577,13 +5587,13 @@ snapshots:
'@jridgewell/resolve-uri': 3.1.2
'@jridgewell/sourcemap-codec': 1.4.15
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))':
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
dependencies:
ansi-styles: 5.2.0
camelcase: 6.3.0
decamelize: 1.2.0
js-tiktoken: 1.0.12
langsmith: 0.1.34(zyeavx4tfqw3smbbpiinhfxxeu)
langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
ml-distance: 4.0.1
mustache: 4.2.0
p-queue: 6.6.2
@ -5595,20 +5605,20 @@ snapshots:
- langchain
- openai
'@langchain/openai@0.2.1(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
'@langchain/openai@0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
dependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
js-tiktoken: 1.0.12
openai: 4.57.0(zod@3.23.8)
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
zod: 3.23.8
zod-to-json-schema: 3.23.1(zod@3.23.8)
transitivePeerDependencies:
- encoding
- langchain
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))':
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
dependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
js-tiktoken: 1.0.12
transitivePeerDependencies:
- langchain
@ -5866,6 +5876,10 @@ snapshots:
dependencies:
pako: 1.0.11
'@pinecone-database/pinecone@4.0.0':
dependencies:
encoding: 0.1.13
'@pkgjs/parseargs@0.11.0':
optional: true
@ -5950,10 +5964,10 @@ snapshots:
'@sentry/cli-win32-x64@2.33.1':
optional: true
'@sentry/cli@2.33.1':
'@sentry/cli@2.33.1(encoding@0.1.13)':
dependencies:
https-proxy-agent: 5.0.1
node-fetch: 2.7.0
node-fetch: 2.7.0(encoding@0.1.13)
progress: 2.0.3
proxy-from-env: 1.1.0
which: 2.0.2
@ -7088,7 +7102,7 @@ snapshots:
co@4.6.0: {}
cohere-ai@7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)):
cohere-ai@7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(encoding@0.1.13):
dependencies:
'@aws-sdk/client-sagemaker': 3.679.0
'@aws-sdk/credential-providers': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))
@ -7098,7 +7112,7 @@ snapshots:
form-data-encoder: 4.0.2
formdata-node: 6.0.3
js-base64: 3.7.2
node-fetch: 2.7.0
node-fetch: 2.7.0(encoding@0.1.13)
qs: 6.11.2
readable-stream: 4.5.2
url-join: 4.0.1
@ -7208,9 +7222,9 @@ snapshots:
dependencies:
luxon: 3.4.4
cross-fetch@3.1.8:
cross-fetch@3.1.8(encoding@0.1.13):
dependencies:
node-fetch: 2.7.0
node-fetch: 2.7.0(encoding@0.1.13)
transitivePeerDependencies:
- encoding
@ -7365,6 +7379,10 @@ snapshots:
encodeurl@1.0.2: {}
encoding@0.1.13:
dependencies:
iconv-lite: 0.6.3
end-of-stream@1.4.4:
dependencies:
once: 1.4.0
@ -7899,9 +7917,9 @@ snapshots:
isexe@2.0.0: {}
isomorphic-fetch@3.0.0:
isomorphic-fetch@3.0.0(encoding@0.1.13):
dependencies:
node-fetch: 2.7.0
node-fetch: 2.7.0(encoding@0.1.13)
whatwg-fetch: 3.6.20
transitivePeerDependencies:
- encoding
@ -8070,9 +8088,9 @@ snapshots:
jest-mock: 29.7.0
jest-util: 29.7.0
jest-fetch-mock@3.0.3:
jest-fetch-mock@3.0.3(encoding@0.1.13):
dependencies:
cross-fetch: 3.1.8
cross-fetch: 3.1.8(encoding@0.1.13)
promise-polyfill: 8.3.0
transitivePeerDependencies:
- encoding
@ -8342,17 +8360,17 @@ snapshots:
kuler@2.0.0: {}
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
dependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
'@langchain/openai': 0.2.1(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
'@langchain/openai': 0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
binary-extensions: 2.3.0
js-tiktoken: 1.0.12
js-yaml: 4.1.0
jsonpointer: 5.0.1
langchainhub: 0.0.11
langsmith: 0.1.34(zyeavx4tfqw3smbbpiinhfxxeu)
langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
ml-distance: 4.0.1
openapi-types: 12.1.3
p-retry: 4.6.2
@ -8362,6 +8380,7 @@ snapshots:
zod-to-json-schema: 3.23.1(zod@3.23.8)
optionalDependencies:
'@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0)
'@pinecone-database/pinecone': 4.0.0
'@supabase/supabase-js': 2.44.2
axios: 1.7.2
cheerio: 1.0.0-rc.12
@ -8381,7 +8400,7 @@ snapshots:
langchainhub@0.0.11: {}
langsmith@0.1.34(zyeavx4tfqw3smbbpiinhfxxeu):
langsmith@0.1.34(npkyd6f7wyl3urgrzoxaktl5a4):
dependencies:
'@types/uuid': 9.0.8
commander: 10.0.1
@ -8390,9 +8409,9 @@ snapshots:
p-retry: 4.6.2
uuid: 9.0.1
optionalDependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
openai: 4.57.0(zod@3.23.8)
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
languagedetect@2.0.0: {}
@ -8442,9 +8461,9 @@ snapshots:
loglevel@1.9.1: {}
logsnag@1.0.0:
logsnag@1.0.0(encoding@0.1.13):
dependencies:
isomorphic-fetch: 3.0.0
isomorphic-fetch: 3.0.0(encoding@0.1.13)
transitivePeerDependencies:
- encoding
@ -8703,9 +8722,11 @@ snapshots:
node-ensure@0.0.0: {}
node-fetch@2.7.0:
node-fetch@2.7.0(encoding@0.1.13):
dependencies:
whatwg-url: 5.0.0
optionalDependencies:
encoding: 0.1.13
node-fetch@3.3.2:
dependencies:
@ -8780,7 +8801,7 @@ snapshots:
transitivePeerDependencies:
- debug
openai@4.57.0(zod@3.23.8):
openai@4.57.0(encoding@0.1.13)(zod@3.23.8):
dependencies:
'@types/node': 18.19.39
'@types/node-fetch': 2.6.11
@ -8789,7 +8810,7 @@ snapshots:
agentkeepalive: 4.5.0
form-data-encoder: 1.7.2
formdata-node: 4.4.1
node-fetch: 2.7.0
node-fetch: 2.7.0(encoding@0.1.13)
qs: 6.12.2
optionalDependencies:
zod: 3.23.8

View File

@ -17,6 +17,7 @@ import expressWs from "express-ws";
import { ErrorResponse, ResponseWithSentry } from "./controllers/v1/types";
import { ZodError } from "zod";
import { v4 as uuidv4 } from "uuid";
import { searchSimilarPages } from "./lib/extract/index/pinecone";
const { createBullBoard } = require("@bull-board/api");
const { BullAdapter } = require("@bull-board/api/bullAdapter");

View File

@ -0,0 +1,141 @@
import { Pinecone } from '@pinecone-database/pinecone';
import { Document } from '../../../controllers/v1/types';
import { logger } from '../../logger';
import OpenAI from "openai";
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
const pinecone = new Pinecone({
apiKey: process.env.PINECONE_API_KEY!,
});
const INDEX_NAME = process.env.PINECONE_INDEX_NAME ?? "";
export interface PageMetadata {
url: string;
originUrl: string;
title?: string;
description?: string;
crawlId?: string;
teamId?: string;
timestamp: number;
markdown?: string;
}
async function getEmbedding(text: string) {
const embedding = await openai.embeddings.create({
model: "text-embedding-3-small",
input: text,
encoding_format: "float",
});
return embedding.data[0].embedding;
}
function normalizeUrl(url: string) {
const urlO = new URL(url);
if (!urlO.hostname.startsWith("www.")) {
urlO.hostname = "www." + urlO.hostname;
}
return urlO.href;
}
export async function indexPage(
document: Document,
originUrl: string,
crawlId?: string,
teamId?: string
) {
try {
const index = pinecone.index(INDEX_NAME);
// Create text to embed
const textToEmbed = [
document.metadata.title,
document.metadata.description,
document.markdown
].filter(Boolean).join('\n\n');
// Get embedding from OpenAI
const embedding = await getEmbedding(textToEmbed);
// Prepare metadata
const metadata: PageMetadata = {
url: normalizeUrl(document.metadata.sourceURL || document.metadata.url!),
originUrl: normalizeUrl(originUrl),
title: document.metadata.title,
description: document.metadata.description,
crawlId,
teamId,
markdown: document.markdown,
timestamp: Date.now()
};
// Upsert to Pinecone
await index.upsert([{
id: document.metadata.sourceURL || document.metadata.url!,
values: embedding,
metadata: {
...metadata,
[document.metadata.sourceURL || document.metadata.url!]: true
}
}]);
logger.debug('Successfully indexed page in Pinecone', {
url: metadata.url,
crawlId
});
} catch (error) {
logger.error('Failed to index page in Pinecone', {
error,
url: document.metadata.sourceURL || document.metadata.url,
crawlId
});
}
}
export async function searchSimilarPages(
query: string,
originUrl?: string,
limit: number = 10
) {
try {
const index = pinecone.index(INDEX_NAME);
// Get query embedding from OpenAI
const queryEmbedding = await getEmbedding(query);
const queryParams: any = {
vector: queryEmbedding,
topK: limit,
includeMetadata: true
};
// Add filter if originUrl is provided
if (originUrl) {
queryParams.filter = {
[originUrl]: { $contains: normalizeUrl(originUrl) }
};
}
const results = await index.query(queryParams);
return results.matches.map(match => ({
url: match.metadata?.url,
title: match.metadata?.title,
description: match.metadata?.description,
score: match.score,
markdown: match.metadata?.markdown
}));
} catch (error) {
logger.error('Failed to search similar pages in Pinecone', {
error,
query,
originUrl
});
return [];
}
}

View File

@ -10,7 +10,7 @@ const openai = new OpenAI({
async function getEmbedding(text: string) {
const embedding = await openai.embeddings.create({
model: "text-embedding-ada-002",
model: "text-embedding-3-small",
input: text,
encoding_format: "float",
});