mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-15 20:25:59 +08:00
Nick: index exploration
This commit is contained in:
parent
0847a6038e
commit
bf9d41d0b2
@ -58,6 +58,7 @@
|
|||||||
"@devil7softwares/pos": "^1.0.2",
|
"@devil7softwares/pos": "^1.0.2",
|
||||||
"@dqbd/tiktoken": "^1.0.17",
|
"@dqbd/tiktoken": "^1.0.17",
|
||||||
"@nangohq/node": "^0.40.8",
|
"@nangohq/node": "^0.40.8",
|
||||||
|
"@pinecone-database/pinecone": "^4.0.0",
|
||||||
"@sentry/cli": "^2.33.1",
|
"@sentry/cli": "^2.33.1",
|
||||||
"@sentry/node": "^8.26.0",
|
"@sentry/node": "^8.26.0",
|
||||||
"@sentry/profiling-node": "^8.26.0",
|
"@sentry/profiling-node": "^8.26.0",
|
||||||
|
101
apps/api/pnpm-lock.yaml
generated
101
apps/api/pnpm-lock.yaml
generated
@ -10,7 +10,7 @@ importers:
|
|||||||
dependencies:
|
dependencies:
|
||||||
'@anthropic-ai/sdk':
|
'@anthropic-ai/sdk':
|
||||||
specifier: ^0.24.3
|
specifier: ^0.24.3
|
||||||
version: 0.24.3
|
version: 0.24.3(encoding@0.1.13)
|
||||||
'@brillout/import':
|
'@brillout/import':
|
||||||
specifier: ^0.2.2
|
specifier: ^0.2.2
|
||||||
version: 0.2.3
|
version: 0.2.3
|
||||||
@ -29,9 +29,12 @@ importers:
|
|||||||
'@nangohq/node':
|
'@nangohq/node':
|
||||||
specifier: ^0.40.8
|
specifier: ^0.40.8
|
||||||
version: 0.40.8
|
version: 0.40.8
|
||||||
|
'@pinecone-database/pinecone':
|
||||||
|
specifier: ^4.0.0
|
||||||
|
version: 4.0.0
|
||||||
'@sentry/cli':
|
'@sentry/cli':
|
||||||
specifier: ^2.33.1
|
specifier: ^2.33.1
|
||||||
version: 2.33.1
|
version: 2.33.1(encoding@0.1.13)
|
||||||
'@sentry/node':
|
'@sentry/node':
|
||||||
specifier: ^8.26.0
|
specifier: ^8.26.0
|
||||||
version: 8.26.0
|
version: 8.26.0
|
||||||
@ -79,7 +82,7 @@ importers:
|
|||||||
version: 1.1.1
|
version: 1.1.1
|
||||||
cohere-ai:
|
cohere-ai:
|
||||||
specifier: ^7.14.0
|
specifier: ^7.14.0
|
||||||
version: 7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))
|
version: 7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(encoding@0.1.13)
|
||||||
cors:
|
cors:
|
||||||
specifier: ^2.8.5
|
specifier: ^2.8.5
|
||||||
version: 2.8.5
|
version: 2.8.5
|
||||||
@ -130,13 +133,13 @@ importers:
|
|||||||
version: 2.9.0
|
version: 2.9.0
|
||||||
langchain:
|
langchain:
|
||||||
specifier: ^0.2.8
|
specifier: ^0.2.8
|
||||||
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||||
languagedetect:
|
languagedetect:
|
||||||
specifier: ^2.0.0
|
specifier: ^2.0.0
|
||||||
version: 2.0.0
|
version: 2.0.0
|
||||||
logsnag:
|
logsnag:
|
||||||
specifier: ^1.0.0
|
specifier: ^1.0.0
|
||||||
version: 1.0.0
|
version: 1.0.0(encoding@0.1.13)
|
||||||
luxon:
|
luxon:
|
||||||
specifier: ^3.4.3
|
specifier: ^3.4.3
|
||||||
version: 3.4.4
|
version: 3.4.4
|
||||||
@ -157,7 +160,7 @@ importers:
|
|||||||
version: 7.0.7(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3)
|
version: 7.0.7(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3)
|
||||||
openai:
|
openai:
|
||||||
specifier: ^4.57.0
|
specifier: ^4.57.0
|
||||||
version: 4.57.0(zod@3.23.8)
|
version: 4.57.0(encoding@0.1.13)(zod@3.23.8)
|
||||||
pdf-parse:
|
pdf-parse:
|
||||||
specifier: ^1.1.1
|
specifier: ^1.1.1
|
||||||
version: 1.1.1
|
version: 1.1.1
|
||||||
@ -275,7 +278,7 @@ importers:
|
|||||||
version: 29.7.0(@types/node@20.14.1)(ts-node@10.9.2(@types/node@20.14.1)(typescript@5.4.5))
|
version: 29.7.0(@types/node@20.14.1)(ts-node@10.9.2(@types/node@20.14.1)(typescript@5.4.5))
|
||||||
jest-fetch-mock:
|
jest-fetch-mock:
|
||||||
specifier: ^3.0.3
|
specifier: ^3.0.3
|
||||||
version: 3.0.3
|
version: 3.0.3(encoding@0.1.13)
|
||||||
mammoth:
|
mammoth:
|
||||||
specifier: ^1.7.2
|
specifier: ^1.7.2
|
||||||
version: 1.7.2
|
version: 1.7.2
|
||||||
@ -1006,6 +1009,10 @@ packages:
|
|||||||
'@pdf-lib/upng@1.0.1':
|
'@pdf-lib/upng@1.0.1':
|
||||||
resolution: {integrity: sha512-dQK2FUMQtowVP00mtIksrlZhdFXQZPC+taih1q4CvPZ5vqdxR/LKBaFg0oAfzd1GlHZXXSPdQfzQnt+ViGvEIQ==}
|
resolution: {integrity: sha512-dQK2FUMQtowVP00mtIksrlZhdFXQZPC+taih1q4CvPZ5vqdxR/LKBaFg0oAfzd1GlHZXXSPdQfzQnt+ViGvEIQ==}
|
||||||
|
|
||||||
|
'@pinecone-database/pinecone@4.0.0':
|
||||||
|
resolution: {integrity: sha512-INYS+GBys9v5BRTyn0tv8srVsPTlSRvE3BPE4Wkc/lOEyAIyB9F7DEMXbeF19FOLEgRwCuHTLjzm1niENl+4FA==}
|
||||||
|
engines: {node: '>=18.0.0'}
|
||||||
|
|
||||||
'@pkgjs/parseargs@0.11.0':
|
'@pkgjs/parseargs@0.11.0':
|
||||||
resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==}
|
resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==}
|
||||||
engines: {node: '>=14'}
|
engines: {node: '>=14'}
|
||||||
@ -2279,6 +2286,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==}
|
resolution: {integrity: sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==}
|
||||||
engines: {node: '>= 0.8'}
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
encoding@0.1.13:
|
||||||
|
resolution: {integrity: sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==}
|
||||||
|
|
||||||
end-of-stream@1.4.4:
|
end-of-stream@1.4.4:
|
||||||
resolution: {integrity: sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==}
|
resolution: {integrity: sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==}
|
||||||
|
|
||||||
@ -4599,7 +4609,7 @@ snapshots:
|
|||||||
'@jridgewell/gen-mapping': 0.3.5
|
'@jridgewell/gen-mapping': 0.3.5
|
||||||
'@jridgewell/trace-mapping': 0.3.25
|
'@jridgewell/trace-mapping': 0.3.25
|
||||||
|
|
||||||
'@anthropic-ai/sdk@0.24.3':
|
'@anthropic-ai/sdk@0.24.3(encoding@0.1.13)':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@types/node': 18.19.39
|
'@types/node': 18.19.39
|
||||||
'@types/node-fetch': 2.6.11
|
'@types/node-fetch': 2.6.11
|
||||||
@ -4607,7 +4617,7 @@ snapshots:
|
|||||||
agentkeepalive: 4.5.0
|
agentkeepalive: 4.5.0
|
||||||
form-data-encoder: 1.7.2
|
form-data-encoder: 1.7.2
|
||||||
formdata-node: 4.4.1
|
formdata-node: 4.4.1
|
||||||
node-fetch: 2.7.0
|
node-fetch: 2.7.0(encoding@0.1.13)
|
||||||
web-streams-polyfill: 3.3.3
|
web-streams-polyfill: 3.3.3
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- encoding
|
- encoding
|
||||||
@ -5577,13 +5587,13 @@ snapshots:
|
|||||||
'@jridgewell/resolve-uri': 3.1.2
|
'@jridgewell/resolve-uri': 3.1.2
|
||||||
'@jridgewell/sourcemap-codec': 1.4.15
|
'@jridgewell/sourcemap-codec': 1.4.15
|
||||||
|
|
||||||
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))':
|
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
||||||
dependencies:
|
dependencies:
|
||||||
ansi-styles: 5.2.0
|
ansi-styles: 5.2.0
|
||||||
camelcase: 6.3.0
|
camelcase: 6.3.0
|
||||||
decamelize: 1.2.0
|
decamelize: 1.2.0
|
||||||
js-tiktoken: 1.0.12
|
js-tiktoken: 1.0.12
|
||||||
langsmith: 0.1.34(zyeavx4tfqw3smbbpiinhfxxeu)
|
langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
|
||||||
ml-distance: 4.0.1
|
ml-distance: 4.0.1
|
||||||
mustache: 4.2.0
|
mustache: 4.2.0
|
||||||
p-queue: 6.6.2
|
p-queue: 6.6.2
|
||||||
@ -5595,20 +5605,20 @@ snapshots:
|
|||||||
- langchain
|
- langchain
|
||||||
- openai
|
- openai
|
||||||
|
|
||||||
'@langchain/openai@0.2.1(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
|
'@langchain/openai@0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||||
js-tiktoken: 1.0.12
|
js-tiktoken: 1.0.12
|
||||||
openai: 4.57.0(zod@3.23.8)
|
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
|
||||||
zod: 3.23.8
|
zod: 3.23.8
|
||||||
zod-to-json-schema: 3.23.1(zod@3.23.8)
|
zod-to-json-schema: 3.23.1(zod@3.23.8)
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- encoding
|
- encoding
|
||||||
- langchain
|
- langchain
|
||||||
|
|
||||||
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))':
|
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||||
js-tiktoken: 1.0.12
|
js-tiktoken: 1.0.12
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- langchain
|
- langchain
|
||||||
@ -5866,6 +5876,10 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
pako: 1.0.11
|
pako: 1.0.11
|
||||||
|
|
||||||
|
'@pinecone-database/pinecone@4.0.0':
|
||||||
|
dependencies:
|
||||||
|
encoding: 0.1.13
|
||||||
|
|
||||||
'@pkgjs/parseargs@0.11.0':
|
'@pkgjs/parseargs@0.11.0':
|
||||||
optional: true
|
optional: true
|
||||||
|
|
||||||
@ -5950,10 +5964,10 @@ snapshots:
|
|||||||
'@sentry/cli-win32-x64@2.33.1':
|
'@sentry/cli-win32-x64@2.33.1':
|
||||||
optional: true
|
optional: true
|
||||||
|
|
||||||
'@sentry/cli@2.33.1':
|
'@sentry/cli@2.33.1(encoding@0.1.13)':
|
||||||
dependencies:
|
dependencies:
|
||||||
https-proxy-agent: 5.0.1
|
https-proxy-agent: 5.0.1
|
||||||
node-fetch: 2.7.0
|
node-fetch: 2.7.0(encoding@0.1.13)
|
||||||
progress: 2.0.3
|
progress: 2.0.3
|
||||||
proxy-from-env: 1.1.0
|
proxy-from-env: 1.1.0
|
||||||
which: 2.0.2
|
which: 2.0.2
|
||||||
@ -7088,7 +7102,7 @@ snapshots:
|
|||||||
|
|
||||||
co@4.6.0: {}
|
co@4.6.0: {}
|
||||||
|
|
||||||
cohere-ai@7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)):
|
cohere-ai@7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(encoding@0.1.13):
|
||||||
dependencies:
|
dependencies:
|
||||||
'@aws-sdk/client-sagemaker': 3.679.0
|
'@aws-sdk/client-sagemaker': 3.679.0
|
||||||
'@aws-sdk/credential-providers': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))
|
'@aws-sdk/credential-providers': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))
|
||||||
@ -7098,7 +7112,7 @@ snapshots:
|
|||||||
form-data-encoder: 4.0.2
|
form-data-encoder: 4.0.2
|
||||||
formdata-node: 6.0.3
|
formdata-node: 6.0.3
|
||||||
js-base64: 3.7.2
|
js-base64: 3.7.2
|
||||||
node-fetch: 2.7.0
|
node-fetch: 2.7.0(encoding@0.1.13)
|
||||||
qs: 6.11.2
|
qs: 6.11.2
|
||||||
readable-stream: 4.5.2
|
readable-stream: 4.5.2
|
||||||
url-join: 4.0.1
|
url-join: 4.0.1
|
||||||
@ -7208,9 +7222,9 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
luxon: 3.4.4
|
luxon: 3.4.4
|
||||||
|
|
||||||
cross-fetch@3.1.8:
|
cross-fetch@3.1.8(encoding@0.1.13):
|
||||||
dependencies:
|
dependencies:
|
||||||
node-fetch: 2.7.0
|
node-fetch: 2.7.0(encoding@0.1.13)
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- encoding
|
- encoding
|
||||||
|
|
||||||
@ -7365,6 +7379,10 @@ snapshots:
|
|||||||
|
|
||||||
encodeurl@1.0.2: {}
|
encodeurl@1.0.2: {}
|
||||||
|
|
||||||
|
encoding@0.1.13:
|
||||||
|
dependencies:
|
||||||
|
iconv-lite: 0.6.3
|
||||||
|
|
||||||
end-of-stream@1.4.4:
|
end-of-stream@1.4.4:
|
||||||
dependencies:
|
dependencies:
|
||||||
once: 1.4.0
|
once: 1.4.0
|
||||||
@ -7899,9 +7917,9 @@ snapshots:
|
|||||||
|
|
||||||
isexe@2.0.0: {}
|
isexe@2.0.0: {}
|
||||||
|
|
||||||
isomorphic-fetch@3.0.0:
|
isomorphic-fetch@3.0.0(encoding@0.1.13):
|
||||||
dependencies:
|
dependencies:
|
||||||
node-fetch: 2.7.0
|
node-fetch: 2.7.0(encoding@0.1.13)
|
||||||
whatwg-fetch: 3.6.20
|
whatwg-fetch: 3.6.20
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- encoding
|
- encoding
|
||||||
@ -8070,9 +8088,9 @@ snapshots:
|
|||||||
jest-mock: 29.7.0
|
jest-mock: 29.7.0
|
||||||
jest-util: 29.7.0
|
jest-util: 29.7.0
|
||||||
|
|
||||||
jest-fetch-mock@3.0.3:
|
jest-fetch-mock@3.0.3(encoding@0.1.13):
|
||||||
dependencies:
|
dependencies:
|
||||||
cross-fetch: 3.1.8
|
cross-fetch: 3.1.8(encoding@0.1.13)
|
||||||
promise-polyfill: 8.3.0
|
promise-polyfill: 8.3.0
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- encoding
|
- encoding
|
||||||
@ -8342,17 +8360,17 @@ snapshots:
|
|||||||
|
|
||||||
kuler@2.0.0: {}
|
kuler@2.0.0: {}
|
||||||
|
|
||||||
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
|
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
|
||||||
dependencies:
|
dependencies:
|
||||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||||
'@langchain/openai': 0.2.1(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
|
'@langchain/openai': 0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
|
||||||
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||||
binary-extensions: 2.3.0
|
binary-extensions: 2.3.0
|
||||||
js-tiktoken: 1.0.12
|
js-tiktoken: 1.0.12
|
||||||
js-yaml: 4.1.0
|
js-yaml: 4.1.0
|
||||||
jsonpointer: 5.0.1
|
jsonpointer: 5.0.1
|
||||||
langchainhub: 0.0.11
|
langchainhub: 0.0.11
|
||||||
langsmith: 0.1.34(zyeavx4tfqw3smbbpiinhfxxeu)
|
langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
|
||||||
ml-distance: 4.0.1
|
ml-distance: 4.0.1
|
||||||
openapi-types: 12.1.3
|
openapi-types: 12.1.3
|
||||||
p-retry: 4.6.2
|
p-retry: 4.6.2
|
||||||
@ -8362,6 +8380,7 @@ snapshots:
|
|||||||
zod-to-json-schema: 3.23.1(zod@3.23.8)
|
zod-to-json-schema: 3.23.1(zod@3.23.8)
|
||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
'@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0)
|
'@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0)
|
||||||
|
'@pinecone-database/pinecone': 4.0.0
|
||||||
'@supabase/supabase-js': 2.44.2
|
'@supabase/supabase-js': 2.44.2
|
||||||
axios: 1.7.2
|
axios: 1.7.2
|
||||||
cheerio: 1.0.0-rc.12
|
cheerio: 1.0.0-rc.12
|
||||||
@ -8381,7 +8400,7 @@ snapshots:
|
|||||||
|
|
||||||
langchainhub@0.0.11: {}
|
langchainhub@0.0.11: {}
|
||||||
|
|
||||||
langsmith@0.1.34(zyeavx4tfqw3smbbpiinhfxxeu):
|
langsmith@0.1.34(npkyd6f7wyl3urgrzoxaktl5a4):
|
||||||
dependencies:
|
dependencies:
|
||||||
'@types/uuid': 9.0.8
|
'@types/uuid': 9.0.8
|
||||||
commander: 10.0.1
|
commander: 10.0.1
|
||||||
@ -8390,9 +8409,9 @@ snapshots:
|
|||||||
p-retry: 4.6.2
|
p-retry: 4.6.2
|
||||||
uuid: 9.0.1
|
uuid: 9.0.1
|
||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||||
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||||
openai: 4.57.0(zod@3.23.8)
|
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
|
||||||
|
|
||||||
languagedetect@2.0.0: {}
|
languagedetect@2.0.0: {}
|
||||||
|
|
||||||
@ -8442,9 +8461,9 @@ snapshots:
|
|||||||
|
|
||||||
loglevel@1.9.1: {}
|
loglevel@1.9.1: {}
|
||||||
|
|
||||||
logsnag@1.0.0:
|
logsnag@1.0.0(encoding@0.1.13):
|
||||||
dependencies:
|
dependencies:
|
||||||
isomorphic-fetch: 3.0.0
|
isomorphic-fetch: 3.0.0(encoding@0.1.13)
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- encoding
|
- encoding
|
||||||
|
|
||||||
@ -8703,9 +8722,11 @@ snapshots:
|
|||||||
|
|
||||||
node-ensure@0.0.0: {}
|
node-ensure@0.0.0: {}
|
||||||
|
|
||||||
node-fetch@2.7.0:
|
node-fetch@2.7.0(encoding@0.1.13):
|
||||||
dependencies:
|
dependencies:
|
||||||
whatwg-url: 5.0.0
|
whatwg-url: 5.0.0
|
||||||
|
optionalDependencies:
|
||||||
|
encoding: 0.1.13
|
||||||
|
|
||||||
node-fetch@3.3.2:
|
node-fetch@3.3.2:
|
||||||
dependencies:
|
dependencies:
|
||||||
@ -8780,7 +8801,7 @@ snapshots:
|
|||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- debug
|
- debug
|
||||||
|
|
||||||
openai@4.57.0(zod@3.23.8):
|
openai@4.57.0(encoding@0.1.13)(zod@3.23.8):
|
||||||
dependencies:
|
dependencies:
|
||||||
'@types/node': 18.19.39
|
'@types/node': 18.19.39
|
||||||
'@types/node-fetch': 2.6.11
|
'@types/node-fetch': 2.6.11
|
||||||
@ -8789,7 +8810,7 @@ snapshots:
|
|||||||
agentkeepalive: 4.5.0
|
agentkeepalive: 4.5.0
|
||||||
form-data-encoder: 1.7.2
|
form-data-encoder: 1.7.2
|
||||||
formdata-node: 4.4.1
|
formdata-node: 4.4.1
|
||||||
node-fetch: 2.7.0
|
node-fetch: 2.7.0(encoding@0.1.13)
|
||||||
qs: 6.12.2
|
qs: 6.12.2
|
||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
zod: 3.23.8
|
zod: 3.23.8
|
||||||
|
@ -17,6 +17,7 @@ import expressWs from "express-ws";
|
|||||||
import { ErrorResponse, ResponseWithSentry } from "./controllers/v1/types";
|
import { ErrorResponse, ResponseWithSentry } from "./controllers/v1/types";
|
||||||
import { ZodError } from "zod";
|
import { ZodError } from "zod";
|
||||||
import { v4 as uuidv4 } from "uuid";
|
import { v4 as uuidv4 } from "uuid";
|
||||||
|
import { searchSimilarPages } from "./lib/extract/index/pinecone";
|
||||||
|
|
||||||
const { createBullBoard } = require("@bull-board/api");
|
const { createBullBoard } = require("@bull-board/api");
|
||||||
const { BullAdapter } = require("@bull-board/api/bullAdapter");
|
const { BullAdapter } = require("@bull-board/api/bullAdapter");
|
||||||
|
141
apps/api/src/lib/extract/index/pinecone.ts
Normal file
141
apps/api/src/lib/extract/index/pinecone.ts
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
import { Pinecone } from '@pinecone-database/pinecone';
|
||||||
|
import { Document } from '../../../controllers/v1/types';
|
||||||
|
import { logger } from '../../logger';
|
||||||
|
import OpenAI from "openai";
|
||||||
|
|
||||||
|
// OpenAI client used for embedding requests; the key is read from the
// OPENAI_API_KEY environment variable.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

// Pinecone client. The non-null assertion only silences the type checker:
// nothing here verifies that PINECONE_API_KEY is actually set.
const pinecone = new Pinecone({
  apiKey: process.env.PINECONE_API_KEY!,
});

// Name of the Pinecone index to use; falls back to an empty string when
// PINECONE_INDEX_NAME is not set.
const INDEX_NAME = process.env.PINECONE_INDEX_NAME ?? "";
|
||||||
|
|
||||||
|
/**
 * Metadata record stored alongside a page's embedding vector.
 *
 * Only `url`, `originUrl` and `timestamp` are required; every other field
 * is optional.
 */
export interface PageMetadata {
  /** URL of the page itself. */
  url: string;
  /** URL the page was reached from (the origin of the crawl/scrape). */
  originUrl: string;
  /** Page title, when available. */
  title?: string;
  /** Page description, when available. */
  description?: string;
  /** Identifier of the crawl that produced the page, if any. */
  crawlId?: string;
  /** Identifier of the owning team, if any. */
  teamId?: string;
  /** Numeric timestamp; `indexPage` in this file fills it with `Date.now()`. */
  timestamp: number;
  /** Markdown content of the page, when available. */
  markdown?: string;
}
|
||||||
|
|
||||||
|
/**
 * Requests an embedding for `text` from the OpenAI embeddings endpoint
 * using the "text-embedding-3-small" model with float encoding.
 *
 * @param text - The text to embed.
 * @returns The embedding vector of the first item in the response.
 */
async function getEmbedding(text: string) {
  const { data } = await openai.embeddings.create({
    input: text,
    model: "text-embedding-3-small",
    encoding_format: "float",
  });

  // A single input string is sent, so only the first result is read.
  return data[0].embedding;
}
|
||||||
|
|
||||||
|
/**
 * Normalizes a URL so that its hostname carries a "www." prefix.
 *
 * The input is parsed with the WHATWG `URL` class, so the returned string is
 * the parser's serialized `href` (e.g. a bare origin gains a trailing "/").
 *
 * @param url - An absolute URL string.
 * @returns The serialized URL, with "www." prepended to the hostname when it
 *   was not already present.
 * @throws TypeError if `url` cannot be parsed as an absolute URL.
 */
function normalizeUrl(url: string) {
  const parsed = new URL(url);
  const alreadyPrefixed = parsed.hostname.startsWith("www.");

  if (alreadyPrefixed) {
    return parsed.href;
  }

  parsed.hostname = `www.${parsed.hostname}`;
  return parsed.href;
}
|
||||||
|
|
||||||
|
/**
 * Embeds a scraped document and upserts it into the Pinecone index.
 *
 * The text that gets embedded is the document's title, description and
 * markdown (whichever are present), joined by blank lines. The record id is
 * the document's `sourceURL`, falling back to `url`.
 *
 * Failures are logged and swallowed: this function does not rethrow errors
 * raised while embedding or upserting.
 *
 * @param document - The scraped document to index.
 * @param originUrl - URL the document was reached from; stored normalized.
 * @param crawlId - Optional crawl identifier stored in the record metadata.
 * @param teamId - Optional team identifier stored in the record metadata.
 */
export async function indexPage(
  document: Document,
  originUrl: string,
  crawlId?: string,
  teamId?: string
) {
  try {
    const pineconeIndex = pinecone.index(INDEX_NAME);

    // Build the embedding input from whichever text fields are present.
    const { title, description } = document.metadata;
    const embeddingInput = [title, description, document.markdown]
      .filter(Boolean)
      .join('\n\n');

    const values = await getEmbedding(embeddingInput);

    // Raw (un-normalized) page URL: used as the record id and as an extra
    // boolean metadata key.
    const pageUrl = document.metadata.sourceURL || document.metadata.url!;

    const metadata: PageMetadata = {
      url: normalizeUrl(pageUrl),
      originUrl: normalizeUrl(originUrl),
      title,
      description,
      crawlId,
      teamId,
      markdown: document.markdown,
      timestamp: Date.now(),
    };

    const record = {
      id: pageUrl,
      values,
      metadata: {
        ...metadata,
        [pageUrl]: true,
      },
    };

    await pineconeIndex.upsert([record]);

    logger.debug('Successfully indexed page in Pinecone', {
      url: metadata.url,
      crawlId,
    });
  } catch (error) {
    logger.error('Failed to index page in Pinecone', {
      error,
      url: document.metadata.sourceURL || document.metadata.url,
      crawlId,
    });
  }
}
|
||||||
|
|
||||||
|
/**
 * Finds indexed pages whose embeddings are closest to the embedding of
 * `query`.
 *
 * When `originUrl` is given, results are restricted to records whose stored
 * `originUrl` metadata equals the normalized form of that URL.
 *
 * Failures are logged and swallowed: on any error an empty array is
 * returned instead of throwing.
 *
 * @param query - Free-text query to embed and search with.
 * @param originUrl - Optional origin URL to restrict the search to.
 * @param limit - Maximum number of matches to request (default 10).
 * @returns One entry per match with its `url`, `title`, `description`,
 *   `markdown` (all read from the match metadata) and similarity `score`.
 */
export async function searchSimilarPages(
  query: string,
  originUrl?: string,
  limit: number = 10
) {
  try {
    const index = pinecone.index(INDEX_NAME);

    // Get query embedding from OpenAI
    const queryEmbedding = await getEmbedding(query);

    const queryParams: {
      vector: number[];
      topK: number;
      includeMetadata: boolean;
      filter?: object;
    } = {
      vector: queryEmbedding,
      topK: limit,
      includeMetadata: true,
    };

    // Restrict to a single origin when requested. The filter key must be the
    // metadata field name ("originUrl"), not the URL value, and Pinecone has
    // no `$contains` operator, so an exact `$eq` match against the
    // normalized URL is used instead.
    if (originUrl) {
      queryParams.filter = {
        originUrl: { $eq: normalizeUrl(originUrl) },
      };
    }

    const results = await index.query(queryParams);

    return results.matches.map((match) => ({
      url: match.metadata?.url,
      title: match.metadata?.title,
      description: match.metadata?.description,
      score: match.score,
      markdown: match.metadata?.markdown,
    }));
  } catch (error) {
    logger.error('Failed to search similar pages in Pinecone', {
      error,
      query,
      originUrl,
    });
    return [];
  }
}
|
@ -10,7 +10,7 @@ const openai = new OpenAI({
|
|||||||
|
|
||||||
async function getEmbedding(text: string) {
|
async function getEmbedding(text: string) {
|
||||||
const embedding = await openai.embeddings.create({
|
const embedding = await openai.embeddings.create({
|
||||||
model: "text-embedding-ada-002",
|
model: "text-embedding-3-small",
|
||||||
input: text,
|
input: text,
|
||||||
encoding_format: "float",
|
encoding_format: "float",
|
||||||
});
|
});
|
||||||
|
Loading…
x
Reference in New Issue
Block a user