mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-13 06:29:01 +08:00
Nick: index exploration
This commit is contained in:
parent
0847a6038e
commit
bf9d41d0b2
@ -58,6 +58,7 @@
|
||||
"@devil7softwares/pos": "^1.0.2",
|
||||
"@dqbd/tiktoken": "^1.0.17",
|
||||
"@nangohq/node": "^0.40.8",
|
||||
"@pinecone-database/pinecone": "^4.0.0",
|
||||
"@sentry/cli": "^2.33.1",
|
||||
"@sentry/node": "^8.26.0",
|
||||
"@sentry/profiling-node": "^8.26.0",
|
||||
|
101
apps/api/pnpm-lock.yaml
generated
101
apps/api/pnpm-lock.yaml
generated
@ -10,7 +10,7 @@ importers:
|
||||
dependencies:
|
||||
'@anthropic-ai/sdk':
|
||||
specifier: ^0.24.3
|
||||
version: 0.24.3
|
||||
version: 0.24.3(encoding@0.1.13)
|
||||
'@brillout/import':
|
||||
specifier: ^0.2.2
|
||||
version: 0.2.3
|
||||
@ -29,9 +29,12 @@ importers:
|
||||
'@nangohq/node':
|
||||
specifier: ^0.40.8
|
||||
version: 0.40.8
|
||||
'@pinecone-database/pinecone':
|
||||
specifier: ^4.0.0
|
||||
version: 4.0.0
|
||||
'@sentry/cli':
|
||||
specifier: ^2.33.1
|
||||
version: 2.33.1
|
||||
version: 2.33.1(encoding@0.1.13)
|
||||
'@sentry/node':
|
||||
specifier: ^8.26.0
|
||||
version: 8.26.0
|
||||
@ -79,7 +82,7 @@ importers:
|
||||
version: 1.1.1
|
||||
cohere-ai:
|
||||
specifier: ^7.14.0
|
||||
version: 7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))
|
||||
version: 7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(encoding@0.1.13)
|
||||
cors:
|
||||
specifier: ^2.8.5
|
||||
version: 2.8.5
|
||||
@ -130,13 +133,13 @@ importers:
|
||||
version: 2.9.0
|
||||
langchain:
|
||||
specifier: ^0.2.8
|
||||
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||
languagedetect:
|
||||
specifier: ^2.0.0
|
||||
version: 2.0.0
|
||||
logsnag:
|
||||
specifier: ^1.0.0
|
||||
version: 1.0.0
|
||||
version: 1.0.0(encoding@0.1.13)
|
||||
luxon:
|
||||
specifier: ^3.4.3
|
||||
version: 3.4.4
|
||||
@ -157,7 +160,7 @@ importers:
|
||||
version: 7.0.7(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3)
|
||||
openai:
|
||||
specifier: ^4.57.0
|
||||
version: 4.57.0(zod@3.23.8)
|
||||
version: 4.57.0(encoding@0.1.13)(zod@3.23.8)
|
||||
pdf-parse:
|
||||
specifier: ^1.1.1
|
||||
version: 1.1.1
|
||||
@ -275,7 +278,7 @@ importers:
|
||||
version: 29.7.0(@types/node@20.14.1)(ts-node@10.9.2(@types/node@20.14.1)(typescript@5.4.5))
|
||||
jest-fetch-mock:
|
||||
specifier: ^3.0.3
|
||||
version: 3.0.3
|
||||
version: 3.0.3(encoding@0.1.13)
|
||||
mammoth:
|
||||
specifier: ^1.7.2
|
||||
version: 1.7.2
|
||||
@ -1006,6 +1009,10 @@ packages:
|
||||
'@pdf-lib/upng@1.0.1':
|
||||
resolution: {integrity: sha512-dQK2FUMQtowVP00mtIksrlZhdFXQZPC+taih1q4CvPZ5vqdxR/LKBaFg0oAfzd1GlHZXXSPdQfzQnt+ViGvEIQ==}
|
||||
|
||||
'@pinecone-database/pinecone@4.0.0':
|
||||
resolution: {integrity: sha512-INYS+GBys9v5BRTyn0tv8srVsPTlSRvE3BPE4Wkc/lOEyAIyB9F7DEMXbeF19FOLEgRwCuHTLjzm1niENl+4FA==}
|
||||
engines: {node: '>=18.0.0'}
|
||||
|
||||
'@pkgjs/parseargs@0.11.0':
|
||||
resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==}
|
||||
engines: {node: '>=14'}
|
||||
@ -2279,6 +2286,9 @@ packages:
|
||||
resolution: {integrity: sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==}
|
||||
engines: {node: '>= 0.8'}
|
||||
|
||||
encoding@0.1.13:
|
||||
resolution: {integrity: sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==}
|
||||
|
||||
end-of-stream@1.4.4:
|
||||
resolution: {integrity: sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==}
|
||||
|
||||
@ -4599,7 +4609,7 @@ snapshots:
|
||||
'@jridgewell/gen-mapping': 0.3.5
|
||||
'@jridgewell/trace-mapping': 0.3.25
|
||||
|
||||
'@anthropic-ai/sdk@0.24.3':
|
||||
'@anthropic-ai/sdk@0.24.3(encoding@0.1.13)':
|
||||
dependencies:
|
||||
'@types/node': 18.19.39
|
||||
'@types/node-fetch': 2.6.11
|
||||
@ -4607,7 +4617,7 @@ snapshots:
|
||||
agentkeepalive: 4.5.0
|
||||
form-data-encoder: 1.7.2
|
||||
formdata-node: 4.4.1
|
||||
node-fetch: 2.7.0
|
||||
node-fetch: 2.7.0(encoding@0.1.13)
|
||||
web-streams-polyfill: 3.3.3
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
@ -5577,13 +5587,13 @@ snapshots:
|
||||
'@jridgewell/resolve-uri': 3.1.2
|
||||
'@jridgewell/sourcemap-codec': 1.4.15
|
||||
|
||||
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))':
|
||||
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
||||
dependencies:
|
||||
ansi-styles: 5.2.0
|
||||
camelcase: 6.3.0
|
||||
decamelize: 1.2.0
|
||||
js-tiktoken: 1.0.12
|
||||
langsmith: 0.1.34(zyeavx4tfqw3smbbpiinhfxxeu)
|
||||
langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
|
||||
ml-distance: 4.0.1
|
||||
mustache: 4.2.0
|
||||
p-queue: 6.6.2
|
||||
@ -5595,20 +5605,20 @@ snapshots:
|
||||
- langchain
|
||||
- openai
|
||||
|
||||
'@langchain/openai@0.2.1(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
|
||||
'@langchain/openai@0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
|
||||
dependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
js-tiktoken: 1.0.12
|
||||
openai: 4.57.0(zod@3.23.8)
|
||||
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
|
||||
zod: 3.23.8
|
||||
zod-to-json-schema: 3.23.1(zod@3.23.8)
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
- langchain
|
||||
|
||||
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))':
|
||||
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
||||
dependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
js-tiktoken: 1.0.12
|
||||
transitivePeerDependencies:
|
||||
- langchain
|
||||
@ -5866,6 +5876,10 @@ snapshots:
|
||||
dependencies:
|
||||
pako: 1.0.11
|
||||
|
||||
'@pinecone-database/pinecone@4.0.0':
|
||||
dependencies:
|
||||
encoding: 0.1.13
|
||||
|
||||
'@pkgjs/parseargs@0.11.0':
|
||||
optional: true
|
||||
|
||||
@ -5950,10 +5964,10 @@ snapshots:
|
||||
'@sentry/cli-win32-x64@2.33.1':
|
||||
optional: true
|
||||
|
||||
'@sentry/cli@2.33.1':
|
||||
'@sentry/cli@2.33.1(encoding@0.1.13)':
|
||||
dependencies:
|
||||
https-proxy-agent: 5.0.1
|
||||
node-fetch: 2.7.0
|
||||
node-fetch: 2.7.0(encoding@0.1.13)
|
||||
progress: 2.0.3
|
||||
proxy-from-env: 1.1.0
|
||||
which: 2.0.2
|
||||
@ -7088,7 +7102,7 @@ snapshots:
|
||||
|
||||
co@4.6.0: {}
|
||||
|
||||
cohere-ai@7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)):
|
||||
cohere-ai@7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(encoding@0.1.13):
|
||||
dependencies:
|
||||
'@aws-sdk/client-sagemaker': 3.679.0
|
||||
'@aws-sdk/credential-providers': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))
|
||||
@ -7098,7 +7112,7 @@ snapshots:
|
||||
form-data-encoder: 4.0.2
|
||||
formdata-node: 6.0.3
|
||||
js-base64: 3.7.2
|
||||
node-fetch: 2.7.0
|
||||
node-fetch: 2.7.0(encoding@0.1.13)
|
||||
qs: 6.11.2
|
||||
readable-stream: 4.5.2
|
||||
url-join: 4.0.1
|
||||
@ -7208,9 +7222,9 @@ snapshots:
|
||||
dependencies:
|
||||
luxon: 3.4.4
|
||||
|
||||
cross-fetch@3.1.8:
|
||||
cross-fetch@3.1.8(encoding@0.1.13):
|
||||
dependencies:
|
||||
node-fetch: 2.7.0
|
||||
node-fetch: 2.7.0(encoding@0.1.13)
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
|
||||
@ -7365,6 +7379,10 @@ snapshots:
|
||||
|
||||
encodeurl@1.0.2: {}
|
||||
|
||||
encoding@0.1.13:
|
||||
dependencies:
|
||||
iconv-lite: 0.6.3
|
||||
|
||||
end-of-stream@1.4.4:
|
||||
dependencies:
|
||||
once: 1.4.0
|
||||
@ -7899,9 +7917,9 @@ snapshots:
|
||||
|
||||
isexe@2.0.0: {}
|
||||
|
||||
isomorphic-fetch@3.0.0:
|
||||
isomorphic-fetch@3.0.0(encoding@0.1.13):
|
||||
dependencies:
|
||||
node-fetch: 2.7.0
|
||||
node-fetch: 2.7.0(encoding@0.1.13)
|
||||
whatwg-fetch: 3.6.20
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
@ -8070,9 +8088,9 @@ snapshots:
|
||||
jest-mock: 29.7.0
|
||||
jest-util: 29.7.0
|
||||
|
||||
jest-fetch-mock@3.0.3:
|
||||
jest-fetch-mock@3.0.3(encoding@0.1.13):
|
||||
dependencies:
|
||||
cross-fetch: 3.1.8
|
||||
cross-fetch: 3.1.8(encoding@0.1.13)
|
||||
promise-polyfill: 8.3.0
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
@ -8342,17 +8360,17 @@ snapshots:
|
||||
|
||||
kuler@2.0.0: {}
|
||||
|
||||
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
|
||||
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
|
||||
dependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
||||
'@langchain/openai': 0.2.1(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
|
||||
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
'@langchain/openai': 0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
|
||||
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
binary-extensions: 2.3.0
|
||||
js-tiktoken: 1.0.12
|
||||
js-yaml: 4.1.0
|
||||
jsonpointer: 5.0.1
|
||||
langchainhub: 0.0.11
|
||||
langsmith: 0.1.34(zyeavx4tfqw3smbbpiinhfxxeu)
|
||||
langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
|
||||
ml-distance: 4.0.1
|
||||
openapi-types: 12.1.3
|
||||
p-retry: 4.6.2
|
||||
@ -8362,6 +8380,7 @@ snapshots:
|
||||
zod-to-json-schema: 3.23.1(zod@3.23.8)
|
||||
optionalDependencies:
|
||||
'@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0)
|
||||
'@pinecone-database/pinecone': 4.0.0
|
||||
'@supabase/supabase-js': 2.44.2
|
||||
axios: 1.7.2
|
||||
cheerio: 1.0.0-rc.12
|
||||
@ -8381,7 +8400,7 @@ snapshots:
|
||||
|
||||
langchainhub@0.0.11: {}
|
||||
|
||||
langsmith@0.1.34(zyeavx4tfqw3smbbpiinhfxxeu):
|
||||
langsmith@0.1.34(npkyd6f7wyl3urgrzoxaktl5a4):
|
||||
dependencies:
|
||||
'@types/uuid': 9.0.8
|
||||
commander: 10.0.1
|
||||
@ -8390,9 +8409,9 @@ snapshots:
|
||||
p-retry: 4.6.2
|
||||
uuid: 9.0.1
|
||||
optionalDependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
||||
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||
openai: 4.57.0(zod@3.23.8)
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
|
||||
|
||||
languagedetect@2.0.0: {}
|
||||
|
||||
@ -8442,9 +8461,9 @@ snapshots:
|
||||
|
||||
loglevel@1.9.1: {}
|
||||
|
||||
logsnag@1.0.0:
|
||||
logsnag@1.0.0(encoding@0.1.13):
|
||||
dependencies:
|
||||
isomorphic-fetch: 3.0.0
|
||||
isomorphic-fetch: 3.0.0(encoding@0.1.13)
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
|
||||
@ -8703,9 +8722,11 @@ snapshots:
|
||||
|
||||
node-ensure@0.0.0: {}
|
||||
|
||||
node-fetch@2.7.0:
|
||||
node-fetch@2.7.0(encoding@0.1.13):
|
||||
dependencies:
|
||||
whatwg-url: 5.0.0
|
||||
optionalDependencies:
|
||||
encoding: 0.1.13
|
||||
|
||||
node-fetch@3.3.2:
|
||||
dependencies:
|
||||
@ -8780,7 +8801,7 @@ snapshots:
|
||||
transitivePeerDependencies:
|
||||
- debug
|
||||
|
||||
openai@4.57.0(zod@3.23.8):
|
||||
openai@4.57.0(encoding@0.1.13)(zod@3.23.8):
|
||||
dependencies:
|
||||
'@types/node': 18.19.39
|
||||
'@types/node-fetch': 2.6.11
|
||||
@ -8789,7 +8810,7 @@ snapshots:
|
||||
agentkeepalive: 4.5.0
|
||||
form-data-encoder: 1.7.2
|
||||
formdata-node: 4.4.1
|
||||
node-fetch: 2.7.0
|
||||
node-fetch: 2.7.0(encoding@0.1.13)
|
||||
qs: 6.12.2
|
||||
optionalDependencies:
|
||||
zod: 3.23.8
|
||||
|
@ -17,6 +17,7 @@ import expressWs from "express-ws";
|
||||
import { ErrorResponse, ResponseWithSentry } from "./controllers/v1/types";
|
||||
import { ZodError } from "zod";
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
import { searchSimilarPages } from "./lib/extract/index/pinecone";
|
||||
|
||||
const { createBullBoard } = require("@bull-board/api");
|
||||
const { BullAdapter } = require("@bull-board/api/bullAdapter");
|
||||
|
141
apps/api/src/lib/extract/index/pinecone.ts
Normal file
141
apps/api/src/lib/extract/index/pinecone.ts
Normal file
@ -0,0 +1,141 @@
|
||||
import { Pinecone } from '@pinecone-database/pinecone';
|
||||
import { Document } from '../../../controllers/v1/types';
|
||||
import { logger } from '../../logger';
|
||||
import OpenAI from "openai";
|
||||
|
||||
const openai = new OpenAI({
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
});
|
||||
|
||||
const pinecone = new Pinecone({
|
||||
apiKey: process.env.PINECONE_API_KEY!,
|
||||
});
|
||||
|
||||
const INDEX_NAME = process.env.PINECONE_INDEX_NAME ?? "";
|
||||
|
||||
export interface PageMetadata {
|
||||
url: string;
|
||||
originUrl: string;
|
||||
title?: string;
|
||||
description?: string;
|
||||
crawlId?: string;
|
||||
teamId?: string;
|
||||
timestamp: number;
|
||||
markdown?: string;
|
||||
}
|
||||
|
||||
async function getEmbedding(text: string) {
|
||||
const embedding = await openai.embeddings.create({
|
||||
model: "text-embedding-3-small",
|
||||
input: text,
|
||||
encoding_format: "float",
|
||||
});
|
||||
|
||||
return embedding.data[0].embedding;
|
||||
}
|
||||
|
||||
function normalizeUrl(url: string) {
|
||||
const urlO = new URL(url);
|
||||
if (!urlO.hostname.startsWith("www.")) {
|
||||
urlO.hostname = "www." + urlO.hostname;
|
||||
}
|
||||
return urlO.href;
|
||||
}
|
||||
|
||||
export async function indexPage(
|
||||
document: Document,
|
||||
originUrl: string,
|
||||
crawlId?: string,
|
||||
teamId?: string
|
||||
) {
|
||||
try {
|
||||
const index = pinecone.index(INDEX_NAME);
|
||||
|
||||
// Create text to embed
|
||||
const textToEmbed = [
|
||||
document.metadata.title,
|
||||
document.metadata.description,
|
||||
document.markdown
|
||||
].filter(Boolean).join('\n\n');
|
||||
|
||||
// Get embedding from OpenAI
|
||||
const embedding = await getEmbedding(textToEmbed);
|
||||
|
||||
// Prepare metadata
|
||||
const metadata: PageMetadata = {
|
||||
url: normalizeUrl(document.metadata.sourceURL || document.metadata.url!),
|
||||
originUrl: normalizeUrl(originUrl),
|
||||
title: document.metadata.title,
|
||||
description: document.metadata.description,
|
||||
crawlId,
|
||||
teamId,
|
||||
markdown: document.markdown,
|
||||
timestamp: Date.now()
|
||||
};
|
||||
|
||||
// Upsert to Pinecone
|
||||
await index.upsert([{
|
||||
id: document.metadata.sourceURL || document.metadata.url!,
|
||||
values: embedding,
|
||||
metadata: {
|
||||
...metadata,
|
||||
[document.metadata.sourceURL || document.metadata.url!]: true
|
||||
}
|
||||
}]);
|
||||
|
||||
logger.debug('Successfully indexed page in Pinecone', {
|
||||
url: metadata.url,
|
||||
crawlId
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
logger.error('Failed to index page in Pinecone', {
|
||||
error,
|
||||
url: document.metadata.sourceURL || document.metadata.url,
|
||||
crawlId
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
export async function searchSimilarPages(
|
||||
query: string,
|
||||
originUrl?: string,
|
||||
limit: number = 10
|
||||
) {
|
||||
try {
|
||||
const index = pinecone.index(INDEX_NAME);
|
||||
|
||||
// Get query embedding from OpenAI
|
||||
const queryEmbedding = await getEmbedding(query);
|
||||
|
||||
const queryParams: any = {
|
||||
vector: queryEmbedding,
|
||||
topK: limit,
|
||||
includeMetadata: true
|
||||
};
|
||||
|
||||
// Add filter if originUrl is provided
|
||||
if (originUrl) {
|
||||
queryParams.filter = {
|
||||
[originUrl]: { $contains: normalizeUrl(originUrl) }
|
||||
};
|
||||
}
|
||||
|
||||
const results = await index.query(queryParams);
|
||||
return results.matches.map(match => ({
|
||||
url: match.metadata?.url,
|
||||
title: match.metadata?.title,
|
||||
description: match.metadata?.description,
|
||||
score: match.score,
|
||||
markdown: match.metadata?.markdown
|
||||
}));
|
||||
|
||||
} catch (error) {
|
||||
logger.error('Failed to search similar pages in Pinecone', {
|
||||
error,
|
||||
query,
|
||||
originUrl
|
||||
});
|
||||
return [];
|
||||
}
|
||||
}
|
@ -10,7 +10,7 @@ const openai = new OpenAI({
|
||||
|
||||
async function getEmbedding(text: string) {
|
||||
const embedding = await openai.embeddings.create({
|
||||
model: "text-embedding-ada-002",
|
||||
model: "text-embedding-3-small",
|
||||
input: text,
|
||||
encoding_format: "float",
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user