Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl (synced 2025-08-20 01:59:22 +08:00)
(feat/ai-sdk) Migrate to AI-SDK (#1220)
* Nick: init
* Update llmExtract.ts
* Update llmExtract.ts
* Nick rename
* fix(v1/types): extract json schema validation
* Update url-processor.ts
* feat(ai-sdk): ollama support
* feat(ai-sdk): further ollama support
* Nick: it is broken btw
* feat(ai-sdk): abstract model adapter
* Update pnpm-lock.yaml
* Update analyzeSchemaAndPrompt.ts
* Nick:
* feat(ai-sdk): ollama support
* doc(SELF_HOST): update with embedding param
* Nick:
* Update ranker.ts
* Nick:
* feat(ai-sdk): fixes
* Update llmExtract.ts
* feat: remove zod-to-json-schema
* fix
* Update llmExtract.ts
* use openai
* fixes

---------

Co-authored-by: Gergő Móricz <mo.geryy@gmail.com>
This commit is contained in:
parent 943eb775c6
commit 25d9bdb1f6
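The change set below moves LLM extraction from a hand-rolled OpenAI client onto the Vercel AI SDK (`ai` plus `@ai-sdk/openai`) and adds `ollama-ai-provider` for self-hosted models. As rough orientation only, a minimal sketch of schema-driven extraction on top of those packages might look like this; the wrapper name and the schema are illustrative assumptions, not the repository's actual `llmExtract.ts`:

// Sketch only: shows the AI-SDK primitives this commit adopts
// (generateObject + interchangeable providers), not Firecrawl's real wrapper.
import { generateObject } from "ai";
import { createOpenAI } from "@ai-sdk/openai";
import { createOllama } from "ollama-ai-provider";
import { z } from "zod";

// Hypothetical extraction schema, for illustration only.
const pageSchema = z.object({
  title: z.string(),
  summary: z.string(),
});

// Either provider yields a LanguageModel that the same call site can consume.
const model = process.env.OLLAMA_BASE_URL
  ? createOllama({ baseURL: process.env.OLLAMA_BASE_URL })(
      process.env.MODEL_NAME ?? "deepseek-r1:7b",
    )
  : createOpenAI({ apiKey: process.env.OPENAI_API_KEY })(
      process.env.MODEL_NAME ?? "gpt-4o-mini",
    );

export async function extractFromMarkdown(markdown: string) {
  const { object } = await generateObject({
    model,
    schema: pageSchema,
    system:
      "You are a professional web scraper. Extract the contents of the webpage",
    prompt: markdown,
  });
  return object; // validated against pageSchema by the SDK
}

Because both providers implement the same language-model interface, the extraction call site no longer needs to know which backend is configured, which is what the "abstract model adapter" commits above are about.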
@@ -52,9 +52,9 @@ USE_DB_AUTHENTICATION=false
 # OPENAI_API_KEY=
 
 # Experimental: Use Ollama
-# OPENAI_API_KEY=ollama
-# OPENAI_BASE_URL=http://localhost:11434/v1
+# OLLAMA_BASE_URL=http://localhost:11434/api
 # MODEL_NAME=deepseek-r1:7b
+# MODEL_EMBEDDING_NAME=nomic-embed-text
 
 ## === Proxy ===
 # PROXY_SERVER can be a full URL (e.g. http://0.1.2.3:1234) or just an IP and port combo (e.g. 0.1.2.3:1234)
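The self-host example now points at Ollama's native API and names both a chat model and an embedding model (the embedding model backs the reranking path touched by the `ranker.ts` commit). A hedged sketch of how `MODEL_EMBEDDING_NAME` could be consumed through the AI SDK's `embed` helper; the repository's actual wiring may differ, and the helper name here is an assumption:

// Sketch under the assumption that MODEL_EMBEDDING_NAME feeds an AI-SDK
// embedding model via ollama-ai-provider; the real ranker wiring may differ.
import { embed } from "ai";
import { createOllama } from "ollama-ai-provider";

const ollama = createOllama({
  baseURL: process.env.OLLAMA_BASE_URL, // e.g. http://localhost:11434/api
});

export async function embedQuery(text: string): Promise<number[]> {
  const { embedding } = await embed({
    model: ollama.embedding(process.env.MODEL_EMBEDDING_NAME ?? "nomic-embed-text"),
    value: text,
  });
  return embedding;
}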
@@ -56,7 +56,7 @@
     "typescript": "^5.4.2"
   },
   "dependencies": {
-    "jsdom": "^26.0.0",
+    "@ai-sdk/openai": "^1.1.13",
     "@anthropic-ai/sdk": "^0.24.3",
     "@apidevtools/json-schema-ref-parser": "^11.7.3",
     "@brillout/import": "^0.2.2",
@@ -72,6 +72,7 @@
     "@supabase/supabase-js": "^2.44.2",
     "@types/express-ws": "^3.0.4",
     "@types/ws": "^8.5.12",
+    "ai": "^4.1.45",
     "ajv": "^8.16.0",
     "async": "^3.2.5",
     "async-mutex": "^0.5.0",
@@ -96,6 +97,7 @@
     "ioredis": "^5.4.1",
     "ip-address": "^10.0.1",
     "joplin-turndown-plugin-gfm": "^1.0.12",
+    "jsdom": "^26.0.0",
     "json-schema-to-zod": "^2.3.0",
     "keyword-extractor": "^0.0.28",
     "koffi": "^2.9.0",
@@ -109,7 +111,7 @@
     "moment": "^2.29.4",
     "mongoose": "^8.4.4",
     "natural": "^7.0.7",
-    "openai": "^4.57.0",
+    "ollama-ai-provider": "^1.2.0",
     "pdf-parse": "^1.1.1",
     "pos": "^0.4.2",
     "posthog-node": "^4.0.1",
@@ -134,8 +136,7 @@
     "wordpos": "^2.1.0",
     "ws": "^8.18.0",
     "xml2js": "^0.6.2",
-    "zod": "^3.23.8",
-    "zod-to-json-schema": "^3.23.1"
+    "zod": "^3.24.2"
   },
   "nodemonConfig": {
     "ignore": [
apps/api/pnpm-lock.yaml (generated, 294 changed lines)
[Machine-generated lockfile diff, condensed. In summary:
- New direct dependencies: @ai-sdk/openai ^1.1.13, ai ^4.1.45, and ollama-ai-provider ^1.2.0; openai ^4.57.0 and zod-to-json-schema ^3.23.1 are dropped as direct dependencies, and zod is bumped from ^3.23.8 to ^3.24.2.
- New package entries: @ai-sdk/openai 1.1.13, @ai-sdk/provider 1.0.8, @ai-sdk/provider-utils 2.1.9, @ai-sdk/react 1.1.17, @ai-sdk/ui-utils 1.1.15, @types/diff-match-patch 1.0.36, ai 4.1.45, dequal 2.0.3, diff-match-patch 1.0.5, eventsource-parser 3.0.0, json-schema 0.4.0, jsondiffpatch 0.6.0, nanoid 3.3.8, ollama-ai-provider 1.2.0, partial-json 0.1.7, secure-json-parse 2.7.0, swr 2.3.2, throttleit 2.1.0, use-sync-external-store 1.4.0, zod 3.24.2, and zod-to-json-schema 3.24.2.
- Existing snapshots (langchain 0.2.8, @langchain/core 0.2.12, @langchain/openai 0.2.1, openai 4.57.0, unstructured-client 0.11.3, and others) are re-resolved against zod@3.24.2 instead of zod@3.23.8.]
@@ -51,7 +51,7 @@ function expectExtractToSucceed(response: Awaited<ReturnType<typeof extractStatus
 }
 
 describe("Extract tests", () => {
-  if (!process.env.TEST_SUITE_SELF_HOSTED || process.env.OPENAI_API_KEY) {
+  if (!process.env.TEST_SUITE_SELF_HOSTED || process.env.OPENAI_API_KEY || process.env.OLLAMA_BASE_URL) {
     it.concurrent("works", async () => {
       const res = await extract({
         urls: ["https://firecrawl.dev"],

@@ -180,7 +180,7 @@ describe("Scrape tests", () => {
     });
   }
 
-  if (!process.env.TEST_SUITE_SELF_HOSTED || process.env.OPENAI_API_KEY) {
+  if (!process.env.TEST_SUITE_SELF_HOSTED || process.env.OPENAI_API_KEY || process.env.OLLAMA_BASE_URL) {
     describe("JSON format", () => {
       it.concurrent("works", async () => {
         const response = await scrape({
@@ -65,6 +65,7 @@ export const extractOptions = z
         "Based on the information on the page, extract all the information from the schema in JSON format. Try to extract all the fields even those that might not be marked as required.",
       ),
     prompt: z.string().max(10000).optional(),
+    temperature: z.number().optional(),
   })
   .strict(strictMessage);
 
@@ -244,10 +245,11 @@ const extractRefine = (obj) => {
   const hasJsonFormat = obj.formats?.includes("json");
   const hasJsonOptions = obj.jsonOptions !== undefined;
   return (
-    (hasExtractFormat && hasExtractOptions) ||
-    (!hasExtractFormat && !hasExtractOptions) ||
-    (hasJsonFormat && hasJsonOptions) ||
-    (!hasJsonFormat && !hasJsonOptions)
+    (hasExtractFormat && hasExtractOptions)
+      || (!hasExtractFormat && !hasExtractOptions)
+  ) && (
+    (hasJsonFormat && hasJsonOptions)
+      || (!hasJsonFormat && !hasJsonOptions)
   );
 };
 const extractRefineOpts = {
@@ -261,7 +263,7 @@ const extractTransform = (obj) => {
       obj.extract ||
       obj.formats?.includes("json") ||
       obj.jsonOptions) &&
-    !obj.timeout
+    (obj.timeout === 30000)
   ) {
     obj = { ...obj, timeout: 60000 };
   }
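The schema change above adds an optional `temperature` to extract/JSON options, and the transform change now bumps the default 30-second timeout to 60 seconds for JSON/extract requests when the caller left the default in place. A small sketch of the temperature option being forwarded into an AI-SDK call; the helper name and signature are illustrative assumptions, not the repository's code:

// Illustrative only: shows the new temperature option flowing into an AI-SDK
// call. Firecrawl's real helper in llmExtract.ts is shaped differently.
import { generateObject, type LanguageModel } from "ai";
import { z } from "zod";

export async function runJsonExtraction<T extends z.ZodTypeAny>(
  model: LanguageModel,
  markdown: string,
  options: { schema: T; prompt?: string; temperature?: number },
): Promise<z.infer<T>> {
  const { object } = await generateObject({
    model,
    schema: options.schema,
    prompt: options.prompt ?? markdown,
    // Passed through from extractOptions; undefined falls back to the provider default.
    temperature: options.temperature,
  });
  return object;
}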
@@ -1,78 +0,0 @@
-import OpenAI from "openai";
-import Ajv from "ajv";
-const ajv = new Ajv(); // Initialize AJV for JSON schema validation
-
-import { generateOpenAICompletions } from "./models";
-import { Document, ExtractorOptions } from "../entities";
-import { logger } from "../logger";
-
-// Generate completion using OpenAI
-export async function generateCompletions(
-  documents: Document[],
-  extractionOptions: ExtractorOptions | undefined,
-  mode: "markdown" | "raw-html",
-): Promise<Document[]> {
-  // const schema = zodToJsonSchema(options.schema)
-
-  const schema = extractionOptions?.extractionSchema;
-  const systemPrompt = extractionOptions?.extractionPrompt;
-  const prompt = extractionOptions?.userPrompt;
-
-  const switchVariable = "openAI"; // Placholder, want to think more about how we abstract the model provider
-
-  const completions = await Promise.all(
-    documents.map(async (document: Document) => {
-      switch (switchVariable) {
-        case "openAI":
-          const llm = new OpenAI();
-          try {
-            const completionResult = await generateOpenAICompletions({
-              client: llm,
-              document: document,
-              schema: schema,
-              prompt: prompt,
-              systemPrompt: systemPrompt,
-              mode: mode,
-            });
-            // Validate the JSON output against the schema using AJV
-            if (schema) {
-              const validate = ajv.compile(schema);
-              if (!validate(completionResult.llm_extraction)) {
-                //TODO: add Custom Error handling middleware that bubbles this up with proper Error code, etc.
-                throw new Error(
-                  `JSON parsing error(s): ${validate.errors
-                    ?.map((err) => err.message)
-                    .join(
-                      ", ",
-                    )}\n\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support.`,
-                );
-              }
-            }
-
-            return completionResult;
-          } catch (error) {
-            logger.error(`Error generating completions: ${error}`);
-            throw error;
-          }
-        default:
-          throw new Error("Invalid client");
-      }
-    }),
-  );
-
-  return completions;
-}
-
-// generate basic completion
-
-export async function generateBasicCompletion(prompt: string) {
-  const openai = new OpenAI();
-  const model = process.env.MODEL_NAME || "gpt-4o";
-
-  const completion = await openai.chat.completions.create({
-    temperature: 0,
-    model,
-    messages: [{ role: "user", content: prompt }],
-  });
-  return completion.choices[0].message.content;
-}
@@ -1,145 +0,0 @@
-import OpenAI from "openai";
-import { Document } from "../../lib/entities";
-import { numTokensFromString } from "./helpers";
-
-export type ScraperCompletionResult = {
-  data: any | null;
-  url: string;
-};
-
-const maxTokens = 32000;
-const modifier = 4;
-const defaultPrompt =
-  "You are a professional web scraper. Extract the contents of the webpage";
-
-function prepareOpenAIDoc(
-  document: Document,
-  mode: "markdown" | "raw-html",
-): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] | null {
-  let markdown = document.markdown;
-
-  let extractionTarget = document.markdown;
-
-  if (mode === "raw-html") {
-    extractionTarget = document.rawHtml;
-  }
-
-  // Check if the markdown content exists in the document
-  if (!extractionTarget) {
-    return null;
-    // throw new Error(
-    //   `${mode} content is missing in the document. This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai`
-    // );
-  }
-
-  // count number of tokens
-  const numTokens = numTokensFromString(extractionTarget, "gpt-4");
-
-  if (numTokens > maxTokens) {
-    // trim the document to the maximum number of tokens, tokens != characters
-    extractionTarget = extractionTarget.slice(0, maxTokens * modifier);
-  }
-  return [[{ type: "text", text: extractionTarget }], numTokens];
-}
-
-export async function generateOpenAICompletions({
-  client,
-  model = process.env.MODEL_NAME || "gpt-4o-mini",
-  document,
-  schema, //TODO - add zod dynamic type checking
-  systemPrompt = defaultPrompt,
-  prompt,
-  temperature,
-  mode,
-}: {
-  client: OpenAI;
-  model?: string;
-  document: Document;
-  schema: any; // This should be replaced with a proper Zod schema type when available
-  prompt?: string;
-  systemPrompt?: string;
-  temperature?: number;
-  mode: "markdown" | "raw-html";
-}): Promise<Document> {
-  const openai = client as OpenAI;
-  const preparedDoc = prepareOpenAIDoc(document, mode);
-
-  if (preparedDoc === null) {
-    return {
-      ...document,
-      warning:
-        "LLM extraction was not performed since the document's content is empty or missing.",
-    };
-  }
-  const [content, numTokens] = preparedDoc;
-
-  let completion;
-  let llmExtraction;
-  if (prompt && !schema) {
-    const jsonCompletion = await openai.chat.completions.create({
-      model,
-      messages: [
-        {
-          role: "system",
-          content: systemPrompt,
-        },
-        { role: "user", content },
-        {
-          role: "user",
-          content: `Transform the above content into structured json output based on the following user request: ${prompt}`,
-        },
-      ],
-      response_format: { type: "json_object" },
-      temperature,
-    });
-
-    try {
-      llmExtraction = JSON.parse(
-        (jsonCompletion.choices[0].message.content ?? "").trim(),
-      );
-    } catch (e) {
-      throw new Error("Invalid JSON");
-    }
-  } else {
-    completion = await openai.chat.completions.create({
-      model,
-      messages: [
-        {
-          role: "system",
-          content: systemPrompt,
-        },
-        { role: "user", content },
-      ],
-      tools: [
-        {
-          type: "function",
-          function: {
-            name: "extract_content",
-            description: "Extracts the content from the given webpage(s)",
-            parameters: schema,
-          },
-        },
-      ],
-      tool_choice: { type: "function", function: { name: "extract_content" } },
-      temperature,
-    });
-    const c = completion.choices[0].message.tool_calls[0].function.arguments;
-
-    // Extract the LLM extraction content from the completion response
-    try {
-      llmExtraction = JSON.parse(c);
-    } catch (e) {
-      throw new Error("Invalid JSON");
-    }
-  }
-
-  // Return the document with the LLM extraction content added
-  return {
-    ...document,
-    llm_extraction: llmExtraction,
-    warning:
-      numTokens > maxTokens
-        ? `Page was trimmed to fit the maximum token limit defined by the LLM model (Max: ${maxTokens} tokens, Attemped: ${numTokens} tokens). If results are not good, email us at help@mendable.ai so we can help you.`
-        : undefined,
-  };
-}
@@ -5,7 +5,7 @@ import {
   DeepResearchSource,
   updateDeepResearch,
 } from "./deep-research-redis";
-import { generateOpenAICompletions } from "../../scraper/scrapeURL/transformers/llmExtract";
+import { generateCompletions } from "../../scraper/scrapeURL/transformers/llmExtract";
 import { truncateText } from "../../scraper/scrapeURL/transformers/llmExtract";

 interface AnalysisResult {
@@ -146,11 +146,11 @@ export class ResearchLLMService {
     topic: string,
     findings: DeepResearchFinding[] = [],
   ): Promise<{ query: string; researchGoal: string }[]> {
-    const { extract } = await generateOpenAICompletions(
-      this.logger.child({
+    const { extract } = await generateCompletions({
+      logger: this.logger.child({
         method: "generateSearchQueries",
       }),
-      {
+      options: {
         mode: "llm",
         systemPrompt:
           "You are an expert research agent that generates search queries (SERP) to explore topics deeply and thoroughly. Do not generate repeated queries. Today's date is " +
@@ -186,10 +186,8 @@
         Every search query is a new SERP query so make sure the whole context is added without overwhelming the search engine.
         The first SERP query you generate should be a very concise, simple version of the topic. `,
       },
-      "",
-      undefined,
-      true,
-    );
+      markdown: ""
+    });

     return extract.queries;
   }
@@ -203,11 +201,11 @@
     const timeRemainingMinutes =
       Math.round((timeRemaining / 1000 / 60) * 10) / 10;

-    const { extract } = await generateOpenAICompletions(
-      this.logger.child({
+    const { extract } = await generateCompletions({
+      logger: this.logger.child({
         method: "analyzeAndPlan",
       }),
-      {
+      options: {
         mode: "llm",
         systemPrompt:
           "You are an expert research agent that is analyzing findings. Your goal is to synthesize information and identify gaps for further research. Today's date is " +
@@ -238,10 +236,8 @@
             120000,
           ),
         },
-      "",
-      undefined,
-      true,
-    );
+      markdown: "",
+    });

       return extract.analysis;
     } catch (error) {
@@ -255,11 +251,11 @@
     findings: DeepResearchFinding[],
     summaries: string[],
   ): Promise<string> {
-    const { extract } = await generateOpenAICompletions(
-      this.logger.child({
+    const { extract } = await generateCompletions({
+      logger: this.logger.child({
         method: "generateFinalAnalysis",
       }),
-      {
+      options: {
         mode: "llm",
         systemPrompt:
           "You are an expert research analyst who creates comprehensive, well-structured reports. Your reports are detailed, properly formatted in Markdown, and include clear sections with citations. Today's date is " +
@@ -287,11 +283,8 @@
             100000,
           ),
         },
-      "",
-      undefined,
-      true,
-      "gpt-4o"
-    );
+      markdown: "",
+    });

     return extract.report;
   }
@@ -1,12 +1,16 @@
-import { generateSchemaFromPrompt } from "../../../scraper/scrapeURL/transformers/llmExtract";
+import {
+  generateCompletions,
+  generateSchemaFromPrompt,
+} from "../../../scraper/scrapeURL/transformers/llmExtract";
 import { TokenUsage } from "../../../controllers/v1/types";
 import { z } from "zod";
 import {
   buildAnalyzeSchemaPrompt,
   buildAnalyzeSchemaUserPrompt,
 } from "../build-prompts";
-import OpenAI from "openai";
 import { logger } from "../../../lib/logger";
+import { jsonSchema } from "ai";
+import { getModel } from "../../../lib/generic-ai";

 export async function analyzeSchemaAndPrompt(
   urls: string[],
@@ -25,6 +29,8 @@ export async function analyzeSchemaAndPrompt(

   const schemaString = JSON.stringify(schema);

+  const model = getModel("gpt-4o");
+
   const checkSchema = z
     .object({
       isMultiEntity: z.boolean(),
@@ -37,61 +43,28 @@ export async function analyzeSchemaAndPrompt(
       "isMultiEntity was true, but no multiEntityKeys",
     );

-  const model = process.env.MODEL_NAME || "gpt-4o";
-
-  const openai = new OpenAI();
-  const result = await openai.beta.chat.completions.parse({
-    model,
-    messages: [
-      {
-        role: "system",
-        content: buildAnalyzeSchemaPrompt(),
-      },
-      {
-        role: "user",
-        content: buildAnalyzeSchemaUserPrompt(schemaString, prompt, urls),
-      },
-    ],
-    response_format: {
-      type: "json_schema",
-      json_schema: {
-        schema: {
-          type: "object",
-          properties: {
-            isMultiEntity: { type: "boolean" },
-            multiEntityKeys: { type: "array", items: { type: "string" } },
-            reasoning: { type: "string" },
-            keyIndicators: { type: "array", items: { type: "string" } },
-          },
-          required: [
-            "isMultiEntity",
-            "multiEntityKeys",
-            "reasoning",
-            "keyIndicators",
-          ],
-          additionalProperties: false,
-        },
-        name: "checkSchema",
-      },
-    },
-  });
-
-  const tokenUsage: TokenUsage = {
-    promptTokens: result.usage?.prompt_tokens ?? 0,
-    completionTokens: result.usage?.completion_tokens ?? 0,
-    totalTokens: result.usage?.total_tokens ?? 0,
-    model: model,
-  };
-
-  try {
+  try {
+    const { extract: result, totalUsage } = await generateCompletions({
+      logger,
+      options: {
+        mode: "llm",
+        schema: checkSchema,
+        prompt: buildAnalyzeSchemaUserPrompt(schemaString, prompt, urls),
+        systemPrompt: buildAnalyzeSchemaPrompt(),
+      },
+      markdown: "",
+      model,
+    });
+
     const { isMultiEntity, multiEntityKeys, reasoning, keyIndicators } =
-      checkSchema.parse(result.choices[0].message.parsed);
+      checkSchema.parse(result);

     return {
       isMultiEntity,
       multiEntityKeys,
       reasoning,
       keyIndicators,
-      tokenUsage,
+      tokenUsage: totalUsage,
     };
   } catch (e) {
     logger.warn("(analyzeSchemaAndPrompt) Error parsing schema analysis", {
@@ -104,6 +77,11 @@ export async function analyzeSchemaAndPrompt(
     multiEntityKeys: [],
     reasoning: "",
     keyIndicators: [],
-    tokenUsage,
+    tokenUsage: {
+      promptTokens: 0,
+      completionTokens: 0,
+      totalTokens: 0,
+      model: model.modelId,
+    },
   };
 }
@@ -1,5 +1,5 @@
 import { logger } from "../../../lib/logger";
-import { generateOpenAICompletions } from "../../../scraper/scrapeURL/transformers/llmExtract";
+import { generateCompletions } from "../../../scraper/scrapeURL/transformers/llmExtract";
 import { buildDocument } from "../build-document";
 import { ExtractResponse, TokenUsage } from "../../../controllers/v1/types";
 import { Document } from "../../../controllers/v1/types";
@@ -30,11 +30,11 @@ export async function batchExtractPromise(
   warning?: string;
   sources: string[];
 }> {
-  const completion = await generateOpenAICompletions(
-    logger.child({
-      method: "extractService/generateOpenAICompletions",
+  const completion = await generateCompletions({
+    logger: logger.child({
+      method: "extractService/generateCompletions",
     }),
-    {
+    options: {
       mode: "llm",
       systemPrompt: buildBatchExtractSystemPrompt(
         systemPrompt,
@@ -44,10 +44,9 @@
       prompt: buildBatchExtractPrompt(prompt),
       schema: multiEntitySchema,
     },
-    buildDocument(doc),
-    undefined,
-    true,
-  );
+    markdown: buildDocument(doc),
+    isExtractEndpoint: true
+  });

   return {
     extract: completion.extract,
@@ -1,20 +1,21 @@
 import { logger } from "../../../lib/logger";
 import { buildDocument } from "../build-document";
 import { Document, TokenUsage } from "../../../controllers/v1/types";
-import { generateOpenAICompletions } from "../../../scraper/scrapeURL/transformers/llmExtract";
+import { generateCompletions } from "../../../scraper/scrapeURL/transformers/llmExtract";
 import {
   buildShouldExtractSystemPrompt,
   buildShouldExtractUserPrompt,
 } from "../build-prompts";
+import { getModel } from "../../../lib/generic-ai";

 export async function checkShouldExtract(
   prompt: string,
   multiEntitySchema: any,
   doc: Document,
 ): Promise<{ tokenUsage: TokenUsage; extract: boolean }> {
-  const shouldExtractCheck = await generateOpenAICompletions(
-    logger.child({ method: "extractService/checkShouldExtract" }),
-    {
+  const shouldExtractCheck = await generateCompletions({
+    logger: logger.child({ method: "extractService/checkShouldExtract" }),
+    options: {
       mode: "llm",
       systemPrompt: buildShouldExtractSystemPrompt(),
       prompt: buildShouldExtractUserPrompt(prompt, multiEntitySchema),
@@ -28,10 +29,10 @@ export async function checkShouldExtract(
         required: ["extract"],
       },
     },
-    buildDocument(doc),
-    undefined,
-    true,
-  );
+    markdown: buildDocument(doc),
+    isExtractEndpoint: true,
+    model: getModel("gpt-4o-mini"),
+  });

   return {
     tokenUsage: shouldExtractCheck.totalUsage,
@@ -1,5 +1,5 @@
 import { logger } from "../../../lib/logger";
-import { generateOpenAICompletions } from "../../../scraper/scrapeURL/transformers/llmExtract";
+import { generateCompletions } from "../../../scraper/scrapeURL/transformers/llmExtract";
 import { buildDocument } from "../build-document";
 import { Document, TokenUsage } from "../../../controllers/v1/types";

@@ -20,9 +20,9 @@ export async function singleAnswerCompletion({
   tokenUsage: TokenUsage;
   sources: string[];
 }> {
-  const completion = await generateOpenAICompletions(
-    logger.child({ module: "extract", method: "generateOpenAICompletions" }),
-    {
+  const completion = await generateCompletions({
+    logger: logger.child({ module: "extract", method: "generateCompletions" }),
+    options: {
       mode: "llm",
       systemPrompt:
         (systemPrompt ? `${systemPrompt}\n` : "") +
@@ -31,10 +31,9 @@ export async function singleAnswerCompletion({
       prompt: "Today is: " + new Date().toISOString() + "\n" + prompt,
       schema: rSchema,
     },
-    singleAnswerDocs.map((x) => buildDocument(x)).join("\n"),
-    undefined,
-    true,
-  );
+    markdown: singleAnswerDocs.map((x) => buildDocument(x)).join("\n"),
+    isExtractEndpoint: true
+  });

   return {
     extract: completion.extract,
     tokenUsage: completion.totalUsage,
@@ -9,7 +9,7 @@ import { logger as _logger } from "../logger";
 import { processUrl } from "./url-processor";
 import { scrapeDocument } from "./document-scraper";
 import {
-  generateOpenAICompletions,
+  generateCompletions,
   generateSchemaFromPrompt,
 } from "../../scraper/scrapeURL/transformers/llmExtract";
 import { billTeam } from "../../services/billing/credit_billing";
@@ -410,7 +410,7 @@ export async function performExtraction(
       const multiEntityCompletion = (await Promise.race([
         completionPromise,
         timeoutPromise,
-      ])) as Awaited<ReturnType<typeof generateOpenAICompletions>>;
+      ])) as Awaited<ReturnType<typeof generateCompletions>>;

       // Track multi-entity extraction tokens
       if (multiEntityCompletion) {
@@ -680,7 +680,7 @@
   // }
   // // Deduplicate and validate final result against schema
   // if (reqSchema && finalResult && finalResult.length <= extractConfig.DEDUPLICATION.MAX_TOKENS) {
-  //   const schemaValidation = await generateOpenAICompletions(
+  //   const schemaValidation = await generateCompletions(
   //     logger.child({ method: "extractService/validateAndDeduplicate" }),
   //     {
   //       mode: "llm",
@@ -1,7 +1,8 @@
 import { Pinecone } from "@pinecone-database/pinecone";
 import { Document } from "../../../controllers/v1/types";
 import { logger } from "../../logger";
-import OpenAI from "openai";
+import { embed } from "ai";
+import { getEmbeddingModel } from "../../generic-ai";

 const pinecone = new Pinecone({
   apiKey: process.env.PINECONE_API_KEY!,
@@ -23,17 +24,12 @@ export interface PageMetadata {
 }

 async function getEmbedding(text: string) {
-  const openai = new OpenAI({
-    apiKey: process.env.OPENAI_API_KEY,
-  });
-
-  const embedding = await openai.embeddings.create({
-    model: process.env.EMBEDDING_MODEL_NAME || "text-embedding-3-small",
-    input: text,
-    encoding_format: "float",
-  });
-
-  return embedding.data[0].embedding;
+  const { embedding } = await embed({
+    model: getEmbeddingModel("text-embedding-3-small"),
+    value: text,
+  });
+
+  return embedding;
 }

 function normalizeUrl(url: string) {
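For reference, a standalone sketch of the AI SDK embedding call this section migrates to (it assumes OPENAI_API_KEY is set and uses the @ai-sdk/openai provider directly; in the repo the model instead comes from getEmbeddingModel so Ollama can be swapped in):

import { embed } from "ai";
import { openai } from "@ai-sdk/openai";

// Returns a plain number[] embedding for the given text.
async function embedText(text: string): Promise<number[]> {
  const { embedding } = await embed({
    model: openai.embedding("text-embedding-3-small"),
    value: text,
  });
  return embedding;
}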
@@ -5,7 +5,7 @@ import { logger } from "../logger";
 import { CohereClient } from "cohere-ai";
 import { extractConfig } from "./config";
 import { searchSimilarPages } from "./index/pinecone";
-import { generateOpenAICompletions } from "../../scraper/scrapeURL/transformers/llmExtract";
+import { generateCompletions } from "../../scraper/scrapeURL/transformers/llmExtract";
 import { buildRerankerUserPrompt } from "./build-prompts";
 import { buildRerankerSystemPrompt } from "./build-prompts";
 import { dumpToFile } from "./helpers/dump-to-file";
@@ -221,22 +221,21 @@ export async function rerankLinksWithLLM(options: RerankerOptions): Promise<Rera
       });

       // dumpToFile(new Date().toISOString(),[buildRerankerSystemPrompt(), buildRerankerUserPrompt(searchQuery), schema, linksContent])
-      const completionPromise = generateOpenAICompletions(
-        logger.child({
+      const completionPromise = generateCompletions({
+        logger: logger.child({
           method: "rerankLinksWithLLM",
           chunk: chunkIndex + 1,
           retry,
         }),
-        {
+        options: {
           mode: "llm",
           systemPrompt: buildRerankerSystemPrompt(),
           prompt: buildRerankerUserPrompt(searchQuery),
           schema: schema,
         },
-        linksContent,
-        undefined,
-        true
-      );
+        markdown: linksContent,
+        isExtractEndpoint: true
+      });

       const completion = await Promise.race([
         completionPromise,
@@ -3,14 +3,21 @@ import { getMapResults } from "../../controllers/v1/map";
 import { PlanType } from "../../types";
 import { removeDuplicateUrls } from "../validateUrl";
 import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
-import { generateBasicCompletion } from "../LLM-extraction";
 import { buildPreRerankPrompt, buildRefrasedPrompt } from "./build-prompts";
 import { rerankLinksWithLLM } from "./reranker";
 import { extractConfig } from "./config";
-import { updateExtract } from "./extract-redis";
-import { ExtractStep } from "./extract-redis";
 import type { Logger } from "winston";
+import { generateText } from "ai";
+import { getModel } from "../generic-ai";
+
+export async function generateBasicCompletion(prompt: string) {
+  const { text } = await generateText({
+    model: getModel("gpt-4o"),
+    prompt: prompt,
+    temperature: 0
+  });
+  return text;
+}

 interface ProcessUrlOptions {
   url: string;
   prompt?: string;
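A hypothetical call site for the relocated helper, just to show the shape of the new function (the prompt text here is illustrative, not taken from this commit):

// Ask whichever model the env selects (getModel / MODEL_NAME) for a one-off completion.
const rephrased = await generateBasicCompletion(
  "Rephrase this search objective in one short sentence: find all pricing pages",
);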
@@ -1,16 +1,17 @@
 import { logger as _logger } from "../logger";
 import { updateGeneratedLlmsTxt } from "./generate-llmstxt-redis";
 import { getMapResults } from "../../controllers/v1/map";
-import { MapResponse, ScrapeResponse, Document } from "../../controllers/v1/types";
-import { Response } from "express";
-import OpenAI from "openai";
-import { zodResponseFormat } from "openai/helpers/zod";
 import { z } from "zod";
 import { scrapeDocument } from "../extract/document-scraper";
 import { PlanType } from "../../types";
-import { getLlmsTextFromCache, saveLlmsTextToCache } from "./generate-llmstxt-supabase";
+import {
+  getLlmsTextFromCache,
+  saveLlmsTextToCache,
+} from "./generate-llmstxt-supabase";
 import { billTeam } from "../../services/billing/credit_billing";
 import { logJob } from "../../services/logging/log_job";
+import { getModel } from "../generic-ai";
+import { generateCompletions } from "../../scraper/scrapeURL/transformers/llmExtract";

 interface GenerateLLMsTextServiceOptions {
   generationId: string;
@@ -22,15 +23,14 @@ interface GenerateLLMsTextServiceOptions {
   subId?: string;
 }

-const DescriptionSchema = z.object({
+const descriptionSchema = z.object({
   description: z.string(),
   title: z.string(),
 });

 // Helper function to remove page separators
 function removePageSeparators(text: string): string {
-  return text.replace(/<\|firecrawl-page-\d+-lllmstxt\|>\n/g, '');
+  return text.replace(/<\|firecrawl-page-\d+-lllmstxt\|>\n/g, "");
 }

 // Helper function to limit pages in full text
@@ -38,12 +38,14 @@ function limitPages(fullText: string, maxPages: number): string {
   const pages = fullText.split(/<\|firecrawl-page-\d+-lllmstxt\|>\n/);
   // First element is the header, so we start from index 1
   const limitedPages = pages.slice(0, maxPages + 1);
-  return limitedPages.join('');
+  return limitedPages.join("");
 }

-export async function performGenerateLlmsTxt(options: GenerateLLMsTextServiceOptions) {
-  const openai = new OpenAI();
-  const { generationId, teamId, plan, url, maxUrls, showFullText, subId } = options;
+export async function performGenerateLlmsTxt(
+  options: GenerateLLMsTextServiceOptions,
+) {
+  const { generationId, teamId, plan, url, maxUrls, showFullText, subId } =
+    options;
   const startTime = Date.now();
   const logger = _logger.child({
     module: "generate-llmstxt",
@@ -102,12 +104,12 @@ export async function performGenerateLlmsTxt(options: GenerateLLMsTextServiceOpt
     let llmstxt = `# ${url} llms.txt\n\n`;
     let llmsFulltxt = `# ${url} llms-full.txt\n\n`;

     // Process URLs in batches of 10
     for (let i = 0; i < urls.length; i += 10) {
       const batch = urls.slice(i, i + 10);

-      const batchResults = await Promise.all(batch.map(async (url) => {
+      const batchResults = await Promise.all(
+        batch.map(async (url) => {
           _logger.debug(`Scraping URL: ${url}`);
           try {
             const document = await scrapeDocument(
@@ -121,7 +123,7 @@ export async function performGenerateLlmsTxt(options: GenerateLLMsTextServiceOpt
               },
               [],
               logger,
-              { onlyMainContent: true }
+              { onlyMainContent: true },
             );

             if (!document || !document.markdown) {
@@ -129,31 +131,34 @@ export async function performGenerateLlmsTxt(options: GenerateLLMsTextServiceOpt
               return null;
             }

-            _logger.debug(`Generating description for ${document.metadata?.url}`);
+            _logger.debug(
+              `Generating description for ${document.metadata?.url}`,
+            );

-            const completion = await openai.beta.chat.completions.parse({
-              model: process.env.MODEL_NAME || "gpt-4o-mini",
-              messages: [
-                {
-                  role: "user",
-                  content: `Generate a 9-10 word description and a 3-4 word title of the entire page based on ALL the content one will find on the page for this url: ${document.metadata?.url}. This will help in a user finding the page for its intended purpose. Here is the content: ${document.markdown}`
-                }
-              ],
-              response_format: zodResponseFormat(DescriptionSchema, "description")
-            });
-
-            const parsedResponse = completion.choices[0].message.parsed;
+            const { extract } = await generateCompletions({
+              logger,
+              model: getModel("gpt-4o-mini"),
+              options: {
+                systemPrompt: "",
+                mode: "llm",
+                schema: descriptionSchema,
+                prompt: `Generate a 9-10 word description and a 3-4 word title of the entire page based on ALL the content one will find on the page for this url: ${document.metadata?.url}. This will help in a user finding the page for its intended purpose.`,
+              },
+              markdown: document.markdown,
+            });

             return {
-              title: parsedResponse!.title,
-              description: parsedResponse!.description,
+              title: extract.title,
+              description: extract.description,
               url: document.metadata?.url,
-              markdown: document.markdown
+              markdown: document.markdown,
             };
           } catch (error) {
             logger.error(`Failed to process URL ${url}`, { error });
             return null;
           }
-      }));
+        }),
+      );

       // Process successful results from batch
       for (const result of batchResults) {
@@ -205,13 +210,13 @@ export async function performGenerateLlmsTxt(options: GenerateLLMsTextServiceOpt
     });

     // Bill team for usage
-    billTeam(teamId, subId, urls.length, logger).catch(
-      (error) => {
-        logger.error(
-          `Failed to bill team ${teamId} for ${urls.length} urls`, { teamId, count: urls.length, error },
-        );
-      },
-    );
+    billTeam(teamId, subId, urls.length, logger).catch((error) => {
+      logger.error(`Failed to bill team ${teamId} for ${urls.length} urls`, {
+        teamId,
+        count: urls.length,
+        error,
+      });
+    });

     return {
       success: true,
@@ -221,7 +226,6 @@ export async function performGenerateLlmsTxt(options: GenerateLLMsTextServiceOpt
       showFullText: showFullText,
     },
   };
-
   } catch (error: any) {
     logger.error("Generate LLMs text error", { error });
apps/api/src/lib/generic-ai.ts (new file, 14 lines)
@@ -0,0 +1,14 @@
+import { openai } from '@ai-sdk/openai';
+import { createOllama } from "ollama-ai-provider";
+
+const modelAdapter = process.env.OLLAMA_BASE_URL ? createOllama({
+  baseURL: process.env.OLLAMA_BASE_URL!,
+}) : openai;
+
+export function getModel(name: string) {
+  return process.env.MODEL_NAME ? modelAdapter(process.env.MODEL_NAME) : modelAdapter(name);
+}
+
+export function getEmbeddingModel(name: string) {
+  return process.env.MODEL_EMBEDDING_NAME ? modelAdapter.embedding(process.env.MODEL_EMBEDDING_NAME) : modelAdapter.embedding(name);
+}
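A small consumption sketch for this adapter (assuming the standard AI SDK generateText/embed calls; the provider falls back to OpenAI unless OLLAMA_BASE_URL, MODEL_NAME, or MODEL_EMBEDDING_NAME override it):

import { generateText, embed } from "ai";
import { getModel, getEmbeddingModel } from "./generic-ai";

async function demo() {
  // Text generation through whichever provider the environment selects.
  const { text } = await generateText({
    model: getModel("gpt-4o-mini"),
    prompt: "Say hello in one word.",
  });

  // Embeddings go through the same adapter.
  const { embedding } = await embed({
    model: getEmbeddingModel("text-embedding-3-small"),
    value: text,
  });

  return { text, dimensions: embedding.length };
}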
@@ -1,20 +1,16 @@
+import { embed } from "ai";
 import { configDotenv } from "dotenv";
-import OpenAI from "openai";
+import { getEmbeddingModel } from "./generic-ai";

 configDotenv();

 async function getEmbedding(text: string) {
-  const openai = new OpenAI({
-    apiKey: process.env.OPENAI_API_KEY,
-  });
-
-  const embedding = await openai.embeddings.create({
-    model: "text-embedding-3-small",
-    input: text,
-    encoding_format: "float",
-  });
-
-  return embedding.data[0].embedding;
+  const { embedding } = await embed({
+    model: getEmbeddingModel("text-embedding-3-small"),
+    value: text,
+  });
+
+  return embedding;
 }

 const cosineSimilarity = (vec1: number[], vec2: number[]): number => {
@@ -137,14 +137,14 @@ export function coerceFieldsToFormats(
     );
   }

-  if (!formats.has("extract") && document.extract !== undefined) {
+  if (!formats.has("extract") && (document.extract !== undefined || document.json !== undefined)) {
     meta.logger.warn(
       "Removed extract from Document because it wasn't in formats -- this is extremely wasteful and indicates a bug.",
     );
     delete document.extract;
-  } else if (formats.has("extract") && document.extract === undefined) {
+  } else if (formats.has("extract") && document.extract === undefined && document.json === undefined) {
     meta.logger.warn(
-      "Request had format: extract, but there was no extract field in the result.",
+      "Request had format extract, but there was no extract field in the result.",
     );
   }
@@ -1,4 +1,3 @@
-import OpenAI from "openai";
 import { encoding_for_model } from "@dqbd/tiktoken";
 import { TiktokenModel } from "@dqbd/tiktoken";
 import {
@@ -10,6 +9,10 @@ import { Logger } from "winston";
 import { EngineResultsTracker, Meta } from "..";
 import { logger } from "../../../lib/logger";
 import { modelPrices } from "../../../lib/extract/usage/model-prices";
+import { generateObject, generateText, LanguageModel } from 'ai';
+import { jsonSchema } from 'ai';
+import { getModel } from "../../../lib/generic-ai";
+import { z } from "zod";

 // Get max tokens from model prices
 const getModelLimits = (model: string) => {
@@ -117,16 +120,21 @@ export function truncateText(text: string, maxTokens: number): string {
   }
 }

-export async function generateOpenAICompletions(
-  logger: Logger,
-  options: ExtractOptions,
-  markdown?: string,
-  previousWarning?: string,
-  isExtractEndpoint?: boolean,
-  model: TiktokenModel = (process.env.MODEL_NAME as TiktokenModel) ||
-    "gpt-4o-mini",
-): Promise<{
+export async function generateCompletions({
+  logger,
+  options,
+  markdown,
+  previousWarning,
+  isExtractEndpoint,
+  model = getModel("gpt-4o-mini"),
+}: {
+  model?: LanguageModel;
+  logger: Logger;
+  options: ExtractOptions;
+  markdown?: string;
+  previousWarning?: string;
+  isExtractEndpoint?: boolean;
+}): Promise<{
   extract: any;
   numTokens: number;
   warning: string | undefined;
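For orientation, a hypothetical call site showing the move from positional arguments to a single named-options object (schema and prompts below are illustrative, not taken from this commit):

// Before: generateOpenAICompletions(logger, options, markdown, previousWarning, isExtractEndpoint, model)
// After: one object argument; model defaults to getModel("gpt-4o-mini") when omitted.
const { extract, warning, totalUsage } = await generateCompletions({
  logger: logger.child({ method: "example/generateCompletions" }),
  options: {
    mode: "llm",
    systemPrompt: "Extract the page title.",
    schema: {
      type: "object",
      properties: { title: { type: "string" } },
      required: ["title"],
    },
  },
  markdown: "# Example page",
  isExtractEndpoint: false,
});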
@@ -136,13 +144,11 @@ export async function generateOpenAICompletions(
   let extract: any;
   let warning: string | undefined;

-  const openai = new OpenAI();
-
   if (markdown === undefined) {
     throw new Error("document.markdown is undefined -- this is unexpected");
   }

-  const { maxInputTokens, maxOutputTokens } = getModelLimits(model);
+  const { maxInputTokens, maxOutputTokens } = getModelLimits(model.modelId);

   // Ratio of 4 was way too high, now 3.5.
   const modifier = 3.5; // tokens to characters ratio
@@ -153,7 +159,7 @@
   let numTokens = 0;
   try {
     // Encode the message into tokens
-    const encoder = encoding_for_model(model as TiktokenModel);
+    const encoder = encoding_for_model(model.modelId as TiktokenModel);

     try {
       const tokens = encoder.encode(markdown);
@@ -190,6 +196,9 @@
   }

   let schema = options.schema;
+  // Normalize the bad json schema users write (mogery)
+  if (schema && !(schema instanceof z.ZodType)) {
+    // let schema = options.schema;
   if (schema) {
     schema = removeDefaultProperty(schema);
   }
@@ -217,67 +226,42 @@
   }

   schema = normalizeSchema(schema);

-  const jsonCompletion = await openai.beta.chat.completions.parse({
-    model,
-    temperature: 0,
-    messages: [
-      {
-        role: "system",
-        content: options.systemPrompt,
-      },
-      {
-        role: "user",
-        content: [{ type: "text", text: markdown }],
-      },
-      {
-        role: "user",
-        content:
-          options.prompt !== undefined
-            ? `Transform the above content into structured JSON output based on the provided schema if any and the following user request: ${options.prompt}. If schema is provided, strictly follow it.`
-            : "Transform the above content into structured JSON output based on the provided schema if any.",
-      },
-    ],
-    response_format: options.schema
-      ? {
-          type: "json_schema",
-          json_schema: {
-            name: "schema",
-            schema: schema,
-            strict: true,
-          },
-        }
-      : { type: "json_object" },
-  });
-
-  if (jsonCompletion.choices[0].message.refusal !== null && jsonCompletion.choices[0].message.refusal !== undefined) {
-    throw new LLMRefusalError(jsonCompletion.choices[0].message.refusal);
-  }
-
-  extract = jsonCompletion.choices[0].message.parsed;
-
-  if (extract === null && jsonCompletion.choices[0].message.content !== null) {
-    try {
-      if (!isExtractEndpoint) {
-        extract = JSON.parse(jsonCompletion.choices[0].message.content);
-      } else {
-        const extractData = JSON.parse(
-          jsonCompletion.choices[0].message.content,
-        );
-        extract = options.schema ? extractData.data.extract : extractData;
-      }
-    } catch (e) {
-      logger.error("Failed to parse returned JSON, no schema specified.", {
-        error: e,
-      });
-      throw new LLMRefusalError(
-        "Failed to parse returned JSON. Please specify a schema in the extract object.",
-      );
-    }
-  }
-
-  const promptTokens = jsonCompletion.usage?.prompt_tokens ?? 0;
-  const completionTokens = jsonCompletion.usage?.completion_tokens ?? 0;
+  try {
+    const prompt = options.prompt !== undefined
+      ? `Transform the following content into structured JSON output based on the provided schema and this user request: ${options.prompt}. If schema is provided, strictly follow it.\n\n${markdown}`
+      : `Transform the following content into structured JSON output based on the provided schema if any.\n\n${markdown}`;
+
+    const repairConfig = {
+      experimental_repairText: async ({ text, error }) => {
+        const { text: fixedText } = await generateText({
+          model: model,
+          prompt: `Fix this JSON that had the following error: ${error}\n\nOriginal text:\n${text}\n\nReturn only the fixed JSON, no explanation.`,
+          system: "You are a JSON repair expert. Your only job is to fix malformed JSON and return valid JSON that matches the original structure and intent as closely as possible. Do not include any explanation or commentary - only return the fixed JSON."
+        });
+        return fixedText;
+      }
+    };
+
+    const generateObjectConfig = {
+      model: model,
+      prompt: prompt,
+      temperature: options.temperature ?? 0,
+      system: options.systemPrompt,
+      ...(schema && { schema: schema instanceof z.ZodType ? schema : jsonSchema(schema) }),
+      ...(!schema && { output: 'no-schema' as const }),
+      ...repairConfig,
+      ...(!schema && {
+        onError: (error: Error) => {
+          console.error(error);
+        }
+      })
+    } satisfies Parameters<typeof generateObject>[0];
+
+    const result = await generateObject(generateObjectConfig);
+    extract = result.object;

   // If the users actually wants the items object, they can specify it as 'required' in the schema
   // otherwise, we just return the items array
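The block above wires generateObject with an experimental_repairText hook, so malformed JSON gets one model-assisted fix before the call fails. A minimal standalone sketch of the same pattern (schema and prompt are illustrative; the real hook above re-prompts the model via generateText rather than returning the text unchanged):

import { generateObject, jsonSchema } from "ai";
import { openai } from "@ai-sdk/openai";

async function extractTitle(markdown: string) {
  const result = await generateObject({
    model: openai("gpt-4o-mini"),
    system: "You are a professional web scraper.",
    prompt: `Extract the title from the following content:\n\n${markdown}`,
    schema: jsonSchema<{ title: string }>({
      type: "object",
      properties: { title: { type: "string" } },
      required: ["title"],
      additionalProperties: false,
    }),
    experimental_repairText: async ({ text, error }) => {
      // Hook for fixing malformed JSON; this sketch only logs and returns the text as-is.
      console.warn("model returned malformed JSON", error);
      return text;
    },
  });
  return result.object.title;
}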
@@ -288,8 +272,11 @@ export async function generateOpenAICompletions(
   ) {
     extract = extract?.items;
   }
-  // num tokens (just user prompt tokenized) | deprecated
-  // totalTokens = promptTokens + completionTokens
+
+  // Since generateObject doesn't provide token usage, we'll estimate it
+  const promptTokens = numTokens;
+  const completionTokens = result?.usage?.completionTokens ?? 0;

   return {
     extract,
     warning,
@@ -299,8 +286,14 @@
       completionTokens,
       totalTokens: promptTokens + completionTokens,
     },
-    model,
+    model: model.modelId,
   };
+  } catch (error) {
+    if (error.message?.includes('refused')) {
+      throw new LLMRefusalError(error.message);
+    }
+    throw error;
+  }
 }

 export async function performLLMExtract(
@@ -309,14 +302,14 @@ export async function performLLMExtract(
 ): Promise<Document> {
   if (meta.options.formats.includes("extract")) {
     meta.internalOptions.abort?.throwIfAborted();
-    const { extract, warning } = await generateOpenAICompletions(
-      meta.logger.child({
-        method: "performLLMExtract/generateOpenAICompletions",
+    const { extract, warning } = await generateCompletions({
+      logger: meta.logger.child({
+        method: "performLLMExtract/generateCompletions",
       }),
-      meta.options.extract!,
-      document.markdown,
-      document.warning,
-    );
+      options: meta.options.extract!,
+      markdown: document.markdown,
+      previousWarning: document.warning
+    });

     if (meta.options.formats.includes("json")) {
       document.json = extract;
@@ -346,20 +339,20 @@ export function removeDefaultProperty(schema: any): any {
 }

 export async function generateSchemaFromPrompt(prompt: string): Promise<any> {
-  const openai = new OpenAI();
+  const model = getModel("gpt-4o");

   const temperatures = [0, 0.1, 0.3]; // Different temperatures to try
   let lastError: Error | null = null;

   for (const temp of temperatures) {
     try {
-      const result = await openai.beta.chat.completions.parse({
-        model: process.env.MODEL_NAME || "gpt-4o",
-        temperature: temp,
-        messages: [
-          {
-            role: "system",
-            content: `You are a schema generator for a web scraping system. Generate a JSON schema based on the user's prompt.
+      const { extract } = await generateCompletions({
+        logger: logger.child({
+          method: "generateSchemaFromPrompt/generateCompletions",
+        }),
+        model: model,
+        options: {
+          mode: "llm",
+          systemPrompt: `You are a schema generator for a web scraping system. Generate a JSON schema based on the user's prompt.
 Consider:
 1. The type of data being requested
 2. Required fields vs optional fields
@@ -384,28 +377,14 @@ Optionals are not supported.
 DO NOT USE FORMATS.
 Keep it simple. Don't create too many properties, just the ones that are needed. Don't invent properties.
 Return a valid JSON schema object with properties that would capture the information requested in the prompt.`,
+          prompt: `Generate a JSON schema for extracting the following information: ${prompt}`,
+          temperature: temp
         },
-          {
-            role: "user",
-            content: `Generate a JSON schema for extracting the following information: ${prompt}`,
-          },
-        ],
-        response_format: {
-          type: "json_object",
-        },
+        markdown: prompt
       });

-      if (result.choices[0].message.refusal !== null && result.choices[0].message.refusal !== undefined) {
-        throw new Error("LLM refused to generate schema");
-      }
-
-      let schema;
-      try {
-        schema = JSON.parse(result.choices[0].message.content ?? "");
-        return schema;
-      } catch (e) {
-        throw new Error("Failed to parse schema JSON from LLM response");
-      }
+      return extract;
     } catch (error) {
       lastError = error as Error;
       logger.warn(`Failed attempt with temperature ${temp}: ${error.message}`);