Nick: add prompt option; still need to convert to the new structured outputs

This commit is contained in:
Nicolas 2024-08-29 21:00:57 -03:00
parent 49e1cb7ca0
commit 41eb620959
6 changed files with 121 additions and 63 deletions

View File

@ -94,7 +94,7 @@
"moment": "^2.29.4", "moment": "^2.29.4",
"mongoose": "^8.4.4", "mongoose": "^8.4.4",
"natural": "^7.0.7", "natural": "^7.0.7",
"openai": "^4.52.2", "openai": "^4.57.0",
"pdf-parse": "^1.1.1", "pdf-parse": "^1.1.1",
"pos": "^0.4.2", "pos": "^0.4.2",
"posthog-node": "^4.0.1", "posthog-node": "^4.0.1",

View File

@ -124,7 +124,7 @@ importers:
version: 0.0.28 version: 0.0.28
langchain: langchain:
specifier: ^0.2.8 specifier: ^0.2.8
version: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) version: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
languagedetect: languagedetect:
specifier: ^2.0.0 specifier: ^2.0.0
version: 2.0.0 version: 2.0.0
@ -147,8 +147,8 @@ importers:
specifier: ^7.0.7 specifier: ^7.0.7
version: 7.0.7(socks@2.8.3) version: 7.0.7(socks@2.8.3)
openai: openai:
specifier: ^4.52.2 specifier: ^4.57.0
version: 4.52.2 version: 4.57.0(zod@3.23.8)
pdf-parse: pdf-parse:
specifier: ^1.1.1 specifier: ^1.1.1
version: 1.1.1 version: 1.1.1
@ -3733,9 +3733,14 @@ packages:
openai@3.3.0: openai@3.3.0:
resolution: {integrity: sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==} resolution: {integrity: sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==}
openai@4.52.2: openai@4.57.0:
resolution: {integrity: sha512-mMc0XgFuVSkcm0lRIi8zaw++otC82ZlfkCur1qguXYWPETr/+ZwL9A/vvp3YahX+shpaT6j03dwsmUyLAfmEfg==} resolution: {integrity: sha512-JnwBSIYqiZ3jYjB5f2in8hQ0PRA092c6m+/6dYB0MzK0BEbn+0dioxZsPLBm5idJbg9xzLNOiGVm2OSuhZ+BdQ==}
hasBin: true hasBin: true
peerDependencies:
zod: ^3.23.8
peerDependenciesMeta:
zod:
optional: true
openapi-types@12.1.3: openapi-types@12.1.3:
resolution: {integrity: sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==} resolution: {integrity: sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==}
@ -5319,13 +5324,13 @@ snapshots:
'@js-sdsl/ordered-map@4.4.2': {} '@js-sdsl/ordered-map@4.4.2': {}
'@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)': '@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))':
dependencies: dependencies:
ansi-styles: 5.2.0 ansi-styles: 5.2.0
camelcase: 6.3.0 camelcase: 6.3.0
decamelize: 1.2.0 decamelize: 1.2.0
js-tiktoken: 1.0.12 js-tiktoken: 1.0.12
langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2) langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
ml-distance: 4.0.1 ml-distance: 4.0.1
mustache: 4.2.0 mustache: 4.2.0
p-queue: 6.6.2 p-queue: 6.6.2
@ -5337,20 +5342,20 @@ snapshots:
- langchain - langchain
- openai - openai
'@langchain/openai@0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))': '@langchain/openai@0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
dependencies: dependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2) '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
js-tiktoken: 1.0.12 js-tiktoken: 1.0.12
openai: 4.52.2 openai: 4.57.0(zod@3.23.8)
zod: 3.23.8 zod: 3.23.8
zod-to-json-schema: 3.23.1(zod@3.23.8) zod-to-json-schema: 3.23.1(zod@3.23.8)
transitivePeerDependencies: transitivePeerDependencies:
- encoding - encoding
- langchain - langchain
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)': '@langchain/textsplitters@0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))':
dependencies: dependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2) '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
js-tiktoken: 1.0.12 js-tiktoken: 1.0.12
transitivePeerDependencies: transitivePeerDependencies:
- langchain - langchain
@ -8487,17 +8492,17 @@ snapshots:
kleur@3.0.3: {} kleur@3.0.3: {}
langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0): langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
dependencies: dependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2) '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
'@langchain/openai': 0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)) '@langchain/openai': 0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2) '@langchain/textsplitters': 0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
binary-extensions: 2.3.0 binary-extensions: 2.3.0
js-tiktoken: 1.0.12 js-tiktoken: 1.0.12
js-yaml: 4.1.0 js-yaml: 4.1.0
jsonpointer: 5.0.1 jsonpointer: 5.0.1
langchainhub: 0.0.11 langchainhub: 0.0.11
langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2) langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
ml-distance: 4.0.1 ml-distance: 4.0.1
openapi-types: 12.1.3 openapi-types: 12.1.3
p-retry: 4.6.2 p-retry: 4.6.2
@ -8524,7 +8529,7 @@ snapshots:
langchainhub@0.0.11: {} langchainhub@0.0.11: {}
langsmith@0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2): langsmith@0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)):
dependencies: dependencies:
'@types/uuid': 9.0.8 '@types/uuid': 9.0.8
commander: 10.0.1 commander: 10.0.1
@ -8533,9 +8538,9 @@ snapshots:
p-retry: 4.6.2 p-retry: 4.6.2
uuid: 9.0.1 uuid: 9.0.1
optionalDependencies: optionalDependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2) '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
langchain: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) langchain: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
openai: 4.52.2 openai: 4.57.0(zod@3.23.8)
languagedetect@2.0.0: {} languagedetect@2.0.0: {}
@ -8928,16 +8933,19 @@ snapshots:
transitivePeerDependencies: transitivePeerDependencies:
- debug - debug
openai@4.52.2: openai@4.57.0(zod@3.23.8):
dependencies: dependencies:
'@types/node': 18.19.39 '@types/node': 18.19.39
'@types/node-fetch': 2.6.11 '@types/node-fetch': 2.6.11
'@types/qs': 6.9.15
abort-controller: 3.0.0 abort-controller: 3.0.0
agentkeepalive: 4.5.0 agentkeepalive: 4.5.0
form-data-encoder: 1.7.2 form-data-encoder: 1.7.2
formdata-node: 4.4.1 formdata-node: 4.4.1
node-fetch: 2.7.0 node-fetch: 2.7.0
web-streams-polyfill: 3.3.3 qs: 6.12.2
optionalDependencies:
zod: 3.23.8
transitivePeerDependencies: transitivePeerDependencies:
- encoding - encoding

View File

@ -44,7 +44,8 @@ const strictMessage = "Unrecognized key in body -- please review the v1 API docu
export const extractOptions = z.object({ export const extractOptions = z.object({
mode: z.enum(["llm"]).default("llm"), mode: z.enum(["llm"]).default("llm"),
schema: z.any().optional(), schema: z.any().optional(),
prompt: z.string().default("Based on the information on the page, extract the information from the schema.") systemPrompt: z.string().default("Based on the information on the page, extract the information from the schema."),
prompt: z.string().optional()
}).strict(strictMessage); }).strict(strictMessage);
export type ExtractOptions = z.infer<typeof extractOptions>; export type ExtractOptions = z.infer<typeof extractOptions>;
@ -316,6 +317,7 @@ export function legacyExtractorOptions(x: ExtractOptions): ExtractorOptions {
mode: x.mode ? "llm-extraction" : "markdown", mode: x.mode ? "llm-extraction" : "markdown",
extractionPrompt: x.prompt ?? "Based on the information on the page, extract the information from the schema.", extractionPrompt: x.prompt ?? "Based on the information on the page, extract the information from the schema.",
extractionSchema: x.schema, extractionSchema: x.schema,
userPrompt: x.prompt ?? "",
}; };
} }

View File

@ -15,7 +15,8 @@ export async function generateCompletions(
// const schema = zodToJsonSchema(options.schema) // const schema = zodToJsonSchema(options.schema)
const schema = extractionOptions.extractionSchema; const schema = extractionOptions.extractionSchema;
const prompt = extractionOptions.extractionPrompt; const systemPrompt = extractionOptions.extractionPrompt;
const prompt = extractionOptions.userPrompt;
const switchVariable = "openAI"; // Placeholder, want to think more about how we abstract the model provider const switchVariable = "openAI"; // Placeholder, want to think more about how we abstract the model provider
@ -31,9 +32,11 @@ export async function generateCompletions(
document: document, document: document,
schema: schema, schema: schema,
prompt: prompt, prompt: prompt,
systemPrompt: systemPrompt,
mode: mode, mode: mode,
}); });
// Validate the JSON output against the schema using AJV // Validate the JSON output against the schema using AJV
if(schema){
const validate = ajv.compile(schema); const validate = ajv.compile(schema);
if (!validate(completionResult.llm_extraction)) { if (!validate(completionResult.llm_extraction)) {
//TODO: add Custom Error handling middleware that bubbles this up with proper Error code, etc. //TODO: add Custom Error handling middleware that bubbles this up with proper Error code, etc.
@ -41,7 +44,8 @@ export async function generateCompletions(
`JSON parsing error(s): ${validate.errors `JSON parsing error(s): ${validate.errors
?.map((err) => err.message) ?.map((err) => err.message)
.join(", ")}\n\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support.` .join(", ")}\n\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support.`
); );
}
} }
return completionResult; return completionResult;

View File

@ -16,7 +16,6 @@ function prepareOpenAIDoc(
document: Document, document: Document,
mode: "markdown" | "raw-html" mode: "markdown" | "raw-html"
): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] | null { ): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] | null {
let markdown = document.markdown; let markdown = document.markdown;
let extractionTarget = document.markdown; let extractionTarget = document.markdown;
@ -25,7 +24,6 @@ function prepareOpenAIDoc(
extractionTarget = document.rawHtml; extractionTarget = document.rawHtml;
} }
// Check if the markdown content exists in the document // Check if the markdown content exists in the document
if (!extractionTarget) { if (!extractionTarget) {
return null; return null;
@ -34,33 +32,32 @@ function prepareOpenAIDoc(
// ); // );
} }
// count number of tokens // count number of tokens
const numTokens = numTokensFromString(extractionTarget, "gpt-4"); const numTokens = numTokensFromString(extractionTarget, "gpt-4");
if (numTokens > maxTokens) { if (numTokens > maxTokens) {
// trim the document to the maximum number of tokens, tokens != characters // trim the document to the maximum number of tokens, tokens != characters
extractionTarget = extractionTarget.slice(0, (maxTokens * modifier)); extractionTarget = extractionTarget.slice(0, maxTokens * modifier);
} }
return [[{ type: "text", text: extractionTarget }], numTokens]; return [[{ type: "text", text: extractionTarget }], numTokens];
} }
export async function generateOpenAICompletions({ export async function generateOpenAICompletions({
client, client,
model = process.env.MODEL_NAME || "gpt-4o", model = process.env.MODEL_NAME || "gpt-4o-mini",
document, document,
schema, //TODO - add zod dynamic type checking schema, //TODO - add zod dynamic type checking
prompt = defaultPrompt, systemPrompt = defaultPrompt,
prompt,
temperature, temperature,
mode mode,
}: { }: {
client: OpenAI; client: OpenAI;
model?: string; model?: string;
document: Document; document: Document;
schema: any; // This should be replaced with a proper Zod schema type when available schema: any; // This should be replaced with a proper Zod schema type when available
prompt?: string; prompt?: string;
systemPrompt?: string;
temperature?: number; temperature?: number;
mode: "markdown" | "raw-html"; mode: "markdown" | "raw-html";
}): Promise<Document> { }): Promise<Document> {
@ -70,44 +67,90 @@ export async function generateOpenAICompletions({
if (preparedDoc === null) { if (preparedDoc === null) {
return { return {
...document, ...document,
warning: "LLM extraction was not performed since the document's content is empty or missing.", warning:
"LLM extraction was not performed since the document's content is empty or missing.",
}; };
} }
const [content, numTokens] = preparedDoc; const [content, numTokens] = preparedDoc;
const completion = await openai.chat.completions.create({ let completion;
model, let llmExtraction;
messages: [ if (prompt && !schema) {
{ // If prompt is defined, ask OpenAI to generate a schema based on the prompt
role: "system", // const schemaCompletion = await openai.chat.completions.create({
content: prompt, // model,
}, // messages: [
{ role: "user", content }, // {
], // role: "system",
tools: [ // content: "You are a helpful assistant that generates JSON schemas based on user prompts.",
{ // },
type: "function", // {
function: { // role: "user",
name: "extract_content", // content: `Generate a JSON schema compatible with openai function calling based on this prompt: ${prompt}`,
description: "Extracts the content from the given webpage(s)", // },
parameters: schema, // ],
// temperature: 0,
// response_format: { type: "json_object" },
// });
// console.log(schemaCompletion.choices[0].message.content);
// const generatedSchema = JSON.parse(schemaCompletion.choices[0].message.content);
console.log(prompt);
const jsonCompletion = await openai.chat.completions.create({
model,
messages: [
{
role: "system",
content: systemPrompt,
}, },
}, { role: "user", content },
], { role: "user", content: `Transform the above content into structured json output based on the following user request: ${prompt}` },
tool_choice: { "type": "function", "function": {"name": "extract_content"}}, ],
temperature, response_format: { type: "json_object" },
}); temperature,
});
const c = completion.choices[0].message.tool_calls[0].function.arguments; console.log(jsonCompletion.choices[0].message.content);
// Extract the LLM extraction content from the completion response llmExtraction = JSON.parse(jsonCompletion.choices[0].message.content.trim());
const llmExtraction = JSON.parse(c); console.log(llmExtraction);
} else {
completion = await openai.chat.completions.create({
model,
messages: [
{
role: "system",
content: systemPrompt,
},
{ role: "user", content },
],
tools: [
{
type: "function",
function: {
name: "extract_content",
description: "Extracts the content from the given webpage(s)",
parameters: schema,
},
},
],
tool_choice: { type: "function", function: { name: "extract_content" } },
temperature,
});
const c = completion.choices[0].message.tool_calls[0].function.arguments;
// Extract the LLM extraction content from the completion response
llmExtraction = JSON.parse(c);
}
// Return the document with the LLM extraction content added // Return the document with the LLM extraction content added
return { return {
...document, ...document,
llm_extraction: llmExtraction, llm_extraction: llmExtraction,
warning: numTokens > maxTokens ? `Page was trimmed to fit the maximum token limit defined by the LLM model (Max: ${maxTokens} tokens, Attemped: ${numTokens} tokens). If results are not good, email us at help@mendable.ai so we can help you.` : undefined, warning:
numTokens > maxTokens
? `Page was trimmed to fit the maximum token limit defined by the LLM model (Max: ${maxTokens} tokens, Attemped: ${numTokens} tokens). If results are not good, email us at help@mendable.ai so we can help you.`
: undefined,
}; };
} }

View File

@ -36,6 +36,7 @@ export type ExtractorOptions = {
mode: "markdown" | "llm-extraction" | "llm-extraction-from-markdown" | "llm-extraction-from-raw-html"; mode: "markdown" | "llm-extraction" | "llm-extraction-from-markdown" | "llm-extraction-from-raw-html";
extractionPrompt?: string; extractionPrompt?: string;
extractionSchema?: Record<string, any>; extractionSchema?: Record<string, any>;
userPrompt?: string;
} }
export type SearchOptions = { export type SearchOptions = {