mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-06 05:47:19 +08:00
Nick: prompt option, still need to convert to new structured outputs
This commit is contained in:
parent
49e1cb7ca0
commit
41eb620959
@ -94,7 +94,7 @@
|
|||||||
"moment": "^2.29.4",
|
"moment": "^2.29.4",
|
||||||
"mongoose": "^8.4.4",
|
"mongoose": "^8.4.4",
|
||||||
"natural": "^7.0.7",
|
"natural": "^7.0.7",
|
||||||
"openai": "^4.52.2",
|
"openai": "^4.57.0",
|
||||||
"pdf-parse": "^1.1.1",
|
"pdf-parse": "^1.1.1",
|
||||||
"pos": "^0.4.2",
|
"pos": "^0.4.2",
|
||||||
"posthog-node": "^4.0.1",
|
"posthog-node": "^4.0.1",
|
||||||
|
54
apps/api/pnpm-lock.yaml
generated
54
apps/api/pnpm-lock.yaml
generated
@ -124,7 +124,7 @@ importers:
|
|||||||
version: 0.0.28
|
version: 0.0.28
|
||||||
langchain:
|
langchain:
|
||||||
specifier: ^0.2.8
|
specifier: ^0.2.8
|
||||||
version: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
version: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||||
languagedetect:
|
languagedetect:
|
||||||
specifier: ^2.0.0
|
specifier: ^2.0.0
|
||||||
version: 2.0.0
|
version: 2.0.0
|
||||||
@ -147,8 +147,8 @@ importers:
|
|||||||
specifier: ^7.0.7
|
specifier: ^7.0.7
|
||||||
version: 7.0.7(socks@2.8.3)
|
version: 7.0.7(socks@2.8.3)
|
||||||
openai:
|
openai:
|
||||||
specifier: ^4.52.2
|
specifier: ^4.57.0
|
||||||
version: 4.52.2
|
version: 4.57.0(zod@3.23.8)
|
||||||
pdf-parse:
|
pdf-parse:
|
||||||
specifier: ^1.1.1
|
specifier: ^1.1.1
|
||||||
version: 1.1.1
|
version: 1.1.1
|
||||||
@ -3733,9 +3733,14 @@ packages:
|
|||||||
openai@3.3.0:
|
openai@3.3.0:
|
||||||
resolution: {integrity: sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==}
|
resolution: {integrity: sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==}
|
||||||
|
|
||||||
openai@4.52.2:
|
openai@4.57.0:
|
||||||
resolution: {integrity: sha512-mMc0XgFuVSkcm0lRIi8zaw++otC82ZlfkCur1qguXYWPETr/+ZwL9A/vvp3YahX+shpaT6j03dwsmUyLAfmEfg==}
|
resolution: {integrity: sha512-JnwBSIYqiZ3jYjB5f2in8hQ0PRA092c6m+/6dYB0MzK0BEbn+0dioxZsPLBm5idJbg9xzLNOiGVm2OSuhZ+BdQ==}
|
||||||
hasBin: true
|
hasBin: true
|
||||||
|
peerDependencies:
|
||||||
|
zod: ^3.23.8
|
||||||
|
peerDependenciesMeta:
|
||||||
|
zod:
|
||||||
|
optional: true
|
||||||
|
|
||||||
openapi-types@12.1.3:
|
openapi-types@12.1.3:
|
||||||
resolution: {integrity: sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==}
|
resolution: {integrity: sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==}
|
||||||
@ -5319,13 +5324,13 @@ snapshots:
|
|||||||
|
|
||||||
'@js-sdsl/ordered-map@4.4.2': {}
|
'@js-sdsl/ordered-map@4.4.2': {}
|
||||||
|
|
||||||
'@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)':
|
'@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))':
|
||||||
dependencies:
|
dependencies:
|
||||||
ansi-styles: 5.2.0
|
ansi-styles: 5.2.0
|
||||||
camelcase: 6.3.0
|
camelcase: 6.3.0
|
||||||
decamelize: 1.2.0
|
decamelize: 1.2.0
|
||||||
js-tiktoken: 1.0.12
|
js-tiktoken: 1.0.12
|
||||||
langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
|
langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
||||||
ml-distance: 4.0.1
|
ml-distance: 4.0.1
|
||||||
mustache: 4.2.0
|
mustache: 4.2.0
|
||||||
p-queue: 6.6.2
|
p-queue: 6.6.2
|
||||||
@ -5337,20 +5342,20 @@ snapshots:
|
|||||||
- langchain
|
- langchain
|
||||||
- openai
|
- openai
|
||||||
|
|
||||||
'@langchain/openai@0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
|
'@langchain/openai@0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
|
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
||||||
js-tiktoken: 1.0.12
|
js-tiktoken: 1.0.12
|
||||||
openai: 4.52.2
|
openai: 4.57.0(zod@3.23.8)
|
||||||
zod: 3.23.8
|
zod: 3.23.8
|
||||||
zod-to-json-schema: 3.23.1(zod@3.23.8)
|
zod-to-json-schema: 3.23.1(zod@3.23.8)
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- encoding
|
- encoding
|
||||||
- langchain
|
- langchain
|
||||||
|
|
||||||
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)':
|
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
|
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
||||||
js-tiktoken: 1.0.12
|
js-tiktoken: 1.0.12
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- langchain
|
- langchain
|
||||||
@ -8487,17 +8492,17 @@ snapshots:
|
|||||||
|
|
||||||
kleur@3.0.3: {}
|
kleur@3.0.3: {}
|
||||||
|
|
||||||
langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
|
langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
|
||||||
dependencies:
|
dependencies:
|
||||||
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
|
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
||||||
'@langchain/openai': 0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
|
'@langchain/openai': 0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
|
||||||
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
|
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
||||||
binary-extensions: 2.3.0
|
binary-extensions: 2.3.0
|
||||||
js-tiktoken: 1.0.12
|
js-tiktoken: 1.0.12
|
||||||
js-yaml: 4.1.0
|
js-yaml: 4.1.0
|
||||||
jsonpointer: 5.0.1
|
jsonpointer: 5.0.1
|
||||||
langchainhub: 0.0.11
|
langchainhub: 0.0.11
|
||||||
langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
|
langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
||||||
ml-distance: 4.0.1
|
ml-distance: 4.0.1
|
||||||
openapi-types: 12.1.3
|
openapi-types: 12.1.3
|
||||||
p-retry: 4.6.2
|
p-retry: 4.6.2
|
||||||
@ -8524,7 +8529,7 @@ snapshots:
|
|||||||
|
|
||||||
langchainhub@0.0.11: {}
|
langchainhub@0.0.11: {}
|
||||||
|
|
||||||
langsmith@0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2):
|
langsmith@0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)):
|
||||||
dependencies:
|
dependencies:
|
||||||
'@types/uuid': 9.0.8
|
'@types/uuid': 9.0.8
|
||||||
commander: 10.0.1
|
commander: 10.0.1
|
||||||
@ -8533,9 +8538,9 @@ snapshots:
|
|||||||
p-retry: 4.6.2
|
p-retry: 4.6.2
|
||||||
uuid: 9.0.1
|
uuid: 9.0.1
|
||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
|
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))
|
||||||
langchain: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
langchain: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||||
openai: 4.52.2
|
openai: 4.57.0(zod@3.23.8)
|
||||||
|
|
||||||
languagedetect@2.0.0: {}
|
languagedetect@2.0.0: {}
|
||||||
|
|
||||||
@ -8928,16 +8933,19 @@ snapshots:
|
|||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- debug
|
- debug
|
||||||
|
|
||||||
openai@4.52.2:
|
openai@4.57.0(zod@3.23.8):
|
||||||
dependencies:
|
dependencies:
|
||||||
'@types/node': 18.19.39
|
'@types/node': 18.19.39
|
||||||
'@types/node-fetch': 2.6.11
|
'@types/node-fetch': 2.6.11
|
||||||
|
'@types/qs': 6.9.15
|
||||||
abort-controller: 3.0.0
|
abort-controller: 3.0.0
|
||||||
agentkeepalive: 4.5.0
|
agentkeepalive: 4.5.0
|
||||||
form-data-encoder: 1.7.2
|
form-data-encoder: 1.7.2
|
||||||
formdata-node: 4.4.1
|
formdata-node: 4.4.1
|
||||||
node-fetch: 2.7.0
|
node-fetch: 2.7.0
|
||||||
web-streams-polyfill: 3.3.3
|
qs: 6.12.2
|
||||||
|
optionalDependencies:
|
||||||
|
zod: 3.23.8
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- encoding
|
- encoding
|
||||||
|
|
||||||
|
@ -44,7 +44,8 @@ const strictMessage = "Unrecognized key in body -- please review the v1 API docu
|
|||||||
export const extractOptions = z.object({
|
export const extractOptions = z.object({
|
||||||
mode: z.enum(["llm"]).default("llm"),
|
mode: z.enum(["llm"]).default("llm"),
|
||||||
schema: z.any().optional(),
|
schema: z.any().optional(),
|
||||||
prompt: z.string().default("Based on the information on the page, extract the information from the schema.")
|
systemPrompt: z.string().default("Based on the information on the page, extract the information from the schema."),
|
||||||
|
prompt: z.string().optional()
|
||||||
}).strict(strictMessage);
|
}).strict(strictMessage);
|
||||||
|
|
||||||
export type ExtractOptions = z.infer<typeof extractOptions>;
|
export type ExtractOptions = z.infer<typeof extractOptions>;
|
||||||
@ -316,6 +317,7 @@ export function legacyExtractorOptions(x: ExtractOptions): ExtractorOptions {
|
|||||||
mode: x.mode ? "llm-extraction" : "markdown",
|
mode: x.mode ? "llm-extraction" : "markdown",
|
||||||
extractionPrompt: x.prompt ?? "Based on the information on the page, extract the information from the schema.",
|
extractionPrompt: x.prompt ?? "Based on the information on the page, extract the information from the schema.",
|
||||||
extractionSchema: x.schema,
|
extractionSchema: x.schema,
|
||||||
|
userPrompt: x.prompt ?? "",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,7 +15,8 @@ export async function generateCompletions(
|
|||||||
// const schema = zodToJsonSchema(options.schema)
|
// const schema = zodToJsonSchema(options.schema)
|
||||||
|
|
||||||
const schema = extractionOptions.extractionSchema;
|
const schema = extractionOptions.extractionSchema;
|
||||||
const prompt = extractionOptions.extractionPrompt;
|
const systemPrompt = extractionOptions.extractionPrompt;
|
||||||
|
const prompt = extractionOptions.userPrompt;
|
||||||
|
|
||||||
const switchVariable = "openAI"; // Placholder, want to think more about how we abstract the model provider
|
const switchVariable = "openAI"; // Placholder, want to think more about how we abstract the model provider
|
||||||
|
|
||||||
@ -31,9 +32,11 @@ export async function generateCompletions(
|
|||||||
document: document,
|
document: document,
|
||||||
schema: schema,
|
schema: schema,
|
||||||
prompt: prompt,
|
prompt: prompt,
|
||||||
|
systemPrompt: systemPrompt,
|
||||||
mode: mode,
|
mode: mode,
|
||||||
});
|
});
|
||||||
// Validate the JSON output against the schema using AJV
|
// Validate the JSON output against the schema using AJV
|
||||||
|
if(schema){
|
||||||
const validate = ajv.compile(schema);
|
const validate = ajv.compile(schema);
|
||||||
if (!validate(completionResult.llm_extraction)) {
|
if (!validate(completionResult.llm_extraction)) {
|
||||||
//TODO: add Custom Error handling middleware that bubbles this up with proper Error code, etc.
|
//TODO: add Custom Error handling middleware that bubbles this up with proper Error code, etc.
|
||||||
@ -41,7 +44,8 @@ export async function generateCompletions(
|
|||||||
`JSON parsing error(s): ${validate.errors
|
`JSON parsing error(s): ${validate.errors
|
||||||
?.map((err) => err.message)
|
?.map((err) => err.message)
|
||||||
.join(", ")}\n\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support.`
|
.join(", ")}\n\nLLM extraction did not match the extraction schema you provided. This could be because of a model hallucination, or an Error on our side. Try adjusting your prompt, and if it doesn't work reach out to support.`
|
||||||
);
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return completionResult;
|
return completionResult;
|
||||||
|
@ -16,7 +16,6 @@ function prepareOpenAIDoc(
|
|||||||
document: Document,
|
document: Document,
|
||||||
mode: "markdown" | "raw-html"
|
mode: "markdown" | "raw-html"
|
||||||
): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] | null {
|
): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] | null {
|
||||||
|
|
||||||
let markdown = document.markdown;
|
let markdown = document.markdown;
|
||||||
|
|
||||||
let extractionTarget = document.markdown;
|
let extractionTarget = document.markdown;
|
||||||
@ -25,7 +24,6 @@ function prepareOpenAIDoc(
|
|||||||
extractionTarget = document.rawHtml;
|
extractionTarget = document.rawHtml;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Check if the markdown content exists in the document
|
// Check if the markdown content exists in the document
|
||||||
if (!extractionTarget) {
|
if (!extractionTarget) {
|
||||||
return null;
|
return null;
|
||||||
@ -34,33 +32,32 @@ function prepareOpenAIDoc(
|
|||||||
// );
|
// );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// count number of tokens
|
// count number of tokens
|
||||||
const numTokens = numTokensFromString(extractionTarget, "gpt-4");
|
const numTokens = numTokensFromString(extractionTarget, "gpt-4");
|
||||||
|
|
||||||
if (numTokens > maxTokens) {
|
if (numTokens > maxTokens) {
|
||||||
// trim the document to the maximum number of tokens, tokens != characters
|
// trim the document to the maximum number of tokens, tokens != characters
|
||||||
extractionTarget = extractionTarget.slice(0, (maxTokens * modifier));
|
extractionTarget = extractionTarget.slice(0, maxTokens * modifier);
|
||||||
}
|
}
|
||||||
return [[{ type: "text", text: extractionTarget }], numTokens];
|
return [[{ type: "text", text: extractionTarget }], numTokens];
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function generateOpenAICompletions({
|
export async function generateOpenAICompletions({
|
||||||
client,
|
client,
|
||||||
model = process.env.MODEL_NAME || "gpt-4o",
|
model = process.env.MODEL_NAME || "gpt-4o-mini",
|
||||||
document,
|
document,
|
||||||
schema, //TODO - add zod dynamic type checking
|
schema, //TODO - add zod dynamic type checking
|
||||||
prompt = defaultPrompt,
|
systemPrompt = defaultPrompt,
|
||||||
|
prompt,
|
||||||
temperature,
|
temperature,
|
||||||
mode
|
mode,
|
||||||
}: {
|
}: {
|
||||||
client: OpenAI;
|
client: OpenAI;
|
||||||
model?: string;
|
model?: string;
|
||||||
document: Document;
|
document: Document;
|
||||||
schema: any; // This should be replaced with a proper Zod schema type when available
|
schema: any; // This should be replaced with a proper Zod schema type when available
|
||||||
prompt?: string;
|
prompt?: string;
|
||||||
|
systemPrompt?: string;
|
||||||
temperature?: number;
|
temperature?: number;
|
||||||
mode: "markdown" | "raw-html";
|
mode: "markdown" | "raw-html";
|
||||||
}): Promise<Document> {
|
}): Promise<Document> {
|
||||||
@ -70,44 +67,90 @@ export async function generateOpenAICompletions({
|
|||||||
if (preparedDoc === null) {
|
if (preparedDoc === null) {
|
||||||
return {
|
return {
|
||||||
...document,
|
...document,
|
||||||
warning: "LLM extraction was not performed since the document's content is empty or missing.",
|
warning:
|
||||||
|
"LLM extraction was not performed since the document's content is empty or missing.",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
const [content, numTokens] = preparedDoc;
|
const [content, numTokens] = preparedDoc;
|
||||||
|
|
||||||
const completion = await openai.chat.completions.create({
|
let completion;
|
||||||
model,
|
let llmExtraction;
|
||||||
messages: [
|
if (prompt && !schema) {
|
||||||
{
|
// If prompt is defined, ask OpenAI to generate a schema based on the prompt
|
||||||
role: "system",
|
// const schemaCompletion = await openai.chat.completions.create({
|
||||||
content: prompt,
|
// model,
|
||||||
},
|
// messages: [
|
||||||
{ role: "user", content },
|
// {
|
||||||
],
|
// role: "system",
|
||||||
tools: [
|
// content: "You are a helpful assistant that generates JSON schemas based on user prompts.",
|
||||||
{
|
// },
|
||||||
type: "function",
|
// {
|
||||||
function: {
|
// role: "user",
|
||||||
name: "extract_content",
|
// content: `Generate a JSON schema compatible with openai function calling based on this prompt: ${prompt}`,
|
||||||
description: "Extracts the content from the given webpage(s)",
|
// },
|
||||||
parameters: schema,
|
// ],
|
||||||
|
// temperature: 0,
|
||||||
|
// response_format: { type: "json_object" },
|
||||||
|
// });
|
||||||
|
|
||||||
|
// console.log(schemaCompletion.choices[0].message.content);
|
||||||
|
|
||||||
|
// const generatedSchema = JSON.parse(schemaCompletion.choices[0].message.content);
|
||||||
|
console.log(prompt);
|
||||||
|
const jsonCompletion = await openai.chat.completions.create({
|
||||||
|
model,
|
||||||
|
messages: [
|
||||||
|
{
|
||||||
|
role: "system",
|
||||||
|
content: systemPrompt,
|
||||||
},
|
},
|
||||||
},
|
{ role: "user", content },
|
||||||
],
|
{ role: "user", content: `Transform the above content into structured json output based on the following user request: ${prompt}` },
|
||||||
tool_choice: { "type": "function", "function": {"name": "extract_content"}},
|
],
|
||||||
temperature,
|
response_format: { type: "json_object" },
|
||||||
});
|
temperature,
|
||||||
|
});
|
||||||
|
|
||||||
const c = completion.choices[0].message.tool_calls[0].function.arguments;
|
console.log(jsonCompletion.choices[0].message.content);
|
||||||
|
|
||||||
// Extract the LLM extraction content from the completion response
|
llmExtraction = JSON.parse(jsonCompletion.choices[0].message.content.trim());
|
||||||
const llmExtraction = JSON.parse(c);
|
console.log(llmExtraction);
|
||||||
|
} else {
|
||||||
|
completion = await openai.chat.completions.create({
|
||||||
|
model,
|
||||||
|
messages: [
|
||||||
|
{
|
||||||
|
role: "system",
|
||||||
|
content: systemPrompt,
|
||||||
|
},
|
||||||
|
{ role: "user", content },
|
||||||
|
],
|
||||||
|
tools: [
|
||||||
|
{
|
||||||
|
type: "function",
|
||||||
|
function: {
|
||||||
|
name: "extract_content",
|
||||||
|
description: "Extracts the content from the given webpage(s)",
|
||||||
|
parameters: schema,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
tool_choice: { type: "function", function: { name: "extract_content" } },
|
||||||
|
temperature,
|
||||||
|
});
|
||||||
|
const c = completion.choices[0].message.tool_calls[0].function.arguments;
|
||||||
|
|
||||||
|
// Extract the LLM extraction content from the completion response
|
||||||
|
llmExtraction = JSON.parse(c);
|
||||||
|
}
|
||||||
|
|
||||||
// Return the document with the LLM extraction content added
|
// Return the document with the LLM extraction content added
|
||||||
return {
|
return {
|
||||||
...document,
|
...document,
|
||||||
llm_extraction: llmExtraction,
|
llm_extraction: llmExtraction,
|
||||||
warning: numTokens > maxTokens ? `Page was trimmed to fit the maximum token limit defined by the LLM model (Max: ${maxTokens} tokens, Attemped: ${numTokens} tokens). If results are not good, email us at help@mendable.ai so we can help you.` : undefined,
|
warning:
|
||||||
|
numTokens > maxTokens
|
||||||
|
? `Page was trimmed to fit the maximum token limit defined by the LLM model (Max: ${maxTokens} tokens, Attemped: ${numTokens} tokens). If results are not good, email us at help@mendable.ai so we can help you.`
|
||||||
|
: undefined,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,6 +36,7 @@ export type ExtractorOptions = {
|
|||||||
mode: "markdown" | "llm-extraction" | "llm-extraction-from-markdown" | "llm-extraction-from-raw-html";
|
mode: "markdown" | "llm-extraction" | "llm-extraction-from-markdown" | "llm-extraction-from-raw-html";
|
||||||
extractionPrompt?: string;
|
extractionPrompt?: string;
|
||||||
extractionSchema?: Record<string, any>;
|
extractionSchema?: Record<string, any>;
|
||||||
|
userPrompt?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export type SearchOptions = {
|
export type SearchOptions = {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user