mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-01 20:30:44 +08:00
retry model
This commit is contained in:
parent
9fdbd3e4d6
commit
844749be6e
1
.gitignore
vendored
1
.gitignore
vendored
@ -40,3 +40,4 @@ apps/js-sdk/firecrawl/dist
|
|||||||
.vscode
|
.vscode
|
||||||
llm-links.txt
|
llm-links.txt
|
||||||
mapped-links.txt
|
mapped-links.txt
|
||||||
|
gke-key.json
|
@ -55,6 +55,7 @@
|
|||||||
"@ai-sdk/deepinfra": "^0.2.4",
|
"@ai-sdk/deepinfra": "^0.2.4",
|
||||||
"@ai-sdk/fireworks": "^0.2.4",
|
"@ai-sdk/fireworks": "^0.2.4",
|
||||||
"@ai-sdk/google": "^1.2.3",
|
"@ai-sdk/google": "^1.2.3",
|
||||||
|
"@ai-sdk/google-vertex": "^2.2.13",
|
||||||
"@ai-sdk/groq": "^1.2.1",
|
"@ai-sdk/groq": "^1.2.1",
|
||||||
"@ai-sdk/openai": "^1.1.13",
|
"@ai-sdk/openai": "^1.1.13",
|
||||||
"@anthropic-ai/sdk": "^0.24.3",
|
"@anthropic-ai/sdk": "^0.24.3",
|
||||||
|
182
apps/api/pnpm-lock.yaml
generated
182
apps/api/pnpm-lock.yaml
generated
@ -20,6 +20,9 @@ importers:
|
|||||||
'@ai-sdk/google':
|
'@ai-sdk/google':
|
||||||
specifier: ^1.2.3
|
specifier: ^1.2.3
|
||||||
version: 1.2.3(zod@3.24.2)
|
version: 1.2.3(zod@3.24.2)
|
||||||
|
'@ai-sdk/google-vertex':
|
||||||
|
specifier: ^2.2.13
|
||||||
|
version: 2.2.13(encoding@0.1.13)(zod@3.24.2)
|
||||||
'@ai-sdk/groq':
|
'@ai-sdk/groq':
|
||||||
specifier: ^1.2.1
|
specifier: ^1.2.1
|
||||||
version: 1.2.1(zod@3.24.2)
|
version: 1.2.1(zod@3.24.2)
|
||||||
@ -348,6 +351,12 @@ packages:
|
|||||||
peerDependencies:
|
peerDependencies:
|
||||||
zod: ^3.0.0
|
zod: ^3.0.0
|
||||||
|
|
||||||
|
'@ai-sdk/anthropic@1.2.8':
|
||||||
|
resolution: {integrity: sha512-UchR+xGtTASkHyJ5uX4jVJGfxo2SCoqxtHJQzuO8INYEbpFwa9cUD06934tNqICxsNOhupS9y4b1sd/Uep6OXw==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
peerDependencies:
|
||||||
|
zod: ^3.0.0
|
||||||
|
|
||||||
'@ai-sdk/deepinfra@0.2.4':
|
'@ai-sdk/deepinfra@0.2.4':
|
||||||
resolution: {integrity: sha512-JBF3tUOLYgQDCwkvN9I5ZbSqsAxTJWOKmIpyJXJl5RpLXOEviJUqpKSZufs11J9S4Z0U9vZX9jfhO1+DBjS56w==}
|
resolution: {integrity: sha512-JBF3tUOLYgQDCwkvN9I5ZbSqsAxTJWOKmIpyJXJl5RpLXOEviJUqpKSZufs11J9S4Z0U9vZX9jfhO1+DBjS56w==}
|
||||||
engines: {node: '>=18'}
|
engines: {node: '>=18'}
|
||||||
@ -360,6 +369,18 @@ packages:
|
|||||||
peerDependencies:
|
peerDependencies:
|
||||||
zod: ^3.0.0
|
zod: ^3.0.0
|
||||||
|
|
||||||
|
'@ai-sdk/google-vertex@2.2.13':
|
||||||
|
resolution: {integrity: sha512-ddtrFYQnPRwr5/Cf9tIJ7Q/kAlYOtxA1xk3ZFnnfEjOArHAb+3KcLeanBSGpd+xuPm9nwK0DgYlm3lElGNjRIg==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
peerDependencies:
|
||||||
|
zod: ^3.0.0
|
||||||
|
|
||||||
|
'@ai-sdk/google@1.2.10':
|
||||||
|
resolution: {integrity: sha512-YmZ9DIO6Un0+RU9PtjM9TfoExmUQg2fk8vTlwT+NOaARyhv8eskRCUTne0zf5uUOazPIJuBEv2I6YE9XnS+tUg==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
peerDependencies:
|
||||||
|
zod: ^3.0.0
|
||||||
|
|
||||||
'@ai-sdk/google@1.2.3':
|
'@ai-sdk/google@1.2.3':
|
||||||
resolution: {integrity: sha512-zsgwko7T+MFIdEfhg4fIXv6O2dnzTLFr6BOpAA21eo/moOBA5szVzOto1jTwIwoBYsF2ixPGNZBoc+k/fQ2AWw==}
|
resolution: {integrity: sha512-zsgwko7T+MFIdEfhg4fIXv6O2dnzTLFr6BOpAA21eo/moOBA5szVzOto1jTwIwoBYsF2ixPGNZBoc+k/fQ2AWw==}
|
||||||
engines: {node: '>=18'}
|
engines: {node: '>=18'}
|
||||||
@ -414,6 +435,12 @@ packages:
|
|||||||
peerDependencies:
|
peerDependencies:
|
||||||
zod: ^3.23.8
|
zod: ^3.23.8
|
||||||
|
|
||||||
|
'@ai-sdk/provider-utils@2.2.6':
|
||||||
|
resolution: {integrity: sha512-sUlZ7Gnq84DCGWMQRIK8XVbkzIBnvPR1diV4v6JwPgpn5armnLI/j+rqn62MpLrU5ZCQZlDKl/Lw6ed3ulYqaA==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
peerDependencies:
|
||||||
|
zod: ^3.23.8
|
||||||
|
|
||||||
'@ai-sdk/provider@1.0.8':
|
'@ai-sdk/provider@1.0.8':
|
||||||
resolution: {integrity: sha512-f9jSYwKMdXvm44Dmab1vUBnfCDSFfI5rOtvV1W9oKB7WYHR5dGvCC6x68Mk3NUfrdmNoMVHGoh6JT9HCVMlMow==}
|
resolution: {integrity: sha512-f9jSYwKMdXvm44Dmab1vUBnfCDSFfI5rOtvV1W9oKB7WYHR5dGvCC6x68Mk3NUfrdmNoMVHGoh6JT9HCVMlMow==}
|
||||||
engines: {node: '>=18'}
|
engines: {node: '>=18'}
|
||||||
@ -426,6 +453,10 @@ packages:
|
|||||||
resolution: {integrity: sha512-0M+qjp+clUD0R1E5eWQFhxEvWLNaOtGQRUaBn8CUABnSKredagq92hUS9VjOzGsTm37xLfpaxl97AVtbeOsHew==}
|
resolution: {integrity: sha512-0M+qjp+clUD0R1E5eWQFhxEvWLNaOtGQRUaBn8CUABnSKredagq92hUS9VjOzGsTm37xLfpaxl97AVtbeOsHew==}
|
||||||
engines: {node: '>=18'}
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
|
'@ai-sdk/provider@1.1.2':
|
||||||
|
resolution: {integrity: sha512-ITdgNilJZwLKR7X5TnUr1BsQW6UTX5yFp0h66Nfx8XjBYkWD9W3yugr50GOz3CnE9m/U/Cd5OyEbTMI0rgi6ZQ==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
'@ai-sdk/react@1.2.5':
|
'@ai-sdk/react@1.2.5':
|
||||||
resolution: {integrity: sha512-0jOop3S2WkDOdO4X5I+5fTGqZlNX8/h1T1eYokpkR9xh8Vmrxqw8SsovqGvrddTsZykH8uXRsvI+G4FTyy894A==}
|
resolution: {integrity: sha512-0jOop3S2WkDOdO4X5I+5fTGqZlNX8/h1T1eYokpkR9xh8Vmrxqw8SsovqGvrddTsZykH8uXRsvI+G4FTyy894A==}
|
||||||
engines: {node: '>=18'}
|
engines: {node: '>=18'}
|
||||||
@ -2033,6 +2064,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==}
|
resolution: {integrity: sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==}
|
||||||
engines: {node: '>=10.0.0'}
|
engines: {node: '>=10.0.0'}
|
||||||
|
|
||||||
|
bignumber.js@9.2.0:
|
||||||
|
resolution: {integrity: sha512-JocpCSOixzy5XFJi2ub6IMmV/G9i8Lrm2lZvwBv9xPdglmZM0ufDVBbjbrfU/zuLvBfD7Bv2eYxz9i+OHTgkew==}
|
||||||
|
|
||||||
bin-links@4.0.4:
|
bin-links@4.0.4:
|
||||||
resolution: {integrity: sha512-cMtq4W5ZsEwcutJrVId+a/tjt8GSbS+h0oNkdl6+6rBuEv8Ot33Bevj5KPm40t309zuhVic8NjpuL42QCiJWWA==}
|
resolution: {integrity: sha512-cMtq4W5ZsEwcutJrVId+a/tjt8GSbS+h0oNkdl6+6rBuEv8Ot33Bevj5KPm40t309zuhVic8NjpuL42QCiJWWA==}
|
||||||
engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0}
|
engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0}
|
||||||
@ -2089,6 +2123,9 @@ packages:
|
|||||||
buffer-crc32@0.2.13:
|
buffer-crc32@0.2.13:
|
||||||
resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==}
|
resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==}
|
||||||
|
|
||||||
|
buffer-equal-constant-time@1.0.1:
|
||||||
|
resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==}
|
||||||
|
|
||||||
buffer-from@1.1.2:
|
buffer-from@1.1.2:
|
||||||
resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==}
|
resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==}
|
||||||
|
|
||||||
@ -2477,6 +2514,9 @@ packages:
|
|||||||
eastasianwidth@0.2.0:
|
eastasianwidth@0.2.0:
|
||||||
resolution: {integrity: sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==}
|
resolution: {integrity: sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==}
|
||||||
|
|
||||||
|
ecdsa-sig-formatter@1.0.11:
|
||||||
|
resolution: {integrity: sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==}
|
||||||
|
|
||||||
editorconfig@1.0.4:
|
editorconfig@1.0.4:
|
||||||
resolution: {integrity: sha512-L9Qe08KWTlqYMVvMcTIvMAdl1cDUubzRNYL+WfA4bLDMHe4nemKkpmYzkznE1FwLKu0EEmy6obgQKzMJrg4x9Q==}
|
resolution: {integrity: sha512-L9Qe08KWTlqYMVvMcTIvMAdl1cDUubzRNYL+WfA4bLDMHe4nemKkpmYzkznE1FwLKu0EEmy6obgQKzMJrg4x9Q==}
|
||||||
engines: {node: '>=14'}
|
engines: {node: '>=14'}
|
||||||
@ -2615,6 +2655,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==}
|
resolution: {integrity: sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==}
|
||||||
engines: {node: '>= 0.10.0'}
|
engines: {node: '>= 0.10.0'}
|
||||||
|
|
||||||
|
extend@3.0.2:
|
||||||
|
resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==}
|
||||||
|
|
||||||
extract-zip@2.0.1:
|
extract-zip@2.0.1:
|
||||||
resolution: {integrity: sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==}
|
resolution: {integrity: sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==}
|
||||||
engines: {node: '>= 10.17.0'}
|
engines: {node: '>= 10.17.0'}
|
||||||
@ -2740,6 +2783,14 @@ packages:
|
|||||||
function-bind@1.1.2:
|
function-bind@1.1.2:
|
||||||
resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==}
|
resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==}
|
||||||
|
|
||||||
|
gaxios@6.7.1:
|
||||||
|
resolution: {integrity: sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==}
|
||||||
|
engines: {node: '>=14'}
|
||||||
|
|
||||||
|
gcp-metadata@6.1.1:
|
||||||
|
resolution: {integrity: sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==}
|
||||||
|
engines: {node: '>=14'}
|
||||||
|
|
||||||
generic-pool@3.9.0:
|
generic-pool@3.9.0:
|
||||||
resolution: {integrity: sha512-hymDOu5B53XvN4QT9dBmZxPX4CWhBPPLguTZ9MMFeFa/Kg0xWVfylOVNlJji/E7yTZWFd/q9GO5TxDLq156D7g==}
|
resolution: {integrity: sha512-hymDOu5B53XvN4QT9dBmZxPX4CWhBPPLguTZ9MMFeFa/Kg0xWVfylOVNlJji/E7yTZWFd/q9GO5TxDLq156D7g==}
|
||||||
engines: {node: '>= 4'}
|
engines: {node: '>= 4'}
|
||||||
@ -2789,6 +2840,14 @@ packages:
|
|||||||
resolution: {integrity: sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==}
|
resolution: {integrity: sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==}
|
||||||
engines: {node: '>=4'}
|
engines: {node: '>=4'}
|
||||||
|
|
||||||
|
google-auth-library@9.15.1:
|
||||||
|
resolution: {integrity: sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==}
|
||||||
|
engines: {node: '>=14'}
|
||||||
|
|
||||||
|
google-logging-utils@0.0.2:
|
||||||
|
resolution: {integrity: sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==}
|
||||||
|
engines: {node: '>=14'}
|
||||||
|
|
||||||
gopd@1.0.1:
|
gopd@1.0.1:
|
||||||
resolution: {integrity: sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==}
|
resolution: {integrity: sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==}
|
||||||
|
|
||||||
@ -2799,6 +2858,10 @@ packages:
|
|||||||
graceful-fs@4.2.11:
|
graceful-fs@4.2.11:
|
||||||
resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==}
|
resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==}
|
||||||
|
|
||||||
|
gtoken@7.1.0:
|
||||||
|
resolution: {integrity: sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==}
|
||||||
|
engines: {node: '>=14.0.0'}
|
||||||
|
|
||||||
handlebars@4.7.8:
|
handlebars@4.7.8:
|
||||||
resolution: {integrity: sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ==}
|
resolution: {integrity: sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ==}
|
||||||
engines: {node: '>=0.4.7'}
|
engines: {node: '>=0.4.7'}
|
||||||
@ -3229,6 +3292,9 @@ packages:
|
|||||||
engines: {node: '>=4'}
|
engines: {node: '>=4'}
|
||||||
hasBin: true
|
hasBin: true
|
||||||
|
|
||||||
|
json-bigint@1.0.0:
|
||||||
|
resolution: {integrity: sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==}
|
||||||
|
|
||||||
json-parse-even-better-errors@2.3.1:
|
json-parse-even-better-errors@2.3.1:
|
||||||
resolution: {integrity: sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==}
|
resolution: {integrity: sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==}
|
||||||
|
|
||||||
@ -3262,6 +3328,12 @@ packages:
|
|||||||
jszip@3.10.1:
|
jszip@3.10.1:
|
||||||
resolution: {integrity: sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==}
|
resolution: {integrity: sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==}
|
||||||
|
|
||||||
|
jwa@2.0.0:
|
||||||
|
resolution: {integrity: sha512-jrZ2Qx916EA+fq9cEAeCROWPTfCwi1IVHqT2tapuqLEVVDKFDENFw1oL+MwrTvH6msKxsd1YTDVw6uKEcsrLEA==}
|
||||||
|
|
||||||
|
jws@4.0.0:
|
||||||
|
resolution: {integrity: sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==}
|
||||||
|
|
||||||
kareem@2.6.3:
|
kareem@2.6.3:
|
||||||
resolution: {integrity: sha512-C3iHfuGUXK2u8/ipq9LfjFfXFxAZMQJJq7vLS45r3D9Y2xQ/m4S8zaR4zMLFWh9AsNPXmcFfUDhTEO8UIC/V6Q==}
|
resolution: {integrity: sha512-C3iHfuGUXK2u8/ipq9LfjFfXFxAZMQJJq7vLS45r3D9Y2xQ/m4S8zaR4zMLFWh9AsNPXmcFfUDhTEO8UIC/V6Q==}
|
||||||
engines: {node: '>=12.0.0'}
|
engines: {node: '>=12.0.0'}
|
||||||
@ -4666,8 +4738,8 @@ packages:
|
|||||||
engines: {node: '>=14.17'}
|
engines: {node: '>=14.17'}
|
||||||
hasBin: true
|
hasBin: true
|
||||||
|
|
||||||
typescript@5.8.2:
|
typescript@5.8.3:
|
||||||
resolution: {integrity: sha512-aJn6wq13/afZp/jT9QZmwEjDqqvSGp1VT5GVg+f/t6/oVyrgXM6BY1h9BRh/O5p3PlUPAe+WuiEZOmb/49RqoQ==}
|
resolution: {integrity: sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==}
|
||||||
engines: {node: '>=14.17'}
|
engines: {node: '>=14.17'}
|
||||||
hasBin: true
|
hasBin: true
|
||||||
|
|
||||||
@ -4966,6 +5038,12 @@ snapshots:
|
|||||||
'@ai-sdk/provider-utils': 2.2.3(zod@3.24.2)
|
'@ai-sdk/provider-utils': 2.2.3(zod@3.24.2)
|
||||||
zod: 3.24.2
|
zod: 3.24.2
|
||||||
|
|
||||||
|
'@ai-sdk/anthropic@1.2.8(zod@3.24.2)':
|
||||||
|
dependencies:
|
||||||
|
'@ai-sdk/provider': 1.1.2
|
||||||
|
'@ai-sdk/provider-utils': 2.2.6(zod@3.24.2)
|
||||||
|
zod: 3.24.2
|
||||||
|
|
||||||
'@ai-sdk/deepinfra@0.2.4(zod@3.24.2)':
|
'@ai-sdk/deepinfra@0.2.4(zod@3.24.2)':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@ai-sdk/openai-compatible': 0.2.4(zod@3.24.2)
|
'@ai-sdk/openai-compatible': 0.2.4(zod@3.24.2)
|
||||||
@ -4980,6 +5058,24 @@ snapshots:
|
|||||||
'@ai-sdk/provider-utils': 2.2.3(zod@3.24.2)
|
'@ai-sdk/provider-utils': 2.2.3(zod@3.24.2)
|
||||||
zod: 3.24.2
|
zod: 3.24.2
|
||||||
|
|
||||||
|
'@ai-sdk/google-vertex@2.2.13(encoding@0.1.13)(zod@3.24.2)':
|
||||||
|
dependencies:
|
||||||
|
'@ai-sdk/anthropic': 1.2.8(zod@3.24.2)
|
||||||
|
'@ai-sdk/google': 1.2.10(zod@3.24.2)
|
||||||
|
'@ai-sdk/provider': 1.1.2
|
||||||
|
'@ai-sdk/provider-utils': 2.2.6(zod@3.24.2)
|
||||||
|
google-auth-library: 9.15.1(encoding@0.1.13)
|
||||||
|
zod: 3.24.2
|
||||||
|
transitivePeerDependencies:
|
||||||
|
- encoding
|
||||||
|
- supports-color
|
||||||
|
|
||||||
|
'@ai-sdk/google@1.2.10(zod@3.24.2)':
|
||||||
|
dependencies:
|
||||||
|
'@ai-sdk/provider': 1.1.2
|
||||||
|
'@ai-sdk/provider-utils': 2.2.6(zod@3.24.2)
|
||||||
|
zod: 3.24.2
|
||||||
|
|
||||||
'@ai-sdk/google@1.2.3(zod@3.24.2)':
|
'@ai-sdk/google@1.2.3(zod@3.24.2)':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@ai-sdk/provider': 1.1.0
|
'@ai-sdk/provider': 1.1.0
|
||||||
@ -5036,6 +5132,13 @@ snapshots:
|
|||||||
secure-json-parse: 2.7.0
|
secure-json-parse: 2.7.0
|
||||||
zod: 3.24.2
|
zod: 3.24.2
|
||||||
|
|
||||||
|
'@ai-sdk/provider-utils@2.2.6(zod@3.24.2)':
|
||||||
|
dependencies:
|
||||||
|
'@ai-sdk/provider': 1.1.2
|
||||||
|
nanoid: 3.3.8
|
||||||
|
secure-json-parse: 2.7.0
|
||||||
|
zod: 3.24.2
|
||||||
|
|
||||||
'@ai-sdk/provider@1.0.8':
|
'@ai-sdk/provider@1.0.8':
|
||||||
dependencies:
|
dependencies:
|
||||||
json-schema: 0.4.0
|
json-schema: 0.4.0
|
||||||
@ -5048,6 +5151,10 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
json-schema: 0.4.0
|
json-schema: 0.4.0
|
||||||
|
|
||||||
|
'@ai-sdk/provider@1.1.2':
|
||||||
|
dependencies:
|
||||||
|
json-schema: 0.4.0
|
||||||
|
|
||||||
'@ai-sdk/react@1.2.5(react@18.3.1)(zod@3.24.2)':
|
'@ai-sdk/react@1.2.5(react@18.3.1)(zod@3.24.2)':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@ai-sdk/provider-utils': 2.2.3(zod@3.24.2)
|
'@ai-sdk/provider-utils': 2.2.3(zod@3.24.2)
|
||||||
@ -7448,6 +7555,8 @@ snapshots:
|
|||||||
|
|
||||||
basic-ftp@5.0.5: {}
|
basic-ftp@5.0.5: {}
|
||||||
|
|
||||||
|
bignumber.js@9.2.0: {}
|
||||||
|
|
||||||
bin-links@4.0.4:
|
bin-links@4.0.4:
|
||||||
dependencies:
|
dependencies:
|
||||||
cmd-shim: 6.0.3
|
cmd-shim: 6.0.3
|
||||||
@ -7516,6 +7625,8 @@ snapshots:
|
|||||||
|
|
||||||
buffer-crc32@0.2.13: {}
|
buffer-crc32@0.2.13: {}
|
||||||
|
|
||||||
|
buffer-equal-constant-time@1.0.1: {}
|
||||||
|
|
||||||
buffer-from@1.1.2: {}
|
buffer-from@1.1.2: {}
|
||||||
|
|
||||||
buffer@5.7.1:
|
buffer@5.7.1:
|
||||||
@ -7899,6 +8010,10 @@ snapshots:
|
|||||||
|
|
||||||
eastasianwidth@0.2.0: {}
|
eastasianwidth@0.2.0: {}
|
||||||
|
|
||||||
|
ecdsa-sig-formatter@1.0.11:
|
||||||
|
dependencies:
|
||||||
|
safe-buffer: 5.2.1
|
||||||
|
|
||||||
editorconfig@1.0.4:
|
editorconfig@1.0.4:
|
||||||
dependencies:
|
dependencies:
|
||||||
'@one-ini/wasm': 0.1.1
|
'@one-ini/wasm': 0.1.1
|
||||||
@ -8048,6 +8163,8 @@ snapshots:
|
|||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- supports-color
|
- supports-color
|
||||||
|
|
||||||
|
extend@3.0.2: {}
|
||||||
|
|
||||||
extract-zip@2.0.1:
|
extract-zip@2.0.1:
|
||||||
dependencies:
|
dependencies:
|
||||||
debug: 4.3.5
|
debug: 4.3.5
|
||||||
@ -8174,6 +8291,26 @@ snapshots:
|
|||||||
|
|
||||||
function-bind@1.1.2: {}
|
function-bind@1.1.2: {}
|
||||||
|
|
||||||
|
gaxios@6.7.1(encoding@0.1.13):
|
||||||
|
dependencies:
|
||||||
|
extend: 3.0.2
|
||||||
|
https-proxy-agent: 7.0.6
|
||||||
|
is-stream: 2.0.1
|
||||||
|
node-fetch: 2.7.0(encoding@0.1.13)
|
||||||
|
uuid: 9.0.1
|
||||||
|
transitivePeerDependencies:
|
||||||
|
- encoding
|
||||||
|
- supports-color
|
||||||
|
|
||||||
|
gcp-metadata@6.1.1(encoding@0.1.13):
|
||||||
|
dependencies:
|
||||||
|
gaxios: 6.7.1(encoding@0.1.13)
|
||||||
|
google-logging-utils: 0.0.2
|
||||||
|
json-bigint: 1.0.0
|
||||||
|
transitivePeerDependencies:
|
||||||
|
- encoding
|
||||||
|
- supports-color
|
||||||
|
|
||||||
generic-pool@3.9.0: {}
|
generic-pool@3.9.0: {}
|
||||||
|
|
||||||
gensync@1.0.0-beta.2: {}
|
gensync@1.0.0-beta.2: {}
|
||||||
@ -8229,6 +8366,20 @@ snapshots:
|
|||||||
|
|
||||||
globals@11.12.0: {}
|
globals@11.12.0: {}
|
||||||
|
|
||||||
|
google-auth-library@9.15.1(encoding@0.1.13):
|
||||||
|
dependencies:
|
||||||
|
base64-js: 1.5.1
|
||||||
|
ecdsa-sig-formatter: 1.0.11
|
||||||
|
gaxios: 6.7.1(encoding@0.1.13)
|
||||||
|
gcp-metadata: 6.1.1(encoding@0.1.13)
|
||||||
|
gtoken: 7.1.0(encoding@0.1.13)
|
||||||
|
jws: 4.0.0
|
||||||
|
transitivePeerDependencies:
|
||||||
|
- encoding
|
||||||
|
- supports-color
|
||||||
|
|
||||||
|
google-logging-utils@0.0.2: {}
|
||||||
|
|
||||||
gopd@1.0.1:
|
gopd@1.0.1:
|
||||||
dependencies:
|
dependencies:
|
||||||
get-intrinsic: 1.2.4
|
get-intrinsic: 1.2.4
|
||||||
@ -8239,6 +8390,14 @@ snapshots:
|
|||||||
|
|
||||||
graceful-fs@4.2.11: {}
|
graceful-fs@4.2.11: {}
|
||||||
|
|
||||||
|
gtoken@7.1.0(encoding@0.1.13):
|
||||||
|
dependencies:
|
||||||
|
gaxios: 6.7.1(encoding@0.1.13)
|
||||||
|
jws: 4.0.0
|
||||||
|
transitivePeerDependencies:
|
||||||
|
- encoding
|
||||||
|
- supports-color
|
||||||
|
|
||||||
handlebars@4.7.8:
|
handlebars@4.7.8:
|
||||||
dependencies:
|
dependencies:
|
||||||
minimist: 1.2.8
|
minimist: 1.2.8
|
||||||
@ -8921,6 +9080,10 @@ snapshots:
|
|||||||
|
|
||||||
jsesc@2.5.2: {}
|
jsesc@2.5.2: {}
|
||||||
|
|
||||||
|
json-bigint@1.0.0:
|
||||||
|
dependencies:
|
||||||
|
bignumber.js: 9.2.0
|
||||||
|
|
||||||
json-parse-even-better-errors@2.3.1: {}
|
json-parse-even-better-errors@2.3.1: {}
|
||||||
|
|
||||||
json-schema-to-zod@2.3.0: {}
|
json-schema-to-zod@2.3.0: {}
|
||||||
@ -8952,6 +9115,17 @@ snapshots:
|
|||||||
readable-stream: 2.3.8
|
readable-stream: 2.3.8
|
||||||
setimmediate: 1.0.5
|
setimmediate: 1.0.5
|
||||||
|
|
||||||
|
jwa@2.0.0:
|
||||||
|
dependencies:
|
||||||
|
buffer-equal-constant-time: 1.0.1
|
||||||
|
ecdsa-sig-formatter: 1.0.11
|
||||||
|
safe-buffer: 5.2.1
|
||||||
|
|
||||||
|
jws@4.0.0:
|
||||||
|
dependencies:
|
||||||
|
jwa: 2.0.0
|
||||||
|
safe-buffer: 5.2.1
|
||||||
|
|
||||||
kareem@2.6.3: {}
|
kareem@2.6.3: {}
|
||||||
|
|
||||||
keyword-extractor@0.0.28: {}
|
keyword-extractor@0.0.28: {}
|
||||||
@ -9663,7 +9837,7 @@ snapshots:
|
|||||||
csv-parse: 5.5.6
|
csv-parse: 5.5.6
|
||||||
gpt3-tokenizer: 1.1.5
|
gpt3-tokenizer: 1.1.5
|
||||||
openai: 3.3.0
|
openai: 3.3.0
|
||||||
typescript: 5.8.2
|
typescript: 5.8.3
|
||||||
uuid: 9.0.1
|
uuid: 9.0.1
|
||||||
zod: 3.24.2
|
zod: 3.24.2
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
@ -10277,7 +10451,7 @@ snapshots:
|
|||||||
|
|
||||||
typescript@5.4.5: {}
|
typescript@5.4.5: {}
|
||||||
|
|
||||||
typescript@5.8.2: {}
|
typescript@5.8.3: {}
|
||||||
|
|
||||||
typesense@1.8.2(@babel/runtime@7.24.6):
|
typesense@1.8.2(@babel/runtime@7.24.6):
|
||||||
dependencies:
|
dependencies:
|
||||||
|
@ -314,42 +314,7 @@ export async function rerankLinksWithLLM(
|
|||||||
});
|
});
|
||||||
|
|
||||||
completion = await completionPromise;
|
completion = await completionPromise;
|
||||||
// completion = await Promise.race([
|
|
||||||
// completionPromise,
|
|
||||||
// timeoutPromise,
|
|
||||||
// ]);
|
|
||||||
|
|
||||||
console.log(
|
|
||||||
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!˜",
|
|
||||||
);
|
|
||||||
console.log(
|
|
||||||
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
|
|
||||||
);
|
|
||||||
console.log(
|
|
||||||
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
|
|
||||||
);
|
|
||||||
console.log(
|
|
||||||
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
|
|
||||||
);
|
|
||||||
console.log(
|
|
||||||
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
|
|
||||||
);
|
|
||||||
console.log(
|
|
||||||
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
|
|
||||||
);
|
|
||||||
console.log({ completion });
|
|
||||||
console.log(
|
|
||||||
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
|
|
||||||
);
|
|
||||||
console.log(
|
|
||||||
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
|
|
||||||
);
|
|
||||||
console.log(
|
|
||||||
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
|
|
||||||
);
|
|
||||||
console.log(
|
|
||||||
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
|
|
||||||
);
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.warn(
|
console.warn(
|
||||||
`Error processing chunk ${chunkIndex + 1} attempt ${retry + 1}:`,
|
`Error processing chunk ${chunkIndex + 1} attempt ${retry + 1}:`,
|
||||||
|
@ -6,6 +6,8 @@ import { google } from "@ai-sdk/google";
|
|||||||
import { createOpenRouter } from "@openrouter/ai-sdk-provider";
|
import { createOpenRouter } from "@openrouter/ai-sdk-provider";
|
||||||
import { fireworks } from "@ai-sdk/fireworks";
|
import { fireworks } from "@ai-sdk/fireworks";
|
||||||
import { deepinfra } from "@ai-sdk/deepinfra";
|
import { deepinfra } from "@ai-sdk/deepinfra";
|
||||||
|
import { createVertex } from "@ai-sdk/google-vertex";
|
||||||
|
|
||||||
type Provider =
|
type Provider =
|
||||||
| "openai"
|
| "openai"
|
||||||
| "ollama"
|
| "ollama"
|
||||||
@ -14,7 +16,8 @@ type Provider =
|
|||||||
| "google"
|
| "google"
|
||||||
| "openrouter"
|
| "openrouter"
|
||||||
| "fireworks"
|
| "fireworks"
|
||||||
| "deepinfra";
|
| "deepinfra"
|
||||||
|
| "vertex";
|
||||||
const defaultProvider: Provider = process.env.OLLAMA_BASE_URL
|
const defaultProvider: Provider = process.env.OLLAMA_BASE_URL
|
||||||
? "ollama"
|
? "ollama"
|
||||||
: "openai";
|
: "openai";
|
||||||
@ -32,6 +35,13 @@ const providerList: Record<Provider, any> = {
|
|||||||
}),
|
}),
|
||||||
fireworks, //FIREWORKS_API_KEY
|
fireworks, //FIREWORKS_API_KEY
|
||||||
deepinfra, //DEEPINFRA_API_KEY
|
deepinfra, //DEEPINFRA_API_KEY
|
||||||
|
vertex: createVertex({
|
||||||
|
project: "firecrawl",
|
||||||
|
location: "us-central1",
|
||||||
|
googleAuthOptions: {
|
||||||
|
keyFile: "./gke-key.json",
|
||||||
|
},
|
||||||
|
}),
|
||||||
};
|
};
|
||||||
|
|
||||||
export function getModel(name: string, provider: Provider = defaultProvider) {
|
export function getModel(name: string, provider: Provider = defaultProvider) {
|
||||||
|
@ -7,6 +7,7 @@ import {
|
|||||||
import { smartScrape } from "./smartScrape";
|
import { smartScrape } from "./smartScrape";
|
||||||
import { parseMarkdown } from "../../../lib/html-to-markdown";
|
import { parseMarkdown } from "../../../lib/html-to-markdown";
|
||||||
import { getModel } from "../../../lib/generic-ai";
|
import { getModel } from "../../../lib/generic-ai";
|
||||||
|
import { TokenUsage } from "../../../controllers/v1/types";
|
||||||
|
|
||||||
const commonSmartScrapeProperties = {
|
const commonSmartScrapeProperties = {
|
||||||
shouldUseSmartscrape: {
|
shouldUseSmartscrape: {
|
||||||
@ -184,6 +185,9 @@ export async function extractData({
|
|||||||
extractOptions: GenerateCompletionsOptions;
|
extractOptions: GenerateCompletionsOptions;
|
||||||
urls: string[];
|
urls: string[];
|
||||||
}): Promise<{ extractedDataArray: any[]; warning: any }> {
|
}): Promise<{ extractedDataArray: any[]; warning: any }> {
|
||||||
|
// TODO: receive from user
|
||||||
|
const useSmartScrape = true;
|
||||||
|
|
||||||
//WRAP SCHEMA
|
//WRAP SCHEMA
|
||||||
const schema = extractOptions.options.schema;
|
const schema = extractOptions.options.schema;
|
||||||
const logger = extractOptions.logger;
|
const logger = extractOptions.logger;
|
||||||
@ -197,26 +201,23 @@ export async function extractData({
|
|||||||
console.log("schema", schema);
|
console.log("schema", schema);
|
||||||
console.log("schemaToUse", schemaToUse);
|
console.log("schemaToUse", schemaToUse);
|
||||||
|
|
||||||
let extract, warning, totalUsage;
|
let extract: any,
|
||||||
|
warning: string | undefined,
|
||||||
|
totalUsage: TokenUsage | undefined;
|
||||||
|
|
||||||
|
// checks if using smartScrape is needed for this case
|
||||||
try {
|
try {
|
||||||
const { extract: e, warning: w, totalUsage: t } = await generateCompletions(
|
const { extract: e, warning: w, totalUsage: t } = await generateCompletions(
|
||||||
{ ...extractOptionsNewSchema, model: getModel("gemini-2.5-pro-exp-03-25", "google") }
|
{ ...extractOptionsNewSchema,
|
||||||
);
|
model: getModel("gemini-2.5-pro-preview-03-25", "vertex"),
|
||||||
|
retryModel: getModel("o3-mini", "openai"),
|
||||||
|
});
|
||||||
extract = e;
|
extract = e;
|
||||||
warning = w;
|
warning = w;
|
||||||
totalUsage = t;
|
totalUsage = t;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log("failed during extractSmartScrape.ts:generateCompletions", error);
|
console.log("failed during extractSmartScrape.ts:generateCompletions", error);
|
||||||
}
|
}
|
||||||
console.log("extract", extract);
|
|
||||||
|
|
||||||
// const {
|
|
||||||
// extractedData,
|
|
||||||
// shouldUseSmartscrape,
|
|
||||||
// smartscrape_reasoning,
|
|
||||||
// smartscrape_prompt,
|
|
||||||
// } = processSmartScrapeResult(extract, logger);
|
|
||||||
|
|
||||||
let extractedData = extract?.extractedData;
|
let extractedData = extract?.extractedData;
|
||||||
|
|
||||||
@ -224,7 +225,7 @@ export async function extractData({
|
|||||||
console.log("smartscrape_reasoning", extract?.smartscrape_reasoning);
|
console.log("smartscrape_reasoning", extract?.smartscrape_reasoning);
|
||||||
console.log("smartscrape_prompt", extract?.smartscrape_prompt);
|
console.log("smartscrape_prompt", extract?.smartscrape_prompt);
|
||||||
try {
|
try {
|
||||||
if (extract?.shouldUseSmartscrape) {
|
if (useSmartScrape && extract?.shouldUseSmartscrape) {
|
||||||
let smartscrapeResults;
|
let smartscrapeResults;
|
||||||
if (isSingleUrl) {
|
if (isSingleUrl) {
|
||||||
smartscrapeResults = [
|
smartscrapeResults = [
|
||||||
|
@ -55,14 +55,16 @@ export async function smartScrape(
|
|||||||
prompt,
|
prompt,
|
||||||
models: {
|
models: {
|
||||||
thinkingModel: {
|
thinkingModel: {
|
||||||
model: "gemini-2.5-pro-exp-03-25",
|
model: "gemini-2.5-pro-preview-03-25",
|
||||||
provider: "google",
|
provider: "vertex",
|
||||||
supportTools: true,
|
supportTools: true,
|
||||||
toolChoice: "required",
|
toolChoice: "required",
|
||||||
cost: {
|
cost: {
|
||||||
input: 1.3,
|
input: 1.3,
|
||||||
output: 5,
|
output: 5,
|
||||||
},
|
},
|
||||||
|
retryModel: "qwen-qwq-32b",
|
||||||
|
retryProvider: "groq"
|
||||||
},
|
},
|
||||||
toolModel: {
|
toolModel: {
|
||||||
model: "gemini-2.0-flash",
|
model: "gemini-2.0-flash",
|
||||||
|
@ -9,7 +9,7 @@ import { Logger } from "winston";
|
|||||||
import { EngineResultsTracker, Meta } from "..";
|
import { EngineResultsTracker, Meta } from "..";
|
||||||
import { logger } from "../../../lib/logger";
|
import { logger } from "../../../lib/logger";
|
||||||
import { modelPrices } from "../../../lib/extract/usage/model-prices";
|
import { modelPrices } from "../../../lib/extract/usage/model-prices";
|
||||||
import { generateObject, generateText, LanguageModel } from "ai";
|
import { generateObject, generateText, LanguageModel, NoObjectGeneratedError } from "ai";
|
||||||
import { jsonSchema } from "ai";
|
import { jsonSchema } from "ai";
|
||||||
import { getModel } from "../../../lib/generic-ai";
|
import { getModel } from "../../../lib/generic-ai";
|
||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
@ -178,6 +178,7 @@ export type GenerateCompletionsOptions = {
|
|||||||
isExtractEndpoint?: boolean;
|
isExtractEndpoint?: boolean;
|
||||||
mode?: "object" | "no-object";
|
mode?: "object" | "no-object";
|
||||||
providerOptions?: LanguageModelV1ProviderMetadata;
|
providerOptions?: LanguageModelV1ProviderMetadata;
|
||||||
|
retryModel?: LanguageModel;
|
||||||
};
|
};
|
||||||
export async function generateCompletions({
|
export async function generateCompletions({
|
||||||
logger,
|
logger,
|
||||||
@ -188,6 +189,7 @@ export async function generateCompletions({
|
|||||||
model = getModel("gpt-4o-mini"),
|
model = getModel("gpt-4o-mini"),
|
||||||
mode = "object",
|
mode = "object",
|
||||||
providerOptions,
|
providerOptions,
|
||||||
|
retryModel = getModel("claude-3-7-sonnet-latest", "anthropic"),
|
||||||
}: GenerateCompletionsOptions): Promise<{
|
}: GenerateCompletionsOptions): Promise<{
|
||||||
extract: any;
|
extract: any;
|
||||||
numTokens: number;
|
numTokens: number;
|
||||||
@ -197,12 +199,14 @@ export async function generateCompletions({
|
|||||||
}> {
|
}> {
|
||||||
let extract: any;
|
let extract: any;
|
||||||
let warning: string | undefined;
|
let warning: string | undefined;
|
||||||
|
let currentModel = model;
|
||||||
|
let lastError: Error | null = null;
|
||||||
|
|
||||||
if (markdown === undefined) {
|
if (markdown === undefined) {
|
||||||
throw new Error("document.markdown is undefined -- this is unexpected");
|
throw new Error("document.markdown is undefined -- this is unexpected");
|
||||||
}
|
}
|
||||||
|
|
||||||
const { maxInputTokens, maxOutputTokens } = getModelLimits(model.modelId);
|
const { maxInputTokens, maxOutputTokens } = getModelLimits(currentModel.modelId);
|
||||||
// Calculate 80% of max input tokens (for content)
|
// Calculate 80% of max input tokens (for content)
|
||||||
const maxTokensSafe = Math.floor(maxInputTokens * 0.8);
|
const maxTokensSafe = Math.floor(maxInputTokens * 0.8);
|
||||||
|
|
||||||
@ -224,31 +228,72 @@ export async function generateCompletions({
|
|||||||
: `Transform the following content into structured JSON output based on the provided schema if any.\n\n${markdown}`;
|
: `Transform the following content into structured JSON output based on the provided schema if any.\n\n${markdown}`;
|
||||||
|
|
||||||
if (mode === "no-object") {
|
if (mode === "no-object") {
|
||||||
const result = await generateText({
|
try {
|
||||||
model: model,
|
const result = await generateText({
|
||||||
prompt: options.prompt + (markdown ? `\n\nData:${markdown}` : ""),
|
model: currentModel,
|
||||||
// temperature: options.temperature ?? 0,
|
prompt: options.prompt + (markdown ? `\n\nData:${markdown}` : ""),
|
||||||
system: options.systemPrompt,
|
system: options.systemPrompt,
|
||||||
providerOptions: {
|
providerOptions: {
|
||||||
anthropic: {
|
anthropic: {
|
||||||
thinking: { type: "enabled", budgetTokens: 12000 },
|
thinking: { type: "enabled", budgetTokens: 12000 },
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
});
|
||||||
});
|
|
||||||
|
|
||||||
extract = result.text;
|
extract = result.text;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
extract,
|
extract,
|
||||||
warning,
|
warning,
|
||||||
numTokens,
|
numTokens,
|
||||||
totalUsage: {
|
totalUsage: {
|
||||||
promptTokens: numTokens,
|
promptTokens: numTokens,
|
||||||
completionTokens: result.usage?.completionTokens ?? 0,
|
completionTokens: result.usage?.completionTokens ?? 0,
|
||||||
totalTokens: numTokens + (result.usage?.completionTokens ?? 0),
|
totalTokens: numTokens + (result.usage?.completionTokens ?? 0),
|
||||||
},
|
},
|
||||||
model: model.modelId,
|
model: currentModel.modelId,
|
||||||
};
|
};
|
||||||
|
} catch (error) {
|
||||||
|
lastError = error as Error;
|
||||||
|
if (error.message?.includes("Quota exceeded") || error.message?.includes("rate limit")) {
|
||||||
|
logger.warn("Quota exceeded, retrying with fallback model", { error: lastError.message });
|
||||||
|
currentModel = retryModel;
|
||||||
|
try {
|
||||||
|
const result = await generateText({
|
||||||
|
model: currentModel,
|
||||||
|
prompt: options.prompt + (markdown ? `\n\nData:${markdown}` : ""),
|
||||||
|
system: options.systemPrompt,
|
||||||
|
providerOptions: {
|
||||||
|
anthropic: {
|
||||||
|
thinking: { type: "enabled", budgetTokens: 12000 },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
extract = result.text;
|
||||||
|
|
||||||
|
return {
|
||||||
|
extract,
|
||||||
|
warning,
|
||||||
|
numTokens,
|
||||||
|
totalUsage: {
|
||||||
|
promptTokens: numTokens,
|
||||||
|
completionTokens: result.usage?.completionTokens ?? 0,
|
||||||
|
totalTokens: numTokens + (result.usage?.completionTokens ?? 0),
|
||||||
|
},
|
||||||
|
model: currentModel.modelId,
|
||||||
|
};
|
||||||
|
} catch (retryError) {
|
||||||
|
lastError = retryError as Error;
|
||||||
|
logger.error("Failed with fallback model", {
|
||||||
|
originalError: lastError.message,
|
||||||
|
model: currentModel.modelId
|
||||||
|
});
|
||||||
|
throw lastError;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw lastError;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let schema = options.schema;
|
let schema = options.schema;
|
||||||
@ -305,26 +350,31 @@ export async function generateCompletions({
|
|||||||
} catch (_) {}
|
} catch (_) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
const { text: fixedText } = await generateText({
|
try {
|
||||||
model: model,
|
const { text: fixedText } = await generateText({
|
||||||
prompt: `Fix this JSON that had the following error: ${error}\n\nOriginal text:\n${text}\n\nReturn only the fixed JSON, no explanation.`,
|
model: currentModel,
|
||||||
system:
|
prompt: `Fix this JSON that had the following error: ${error}\n\nOriginal text:\n${text}\n\nReturn only the fixed JSON, no explanation.`,
|
||||||
"You are a JSON repair expert. Your only job is to fix malformed JSON and return valid JSON that matches the original structure and intent as closely as possible. Do not include any explanation or commentary - only return the fixed JSON. Do not return it in a Markdown code block, just plain JSON.",
|
system:
|
||||||
providerOptions: {
|
"You are a JSON repair expert. Your only job is to fix malformed JSON and return valid JSON that matches the original structure and intent as closely as possible. Do not include any explanation or commentary - only return the fixed JSON. Do not return it in a Markdown code block, just plain JSON.",
|
||||||
anthropic: {
|
providerOptions: {
|
||||||
thinking: { type: "enabled", budgetTokens: 12000 },
|
anthropic: {
|
||||||
|
thinking: { type: "enabled", budgetTokens: 12000 },
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
});
|
||||||
});
|
return fixedText;
|
||||||
return fixedText;
|
} catch (repairError) {
|
||||||
|
lastError = repairError as Error;
|
||||||
|
logger.error("Failed to repair JSON", { error: lastError.message });
|
||||||
|
throw lastError;
|
||||||
|
}
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
const generateObjectConfig = {
|
const generateObjectConfig = {
|
||||||
model: model,
|
model: currentModel,
|
||||||
prompt: prompt,
|
prompt: prompt,
|
||||||
providerOptions: providerOptions || undefined,
|
providerOptions: providerOptions || undefined,
|
||||||
// temperature: options.temperature ?? 0,
|
|
||||||
system: options.systemPrompt,
|
system: options.systemPrompt,
|
||||||
...(schema && {
|
...(schema && {
|
||||||
schema: schema instanceof z.ZodType ? schema : jsonSchema(schema),
|
schema: schema instanceof z.ZodType ? schema : jsonSchema(schema),
|
||||||
@ -333,32 +383,67 @@ export async function generateCompletions({
|
|||||||
...repairConfig,
|
...repairConfig,
|
||||||
...(!schema && {
|
...(!schema && {
|
||||||
onError: (error: Error) => {
|
onError: (error: Error) => {
|
||||||
|
lastError = error;
|
||||||
console.error(error);
|
console.error(error);
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
} satisfies Parameters<typeof generateObject>[0];
|
} satisfies Parameters<typeof generateObject>[0];
|
||||||
|
|
||||||
console.log(
|
|
||||||
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
|
|
||||||
);
|
|
||||||
const now = new Date().getTime();
|
const now = new Date().getTime();
|
||||||
console.log(now);
|
|
||||||
console.log({ generateObjectConfig });
|
|
||||||
|
|
||||||
await fs.writeFile(
|
await fs.writeFile(
|
||||||
`logs/generateObjectConfig-${now}.json`,
|
`logs/generateObjectConfig-${now}.json`,
|
||||||
JSON.stringify(generateObjectConfig, null, 2),
|
JSON.stringify(generateObjectConfig, null, 2),
|
||||||
);
|
);
|
||||||
|
|
||||||
const result = await generateObject(generateObjectConfig);
|
let result: { object: any, usage: TokenUsage } | undefined;
|
||||||
extract = result.object;
|
try {
|
||||||
|
result = await generateObject(generateObjectConfig);
|
||||||
|
} catch (error) {
|
||||||
|
lastError = error as Error;
|
||||||
|
if (error.message?.includes("Quota exceeded") || error.message?.includes("rate limit")) {
|
||||||
|
logger.warn("Quota exceeded, retrying with fallback model", { error: lastError.message });
|
||||||
|
currentModel = retryModel;
|
||||||
|
try {
|
||||||
|
const retryConfig = {
|
||||||
|
...generateObjectConfig,
|
||||||
|
model: currentModel,
|
||||||
|
};
|
||||||
|
result = await generateObject(retryConfig);
|
||||||
|
} catch (retryError) {
|
||||||
|
lastError = retryError as Error;
|
||||||
|
logger.error("Failed with fallback model", {
|
||||||
|
originalError: lastError.message,
|
||||||
|
model: currentModel.modelId
|
||||||
|
});
|
||||||
|
throw lastError;
|
||||||
|
}
|
||||||
|
} else if (NoObjectGeneratedError.isInstance(error)) {
|
||||||
|
console.log("No object generated", error);
|
||||||
|
if (error.text && error.text.startsWith("```json") && error?.text.endsWith("```")) {
|
||||||
|
try {
|
||||||
|
extract = JSON.parse(error.text.slice("```json".length, -"```".length).trim());
|
||||||
|
result = {
|
||||||
|
object: extract,
|
||||||
|
usage: {
|
||||||
|
promptTokens: error.usage?.promptTokens ?? 0,
|
||||||
|
completionTokens: error.usage?.completionTokens ?? 0,
|
||||||
|
totalTokens: error.usage?.totalTokens ?? 0,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
} catch (parseError) {
|
||||||
|
lastError = parseError as Error;
|
||||||
|
logger.error("Failed to parse JSON from error text", { error: lastError.message });
|
||||||
|
throw lastError;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw lastError;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw lastError;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const now2 = new Date().getTime();
|
extract = result?.object;
|
||||||
console.log(">>>>>>", now2 - now);
|
|
||||||
console.log({ extract });
|
|
||||||
console.log(
|
|
||||||
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
|
|
||||||
);
|
|
||||||
|
|
||||||
// If the users actually wants the items object, they can specify it as 'required' in the schema
|
// If the users actually wants the items object, they can specify it as 'required' in the schema
|
||||||
// otherwise, we just return the items array
|
// otherwise, we just return the items array
|
||||||
@ -383,13 +468,19 @@ export async function generateCompletions({
|
|||||||
completionTokens,
|
completionTokens,
|
||||||
totalTokens: promptTokens + completionTokens,
|
totalTokens: promptTokens + completionTokens,
|
||||||
},
|
},
|
||||||
model: model.modelId,
|
model: currentModel.modelId,
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
lastError = error as Error;
|
||||||
if (error.message?.includes("refused")) {
|
if (error.message?.includes("refused")) {
|
||||||
throw new LLMRefusalError(error.message);
|
throw new LLMRefusalError(error.message);
|
||||||
}
|
}
|
||||||
throw error;
|
logger.error("LLM extraction failed", {
|
||||||
|
error: lastError.message,
|
||||||
|
model: currentModel.modelId,
|
||||||
|
mode
|
||||||
|
});
|
||||||
|
throw lastError;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -412,9 +503,10 @@ export async function performLLMExtract(
|
|||||||
// ... existing model and provider options ...
|
// ... existing model and provider options ...
|
||||||
// model: getModel("o3-mini", "openai"), // Keeping existing model selection
|
// model: getModel("o3-mini", "openai"), // Keeping existing model selection
|
||||||
// model: getModel("o3-mini", "openai"),
|
// model: getModel("o3-mini", "openai"),
|
||||||
// model: getModel("qwen-qwq-32b", "groq"),
|
model: getModel("qwen-qwq-32b", "groq"),
|
||||||
// model: getModel("gemini-2.0-flash", "google"),
|
// model: getModel("gemini-2.0-flash", "google"),
|
||||||
model: getModel("gemini-2.5-pro-exp-03-25", "google"),
|
// model: getModel("gemini-2.5-pro-preview-03-25", "vertex"),
|
||||||
|
retryModel: getModel("o3-mini", "openai"),
|
||||||
};
|
};
|
||||||
|
|
||||||
const { extractedDataArray, warning } = await extractData({
|
const { extractedDataArray, warning } = await extractData({
|
||||||
@ -555,7 +647,8 @@ export function removeDefaultProperty(schema: any): any {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export async function generateSchemaFromPrompt(prompt: string): Promise<any> {
|
export async function generateSchemaFromPrompt(prompt: string): Promise<any> {
|
||||||
const model = getModel("gpt-4o");
|
const model = getModel("qwen-qwq-32b", "groq");
|
||||||
|
const retryModel = getModel("gpt-4o", "openai");
|
||||||
const temperatures = [0, 0.1, 0.3]; // Different temperatures to try
|
const temperatures = [0, 0.1, 0.3]; // Different temperatures to try
|
||||||
let lastError: Error | null = null;
|
let lastError: Error | null = null;
|
||||||
|
|
||||||
@ -565,7 +658,8 @@ export async function generateSchemaFromPrompt(prompt: string): Promise<any> {
|
|||||||
logger: logger.child({
|
logger: logger.child({
|
||||||
method: "generateSchemaFromPrompt/generateCompletions",
|
method: "generateSchemaFromPrompt/generateCompletions",
|
||||||
}),
|
}),
|
||||||
model: model,
|
model,
|
||||||
|
retryModel,
|
||||||
options: {
|
options: {
|
||||||
mode: "llm",
|
mode: "llm",
|
||||||
systemPrompt: `You are a schema generator for a web scraping system. Generate a JSON schema based on the user's prompt.
|
systemPrompt: `You are a schema generator for a web scraping system. Generate a JSON schema based on the user's prompt.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user