From 78badf8f72652bc71947b51b5b67c43ff806ee90 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 28 Oct 2024 16:02:07 -0300 Subject: [PATCH 01/51] Nick: wip --- apps/api/package.json | 3 +- apps/api/pnpm-lock.yaml | 1324 ++++++++++++++++++++++- apps/api/src/controllers/v1/extract.ts | 197 ++++ apps/api/src/controllers/v1/map.ts | 209 ++-- apps/api/src/controllers/v1/types.ts | 38 + apps/api/src/lib/extract/completions.ts | 119 ++ apps/api/src/lib/extract/reranker.ts | 22 + 7 files changed, 1772 insertions(+), 140 deletions(-) create mode 100644 apps/api/src/controllers/v1/extract.ts create mode 100644 apps/api/src/lib/extract/completions.ts create mode 100644 apps/api/src/lib/extract/reranker.ts diff --git a/apps/api/package.json b/apps/api/package.json index a0f9cf8e..dc62d4c3 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -53,7 +53,7 @@ "@bull-board/api": "^5.20.5", "@bull-board/express": "^5.20.5", "@devil7softwares/pos": "^1.0.2", - "@dqbd/tiktoken": "^1.0.13", + "@dqbd/tiktoken": "^1.0.17", "@hyperdx/node-opentelemetry": "^0.8.1", "@logtail/node": "^0.4.12", "@nangohq/node": "^0.40.8", @@ -73,6 +73,7 @@ "cacheable-lookup": "^6.1.0", "cheerio": "^1.0.0-rc.12", "cohere": "^1.1.1", + "cohere-ai": "^7.14.0", "cors": "^2.8.5", "cron-parser": "^4.9.0", "date-fns": "^3.6.0", diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index 095b507c..f07c4ecd 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -24,8 +24,8 @@ importers: specifier: ^1.0.2 version: 1.0.2 '@dqbd/tiktoken': - specifier: ^1.0.13 - version: 1.0.15 + specifier: ^1.0.17 + version: 1.0.17 '@hyperdx/node-opentelemetry': specifier: ^0.8.1 version: 0.8.1 @@ -83,6 +83,9 @@ importers: cohere: specifier: ^1.1.1 version: 1.1.1 + cohere-ai: + specifier: ^7.14.0 + version: 7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) cors: specifier: ^2.8.5 version: 2.8.5 @@ -130,7 +133,7 @@ importers: version: 2.9.0 langchain: specifier: ^0.2.8 - version: 
0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) + version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) languagedetect: specifier: ^2.0.0 version: 2.0.0 @@ -148,10 +151,10 @@ importers: version: 2.30.1 mongoose: specifier: ^8.4.4 - version: 8.4.4(socks@2.8.3) + version: 8.4.4(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3) natural: specifier: ^7.0.7 - version: 7.0.7(socks@2.8.3) + version: 7.0.7(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3) openai: specifier: ^4.57.0 version: 4.57.0(zod@3.23.8) @@ -289,6 +292,154 @@ packages: '@anthropic-ai/sdk@0.24.3': resolution: {integrity: sha512-916wJXO6T6k8R6BAAcLhLPv/pnLGy7YSEBZXZ1XTFbLcTZE8oTy3oDW9WJf9KKZwMvVcePIfoTSvzXHRcGxkQQ==} + '@aws-crypto/crc32@3.0.0': + resolution: {integrity: sha512-IzSgsrxUcsrejQbPVilIKy16kAT52EwB6zSaI+M3xxIhKh5+aldEyvI+z6erM7TCLB2BJsFrtHjp6/4/sr+3dA==} + + '@aws-crypto/sha256-browser@5.2.0': + resolution: {integrity: sha512-AXfN/lGotSQwu6HNcEsIASo7kWXZ5HYWvfOmSNKDsEqC4OashTp8alTmaz+F7TC2L083SFv5RdB+qU3Vs1kZqw==} + + '@aws-crypto/sha256-js@5.2.0': + resolution: {integrity: 
sha512-FFQQyu7edu4ufvIZ+OadFpHHOt+eSTBaYaki44c+akjg7qZg9oOQeLlk77F6tSYqjDAFClrHJk9tMf0HdVyOvA==} + engines: {node: '>=16.0.0'} + + '@aws-crypto/supports-web-crypto@5.2.0': + resolution: {integrity: sha512-iAvUotm021kM33eCdNfwIN//F77/IADDSs58i+MDaOqFrVjZo9bAal0NK7HurRuWLLpF1iLX7gbWrjHjeo+YFg==} + + '@aws-crypto/util@3.0.0': + resolution: {integrity: sha512-2OJlpeJpCR48CC8r+uKVChzs9Iungj9wkZrl8Z041DWEWvyIHILYKCPNzJghKsivj+S3mLo6BVc7mBNzdxA46w==} + + '@aws-crypto/util@5.2.0': + resolution: {integrity: sha512-4RkU9EsI6ZpBve5fseQlGNUWKMa1RLPQ1dnjnQoe07ldfIzcsGb5hC5W0Dm7u423KWzawlrpbjXBrXCEv9zazQ==} + + '@aws-sdk/client-cognito-identity@3.679.0': + resolution: {integrity: sha512-vJzQ6QpaMu8itJMe3FH1/0rwMjL0ELh63iLTxiAmhiV/SvCwNNoSFLd2HdKxbV0Bg/x8lUiPVq3pl6+cxaIrEQ==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/client-sagemaker@3.679.0': + resolution: {integrity: sha512-n1hTHpQl6LwNkwn4vLmtbwkNoX2jxtiliRd0IaHR1CfAQvKNTfQ52mARWr73hR+/YcVsBzPx8sYKq2XHWArHKQ==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/client-sso-oidc@3.679.0': + resolution: {integrity: sha512-/dBYWcCwbA/id4sFCIVZvf0UsvzHCC68SryxeNQk/PDkY9N4n5yRcMUkZDaEyQCjowc3kY4JOXp2AdUP037nhA==} + engines: {node: '>=16.0.0'} + peerDependencies: + '@aws-sdk/client-sts': ^3.679.0 + + '@aws-sdk/client-sso@3.679.0': + resolution: {integrity: sha512-/0cAvYnpOZTo/Y961F1kx2fhDDLUYZ0SQQ5/75gh3xVImLj7Zw+vp74ieqFbqWLYGMaq8z1Arr9A8zG95mbLdg==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/client-sts@3.679.0': + resolution: {integrity: sha512-3CvrT8w1RjFu1g8vKA5Azfr5V83r2/b68Ock43WE003Bq/5Y38mwmYX7vk0fPHzC3qejt4YMAWk/C3fSKOy25g==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/core@3.679.0': + resolution: {integrity: sha512-CS6PWGX8l4v/xyvX8RtXnBisdCa5+URzKd0L6GvHChype9qKUVxO/Gg6N/y43Hvg7MNWJt9FBPNWIxUB+byJwg==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-cognito-identity@3.679.0': + resolution: {integrity: sha512-XvWd6RPk7TA7tmqITT+NXvJ6ltJP8BUtLO1NAvja4HKExPKR9HAyoOeeH7KM3lVRED4e4LUnLb3fzteH20IXaA==} + engines: 
{node: '>=16.0.0'} + + '@aws-sdk/credential-provider-env@3.679.0': + resolution: {integrity: sha512-EdlTYbzMm3G7VUNAMxr9S1nC1qUNqhKlAxFU8E7cKsAe8Bp29CD5HAs3POc56AVo9GC4yRIS+/mtlZSmrckzUA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-http@3.679.0': + resolution: {integrity: sha512-ZoKLubW5DqqV1/2a3TSn+9sSKg0T8SsYMt1JeirnuLJF0mCoYFUaWMyvxxKuxPoqvUsaycxKru4GkpJ10ltNBw==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-ini@3.679.0': + resolution: {integrity: sha512-Rg7t8RwUzKcumpipG4neZqaeJ6DF+Bco1+FHn5BZB68jpvwvjBjcQUuWkxj18B6ctYHr1fkunnzeKEn/+vy7+w==} + engines: {node: '>=16.0.0'} + peerDependencies: + '@aws-sdk/client-sts': ^3.679.0 + + '@aws-sdk/credential-provider-node@3.679.0': + resolution: {integrity: sha512-E3lBtaqCte8tWs6Rkssc8sLzvGoJ10TLGvpkijOlz43wPd6xCRh1YLwg6zolf9fVFtEyUs/GsgymiASOyxhFtw==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-process@3.679.0': + resolution: {integrity: sha512-u/p4TV8kQ0zJWDdZD4+vdQFTMhkDEJFws040Gm113VHa/Xo1SYOjbpvqeuFoz6VmM0bLvoOWjxB9MxnSQbwKpQ==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-sso@3.679.0': + resolution: {integrity: sha512-SAtWonhi9asxn0ukEbcE81jkyanKgqpsrtskvYPpO9Z9KOednM4Cqt6h1bfcS9zaHjN2zu815Gv8O7WiV+F/DQ==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-web-identity@3.679.0': + resolution: {integrity: sha512-a74tLccVznXCaBefWPSysUcLXYJiSkeUmQGtalNgJ1vGkE36W5l/8czFiiowdWdKWz7+x6xf0w+Kjkjlj42Ung==} + engines: {node: '>=16.0.0'} + peerDependencies: + '@aws-sdk/client-sts': ^3.679.0 + + '@aws-sdk/credential-providers@3.679.0': + resolution: {integrity: sha512-ZjZZb6OERw/UKlSqcJ24AUJIf/ekDLPZrPpo0kPMV70EQ0GkBiklIZ8qULu9bEcI2I4UIapBKRiXTrK4gA6YHg==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-host-header@3.679.0': + resolution: {integrity: sha512-y176HuQ8JRY3hGX8rQzHDSbCl9P5Ny9l16z4xmaiLo+Qfte7ee4Yr3yaAKd7GFoJ3/Mhud2XZ37fR015MfYl2w==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-logger@3.679.0': + 
resolution: {integrity: sha512-0vet8InEj7nvIvGKk+ch7bEF5SyZ7Us9U7YTEgXPrBNStKeRUsgwRm0ijPWWd0a3oz2okaEwXsFl7G/vI0XiEA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-recursion-detection@3.679.0': + resolution: {integrity: sha512-sQoAZFsQiW/LL3DfKMYwBoGjYDEnMbA9WslWN8xneCmBAwKo6IcSksvYs23PP8XMIoBGe2I2J9BSr654XWygTQ==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-user-agent@3.679.0': + resolution: {integrity: sha512-4hdeXhPDURPqQLPd9jCpUEo9fQITXl3NM3W1MwcJpE0gdUM36uXkQOYsTPeeU/IRCLVjK8Htlh2oCaM9iJrLCA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/protocol-http@3.374.0': + resolution: {integrity: sha512-9WpRUbINdGroV3HiZZIBoJvL2ndoWk39OfwxWs2otxByppJZNN14bg/lvCx5e8ggHUti7IBk5rb0nqQZ4m05pg==} + engines: {node: '>=14.0.0'} + deprecated: This package has moved to @smithy/protocol-http + + '@aws-sdk/region-config-resolver@3.679.0': + resolution: {integrity: sha512-Ybx54P8Tg6KKq5ck7uwdjiKif7n/8g1x+V0V9uTjBjRWqaIgiqzXwKWoPj6NCNkE7tJNtqI4JrNxp/3S3HvmRw==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/signature-v4@3.374.0': + resolution: {integrity: sha512-2xLJvSdzcZZAg0lsDLUAuSQuihzK0dcxIK7WmfuJeF7DGKJFmp9czQmz5f3qiDz6IDQzvgK1M9vtJSVCslJbyQ==} + engines: {node: '>=14.0.0'} + deprecated: This package has moved to @smithy/signature-v4 + + '@aws-sdk/token-providers@3.679.0': + resolution: {integrity: sha512-1/+Zso/x2jqgutKixYFQEGli0FELTgah6bm7aB+m2FAWH4Hz7+iMUsazg6nSWm714sG9G3h5u42Dmpvi9X6/hA==} + engines: {node: '>=16.0.0'} + peerDependencies: + '@aws-sdk/client-sso-oidc': ^3.679.0 + + '@aws-sdk/types@3.679.0': + resolution: {integrity: sha512-NwVq8YvInxQdJ47+zz4fH3BRRLC6lL+WLkvr242PVBbUOLRyK/lkwHlfiKUoeVIMyK5NF+up6TRg71t/8Bny6Q==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/util-endpoints@3.679.0': + resolution: {integrity: sha512-YL6s4Y/1zC45OvddvgE139fjeWSKKPgLlnfrvhVL7alNyY9n7beR4uhoDpNrt5mI6sn9qiBF17790o+xLAXjjg==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/util-locate-window@3.679.0': + resolution: {integrity: 
sha512-zKTd48/ZWrCplkXpYDABI74rQlbR0DNHs8nH95htfSLj9/mWRSwaGptoxwcihaq/77vi/fl2X3y0a1Bo8bt7RA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/util-user-agent-browser@3.679.0': + resolution: {integrity: sha512-CusSm2bTBG1kFypcsqU8COhnYc6zltobsqs3nRrvYqYaOqtMnuE46K4XTWpnzKgwDejgZGOE+WYyprtAxrPvmQ==} + + '@aws-sdk/util-user-agent-node@3.679.0': + resolution: {integrity: sha512-Bw4uXZ+NU5ed6TNfo4tBbhBSW+2eQxXYjYBGl5gLUNUpg2pDFToQAP6rXBFiwcG52V2ny5oLGiD82SoYuYkAVg==} + engines: {node: '>=16.0.0'} + peerDependencies: + aws-crt: '>=1.0.0' + peerDependenciesMeta: + aws-crt: + optional: true + + '@aws-sdk/util-utf8-browser@3.259.0': + resolution: {integrity: sha512-UvFa/vR+e19XookZF8RzFZBrw2EUkQWxiBW0yYQAhvk3C+QVGl0H3ouca8LDBlBfQKXwmW3huo/59H8rwb1wJw==} + '@babel/code-frame@7.24.6': resolution: {integrity: sha512-ZJhac6FkEd1yhG2AHOmfcXG4ceoLltoCVJjN5XsWN9BifBQr+cHJbWi0h68HZuSORq+3WtJ2z0hwF2NG1b5kcA==} engines: {node: '>=6.9.0'} @@ -499,8 +650,8 @@ packages: engines: {node: '>=0'} deprecated: This package has been renamed to `fast-tag-pos` - '@dqbd/tiktoken@1.0.15': - resolution: {integrity: sha512-a6I67K1xUkuqcuwulobIJiLikkoE7egMaviI1Jg5bxSn2V7QGqXsGE3jTKr8UIOU/o74mAAd5TkeXFNBtaKF4A==} + '@dqbd/tiktoken@1.0.17': + resolution: {integrity: sha512-v2gz0V6DiuR2TsALM32TkBThf6LdjLbxe6HS/nx9/KJxuDX0Z7SGX7N7PvQfqIvRyus42jI9poVUqezc/j/aQw==} '@flydotio/dockerfile@0.4.11': resolution: {integrity: sha512-L52UAfrOhmAn3T4TxpeRofQOSO+Kctg+uraB4nLzo4mvvh+4Z7HYxSi7Dnq0Kirz+xx6fDIc4OMNT1EdaORecA==} @@ -1475,6 +1626,216 @@ packages: '@sinonjs/fake-timers@10.3.0': resolution: {integrity: sha512-V4BG07kuYSUkTCSBHG8G8TNhM+F19jXFWnQtzj+we8DrkpSBCee9Z3Ms8yiGer/dlmhe35/Xdgyo3/0rQKg7YA==} + '@smithy/abort-controller@3.1.6': + resolution: {integrity: sha512-0XuhuHQlEqbNQZp7QxxrFTdVWdwxch4vjxYgfInF91hZFkPxf9QDrdQka0KfxFMPqLNzSw0b95uGTrLliQUavQ==} + engines: {node: '>=16.0.0'} + + '@smithy/config-resolver@3.0.10': + resolution: {integrity: 
sha512-Uh0Sz9gdUuz538nvkPiyv1DZRX9+D15EKDtnQP5rYVAzM/dnYk3P8cg73jcxyOitPgT3mE3OVj7ky7sibzHWkw==} + engines: {node: '>=16.0.0'} + + '@smithy/core@2.5.1': + resolution: {integrity: sha512-DujtuDA7BGEKExJ05W5OdxCoyekcKT3Rhg1ZGeiUWaz2BJIWXjZmsG/DIP4W48GHno7AQwRsaCb8NcBgH3QZpg==} + engines: {node: '>=16.0.0'} + + '@smithy/credential-provider-imds@3.2.5': + resolution: {integrity: sha512-4FTQGAsuwqTzVMmiRVTn0RR9GrbRfkP0wfu/tXWVHd2LgNpTY0uglQpIScXK4NaEyXbB3JmZt8gfVqO50lP8wg==} + engines: {node: '>=16.0.0'} + + '@smithy/eventstream-codec@1.1.0': + resolution: {integrity: sha512-3tEbUb8t8an226jKB6V/Q2XU/J53lCwCzULuBPEaF4JjSh+FlCMp7TmogE/Aij5J9DwlsZ4VAD/IRDuQ/0ZtMw==} + + '@smithy/fetch-http-handler@3.2.9': + resolution: {integrity: sha512-hYNVQOqhFQ6vOpenifFME546f0GfJn2OiQ3M0FDmuUu8V/Uiwy2wej7ZXxFBNqdx0R5DZAqWM1l6VRhGz8oE6A==} + + '@smithy/fetch-http-handler@4.0.0': + resolution: {integrity: sha512-MLb1f5tbBO2X6K4lMEKJvxeLooyg7guq48C2zKr4qM7F2Gpkz4dc+hdSgu77pCJ76jVqFBjZczHYAs6dp15N+g==} + + '@smithy/hash-node@3.0.8': + resolution: {integrity: sha512-tlNQYbfpWXHimHqrvgo14DrMAgUBua/cNoz9fMYcDmYej7MAmUcjav/QKQbFc3NrcPxeJ7QClER4tWZmfwoPng==} + engines: {node: '>=16.0.0'} + + '@smithy/invalid-dependency@3.0.8': + resolution: {integrity: sha512-7Qynk6NWtTQhnGTTZwks++nJhQ1O54Mzi7fz4PqZOiYXb4Z1Flpb2yRvdALoggTS8xjtohWUM+RygOtB30YL3Q==} + + '@smithy/is-array-buffer@1.1.0': + resolution: {integrity: sha512-twpQ/n+3OWZJ7Z+xu43MJErmhB/WO/mMTnqR6PwWQShvSJ/emx5d1N59LQZk6ZpTAeuRWrc+eHhkzTp9NFjNRQ==} + engines: {node: '>=14.0.0'} + + '@smithy/is-array-buffer@2.2.0': + resolution: {integrity: sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA==} + engines: {node: '>=14.0.0'} + + '@smithy/is-array-buffer@3.0.0': + resolution: {integrity: sha512-+Fsu6Q6C4RSJiy81Y8eApjEB5gVtM+oFKTffg+jSuwtvomJJrhUJBu2zS8wjXSgH/g1MKEWrzyChTBe6clb5FQ==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-content-length@3.0.10': + resolution: {integrity: 
sha512-T4dIdCs1d/+/qMpwhJ1DzOhxCZjZHbHazEPJWdB4GDi2HjIZllVzeBEcdJUN0fomV8DURsgOyrbEUzg3vzTaOg==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-endpoint@3.2.1': + resolution: {integrity: sha512-wWO3xYmFm6WRW8VsEJ5oU6h7aosFXfszlz3Dj176pTij6o21oZnzkCLzShfmRaaCHDkBXWBdO0c4sQAvLFP6zA==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-retry@3.0.25': + resolution: {integrity: sha512-m1F70cPaMBML4HiTgCw5I+jFNtjgz5z5UdGnUbG37vw6kh4UvizFYjqJGHvicfgKMkDL6mXwyPp5mhZg02g5sg==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-serde@3.0.8': + resolution: {integrity: sha512-Xg2jK9Wc/1g/MBMP/EUn2DLspN8LNt+GMe7cgF+Ty3vl+Zvu+VeZU5nmhveU+H8pxyTsjrAkci8NqY6OuvZnjA==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-stack@3.0.8': + resolution: {integrity: sha512-d7ZuwvYgp1+3682Nx0MD3D/HtkmZd49N3JUndYWQXfRZrYEnCWYc8BHcNmVsPAp9gKvlurdg/mubE6b/rPS9MA==} + engines: {node: '>=16.0.0'} + + '@smithy/node-config-provider@3.1.9': + resolution: {integrity: sha512-qRHoah49QJ71eemjuS/WhUXB+mpNtwHRWQr77J/m40ewBVVwvo52kYAmb7iuaECgGTTcYxHS4Wmewfwy++ueew==} + engines: {node: '>=16.0.0'} + + '@smithy/node-http-handler@3.2.5': + resolution: {integrity: sha512-PkOwPNeKdvX/jCpn0A8n9/TyoxjGZB8WVoJmm9YzsnAgggTj4CrjpRHlTQw7dlLZ320n1mY1y+nTRUDViKi/3w==} + engines: {node: '>=16.0.0'} + + '@smithy/property-provider@3.1.8': + resolution: {integrity: sha512-ukNUyo6rHmusG64lmkjFeXemwYuKge1BJ8CtpVKmrxQxc6rhUX0vebcptFA9MmrGsnLhwnnqeH83VTU9hwOpjA==} + engines: {node: '>=16.0.0'} + + '@smithy/protocol-http@1.2.0': + resolution: {integrity: sha512-GfGfruksi3nXdFok5RhgtOnWe5f6BndzYfmEXISD+5gAGdayFGpjWu5pIqIweTudMtse20bGbc+7MFZXT1Tb8Q==} + engines: {node: '>=14.0.0'} + + '@smithy/protocol-http@4.1.5': + resolution: {integrity: sha512-hsjtwpIemmCkm3ZV5fd/T0bPIugW1gJXwZ/hpuVubt2hEUApIoUTrf6qIdh9MAWlw0vjMrA1ztJLAwtNaZogvg==} + engines: {node: '>=16.0.0'} + + '@smithy/querystring-builder@3.0.8': + resolution: {integrity: 
sha512-btYxGVqFUARbUrN6VhL9c3dnSviIwBYD9Rz1jHuN1hgh28Fpv2xjU1HeCeDJX68xctz7r4l1PBnFhGg1WBBPuA==} + engines: {node: '>=16.0.0'} + + '@smithy/querystring-parser@3.0.8': + resolution: {integrity: sha512-BtEk3FG7Ks64GAbt+JnKqwuobJNX8VmFLBsKIwWr1D60T426fGrV2L3YS5siOcUhhp6/Y6yhBw1PSPxA5p7qGg==} + engines: {node: '>=16.0.0'} + + '@smithy/service-error-classification@3.0.8': + resolution: {integrity: sha512-uEC/kCCFto83bz5ZzapcrgGqHOh/0r69sZ2ZuHlgoD5kYgXJEThCoTuw/y1Ub3cE7aaKdznb+jD9xRPIfIwD7g==} + engines: {node: '>=16.0.0'} + + '@smithy/shared-ini-file-loader@3.1.9': + resolution: {integrity: sha512-/+OsJRNtoRbtsX0UpSgWVxFZLsJHo/4sTr+kBg/J78sr7iC+tHeOvOJrS5hCpVQ6sWBbhWLp1UNiuMyZhE6pmA==} + engines: {node: '>=16.0.0'} + + '@smithy/signature-v4@1.1.0': + resolution: {integrity: sha512-fDo3m7YqXBs7neciOePPd/X9LPm5QLlDMdIC4m1H6dgNLnXfLMFNIxEfPyohGA8VW9Wn4X8lygnPSGxDZSmp0Q==} + engines: {node: '>=14.0.0'} + + '@smithy/signature-v4@4.2.1': + resolution: {integrity: sha512-NsV1jF4EvmO5wqmaSzlnTVetemBS3FZHdyc5CExbDljcyJCEEkJr8ANu2JvtNbVg/9MvKAWV44kTrGS+Pi4INg==} + engines: {node: '>=16.0.0'} + + '@smithy/smithy-client@3.4.2': + resolution: {integrity: sha512-dxw1BDxJiY9/zI3cBqfVrInij6ShjpV4fmGHesGZZUiP9OSE/EVfdwdRz0PgvkEvrZHpsj2htRaHJfftE8giBA==} + engines: {node: '>=16.0.0'} + + '@smithy/types@1.2.0': + resolution: {integrity: sha512-z1r00TvBqF3dh4aHhya7nz1HhvCg4TRmw51fjMrh5do3h+ngSstt/yKlNbHeb9QxJmFbmN8KEVSWgb1bRvfEoA==} + engines: {node: '>=14.0.0'} + + '@smithy/types@3.6.0': + resolution: {integrity: sha512-8VXK/KzOHefoC65yRgCn5vG1cysPJjHnOVt9d0ybFQSmJgQj152vMn4EkYhGuaOmnnZvCPav/KnYyE6/KsNZ2w==} + engines: {node: '>=16.0.0'} + + '@smithy/url-parser@3.0.8': + resolution: {integrity: sha512-4FdOhwpTW7jtSFWm7SpfLGKIBC9ZaTKG5nBF0wK24aoQKQyDIKUw3+KFWCQ9maMzrgTJIuOvOnsV2lLGW5XjTg==} + + '@smithy/util-base64@3.0.0': + resolution: {integrity: sha512-Kxvoh5Qtt0CDsfajiZOCpJxgtPHXOKwmM+Zy4waD43UoEMA+qPxxa98aE/7ZhdnBFZFXMOiBR5xbcaMhLtznQQ==} + engines: {node: '>=16.0.0'} + + 
'@smithy/util-body-length-browser@3.0.0': + resolution: {integrity: sha512-cbjJs2A1mLYmqmyVl80uoLTJhAcfzMOyPgjwAYusWKMdLeNtzmMz9YxNl3/jRLoxSS3wkqkf0jwNdtXWtyEBaQ==} + + '@smithy/util-body-length-node@3.0.0': + resolution: {integrity: sha512-Tj7pZ4bUloNUP6PzwhN7K386tmSmEET9QtQg0TgdNOnxhZvCssHji+oZTUIuzxECRfG8rdm2PMw2WCFs6eIYkA==} + engines: {node: '>=16.0.0'} + + '@smithy/util-buffer-from@1.1.0': + resolution: {integrity: sha512-9m6NXE0ww+ra5HKHCHig20T+FAwxBAm7DIdwc/767uGWbRcY720ybgPacQNB96JMOI7xVr/CDa3oMzKmW4a+kw==} + engines: {node: '>=14.0.0'} + + '@smithy/util-buffer-from@2.2.0': + resolution: {integrity: sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA==} + engines: {node: '>=14.0.0'} + + '@smithy/util-buffer-from@3.0.0': + resolution: {integrity: sha512-aEOHCgq5RWFbP+UDPvPot26EJHjOC+bRgse5A8V3FSShqd5E5UN4qc7zkwsvJPPAVsf73QwYcHN1/gt/rtLwQA==} + engines: {node: '>=16.0.0'} + + '@smithy/util-config-provider@3.0.0': + resolution: {integrity: sha512-pbjk4s0fwq3Di/ANL+rCvJMKM5bzAQdE5S/6RL5NXgMExFAi6UgQMPOm5yPaIWPpr+EOXKXRonJ3FoxKf4mCJQ==} + engines: {node: '>=16.0.0'} + + '@smithy/util-defaults-mode-browser@3.0.25': + resolution: {integrity: sha512-fRw7zymjIDt6XxIsLwfJfYUfbGoO9CmCJk6rjJ/X5cd20+d2Is7xjU5Kt/AiDt6hX8DAf5dztmfP5O82gR9emA==} + engines: {node: '>= 10.0.0'} + + '@smithy/util-defaults-mode-node@3.0.25': + resolution: {integrity: sha512-H3BSZdBDiVZGzt8TG51Pd2FvFO0PAx/A0mJ0EH8a13KJ6iUCdYnw/Dk/MdC1kTd0eUuUGisDFaxXVXo4HHFL1g==} + engines: {node: '>= 10.0.0'} + + '@smithy/util-endpoints@2.1.4': + resolution: {integrity: sha512-kPt8j4emm7rdMWQyL0F89o92q10gvCUa6sBkBtDJ7nV2+P7wpXczzOfoDJ49CKXe5CCqb8dc1W+ZdLlrKzSAnQ==} + engines: {node: '>=16.0.0'} + + '@smithy/util-hex-encoding@1.1.0': + resolution: {integrity: sha512-7UtIE9eH0u41zpB60Jzr0oNCQ3hMJUabMcKRUVjmyHTXiWDE4vjSqN6qlih7rCNeKGbioS7f/y2Jgym4QZcKFg==} + engines: {node: '>=14.0.0'} + + '@smithy/util-hex-encoding@3.0.0': + resolution: {integrity: 
sha512-eFndh1WEK5YMUYvy3lPlVmYY/fZcQE1D8oSf41Id2vCeIkKJXPcYDCZD+4+xViI6b1XSd7tE+s5AmXzz5ilabQ==} + engines: {node: '>=16.0.0'} + + '@smithy/util-middleware@1.1.0': + resolution: {integrity: sha512-6hhckcBqVgjWAqLy2vqlPZ3rfxLDhFWEmM7oLh2POGvsi7j0tHkbN7w4DFhuBExVJAbJ/qqxqZdRY6Fu7/OezQ==} + engines: {node: '>=14.0.0'} + + '@smithy/util-middleware@3.0.8': + resolution: {integrity: sha512-p7iYAPaQjoeM+AKABpYWeDdtwQNxasr4aXQEA/OmbOaug9V0odRVDy3Wx4ci8soljE/JXQo+abV0qZpW8NX0yA==} + engines: {node: '>=16.0.0'} + + '@smithy/util-retry@3.0.8': + resolution: {integrity: sha512-TCEhLnY581YJ+g1x0hapPz13JFqzmh/pMWL2KEFASC51qCfw3+Y47MrTmea4bUE5vsdxQ4F6/KFbUeSz22Q1ow==} + engines: {node: '>=16.0.0'} + + '@smithy/util-stream@3.2.1': + resolution: {integrity: sha512-R3ufuzJRxSJbE58K9AEnL/uSZyVdHzud9wLS8tIbXclxKzoe09CRohj2xV8wpx5tj7ZbiJaKYcutMm1eYgz/0A==} + engines: {node: '>=16.0.0'} + + '@smithy/util-uri-escape@1.1.0': + resolution: {integrity: sha512-/jL/V1xdVRt5XppwiaEU8Etp5WHZj609n0xMTuehmCqdoOFbId1M+aEeDWZsQ+8JbEB/BJ6ynY2SlYmOaKtt8w==} + engines: {node: '>=14.0.0'} + + '@smithy/util-uri-escape@3.0.0': + resolution: {integrity: sha512-LqR7qYLgZTD7nWLBecUi4aqolw8Mhza9ArpNEQ881MJJIU2sE5iHCK6TdyqqzcDLy0OPe10IY4T8ctVdtynubg==} + engines: {node: '>=16.0.0'} + + '@smithy/util-utf8@1.1.0': + resolution: {integrity: sha512-p/MYV+JmqmPyjdgyN2UxAeYDj9cBqCjp0C/NsTWnnjoZUVqoeZ6IrW915L9CAKWVECgv9lVQGc4u/yz26/bI1A==} + engines: {node: '>=14.0.0'} + + '@smithy/util-utf8@2.3.0': + resolution: {integrity: sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A==} + engines: {node: '>=14.0.0'} + + '@smithy/util-utf8@3.0.0': + resolution: {integrity: sha512-rUeT12bxFnplYDe815GXbq/oixEGHfRFFtcTF3YdDi/JaENIM6aSYYLJydG83UNzLXeRI5K8abYd/8Sp/QM0kA==} + engines: {node: '>=16.0.0'} + + '@smithy/util-waiter@3.1.7': + resolution: {integrity: sha512-d5yGlQtmN/z5eoTtIYgkvOw27US2Ous4VycnXatyoImIF9tzlcpnKqQ/V7qhvJmb2p6xZne1NopCLakdTnkBBQ==} + engines: {node: '>=16.0.0'} + 
'@stdlib/assert-has-own-property@0.0.7': resolution: {integrity: sha512-3YHwSWiUqGlTLSwxAWxrqaD1PkgcJniGyotJeIt5X0tSNmSW0/c9RWroCImTUUB3zBkyBJ79MyU9Nf4Qgm59fQ==} engines: {node: '>=0.10.0', npm: '>2.7.0'} @@ -2004,6 +2365,9 @@ packages: bottleneck@2.19.5: resolution: {integrity: sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw==} + bowser@2.11.0: + resolution: {integrity: sha512-AlcaJBi/pqqJBIQ8U9Mcpc9i8Aqxn88Skv5d+xBX006BY5u8N3mGLHa5Lgppa7L/HfwgwLgZ6NYs+Ag6uUmJRA==} + brace-expansion@1.1.11: resolution: {integrity: sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==} @@ -2146,6 +2510,9 @@ packages: resolution: {integrity: sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==} engines: {iojs: '>= 1.0.0', node: '>= 0.12.0'} + cohere-ai@7.14.0: + resolution: {integrity: sha512-hSo2/tFV29whjFFtVtdS7kHmtUsjfMO1sgwE/d5bhOE4O7Vkj5G1R9lLIqkIprp/+rrvCq3HGvEaOgry7xRcDA==} + cohere@1.1.1: resolution: {integrity: sha512-D116FKTuauCShJjPuOAFnkyAPMhV/6f403+yPZwyyFY6gErK1AA41y9rQdBvj8eHDZ9sXVJ6TzmzObVfAFh3ig==} @@ -2565,6 +2932,10 @@ packages: fast-safe-stringify@2.1.1: resolution: {integrity: sha512-W+KJc2dmILlPplD/H4K9l9LcAHAfPtP6BY84uVLXQ6Evcz9Lcg33Y2z1IVblT6xdY54PXYVHEv+0Wpq8Io6zkA==} + fast-xml-parser@4.4.1: + resolution: {integrity: sha512-xkjOecfnKGkSsOwtZ5Pz7Us/T6mrbPQrq0nh+aCO5V9nk5NLWmasAHumTKjiPJPWANe+kAZ84Jc8ooJkzZ88Sw==} + hasBin: true + fb-watchman@2.0.2: resolution: {integrity: sha512-p5161BqbuCaSnB8jIbzQHOlpgsPmK5rJVDfDKO91Axs5NC1uu3HRQm6wt9cd9/+GtQQIO53JdGXXoyDpTAsgYA==} @@ -2613,6 +2984,10 @@ packages: form-data-encoder@1.7.2: resolution: {integrity: sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==} + form-data-encoder@4.0.2: + resolution: {integrity: sha512-KQVhvhK8ZkWzxKxOr56CPulAhH3dobtuQ4+hNQ+HekH/Wp5gSOafqRAeTphQUJAIk0GBvHZgJ2ZGRWd5kphMuw==} + engines: {node: '>= 18'} + 
form-data@4.0.0: resolution: {integrity: sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==} engines: {node: '>= 6'} @@ -2621,6 +2996,10 @@ packages: resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==} engines: {node: '>= 12.20'} + formdata-node@6.0.3: + resolution: {integrity: sha512-8e1++BCiTzUno9v5IZ2J6bv4RU+3UKDmqWUQD0MIMVCd9AdhWkO1gw57oo1mNEX1dMq2EGI+FbWz4B92pscSQg==} + engines: {node: '>= 18'} + formdata-polyfill@4.0.10: resolution: {integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==} engines: {node: '>=12.20.0'} @@ -3107,6 +3486,9 @@ packages: joplin-turndown-plugin-gfm@1.0.12: resolution: {integrity: sha512-qL4+1iycQjZ1fs8zk3jSRk7cg3ROBUHk7GKtiLAQLFzLPKErnILUvz5DLszSQvz3s1sTjPbywLDISVUtBY6HaA==} + js-base64@3.7.2: + resolution: {integrity: sha512-NnRs6dsyqUXejqk/yv2aiXlAvOs56sLkX6nUdeaNezI5LFFLlsZjOThmwnrcwh5ZZRwZlCMnVAY3CvhIhoVEKQ==} + js-beautify@1.15.1: resolution: {integrity: sha512-ESjNzSlt/sWE8sciZH8kBF8BPlwXPwhR6pWKAw8bw4Bwj+iZcnKW6ONWUutJ7eObuBZQpiIb8S7OYspWrKt7rA==} engines: {node: '>=14'} @@ -4027,6 +4409,10 @@ packages: resolution: {integrity: sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==} engines: {node: '>=0.6'} + qs@6.11.2: + resolution: {integrity: sha512-tDNIz22aBzCDxLtVH++VnTfzxlfeK5CbqohpSqpJgj1Wg/cQbStNAz3NuqCs5vV+pjBsK4x4pN9HlVh7rcYRiA==} + engines: {node: '>=0.6'} + qs@6.12.2: resolution: {integrity: sha512-x+NLUpx9SYrcwXtX7ob1gnkSems4i/mGZX5SlYxwIau6RrUSODO89TR/XDGGpn5RPWSYIB+aSfuSlV5+CmbTBg==} engines: {node: '>=0.6'} @@ -4365,6 +4751,9 @@ packages: resolution: {integrity: sha512-syeEEd112om/waJ5gOQ+SaYi+setuidQ4ZIPiQREF4yJeegXhn2HKy6C0JYm7uhVQKfMAvuZ22dIRsnoDv7AMw==} engines: {node: '>=12.*'} + strnum@1.0.5: + resolution: {integrity: 
sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA==} + supabase@1.172.2: resolution: {integrity: sha512-h2J6kKEikXnZyurUcCYg215qkQpINOhdWkiclHcWAuVeqXsNrfrYaf1s0qbbcdRyMtrVW48I+VdVTw71Cnn20Q==} engines: {npm: '>=8'} @@ -4526,8 +4915,8 @@ packages: engines: {node: '>=14.17'} hasBin: true - typescript@5.6.2: - resolution: {integrity: sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==} + typescript@5.6.3: + resolution: {integrity: sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==} engines: {node: '>=14.17'} hasBin: true @@ -4797,6 +5186,503 @@ snapshots: transitivePeerDependencies: - encoding + '@aws-crypto/crc32@3.0.0': + dependencies: + '@aws-crypto/util': 3.0.0 + '@aws-sdk/types': 3.679.0 + tslib: 1.14.1 + + '@aws-crypto/sha256-browser@5.2.0': + dependencies: + '@aws-crypto/sha256-js': 5.2.0 + '@aws-crypto/supports-web-crypto': 5.2.0 + '@aws-crypto/util': 5.2.0 + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-locate-window': 3.679.0 + '@smithy/util-utf8': 2.3.0 + tslib: 2.6.3 + + '@aws-crypto/sha256-js@5.2.0': + dependencies: + '@aws-crypto/util': 5.2.0 + '@aws-sdk/types': 3.679.0 + tslib: 2.6.3 + + '@aws-crypto/supports-web-crypto@5.2.0': + dependencies: + tslib: 2.6.3 + + '@aws-crypto/util@3.0.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-utf8-browser': 3.259.0 + tslib: 1.14.1 + + '@aws-crypto/util@5.2.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/util-utf8': 2.3.0 + tslib: 2.6.3 + + '@aws-sdk/client-cognito-identity@3.679.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/client-sso-oidc': 3.679.0(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/client-sts': 3.679.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + 
'@aws-sdk/middleware-host-header': 3.679.0 + '@aws-sdk/middleware-logger': 3.679.0 + '@aws-sdk/middleware-recursion-detection': 3.679.0 + '@aws-sdk/middleware-user-agent': 3.679.0 + '@aws-sdk/region-config-resolver': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-endpoints': 3.679.0 + '@aws-sdk/util-user-agent-browser': 3.679.0 + '@aws-sdk/util-user-agent-node': 3.679.0 + '@smithy/config-resolver': 3.0.10 + '@smithy/core': 2.5.1 + '@smithy/fetch-http-handler': 3.2.9 + '@smithy/hash-node': 3.0.8 + '@smithy/invalid-dependency': 3.0.8 + '@smithy/middleware-content-length': 3.0.10 + '@smithy/middleware-endpoint': 3.2.1 + '@smithy/middleware-retry': 3.0.25 + '@smithy/middleware-serde': 3.0.8 + '@smithy/middleware-stack': 3.0.8 + '@smithy/node-config-provider': 3.1.9 + '@smithy/node-http-handler': 3.2.5 + '@smithy/protocol-http': 4.1.5 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/url-parser': 3.0.8 + '@smithy/util-base64': 3.0.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.25 + '@smithy/util-defaults-mode-node': 3.0.25 + '@smithy/util-endpoints': 2.1.4 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-retry': 3.0.8 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-sagemaker@3.679.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/client-sso-oidc': 3.679.0(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/client-sts': 3.679.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/middleware-host-header': 3.679.0 + '@aws-sdk/middleware-logger': 3.679.0 + '@aws-sdk/middleware-recursion-detection': 3.679.0 + '@aws-sdk/middleware-user-agent': 3.679.0 + '@aws-sdk/region-config-resolver': 3.679.0 + '@aws-sdk/types': 3.679.0 + 
'@aws-sdk/util-endpoints': 3.679.0 + '@aws-sdk/util-user-agent-browser': 3.679.0 + '@aws-sdk/util-user-agent-node': 3.679.0 + '@smithy/config-resolver': 3.0.10 + '@smithy/core': 2.5.1 + '@smithy/fetch-http-handler': 3.2.9 + '@smithy/hash-node': 3.0.8 + '@smithy/invalid-dependency': 3.0.8 + '@smithy/middleware-content-length': 3.0.10 + '@smithy/middleware-endpoint': 3.2.1 + '@smithy/middleware-retry': 3.0.25 + '@smithy/middleware-serde': 3.0.8 + '@smithy/middleware-stack': 3.0.8 + '@smithy/node-config-provider': 3.1.9 + '@smithy/node-http-handler': 3.2.5 + '@smithy/protocol-http': 4.1.5 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/url-parser': 3.0.8 + '@smithy/util-base64': 3.0.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.25 + '@smithy/util-defaults-mode-node': 3.0.25 + '@smithy/util-endpoints': 2.1.4 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-retry': 3.0.8 + '@smithy/util-utf8': 3.0.0 + '@smithy/util-waiter': 3.1.7 + '@types/uuid': 9.0.8 + tslib: 2.6.3 + uuid: 9.0.1 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/client-sts': 3.679.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/middleware-host-header': 3.679.0 + '@aws-sdk/middleware-logger': 3.679.0 + '@aws-sdk/middleware-recursion-detection': 3.679.0 + '@aws-sdk/middleware-user-agent': 3.679.0 + '@aws-sdk/region-config-resolver': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-endpoints': 3.679.0 + '@aws-sdk/util-user-agent-browser': 3.679.0 + '@aws-sdk/util-user-agent-node': 3.679.0 + '@smithy/config-resolver': 3.0.10 + '@smithy/core': 2.5.1 + '@smithy/fetch-http-handler': 3.2.9 + '@smithy/hash-node': 
3.0.8 + '@smithy/invalid-dependency': 3.0.8 + '@smithy/middleware-content-length': 3.0.10 + '@smithy/middleware-endpoint': 3.2.1 + '@smithy/middleware-retry': 3.0.25 + '@smithy/middleware-serde': 3.0.8 + '@smithy/middleware-stack': 3.0.8 + '@smithy/node-config-provider': 3.1.9 + '@smithy/node-http-handler': 3.2.5 + '@smithy/protocol-http': 4.1.5 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/url-parser': 3.0.8 + '@smithy/util-base64': 3.0.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.25 + '@smithy/util-defaults-mode-node': 3.0.25 + '@smithy/util-endpoints': 2.1.4 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-retry': 3.0.8 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-sso@3.679.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/middleware-host-header': 3.679.0 + '@aws-sdk/middleware-logger': 3.679.0 + '@aws-sdk/middleware-recursion-detection': 3.679.0 + '@aws-sdk/middleware-user-agent': 3.679.0 + '@aws-sdk/region-config-resolver': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-endpoints': 3.679.0 + '@aws-sdk/util-user-agent-browser': 3.679.0 + '@aws-sdk/util-user-agent-node': 3.679.0 + '@smithy/config-resolver': 3.0.10 + '@smithy/core': 2.5.1 + '@smithy/fetch-http-handler': 3.2.9 + '@smithy/hash-node': 3.0.8 + '@smithy/invalid-dependency': 3.0.8 + '@smithy/middleware-content-length': 3.0.10 + '@smithy/middleware-endpoint': 3.2.1 + '@smithy/middleware-retry': 3.0.25 + '@smithy/middleware-serde': 3.0.8 + '@smithy/middleware-stack': 3.0.8 + '@smithy/node-config-provider': 3.1.9 + '@smithy/node-http-handler': 3.2.5 + '@smithy/protocol-http': 4.1.5 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/url-parser': 3.0.8 + '@smithy/util-base64': 3.0.0 + '@smithy/util-body-length-browser': 3.0.0 + 
'@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.25 + '@smithy/util-defaults-mode-node': 3.0.25 + '@smithy/util-endpoints': 2.1.4 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-retry': 3.0.8 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-sts@3.679.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/client-sso-oidc': 3.679.0(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/core': 3.679.0 + '@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/middleware-host-header': 3.679.0 + '@aws-sdk/middleware-logger': 3.679.0 + '@aws-sdk/middleware-recursion-detection': 3.679.0 + '@aws-sdk/middleware-user-agent': 3.679.0 + '@aws-sdk/region-config-resolver': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-endpoints': 3.679.0 + '@aws-sdk/util-user-agent-browser': 3.679.0 + '@aws-sdk/util-user-agent-node': 3.679.0 + '@smithy/config-resolver': 3.0.10 + '@smithy/core': 2.5.1 + '@smithy/fetch-http-handler': 3.2.9 + '@smithy/hash-node': 3.0.8 + '@smithy/invalid-dependency': 3.0.8 + '@smithy/middleware-content-length': 3.0.10 + '@smithy/middleware-endpoint': 3.2.1 + '@smithy/middleware-retry': 3.0.25 + '@smithy/middleware-serde': 3.0.8 + '@smithy/middleware-stack': 3.0.8 + '@smithy/node-config-provider': 3.1.9 + '@smithy/node-http-handler': 3.2.5 + '@smithy/protocol-http': 4.1.5 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/url-parser': 3.0.8 + '@smithy/util-base64': 3.0.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.25 + '@smithy/util-defaults-mode-node': 3.0.25 + '@smithy/util-endpoints': 2.1.4 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-retry': 3.0.8 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + transitivePeerDependencies: + - aws-crt 
+ + '@aws-sdk/core@3.679.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/core': 2.5.1 + '@smithy/node-config-provider': 3.1.9 + '@smithy/property-provider': 3.1.8 + '@smithy/protocol-http': 4.1.5 + '@smithy/signature-v4': 4.2.1 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/util-middleware': 3.0.8 + fast-xml-parser: 4.4.1 + tslib: 2.6.3 + + '@aws-sdk/credential-provider-cognito-identity@3.679.0': + dependencies: + '@aws-sdk/client-cognito-identity': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@smithy/property-provider': 3.1.8 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/credential-provider-env@3.679.0': + dependencies: + '@aws-sdk/core': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@smithy/property-provider': 3.1.8 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/credential-provider-http@3.679.0': + dependencies: + '@aws-sdk/core': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@smithy/fetch-http-handler': 3.2.9 + '@smithy/node-http-handler': 3.2.5 + '@smithy/property-provider': 3.1.8 + '@smithy/protocol-http': 4.1.5 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/util-stream': 3.2.1 + tslib: 2.6.3 + + '@aws-sdk/credential-provider-ini@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0)': + dependencies: + '@aws-sdk/client-sts': 3.679.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/credential-provider-env': 3.679.0 + '@aws-sdk/credential-provider-http': 3.679.0 + '@aws-sdk/credential-provider-process': 3.679.0 + '@aws-sdk/credential-provider-sso': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) + '@aws-sdk/credential-provider-web-identity': 3.679.0(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/types': 3.679.0 + '@smithy/credential-provider-imds': 3.2.5 + '@smithy/property-provider': 3.1.8 + '@smithy/shared-ini-file-loader': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + transitivePeerDependencies: + - 
'@aws-sdk/client-sso-oidc' + - aws-crt + + '@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0)': + dependencies: + '@aws-sdk/credential-provider-env': 3.679.0 + '@aws-sdk/credential-provider-http': 3.679.0 + '@aws-sdk/credential-provider-ini': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/credential-provider-process': 3.679.0 + '@aws-sdk/credential-provider-sso': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) + '@aws-sdk/credential-provider-web-identity': 3.679.0(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/types': 3.679.0 + '@smithy/credential-provider-imds': 3.2.5 + '@smithy/property-provider': 3.1.8 + '@smithy/shared-ini-file-loader': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + transitivePeerDependencies: + - '@aws-sdk/client-sso-oidc' + - '@aws-sdk/client-sts' + - aws-crt + + '@aws-sdk/credential-provider-process@3.679.0': + dependencies: + '@aws-sdk/core': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@smithy/property-provider': 3.1.8 + '@smithy/shared-ini-file-loader': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/credential-provider-sso@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))': + dependencies: + '@aws-sdk/client-sso': 3.679.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/token-providers': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) + '@aws-sdk/types': 3.679.0 + '@smithy/property-provider': 3.1.8 + '@smithy/shared-ini-file-loader': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + transitivePeerDependencies: + - '@aws-sdk/client-sso-oidc' + - aws-crt + + '@aws-sdk/credential-provider-web-identity@3.679.0(@aws-sdk/client-sts@3.679.0)': + dependencies: + '@aws-sdk/client-sts': 3.679.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@smithy/property-provider': 3.1.8 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + 
'@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))': + dependencies: + '@aws-sdk/client-cognito-identity': 3.679.0 + '@aws-sdk/client-sso': 3.679.0 + '@aws-sdk/client-sts': 3.679.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/credential-provider-cognito-identity': 3.679.0 + '@aws-sdk/credential-provider-env': 3.679.0 + '@aws-sdk/credential-provider-http': 3.679.0 + '@aws-sdk/credential-provider-ini': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/credential-provider-process': 3.679.0 + '@aws-sdk/credential-provider-sso': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) + '@aws-sdk/credential-provider-web-identity': 3.679.0(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/types': 3.679.0 + '@smithy/credential-provider-imds': 3.2.5 + '@smithy/property-provider': 3.1.8 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + transitivePeerDependencies: + - '@aws-sdk/client-sso-oidc' + - aws-crt + + '@aws-sdk/middleware-host-header@3.679.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/protocol-http': 4.1.5 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/middleware-logger@3.679.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/middleware-recursion-detection@3.679.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/protocol-http': 4.1.5 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/middleware-user-agent@3.679.0': + dependencies: + '@aws-sdk/core': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-endpoints': 3.679.0 + '@smithy/core': 2.5.1 + '@smithy/protocol-http': 4.1.5 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/protocol-http@3.374.0': + dependencies: + '@smithy/protocol-http': 1.2.0 + tslib: 2.6.3 + + 
'@aws-sdk/region-config-resolver@3.679.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/node-config-provider': 3.1.9 + '@smithy/types': 3.6.0 + '@smithy/util-config-provider': 3.0.0 + '@smithy/util-middleware': 3.0.8 + tslib: 2.6.3 + + '@aws-sdk/signature-v4@3.374.0': + dependencies: + '@smithy/signature-v4': 1.1.0 + tslib: 2.6.3 + + '@aws-sdk/token-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))': + dependencies: + '@aws-sdk/client-sso-oidc': 3.679.0(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/types': 3.679.0 + '@smithy/property-provider': 3.1.8 + '@smithy/shared-ini-file-loader': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/types@3.679.0': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/util-endpoints@3.679.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/types': 3.6.0 + '@smithy/util-endpoints': 2.1.4 + tslib: 2.6.3 + + '@aws-sdk/util-locate-window@3.679.0': + dependencies: + tslib: 2.6.3 + + '@aws-sdk/util-user-agent-browser@3.679.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/types': 3.6.0 + bowser: 2.11.0 + tslib: 2.6.3 + + '@aws-sdk/util-user-agent-node@3.679.0': + dependencies: + '@aws-sdk/middleware-user-agent': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@smithy/node-config-provider': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/util-utf8-browser@3.259.0': + dependencies: + tslib: 2.6.3 + '@babel/code-frame@7.24.6': dependencies: '@babel/highlight': 7.24.6 @@ -5040,7 +5926,7 @@ snapshots: '@devil7softwares/pos@1.0.2': {} - '@dqbd/tiktoken@1.0.15': {} + '@dqbd/tiktoken@1.0.17': {} '@flydotio/dockerfile@0.4.11': dependencies: @@ -5338,13 +6224,13 @@ snapshots: '@js-sdsl/ordered-map@4.4.2': {} - 
'@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))': + '@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))': dependencies: ansi-styles: 5.2.0 camelcase: 6.3.0 decamelize: 1.2.0 js-tiktoken: 1.0.12 - langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + langsmith: 0.1.34(zyeavx4tfqw3smbbpiinhfxxeu) ml-distance: 4.0.1 mustache: 4.2.0 p-queue: 6.6.2 @@ -5356,9 +6242,9 @@ snapshots: - langchain - openai - 
'@langchain/openai@0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))': + '@langchain/openai@0.2.1(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))': dependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) js-tiktoken: 1.0.12 openai: 4.57.0(zod@3.23.8) zod: 3.23.8 @@ -5367,9 +6253,9 @@ snapshots: - encoding - langchain - 
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))': + '@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))': dependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) js-tiktoken: 1.0.12 transitivePeerDependencies: - langchain @@ -6495,6 +7381,340 @@ snapshots: 
dependencies: '@sinonjs/commons': 3.0.1 + '@smithy/abort-controller@3.1.6': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/config-resolver@3.0.10': + dependencies: + '@smithy/node-config-provider': 3.1.9 + '@smithy/types': 3.6.0 + '@smithy/util-config-provider': 3.0.0 + '@smithy/util-middleware': 3.0.8 + tslib: 2.6.3 + + '@smithy/core@2.5.1': + dependencies: + '@smithy/middleware-serde': 3.0.8 + '@smithy/protocol-http': 4.1.5 + '@smithy/types': 3.6.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-stream': 3.2.1 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + + '@smithy/credential-provider-imds@3.2.5': + dependencies: + '@smithy/node-config-provider': 3.1.9 + '@smithy/property-provider': 3.1.8 + '@smithy/types': 3.6.0 + '@smithy/url-parser': 3.0.8 + tslib: 2.6.3 + + '@smithy/eventstream-codec@1.1.0': + dependencies: + '@aws-crypto/crc32': 3.0.0 + '@smithy/types': 1.2.0 + '@smithy/util-hex-encoding': 1.1.0 + tslib: 2.6.3 + + '@smithy/fetch-http-handler@3.2.9': + dependencies: + '@smithy/protocol-http': 4.1.5 + '@smithy/querystring-builder': 3.0.8 + '@smithy/types': 3.6.0 + '@smithy/util-base64': 3.0.0 + tslib: 2.6.3 + + '@smithy/fetch-http-handler@4.0.0': + dependencies: + '@smithy/protocol-http': 4.1.5 + '@smithy/querystring-builder': 3.0.8 + '@smithy/types': 3.6.0 + '@smithy/util-base64': 3.0.0 + tslib: 2.6.3 + + '@smithy/hash-node@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + '@smithy/util-buffer-from': 3.0.0 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + + '@smithy/invalid-dependency@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/is-array-buffer@1.1.0': + dependencies: + tslib: 2.6.3 + + '@smithy/is-array-buffer@2.2.0': + dependencies: + tslib: 2.6.3 + + '@smithy/is-array-buffer@3.0.0': + dependencies: + tslib: 2.6.3 + + '@smithy/middleware-content-length@3.0.10': + dependencies: + '@smithy/protocol-http': 4.1.5 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + 
'@smithy/middleware-endpoint@3.2.1': + dependencies: + '@smithy/core': 2.5.1 + '@smithy/middleware-serde': 3.0.8 + '@smithy/node-config-provider': 3.1.9 + '@smithy/shared-ini-file-loader': 3.1.9 + '@smithy/types': 3.6.0 + '@smithy/url-parser': 3.0.8 + '@smithy/util-middleware': 3.0.8 + tslib: 2.6.3 + + '@smithy/middleware-retry@3.0.25': + dependencies: + '@smithy/node-config-provider': 3.1.9 + '@smithy/protocol-http': 4.1.5 + '@smithy/service-error-classification': 3.0.8 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-retry': 3.0.8 + tslib: 2.6.3 + uuid: 9.0.1 + + '@smithy/middleware-serde@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/middleware-stack@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/node-config-provider@3.1.9': + dependencies: + '@smithy/property-provider': 3.1.8 + '@smithy/shared-ini-file-loader': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/node-http-handler@3.2.5': + dependencies: + '@smithy/abort-controller': 3.1.6 + '@smithy/protocol-http': 4.1.5 + '@smithy/querystring-builder': 3.0.8 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/property-provider@3.1.8': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/protocol-http@1.2.0': + dependencies: + '@smithy/types': 1.2.0 + tslib: 2.6.3 + + '@smithy/protocol-http@4.1.5': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/querystring-builder@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + '@smithy/util-uri-escape': 3.0.0 + tslib: 2.6.3 + + '@smithy/querystring-parser@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/service-error-classification@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + + '@smithy/shared-ini-file-loader@3.1.9': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/signature-v4@1.1.0': + dependencies: + '@smithy/eventstream-codec': 1.1.0 + 
'@smithy/is-array-buffer': 1.1.0 + '@smithy/types': 1.2.0 + '@smithy/util-hex-encoding': 1.1.0 + '@smithy/util-middleware': 1.1.0 + '@smithy/util-uri-escape': 1.1.0 + '@smithy/util-utf8': 1.1.0 + tslib: 2.6.3 + + '@smithy/signature-v4@4.2.1': + dependencies: + '@smithy/is-array-buffer': 3.0.0 + '@smithy/protocol-http': 4.1.5 + '@smithy/types': 3.6.0 + '@smithy/util-hex-encoding': 3.0.0 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-uri-escape': 3.0.0 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + + '@smithy/smithy-client@3.4.2': + dependencies: + '@smithy/core': 2.5.1 + '@smithy/middleware-endpoint': 3.2.1 + '@smithy/middleware-stack': 3.0.8 + '@smithy/protocol-http': 4.1.5 + '@smithy/types': 3.6.0 + '@smithy/util-stream': 3.2.1 + tslib: 2.6.3 + + '@smithy/types@1.2.0': + dependencies: + tslib: 2.6.3 + + '@smithy/types@3.6.0': + dependencies: + tslib: 2.6.3 + + '@smithy/url-parser@3.0.8': + dependencies: + '@smithy/querystring-parser': 3.0.8 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/util-base64@3.0.0': + dependencies: + '@smithy/util-buffer-from': 3.0.0 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + + '@smithy/util-body-length-browser@3.0.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-body-length-node@3.0.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-buffer-from@1.1.0': + dependencies: + '@smithy/is-array-buffer': 1.1.0 + tslib: 2.6.3 + + '@smithy/util-buffer-from@2.2.0': + dependencies: + '@smithy/is-array-buffer': 2.2.0 + tslib: 2.6.3 + + '@smithy/util-buffer-from@3.0.0': + dependencies: + '@smithy/is-array-buffer': 3.0.0 + tslib: 2.6.3 + + '@smithy/util-config-provider@3.0.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-defaults-mode-browser@3.0.25': + dependencies: + '@smithy/property-provider': 3.1.8 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + bowser: 2.11.0 + tslib: 2.6.3 + + '@smithy/util-defaults-mode-node@3.0.25': + dependencies: + '@smithy/config-resolver': 3.0.10 + '@smithy/credential-provider-imds': 3.2.5 + 
'@smithy/node-config-provider': 3.1.9 + '@smithy/property-provider': 3.1.8 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/util-endpoints@2.1.4': + dependencies: + '@smithy/node-config-provider': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/util-hex-encoding@1.1.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-hex-encoding@3.0.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-middleware@1.1.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-middleware@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/util-retry@3.0.8': + dependencies: + '@smithy/service-error-classification': 3.0.8 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/util-stream@3.2.1': + dependencies: + '@smithy/fetch-http-handler': 4.0.0 + '@smithy/node-http-handler': 3.2.5 + '@smithy/types': 3.6.0 + '@smithy/util-base64': 3.0.0 + '@smithy/util-buffer-from': 3.0.0 + '@smithy/util-hex-encoding': 3.0.0 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + + '@smithy/util-uri-escape@1.1.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-uri-escape@3.0.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-utf8@1.1.0': + dependencies: + '@smithy/util-buffer-from': 1.1.0 + tslib: 2.6.3 + + '@smithy/util-utf8@2.3.0': + dependencies: + '@smithy/util-buffer-from': 2.2.0 + tslib: 2.6.3 + + '@smithy/util-utf8@3.0.0': + dependencies: + '@smithy/util-buffer-from': 3.0.0 + tslib: 2.6.3 + + '@smithy/util-waiter@3.1.7': + dependencies: + '@smithy/abort-controller': 3.1.6 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + '@stdlib/assert-has-own-property@0.0.7': {} '@stdlib/assert-has-symbol-support@0.0.8': @@ -7106,6 +8326,8 @@ snapshots: bottleneck@2.19.5: {} + bowser@2.11.0: {} + brace-expansion@1.1.11: dependencies: balanced-match: 1.0.2 @@ -7263,6 +8485,25 @@ snapshots: co@4.6.0: {} + cohere-ai@7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)): + dependencies: + '@aws-sdk/client-sagemaker': 3.679.0 + 
'@aws-sdk/credential-providers': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) + '@aws-sdk/protocol-http': 3.374.0 + '@aws-sdk/signature-v4': 3.374.0 + form-data: 4.0.0 + form-data-encoder: 4.0.2 + formdata-node: 6.0.3 + js-base64: 3.7.2 + node-fetch: 2.7.0 + qs: 6.11.2 + readable-stream: 4.5.2 + url-join: 4.0.1 + transitivePeerDependencies: + - '@aws-sdk/client-sso-oidc' + - aws-crt + - encoding + cohere@1.1.1: {} collect-v8-coverage@1.0.2: {} @@ -7650,6 +8891,10 @@ snapshots: fast-safe-stringify@2.1.1: {} + fast-xml-parser@4.4.1: + dependencies: + strnum: 1.0.5 + fb-watchman@2.0.2: dependencies: bser: 2.1.1 @@ -7701,6 +8946,8 @@ snapshots: form-data-encoder@1.7.2: {} + form-data-encoder@4.0.2: {} + form-data@4.0.0: dependencies: asynckit: 0.4.0 @@ -7712,6 +8959,8 @@ snapshots: node-domexception: 1.0.0 web-streams-polyfill: 4.0.0-beta.3 + formdata-node@6.0.3: {} + formdata-polyfill@4.0.10: dependencies: fetch-blob: 3.2.0 @@ -8447,6 +9696,8 @@ snapshots: joplin-turndown-plugin-gfm@1.0.12: {} + js-base64@3.7.2: {} + js-beautify@1.15.1: dependencies: config-chain: 1.1.13 @@ -8513,17 +9764,17 @@ snapshots: koffi@2.9.0: {} - langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0): + langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0): 
dependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) - '@langchain/openai': 0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)) - '@langchain/textsplitters': 0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + '@langchain/openai': 
0.2.1(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)) + '@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) binary-extensions: 2.3.0 js-tiktoken: 1.0.12 js-yaml: 4.1.0 jsonpointer: 5.0.1 langchainhub: 0.0.11 - langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + langsmith: 0.1.34(zyeavx4tfqw3smbbpiinhfxxeu) ml-distance: 4.0.1 
openapi-types: 12.1.3 p-retry: 4.6.2 @@ -8532,14 +9783,16 @@ snapshots: zod: 3.23.8 zod-to-json-schema: 3.23.1(zod@3.23.8) optionalDependencies: + '@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) '@supabase/supabase-js': 2.44.2 axios: 1.7.2 cheerio: 1.0.0-rc.12 + fast-xml-parser: 4.4.1 handlebars: 4.7.8 html-to-text: 9.0.5 ioredis: 5.4.1 mammoth: 1.7.2 - mongodb: 6.6.2(socks@2.8.3) + mongodb: 6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3) pdf-parse: 1.1.1 puppeteer: 22.12.1(typescript@5.4.5) redis: 4.6.14 @@ -8550,7 +9803,7 @@ snapshots: langchainhub@0.0.11: {} - langsmith@0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)): + langsmith@0.1.34(zyeavx4tfqw3smbbpiinhfxxeu): dependencies: '@types/uuid': 9.0.8 commander: 10.0.1 @@ -8559,8 +9812,8 @@ snapshots: p-retry: 4.6.2 uuid: 9.0.1 optionalDependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) - langchain: 
0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) + '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) openai: 4.57.0(zod@3.23.8) languagedetect@2.0.0: {} @@ -8768,19 +10021,20 @@ snapshots: '@types/whatwg-url': 11.0.5 whatwg-url: 13.0.0 - mongodb@6.6.2(socks@2.8.3): + mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3): dependencies: '@mongodb-js/saslprep': 1.1.7 bson: 6.8.0 mongodb-connection-string-url: 3.0.1 optionalDependencies: + '@aws-sdk/credential-providers': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) socks: 2.8.3 - mongoose@8.4.4(socks@2.8.3): + 
mongoose@8.4.4(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3): dependencies: bson: 6.8.0 kareem: 2.6.3 - mongodb: 6.6.2(socks@2.8.3) + mongodb: 6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3) mpath: 0.9.0 mquery: 5.0.0 ms: 2.1.3 @@ -8829,7 +10083,7 @@ snapshots: natural-compare@1.4.0: {} - natural@7.0.7(socks@2.8.3): + natural@7.0.7(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3): dependencies: afinn-165: 1.0.4 afinn-165-financialmarketnews: 3.0.0 @@ -8837,7 +10091,7 @@ snapshots: dotenv: 16.4.5 http-server: 14.1.1 memjs: 1.3.2 - mongoose: 8.4.4(socks@2.8.3) + mongoose: 8.4.4(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3) pg: 8.12.0 redis: 4.6.14 safe-stable-stringify: 2.4.3 @@ -9208,7 +10462,7 @@ snapshots: csv-parse: 5.5.6 gpt3-tokenizer: 1.1.5 openai: 3.3.0 - typescript: 5.6.2 + typescript: 5.6.3 uuid: 9.0.1 zod: 3.23.8 transitivePeerDependencies: @@ -9295,6 +10549,10 @@ snapshots: dependencies: side-channel: 1.0.6 + qs@6.11.2: + dependencies: + side-channel: 1.0.6 + qs@6.12.2: dependencies: side-channel: 1.0.6 @@ -9643,6 +10901,8 @@ snapshots: '@types/node': 20.14.1 qs: 6.12.2 + strnum@1.0.5: {} + supabase@1.172.2: dependencies: bin-links: 4.0.4 @@ -9806,7 +11066,7 @@ snapshots: typescript@5.4.5: {} - typescript@5.6.2: {} + typescript@5.6.3: {} typesense@1.8.2(@babel/runtime@7.24.6): dependencies: diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts new file mode 100644 index 00000000..f4e93e7f --- /dev/null +++ b/apps/api/src/controllers/v1/extract.ts @@ -0,0 +1,197 @@ +import { Request, Response } from "express"; +import { Logger } from "../../lib/logger"; +import { + Document, + legacyDocumentConverter, + legacyExtractorOptions, + 
legacyScrapeOptions, + RequestWithAuth, + ExtractRequest, + extractRequestSchema, + ExtractResponse, + legacyCrawlerOptions, + MapDocument, +} from "./types"; +import { billTeam } from "../../services/billing/credit_billing"; +import { v4 as uuidv4 } from "uuid"; +import { numTokensFromString } from "../../lib/LLM-extraction/helpers"; +import { addScrapeJob, waitForJob } from "../../services/queue-jobs"; +import { logJob } from "../../services/logging/log_job"; +import { getJobPriority } from "../../lib/job-priority"; +import { PlanType } from "../../types"; +import { getMapResults } from "./map"; +import { rerankDocuments } from "../../lib/extract/reranker"; +import { generateBasicCompletion } from "../../lib/extract/completions"; + + + +export async function extractController( + req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>, + res: Response +) { + req.body = extractRequestSchema.parse(req.body); + let earlyReturn = false; + + const origin = req.body.origin; + const timeout = req.body.timeout; +// const pageOptions = legacyScrapeOptions(req.body); +// const extractorOptions = req.body.extract ? legacyExtractorOptions(req.body.extract) : undefined; + const jobId = uuidv4(); + + const startTime = new Date().getTime(); + const jobPriority = await getJobPriority({ + plan: req.auth.plan as PlanType, + team_id: req.auth.team_id, + basePriority: 10, + }); + + const urls = req.body.urls; + const mappedDocuments: MapDocument[] = []; + + const prompt = req.body.prompt; + const keywords = await generateBasicCompletion(`If the user's prompt is: "${prompt}", what are the most important keywords besides the extraction task? 
Output only the keywords, separated by commas.`); + + for (const url of urls) { + if (url.endsWith("/*")) { + const mapResults = await getMapResults({ + url: url.slice(0, -2), + search: req.body.prompt, + limit: 100, + ignoreSitemap: true, + includeSubdomains: false, + crawlerOptions: {}, + teamId: req.auth.team_id, + plan: req.auth.plan, + origin: req.body.origin, + subId: req.acuc?.sub_id, + includeMetadata: true + }); + // top 3 links + const top3Links = (mapResults.links as MapDocument[]).slice(0, 3); + console.log(top3Links); + // console.log(top3Links); + mappedDocuments.push(...(mapResults.links as MapDocument[])); + // transform mappedUrls to just documents + // we quickly rerank + const rerank = await rerankDocuments(mappedDocuments.map(x => `URL: ${x.url}\nTITLE: ${x.title}\nDESCRIPTION: ${x.description}`), "What URLs are most relevant to the following prompt: " + req.body.prompt.toLocaleLowerCase().replace("extract", " ").replace("extract ", " ")); + console.log(rerank); + } else { + mappedDocuments.push({ url }); + } + } + + req.body.urls = mappedDocuments.map(x => x.url); + + + +// const job = await addScrapeJob( +// { +// url: req.body.url, +// mode: "single_urls", +// crawlerOptions: {}, +// team_id: req.auth.team_id, +// plan: req.auth.plan, +// pageOptions, +// extractorOptions, +// origin: req.body.origin, +// is_scrape: true, +// }, +// {}, +// jobId, +// jobPriority +// ); + +// const totalWait = (req.body.waitFor ?? 0) + (req.body.actions ?? []).reduce((a,x) => (x.type === "wait" ? 
x.milliseconds : 0) + a, 0); + +// let doc: any | undefined; +// try { +// doc = (await waitForJob(job.id, timeout + totalWait))[0]; +// } catch (e) { +// Logger.error(`Error in scrapeController: ${e}`); +// if (e instanceof Error && e.message.startsWith("Job wait")) { +// return res.status(408).json({ +// success: false, +// error: "Request timed out", +// }); +// } else { +// return res.status(500).json({ +// success: false, +// error: `(Internal server error) - ${e && e?.message ? e.message : e} ${ +// extractorOptions && extractorOptions.mode !== "markdown" +// ? " - Could be due to LLM parsing issues" +// : "" +// }`, +// }); +// } +// } + +// await job.remove(); + +// if (!doc) { +// console.error("!!! PANIC DOC IS", doc, job); +// return res.status(200).json({ +// success: true, +// warning: "No page found", +// data: doc, +// }); +// } + +// delete doc.index; +// delete doc.provider; + +// const endTime = new Date().getTime(); +// const timeTakenInSeconds = (endTime - startTime) / 1000; +// const numTokens = +// doc && doc.markdown +// ? 
numTokensFromString(doc.markdown, "gpt-3.5-turbo") +// : 0; + +// let creditsToBeBilled = 1; // Assuming 1 credit per document +// if (earlyReturn) { +// // Don't bill if we're early returning +// return; +// } +// if(req.body.extract && req.body.formats.includes("extract")) { +// creditsToBeBilled = 5; +// } + +// billTeam(req.auth.team_id, req.acuc?.sub_id, creditsToBeBilled).catch(error => { +// Logger.error(`Failed to bill team ${req.auth.team_id} for ${creditsToBeBilled} credits: ${error}`); +// // Optionally, you could notify an admin or add to a retry queue here +// }); + +// if (!pageOptions || !pageOptions.includeRawHtml) { +// if (doc && doc.rawHtml) { +// delete doc.rawHtml; +// } +// } + +// if(pageOptions && pageOptions.includeExtract) { +// if(!pageOptions.includeMarkdown && doc && doc.markdown) { +// delete doc.markdown; +// } +// } + +// logJob({ +// job_id: jobId, +// success: true, +// message: "Scrape completed", +// num_docs: 1, +// docs: [doc], +// time_taken: timeTakenInSeconds, +// team_id: req.auth.team_id, +// mode: "scrape", +// url: req.body.url, +// crawlerOptions: {}, +// pageOptions: pageOptions, +// origin: origin, +// extractor_options: extractorOptions, +// num_tokens: numTokens, +// }); + + return res.status(200).json({ + success: true, + data: null, + scrape_id: origin?.includes("website") ? 
jobId : undefined, + }); +} diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts index 5ed3dd51..7e74b43e 100644 --- a/apps/api/src/controllers/v1/map.ts +++ b/apps/api/src/controllers/v1/map.ts @@ -15,11 +15,11 @@ import { removeDuplicateUrls, } from "../../lib/validateUrl"; import { fireEngineMap } from "../../search/fireEngine"; -import { billTeam } from "../../services/billing/credit_billing"; -import { logJob } from "../../services/logging/log_job"; import { performCosineSimilarity } from "../../lib/map-cosine"; import { Logger } from "../../lib/logger"; import Redis from "ioredis"; +import { billTeam } from "../../services/billing/credit_billing"; +import { logJob } from "../../services/logging/log_job"; configDotenv(); const redis = new Redis(process.env.REDIS_URL); @@ -29,35 +29,50 @@ const MAX_MAP_LIMIT = 5000; // Max Links that "Smart /map" can return const MAX_FIRE_ENGINE_RESULTS = 1000; -export async function mapController( - req: RequestWithAuth<{}, MapResponse, MapRequest>, - res: Response -) { +interface MapOptions { + url: string; + search?: string; + limit?: number; + ignoreSitemap?: boolean; + includeSubdomains?: boolean; + crawlerOptions?: any; + teamId: string; + plan: string; + origin?: string; + subId?: string; + includeMetadata?: boolean; +} + +export async function getMapResults({ + url, + search, + limit = MAX_MAP_LIMIT, + ignoreSitemap = false, + includeSubdomains = false, + crawlerOptions = {}, + teamId, + plan, + origin, + subId, + includeMetadata = false, +}: MapOptions) { const startTime = new Date().getTime(); - - req.body = mapRequestSchema.parse(req.body); - - const limit: number = req.body.limit ?? 
MAX_MAP_LIMIT; - const id = uuidv4(); - let links: string[] = [req.body.url]; + let links: { url: string; title?: string; description?: string }[] = [{ url }]; const sc: StoredCrawl = { - originUrl: req.body.url, - crawlerOptions: legacyCrawlerOptions(req.body), + originUrl: url, + crawlerOptions, pageOptions: {}, - team_id: req.auth.team_id, + team_id: teamId, createdAt: Date.now(), - plan: req.auth.plan, + plan, }; const crawler = crawlToCrawler(id, sc); - let urlWithoutWww = req.body.url.replace("www.", ""); - - let mapUrl = req.body.search - ? `"${req.body.search}" site:${urlWithoutWww}` - : `site:${req.body.url}`; + let urlWithoutWww = url.replace("www.", ""); + let mapUrl = search ? `"${search}" site:${urlWithoutWww}` : `site:${url}`; const resultsPerPage = 100; const maxPages = Math.ceil(Math.min(MAX_FIRE_ENGINE_RESULTS, limit) / resultsPerPage); @@ -81,12 +96,11 @@ export async function mapController( pagePromises = Array.from({ length: maxPages }, (_, i) => fetchPage(i + 1)); allResults = await Promise.all(pagePromises); - await redis.set(cacheKey, JSON.stringify(allResults), "EX", 24 * 60 * 60); // Cache for 24 hours + await redis.set(cacheKey, JSON.stringify(allResults), "EX", 24 * 60 * 60); } - // Parallelize sitemap fetch with serper search const [sitemap, ...searchResults] = await Promise.all([ - req.body.ignoreSitemap ? null : crawler.tryGetSitemap(), + ignoreSitemap ? null : crawler.tryGetSitemap(), ...(cachedResult ? 
[] : pagePromises), ]); @@ -96,7 +110,7 @@ export async function mapController( if (sitemap !== null) { sitemap.forEach((x) => { - links.push(x.url); + links.push({ url: x.url }); }); } @@ -110,67 +124,96 @@ export async function mapController( } if (mapResults.length > 0) { - if (req.body.search) { - // Ensure all map results are first, maintaining their order + if (search) { links = [ - mapResults[0].url, - ...mapResults.slice(1).map((x) => x.url), + { url: mapResults[0].url, title: mapResults[0].title, description: mapResults[0].description }, + ...mapResults.slice(1).map((x) => ({ + url: x.url, + title: x.title, + description: x.description + })), ...links, ]; } else { - mapResults.map((x) => { - links.push(x.url); + mapResults.forEach((x) => { + links.push({ + url: x.url, + title: x.title, + description: x.description + }); }); } } - // Perform cosine similarity between the search query and the list of links - if (req.body.search) { - const searchQuery = req.body.search.toLowerCase(); - - links = performCosineSimilarity(links, searchQuery); + if (search) { + const filteredLinks = performCosineSimilarity(links.map(l => l.url), search.toLowerCase()); + links = links.filter(l => filteredLinks.includes(l.url)); } links = links .map((x) => { try { - return checkAndUpdateURLForMap(x).url.trim(); + return { ...x, url: checkAndUpdateURLForMap(x.url).url.trim() }; } catch (_) { return null; } }) .filter((x) => x !== null); - // allows for subdomains to be included - links = links.filter((x) => isSameDomain(x, req.body.url)); + links = links.filter((x) => isSameDomain(x.url, url)); - // if includeSubdomains is false, filter out subdomains - if (!req.body.includeSubdomains) { - links = links.filter((x) => isSameSubdomain(x, req.body.url)); + if (!includeSubdomains) { + links = links.filter((x) => isSameSubdomain(x.url, url)); } - // remove duplicates that could be due to http/https or www - links = removeDuplicateUrls(links); - - billTeam(req.auth.team_id, 
req.acuc?.sub_id, 1).catch((error) => { - Logger.error( - `Failed to bill team ${req.auth.team_id} for 1 credit: ${error}` - ); - // Optionally, you could notify an admin or add to a retry queue here - }); + links = removeDuplicateUrls(links.map(l => l.url)).map(url => links.find(l => l.url === url)); const endTime = new Date().getTime(); const timeTakenInSeconds = (endTime - startTime) / 1000; const linksToReturn = links.slice(0, limit); - logJob({ - job_id: id, - success: links.length > 0, + return { + links: includeMetadata ? linksToReturn : linksToReturn.map(l => l.url), + scrapeId: origin?.includes("website") ? id : undefined, + timeTakenInSeconds, + id, + linksLength: links.length, + linksToReturnLength: linksToReturn.length, + docs: linksToReturn.map(l => l.url), + }; +} + +export async function mapController( + req: RequestWithAuth<{}, MapResponse, MapRequest>, + res: Response +) { + req.body = mapRequestSchema.parse(req.body); + + const results = await getMapResults({ + url: req.body.url, + search: req.body.search, + limit: req.body.limit, + ignoreSitemap: req.body.ignoreSitemap, + includeSubdomains: req.body.includeSubdomains, + crawlerOptions: legacyCrawlerOptions(req.body), + teamId: req.auth.team_id, + plan: req.auth.plan, + origin: req.body.origin, + subId: req.acuc?.sub_id, + }); + + await billTeam(req.auth.team_id, req.acuc?.sub_id, 1).catch((error) => { + Logger.error(`Failed to bill team ${req.auth.team_id} for 1 credit: ${error}`); + }); + + await logJob({ + job_id: results.id, + success: results.linksLength > 0, message: "Map completed", - num_docs: linksToReturn.length, - docs: linksToReturn, - time_taken: timeTakenInSeconds, + num_docs: results.linksToReturnLength, + docs: results.docs, + time_taken: results.timeTakenInSeconds, team_id: req.auth.team_id, mode: "map", url: req.body.url, @@ -183,55 +226,7 @@ export async function mapController( return res.status(200).json({ success: true, - links: linksToReturn, - scrape_id: 
req.body.origin?.includes("website") ? id : undefined, + links: results.links.map(l => l.url), + scrape_id: results.scrapeId, }); } - -// Subdomain sitemap url checking - -// // For each result, check for subdomains, get their sitemaps and add them to the links -// const processedUrls = new Set(); -// const processedSubdomains = new Set(); - -// for (const result of links) { -// let url; -// let hostParts; -// try { -// url = new URL(result); -// hostParts = url.hostname.split('.'); -// } catch (e) { -// continue; -// } - -// console.log("hostParts", hostParts); -// // Check if it's a subdomain (more than 2 parts, and not 'www') -// if (hostParts.length > 2 && hostParts[0] !== 'www') { -// const subdomain = hostParts[0]; -// console.log("subdomain", subdomain); -// const subdomainUrl = `${url.protocol}//${subdomain}.${hostParts.slice(-2).join('.')}`; -// console.log("subdomainUrl", subdomainUrl); - -// if (!processedSubdomains.has(subdomainUrl)) { -// processedSubdomains.add(subdomainUrl); - -// const subdomainCrawl = crawlToCrawler(id, { -// originUrl: subdomainUrl, -// crawlerOptions: legacyCrawlerOptions(req.body), -// pageOptions: {}, -// team_id: req.auth.team_id, -// createdAt: Date.now(), -// plan: req.auth.plan, -// }); -// const subdomainSitemap = await subdomainCrawl.tryGetSitemap(); -// if (subdomainSitemap) { -// subdomainSitemap.forEach((x) => { -// if (!processedUrls.has(x.url)) { -// processedUrls.add(x.url); -// links.push(x.url); -// } -// }); -// } -// } -// } -// } diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 22ac6294..4e31dd86 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -121,8 +121,21 @@ export const scrapeOptions = z.object({ }).strict(strictMessage) + export type ScrapeOptions = z.infer; +export const extractV1Options = z.object({ + urls: url.array(), + prompt: z.string().optional(), + schema: z.any().optional(), + origin: 
z.string().optional().default("api"), + timeout: z.number().int().positive().finite().safe().default(60000), +}).strict(strictMessage) + +export type ExtractV1Options = z.infer; +export const extractRequestSchema = extractV1Options; +export type ExtractRequest = z.infer; + export const scrapeRequestSchema = scrapeOptions.extend({ url, origin: z.string().optional().default("api"), @@ -142,6 +155,8 @@ export const scrapeRequestSchema = scrapeOptions.extend({ return obj; }); + + export type ScrapeRequest = z.infer; export const batchScrapeRequestSchema = scrapeOptions.extend({ @@ -296,6 +311,21 @@ export interface ScrapeResponseRequestTest { error?: string; } +export type ExtractResponse = + | ErrorResponse + | { + success: true; + warning?: string; + data: Document; + scrape_id?: string; + }; + +export interface ExtractResponseRequestTest { + statusCode: number; + body: ExtractResponse; + error?: string; +} + export type CrawlResponse = | ErrorResponse | { @@ -492,3 +522,11 @@ export function legacyDocumentConverter(doc: any): Document { }, }; } + + + +export interface MapDocument { + url: string; + title?: string; + description?: string; +} \ No newline at end of file diff --git a/apps/api/src/lib/extract/completions.ts b/apps/api/src/lib/extract/completions.ts new file mode 100644 index 00000000..230584b4 --- /dev/null +++ b/apps/api/src/lib/extract/completions.ts @@ -0,0 +1,119 @@ +import OpenAI from "openai"; +import { encoding_for_model } from "@dqbd/tiktoken"; +import { TiktokenModel } from "@dqbd/tiktoken"; +import { ExtractOptions } from "../../controllers/v1/types"; +import { Document } from "../entities"; +import { z } from "zod"; + +const maxTokens = 32000; +const modifier = 4; + +export class LLMRefusalError extends Error { + constructor(refusal: string) { + super("LLM refused to extract the website's content"); + this.name = "LLMRefusalError"; + } +} + +interface GenerateCompletionsParams { + systemPrompt?: string; + prompt?: string; + schema?: any; + 
pagesContent: string; +} + +export async function generateBasicCompletion(prompt: string) { + const openai = new OpenAI(); + const model: TiktokenModel = + (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini"; + + const completion = await openai.chat.completions.create({ + model, + messages: [{ role: "user", content: prompt }], + }); + + return completion.choices[0].message.content; +} + +export async function generateFinalExtraction({ + pagesContent, + systemPrompt, + prompt, + schema, +}: GenerateCompletionsParams): Promise<{ + content: string; + metadata: { numTokens: number; warning: string }; +}> { + const openai = new OpenAI(); + const model: TiktokenModel = + (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini"; + + let extractionContent = pagesContent; + let numTokens = 0; + let warning = ""; + + const encoder = encoding_for_model(model); + try { + const tokens = encoder.encode(extractionContent); + numTokens = tokens.length; + } catch (error) { + extractionContent = extractionContent.slice(0, maxTokens * modifier); + warning = `Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (${maxTokens}) we support.`; + } finally { + encoder.free(); + } + + if (numTokens > maxTokens) { + extractionContent = extractionContent.slice(0, maxTokens * modifier); + warning = `The extraction content would have used more tokens (${numTokens}) than the maximum we allow (${maxTokens}). 
-- the input has been automatically trimmed.`; + } + + if (schema && (schema.type === "array" || schema._type === "ZodArray")) { + schema = { + type: "object", + properties: { + items: schema, + }, + required: ["items"], + additionalProperties: false, + }; + } + + const jsonCompletion = await openai.beta.chat.completions.parse({ + model, + messages: [ + { role: "system", content: systemPrompt }, + { role: "user", content: [{ type: "text", text: extractionContent }] }, + { + role: "user", + content: prompt + ? `Transform the above content into structured JSON output based on the following user request: ${prompt}` + : "Transform the above content into structured JSON output.", + }, + ], + response_format: schema + ? { + type: "json_schema", + json_schema: { + name: "websiteContent", + schema: schema.shape, + strict: true, + }, + } + : { type: "json_object" }, + }); + + if (jsonCompletion.choices[0].message.refusal !== null) { + throw new LLMRefusalError(jsonCompletion.choices[0].message.refusal); + } + + const extraction = jsonCompletion.choices[0].message.parsed; + + return { + content: extraction, + metadata: { + numTokens, + warning, + }, + }; +} diff --git a/apps/api/src/lib/extract/reranker.ts b/apps/api/src/lib/extract/reranker.ts new file mode 100644 index 00000000..30aca441 --- /dev/null +++ b/apps/api/src/lib/extract/reranker.ts @@ -0,0 +1,22 @@ +import { CohereClient } from "cohere-ai"; +import { MapDocument } from "../../controllers/v1/types"; +const cohere = new CohereClient({ + token: process.env.COHERE_API_KEY, +}); + +export async function rerankDocuments( + documents: (string | Record)[], + query: string, + topN = 3, + model = "rerank-english-v3.0" +) { + const rerank = await cohere.v2.rerank({ + documents, + query, + topN, + model, + returnDocuments: true, + }); + + return rerank.results.sort((a, b) => b.relevanceScore - a.relevanceScore).map(x => ({ document: x.document, index: x.index, relevanceScore: x.relevanceScore })); +} From 
5bbbb52a30dabc4f1b321737b472b7726fdc11fd Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 12 Nov 2024 12:17:03 -0500 Subject: [PATCH 02/51] Update fireEngine.ts --- apps/api/src/search/fireEngine.ts | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/apps/api/src/search/fireEngine.ts b/apps/api/src/search/fireEngine.ts index 2b5ffd2e..09a58e4e 100644 --- a/apps/api/src/search/fireEngine.ts +++ b/apps/api/src/search/fireEngine.ts @@ -1,4 +1,3 @@ -import axios from "axios"; import dotenv from "dotenv"; import { SearchResult } from "../../src/lib/entities"; import * as Sentry from "@sentry/node"; @@ -40,19 +39,19 @@ export async function fireEngineMap( } console.log("process.env.FIRE_ENGINE_BETA_URL", process.env.FIRE_ENGINE_BETA_URL); - let config = { + const response = await fetch(`${process.env.FIRE_ENGINE_BETA_URL}/search`, { method: "POST", - url: `${process.env.FIRE_ENGINE_BETA_URL}/search`, headers: { "Content-Type": "application/json", "X-Disable-Cache": "true" }, - data: data, - }; - const response = await axios(config); - if (response && response.data) { - console.log("response", response.data); - return response.data; + body: data + }); + + if (response.ok) { + const responseData = await response.json(); + console.log("response", responseData); + return responseData; } else { return []; } From d430cfcbfbf89a04b73293cd697d51e37f6acc18 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 12 Nov 2024 12:17:48 -0500 Subject: [PATCH 03/51] Update extract.ts --- apps/api/src/controllers/v1/extract.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index f290c5ab..4ac32ddd 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -182,7 +182,7 @@ export async function extractController( return res.status(200).json({ success: true, - data: null, + data: {} as Document, scrape_id: 
origin?.includes("website") ? jobId : undefined, }); } From a4f15260a7fef1f8c1ddef72dad9d5c95e5fbb15 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 12 Nov 2024 12:23:24 -0500 Subject: [PATCH 04/51] Nick: --- apps/api/src/controllers/v1/extract.ts | 22 ++++++++++------------ apps/api/src/lib/extract/completions.ts | 5 ++--- apps/api/src/routes/v1.ts | 10 ++++++++++ 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 4ac32ddd..ebe248ba 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -10,8 +10,6 @@ import { import { v4 as uuidv4 } from "uuid"; import { getJobPriority } from "../../lib/job-priority"; import { PlanType } from "../../types"; -import { rerankDocuments } from "../../lib/extract/reranker"; -import { generateBasicCompletion } from "../../lib/extract/completions"; import { getMapResults } from "./map"; @@ -40,7 +38,7 @@ export async function extractController( const mappedDocuments: MapDocument[] = []; const prompt = req.body.prompt; - const keywords = await generateBasicCompletion(`If the user's prompt is: "${prompt}", what are the most important keywords besides the extraction task? Output only the keywords, separated by commas.`); + // const keywords = await generateBasicCompletion(`If the user's prompt is: "${prompt}", what are the most important keywords besides the extraction task? 
Output only the keywords, separated by commas.`); for (const url of urls) { if (url.endsWith("/*")) { @@ -57,15 +55,15 @@ export async function extractController( subId: req.acuc?.sub_id, includeMetadata: true }); - // top 3 links - const top3Links = (mapResults.links as MapDocument[]).slice(0, 3); - console.log(top3Links); - // console.log(top3Links); - mappedDocuments.push(...(mapResults.links as MapDocument[])); - // transform mappedUrls to just documents - // we quickly rerank - const rerank = await rerankDocuments(mappedDocuments.map(x => `URL: ${x.url}\nTITLE: ${x.title}\nDESCRIPTION: ${x.description}`), "What URLs are most relevant to the following prompt: " + (req.body.prompt || '').toLocaleLowerCase().replace("extract", " ").replace("extract ", " ")); - console.log(rerank); + // // top 3 links + // const top3Links = (mapResults.links as MapDocument[]).slice(0, 3); + // console.log(top3Links); + // // console.log(top3Links); + // mappedDocuments.push(...(mapResults.links as MapDocument[])); + // // transform mappedUrls to just documents + // // we quickly rerank + // const rerank = await rerankDocuments(mappedDocuments.map(x => `URL: ${x.url}\nTITLE: ${x.title}\nDESCRIPTION: ${x.description}`), "What URLs are most relevant to the following prompt: " + (req.body.prompt || '').toLocaleLowerCase().replace("extract", " ").replace("extract ", " ")); + // console.log(rerank); } else { mappedDocuments.push({ url }); } diff --git a/apps/api/src/lib/extract/completions.ts b/apps/api/src/lib/extract/completions.ts index 230584b4..fa75594b 100644 --- a/apps/api/src/lib/extract/completions.ts +++ b/apps/api/src/lib/extract/completions.ts @@ -82,7 +82,7 @@ export async function generateFinalExtraction({ const jsonCompletion = await openai.beta.chat.completions.parse({ model, messages: [ - { role: "system", content: systemPrompt }, + { role: "system", content: systemPrompt ?? 
"" }, { role: "user", content: [{ type: "text", text: extractionContent }] }, { role: "user", @@ -108,9 +108,8 @@ export async function generateFinalExtraction({ } const extraction = jsonCompletion.choices[0].message.parsed; - return { - content: extraction, + content: extraction ?? "", metadata: { numTokens, warning, diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts index 3eaace3b..e6055a99 100644 --- a/apps/api/src/routes/v1.ts +++ b/apps/api/src/routes/v1.ts @@ -18,6 +18,7 @@ import { logger } from "../lib/logger"; import { scrapeStatusController } from "../controllers/v1/scrape-status"; import { concurrencyCheckController } from "../controllers/v1/concurrency-check"; import { batchScrapeController } from "../controllers/v1/batch-scrape"; +import { extractController } from "../controllers/v1/extract"; // import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview"; // import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status"; // import { searchController } from "../../src/controllers/v1/search"; @@ -178,6 +179,14 @@ v1Router.ws( crawlStatusWSController ); +v1Router.post( + "/extract", + authMiddleware(RateLimiterMode.Scrape), + checkCreditsMiddleware(1), + blocklistMiddleware, + wrap(extractController) +); + // v1Router.post("/crawlWebsitePreview", crawlPreviewController); @@ -199,3 +208,4 @@ v1Router.delete( // Health/Probe routes // v1Router.get("/health/liveness", livenessController); // v1Router.get("/health/readiness", readinessController); + From a23364e5dae6fca4309aea91c44ef091d22ef96e Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 12 Nov 2024 12:23:44 -0500 Subject: [PATCH 05/51] Update extract.ts --- apps/api/src/controllers/v1/extract.ts | 166 +------------------------ 1 file changed, 1 insertion(+), 165 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index ebe248ba..9390ab9a 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ 
b/apps/api/src/controllers/v1/extract.ts @@ -7,180 +7,16 @@ import { ExtractResponse, MapDocument, } from "./types"; -import { v4 as uuidv4 } from "uuid"; -import { getJobPriority } from "../../lib/job-priority"; -import { PlanType } from "../../types"; -import { getMapResults } from "./map"; - - export async function extractController( req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>, res: Response ) { req.body = extractRequestSchema.parse(req.body); - let earlyReturn = false; - - const origin = req.body.origin; - const timeout = req.body.timeout; -// const pageOptions = legacyScrapeOptions(req.body); -// const extractorOptions = req.body.extract ? legacyExtractorOptions(req.body.extract) : undefined; - const jobId = uuidv4(); - - const startTime = new Date().getTime(); - const jobPriority = await getJobPriority({ - plan: req.auth.plan as PlanType, - team_id: req.auth.team_id, - basePriority: 10, - }); - - const urls = req.body.urls; - const mappedDocuments: MapDocument[] = []; - - const prompt = req.body.prompt; - // const keywords = await generateBasicCompletion(`If the user's prompt is: "${prompt}", what are the most important keywords besides the extraction task? 
Output only the keywords, separated by commas.`); - - for (const url of urls) { - if (url.endsWith("/*")) { - const mapResults = await getMapResults({ - url: url.slice(0, -2), - search: req.body.prompt, - limit: 100, - ignoreSitemap: true, - includeSubdomains: false, - crawlerOptions: {}, - teamId: req.auth.team_id, - plan: req.auth.plan, - origin: req.body.origin, - subId: req.acuc?.sub_id, - includeMetadata: true - }); - // // top 3 links - // const top3Links = (mapResults.links as MapDocument[]).slice(0, 3); - // console.log(top3Links); - // // console.log(top3Links); - // mappedDocuments.push(...(mapResults.links as MapDocument[])); - // // transform mappedUrls to just documents - // // we quickly rerank - // const rerank = await rerankDocuments(mappedDocuments.map(x => `URL: ${x.url}\nTITLE: ${x.title}\nDESCRIPTION: ${x.description}`), "What URLs are most relevant to the following prompt: " + (req.body.prompt || '').toLocaleLowerCase().replace("extract", " ").replace("extract ", " ")); - // console.log(rerank); - } else { - mappedDocuments.push({ url }); - } - } - - req.body.urls = mappedDocuments.map(x => x.url); - - - -// const job = await addScrapeJob( -// { -// url: req.body.url, -// mode: "single_urls", -// crawlerOptions: {}, -// team_id: req.auth.team_id, -// plan: req.auth.plan, -// pageOptions, -// extractorOptions, -// origin: req.body.origin, -// is_scrape: true, -// }, -// {}, -// jobId, -// jobPriority -// ); - -// const totalWait = (req.body.waitFor ?? 0) + (req.body.actions ?? []).reduce((a,x) => (x.type === "wait" ? 
x.milliseconds : 0) + a, 0); - -// let doc: any | undefined; -// try { -// doc = (await waitForJob(job.id, timeout + totalWait))[0]; -// } catch (e) { -// Logger.error(`Error in scrapeController: ${e}`); -// if (e instanceof Error && e.message.startsWith("Job wait")) { -// return res.status(408).json({ -// success: false, -// error: "Request timed out", -// }); -// } else { -// return res.status(500).json({ -// success: false, -// error: `(Internal server error) - ${e && e?.message ? e.message : e} ${ -// extractorOptions && extractorOptions.mode !== "markdown" -// ? " - Could be due to LLM parsing issues" -// : "" -// }`, -// }); -// } -// } - -// await job.remove(); - -// if (!doc) { -// console.error("!!! PANIC DOC IS", doc, job); -// return res.status(200).json({ -// success: true, -// warning: "No page found", -// data: doc, -// }); -// } - -// delete doc.index; -// delete doc.provider; - -// const endTime = new Date().getTime(); -// const timeTakenInSeconds = (endTime - startTime) / 1000; -// const numTokens = -// doc && doc.markdown -// ? 
numTokensFromString(doc.markdown, "gpt-3.5-turbo") -// : 0; - -// let creditsToBeBilled = 1; // Assuming 1 credit per document -// if (earlyReturn) { -// // Don't bill if we're early returning -// return; -// } -// if(req.body.extract && req.body.formats.includes("extract")) { -// creditsToBeBilled = 5; -// } - -// billTeam(req.auth.team_id, req.acuc?.sub_id, creditsToBeBilled).catch(error => { -// Logger.error(`Failed to bill team ${req.auth.team_id} for ${creditsToBeBilled} credits: ${error}`); -// // Optionally, you could notify an admin or add to a retry queue here -// }); - -// if (!pageOptions || !pageOptions.includeRawHtml) { -// if (doc && doc.rawHtml) { -// delete doc.rawHtml; -// } -// } - -// if(pageOptions && pageOptions.includeExtract) { -// if(!pageOptions.includeMarkdown && doc && doc.markdown) { -// delete doc.markdown; -// } -// } - -// logJob({ -// job_id: jobId, -// success: true, -// message: "Scrape completed", -// num_docs: 1, -// docs: [doc], -// time_taken: timeTakenInSeconds, -// team_id: req.auth.team_id, -// mode: "scrape", -// url: req.body.url, -// crawlerOptions: {}, -// pageOptions: pageOptions, -// origin: origin, -// extractor_options: extractorOptions, -// num_tokens: numTokens, -// }); return res.status(200).json({ success: true, data: {} as Document, - scrape_id: origin?.includes("website") ? 
jobId : undefined, + scrape_id: undefined, }); } From 807703d94c7f6a50c361e0da55848ab8cde8e702 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 12 Nov 2024 18:44:14 -0300 Subject: [PATCH 06/51] wip --- apps/api/src/controllers/v1/extract.ts | 314 ++++++++++++++++++++++++- apps/api/src/controllers/v1/types.ts | 3 + apps/api/src/lib/ranker.ts | 69 ++++++ 3 files changed, 381 insertions(+), 5 deletions(-) create mode 100644 apps/api/src/lib/ranker.ts diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 9390ab9a..4adbe2ae 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -1,22 +1,326 @@ import { Request, Response } from "express"; import { - Document, + // Document, RequestWithAuth, ExtractRequest, extractRequestSchema, ExtractResponse, MapDocument, + scrapeOptions, } from "./types"; +import { Document } from "../../lib/entities"; +import { StoredCrawl, crawlToCrawler } from "../../lib/crawl-redis"; +import { fireEngineMap } from "../../search/fireEngine"; +import Redis from "ioredis"; +import { configDotenv } from "dotenv"; +import { performRanking } from "../../lib/ranker"; +import { checkAndUpdateURLForMap } from "../../lib/validateUrl"; +import { isSameDomain } from "../../lib/validateUrl"; +import { isSameSubdomain } from "../../lib/validateUrl"; +import { removeDuplicateUrls } from "../../lib/validateUrl"; +import { billTeam } from "../../services/billing/credit_billing"; +import { logJob } from "../../services/logging/log_job"; +import { logger } from "../../lib/logger"; +import { getScrapeQueue } from "../../services/queue-service"; +import { waitForJob } from "../../services/queue-jobs"; +import { addScrapeJob } from "../../services/queue-jobs"; +import { PlanType } from "../../types"; +import { getJobPriority } from "../../lib/job-priority"; +import { generateCompletions } from "../../lib/LLM-extraction"; 
+ +configDotenv(); +const redis = new Redis(process.env.REDIS_URL!); + +const MAX_EXTRACT_LIMIT = 100; +const MAX_RANKING_LIMIT = 3; export async function extractController( req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>, - res: Response + res: Response //ExtractResponse> ) { req.body = extractRequestSchema.parse(req.body); + const id = crypto.randomUUID(); + let links: string[] = req.body.urls; + + const sc: StoredCrawl = { + originUrl: req.body.urls[0], + crawlerOptions: { + // ...crawlerOptions, + scrapeOptions: undefined, + }, + scrapeOptions: scrapeOptions.parse({}), + internalOptions: {}, + team_id: req.auth.team_id, + createdAt: Date.now(), + plan: req.auth.plan!, + }; + + const crawler = crawlToCrawler(id, sc); + + let urlWithoutWww = req.body.urls[0].replace("www.", ""); + + let mapUrl = req.body.prompt + ? `"${req.body.prompt}" site:${urlWithoutWww}` + : `site:${req.body.urls[0]}`; + + const resultsPerPage = 100; + const maxPages = Math.ceil(MAX_EXTRACT_LIMIT / resultsPerPage); + + const cacheKey = `fireEngineMap:${mapUrl}`; + const cachedResult = null; + + let allResults: any[] = []; + let pagePromises: Promise[] = []; + + if (cachedResult) { + allResults = JSON.parse(cachedResult); + } else { + const fetchPage = async (page: number) => { + return fireEngineMap(mapUrl, { + numResults: resultsPerPage, + page: page, + }); + }; + + pagePromises = Array.from({ length: maxPages }, (_, i) => fetchPage(i + 1)); + allResults = await Promise.all(pagePromises); + + await redis.set(cacheKey, JSON.stringify(allResults), "EX", 24 * 60 * 60); // Cache for 24 hours + } + + console.log("allResults", allResults); + // Parallelize sitemap fetch with serper search + const [sitemap, ...searchResults] = await Promise.all([ + req.body.ignoreSitemap ? null : crawler.tryGetSitemap(), + ...(cachedResult ? 
[] : pagePromises), + ]); + + if (!cachedResult) { + allResults = searchResults; + } + + if (sitemap !== null) { + sitemap.forEach((x) => { + links.push(x.url); + }); + } + + let mapResults = allResults + .flat() + .filter((result) => result !== null && result !== undefined); + + const minumumCutoff = Math.min(MAX_EXTRACT_LIMIT, req.body.limit ?? MAX_EXTRACT_LIMIT); + if (mapResults.length > minumumCutoff) { + mapResults = mapResults.slice(0, minumumCutoff); + } + + if (mapResults.length > 0) { + if (req.body.prompt) { + // Ensure all map results are first, maintaining their order + links = [ + mapResults[0].url, + ...mapResults.slice(1).map((x) => x.url), + ...links, + ]; + } else { + mapResults.map((x) => { + links.push(x.url); + }); + } + } + + // console.log("links", links); + let linksAndScores: { link: string; score: number }[] = []; + // Perform cosine similarity between the search query and the list of links + if (req.body.prompt) { + const searchQuery = req.body.prompt.toLowerCase(); + linksAndScores = await performRanking(links, searchQuery); + } + + console.log("linksAndScores", linksAndScores); + + links = links + .map((x) => { + try { + return checkAndUpdateURLForMap(x).url.trim(); + } catch (_) { + return null; + } + }) + .filter((x) => x !== null) as string[]; + + // allows for subdomains to be included + links = links.filter((x) => isSameDomain(x, req.body.urls[0])); + + // if includeSubdomains is false, filter out subdomains + if (!req.body.includeSubdomains) { + links = links.filter((x) => isSameSubdomain(x, req.body.urls[0])); + } + + // remove duplicates that could be due to http/https or www + links = removeDuplicateUrls(links); + + // get top N links + links = links.slice(0, MAX_RANKING_LIMIT); + + // scrape the links + let earlyReturn = false; + let docs: Document[] = []; + + for (const url of links) { + const origin = req.body.origin || "api"; + const timeout = req.body.timeout; + const jobId = crypto.randomUUID(); + + const startTime = new 
Date().getTime(); + const jobPriority = await getJobPriority({ + plan: req.auth.plan as PlanType, + team_id: req.auth.team_id, + basePriority: 10, + }); + + await addScrapeJob( + { + url, + mode: "single_urls", + team_id: req.auth.team_id, + scrapeOptions: scrapeOptions.parse({}), + internalOptions: {}, + plan: req.auth.plan!, + origin, + is_scrape: true, + }, + {}, + jobId, + jobPriority + ); + + const totalWait = 60000 // (req.body.waitFor ?? 0) + (req.body.actions ?? []).reduce((a,x) => (x.type === "wait" ? x.milliseconds ?? 0 : 0) + a, 0); + + let doc: Document; + try { + doc = await waitForJob(jobId, timeout + totalWait); // TODO: better types for this + } catch (e) { + logger.error(`Error in scrapeController: ${e}`); + if (e instanceof Error && (e.message.startsWith("Job wait") || e.message === "timeout")) { + return res.status(408).json({ + success: false, + error: "Request timed out", + }); + } else { + return res.status(500).json({ + success: false, + error: `(Internal server error) - ${(e && e.message) ? e.message : e}`, + }); + } + } + + await getScrapeQueue().remove(jobId); + + const endTime = new Date().getTime(); + const timeTakenInSeconds = (endTime - startTime) / 1000; + // const numTokens = + // doc && doc.extract + // // ? numTokensFromString(doc.markdown, "gpt-3.5-turbo") + // ? 
0 // TODO: fix + // : 0; + + let creditsToBeBilled = 1; // Assuming 1 credit per document + if (earlyReturn) { + // Don't bill if we're early returning + return; + } + docs.push(doc); + } + + + console.log("docs", docs); + + // reduce to 1 document + const completions = await generateCompletions( + docs, { + extractionSchema: req.body.schema, + extractionPrompt: req.body.prompt, + userPrompt: req.body.prompt, + mode: "markdown" + }, + "markdown" + ); + + console.log("completions", completions.map(x => x.llm_extraction)); + + // if(req.body.extract && req.body.formats.includes("extract")) { + // creditsToBeBilled = 5; + // } + + // billTeam(req.auth.team_id, req.acuc?.sub_id, creditsToBeBilled).catch(error => { + // logger.error(`Failed to bill team ${req.auth.team_id} for ${creditsToBeBilled} credits: ${error}`); + // // Optionally, you could notify an admin or add to a retry queue here + // }); + + // if (!req.body.formats.includes("rawHtml")) { + // if (doc && doc.rawHtml) { + // delete doc.rawHtml; + // } + // } + + // logJob({ + // job_id: jobId, + // success: true, + // message: "Scrape completed", + // num_docs: 1, + // docs: [doc], + // time_taken: timeTakenInSeconds, + // team_id: req.auth.team_id, + // mode: "scrape", + // url: req.body.url, + // scrapeOptions: req.body, + // origin: origin, + // num_tokens: numTokens, + // }); + + + + // billTeam(teamId, subId, 1).catch((error) => { + // logger.error( + // `Failed to bill team ${teamId} for 1 credit: ${error}` + // ); + // }); + + // const linksToReturn = links.slice(0, limit); + + // logJob({ + // job_id: id, + // success: links.length > 0, + // message: "Extract completed", + // num_docs: linksToReturn.length, + // docs: linksToReturn, + // time_taken: (new Date().getTime() - Date.now()) / 1000, + // team_id: teamId, + // mode: "extract", + // url: urls[0], + // crawlerOptions: {}, + // scrapeOptions: {}, + // origin: origin ?? 
"api", + // num_tokens: 0, + // }); + + // return { + + // }; + + + + // const response = { + // success: true as const, + // data: result.data, + // scrape_id: result.scrape_id + // }; + return res.status(200).json({ success: true, - data: {} as Document, - scrape_id: undefined, + data: {}, // includeMetadata ? mapResults : linksToReturn, + scrape_id: id, //origin?.includes("website") ? id : undefined, }); -} +} \ No newline at end of file diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 530ca765..02523bb7 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -158,6 +158,9 @@ export const extractV1Options = z.object({ urls: url.array(), prompt: z.string().optional(), schema: z.any().optional(), + limit: z.number().int().positive().finite().safe().optional(), + ignoreSitemap: z.boolean().default(false), + includeSubdomains: z.boolean().default(true), origin: z.string().optional().default("api"), timeout: z.number().int().positive().finite().safe().default(60000), }).strict(strictMessage) diff --git a/apps/api/src/lib/ranker.ts b/apps/api/src/lib/ranker.ts new file mode 100644 index 00000000..6c1646da --- /dev/null +++ b/apps/api/src/lib/ranker.ts @@ -0,0 +1,69 @@ +import axios from 'axios'; +import { configDotenv } from 'dotenv'; +import OpenAI from "openai"; + +configDotenv(); + +const openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, +}); + +async function getEmbedding(text: string) { + const embedding = await openai.embeddings.create({ + model: "text-embedding-ada-002", + input: text, + encoding_format: "float", + }); + + return embedding.data[0].embedding; +} + +const cosineSimilarity = (vec1: number[], vec2: number[]): number => { + const dotProduct = vec1.reduce((sum, val, i) => sum + val * vec2[i], 0); + const magnitude1 = Math.sqrt( + vec1.reduce((sum, val) => sum + val * val, 0) + ); + const magnitude2 = Math.sqrt( + vec2.reduce((sum, val) => sum + val * val, 
0) + ); + if (magnitude1 === 0 || magnitude2 === 0) return 0; + return dotProduct / (magnitude1 * magnitude2); +}; + +// Function to convert text to vector +const textToVector = (searchQuery: string, text: string): number[] => { + const words = searchQuery.toLowerCase().split(/\W+/); + return words.map((word) => { + const count = (text.toLowerCase().match(new RegExp(word, "g")) || []) + .length; + return count / text.length; + }); +}; + +async function performRanking(links: string[], searchQuery: string) { + try { + // Generate embeddings for the search query + const queryEmbedding = await getEmbedding(searchQuery); + + // Generate embeddings for each link and calculate similarity + const linksAndScores = await Promise.all(links.map(async (link) => { + const linkEmbedding = await getEmbedding(link); + + console.log("linkEmbedding", linkEmbedding); + // const linkVector = textToVector(searchQuery, link); + const score = cosineSimilarity(queryEmbedding, linkEmbedding); + console.log("score", score); + return { link, score }; + })); + + // Sort links based on similarity scores + linksAndScores.sort((a, b) => b.score - a.score); + + return linksAndScores; + } catch (error) { + console.error(`Error performing semantic search: ${error}`); + return []; + } +} + +export { performRanking }; From a175c1513a441e225493c7e07ebc9d0dc3b5eea2 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 13 Nov 2024 08:09:51 -0300 Subject: [PATCH 07/51] wip --- apps/api/src/controllers/v1/extract.ts | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 4adbe2ae..6f7aced1 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -26,7 +26,7 @@ import { waitForJob } from "../../services/queue-jobs"; import { addScrapeJob } from "../../services/queue-jobs"; import { PlanType } 
from "../../types"; import { getJobPriority } from "../../lib/job-priority"; -import { generateCompletions } from "../../lib/LLM-extraction"; +import { generateFinalExtraction } from "../../lib/extract/completions"; configDotenv(); const redis = new Redis(process.env.REDIS_URL!); @@ -237,18 +237,15 @@ export async function extractController( console.log("docs", docs); - // reduce to 1 document - const completions = await generateCompletions( - docs, { - extractionSchema: req.body.schema, - extractionPrompt: req.body.prompt, - userPrompt: req.body.prompt, - mode: "markdown" - }, - "markdown" - ); + // {"message":"Missing required parameter: 'response_format.json_schema.schema'.","type":"invalid_request_error","param":"response_format.json_schema.schema","code":"missing_required_parameter"},"code":"missing_required_parameter","param":"response_format.json_schema.schema","type":"invalid_request_error"} + const completions = await generateFinalExtraction({ + pagesContent: docs.map(x => x.markdown).join('\n'), + systemPrompt: '', + prompt: req.body.prompt, + schema: req.body.schema, + }); - console.log("completions", completions.map(x => x.llm_extraction)); + console.log("completions", completions); // if(req.body.extract && req.body.formats.includes("extract")) { // creditsToBeBilled = 5; From 25f32000dbff7bc686963794b7be377e55bf8447 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 13 Nov 2024 13:05:29 -0300 Subject: [PATCH 08/51] mvp done? 
--- apps/api/src/controllers/v1/extract.ts | 14 +++++++------- apps/api/src/lib/extract/completions.ts | 5 ++++- apps/api/src/lib/ranker.ts | 4 ++-- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 6f7aced1..d3c06c34 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -89,7 +89,7 @@ export async function extractController( await redis.set(cacheKey, JSON.stringify(allResults), "EX", 24 * 60 * 60); // Cache for 24 hours } - console.log("allResults", allResults); + // console.log("allResults", allResults); // Parallelize sitemap fetch with serper search const [sitemap, ...searchResults] = await Promise.all([ req.body.ignoreSitemap ? null : crawler.tryGetSitemap(), @@ -138,7 +138,7 @@ export async function extractController( linksAndScores = await performRanking(links, searchQuery); } - console.log("linksAndScores", linksAndScores); + // console.log("linksAndScores", linksAndScores); links = links .map((x) => { @@ -218,8 +218,8 @@ export async function extractController( await getScrapeQueue().remove(jobId); - const endTime = new Date().getTime(); - const timeTakenInSeconds = (endTime - startTime) / 1000; + // const endTime = new Date().getTime(); + // const timeTakenInSeconds = (endTime - startTime) / 1000; // const numTokens = // doc && doc.extract // // ? 
numTokensFromString(doc.markdown, "gpt-3.5-turbo") @@ -235,7 +235,7 @@ export async function extractController( } - console.log("docs", docs); + // console.log("docs", docs); // {"message":"Missing required parameter: 'response_format.json_schema.schema'.","type":"invalid_request_error","param":"response_format.json_schema.schema","code":"missing_required_parameter"},"code":"missing_required_parameter","param":"response_format.json_schema.schema","type":"invalid_request_error"} const completions = await generateFinalExtraction({ @@ -245,7 +245,7 @@ export async function extractController( schema: req.body.schema, }); - console.log("completions", completions); + // console.log("completions", completions); // if(req.body.extract && req.body.formats.includes("extract")) { // creditsToBeBilled = 5; @@ -317,7 +317,7 @@ export async function extractController( return res.status(200).json({ success: true, - data: {}, // includeMetadata ? mapResults : linksToReturn, + data: completions.content, // includeMetadata ? mapResults : linksToReturn, scrape_id: id, //origin?.includes("website") ? 
id : undefined, }); } \ No newline at end of file diff --git a/apps/api/src/lib/extract/completions.ts b/apps/api/src/lib/extract/completions.ts index fa75594b..c02b3e31 100644 --- a/apps/api/src/lib/extract/completions.ts +++ b/apps/api/src/lib/extract/completions.ts @@ -77,6 +77,9 @@ export async function generateFinalExtraction({ required: ["items"], additionalProperties: false, }; + } else if (schema) { + schema.additionalProperties = false; + schema.required = Object.keys(schema.properties); } const jsonCompletion = await openai.beta.chat.completions.parse({ @@ -96,7 +99,7 @@ export async function generateFinalExtraction({ type: "json_schema", json_schema: { name: "websiteContent", - schema: schema.shape, + schema: schema, strict: true, }, } diff --git a/apps/api/src/lib/ranker.ts b/apps/api/src/lib/ranker.ts index 6c1646da..7cd39820 100644 --- a/apps/api/src/lib/ranker.ts +++ b/apps/api/src/lib/ranker.ts @@ -49,10 +49,10 @@ async function performRanking(links: string[], searchQuery: string) { const linksAndScores = await Promise.all(links.map(async (link) => { const linkEmbedding = await getEmbedding(link); - console.log("linkEmbedding", linkEmbedding); + // console.log("linkEmbedding", linkEmbedding); // const linkVector = textToVector(searchQuery, link); const score = cosineSimilarity(queryEmbedding, linkEmbedding); - console.log("score", score); + // console.log("score", score); return { link, score }; })); From 904c904971913f08516a181dc7ffeb67ad2a3bae Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 13 Nov 2024 18:06:20 -0300 Subject: [PATCH 09/51] wip --- .../src/__tests__/e2e_extract/index.test.ts | 151 ++++++++++++ apps/api/src/controllers/v1/extract.ts | 176 ++++++++------ apps/api/src/controllers/v1/types.ts | 3 +- apps/api/src/lib/extract/completions.ts | 217 +++++++++--------- .../scrapeURL/transformers/llmExtract.ts | 46 ++-- apps/api/src/search/fireEngine.ts | 1 - 6 files changed, 397 
insertions(+), 197 deletions(-) create mode 100644 apps/api/src/__tests__/e2e_extract/index.test.ts diff --git a/apps/api/src/__tests__/e2e_extract/index.test.ts b/apps/api/src/__tests__/e2e_extract/index.test.ts new file mode 100644 index 00000000..39416bd7 --- /dev/null +++ b/apps/api/src/__tests__/e2e_extract/index.test.ts @@ -0,0 +1,151 @@ +import request from "supertest"; +import dotenv from "dotenv"; +import { + FirecrawlCrawlResponse, + FirecrawlCrawlStatusResponse, + FirecrawlScrapeResponse, +} from "../../types"; + +dotenv.config(); +const TEST_URL = "http://127.0.0.1:3002"; + +describe("E2E Tests for Extract API Routes", () => { + describe("POST /v1/extract", () => { + it.concurrent("should return authors of blog posts on firecrawl.dev", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["https://firecrawl.dev"], + prompt: "Who are the authors of the blog posts?", + schema: { + type: "object", + properties: { authors: { type: "array", items: { type: "string" } } }, + }, + }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data).toHaveProperty("founders"); + + let gotItRight = 0; + for (const author of response.body.data?.authors) { + if (author.includes("Caleb Peffer")) gotItRight++; + if (author.includes("Gergő Móricz")) gotItRight++; + if (author.includes("Eric Ciarla")) gotItRight++; + if (author.includes("Nicolas Camara")) gotItRight++; + } + + expect(gotItRight).toBeGreaterThan(3); + }, 60000); + + it.concurrent("should return founders of firecrawl.dev (allowExternalLinks = true)", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["mendable.ai"], + prompt: "Who are the founders of the 
company?", + allowExternalLinks: true, + schema: { + type: "object", + properties: { founders: { type: "array", items: { type: "string" } } }, + }, + }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data).toHaveProperty("founders"); + + let gotItRight = 0; + for (const founder of response.body.data?.founders) { + if (founder.includes("Caleb")) gotItRight++; + if (founder.includes("Eric")) gotItRight++; + if (founder.includes("Nicolas")) gotItRight++; + } + + expect(gotItRight).toBe(3); + }, 60000); + + it.concurrent("should return hiring opportunities on firecrawl.dev (allowExternalLinks = true)", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["https://firecrawl.dev"], + prompt: "What are they hiring for?", + allowExternalLinks: true, + schema: { + type: "array", + items: { + type: "string" + } + }, + }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + console.log(response.body.data); + + let gotItRight = 0; + for (const hiring of response.body.data?.items) { + if (hiring.includes("Developer Relations Specialist")) gotItRight++; + if (hiring.includes("Web Automation Engineer")) gotItRight++; + if (hiring.includes("Developer Experience Engineer")) gotItRight++; + if (hiring.includes("Developer Support Engineer")) gotItRight++; + if (hiring.includes("Dev Ops Engineer")) gotItRight++; + if (hiring.includes("Founding Web Automation Engineer")) gotItRight++; + } + + expect(gotItRight).toBeGreaterThan(5); + }, 60000); + + it.concurrent("should return PCI DSS compliance for Fivetran", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["fivetran.com"], + 
prompt: "Does Fivetran have PCI DSS compliance?", + allowExternalLinks: true, + schema: { + type: "object", + properties: { + pciDssCompliance: { type: "boolean" } + } + }, + }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data?.pciDssCompliance).toBe(true); + }, 60000); + + it.concurrent("should return Azure Data Connectors for Fivetran", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["fivetran.com"], + prompt: "What are the Azure Data Connectors they offer?", + schema: { + type: "array", + items: { + type: "object", + properties: { + connector: { type: "string" }, + description: { type: "string" }, + supportsCaptureDelete: { type: "boolean" } + } + } + } + }) + + console.log(response.body); + // expect(response.statusCode).toBe(200); + // expect(response.body).toHaveProperty("data"); + // expect(response.body.data?.pciDssCompliance).toBe(true); + }, 60000); + }); +}); diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index d3c06c34..178213d0 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -26,22 +26,24 @@ import { waitForJob } from "../../services/queue-jobs"; import { addScrapeJob } from "../../services/queue-jobs"; import { PlanType } from "../../types"; import { getJobPriority } from "../../lib/job-priority"; -import { generateFinalExtraction } from "../../lib/extract/completions"; +import { generateOpenAICompletions } from "../../scraper/scrapeURL/transformers/llmExtract"; +import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; configDotenv(); const redis = new Redis(process.env.REDIS_URL!); const MAX_EXTRACT_LIMIT = 100; -const MAX_RANKING_LIMIT = 3; +const MAX_RANKING_LIMIT = 5; +const SCORE_THRESHOLD = 0.75; export async 
function extractController( req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>, - res: Response //ExtractResponse> + res: Response ) { req.body = extractRequestSchema.parse(req.body); const id = crypto.randomUUID(); - let links: string[] = req.body.urls; + let links: string[]; //= req.body.urls; const sc: StoredCrawl = { originUrl: req.body.urls[0], @@ -59,10 +61,14 @@ export async function extractController( const crawler = crawlToCrawler(id, sc); let urlWithoutWww = req.body.urls[0].replace("www.", ""); + console.log("urlWithoutWww", urlWithoutWww); - let mapUrl = req.body.prompt - ? `"${req.body.prompt}" site:${urlWithoutWww}` - : `site:${req.body.urls[0]}`; + const allowExternalLinks = req.body.allowExternalLinks ?? false; + + let mapUrl = req.body.prompt && allowExternalLinks + ? `${req.body.prompt} ${urlWithoutWww}` + : req.body.prompt ? `${req.body.prompt} site:${urlWithoutWww}` + : `site:${urlWithoutWww}`; const resultsPerPage = 100; const maxPages = Math.ceil(MAX_EXTRACT_LIMIT / resultsPerPage); @@ -84,82 +90,103 @@ export async function extractController( }; pagePromises = Array.from({ length: maxPages }, (_, i) => fetchPage(i + 1)); - allResults = await Promise.all(pagePromises); + allResults = (await Promise.all(pagePromises)).flat(); + // console.log("allResults", allResults); + // if allResults is empty, return an error + if (allResults.length === 0) { + return res.status(400).json({ + success: false, + error: "No results found", + }); + } await redis.set(cacheKey, JSON.stringify(allResults), "EX", 24 * 60 * 60); // Cache for 24 hours } // console.log("allResults", allResults); // Parallelize sitemap fetch with serper search - const [sitemap, ...searchResults] = await Promise.all([ - req.body.ignoreSitemap ? null : crawler.tryGetSitemap(), - ...(cachedResult ? [] : pagePromises), - ]); + // const [sitemap, ...searchResults] = await Promise.all([ + // req.body.ignoreSitemap ? null : null, // crawler.tryGetSitemap(), + // ...(cachedResult ? 
[] : pagePromises), + // ]); - if (!cachedResult) { - allResults = searchResults; - } + // if (!cachedResult) { + // allResults = searchResults; + // } - if (sitemap !== null) { - sitemap.forEach((x) => { - links.push(x.url); - }); - } + links = allResults.map(x => `url: ${x.url}, title: ${x.title}, description: ${x.description}`); + console.log("links", links); + // if (sitemap !== null) { + // sitemap.forEach((x) => { + // links.push(x.url); + // }); + // } - let mapResults = allResults - .flat() - .filter((result) => result !== null && result !== undefined); + // let mapResults = allResults + // .flat() + // .filter((result) => result !== null && result !== undefined); - const minumumCutoff = Math.min(MAX_EXTRACT_LIMIT, req.body.limit ?? MAX_EXTRACT_LIMIT); - if (mapResults.length > minumumCutoff) { - mapResults = mapResults.slice(0, minumumCutoff); - } + // const minumumCutoff = Math.min(MAX_EXTRACT_LIMIT, req.body.limit ?? MAX_EXTRACT_LIMIT); + // if (mapResults.length > minumumCutoff) { + // mapResults = mapResults.slice(0, minumumCutoff); + // } - if (mapResults.length > 0) { - if (req.body.prompt) { - // Ensure all map results are first, maintaining their order - links = [ - mapResults[0].url, - ...mapResults.slice(1).map((x) => x.url), - ...links, - ]; - } else { - mapResults.map((x) => { - links.push(x.url); - }); - } - } + // if (mapResults.length > 0) { + // if (req.body.prompt) { + // // Ensure all map results are first, maintaining their order + // links = [ + // mapResults[0].url, + // ...mapResults.slice(1).map((x) => x.url), + // ...links, + // ]; + // } else { + // mapResults.map((x) => { + // links.push(x.url); + // }); + // } + // } + + // console.log("mapResults", mapResults); // console.log("links", links); let linksAndScores: { link: string; score: number }[] = []; // Perform cosine similarity between the search query and the list of links if (req.body.prompt) { - const searchQuery = req.body.prompt.toLowerCase(); + const searchQuery = 
mapUrl; //req.body.prompt.toLowerCase(); linksAndScores = await performRanking(links, searchQuery); } + console.log("linksAndScores", linksAndScores); + links = linksAndScores + .filter(x => x.score > SCORE_THRESHOLD) + .map(x => x.link.split("url: ")[1].split(",")[0]) + .filter(x => !isUrlBlocked(x)) + + console.log("links:", links.length); + + // should we use some sort of llm to determine the best links? // console.log("linksAndScores", linksAndScores); - links = links - .map((x) => { - try { - return checkAndUpdateURLForMap(x).url.trim(); - } catch (_) { - return null; - } - }) - .filter((x) => x !== null) as string[]; + // links = links + // .map((x) => { + // try { + // return checkAndUpdateURLForMap(x).url.trim(); + // } catch (_) { + // return null; + // } + // }) + // .filter((x) => x !== null) as string[]; // allows for subdomains to be included - links = links.filter((x) => isSameDomain(x, req.body.urls[0])); + // links = links.filter((x) => isSameDomain(x, req.body.urls[0])); // if includeSubdomains is false, filter out subdomains - if (!req.body.includeSubdomains) { - links = links.filter((x) => isSameSubdomain(x, req.body.urls[0])); - } + // if (!req.body.includeSubdomains) { + // links = links.filter((x) => isSameSubdomain(x, req.body.urls[0])); + // z} // remove duplicates that could be due to http/https or www - links = removeDuplicateUrls(links); + // links = removeDuplicateUrls(links); // get top N links links = links.slice(0, MAX_RANKING_LIMIT); @@ -170,7 +197,7 @@ export async function extractController( for (const url of links) { const origin = req.body.origin || "api"; - const timeout = req.body.timeout; + const timeout = req.body.timeout ?? 30000; const jobId = crypto.randomUUID(); const startTime = new Date().getTime(); @@ -196,7 +223,7 @@ export async function extractController( jobPriority ); - const totalWait = 60000 // (req.body.waitFor ?? 0) + (req.body.actions ?? []).reduce((a,x) => (x.type === "wait" ? x.milliseconds ?? 
0 : 0) + a, 0); + const totalWait = 0 //60000 // (req.body.waitFor ?? 0) + (req.body.actions ?? []).reduce((a,x) => (x.type === "wait" ? x.milliseconds ?? 0 : 0) + a, 0); let doc: Document; try { @@ -234,18 +261,20 @@ export async function extractController( docs.push(doc); } + console.log(docs) - // console.log("docs", docs); + const completions = await generateOpenAICompletions( + logger.child({ method: "extractController/generateOpenAICompletions" }), + { + mode: "llm", + systemPrompt: "Only use the provided content to answer the question.", + prompt: mapUrl, + schema: req.body.schema, + }, + docs.map(x => x.markdown).join('\n') + ); - // {"message":"Missing required parameter: 'response_format.json_schema.schema'.","type":"invalid_request_error","param":"response_format.json_schema.schema","code":"missing_required_parameter"},"code":"missing_required_parameter","param":"response_format.json_schema.schema","type":"invalid_request_error"} - const completions = await generateFinalExtraction({ - pagesContent: docs.map(x => x.markdown).join('\n'), - systemPrompt: '', - prompt: req.body.prompt, - schema: req.body.schema, - }); - - // console.log("completions", completions); + console.log("completions", completions); // if(req.body.extract && req.body.formats.includes("extract")) { // creditsToBeBilled = 5; @@ -315,9 +344,18 @@ export async function extractController( // scrape_id: result.scrape_id // }; + console.log("completions.extract", completions.extract); + + let data: any; + try { + data = JSON.parse(completions.extract); + } catch (e) { + data = completions.extract; + } + return res.status(200).json({ success: true, - data: completions.content, // includeMetadata ? mapResults : linksToReturn, + data: data, // includeMetadata ? mapResults : linksToReturn, scrape_id: id, //origin?.includes("website") ? 
id : undefined, }); } \ No newline at end of file diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 02523bb7..f9048fd6 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -161,6 +161,7 @@ export const extractV1Options = z.object({ limit: z.number().int().positive().finite().safe().optional(), ignoreSitemap: z.boolean().default(false), includeSubdomains: z.boolean().default(true), + allowExternalLinks: z.boolean().default(false), origin: z.string().optional().default("api"), timeout: z.number().int().positive().finite().safe().default(60000), }).strict(strictMessage) @@ -353,7 +354,7 @@ export type ExtractResponse = | { success: true; warning?: string; - data: Document; + data: z.infer; scrape_id?: string; }; diff --git a/apps/api/src/lib/extract/completions.ts b/apps/api/src/lib/extract/completions.ts index c02b3e31..34a5a215 100644 --- a/apps/api/src/lib/extract/completions.ts +++ b/apps/api/src/lib/extract/completions.ts @@ -1,121 +1,124 @@ -import OpenAI from "openai"; -import { encoding_for_model } from "@dqbd/tiktoken"; -import { TiktokenModel } from "@dqbd/tiktoken"; -import { ExtractOptions } from "../../controllers/v1/types"; -import { Document } from "../entities"; -import { z } from "zod"; +// use llmExtract.ts instead -const maxTokens = 32000; -const modifier = 4; +// import OpenAI from "openai"; +// import { encoding_for_model } from "@dqbd/tiktoken"; +// import { TiktokenModel } from "@dqbd/tiktoken"; +// import { ExtractOptions } from "../../controllers/v1/types"; +// import { Document } from "../entities"; +// import { z } from "zod"; -export class LLMRefusalError extends Error { - constructor(refusal: string) { - super("LLM refused to extract the website's content"); - this.name = "LLMRefusalError"; - } -} +// const maxTokens = 32000; +// const modifier = 4; -interface GenerateCompletionsParams { - systemPrompt?: string; - prompt?: string; - schema?: any; - 
pagesContent: string; -} +// export class LLMRefusalError extends Error { +// constructor(refusal: string) { +// super("LLM refused to extract the website's content"); +// this.name = "LLMRefusalError"; +// } +// } -export async function generateBasicCompletion(prompt: string) { - const openai = new OpenAI(); - const model: TiktokenModel = - (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini"; +// interface GenerateCompletionsParams { +// systemPrompt?: string; +// prompt?: string; +// schema?: any; +// pagesContent: string; +// } - const completion = await openai.chat.completions.create({ - model, - messages: [{ role: "user", content: prompt }], - }); +// export async function generateBasicCompletion(prompt: string) { +// const openai = new OpenAI(); +// const model: TiktokenModel = +// (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini"; - return completion.choices[0].message.content; -} +// const completion = await openai.chat.completions.create({ +// model, +// messages: [{ role: "user", content: prompt }], +// }); -export async function generateFinalExtraction({ - pagesContent, - systemPrompt, - prompt, - schema, -}: GenerateCompletionsParams): Promise<{ - content: string; - metadata: { numTokens: number; warning: string }; -}> { - const openai = new OpenAI(); - const model: TiktokenModel = - (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini"; +// return completion.choices[0].message.content; +// } - let extractionContent = pagesContent; - let numTokens = 0; - let warning = ""; +// export async function generateFinalExtraction({ +// pagesContent, +// systemPrompt, +// prompt, +// schema, +// }: GenerateCompletionsParams): Promise<{ +// content: string; +// metadata: { numTokens: number; warning: string }; +// }> { +// const openai = new OpenAI(); +// const model: TiktokenModel = +// (process.env.MODEL_NAME as TiktokenModel) ?? 
"gpt-4o-mini"; - const encoder = encoding_for_model(model); - try { - const tokens = encoder.encode(extractionContent); - numTokens = tokens.length; - } catch (error) { - extractionContent = extractionContent.slice(0, maxTokens * modifier); - warning = `Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (${maxTokens}) we support.`; - } finally { - encoder.free(); - } +// let extractionContent = pagesContent; +// let numTokens = 0; +// let warning = ""; - if (numTokens > maxTokens) { - extractionContent = extractionContent.slice(0, maxTokens * modifier); - warning = `The extraction content would have used more tokens (${numTokens}) than the maximum we allow (${maxTokens}). -- the input has been automatically trimmed.`; - } +// const encoder = encoding_for_model(model); +// try { +// const tokens = encoder.encode(extractionContent); +// numTokens = tokens.length; +// } catch (error) { +// extractionContent = extractionContent.slice(0, maxTokens * modifier); +// warning = `Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (${maxTokens}) we support.`; +// } finally { +// encoder.free(); +// } - if (schema && (schema.type === "array" || schema._type === "ZodArray")) { - schema = { - type: "object", - properties: { - items: schema, - }, - required: ["items"], - additionalProperties: false, - }; - } else if (schema) { - schema.additionalProperties = false; - schema.required = Object.keys(schema.properties); - } +// if (numTokens > maxTokens) { +// extractionContent = extractionContent.slice(0, maxTokens * modifier); +// warning = `The extraction content would have used more tokens (${numTokens}) than the maximum we allow (${maxTokens}). 
-- the input has been automatically trimmed.`; +// } - const jsonCompletion = await openai.beta.chat.completions.parse({ - model, - messages: [ - { role: "system", content: systemPrompt ?? "" }, - { role: "user", content: [{ type: "text", text: extractionContent }] }, - { - role: "user", - content: prompt - ? `Transform the above content into structured JSON output based on the following user request: ${prompt}` - : "Transform the above content into structured JSON output.", - }, - ], - response_format: schema - ? { - type: "json_schema", - json_schema: { - name: "websiteContent", - schema: schema, - strict: true, - }, - } - : { type: "json_object" }, - }); +// if (schema && (schema.type === "array" || schema._type === "ZodArray")) { +// schema = { +// type: "object", +// properties: { +// items: schema, +// }, +// required: ["items"], +// additionalProperties: false, +// }; +// } else if (schema) { +// schema.additionalProperties = false; +// schema.required = Object.keys(schema.properties); +// } - if (jsonCompletion.choices[0].message.refusal !== null) { - throw new LLMRefusalError(jsonCompletion.choices[0].message.refusal); - } +// const jsonCompletion = await openai.beta.chat.completions.parse({ +// temperature: 0, +// model, +// messages: [ +// { role: "system", content: systemPrompt ?? "" }, +// { role: "user", content: [{ type: "text", text: extractionContent }] }, +// { +// role: "user", +// content: prompt +// ? `Transform the above content into structured JSON output based on the following user request: ${prompt}` +// : "Transform the above content into structured JSON output.", +// }, +// ], +// response_format: schema +// ? { +// type: "json_schema", +// json_schema: { +// name: "websiteContent", +// schema: schema, +// strict: true, +// }, +// } +// : { type: "json_object" }, +// }); - const extraction = jsonCompletion.choices[0].message.parsed; - return { - content: extraction ?? 
"", - metadata: { - numTokens, - warning, - }, - }; -} +// if (jsonCompletion.choices[0].message.refusal !== null) { +// throw new LLMRefusalError(jsonCompletion.choices[0].message.refusal); +// } + +// const extraction = jsonCompletion.choices[0].message.parsed; +// return { +// content: extraction ?? "", +// metadata: { +// numTokens, +// warning, +// }, +// }; +// } diff --git a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts index 69a92197..1a5abd66 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts @@ -58,32 +58,33 @@ function normalizeSchema(x: any): any { } } -async function generateOpenAICompletions(logger: Logger, document: Document, options: ExtractOptions): Promise { +export async function generateOpenAICompletions(logger: Logger, options: ExtractOptions, markdown?: string, previousWarning?: string): Promise<{ extract: any, warning: string | undefined }> { + let extract: any; + let warning: string | undefined; + const openai = new OpenAI(); const model: TiktokenModel = (process.env.MODEL_NAME as TiktokenModel) ?? 
"gpt-4o-mini"; - if (document.markdown === undefined) { + if (markdown === undefined) { throw new Error("document.markdown is undefined -- this is unexpected"); } - let extractionContent = document.markdown; - // count number of tokens let numTokens = 0; const encoder = encoding_for_model(model as TiktokenModel); try { // Encode the message into tokens - const tokens = encoder.encode(extractionContent); + const tokens = encoder.encode(markdown); // Return the number of tokens numTokens = tokens.length; } catch (error) { - logger.warn("Calculating num tokens of string failed", { error, extractionContent }); + logger.warn("Calculating num tokens of string failed", { error, markdown }); - extractionContent = extractionContent.slice(0, maxTokens * modifier); + markdown = markdown.slice(0, maxTokens * modifier); - const warning = "Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (" + maxTokens + ") we support."; - document.warning = document.warning === undefined ? warning : " " + warning; + let w = "Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (" + maxTokens + ") we support."; + warning = previousWarning === undefined ? w : w + " " + previousWarning; } finally { // Free the encoder resources after use encoder.free(); @@ -91,10 +92,10 @@ async function generateOpenAICompletions(logger: Logger, document: Document, opt if (numTokens > maxTokens) { // trim the document to the maximum number of tokens, tokens != characters - extractionContent = extractionContent.slice(0, maxTokens * modifier); + markdown = markdown.slice(0, maxTokens * modifier); - const warning = "The extraction content would have used more tokens (" + numTokens + ") than the maximum we allow (" + maxTokens + "). -- the input has been automatically trimmed."; - document.warning = document.warning === undefined ? 
warning : " " + warning; + const w = "The extraction content would have used more tokens (" + numTokens + ") than the maximum we allow (" + maxTokens + "). -- the input has been automatically trimmed."; + warning = previousWarning === undefined ? w : w + " " + previousWarning; } let schema = options.schema; @@ -120,7 +121,7 @@ async function generateOpenAICompletions(logger: Logger, document: Document, opt }, { role: "user", - content: [{ type: "text", text: extractionContent }], + content: [{ type: "text", text: markdown }], }, { role: "user", @@ -143,11 +144,11 @@ async function generateOpenAICompletions(logger: Logger, document: Document, opt throw new LLMRefusalError(jsonCompletion.choices[0].message.refusal); } - document.extract = jsonCompletion.choices[0].message.parsed; + extract = jsonCompletion.choices[0].message.parsed; - if (document.extract === null && jsonCompletion.choices[0].message.content !== null) { + if (extract === null && jsonCompletion.choices[0].message.content !== null) { try { - document.extract = JSON.parse(jsonCompletion.choices[0].message.content); + extract = JSON.parse(jsonCompletion.choices[0].message.content); } catch (e) { logger.error("Failed to parse returned JSON, no schema specified.", { error: e }); throw new LLMRefusalError("Failed to parse returned JSON. 
Please specify a schema in the extract object."); @@ -155,14 +156,21 @@ async function generateOpenAICompletions(logger: Logger, document: Document, opt } if (options.schema && options.schema.type === "array") { - document.extract = document.extract?.items; + extract = extract?.items; } - return document; + return { extract, warning }; } export async function performLLMExtract(meta: Meta, document: Document): Promise { if (meta.options.formats.includes("extract")) { - document = await generateOpenAICompletions(meta.logger.child({ method: "performLLMExtract/generateOpenAICompletions" }), document, meta.options.extract!); + const { extract, warning } = await generateOpenAICompletions( + meta.logger.child({ method: "performLLMExtract/generateOpenAICompletions" }), + meta.options.extract!, + document.markdown, + document.warning, + ); + document.extract = extract; + document.warning = warning; } return document; diff --git a/apps/api/src/search/fireEngine.ts b/apps/api/src/search/fireEngine.ts index 09a58e4e..0b82478e 100644 --- a/apps/api/src/search/fireEngine.ts +++ b/apps/api/src/search/fireEngine.ts @@ -37,7 +37,6 @@ export async function fireEngineMap( ); return []; } - console.log("process.env.FIRE_ENGINE_BETA_URL", process.env.FIRE_ENGINE_BETA_URL); const response = await fetch(`${process.env.FIRE_ENGINE_BETA_URL}/search`, { method: "POST", From 49ff37afb4ee389e8fa6bc353862b1d141b9bda1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 14 Nov 2024 19:47:12 +0100 Subject: [PATCH 10/51] feat: cache --- apps/api/src/lib/cache.ts | 50 +++++++++++++++++++ .../scraper/scrapeURL/engines/cache/index.ts | 19 +++++++ .../src/scraper/scrapeURL/engines/index.ts | 22 +++++++- .../scraper/scrapeURL/transformers/cache.ts | 24 +++++++++ .../scraper/scrapeURL/transformers/index.ts | 2 + apps/api/src/services/queue-jobs.ts | 2 +- 6 files changed, 117 insertions(+), 2 deletions(-) create mode 100644 apps/api/src/lib/cache.ts create mode 100644 
apps/api/src/scraper/scrapeURL/engines/cache/index.ts create mode 100644 apps/api/src/scraper/scrapeURL/transformers/cache.ts diff --git a/apps/api/src/lib/cache.ts b/apps/api/src/lib/cache.ts new file mode 100644 index 00000000..896d9429 --- /dev/null +++ b/apps/api/src/lib/cache.ts @@ -0,0 +1,50 @@ +import IORedis from "ioredis"; +import { ScrapeOptions } from "../controllers/v1/types"; +import { InternalOptions } from "../scraper/scrapeURL"; +import { logger as _logger } from "./logger"; +const logger = _logger.child({module: "cache"}); + +export const cacheRedis = process.env.CACHE_REDIS_URL ? new IORedis(process.env.CACHE_REDIS_URL, { + maxRetriesPerRequest: null, +}) : null; + +export function cacheKey(url: string, scrapeOptions: ScrapeOptions, internalOptions: InternalOptions): string | null { + if (!cacheRedis) return null; + + // these options disqualify a cache + if (internalOptions.v0CrawlOnlyUrls || internalOptions.forceEngine || internalOptions.v0UseFastMode || internalOptions.atsv + || (scrapeOptions.actions && scrapeOptions.actions.length > 0) + ) { + return null; + } + + return "cache:" + url + ":waitFor:" + scrapeOptions.waitFor; +} + +export type CacheEntry = { + url: string; + html: string; + statusCode: number; + error?: string; +}; + +export async function saveEntryToCache(key: string, entry: CacheEntry) { + if (!cacheRedis) return; + + try { + await cacheRedis.set(key, JSON.stringify(entry)); + } catch (error) { + logger.warn("Failed to save to cache", { key, error }); + } +} + +export async function getEntryFromCache(key: string): Promise { + if (!cacheRedis) return null; + + try { + return JSON.parse(await cacheRedis.get(key) ?? 
"null"); + } catch (error) { + logger.warn("Failed to get from cache", { key, error }); + return null; + } +} diff --git a/apps/api/src/scraper/scrapeURL/engines/cache/index.ts b/apps/api/src/scraper/scrapeURL/engines/cache/index.ts new file mode 100644 index 00000000..9506be0f --- /dev/null +++ b/apps/api/src/scraper/scrapeURL/engines/cache/index.ts @@ -0,0 +1,19 @@ +import { cacheKey, getEntryFromCache } from "../../../../lib/cache"; +import { EngineScrapeResult } from ".."; +import { Meta } from "../.."; +import { EngineError } from "../../error"; + +export async function scrapeCache(meta: Meta): Promise { + const key = cacheKey(meta.url, meta.options, meta.internalOptions); + if (key === null) throw new EngineError("Scrape not eligible for caching"); + + const entry = await getEntryFromCache(key); + if (entry === null) throw new EngineError("Cache missed"); + + return { + url: entry.url, + html: entry.html, + statusCode: entry.statusCode, + error: entry.error, + }; +} \ No newline at end of file diff --git a/apps/api/src/scraper/scrapeURL/engines/index.ts b/apps/api/src/scraper/scrapeURL/engines/index.ts index aadef7fc..d9168669 100644 --- a/apps/api/src/scraper/scrapeURL/engines/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/index.ts @@ -6,14 +6,17 @@ import { scrapePDF } from "./pdf"; import { scrapeURLWithScrapingBee } from "./scrapingbee"; import { scrapeURLWithFetch } from "./fetch"; import { scrapeURLWithPlaywright } from "./playwright"; +import { scrapeCache } from "./cache"; -export type Engine = "fire-engine;chrome-cdp" | "fire-engine;playwright" | "fire-engine;tlsclient" | "scrapingbee" | "scrapingbeeLoad" | "playwright" | "fetch" | "pdf" | "docx"; +export type Engine = "fire-engine;chrome-cdp" | "fire-engine;playwright" | "fire-engine;tlsclient" | "scrapingbee" | "scrapingbeeLoad" | "playwright" | "fetch" | "pdf" | "docx" | "cache"; const useScrapingBee = process.env.SCRAPING_BEE_API_KEY !== '' && process.env.SCRAPING_BEE_API_KEY !== 
undefined; const useFireEngine = process.env.FIRE_ENGINE_BETA_URL !== '' && process.env.FIRE_ENGINE_BETA_URL !== undefined; const usePlaywright = process.env.PLAYWRIGHT_MICROSERVICE_URL !== '' && process.env.PLAYWRIGHT_MICROSERVICE_URL !== undefined; +const useCache = process.env.CACHE_REDIS_URL !== '' && process.env.CACHE_REDIS_URL !== undefined; export const engines: Engine[] = [ + ...(useCache ? [ "cache" as const ] : []), ...(useFireEngine ? [ "fire-engine;chrome-cdp" as const, "fire-engine;playwright" as const, "fire-engine;tlsclient" as const ] : []), ...(useScrapingBee ? [ "scrapingbee" as const, "scrapingbeeLoad" as const ] : []), ...(usePlaywright ? [ "playwright" as const ] : []), @@ -74,6 +77,7 @@ export type EngineScrapeResult = { const engineHandlers: { [E in Engine]: (meta: Meta) => Promise } = { + "cache": scrapeCache, "fire-engine;chrome-cdp": scrapeURLWithFireEngineChromeCDP, "fire-engine;playwright": scrapeURLWithFireEnginePlaywright, "fire-engine;tlsclient": scrapeURLWithFireEngineTLSClient, @@ -95,6 +99,22 @@ export const engineOptions: { quality: number, } } = { + "cache": { + features: { + "actions": false, + "waitFor": true, + "screenshot": false, + "screenshot@fullScreen": false, + "pdf": false, // TODO: figure this out + "docx": false, // TODO: figure this out + "atsv": false, + "location": false, + "mobile": false, + "skipTlsVerification": false, + "useFastMode": false, + }, + quality: 1000, // cache should always be tried first + }, "fire-engine;chrome-cdp": { features: { "actions": true, diff --git a/apps/api/src/scraper/scrapeURL/transformers/cache.ts b/apps/api/src/scraper/scrapeURL/transformers/cache.ts new file mode 100644 index 00000000..785047a1 --- /dev/null +++ b/apps/api/src/scraper/scrapeURL/transformers/cache.ts @@ -0,0 +1,24 @@ +import { Document } from "../../../controllers/v1/types"; +import { Meta } from ".."; +import { CacheEntry, cacheKey, saveEntryToCache } from "../../../lib/cache"; + +export function saveToCache(meta: 
Meta, document: Document): Document { + if (document.rawHtml === undefined) { + throw new Error("rawHtml is undefined -- this transformer is being called out of order"); + } + + const key = cacheKey(meta.url, meta.options, meta.internalOptions); + + if (key !== null) { + const entry: CacheEntry = { + html: document.rawHtml!, + statusCode: document.metadata.statusCode!, + url: document.metadata.url ?? document.metadata.sourceURL!, + error: document.metadata.error ?? undefined, + }; + + saveEntryToCache(key, entry); + } + + return document; +} \ No newline at end of file diff --git a/apps/api/src/scraper/scrapeURL/transformers/index.ts b/apps/api/src/scraper/scrapeURL/transformers/index.ts index d839f8bc..b8063f7e 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/index.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/index.ts @@ -7,6 +7,7 @@ import { extractMetadata } from "../lib/extractMetadata"; import { performLLMExtract } from "./llmExtract"; import { uploadScreenshot } from "./uploadScreenshot"; import { removeBase64Images } from "./removeBase64Images"; +import { saveToCache } from "./cache"; export type Transformer = (meta: Meta, document: Document) => Document | Promise; @@ -104,6 +105,7 @@ export function coerceFieldsToFormats(meta: Meta, document: Document): Document // TODO: allow some of these to run in parallel export const transformerStack: Transformer[] = [ + saveToCache, deriveHTMLFromRawHTML, deriveMarkdownFromHTML, deriveLinksFromHTML, diff --git a/apps/api/src/services/queue-jobs.ts b/apps/api/src/services/queue-jobs.ts index e4a5ace8..d59056bb 100644 --- a/apps/api/src/services/queue-jobs.ts +++ b/apps/api/src/services/queue-jobs.ts @@ -109,6 +109,6 @@ export function waitForJob(jobId: string, timeout: number): Promise } } } - }, 500); + }, 100); }) } From 359c30fbda54f01f1f84227a96e9bd333664b21d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 14 Nov 2024 19:49:34 +0100 Subject: [PATCH 11/51] fix(cache): 
don't cache on failure error code --- apps/api/src/scraper/scrapeURL/transformers/cache.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/api/src/scraper/scrapeURL/transformers/cache.ts b/apps/api/src/scraper/scrapeURL/transformers/cache.ts index 785047a1..e0c09c44 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/cache.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/cache.ts @@ -3,6 +3,8 @@ import { Meta } from ".."; import { CacheEntry, cacheKey, saveEntryToCache } from "../../../lib/cache"; export function saveToCache(meta: Meta, document: Document): Document { + if (document.metadata.statusCode! < 200 || document.metadata.statusCode! >= 300) return document; + if (document.rawHtml === undefined) { throw new Error("rawHtml is undefined -- this transformer is being called out of order"); } From 80d6cb16fb096d242a4a6c8221d19d0501e65631 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 14 Nov 2024 15:51:27 -0300 Subject: [PATCH 12/51] sdks wip --- apps/js-sdk/example.js | 13 ++++++ apps/js-sdk/example.ts | 13 ++++++ apps/js-sdk/firecrawl/package.json | 2 +- apps/js-sdk/firecrawl/src/index.ts | 59 +++++++++++++++++++++++- apps/python-sdk/example.py | 21 +++++++-- apps/python-sdk/firecrawl/__init__.py | 2 +- apps/python-sdk/firecrawl/firecrawl.py | 64 +++++++++++++++++++++++++- 7 files changed, 167 insertions(+), 7 deletions(-) diff --git a/apps/js-sdk/example.js b/apps/js-sdk/example.js index c4b21d5f..166cc18d 100644 --- a/apps/js-sdk/example.js +++ b/apps/js-sdk/example.js @@ -1,4 +1,5 @@ import FirecrawlApp from 'firecrawl'; +import { z } from 'zod'; const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"}); @@ -42,6 +43,18 @@ const main = async () => { const mapResult = await app.mapUrl('https://firecrawl.dev'); console.log(mapResult) + // Extract information from a website using LLM: + const extractSchema = z.object({ + title: z.string(), + description: z.string(), + links: 
z.array(z.string()) + }); + + const extractResult = await app.extractUrls(['https://firecrawl.dev'], { + prompt: "Extract the title, description, and links from the website", + schema: extractSchema + }); + console.log(extractResult); // Crawl a website with WebSockets: const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5}); diff --git a/apps/js-sdk/example.ts b/apps/js-sdk/example.ts index 7412e479..a8fff30a 100644 --- a/apps/js-sdk/example.ts +++ b/apps/js-sdk/example.ts @@ -42,6 +42,19 @@ const main = async () => { const mapResult = await app.mapUrl('https://firecrawl.dev'); console.log(mapResult) + // // Extract information from a website using LLM: + // const extractSchema = z.object({ + // title: z.string(), + // description: z.string(), + // links: z.array(z.string()) + // }); + + // const extractResult = await app.extractUrls(['https://firecrawl.dev'], { + // prompt: "Extract the title, description, and links from the website", + // schema: extractSchema + // }); + // console.log(extractResult); + // Crawl a website with WebSockets: const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5}); diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 5d0a7fc9..8f3682c2 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "1.8.2", + "version": "1.9.0", "description": "JavaScript SDK for Firecrawl API", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 45e19197..2b3ca2b3 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -234,6 +234,26 @@ export interface MapResponse { error?: string; } +/** + * Parameters for extracting information from URLs. + * Defines options for extracting information from URLs. 
+ */ +export interface ExtractParams { + prompt: string; + schema?: zt.ZodSchema; + systemPrompt?: string; +} + +/** + * Response interface for extracting information from URLs. + * Defines the structure of the response received after extracting information from URLs. + */ +export interface ExtractResponse { + success: true; + data: zt.infer; + error?: string; +} + /** * Error response interface. * Defines the structure of the response received when an error occurs. @@ -243,7 +263,6 @@ export interface ErrorResponse { error: string; } - /** * Custom error class for Firecrawl. * Extends the built-in Error class to include a status code. @@ -675,6 +694,44 @@ export default class FirecrawlApp { return { success: false, error: "Internal server error." }; } + /** + * Extracts information from a URL using the Firecrawl API. + * @param url - The URL to extract information from. + * @param params - Additional parameters for the extract request. + * @returns The response from the extract operation. + */ + async extractUrls(urls: string[], params?: ExtractParams): Promise { + const headers = this.prepareHeaders(); + + if (!params?.prompt) { + throw new FirecrawlError("Prompt is required", 400); + } + + let jsonData: { urls: string[] } & ExtractParams= { urls, ...params }; + let jsonSchema: any; + try { + jsonSchema = params?.schema ? zodToJsonSchema(params.schema) : undefined; + } catch (error: any) { + throw new FirecrawlError("Invalid schema. Use a valid Zod schema.", 400); + } + + try { + const response: AxiosResponse = await this.postRequest( + this.apiUrl + `/v1/extract`, + { ...jsonData, schema: jsonSchema }, + headers + ); + if (response.status === 200) { + return response.data as ExtractResponse; + } else { + this.handleError(response, "extract"); + } + } catch (error: any) { + throw new FirecrawlError(error.message, 500); + } + return { success: false, error: "Internal server error." }; + } + /** * Prepares the headers for an API request. 
* @param idempotencyKey - Optional key to ensure idempotency. diff --git a/apps/python-sdk/example.py b/apps/python-sdk/example.py index e7c80b30..eba7cfd2 100644 --- a/apps/python-sdk/example.py +++ b/apps/python-sdk/example.py @@ -2,6 +2,8 @@ import time import nest_asyncio import uuid from firecrawl.firecrawl import FirecrawlApp +from pydantic import BaseModel, Field +from typing import List app = FirecrawlApp(api_key="fc-") @@ -50,9 +52,6 @@ print(crawl_status) # LLM Extraction: # Define schema to extract contents into using pydantic -from pydantic import BaseModel, Field -from typing import List - class ArticleSchema(BaseModel): title: str points: int @@ -115,6 +114,22 @@ llm_extraction_result = app2.scrape_url('https://news.ycombinator.com', { map_result = app.map_url('https://firecrawl.dev', { 'search': 'blog' }) print(map_result) +# Extract URLs: +class ExtractSchema(BaseModel): + title: str + description: str + links: List[str] + +# Define the schema using Pydantic +extract_schema = ExtractSchema.schema() + +# Perform the extraction +extract_result = app.extract_urls(['https://firecrawl.dev'], { + 'prompt': "Extract the title, description, and links from the website", + 'schema': extract_schema +}) +print(extract_result) + # Crawl a website with WebSockets: # inside an async function... 
nest_asyncio.apply() diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py index cb897b7e..d39b77a8 100644 --- a/apps/python-sdk/firecrawl/__init__.py +++ b/apps/python-sdk/firecrawl/__init__.py @@ -13,7 +13,7 @@ import os from .firecrawl import FirecrawlApp # noqa -__version__ = "1.5.0" +__version__ = "1.6.0" # Define the logger for the Firecrawl project logger: logging.Logger = logging.getLogger("firecrawl") diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index c2693c3d..bae2797d 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -12,15 +12,39 @@ Classes: import logging import os import time -from typing import Any, Dict, Optional, List +from typing import Any, Dict, Optional, List, Union import json import requests +import pydantic import websockets logger : logging.Logger = logging.getLogger("firecrawl") class FirecrawlApp: + class ExtractParams(pydantic.BaseModel): + """ + Parameters for the extract operation. + """ + prompt: str + schema: Optional[Any] = None + system_prompt: Optional[str] = None + + class ExtractResponse(pydantic.BaseModel): + """ + Response from the extract operation. + """ + success: bool + data: Optional[Any] = None + error: Optional[str] = None + + class ErrorResponse(pydantic.BaseModel): + """ + Error response. + """ + success: bool + error: str + def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None: """ Initialize the FirecrawlApp instance with API key, API URL. @@ -434,6 +458,44 @@ class FirecrawlApp: else: self._handle_error(response, 'check batch scrape status') + + def extract_urls(self, urls: List[str], params: Optional[ExtractParams] = None) -> Union[ExtractResponse, ErrorResponse]: + """ + Extracts information from a URL using the Firecrawl API. + + Args: + urls (List[str]): The URLs to extract information from. 
+ params (Optional[ExtractParams]): Additional parameters for the extract request. + + Returns: + Union[ExtractResponse, ErrorResponse]: The response from the extract operation. + """ + headers = self._prepare_headers() + + if not params or not params.get('prompt'): + raise ValueError("Prompt is required") + + if not params.get('schema'): + raise ValueError("Schema is required for extraction") + + jsonData = {'urls': urls, **params} + jsonSchema = params['schema'].schema() if hasattr(params['schema'], 'schema') else None + + try: + response = self._post_request( + f'{self.api_url}/v1/extract', + {**jsonData, 'schema': jsonSchema}, + headers + ) + if response.status_code == 200: + return response.json() + else: + self._handle_error(response, "extract") + except Exception as e: + raise ValueError(str(e), 500) + + return {'success': False, 'error': "Internal server error."} + def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]: """ Prepare the headers for API requests. 
From 41b45a844b1c878fff1a81d6929bcc4d0276648f Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 14 Nov 2024 15:56:12 -0300 Subject: [PATCH 13/51] sdk allowexternallinks --- apps/js-sdk/firecrawl/src/index.ts | 1 + apps/python-sdk/firecrawl/firecrawl.py | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 2b3ca2b3..cee054d4 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -242,6 +242,7 @@ export interface ExtractParams { prompt: string; schema?: zt.ZodSchema; systemPrompt?: string; + allowExternalLinks?: boolean; } /** diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index bae2797d..9dc9b47b 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -29,6 +29,7 @@ class FirecrawlApp: prompt: str schema: Optional[Any] = None system_prompt: Optional[str] = None + allow_external_links: Optional[bool] = False class ExtractResponse(pydantic.BaseModel): """ @@ -484,7 +485,11 @@ class FirecrawlApp: try: response = self._post_request( f'{self.api_url}/v1/extract', - {**jsonData, 'schema': jsonSchema}, + { + **jsonData, + 'allowExternalLinks': params.get('allow_external_links', False), + 'schema': jsonSchema + }, headers ) if response.status_code == 200: From d6749c211d068d06b64a80e524d0854c1e847fd9 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 14 Nov 2024 14:57:38 -0500 Subject: [PATCH 14/51] Nick: refactor and /* glob pattern support --- apps/api/src/controllers/v1/extract.ts | 237 +++++++------------------ apps/api/src/controllers/v1/map.ts | 75 ++++---- 2 files changed, 105 insertions(+), 207 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 178213d0..e6d9c7c5 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ 
b/apps/api/src/controllers/v1/extract.ts @@ -9,15 +9,9 @@ import { scrapeOptions, } from "./types"; import { Document } from "../../lib/entities"; -import { StoredCrawl, crawlToCrawler } from "../../lib/crawl-redis"; -import { fireEngineMap } from "../../search/fireEngine"; import Redis from "ioredis"; import { configDotenv } from "dotenv"; import { performRanking } from "../../lib/ranker"; -import { checkAndUpdateURLForMap } from "../../lib/validateUrl"; -import { isSameDomain } from "../../lib/validateUrl"; -import { isSameSubdomain } from "../../lib/validateUrl"; -import { removeDuplicateUrls } from "../../lib/validateUrl"; import { billTeam } from "../../services/billing/credit_billing"; import { logJob } from "../../services/logging/log_job"; import { logger } from "../../lib/logger"; @@ -28,6 +22,7 @@ import { PlanType } from "../../types"; import { getJobPriority } from "../../lib/job-priority"; import { generateOpenAICompletions } from "../../scraper/scrapeURL/transformers/llmExtract"; import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; +import { getMapResults } from "./map"; configDotenv(); const redis = new Redis(process.env.REDIS_URL!); @@ -43,164 +38,68 @@ export async function extractController( req.body = extractRequestSchema.parse(req.body); const id = crypto.randomUUID(); - let links: string[]; //= req.body.urls; - - const sc: StoredCrawl = { - originUrl: req.body.urls[0], - crawlerOptions: { - // ...crawlerOptions, - scrapeOptions: undefined, - }, - scrapeOptions: scrapeOptions.parse({}), - internalOptions: {}, - team_id: req.auth.team_id, - createdAt: Date.now(), - plan: req.auth.plan!, - }; - - const crawler = crawlToCrawler(id, sc); - - let urlWithoutWww = req.body.urls[0].replace("www.", ""); - console.log("urlWithoutWww", urlWithoutWww); - - const allowExternalLinks = req.body.allowExternalLinks ?? false; - - let mapUrl = req.body.prompt && allowExternalLinks - ? `${req.body.prompt} ${urlWithoutWww}` - : req.body.prompt ? 
`${req.body.prompt} site:${urlWithoutWww}` - : `site:${urlWithoutWww}`; - - const resultsPerPage = 100; - const maxPages = Math.ceil(MAX_EXTRACT_LIMIT / resultsPerPage); - - const cacheKey = `fireEngineMap:${mapUrl}`; - const cachedResult = null; - - let allResults: any[] = []; - let pagePromises: Promise[] = []; - - if (cachedResult) { - allResults = JSON.parse(cachedResult); - } else { - const fetchPage = async (page: number) => { - return fireEngineMap(mapUrl, { - numResults: resultsPerPage, - page: page, - }); - }; - - pagePromises = Array.from({ length: maxPages }, (_, i) => fetchPage(i + 1)); - allResults = (await Promise.all(pagePromises)).flat(); - // console.log("allResults", allResults); - // if allResults is empty, return an error - if (allResults.length === 0) { - return res.status(400).json({ - success: false, - error: "No results found", - }); - } - - await redis.set(cacheKey, JSON.stringify(allResults), "EX", 24 * 60 * 60); // Cache for 24 hours - } - - // console.log("allResults", allResults); - // Parallelize sitemap fetch with serper search - // const [sitemap, ...searchResults] = await Promise.all([ - // req.body.ignoreSitemap ? null : null, // crawler.tryGetSitemap(), - // ...(cachedResult ? [] : pagePromises), - // ]); - - // if (!cachedResult) { - // allResults = searchResults; - // } - - links = allResults.map(x => `url: ${x.url}, title: ${x.title}, description: ${x.description}`); - console.log("links", links); - // if (sitemap !== null) { - // sitemap.forEach((x) => { - // links.push(x.url); - // }); - // } - - // let mapResults = allResults - // .flat() - // .filter((result) => result !== null && result !== undefined); - - // const minumumCutoff = Math.min(MAX_EXTRACT_LIMIT, req.body.limit ?? 
MAX_EXTRACT_LIMIT); - // if (mapResults.length > minumumCutoff) { - // mapResults = mapResults.slice(0, minumumCutoff); - // } - - // if (mapResults.length > 0) { - // if (req.body.prompt) { - // // Ensure all map results are first, maintaining their order - // links = [ - // mapResults[0].url, - // ...mapResults.slice(1).map((x) => x.url), - // ...links, - // ]; - // } else { - // mapResults.map((x) => { - // links.push(x.url); - // }); - // } - // } - - // console.log("mapResults", mapResults); - - // console.log("links", links); - let linksAndScores: { link: string; score: number }[] = []; - // Perform cosine similarity between the search query and the list of links - if (req.body.prompt) { - const searchQuery = mapUrl; //req.body.prompt.toLowerCase(); - linksAndScores = await performRanking(links, searchQuery); - } - console.log("linksAndScores", linksAndScores); - links = linksAndScores - .filter(x => x.score > SCORE_THRESHOLD) - .map(x => x.link.split("url: ")[1].split(",")[0]) - .filter(x => !isUrlBlocked(x)) - - console.log("links:", links.length); - - // should we use some sort of llm to determine the best links? 
- - // console.log("linksAndScores", linksAndScores); - - // links = links - // .map((x) => { - // try { - // return checkAndUpdateURLForMap(x).url.trim(); - // } catch (_) { - // return null; - // } - // }) - // .filter((x) => x !== null) as string[]; - - // allows for subdomains to be included - // links = links.filter((x) => isSameDomain(x, req.body.urls[0])); - - // if includeSubdomains is false, filter out subdomains - // if (!req.body.includeSubdomains) { - // links = links.filter((x) => isSameSubdomain(x, req.body.urls[0])); - // z} - - // remove duplicates that could be due to http/https or www - // links = removeDuplicateUrls(links); - - // get top N links - links = links.slice(0, MAX_RANKING_LIMIT); - - // scrape the links - let earlyReturn = false; + let links: string[] = []; let docs: Document[] = []; + const earlyReturn = false; + for (const url of req.body.urls) { + if (url.includes('/*')) { + // Handle glob pattern URLs + const baseUrl = url.replace('/*', ''); + const pathPrefix = baseUrl.split('/').slice(3).join('/'); // Get path after domain if any + + const allowExternalLinks = req.body.allowExternalLinks ?? true; + let urlWithoutWww = baseUrl.replace("www.", ""); + let mapUrl = req.body.prompt && allowExternalLinks + ? `${req.body.prompt} ${urlWithoutWww}` + : req.body.prompt ? 
`${req.body.prompt} site:${urlWithoutWww}` + : `site:${urlWithoutWww}`; + + const mapResults = await getMapResults({ + url: baseUrl, + search: req.body.prompt, + teamId: req.auth.team_id, + plan: req.auth.plan, + allowExternalLinks, + origin: req.body.origin, + limit: req.body.limit, + ignoreSitemap: false, + includeMetadata: true, + includeSubdomains: req.body.includeSubdomains, + }); + + let mappedLinks = mapResults.links.map(x => `url: ${x.url}, title: ${x.title}, description: ${x.description}`); + + // Filter by path prefix if present + if (pathPrefix) { + mappedLinks = mappedLinks.filter(x => x.includes(`/${pathPrefix}/`)); + } + + if (req.body.prompt) { + const linksAndScores = await performRanking(mappedLinks, mapUrl); + mappedLinks = linksAndScores + .filter(x => x.score > SCORE_THRESHOLD) + .map(x => x.link.split("url: ")[1].split(",")[0]) + .filter(x => !isUrlBlocked(x)) + .slice(0, MAX_RANKING_LIMIT); + } + + links.push(...mappedLinks); + + } else { + // Handle direct URLs without glob pattern + if (!isUrlBlocked(url)) { + links.push(url); + } + } + } + + // Scrape each link for (const url of links) { const origin = req.body.origin || "api"; const timeout = req.body.timeout ?? 30000; const jobId = crypto.randomUUID(); - const startTime = new Date().getTime(); const jobPriority = await getJobPriority({ plan: req.auth.plan as PlanType, team_id: req.auth.team_id, @@ -223,11 +122,11 @@ export async function extractController( jobPriority ); - const totalWait = 0 //60000 // (req.body.waitFor ?? 0) + (req.body.actions ?? []).reduce((a,x) => (x.type === "wait" ? x.milliseconds ?? 
0 : 0) + a, 0); + const totalWait = 0; let doc: Document; try { - doc = await waitForJob(jobId, timeout + totalWait); // TODO: better types for this + doc = await waitForJob(jobId, timeout + totalWait); } catch (e) { logger.error(`Error in scrapeController: ${e}`); if (e instanceof Error && (e.message.startsWith("Job wait") || e.message === "timeout")) { @@ -245,36 +144,24 @@ export async function extractController( await getScrapeQueue().remove(jobId); - // const endTime = new Date().getTime(); - // const timeTakenInSeconds = (endTime - startTime) / 1000; - // const numTokens = - // doc && doc.extract - // // ? numTokensFromString(doc.markdown, "gpt-3.5-turbo") - // ? 0 // TODO: fix - // : 0; - - let creditsToBeBilled = 1; // Assuming 1 credit per document if (earlyReturn) { - // Don't bill if we're early returning return; } docs.push(doc); } - console.log(docs) - const completions = await generateOpenAICompletions( logger.child({ method: "extractController/generateOpenAICompletions" }), { mode: "llm", systemPrompt: "Only use the provided content to answer the question.", - prompt: mapUrl, + prompt: req.body.prompt, schema: req.body.schema, }, docs.map(x => x.markdown).join('\n') ); - console.log("completions", completions); + // console.log("completions", completions); // if(req.body.extract && req.body.formats.includes("extract")) { // creditsToBeBilled = 5; @@ -355,7 +242,7 @@ export async function extractController( return res.status(200).json({ success: true, - data: data, // includeMetadata ? mapResults : linksToReturn, - scrape_id: id, //origin?.includes("website") ? 
id : undefined, + data: data, + scrape_id: id, }); } \ No newline at end of file diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts index 91d712de..f2e9453a 100644 --- a/apps/api/src/controllers/v1/map.ts +++ b/apps/api/src/controllers/v1/map.ts @@ -29,6 +29,14 @@ const MAX_MAP_LIMIT = 5000; // Max Links that "Smart /map" can return const MAX_FIRE_ENGINE_RESULTS = 1000; +interface MapResult { + success: boolean; + links: string[] | any[]; + scrape_id?: string; + job_id: string; + time_taken: number; +} + export async function getMapResults({ url, search, @@ -39,8 +47,8 @@ export async function getMapResults({ teamId, plan, origin, - subId, - includeMetadata = false + includeMetadata = false, + allowExternalLinks }: { url: string; search?: string; @@ -51,9 +59,9 @@ export async function getMapResults({ teamId: string; plan?: string; origin?: string; - subId: string | null; includeMetadata?: boolean; -}) { + allowExternalLinks?: boolean; +}): Promise { const id = uuidv4(); let links: string[] = [url]; @@ -74,10 +82,11 @@ export async function getMapResults({ let urlWithoutWww = url.replace("www.", ""); - let mapUrl = search - ? `"${search}" site:${urlWithoutWww}` + let mapUrl = search && allowExternalLinks + ? `${search} ${urlWithoutWww}` + : search ? 
`${search} site:${urlWithoutWww}` : `site:${url}`; - + const resultsPerPage = 100; const maxPages = Math.ceil(Math.min(MAX_FIRE_ENGINE_RESULTS, limit) / resultsPerPage); @@ -171,34 +180,14 @@ export async function getMapResults({ // remove duplicates that could be due to http/https or www links = removeDuplicateUrls(links); - billTeam(teamId, subId, 1).catch((error) => { - logger.error( - `Failed to bill team ${teamId} for 1 credit: ${error}` - ); - }); - const linksToReturn = links.slice(0, limit); - logJob({ - job_id: id, - success: links.length > 0, - message: "Map completed", - num_docs: linksToReturn.length, - docs: linksToReturn, - time_taken: (new Date().getTime() - Date.now()) / 1000, - team_id: teamId, - mode: "map", - url: url, - crawlerOptions: {}, - scrapeOptions: {}, - origin: origin ?? "api", - num_tokens: 0, - }); - return { success: true, links: includeMetadata ? mapResults : linksToReturn, scrape_id: origin?.includes("website") ? id : undefined, + job_id: id, + time_taken: (new Date().getTime() - Date.now()) / 1000, }; } @@ -208,7 +197,6 @@ export async function mapController( ) { req.body = mapRequestSchema.parse(req.body); - console.log("req.body", req.body); const result = await getMapResults({ url: req.body.url, search: req.body.search, @@ -216,10 +204,33 @@ export async function mapController( ignoreSitemap: req.body.ignoreSitemap, includeSubdomains: req.body.includeSubdomains, crawlerOptions: req.body, + origin: req.body.origin, teamId: req.auth.team_id, plan: req.auth.plan, - origin: req.body.origin, - subId: req.acuc?.sub_id + }); + + // Bill the team + billTeam(req.auth.team_id, req.acuc?.sub_id, 1).catch((error) => { + logger.error( + `Failed to bill team ${req.auth.team_id} for 1 credit: ${error}` + ); + }); + + // Log the job + logJob({ + job_id: result.job_id, + success: result.links.length > 0, + message: "Map completed", + num_docs: result.links.length, + docs: result.links, + time_taken: result.time_taken, + team_id: 
req.auth.team_id, + mode: "map", + url: req.body.url, + crawlerOptions: {}, + scrapeOptions: {}, + origin: req.body.origin ?? "api", + num_tokens: 0, }); const response = { From 1b5f6a0959670874dffdcf4d61f27d94e8ef5665 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 14 Nov 2024 14:59:34 -0500 Subject: [PATCH 15/51] Update extract.ts --- apps/api/src/controllers/v1/extract.ts | 45 +++++++++++++++----------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index e6d9c7c5..be7feaa9 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -94,8 +94,8 @@ export async function extractController( } } - // Scrape each link - for (const url of links) { + // Scrape all links in parallel + const scrapePromises = links.map(async (url) => { const origin = req.body.origin || "api"; const timeout = req.body.timeout ?? 30000; const jobId = crypto.randomUUID(); @@ -109,7 +109,7 @@ export async function extractController( await addScrapeJob( { url, - mode: "single_urls", + mode: "single_urls", team_id: req.auth.team_id, scrapeOptions: scrapeOptions.parse({}), internalOptions: {}, @@ -124,30 +124,37 @@ export async function extractController( const totalWait = 0; - let doc: Document; try { - doc = await waitForJob(jobId, timeout + totalWait); + const doc = await waitForJob(jobId, timeout + totalWait); + await getScrapeQueue().remove(jobId); + if (earlyReturn) { + return null; + } + return doc; } catch (e) { logger.error(`Error in scrapeController: ${e}`); if (e instanceof Error && (e.message.startsWith("Job wait") || e.message === "timeout")) { - return res.status(408).json({ - success: false, - error: "Request timed out", - }); + throw { + status: 408, + error: "Request timed out" + }; } else { - return res.status(500).json({ - success: false, - error: `(Internal server error) - ${(e && e.message) ? 
e.message : e}`, - }); + throw { + status: 500, + error: `(Internal server error) - ${(e && e.message) ? e.message : e}` + }; } } + }); - await getScrapeQueue().remove(jobId); - - if (earlyReturn) { - return; - } - docs.push(doc); + try { + const results = await Promise.all(scrapePromises); + docs.push(...results.filter(doc => doc !== null).map(x => x!)); + } catch (e) { + return res.status(e.status).json({ + success: false, + error: e.error + }); } const completions = await generateOpenAICompletions( From 796cd0746db578cd6aa56af157ab8ba365ffc174 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 14 Nov 2024 15:03:06 -0500 Subject: [PATCH 16/51] Update extract.ts --- apps/api/src/controllers/v1/extract.ts | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index be7feaa9..71b1f9eb 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -42,7 +42,8 @@ export async function extractController( let docs: Document[] = []; const earlyReturn = false; - for (const url of req.body.urls) { + // Process all URLs in parallel + const urlPromises = req.body.urls.map(async (url) => { if (url.includes('/*')) { // Handle glob pattern URLs const baseUrl = url.replace('/*', ''); @@ -84,15 +85,20 @@ export async function extractController( .slice(0, MAX_RANKING_LIMIT); } - links.push(...mappedLinks); + return mappedLinks; } else { // Handle direct URLs without glob pattern if (!isUrlBlocked(url)) { - links.push(url); + return [url]; } + return []; } - } + }); + + // Wait for all URL processing to complete and flatten results + const processedUrls = await Promise.all(urlPromises); + links.push(...processedUrls.flat()); // Scrape all links in parallel const scrapePromises = links.map(async (url) => { From be5e6da5ff13152efd6ca2acf19e3b55636b14e8 Mon Sep 17 00:00:00 2001 From: rafaelmmiller 
<150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 14 Nov 2024 17:03:54 -0300 Subject: [PATCH 17/51] tests --- .../src/__tests__/e2e_extract/index.test.ts | 248 +++++++++--------- apps/api/src/types.ts | 9 + 2 files changed, 133 insertions(+), 124 deletions(-) diff --git a/apps/api/src/__tests__/e2e_extract/index.test.ts b/apps/api/src/__tests__/e2e_extract/index.test.ts index 39416bd7..9d6ffd86 100644 --- a/apps/api/src/__tests__/e2e_extract/index.test.ts +++ b/apps/api/src/__tests__/e2e_extract/index.test.ts @@ -10,142 +10,142 @@ dotenv.config(); const TEST_URL = "http://127.0.0.1:3002"; describe("E2E Tests for Extract API Routes", () => { - describe("POST /v1/extract", () => { - it.concurrent("should return authors of blog posts on firecrawl.dev", async () => { - const response = await request(TEST_URL) - .post("/v1/extract") - .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) - .set("Content-Type", "application/json") - .send({ - urls: ["https://firecrawl.dev"], - prompt: "Who are the authors of the blog posts?", - schema: { - type: "object", - properties: { authors: { type: "array", items: { type: "string" } } }, - }, - }); - expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty("data"); - expect(response.body.data).toHaveProperty("founders"); + it.concurrent("should return authors of blog posts on firecrawl.dev", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["https://firecrawl.dev"], + prompt: "Who are the authors of the blog posts?", + schema: { + type: "object", + properties: { authors: { type: "array", items: { type: "string" } } }, + }, + }); - let gotItRight = 0; - for (const author of response.body.data?.authors) { - if (author.includes("Caleb Peffer")) gotItRight++; - if (author.includes("Gergő Móricz")) gotItRight++; - if (author.includes("Eric 
Ciarla")) gotItRight++; - if (author.includes("Nicolas Camara")) gotItRight++; - } + console.log(response.body); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data).toHaveProperty("authors"); - expect(gotItRight).toBeGreaterThan(3); - }, 60000); + let gotItRight = 0; + for (const author of response.body.data?.authors) { + if (author.includes("Caleb Peffer")) gotItRight++; + if (author.includes("Gergő Móricz")) gotItRight++; + if (author.includes("Eric Ciarla")) gotItRight++; + if (author.includes("Nicolas Camara")) gotItRight++; + } - it.concurrent("should return founders of firecrawl.dev (allowExternalLinks = true)", async () => { - const response = await request(TEST_URL) - .post("/v1/extract") - .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) - .set("Content-Type", "application/json") - .send({ - urls: ["mendable.ai"], - prompt: "Who are the founders of the company?", - allowExternalLinks: true, - schema: { - type: "object", - properties: { founders: { type: "array", items: { type: "string" } } }, - }, - }); - expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty("data"); - expect(response.body.data).toHaveProperty("founders"); + expect(gotItRight).toBeGreaterThan(1); + }, 60000); - let gotItRight = 0; - for (const founder of response.body.data?.founders) { - if (founder.includes("Caleb")) gotItRight++; - if (founder.includes("Eric")) gotItRight++; - if (founder.includes("Nicolas")) gotItRight++; - } + it.concurrent("should return founders of firecrawl.dev (allowExternalLinks = true)", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["mendable.ai"], + prompt: "Who are the founders of the company?", + allowExternalLinks: true, + schema: { + type: "object", + properties: { founders: { type: "array", items: { 
type: "string" } } }, + }, + }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data).toHaveProperty("founders"); - expect(gotItRight).toBe(3); - }, 60000); + let gotItRight = 0; + for (const founder of response.body.data?.founders) { + if (founder.includes("Caleb")) gotItRight++; + if (founder.includes("Eric")) gotItRight++; + if (founder.includes("Nicolas")) gotItRight++; + } - it.concurrent("should return hiring opportunities on firecrawl.dev (allowExternalLinks = true)", async () => { - const response = await request(TEST_URL) - .post("/v1/extract") - .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) - .set("Content-Type", "application/json") - .send({ - urls: ["https://firecrawl.dev"], - prompt: "What are they hiring for?", - allowExternalLinks: true, - schema: { - type: "array", - items: { - type: "string" - } - }, - }); - expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty("data"); - console.log(response.body.data); + expect(gotItRight).toBe(3); + }, 60000); - let gotItRight = 0; - for (const hiring of response.body.data?.items) { - if (hiring.includes("Developer Relations Specialist")) gotItRight++; - if (hiring.includes("Web Automation Engineer")) gotItRight++; - if (hiring.includes("Developer Experience Engineer")) gotItRight++; - if (hiring.includes("Developer Support Engineer")) gotItRight++; - if (hiring.includes("Dev Ops Engineer")) gotItRight++; - if (hiring.includes("Founding Web Automation Engineer")) gotItRight++; - } + it.concurrent("should return hiring opportunities on firecrawl.dev (allowExternalLinks = true)", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["https://firecrawl.dev"], + prompt: "What are they hiring for?", + allowExternalLinks: true, + schema: { + type: "array", + items: { + 
type: "string" + } + }, + }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + console.log(response.body.data); - expect(gotItRight).toBeGreaterThan(5); - }, 60000); + let gotItRight = 0; + for (const hiring of response.body.data?.items) { + if (hiring.includes("Developer Relations Specialist")) gotItRight++; + if (hiring.includes("Web Automation Engineer")) gotItRight++; + if (hiring.includes("Developer Experience Engineer")) gotItRight++; + if (hiring.includes("Developer Support Engineer")) gotItRight++; + if (hiring.includes("Dev Ops Engineer")) gotItRight++; + if (hiring.includes("Founding Web Automation Engineer")) gotItRight++; + } - it.concurrent("should return PCI DSS compliance for Fivetran", async () => { - const response = await request(TEST_URL) - .post("/v1/extract") - .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) - .set("Content-Type", "application/json") - .send({ - urls: ["fivetran.com"], - prompt: "Does Fivetran have PCI DSS compliance?", - allowExternalLinks: true, - schema: { + expect(gotItRight).toBeGreaterThan(5); + }, 60000); + + it.concurrent("should return PCI DSS compliance for Fivetran", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["fivetran.com"], + prompt: "Does Fivetran have PCI DSS compliance?", + allowExternalLinks: true, + schema: { + type: "object", + properties: { + pciDssCompliance: { type: "boolean" } + } + }, + }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data?.pciDssCompliance).toBe(true); + }, 60000); + + it.concurrent("should return Azure Data Connectors for Fivetran", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", 
"application/json") + .send({ + urls: ["fivetran.com"], + prompt: "What are the Azure Data Connectors they offer?", + schema: { + type: "array", + items: { type: "object", properties: { - pciDssCompliance: { type: "boolean" } - } - }, - }); - expect(response.statusCode).toBe(200); - expect(response.body).toHaveProperty("data"); - expect(response.body.data?.pciDssCompliance).toBe(true); - }, 60000); - - it.concurrent("should return Azure Data Connectors for Fivetran", async () => { - const response = await request(TEST_URL) - .post("/v1/extract") - .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) - .set("Content-Type", "application/json") - .send({ - urls: ["fivetran.com"], - prompt: "What are the Azure Data Connectors they offer?", - schema: { - type: "array", - items: { - type: "object", - properties: { - connector: { type: "string" }, - description: { type: "string" }, - supportsCaptureDelete: { type: "boolean" } - } + connector: { type: "string" }, + description: { type: "string" }, + supportsCaptureDelete: { type: "boolean" } } } - }) + } + }) - console.log(response.body); - // expect(response.statusCode).toBe(200); - // expect(response.body).toHaveProperty("data"); - // expect(response.body.data?.pciDssCompliance).toBe(true); - }, 60000); - }); + console.log(response.body); + // expect(response.statusCode).toBe(200); + // expect(response.body).toHaveProperty("data"); + // expect(response.body.data?.pciDssCompliance).toBe(true); + }, 60000); }); diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index d7821407..ee9aaf00 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -106,6 +106,15 @@ export interface FirecrawlCrawlStatusResponse { error?: string; } +export interface FirecrawlExtractResponse { + statusCode: number; + body: { + success: boolean; + data: any[]; + }; + error?: string; +} + export enum RateLimiterMode { Crawl = "crawl", CrawlStatus = "crawlStatus", From 5056dcd8e96171be0b10f8cbebb496a07668a9cf Mon Sep 17 
00:00:00 2001 From: Nicolas Date: Thu, 14 Nov 2024 15:06:22 -0500 Subject: [PATCH 18/51] Update index.test.ts --- apps/api/src/__tests__/e2e_extract/index.test.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/apps/api/src/__tests__/e2e_extract/index.test.ts b/apps/api/src/__tests__/e2e_extract/index.test.ts index 9d6ffd86..ec68ca12 100644 --- a/apps/api/src/__tests__/e2e_extract/index.test.ts +++ b/apps/api/src/__tests__/e2e_extract/index.test.ts @@ -16,7 +16,7 @@ describe("E2E Tests for Extract API Routes", () => { .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Content-Type", "application/json") .send({ - urls: ["https://firecrawl.dev"], + urls: ["https://firecrawl.dev/*"], prompt: "Who are the authors of the blog posts?", schema: { type: "object", @@ -46,7 +46,7 @@ describe("E2E Tests for Extract API Routes", () => { .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Content-Type", "application/json") .send({ - urls: ["mendable.ai"], + urls: ["mendable.ai/*"], prompt: "Who are the founders of the company?", allowExternalLinks: true, schema: { @@ -74,7 +74,7 @@ describe("E2E Tests for Extract API Routes", () => { .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Content-Type", "application/json") .send({ - urls: ["https://firecrawl.dev"], + urls: ["https://firecrawl.dev/*"], prompt: "What are they hiring for?", allowExternalLinks: true, schema: { @@ -107,7 +107,7 @@ describe("E2E Tests for Extract API Routes", () => { .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Content-Type", "application/json") .send({ - urls: ["fivetran.com"], + urls: ["fivetran.com/*"], prompt: "Does Fivetran have PCI DSS compliance?", allowExternalLinks: true, schema: { @@ -128,7 +128,7 @@ describe("E2E Tests for Extract API Routes", () => { .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Content-Type", "application/json") .send({ - urls: ["fivetran.com"], + urls: 
["fivetran.com/*"], prompt: "What are the Azure Data Connectors they offer?", schema: { type: "array", From ebe9de2ac582a051773638e37ff0892962a54787 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 14 Nov 2024 15:26:15 -0500 Subject: [PATCH 19/51] Nick: --- apps/api/src/controllers/v1/extract.ts | 26 +++++++++++++++++--------- apps/api/src/controllers/v1/map.ts | 5 +++-- apps/api/src/lib/ranker.ts | 23 ++++++++++++++++------- apps/api/src/search/fireEngine.ts | 2 -- 4 files changed, 36 insertions(+), 20 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 71b1f9eb..0a94289c 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -28,8 +28,8 @@ configDotenv(); const redis = new Redis(process.env.REDIS_URL!); const MAX_EXTRACT_LIMIT = 100; -const MAX_RANKING_LIMIT = 5; -const SCORE_THRESHOLD = 0.75; +const MAX_RANKING_LIMIT = 10; +const SCORE_THRESHOLD = 0.70; export async function extractController( req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>, @@ -64,28 +64,34 @@ export async function extractController( allowExternalLinks, origin: req.body.origin, limit: req.body.limit, - ignoreSitemap: false, + ignoreSitemap: true, includeMetadata: true, includeSubdomains: req.body.includeSubdomains, }); - let mappedLinks = mapResults.links.map(x => `url: ${x.url}, title: ${x.title}, description: ${x.description}`); + let mappedLinks = mapResults.links as MapDocument[]; + // Limit number of links to MAX_EXTRACT_LIMIT + mappedLinks = mappedLinks.slice(0, MAX_EXTRACT_LIMIT); + + let mappedLinksRerank = mappedLinks.map(x => `url: ${x.url}, title: ${x.title}, description: ${x.description}`); // Filter by path prefix if present if (pathPrefix) { - mappedLinks = mappedLinks.filter(x => x.includes(`/${pathPrefix}/`)); + mappedLinks = mappedLinks.filter(x => x.url && x.url.includes(`/${pathPrefix}/`)); } if (req.body.prompt) { - const linksAndScores = await 
performRanking(mappedLinks, mapUrl); + const linksAndScores : { link: string, linkWithContext: string, score: number, originalIndex: number }[] = await performRanking(mappedLinksRerank, mappedLinks.map(l => l.url), mapUrl); mappedLinks = linksAndScores .filter(x => x.score > SCORE_THRESHOLD) - .map(x => x.link.split("url: ")[1].split(",")[0]) - .filter(x => !isUrlBlocked(x)) + .map(x => mappedLinks.find(link => link.url === x.link)) + .filter((x): x is MapDocument => x !== undefined && x.url !== undefined && !isUrlBlocked(x.url)) .slice(0, MAX_RANKING_LIMIT); + console.log("linksAndScores", linksAndScores); + console.log("linksAndScores", linksAndScores.length); } - return mappedLinks; + return mappedLinks.map(x => x.url) as string[]; } else { // Handle direct URLs without glob pattern @@ -100,6 +106,8 @@ export async function extractController( const processedUrls = await Promise.all(urlPromises); links.push(...processedUrls.flat()); + console.log("links", links.length); + console // Scrape all links in parallel const scrapePromises = links.map(async (url) => { const origin = req.body.origin || "api"; diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts index f2e9453a..ba7be01f 100644 --- a/apps/api/src/controllers/v1/map.ts +++ b/apps/api/src/controllers/v1/map.ts @@ -1,6 +1,7 @@ import { Response } from "express"; import { v4 as uuidv4 } from "uuid"; import { + MapDocument, mapRequestSchema, RequestWithAuth, scrapeOptions, @@ -86,7 +87,7 @@ export async function getMapResults({ ? `${search} ${urlWithoutWww}` : search ? 
`${search} site:${urlWithoutWww}` : `site:${url}`; - + const resultsPerPage = 100; const maxPages = Math.ceil(Math.min(MAX_FIRE_ENGINE_RESULTS, limit) / resultsPerPage); @@ -129,7 +130,7 @@ export async function getMapResults({ }); } - let mapResults = allResults + let mapResults : MapDocument[] = allResults .flat() .filter((result) => result !== null && result !== undefined); diff --git a/apps/api/src/lib/ranker.ts b/apps/api/src/lib/ranker.ts index 7cd39820..9a200f49 100644 --- a/apps/api/src/lib/ranker.ts +++ b/apps/api/src/lib/ranker.ts @@ -40,24 +40,33 @@ const textToVector = (searchQuery: string, text: string): number[] => { }); }; -async function performRanking(links: string[], searchQuery: string) { +async function performRanking(linksWithContext: string[], links: string[], searchQuery: string) { try { // Generate embeddings for the search query const queryEmbedding = await getEmbedding(searchQuery); // Generate embeddings for each link and calculate similarity - const linksAndScores = await Promise.all(links.map(async (link) => { - const linkEmbedding = await getEmbedding(link); + const linksAndScores = await Promise.all(linksWithContext.map(async (linkWithContext, index) => { + const linkEmbedding = await getEmbedding(linkWithContext); // console.log("linkEmbedding", linkEmbedding); - // const linkVector = textToVector(searchQuery, link); + // const linkVector = textToVector(searchQuery, linkWithContext); const score = cosineSimilarity(queryEmbedding, linkEmbedding); // console.log("score", score); - return { link, score }; + return { + link: links[index], // Use corresponding link from links array + linkWithContext, + score, + originalIndex: index // Store original position + }; })); - // Sort links based on similarity scores - linksAndScores.sort((a, b) => b.score - a.score); + // Sort links based on similarity scores while preserving original order for equal scores + linksAndScores.sort((a, b) => { + const scoreDiff = b.score - a.score; + // If scores 
are equal, maintain original order + return scoreDiff === 0 ? a.originalIndex - b.originalIndex : scoreDiff; + }); return linksAndScores; } catch (error) { diff --git a/apps/api/src/search/fireEngine.ts b/apps/api/src/search/fireEngine.ts index 0b82478e..1eb2419f 100644 --- a/apps/api/src/search/fireEngine.ts +++ b/apps/api/src/search/fireEngine.ts @@ -29,8 +29,6 @@ export async function fireEngineMap( page: options.page ?? 1, }); - console.log("data", data); - if (!process.env.FIRE_ENGINE_BETA_URL) { console.warn( "(v1/map Beta) Results might differ from cloud offering currently." From 22848af5ae79e898d3a51984e9c31bfe78e88d3a Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 14 Nov 2024 15:34:02 -0500 Subject: [PATCH 20/51] Nick: --- apps/api/src/__tests__/e2e_extract/index.test.ts | 14 ++++++++------ apps/api/src/controllers/v1/extract.ts | 3 +-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/apps/api/src/__tests__/e2e_extract/index.test.ts b/apps/api/src/__tests__/e2e_extract/index.test.ts index ec68ca12..f97b111e 100644 --- a/apps/api/src/__tests__/e2e_extract/index.test.ts +++ b/apps/api/src/__tests__/e2e_extract/index.test.ts @@ -35,6 +35,9 @@ describe("E2E Tests for Extract API Routes", () => { if (author.includes("Gergő Móricz")) gotItRight++; if (author.includes("Eric Ciarla")) gotItRight++; if (author.includes("Nicolas Camara")) gotItRight++; + if (author.includes("Jon")) gotItRight++; + if (author.includes("Wendong")) gotItRight++; + } expect(gotItRight).toBeGreaterThan(1); @@ -46,7 +49,7 @@ describe("E2E Tests for Extract API Routes", () => { .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Content-Type", "application/json") .send({ - urls: ["mendable.ai/*"], + urls: ["firecrawl.dev/*"], prompt: "Who are the founders of the company?", allowExternalLinks: true, schema: { @@ -58,14 +61,16 @@ describe("E2E Tests for Extract API Routes", () => { expect(response.body).toHaveProperty("data"); 
expect(response.body.data).toHaveProperty("founders"); + console.log(response.body.data?.founders); let gotItRight = 0; for (const founder of response.body.data?.founders) { if (founder.includes("Caleb")) gotItRight++; if (founder.includes("Eric")) gotItRight++; if (founder.includes("Nicolas")) gotItRight++; + } - expect(gotItRight).toBe(3); + expect(gotItRight).toBeGreaterThanOrEqual(2); }, 60000); it.concurrent("should return hiring opportunities on firecrawl.dev (allowExternalLinks = true)", async () => { @@ -90,15 +95,12 @@ describe("E2E Tests for Extract API Routes", () => { let gotItRight = 0; for (const hiring of response.body.data?.items) { - if (hiring.includes("Developer Relations Specialist")) gotItRight++; - if (hiring.includes("Web Automation Engineer")) gotItRight++; - if (hiring.includes("Developer Experience Engineer")) gotItRight++; if (hiring.includes("Developer Support Engineer")) gotItRight++; if (hiring.includes("Dev Ops Engineer")) gotItRight++; if (hiring.includes("Founding Web Automation Engineer")) gotItRight++; } - expect(gotItRight).toBeGreaterThan(5); + expect(gotItRight).toBeGreaterThan(2); }, 60000); it.concurrent("should return PCI DSS compliance for Fivetran", async () => { diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 0a94289c..1513365d 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -29,7 +29,7 @@ const redis = new Redis(process.env.REDIS_URL!); const MAX_EXTRACT_LIMIT = 100; const MAX_RANKING_LIMIT = 10; -const SCORE_THRESHOLD = 0.70; +const SCORE_THRESHOLD = 0.75; export async function extractController( req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>, @@ -107,7 +107,6 @@ export async function extractController( links.push(...processedUrls.flat()); console.log("links", links.length); - console // Scrape all links in parallel const scrapePromises = links.map(async (url) => { const origin = req.body.origin || "api"; 
From 91f4fd815fa5a84886709c4a96b798586ffb0ef2 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 14 Nov 2024 15:41:42 -0500 Subject: [PATCH 21/51] Update extract.ts --- apps/api/src/controllers/v1/extract.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 1513365d..6d9b4a2c 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -44,7 +44,7 @@ export async function extractController( // Process all URLs in parallel const urlPromises = req.body.urls.map(async (url) => { - if (url.includes('/*')) { + if (url.includes('/*') || req.body.allowExternalLinks) { // Handle glob pattern URLs const baseUrl = url.replace('/*', ''); const pathPrefix = baseUrl.split('/').slice(3).join('/'); // Get path after domain if any From 53134b7c85f88eed686ae657e7ff3ee35dbdf0f7 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 19 Nov 2024 09:34:52 -0300 Subject: [PATCH 22/51] Rafa: removed throw error and added map to requests --- apps/api/requests.http | 14 +++++++++++++- apps/api/src/controllers/v1/map.ts | 4 ---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/apps/api/requests.http b/apps/api/requests.http index 4ce40b2c..9fe36aa7 100644 --- a/apps/api/requests.http +++ b/apps/api/requests.http @@ -46,4 +46,16 @@ content-type: application/json @batchScrapeId = {{batchScrape.response.body.$.id}} # @name batchScrapeStatus GET {{baseUrl}}/v1/crawl/{{batchScrapeId}} HTTP/1.1 -Authorization: Bearer {{$dotenv TEST_API_KEY}} \ No newline at end of file +Authorization: Bearer {{$dotenv TEST_API_KEY}} + + +### Map Website +# @name map +POST {{baseUrl}}/v1/map HTTP/1.1 +Authorization: Bearer {{$dotenv TEST_API_KEY}} +content-type: application/json + +{ + "url": "firecrawl.dev", + "sitemapOnly": true +} \ No newline at end of file diff --git a/apps/api/src/controllers/v1/map.ts 
b/apps/api/src/controllers/v1/map.ts index 8ab5c135..c8c07b6b 100644 --- a/apps/api/src/controllers/v1/map.ts +++ b/apps/api/src/controllers/v1/map.ts @@ -80,10 +80,6 @@ export async function getMapResults({ // If sitemapOnly is true, only get links from sitemap if (crawlerOptions.sitemapOnly) { - if (includeMetadata) { - throw new Error("includeMetadata is not supported with sitemapOnly"); - } - const sitemap = await crawler.tryGetSitemap(true, true); if (sitemap !== null) { sitemap.forEach((x) => { From 2fb8a3c8dc427a8f6f208c7f23a36b054df6580f Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 19 Nov 2024 10:04:42 -0300 Subject: [PATCH 23/51] fix schema --- apps/api/requests.http | 16 ++++++++++++++++ .../scraper/scrapeURL/transformers/llmExtract.ts | 9 +++++++++ 2 files changed, 25 insertions(+) diff --git a/apps/api/requests.http b/apps/api/requests.http index 9fe36aa7..0e3b9206 100644 --- a/apps/api/requests.http +++ b/apps/api/requests.http @@ -58,4 +58,20 @@ content-type: application/json { "url": "firecrawl.dev", "sitemapOnly": true +} + +### Extract +# @name extract +POST {{baseUrl}}/v1/extract HTTP/1.1 +Authorization: Bearer {{$dotenv TEST_API_KEY}} +content-type: application/json + +{ + "urls": ["firecrawl.dev"], + "prompt": "What is the title, description and main product of the page?", + "schema": { + "title": "string", + "description": "string", + "mainProduct": "string" + } } \ No newline at end of file diff --git a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts index 1a5abd66..ab619423 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts @@ -108,6 +108,15 @@ export async function generateOpenAICompletions(logger: Logger, options: Extract required: ["items"], additionalProperties: false, }; + } else if (schema && typeof schema === 'object' && 
!schema.type) { + schema = { + type: "object", + properties: Object.fromEntries( + Object.entries(schema).map(([key, value]) => [key, { type: value }]) + ), + required: Object.keys(schema), + additionalProperties: false + }; } schema = normalizeSchema(schema); From d02a8bcb82a83e77531bc38e2730d4c828df879f Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 19 Nov 2024 13:49:23 -0800 Subject: [PATCH 24/51] Nick: extract urls to extract --- apps/js-sdk/example.js | 2 +- apps/js-sdk/firecrawl/src/index.ts | 4 ++-- apps/python-sdk/example.py | 2 +- apps/python-sdk/firecrawl/firecrawl.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/apps/js-sdk/example.js b/apps/js-sdk/example.js index 166cc18d..21bdb2a1 100644 --- a/apps/js-sdk/example.js +++ b/apps/js-sdk/example.js @@ -50,7 +50,7 @@ const main = async () => { links: z.array(z.string()) }); - const extractResult = await app.extractUrls(['https://firecrawl.dev'], { + const extractResult = await app.extract(['https://firecrawl.dev'], { prompt: "Extract the title, description, and links from the website", schema: extractSchema }); diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 16cf674f..b6737855 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -699,12 +699,12 @@ export default class FirecrawlApp { } /** - * Extracts information from a URL using the Firecrawl API. + * Extracts information from URLs using the Firecrawl API. * @param url - The URL to extract information from. * @param params - Additional parameters for the extract request. * @returns The response from the extract operation. 
*/ - async extractUrls(urls: string[], params?: ExtractParams): Promise { + async extract(urls: string[], params?: ExtractParams): Promise { const headers = this.prepareHeaders(); if (!params?.prompt) { diff --git a/apps/python-sdk/example.py b/apps/python-sdk/example.py index eba7cfd2..686b7676 100644 --- a/apps/python-sdk/example.py +++ b/apps/python-sdk/example.py @@ -124,7 +124,7 @@ class ExtractSchema(BaseModel): extract_schema = ExtractSchema.schema() # Perform the extraction -extract_result = app.extract_urls(['https://firecrawl.dev'], { +extract_result = app.extract(['https://firecrawl.dev'], { 'prompt': "Extract the title, description, and links from the website", 'schema': extract_schema }) diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 9dc9b47b..bb87906c 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -460,7 +460,7 @@ class FirecrawlApp: self._handle_error(response, 'check batch scrape status') - def extract_urls(self, urls: List[str], params: Optional[ExtractParams] = None) -> Union[ExtractResponse, ErrorResponse]: + def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> Union[ExtractResponse, ErrorResponse]: """ Extracts information from a URL using the Firecrawl API. 
From c9b0a80522a03642d51b79a7183bc06ba4d3fc82 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 10:23:44 -0800 Subject: [PATCH 25/51] Nick: --- .../src/__tests__/e2e_extract/index.test.ts | 25 +++++++++++++++++++ .../scrapeURL/transformers/llmExtract.ts | 1 + 2 files changed, 26 insertions(+) diff --git a/apps/api/src/__tests__/e2e_extract/index.test.ts b/apps/api/src/__tests__/e2e_extract/index.test.ts index f97b111e..31f14f4a 100644 --- a/apps/api/src/__tests__/e2e_extract/index.test.ts +++ b/apps/api/src/__tests__/e2e_extract/index.test.ts @@ -150,4 +150,29 @@ describe("E2E Tests for Extract API Routes", () => { // expect(response.body).toHaveProperty("data"); // expect(response.body.data?.pciDssCompliance).toBe(true); }, 60000); + + it.concurrent("should return Greenhouse Applicant Tracking System for Abnormal Security", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["https://careers.abnormalsecurity.com/jobs/6119456003?gh_jid=6119456003"], + prompt: "what applicant tracking system is this company using?", + schema: { + type: "object", + properties: { + isGreenhouseATS: { type: "boolean" }, + answer: { type: "string" } + } + }, + allowExternalLinks: true + }) + + console.log(response.body); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data?.isGreenhouseATS).toBe(true); + }, 60000); + }); diff --git a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts index ab619423..7c2a6696 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts @@ -123,6 +123,7 @@ export async function generateOpenAICompletions(logger: Logger, options: Extract const jsonCompletion = await 
openai.beta.chat.completions.parse({ model, + temperature: 0, messages: [ { role: "system", From d49f62fb5641ea123fcbba8d9eb41bcfeef74f1e Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 11:50:14 -0800 Subject: [PATCH 26/51] Nick: extract fixes --- apps/api/src/__tests__/e2e_extract/index.test.ts | 3 ++- apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/apps/api/src/__tests__/e2e_extract/index.test.ts b/apps/api/src/__tests__/e2e_extract/index.test.ts index 31f14f4a..346958da 100644 --- a/apps/api/src/__tests__/e2e_extract/index.test.ts +++ b/apps/api/src/__tests__/e2e_extract/index.test.ts @@ -86,7 +86,8 @@ describe("E2E Tests for Extract API Routes", () => { type: "array", items: { type: "string" - } + }, + required: ["items"] }, }); expect(response.statusCode).toBe(200); diff --git a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts index 7c2a6696..41d76779 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts @@ -165,7 +165,9 @@ export async function generateOpenAICompletions(logger: Logger, options: Extract } } - if (options.schema && options.schema.type === "array") { + // If the users actually wants the items object, they can specify it as 'required' in the schema + // otherwise, we just return the items array + if (options.schema && options.schema.type === "array" && !schema?.required?.includes("items")) { extract = extract?.items; } return { extract, warning }; From 28696da6b27add8eeae26f5db3d9a5513f2faae1 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 12:25:50 -0800 Subject: [PATCH 27/51] Nick: gpt-4o --- apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts | 2 +- apps/js-sdk/firecrawl/src/index.ts | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git 
a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts index 41d76779..d8004c07 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts @@ -63,7 +63,7 @@ export async function generateOpenAICompletions(logger: Logger, options: Extract let warning: string | undefined; const openai = new OpenAI(); - const model: TiktokenModel = (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini"; + const model: TiktokenModel = (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o"; if (markdown === undefined) { throw new Error("document.markdown is undefined -- this is unexpected"); diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index b6737855..6fb8ad2e 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -86,6 +86,7 @@ export interface CrawlScrapeOptions { country?: string; languages?: string[]; }; + mobile?: boolean; skipTlsVerification?: boolean; removeBase64Images?: boolean; } From 67a29898742a7e0df28ee73c78a98d90e2a6ddd1 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 12:48:10 -0800 Subject: [PATCH 28/51] Nick: fixes --- apps/api/src/controllers/v1/extract.ts | 14 +++++++++----- apps/api/src/lib/extract/build-document.ts | 17 +++++++++++++++++ .../scrapeURL/transformers/llmExtract.ts | 2 +- 3 files changed, 27 insertions(+), 6 deletions(-) create mode 100644 apps/api/src/lib/extract/build-document.ts diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 6d9b4a2c..ededd63d 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -23,6 +23,7 @@ import { getJobPriority } from "../../lib/job-priority"; import { generateOpenAICompletions } from "../../scraper/scrapeURL/transformers/llmExtract"; import { isUrlBlocked } from 
"../../scraper/WebScraper/utils/blocklist"; import { getMapResults } from "./map"; +import { buildDocument } from "../../lib/extract/build-document"; configDotenv(); const redis = new Redis(process.env.REDIS_URL!); @@ -47,7 +48,7 @@ export async function extractController( if (url.includes('/*') || req.body.allowExternalLinks) { // Handle glob pattern URLs const baseUrl = url.replace('/*', ''); - const pathPrefix = baseUrl.split('/').slice(3).join('/'); // Get path after domain if any + // const pathPrefix = baseUrl.split('/').slice(3).join('/'); // Get path after domain if any const allowExternalLinks = req.body.allowExternalLinks ?? true; let urlWithoutWww = baseUrl.replace("www.", ""); @@ -76,9 +77,11 @@ export async function extractController( let mappedLinksRerank = mappedLinks.map(x => `url: ${x.url}, title: ${x.title}, description: ${x.description}`); // Filter by path prefix if present - if (pathPrefix) { - mappedLinks = mappedLinks.filter(x => x.url && x.url.includes(`/${pathPrefix}/`)); - } + // console.log("pathPrefix", pathPrefix); + // wrong + // if (pathPrefix) { + // mappedLinks = mappedLinks.filter(x => x.url && x.url.includes(`/${pathPrefix}/`)); + // } if (req.body.prompt) { const linksAndScores : { link: string, linkWithContext: string, score: number, originalIndex: number }[] = await performRanking(mappedLinksRerank, mappedLinks.map(l => l.url), mapUrl); @@ -170,6 +173,7 @@ export async function extractController( }); } + console.log("docs", docs.length); const completions = await generateOpenAICompletions( logger.child({ method: "extractController/generateOpenAICompletions" }), { @@ -178,7 +182,7 @@ export async function extractController( prompt: req.body.prompt, schema: req.body.schema, }, - docs.map(x => x.markdown).join('\n') + docs.map(x => buildDocument(x)).join('\n') ); // console.log("completions", completions); diff --git a/apps/api/src/lib/extract/build-document.ts b/apps/api/src/lib/extract/build-document.ts new file mode 100644 
index 00000000..7486d2a7 --- /dev/null +++ b/apps/api/src/lib/extract/build-document.ts @@ -0,0 +1,17 @@ +import { Document } from "../../controllers/v1/types"; + +export function buildDocument(document: Document): string { + const metadata = document.metadata; + const markdown = document.markdown; + + const documentMetadataString = `\nHere is the metadata for the document:\n${JSON.stringify( + metadata, + null, + 2 + )}`; + + const documentString = `${markdown}${documentMetadataString}`; + + console.log("documentString", documentString); + return markdown ?? ""; +} diff --git a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts index d8004c07..41d76779 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts @@ -63,7 +63,7 @@ export async function generateOpenAICompletions(logger: Logger, options: Extract let warning: string | undefined; const openai = new OpenAI(); - const model: TiktokenModel = (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o"; + const model: TiktokenModel = (process.env.MODEL_NAME as TiktokenModel) ?? 
"gpt-4o-mini"; if (markdown === undefined) { throw new Error("document.markdown is undefined -- this is unexpected"); From 09dd5136b7b05adcd389a1be0ba3cfb3f0c72444 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 12:51:16 -0800 Subject: [PATCH 29/51] Update build-document.ts --- apps/api/src/lib/extract/build-document.ts | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/apps/api/src/lib/extract/build-document.ts b/apps/api/src/lib/extract/build-document.ts index 7486d2a7..66417a07 100644 --- a/apps/api/src/lib/extract/build-document.ts +++ b/apps/api/src/lib/extract/build-document.ts @@ -4,14 +4,12 @@ export function buildDocument(document: Document): string { const metadata = document.metadata; const markdown = document.markdown; - const documentMetadataString = `\nHere is the metadata for the document:\n${JSON.stringify( - metadata, - null, - 2 - )}`; + // for each key in the metadata allow up to 250 characters + const metadataString = Object.entries(metadata).map(([key, value]) => { + return `${key}: ${value?.toString().slice(0, 250)}`; + }).join('\n'); + const documentMetadataString = `\n- - - - - Page metadata - - - - -\n${metadataString}`; const documentString = `${markdown}${documentMetadataString}`; - - console.log("documentString", documentString); - return markdown ?? 
""; + return documentString; } From 0e4e9a3b37d71f152c1bd31ce1bcfa9198714d4a Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 13:01:36 -0800 Subject: [PATCH 30/51] Nick: --- .../src/__tests__/e2e_extract/index.test.ts | 70 +++++++++++++++++++ apps/api/src/controllers/v1/extract.ts | 1 - apps/api/src/controllers/v1/map.ts | 2 +- 3 files changed, 71 insertions(+), 2 deletions(-) diff --git a/apps/api/src/__tests__/e2e_extract/index.test.ts b/apps/api/src/__tests__/e2e_extract/index.test.ts index 346958da..525ff6a2 100644 --- a/apps/api/src/__tests__/e2e_extract/index.test.ts +++ b/apps/api/src/__tests__/e2e_extract/index.test.ts @@ -67,6 +67,9 @@ describe("E2E Tests for Extract API Routes", () => { if (founder.includes("Caleb")) gotItRight++; if (founder.includes("Eric")) gotItRight++; if (founder.includes("Nicolas")) gotItRight++; + if (founder.includes("nick")) gotItRight++; + if (founder.includes("eric")) gotItRight++; + if (founder.includes("jon-noronha")) gotItRight++; } @@ -176,4 +179,71 @@ describe("E2E Tests for Extract API Routes", () => { expect(response.body.data?.isGreenhouseATS).toBe(true); }, 60000); + it.concurrent("should return mintlify api components", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["https://mintlify.com/docs/*"], + prompt: "what are the 4 API components?", + schema: { + type: "array", + items: { + type: "object", + properties: { + component: { type: "string" } + } + }, + required: ["items"] + }, + allowExternalLinks: true + }) + + console.log(response.body.data?.items); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data?.items.length).toBe(4); + let gotItRight = 0; + for (const component of response.body.data?.items) { + if (component.component.toLowerCase().includes("parameter")) gotItRight++; + if 
(component.component.toLowerCase().includes("response")) gotItRight++; + if (component.component.toLowerCase().includes("expandable")) gotItRight++; + if (component.component.toLowerCase().includes("sticky")) gotItRight++; + if (component.component.toLowerCase().includes("examples")) gotItRight++; + + } + expect(gotItRight).toBeGreaterThan(2); + }, 60000); + + it.concurrent("should return information about Eric Ciarla", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["https://ericciarla.com/"], + prompt: "Who is Eric Ciarla? Where does he work? Where did he go to school?", + schema: { + type: "object", + properties: { + name: { type: "string" }, + work: { type: "string" }, + education: { type: "string" } + }, + required: ["name", "work", "education"] + }, + allowExternalLinks: true + }) + + console.log(response.body.data); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data?.name).toBe("Eric Ciarla"); + expect(response.body.data?.work).toBeDefined(); + expect(response.body.data?.education).toBeDefined(); + }, 60000); + + + }); diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index ededd63d..83a35c9b 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -173,7 +173,6 @@ export async function extractController( }); } - console.log("docs", docs.length); const completions = await generateOpenAICompletions( logger.child({ method: "extractController/generateOpenAICompletions" }), { diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts index c8c07b6b..31db5d58 100644 --- a/apps/api/src/controllers/v1/map.ts +++ b/apps/api/src/controllers/v1/map.ts @@ -108,7 +108,7 @@ export async function getMapResults({ const maxPages = 
Math.ceil(Math.min(MAX_FIRE_ENGINE_RESULTS, limit) / resultsPerPage); const cacheKey = `fireEngineMap:${mapUrl}`; - const cachedResult = null; + const cachedResult = await redis.get(cacheKey); let allResults: any[] = []; let pagePromises: Promise[] = []; From 769f08c10d54b9ffa113f809f0ef71fb9cad8ebb Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 13:08:09 -0800 Subject: [PATCH 31/51] Billing and log for extract --- apps/api/src/controllers/v1/extract.ts | 40 ++++++++++++++------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 83a35c9b..3797032a 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -190,10 +190,12 @@ export async function extractController( // creditsToBeBilled = 5; // } - // billTeam(req.auth.team_id, req.acuc?.sub_id, creditsToBeBilled).catch(error => { - // logger.error(`Failed to bill team ${req.auth.team_id} for ${creditsToBeBilled} credits: ${error}`); - // // Optionally, you could notify an admin or add to a retry queue here - // }); + // TODO: change this later + // While on beta, we're billing 5 credits per link discovered/scraped. 
+ billTeam(req.auth.team_id, req.acuc?.sub_id, links.length * 5).catch(error => { + logger.error(`Failed to bill team ${req.auth.team_id} for ${links.length * 5} credits: ${error}`); + // Optionally, you could notify an admin or add to a retry queue here + }); // if (!req.body.formats.includes("rawHtml")) { // if (doc && doc.rawHtml) { @@ -201,20 +203,7 @@ export async function extractController( // } // } - // logJob({ - // job_id: jobId, - // success: true, - // message: "Scrape completed", - // num_docs: 1, - // docs: [doc], - // time_taken: timeTakenInSeconds, - // team_id: req.auth.team_id, - // mode: "scrape", - // url: req.body.url, - // scrapeOptions: req.body, - // origin: origin, - // num_tokens: numTokens, - // }); + @@ -263,6 +252,21 @@ export async function extractController( data = completions.extract; } + logJob({ + job_id: id, + success: true, + message: "Extract completed", + num_docs: 1, + docs: data, + time_taken: (new Date().getTime() - Date.now()) / 1000, + team_id: req.auth.team_id, + mode: "extract", + url: req.body.urls.join(", "), + scrapeOptions: req.body, + origin: req.body.origin ?? 
"api", + num_tokens: 0, // TODO: fix + }); + return res.status(200).json({ success: true, data: data, From 3de4997f4dbb9a8c5b6b26cff83b2b2ba6740440 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 13:09:46 -0800 Subject: [PATCH 32/51] Loggin num tokens --- apps/api/src/controllers/v1/extract.ts | 49 +------------------ .../scrapeURL/transformers/llmExtract.ts | 4 +- 2 files changed, 4 insertions(+), 49 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 3797032a..96e1ee78 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -197,51 +197,6 @@ export async function extractController( // Optionally, you could notify an admin or add to a retry queue here }); - // if (!req.body.formats.includes("rawHtml")) { - // if (doc && doc.rawHtml) { - // delete doc.rawHtml; - // } - // } - - - - - - // billTeam(teamId, subId, 1).catch((error) => { - // logger.error( - // `Failed to bill team ${teamId} for 1 credit: ${error}` - // ); - // }); - - // const linksToReturn = links.slice(0, limit); - - // logJob({ - // job_id: id, - // success: links.length > 0, - // message: "Extract completed", - // num_docs: linksToReturn.length, - // docs: linksToReturn, - // time_taken: (new Date().getTime() - Date.now()) / 1000, - // team_id: teamId, - // mode: "extract", - // url: urls[0], - // crawlerOptions: {}, - // scrapeOptions: {}, - // origin: origin ?? "api", - // num_tokens: 0, - // }); - - // return { - - // }; - - - - // const response = { - // success: true as const, - // data: result.data, - // scrape_id: result.scrape_id - // }; console.log("completions.extract", completions.extract); @@ -264,9 +219,9 @@ export async function extractController( url: req.body.urls.join(", "), scrapeOptions: req.body, origin: req.body.origin ?? "api", - num_tokens: 0, // TODO: fix + num_tokens: completions.numTokens ?? 
0 }); - + return res.status(200).json({ success: true, data: data, diff --git a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts index 41d76779..3866683a 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts @@ -58,7 +58,7 @@ function normalizeSchema(x: any): any { } } -export async function generateOpenAICompletions(logger: Logger, options: ExtractOptions, markdown?: string, previousWarning?: string): Promise<{ extract: any, warning: string | undefined }> { +export async function generateOpenAICompletions(logger: Logger, options: ExtractOptions, markdown?: string, previousWarning?: string): Promise<{ extract: any, numTokens: number, warning: string | undefined }> { let extract: any; let warning: string | undefined; @@ -170,7 +170,7 @@ export async function generateOpenAICompletions(logger: Logger, options: Extract if (options.schema && options.schema.type === "array" && !schema?.required?.includes("items")) { extract = extract?.items; } - return { extract, warning }; + return { extract, warning, numTokens }; } export async function performLLMExtract(meta: Meta, document: Document): Promise { From 9512d81e05f45d34e76c4c5ea639de23f2c176b3 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 13:15:52 -0800 Subject: [PATCH 33/51] Update extract.ts --- apps/api/src/controllers/v1/extract.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 96e1ee78..1dff5236 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -36,6 +36,8 @@ export async function extractController( req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>, res: Response ) { + const selfHosted = process.env.USE_DB_AUTHENTICATION !== "true"; + req.body = extractRequestSchema.parse(req.body); const 
id = crypto.randomUUID(); @@ -65,7 +67,8 @@ export async function extractController( allowExternalLinks, origin: req.body.origin, limit: req.body.limit, - ignoreSitemap: true, + // If we're self-hosted, we don't want to ignore the sitemap, due to our fire-engine mapping + ignoreSitemap: !selfHosted ? true : false, includeMetadata: true, includeSubdomains: req.body.includeSubdomains, }); From d196b9d93d6c8a0f63603f27c4d0d2b8da6da875 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 13:16:36 -0800 Subject: [PATCH 34/51] Update extract.ts --- apps/api/src/controllers/v1/extract.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 1dff5236..f3dd199e 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -32,6 +32,13 @@ const MAX_EXTRACT_LIMIT = 100; const MAX_RANKING_LIMIT = 10; const SCORE_THRESHOLD = 0.75; +/** + * Extracts data from the provided URLs based on the request parameters. + * Currently in beta. + * @param req - The request object containing authentication and extraction details. + * @param res - The response object to send the extraction results. + * @returns A promise that resolves when the extraction process is complete. 
+ */ export async function extractController( req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>, res: Response From 945183ffbd22f365f14ac029301bd2d7a06604f7 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 16:40:55 -0800 Subject: [PATCH 35/51] Update extract.ts --- apps/api/src/controllers/v1/extract.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index f3dd199e..0654860c 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -87,7 +87,6 @@ export async function extractController( let mappedLinksRerank = mappedLinks.map(x => `url: ${x.url}, title: ${x.title}, description: ${x.description}`); // Filter by path prefix if present - // console.log("pathPrefix", pathPrefix); // wrong // if (pathPrefix) { // mappedLinks = mappedLinks.filter(x => x.url && x.url.includes(`/${pathPrefix}/`)); @@ -100,8 +99,8 @@ export async function extractController( .map(x => mappedLinks.find(link => link.url === x.link)) .filter((x): x is MapDocument => x !== undefined && x.url !== undefined && !isUrlBlocked(x.url)) .slice(0, MAX_RANKING_LIMIT); - console.log("linksAndScores", linksAndScores); - console.log("linksAndScores", linksAndScores.length); + // console.log("linksAndScores", linksAndScores); + // console.log("linksAndScores", linksAndScores.length); } return mappedLinks.map(x => x.url) as string[]; @@ -119,7 +118,7 @@ export async function extractController( const processedUrls = await Promise.all(urlPromises); links.push(...processedUrls.flat()); - console.log("links", links.length); + // console.log("links", links.length); // Scrape all links in parallel const scrapePromises = links.map(async (url) => { const origin = req.body.origin || "api"; @@ -208,7 +207,7 @@ export async function extractController( }); - console.log("completions.extract", completions.extract); + // console.log("completions.extract", 
completions.extract); let data: any; try { From 3eaa3b38aba02fcf90d004b903b961380d5c22cf Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 16:42:42 -0800 Subject: [PATCH 36/51] Nick: formatting --- .../src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts index 1f1bcc69..2c67e196 100644 --- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts +++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts @@ -103,4 +103,4 @@ export async function fireEngineCheckStatus(logger: Logger, jobId: string): Prom } }); } -} \ No newline at end of file +} From 93e106d3217d2d4a89da56a34f61ef82482629c8 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 16:43:02 -0800 Subject: [PATCH 37/51] Update v0.ts --- apps/api/src/routes/v0.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/routes/v0.ts b/apps/api/src/routes/v0.ts index 3a7bda65..2169c2bd 100644 --- a/apps/api/src/routes/v0.ts +++ b/apps/api/src/routes/v0.ts @@ -27,4 +27,4 @@ v0Router.post("/v0/search", searchController); // Health/Probe routes v0Router.get("/v0/health/liveness", livenessController); -v0Router.get("/v0/health/readiness", readinessController); +v0Router.get("/v0/health/readiness", readinessController); \ No newline at end of file From 42922c68d6a6c33ea873a87fe22b5314714d4b9e Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 16:44:40 -0800 Subject: [PATCH 38/51] Update package.json --- apps/api/package.json | 2 -- 1 file changed, 2 deletions(-) diff --git a/apps/api/package.json b/apps/api/package.json index fc7cf224..56724de7 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -56,8 +56,6 @@ "@bull-board/express": "^5.20.5", "@devil7softwares/pos": "^1.0.2", "@dqbd/tiktoken": "^1.0.17", - 
"@hyperdx/node-opentelemetry": "^0.8.1", - "@logtail/node": "^0.4.12", "@nangohq/node": "^0.40.8", "@sentry/cli": "^2.33.1", "@sentry/node": "^8.26.0", From 5f4c8da1096e104842df6ddb17d56e810c830c27 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 16:44:52 -0800 Subject: [PATCH 39/51] Update pnpm-lock.yaml --- apps/api/pnpm-lock.yaml | 1609 --------------------------------------- 1 file changed, 1609 deletions(-) diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index 42e05c4c..4557afa9 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -26,12 +26,6 @@ importers: '@dqbd/tiktoken': specifier: ^1.0.17 version: 1.0.17 - '@hyperdx/node-opentelemetry': - specifier: ^0.8.1 - version: 0.8.1 - '@logtail/node': - specifier: ^0.4.12 - version: 0.4.21 '@nangohq/node': specifier: ^0.40.8 version: 0.40.8 @@ -679,29 +673,6 @@ packages: engines: {node: '>=16.0.0'} hasBin: true - '@grpc/grpc-js@1.12.2': - resolution: {integrity: sha512-bgxdZmgTrJZX50OjyVwz3+mNEnCTNkh3cIqGPWVNeW9jX6bn1ZkU80uPd+67/ZpIJIjRQ9qaHCjhavyoWYxumg==} - engines: {node: '>=12.10.0'} - - '@grpc/proto-loader@0.7.13': - resolution: {integrity: sha512-AiXO/bfe9bmxBjxxtYxFAXGZvMaN5s8kO+jBHAJCON8rJoB5YS/D6X7ZNc6XQkuHNmyl4CYaMI1fJ/Gn27RGGw==} - engines: {node: '>=6'} - hasBin: true - - '@hyperdx/instrumentation-exception@0.1.0': - resolution: {integrity: sha512-Jgk7JY5J07Mq9fgXApGVhSkS4+WdzzRcWLReAZhxgo46KShxE6w614mFqUSnuo+z6ghlehsy4ForViUfxrFyew==} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@hyperdx/instrumentation-sentry-node@0.1.0': - resolution: {integrity: sha512-n8d/K/8M2owL2w4FNfV+lSVW6yoznEj5SdRCysV/ZIfyrZwpijiiSn7gkRcrOfKHmrxrupyp7DVg5L19cGuH6A==} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@hyperdx/node-opentelemetry@0.8.1': - resolution: {integrity: sha512-wNw0yQf54j/9KXVWeEOu8G6C5FT5EFlrz4dcmscTkwCvo6fQOLRZa/NbGcqugt0LSFMc0/6/Q5RDWVqDpEn0LQ==} - hasBin: true - '@ioredis/commands@1.2.0': resolution: {integrity: 
sha512-Sx1pU8EM64o2BrqNpEO1CNLtKQwyhuXuqyfH7oGKCk+1a33d2r5saW8zNwm3j6BTExtjrv2BxTgzzkMwts6vGg==} @@ -808,9 +779,6 @@ packages: '@jridgewell/trace-mapping@0.3.9': resolution: {integrity: sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==} - '@js-sdsl/ordered-map@4.4.2': - resolution: {integrity: sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw==} - '@langchain/core@0.2.12': resolution: {integrity: sha512-zaKvUcWU1Cxcpd/fxklygY6iUrxls10KTRzyHZGBAIKJq1JD/B10vX59YlFgBs7nqqVTEvaChfIE0O0e2qBttA==} engines: {node: '>=18'} @@ -823,28 +791,12 @@ packages: resolution: {integrity: sha512-cXWgKE3sdWLSqAa8ykbCcUsUF1Kyr5J3HOWYGuobhPEycXW4WI++d5DhzdpL238mzoEXTi90VqfSCra37l5YqA==} engines: {node: '>=18'} - '@logtail/core@0.4.21': - resolution: {integrity: sha512-QDq194+24bwi4e+a/pxyf4X67NewhTvBmh9iwM2NhbSVSQz4Fo8xQn1Ul8zuUrXETycu/Od2D8wT2tZFNFx/7A==} - - '@logtail/node@0.4.21': - resolution: {integrity: sha512-zpwkhJgcYaM+vsjotHRJthc0ot1vP0CAVy+fwrkL8XjfdC3NHiWb6f0agQpHlqdRX8RTsAbcYpWNXKPpFB5U9Q==} - - '@logtail/tools@0.4.21': - resolution: {integrity: sha512-xIaolScUwJEikllopGphxBX0lVlN/rA8pLAZiNCMNJXpPbwitoFKLW3w4qRuYdKoFCCJZKwOdwEqU2Fv0i9Cuw==} - - '@logtail/types@0.4.20': - resolution: {integrity: sha512-nYsum10eJMTo+ySBlYXvSrvgD1NDCVUeOlxLBbelq3XUmHu9L48VNR3P0BOmhLamYCTEgjatTj0PyPLfjL1W9g==} - '@mixmark-io/domino@2.2.0': resolution: {integrity: sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==} '@mongodb-js/saslprep@1.1.7': resolution: {integrity: sha512-dCHW/oEX0KJ4NjDULBo3JiOaK5+6axtpBbS+ao2ZInoAL9/YRQLhXzSNAFz7hP4nzLkIqsfYAK/PDE3+XHny0Q==} - '@msgpack/msgpack@2.8.0': - resolution: {integrity: sha512-h9u4u/jiIRKbq25PM+zymTyW6bhTzELvOoUd+AvYriWOAKpLGnIamaET3pnHYoI5iYphAHBI4ayx0MehR+VVPQ==} - engines: {node: '>= 10'} - '@msgpackr-extract/msgpackr-extract-darwin-arm64@3.0.3': resolution: {integrity: 
sha512-QZHtlVgbAdy2zAqNA9Gu1UpIuI8Xvsd1v8ic6B2pZmeFnFcMWiPLfWXh7TVw4eGEZ/C9TH281KwhVoeQUKbyjw==} cpu: [arm64] @@ -882,10 +834,6 @@ packages: '@one-ini/wasm@0.1.1': resolution: {integrity: sha512-XuySG1E38YScSJoMlqovLru4KTUNSjgVTIjyh7qMX6aNN5HY5Ct5LhRJdxO79JtTzKfzV/bnWpz+zquYrISsvw==} - '@opentelemetry/api-logs@0.51.1': - resolution: {integrity: sha512-E3skn949Pk1z2XtXu/lxf6QAZpawuTM/IUEXcAzpiUkTd73Hmvw26FiN3cJuTmkpM5hZzHwkomVdtrh/n/zzwA==} - engines: {node: '>=14'} - '@opentelemetry/api-logs@0.52.1': resolution: {integrity: sha512-qnSqB2DQ9TPP96dl8cDubDvrUyWc0/sK81xHTK8eSUspzDM3bsewX903qclQFvVhgStjRWdC5bLb3kQqMkfV5A==} engines: {node: '>=14'} @@ -894,581 +842,152 @@ packages: resolution: {integrity: sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==} engines: {node: '>=8.0.0'} - '@opentelemetry/auto-instrumentations-node@0.46.1': - resolution: {integrity: sha512-s0CwmY9KYtPawOhV5YO2Gf62uVOQRNvT6Or8IZ0S4gr/kPVNhoMehTsQvqBwSWQfoFrkmW3KKOHiKJEp4dVGXg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.4.1 - - '@opentelemetry/context-async-hooks@1.24.1': - resolution: {integrity: sha512-R5r6DO4kgEOVBxFXhXjwospLQkv+sYxwCfjvoZBe7Zm6KKXAV9kDSJhi/D1BweowdZmO+sdbENLs374gER8hpQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': '>=1.0.0 <1.9.0' - '@opentelemetry/context-async-hooks@1.25.1': resolution: {integrity: sha512-UW/ge9zjvAEmRWVapOP0qyCvPulWU6cQxGxDbWEFfGOj1VBBZAuOqTo3X6yWmDTD3Xe15ysCZChHncr2xFMIfQ==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': '>=1.0.0 <1.10.0' - '@opentelemetry/core@1.24.1': - resolution: {integrity: sha512-wMSGfsdmibI88K9wB498zXY04yThPexo8jvwNNlm542HZB7XrrMRBbAyKJqG8qDRJwIBdBrPMi4V9ZPW/sqrcg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': '>=1.0.0 <1.9.0' - '@opentelemetry/core@1.25.1': resolution: {integrity: sha512-GeT/l6rBYWVQ4XArluLVB6WWQ8flHbdb6r2FCHC3smtdOAbrJBIv35tpV/yp9bmYUJf+xmZpu9DRTIeJVhFbEQ==} engines: 
{node: '>=14'} peerDependencies: '@opentelemetry/api': '>=1.0.0 <1.10.0' - '@opentelemetry/core@1.26.0': - resolution: {integrity: sha512-1iKxXXE8415Cdv0yjG3G6hQnB5eVEsJce3QaawX8SjDn0mAS0ZM8fAbZZJD4ajvhC15cePvosSCut404KrIIvQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': '>=1.0.0 <1.10.0' - - '@opentelemetry/exporter-logs-otlp-http@0.51.1': - resolution: {integrity: sha512-cd6GZ9IqCrmvOJwi1HjRR7o9ihF7xhZTekgxUsoyTsPF+SjKMsLF9ur6HeBYkYhk+YjZ1ken3XUMH47oUTvu8Q==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/exporter-metrics-otlp-http@0.51.1': - resolution: {integrity: sha512-oFXvif9iksHUxrzG3P8ohMLt7xSrl+oDMqxD/3XXndU761RFAKSbRDpfrQs25U5D+A2aMV3qk+4kfUWdJhZ77g==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/exporter-metrics-otlp-proto@0.51.1': - resolution: {integrity: sha512-jhj8xD6S4cydXGCuf2tp56+4QI0DbDH6g+0MiPPJVdXjxLj+iycQuqB2cwljWpByblFaOjyUsL/VKtm8C7sQ9A==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/exporter-trace-otlp-grpc@0.51.1': - resolution: {integrity: sha512-P9+Hkszih95ITvldGZ+kXvj9HpD1QfS+PwooyHK72GYA+Bgm+yUSAsDkUkDms8+s9HW6poxURv3LcjaMuBBpVQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/exporter-trace-otlp-http@0.51.1': - resolution: {integrity: sha512-n+LhLPsX07URh+HhV2SHVSvz1t4G/l/CE5BjpmhAPqeTceFac1VpyQkavWEJbvnK5bUEXijWt4LxAxFpt2fXyw==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/exporter-trace-otlp-proto@0.51.1': - resolution: {integrity: sha512-SE9f0/6V6EeXC9i+WA4WFjS1EYgaBCpAnI5+lxWvZ7iO7EU1IvHvZhP6Kojr0nLldo83gqg6G7OWFqsID3uF+w==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/exporter-zipkin@1.24.1': - resolution: {integrity: 
sha512-+Rl/VFmu2n6eaRMnVbyfZx1DqR/1KNyWebYuHyQBZaEAVIn/ZLgmofRpXN1X2nhJ4BNaptQUNxAstCYYz6dKoQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/instrumentation-amqplib@0.37.0': - resolution: {integrity: sha512-XjOHeAOreh0XX4jlzTTUWWqu1dIGvMWM8yvd43JJdRMAmTZisezjKsxLjMEMIvF0PzQdoXwh9DiS9nYE4/QmpA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-aws-lambda@0.41.1': - resolution: {integrity: sha512-/BLG+0DQr2tCILFGJKJH2Fg6eyjhqOlVflYpNddUEXnzyQ/PAhTdgirkqbICFgeSW2XYcEY9zXpuRldrVNw9cA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-aws-sdk@0.41.0': - resolution: {integrity: sha512-7+8WMY0LQeqv6KIObXK+Py44qNFLeCU0ZLLxSZtXEbZ2wJlQISP1St65jRto0NV7isnZoyuOxb2+ZpypPPNv7Q==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-bunyan@0.38.0': - resolution: {integrity: sha512-ThNcgTE22W7PKzTzz5qfGxb5Gf7rA3EORousYo2nJWHHcF6gqiMNv2+GXY3MdpjLBr8IgCfhtvbQdD6rlIPUpA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-cassandra-driver@0.38.0': - resolution: {integrity: sha512-ML4Vw0it2uIpETfX6skuSIGLHF9D3TUKOfdfrk9lnrzzWSzg2aS6pl3UeepkQX4wXHdzlxVRB0USrUqsmxMd5Q==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-connect@0.36.1': - resolution: {integrity: sha512-xI5Q/CMmzBmHshPnzzjD19ptFaYO/rQWzokpNio4QixZYWhJsa35QgRvN9FhPkwgtuJIbt/CWWAufJ3egJNHEA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-connect@0.38.0': resolution: {integrity: sha512-2/nRnx3pjYEmdPIaBwtgtSviTKHWnDZN3R+TkRUnhIVrvBKVcq+I5B2rtd6mr6Fe9cHlZ9Ojcuh7pkNh/xdWWg==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-cucumber@0.6.0': - 
resolution: {integrity: sha512-90eAF2JPSbPAsOuGfYyctYaoYXqy4Clbxt0j/uUgg6dto4oqwUw3AvTyHQEztLGxeXwEzC1EQigDtVPg5ZexYA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/instrumentation-dataloader@0.9.0': - resolution: {integrity: sha512-fiyCOAw+tlbneok1x7P5UseoGW5nS60CWWx7NXzYW+WOexpSmDQQW7olttGa8fqE6/sVCoi1l+QdfVoETZi/NQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-dns@0.36.1': - resolution: {integrity: sha512-NWRbQ7q0E3co/CNTWLZZvUzZoKhB1iTitY282IM8HDTXkA6VRssCfOcvaHw5ezOh23TJbAeYxmmpVj4hFvDPYQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-express@0.39.0': - resolution: {integrity: sha512-AG8U7z7D0JcBu/7dDcwb47UMEzj9/FMiJV2iQZqrsZnxR3FjB9J9oIH2iszJYci2eUdp2WbdvtpD9RV/zmME5A==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-express@0.41.1': resolution: {integrity: sha512-uRx0V3LPGzjn2bxAnV8eUsDT82vT7NTwI0ezEuPMBOTOsnPpGhWdhcdNdhH80sM4TrWrOfXm9HGEdfWE3TRIww==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-fastify@0.36.1': - resolution: {integrity: sha512-3Nfm43PI0I+3EX+1YbSy6xbDu276R1Dh1tqAk68yd4yirnIh52Kd5B+nJ8CgHA7o3UKakpBjj6vSzi5vNCzJIA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-fastify@0.38.0': resolution: {integrity: sha512-HBVLpTSYpkQZ87/Df3N0gAw7VzYZV3n28THIBrJWfuqw3Or7UqdhnjeuMIPQ04BKk3aZc0cWn2naSQObbh5vXw==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-fs@0.12.0': - resolution: {integrity: sha512-Waf+2hekJRxIwq1PmivxOWLdMOtYbY22hKr34gEtfbv2CArSv8FBJH4BmQxB9o5ZcwkdKu589qs009dbuSfNmQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-fs@0.14.0': resolution: 
{integrity: sha512-pVc8P5AgliC1DphyyBUgsxXlm2XaPH4BpYvt7rAZDMIqUpRk8gs19SioABtKqqxvFzg5jPtgJfJsdxq0Y+maLw==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-generic-pool@0.36.0': - resolution: {integrity: sha512-CExAEqJvK8jYxrhN8cl6EaGg57EGJi+qsSKouLC5lndXi68gZLOKbZIMZg4pF0kNfp/D4BFaGmA6Ap7d5WoPTw==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-graphql@0.40.0': - resolution: {integrity: sha512-LVRdEHWACWOczv2imD+mhUrLMxsEjPPi32vIZJT57zygR5aUiA4em8X3aiGOCycgbMWkIu8xOSGSxdx3JmzN+w==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-graphql@0.42.0': resolution: {integrity: sha512-N8SOwoKL9KQSX7z3gOaw5UaTeVQcfDO1c21csVHnmnmGUoqsXbArK2B8VuwPWcv6/BC/i3io+xTo7QGRZ/z28Q==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-grpc@0.51.1': - resolution: {integrity: sha512-coRTugFL7De/VNH/1NqPlxnfik87jS+jBXsny+Y/lMhXIA3x8t71IyL9ihuewkD+lNtIxIz6Y7Sq6kPuOqz5dQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-hapi@0.38.0': - resolution: {integrity: sha512-ZcOqEuwuutTDYIjhDIStix22ECblG/i9pHje23QGs4Q4YS4RMaZ5hKCoQJxW88Z4K7T53rQkdISmoXFKDV8xMg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-hapi@0.40.0': resolution: {integrity: sha512-8U/w7Ifumtd2bSN1OLaSwAAFhb9FyqWUki3lMMB0ds+1+HdSxYBe9aspEJEgvxAqOkrQnVniAPTEGf1pGM7SOw==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-http@0.51.1': - resolution: {integrity: sha512-6b3nZnFFEz/3xZ6w8bVxctPUWIPWiXuPQ725530JgxnN1cvYFd8CJ75PrHZNjynmzSSnqBkN3ef4R9N+RpMh8Q==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-http@0.52.1': resolution: {integrity: 
sha512-dG/aevWhaP+7OLv4BQQSEKMJv8GyeOp3Wxl31NHqE8xo9/fYMfEljiZphUHIfyg4gnZ9swMyWjfOQs5GUQe54Q==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-ioredis@0.40.0': - resolution: {integrity: sha512-Jv/fH7KhpWe4KBirsiqeUJIYrsdR2iu2l4nWhfOlRvaZ+zYIiLEzTQR6QhBbyRoAbU4OuYJzjWusOmmpGBnwng==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-ioredis@0.42.0': resolution: {integrity: sha512-P11H168EKvBB9TUSasNDOGJCSkpT44XgoM6d3gRIWAa9ghLpYhl0uRkS8//MqPzcJVHr3h3RmfXIpiYLjyIZTw==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-knex@0.36.1': - resolution: {integrity: sha512-6bEuiI+yMf3D0+ZWZE2AKmXhIhBvZ0brdO/0A8lUqeqeS+sS4fTcjA1F2CclsCNxYWEgcs8o3QyQqPceBeVRlg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-koa@0.40.0': - resolution: {integrity: sha512-dJc3H/bKMcgUYcQpLF+1IbmUKus0e5Fnn/+ru/3voIRHwMADT3rFSUcGLWSczkg68BCgz0vFWGDTvPtcWIFr7A==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-koa@0.42.0': resolution: {integrity: sha512-H1BEmnMhho8o8HuNRq5zEI4+SIHDIglNB7BPKohZyWG4fWNuR7yM4GTlR01Syq21vODAS7z5omblScJD/eZdKw==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-lru-memoizer@0.37.0': - resolution: {integrity: sha512-dHLrn55qVWsHJQYdForPWPUWDk2HZ2jjzkT+WoQSqpYT1j4HxfoiLfBTF+I3EbEYFAJnDRmRAUfA6nU5GPdCLQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-memcached@0.36.0': - resolution: {integrity: sha512-5efkT8ZfN8il5z+yfKYFGm2YR3mhlhaJoGfNOAylKE/6tUH3WDTTWaP7nrURtWGc+fuvDktcEch18Se8qsGS7w==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-mongodb@0.43.0': - resolution: {integrity: 
sha512-bMKej7Y76QVUD3l55Q9YqizXybHUzF3pujsBFjqbZrRn2WYqtsDtTUlbCK7fvXNPwFInqZ2KhnTqd0gwo8MzaQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mongodb@0.46.0': resolution: {integrity: sha512-VF/MicZ5UOBiXrqBslzwxhN7TVqzu1/LN/QDpkskqM0Zm0aZ4CVRbUygL8d7lrjLn15x5kGIe8VsSphMfPJzlA==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mongoose@0.38.1': - resolution: {integrity: sha512-zaeiasdnRjXe6VhYCBMdkmAVh1S5MmXC/0spet+yqoaViGnYst/DOxPvhwg3yT4Yag5crZNWsVXnA538UjP6Ow==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mongoose@0.40.0': resolution: {integrity: sha512-niRi5ZUnkgzRhIGMOozTyoZIvJKNJyhijQI4nF4iFSb+FUx2v5fngfR+8XLmdQAO7xmsD8E5vEGdDVYVtKbZew==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mysql2@0.38.1': - resolution: {integrity: sha512-qkpHMgWSDTYVB1vlZ9sspf7l2wdS5DDq/rbIepDwX5BA0N0068JTQqh0CgAh34tdFqSCnWXIhcyOXC2TtRb0sg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mysql2@0.40.0': resolution: {integrity: sha512-0xfS1xcqUmY7WE1uWjlmI67Xg3QsSUlNT+AcXHeA4BDUPwZtWqF4ezIwLgpVZfHOnkAEheqGfNSWd1PIu3Wnfg==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mysql@0.38.1': - resolution: {integrity: sha512-+iBAawUaTfX/HAlvySwozx0C2B6LBfNPXX1W8Z2On1Uva33AGkw2UjL9XgIg1Pj4eLZ9R4EoJ/aFz+Xj4E/7Fw==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-mysql@0.40.0': resolution: {integrity: sha512-d7ja8yizsOCNMYIJt5PH/fKZXjb/mS48zLROO4BzZTtDfhNCl2UM/9VIomP2qkGIFVouSJrGr/T00EzY7bPtKA==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-nestjs-core@0.37.1': - resolution: {integrity: 
sha512-ebYQjHZEmGHWEALwwDGhSQVLBaurFnuLIkZD5igPXrt7ohfF4lc5/4al1LO+vKc0NHk8SJWStuRueT86ISA8Vg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-nestjs-core@0.39.0': resolution: {integrity: sha512-mewVhEXdikyvIZoMIUry8eb8l3HUjuQjSjVbmLVTt4NQi35tkpnHQrG9bTRBrl3403LoWZ2njMPJyg4l6HfKvA==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-net@0.36.0': - resolution: {integrity: sha512-rZlbSgwAJys8lpug+xIeAdO98ypYMAPVqrHqc4AHuUl5S4MULHEcjGLMZLoE/guEGO4xAQ5XUezpRFGM1SAnsg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-pg@0.41.0': - resolution: {integrity: sha512-BSlhpivzBD77meQNZY9fS4aKgydA8AJBzv2dqvxXFy/Hq64b7HURgw/ztbmwFeYwdF5raZZUifiiNSMLpOJoSA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-pg@0.43.0': resolution: {integrity: sha512-og23KLyoxdnAeFs1UWqzSonuCkePUzCX30keSYigIzJe/6WSYA8rnEI5lobcxPEzg+GcU06J7jzokuEHbjVJNw==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-pino@0.39.0': - resolution: {integrity: sha512-uA17F2iP77o3NculB63QD2zv3jkJ093Gfb0GxHLEqTIqpYs1ToJ53ybWwjJwqFByxk7GrliaxaxVtWC23PKzBg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-redis-4@0.39.0': - resolution: {integrity: sha512-Zpfqfi83KeKgVQ0C2083GZPon3ZPYQ5E59v9FAbhubtOoUb9Rh7n111YD8FPW3sgx6JKp1odXmBmfQhWCaTOpQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-redis-4@0.41.0': resolution: {integrity: sha512-H7IfGTqW2reLXqput4yzAe8YpDC0fmVNal95GHMLOrS89W+qWUKIqxolSh63hJyfmwPSFwXASzj7wpSk8Az+Dg==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation-redis@0.39.1': - resolution: {integrity: 
sha512-HUjTerD84jRJnSyDrRPqn6xQ7K91o9qLflRPZqzRvq0GRj5PMfc6TJ/z3q/ayWy/2Kzffhrp7HCIVp0u0TkgUg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-restify@0.38.0': - resolution: {integrity: sha512-VYK47Z9GBaZX5MQLL7kZDdzQDdyUtHRD4J/GSr6kdwmIpdpUQXLsV3EnboeB8P+BlpucF57FyJKE8yWTOEMfnA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-router@0.37.0': - resolution: {integrity: sha512-+OPcm7C9I5oPqnpStE+1WkdPWjRx0k5XKratxQmIDFZrmhRcqvMte3vrrzE/OBPg9iqh2tKrSe0y7+0sRfTJyQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-runtime-node@0.4.0': - resolution: {integrity: sha512-/NOgUF5gf3T5c3GMyy6fnQxaVzbOf9j2xcetgymIIX2HSN3Gk7o64G7KDvwHwhaa20ZiF0QDLb3m4AT+tn9eRg==} - engines: {node: '>=14.10.0'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-socket.io@0.39.0': - resolution: {integrity: sha512-4J2ehk5mJyDT6j2yJCOuPxAjit5QB1Fwzhx0LID5jjvhI9LxzZIGDNAPTTHyghSiaRDeNMzceXKkkEQJkg2MNw==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-tedious@0.10.1': - resolution: {integrity: sha512-maSXMxgS0szU52khQzAROV4nWr+3M8mZajMQOc3/7tYjo+Q3HlWAowOuagPvp4pwROK4x6oDaFYlY+ZSj1qjYA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - - '@opentelemetry/instrumentation-undici@0.2.0': - resolution: {integrity: sha512-RH9WdVRtpnyp8kvya2RYqKsJouPxvHl7jKPsIfrbL8u2QCKloAGi0uEqDHoOS15ZRYPQTDXZ7d8jSpUgSQmvpA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.7.0 - - '@opentelemetry/instrumentation-winston@0.37.0': - resolution: {integrity: sha512-vOx55fxdNjo2XojJf8JN4jP7VVvQCh7UQzzQ2Q2FpGJpt8Z3EErKaY8xOBkOuJH0TtL/Q72rmIn9c+mRG46BxA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation@0.46.0': 
resolution: {integrity: sha512-a9TijXZZbk0vI5TGLZl+0kxyFfrXHhX6Svtz7Pp2/VBlCSKrazuULEyoJQrOknJyFWNMEmbbJgOciHCCpQcisw==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation@0.51.1': - resolution: {integrity: sha512-JIrvhpgqY6437QIqToyozrUG1h5UhwHkaGK/WAX+fkrpyPtc+RO5FkRtUd9BH0MibabHHvqsnBGKfKVijbmp8w==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.3.0 - '@opentelemetry/instrumentation@0.52.1': resolution: {integrity: sha512-uXJbYU/5/MBHjMp1FqrILLRuiJCs3Ofk0MeRDk8g1S1gD47U8X3JnSwcMO1rtRo1x1a7zKaQHaoYu49p/4eSKw==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': ^1.3.0 - '@opentelemetry/otlp-exporter-base@0.51.1': - resolution: {integrity: sha512-UYlnOYyDdzo1Gw559EHCzru0RwhvuXCwoH8jGo9J4gO1TE58GjnEmIjomMsKBCym3qWNJfIQXw+9SZCV0DdQNg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/otlp-grpc-exporter-base@0.51.1': - resolution: {integrity: sha512-ZAS+4pq8o7dsugGTwV9s6JMKSxi+guIHdn0acOv0bqj26e9pWDFx5Ky+bI0aY46uR9Y0JyXqY+KAEYM/SO3DFA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/otlp-proto-exporter-base@0.51.1': - resolution: {integrity: sha512-gxxxwfk0inDMb5DLeuxQ3L8TtptxSiTNHE4nnAJH34IQXAVRhXSXW1rK8PmDKDngRPIZ6J7ncUCjjIn8b+AgqQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/otlp-transformer@0.51.1': - resolution: {integrity: sha512-OppYOXwV9LQqqtYUCywqoOqX/JT9LQ5/FMuPZ//eTkvuHdUC4ZMwz2c6uSoT2R90GWvvGnF1iEqTGyTT3xAt2Q==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': '>=1.3.0 <1.9.0' - - '@opentelemetry/propagation-utils@0.30.12': - resolution: {integrity: sha512-bgab3q/4dYUutUpQCEaSDa+mLoQJG3vJKeSiGuhM4iZaSpkz8ov0fs1MGil5PfxCo6Hhw3bB3bFYhUtnsfT/Pg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/propagator-aws-xray@1.26.0': - resolution: 
{integrity: sha512-Sex+JyEZ/xX328TArBqQjh1NZSfNyw5NdASUIi9hnPsnMBMSBaDe7B9JRnXv0swz7niNyAnXa6MY7yOCV76EvA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': '>=1.0.0 <1.10.0' - - '@opentelemetry/propagator-b3@1.24.1': - resolution: {integrity: sha512-nda97ZwhpZKyUJTXqQuKzNhPMUgMLunbbGWn8kroBwegn+nh6OhtyGkrVQsQLNdVKJl0KeB5z0ZgeWszrYhwFw==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': '>=1.0.0 <1.9.0' - - '@opentelemetry/propagator-jaeger@1.24.1': - resolution: {integrity: sha512-7bRBJn3FG1l195A1m+xXRHvgzAOBsfmRi9uZ5Da18oTh7BLmNDiA8+kpk51FpTsU1PCikPVpRDNPhKVB6lyzZg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': '>=1.0.0 <1.9.0' - '@opentelemetry/redis-common@0.36.2': resolution: {integrity: sha512-faYX1N0gpLhej/6nyp6bgRjzAKXn5GOEMYY7YhciSfCoITAktLUtQ36d24QEWNA1/WA1y6qQunCe0OhHRkVl9g==} engines: {node: '>=14'} - '@opentelemetry/resource-detector-alibaba-cloud@0.28.10': - resolution: {integrity: sha512-TZv/1Y2QCL6sJ+X9SsPPBXe4786bc/Qsw0hQXFsNTbJzDTGGUmOAlSZ2qPiuqAd4ZheUYfD+QA20IvAjUz9Hhg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/resource-detector-aws@1.7.0': - resolution: {integrity: sha512-VxrwUi/9QcVIV+40d/jOKQthfD/E4/ppQ9FsYpDH7qy16cOO5519QOdihCQJYpVNbgDqf6q3hVrCy1f8UuG8YA==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/resource-detector-azure@0.2.12': - resolution: {integrity: sha512-iIarQu6MiCjEEp8dOzmBvCSlRITPFTinFB2oNKAjU6xhx8d7eUcjNOKhBGQTvuCriZrxrEvDaEEY9NfrPQ6uYQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/resource-detector-container@0.3.11': - resolution: {integrity: sha512-22ndMDakxX+nuhAYwqsciexV8/w26JozRUV0FN9kJiqSWtA1b5dCVtlp3J6JivG5t8kDN9UF5efatNnVbqRT9Q==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/resource-detector-gcp@0.29.13': - resolution: {integrity: 
sha512-vdotx+l3Q+89PeyXMgKEGnZ/CwzwMtuMi/ddgD9/5tKZ08DfDGB2Npz9m2oXPHRCjc4Ro6ifMqFlRyzIvgOjhg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': ^1.0.0 - - '@opentelemetry/resources@1.24.1': - resolution: {integrity: sha512-cyv0MwAaPF7O86x5hk3NNgenMObeejZFLJJDVuSeSMIsknlsj3oOZzRv3qSzlwYomXsICfBeFFlxwHQte5mGXQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': '>=1.0.0 <1.9.0' - '@opentelemetry/resources@1.25.1': resolution: {integrity: sha512-pkZT+iFYIZsVn6+GzM0kSX+u3MSLCY9md+lIJOoKl/P+gJFfxJte/60Usdp8Ce4rOs8GduUpSPNe1ddGyDT1sQ==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': '>=1.0.0 <1.10.0' - '@opentelemetry/sdk-logs@0.51.1': - resolution: {integrity: sha512-ULQQtl82b673PpZc5/0EtH4V+BrwVOgKJZEB7tYZnGTG3I98tQVk89S9/JSixomDr++F4ih+LSJTCqIKBz+MQQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': '>=1.4.0 <1.9.0' - '@opentelemetry/api-logs': '>=0.39.1' - - '@opentelemetry/sdk-metrics@1.24.1': - resolution: {integrity: sha512-FrAqCbbGao9iKI+Mgh+OsC9+U2YMoXnlDHe06yH7dvavCKzE3S892dGtX54+WhSFVxHR/TMRVJiK/CV93GR0TQ==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': '>=1.3.0 <1.9.0' - '@opentelemetry/sdk-metrics@1.25.1': resolution: {integrity: sha512-9Mb7q5ioFL4E4dDrc4wC/A3NTHDat44v4I3p2pLPSxRvqUbDIQyMVr9uK+EU69+HWhlET1VaSrRzwdckWqY15Q==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': '>=1.3.0 <1.10.0' - '@opentelemetry/sdk-node@0.51.1': - resolution: {integrity: sha512-GgmNF9C+6esr8PIJxCqHw84rEOkYm6XdFWZ2+Wyc3qaUt92ACoN7uSw5iKNvaUq62W0xii1wsGxwHzyENtPP8w==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': '>=1.3.0 <1.9.0' - - '@opentelemetry/sdk-trace-base@1.24.1': - resolution: {integrity: sha512-zz+N423IcySgjihl2NfjBf0qw1RWe11XIAWVrTNOSSI6dtSPJiVom2zipFB2AEEtJWpv0Iz6DY6+TjnyTV5pWg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': '>=1.0.0 <1.9.0' - '@opentelemetry/sdk-trace-base@1.25.1': resolution: 
{integrity: sha512-C8k4hnEbc5FamuZQ92nTOp8X/diCY56XUTnMiv9UTuJitCzaNNHAVsdm5+HLCdI8SLQsLWIrG38tddMxLVoftw==} engines: {node: '>=14'} peerDependencies: '@opentelemetry/api': '>=1.0.0 <1.10.0' - '@opentelemetry/sdk-trace-node@1.24.1': - resolution: {integrity: sha512-/FZX8uWaGIAwsDhqI8VvQ+qWtfMNlXjaFYGc+vmxgdRFppCSSIRwrPyIhJO1qx61okyYhoyxVEZAfoiNxrfJCg==} - engines: {node: '>=14'} - peerDependencies: - '@opentelemetry/api': '>=1.0.0 <1.9.0' - - '@opentelemetry/semantic-conventions@1.24.1': - resolution: {integrity: sha512-VkliWlS4/+GHLLW7J/rVBA00uXus1SWvwFvcUDxDwmFxYfg/2VI6ekwdXS28cjI8Qz2ky2BzG8OUHo+WeYIWqw==} - engines: {node: '>=14'} - '@opentelemetry/semantic-conventions@1.25.1': resolution: {integrity: sha512-ZDjMJJQRlyk8A1KZFCc+bCbsyrn1wTwdNt56F7twdfUfnHUZUq77/WfONCj8p72NZOyP7pNTdUWSTYC3GTbuuQ==} engines: {node: '>=14'} - '@opentelemetry/semantic-conventions@1.27.0': - resolution: {integrity: sha512-sAay1RrB+ONOem0OZanAR1ZI/k7yDpnOQSQmTMuGImUQb2y8EbSaCJ94FQluM74xoU03vlb2d2U90hZluL6nQg==} - engines: {node: '>=14'} - '@opentelemetry/sql-common@0.40.1': resolution: {integrity: sha512-nSDlnHSqzC3pXn/wZEZVLuAuJ1MYMXPBwtv2qAbCa3847SaHItdE7SzUq/Jtb0KZmh1zfAbNi3AAMjztTT4Ugg==} engines: {node: '>=14'} @@ -1488,36 +1007,6 @@ packages: '@prisma/instrumentation@5.17.0': resolution: {integrity: sha512-c1Sle4ji8aasMcYfBBHFM56We4ljfenVtRmS8aY06BllS7SoU6SmJBwG7vil+GHiR0Yrh+t9iBwt4AY0Jr4KNQ==} - '@protobufjs/aspromise@1.1.2': - resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==} - - '@protobufjs/base64@1.1.2': - resolution: {integrity: sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==} - - '@protobufjs/codegen@2.0.4': - resolution: {integrity: sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==} - - '@protobufjs/eventemitter@1.1.0': - resolution: {integrity: 
sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==} - - '@protobufjs/fetch@1.1.0': - resolution: {integrity: sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==} - - '@protobufjs/float@1.0.2': - resolution: {integrity: sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==} - - '@protobufjs/inquire@1.1.0': - resolution: {integrity: sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==} - - '@protobufjs/path@1.1.2': - resolution: {integrity: sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==} - - '@protobufjs/pool@1.1.0': - resolution: {integrity: sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==} - - '@protobufjs/utf8@1.1.0': - resolution: {integrity: sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==} - '@puppeteer/browsers@2.2.3': resolution: {integrity: sha512-bJ0UBsk0ESOs6RFcLXOt99a3yTDcOKlzfjad+rhFwdaG1Lu/Wzq58GHYCDTlZ9z6mldf4g+NTb+TXEfe0PpnsQ==} engines: {node: '>=18'} @@ -1993,12 +1482,6 @@ packages: '@tsconfig/recommended@1.0.6': resolution: {integrity: sha512-0IKu9GHYF1NGTJiYgfWwqnOQSlnE9V9R7YohHNNf0/fj/SyOZWzdd06JFr0fLpg1Mqw0kGbYg8w5xdkSqLKM9g==} - '@types/accepts@1.3.7': - resolution: {integrity: sha512-Pay9fq2lM2wXPWbteBsRAGiWH2hig4ZE2asK+mm7kUzlxRTfL961rj89I6zV/E3PcIkDqyuBEcMxFT7rccugeQ==} - - '@types/aws-lambda@8.10.122': - resolution: {integrity: sha512-vBkIh9AY22kVOCEKo5CJlyCgmSWvasC+SWUxL/x/vOwRobMpI/HG1xp/Ae3AqmSiZeLUbOhW0FCD3ZjqqUxmXw==} - '@types/babel__core@7.20.5': resolution: {integrity: sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==} @@ -2014,21 +1497,12 @@ packages: '@types/body-parser@1.19.5': resolution: {integrity: 
sha512-fB3Zu92ucau0iQ0JMCFQE7b/dv8Ot07NI3KaZIkIUNXq82k4eBAqUaneXfleGY9JWskeS9y+u0nXMyspcuQrCg==} - '@types/bunyan@1.8.9': - resolution: {integrity: sha512-ZqS9JGpBxVOvsawzmVt30sP++gSQMTejCkIAQ3VdadOcRE8izTyW66hufvwLeH+YEGP6Js2AW7Gz+RMyvrEbmw==} - '@types/connect@3.4.36': resolution: {integrity: sha512-P63Zd/JUGq+PdrM1lv0Wv5SBYeA2+CORvbrXbngriYY0jzLUWfQMQQxOhjONEz/wlHOAxOdY7CY65rgQdTjq2w==} '@types/connect@3.4.38': resolution: {integrity: sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug==} - '@types/content-disposition@0.5.8': - resolution: {integrity: sha512-QVSSvno3dE0MgO76pJhmv4Qyi/j0Yk9pBp0Y7TJ2Tlj+KCgJWY6qX7nnxCOLkZ3VYRSIk1WTxCvwUSdx6CCLdg==} - - '@types/cookies@0.9.0': - resolution: {integrity: sha512-40Zk8qR147RABiQ7NQnBzWzDcjKzNrntB5BAmeGCb2p/MIyOE+4BVvc17wumsUqUw00bJYqoXFHYygQnEFh4/Q==} - '@types/cors@2.8.17': resolution: {integrity: sha512-8CGDvrBj1zgo2qE+oS3pOCyYNqCPryMWY2bGfwA0dcfopWGgxs+78df0Rs3rc9THP4JkOhLsAa+15VdpAqkcUA==} @@ -2047,9 +1521,6 @@ packages: '@types/graceful-fs@4.1.9': resolution: {integrity: sha512-olP3sd1qOEe5dXTSaFvQG+02VdRXcdytWLAZsAq1PecU8uqQAhkrnbli7DagjtXKW/Bl7YJbUsa8MPcuc8LHEQ==} - '@types/http-assert@1.5.6': - resolution: {integrity: sha512-TTEwmtjgVbYAzZYWyeHPrrtWnfVkm8tQkP8P21uQifPgMRgjrow3XDEYqucuC8SKZJT7pUnhU/JymvjggxO9vw==} - '@types/http-errors@2.0.4': resolution: {integrity: sha512-D0CFMMtydbJAegzOyHjtiKPLlvnm3iTZyZRSZoLq2mRhDdmLfIWOCYPfQJ4cu2erKghU++QvjcUjp/5h7hESpA==} @@ -2065,21 +1536,6 @@ packages: '@types/jest@29.5.12': resolution: {integrity: sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==} - '@types/keygrip@1.0.6': - resolution: {integrity: sha512-lZuNAY9xeJt7Bx4t4dx0rYCDqGPW8RXhQZK1td7d4H6E9zYbLoOtjBvfwdTKpsyxQI/2jv+armjX/RW+ZNpXOQ==} - - '@types/koa-compose@3.2.8': - resolution: {integrity: sha512-4Olc63RY+MKvxMwVknCUDhRQX1pFQoBZ/lXcRLP69PQkEpze/0cr8LNqJQe5NFb/b19DWi2a5bTi2VAlQzhJuA==} - - '@types/koa@2.14.0': - 
resolution: {integrity: sha512-DTDUyznHGNHAl+wd1n0z1jxNajduyTh8R53xoewuerdBzGo6Ogj6F2299BFtrexJw4NtgjsI5SMPCmV9gZwGXA==} - - '@types/koa__router@12.0.3': - resolution: {integrity: sha512-5YUJVv6NwM1z7m6FuYpKfNLTZ932Z6EF6xy2BbtpJSyn13DKNQEkXVffFVSnJHxvwwWh2SAeumpjAYUELqgjyw==} - - '@types/memcached@2.2.10': - resolution: {integrity: sha512-AM9smvZN55Gzs2wRrqeMHVP7KE8KWgCJO/XL5yCly2xF6EKa4YlbpK+cLSAH4NG/Ah64HrlegmGqW8kYws7Vxg==} - '@types/mime@1.3.5': resolution: {integrity: sha512-/pyBZWSLD2n0dcHE3hq8s8ZvcETHtEuF+3E7XVt0Ig2nvsVQXdghHVcEkIWjy9A0wKfTn97a/PSDYohKIlnP/w==} @@ -2125,15 +1581,9 @@ packages: '@types/shimmer@1.0.5': resolution: {integrity: sha512-9Hp0ObzwwO57DpLFF0InUjUm/II8GmKAvzbefxQTihCb7KI6yc9yzf0nLc4mVdby5N4DRCgQM2wCup9KTieeww==} - '@types/stack-trace@0.0.29': - resolution: {integrity: sha512-TgfOX+mGY/NyNxJLIbDWrO9DjGoVSW9+aB8H2yy1fy32jsvxijhmyJI9fDFgvz3YP4lvJaq9DzdR/M1bOgVc9g==} - '@types/stack-utils@2.0.3': resolution: {integrity: sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==} - '@types/tedious@4.0.14': - resolution: {integrity: sha512-KHPsfX/FoVbUGbyYvk1q9MMQHLPeRZhRJZdO45Q4YjvFkv4hMNghCWTvy7rdKessBsmtz4euWCWAB6/tVpI1Iw==} - '@types/triple-beam@1.3.5': resolution: {integrity: sha512-6WaYesThRMCl19iryMYP7/x2OVgCtbIVflDGFpWnb9irXI3UjYE4AzmYuiUKY1AJstGijoY+MgUszMgRxIYTYw==} @@ -2360,9 +1810,6 @@ packages: resolution: {integrity: sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==} engines: {node: '>=10.0.0'} - bignumber.js@9.1.2: - resolution: {integrity: sha512-2/mKyZH9K85bzOEfhXDBFZTGd1CTs+5IHpeFQo9luiBG7hghdC851Pj2WAhb6E3R6b9tZj/XKhbg4fum+Kepug==} - bin-links@4.0.4: resolution: {integrity: sha512-cMtq4W5ZsEwcutJrVId+a/tjt8GSbS+h0oNkdl6+6rBuEv8Ot33Bevj5KPm40t309zuhVic8NjpuL42QCiJWWA==} engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0} @@ -2374,9 +1821,6 @@ packages: binary-search@1.3.6: resolution: {integrity: 
sha512-nbE1WxOTTrUWIfsfZ4aHGYu5DOuNkbxGokjV6Z2kxfJK3uaAb8zNK1muzOeipoLHZjInT4Br88BHpzevc681xA==} - bl@4.1.0: - resolution: {integrity: sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==} - bluebird@3.4.7: resolution: {integrity: sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==} @@ -2507,22 +1951,10 @@ packages: cjs-module-lexer@1.3.1: resolution: {integrity: sha512-a3KdPAANPbNE4ZUv9h6LckSl9zLsYOP4MBmhIPkRaeyybt+r4UghLvq+xw/YwUcC1gqylCkL4rdVs3Lwupjm4Q==} - cli-cursor@3.1.0: - resolution: {integrity: sha512-I/zHAwsKf9FqGoXM4WWRACob9+SNukZTd94DWF57E4toouRulbCxcUh6RKUEOQlYTHJnzkPMySvPNaaSLNfLZw==} - engines: {node: '>=8'} - - cli-spinners@2.9.2: - resolution: {integrity: sha512-ywqV+5MmyL4E7ybXgKys4DugZbX0FC6LnwrhjuykIjnK9k8OQacQ7axGKnjDXWNhns0xot3bZI5h55H8yo9cJg==} - engines: {node: '>=6'} - cliui@8.0.1: resolution: {integrity: sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==} engines: {node: '>=12'} - clone@1.0.4: - resolution: {integrity: sha512-JQHZ2QMW6l3aH/j6xCqQThY/9OH4D/9ls34cgkUBiEeocRTU04tHfKPBsUK1PqZCUQM7GiA0IIXJSuXHI64Kbg==} - engines: {node: '>=0.8'} - cluster-key-slot@1.1.2: resolution: {integrity: sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA==} engines: {node: '>=0.10.0'} @@ -2724,17 +2156,10 @@ packages: resolution: {integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==} engines: {node: '>=0.10.0'} - defaults@1.0.4: - resolution: {integrity: sha512-eFuaLoy/Rxalv2kr+lqMlUnrDWV+3j4pljOIJgLIhI058IQfWJ7vXhyEIHu+HtC738klGALYxOKDO0bQP3tg8A==} - define-data-property@1.1.4: resolution: {integrity: sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==} engines: {node: '>= 0.4'} - define-lazy-prop@2.0.0: - resolution: {integrity: 
sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==} - engines: {node: '>=8'} - degenerator@5.0.1: resolution: {integrity: sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==} engines: {node: '>= 14'} @@ -2946,9 +2371,6 @@ packages: resolution: {integrity: sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==} engines: {node: '>= 0.10.0'} - extend@3.0.2: - resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==} - extract-zip@2.0.1: resolution: {integrity: sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==} engines: {node: '>= 10.17.0'} @@ -3070,14 +2492,6 @@ packages: function-bind@1.1.2: resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==} - gaxios@6.7.1: - resolution: {integrity: sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==} - engines: {node: '>=14'} - - gcp-metadata@6.1.0: - resolution: {integrity: sha512-Jh/AIwwgaxan+7ZUUmRLCjtchyDiqh4KjBJ5tW3plBZb5iL/BPcso8A5DlzeD9qlw0duCamnNdpFjxwaT0KyKg==} - engines: {node: '>=14'} - generic-pool@3.9.0: resolution: {integrity: sha512-hymDOu5B53XvN4QT9dBmZxPX4CWhBPPLguTZ9MMFeFa/Kg0xWVfylOVNlJji/E7yTZWFd/q9GO5TxDLq156D7g==} engines: {node: '>= 4'} @@ -3250,9 +2664,6 @@ packages: import-in-the-middle@1.7.1: resolution: {integrity: sha512-1LrZPDtW+atAxH42S6288qyDFNQ2YCty+2mxEPRtfazH6Z5QwkaBSTS2ods7hnVJioF6rkRfNoA6A/MstpFXLg==} - import-in-the-middle@1.7.4: - resolution: {integrity: sha512-Lk+qzWmiQuRPPulGQeK5qq0v32k2bHnWrRPFgqyvhw7Kkov5L6MOLOIU3pcWeujc9W4q54Cp3Q2WV16eQkc7Bg==} - import-local@3.1.0: resolution: {integrity: sha512-ASB07uLtnDs1o6EHjKpX34BKYDSqnFerfTOJL2HvMqF70LnxpjkzDB8J44oT9pu4AMPkQwf8jl6szgvNd2tRIg==} engines: {node: '>=8'} @@ -3303,11 +2714,6 @@ packages: 
is-core-module@2.13.1: resolution: {integrity: sha512-hHrIjvZsftOsvKSn2TRYl63zvxsgE0K+0mYMoH6gD4omR5IWB2KynivBQczo3+wF1cCkjzvptnI9Q0sPU66ilw==} - is-docker@2.2.1: - resolution: {integrity: sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==} - engines: {node: '>=8'} - hasBin: true - is-extglob@2.1.1: resolution: {integrity: sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==} engines: {node: '>=0.10.0'} @@ -3324,10 +2730,6 @@ packages: resolution: {integrity: sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==} engines: {node: '>=0.10.0'} - is-interactive@1.0.0: - resolution: {integrity: sha512-2HvIEKRoqS62guEC+qBjpvRubdX910WCMuJTZ+I9yvqKU2/12eSL549HMwtabb4oupdj2sMP50k+XJfB/8JE6w==} - engines: {node: '>=8'} - is-number@7.0.0: resolution: {integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==} engines: {node: '>=0.12.0'} @@ -3344,14 +2746,6 @@ packages: resolution: {integrity: sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==} engines: {node: '>=8'} - is-unicode-supported@0.1.0: - resolution: {integrity: sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==} - engines: {node: '>=10'} - - is-wsl@2.2.0: - resolution: {integrity: sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==} - engines: {node: '>=8'} - isarray@1.0.0: resolution: {integrity: sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==} @@ -3563,9 +2957,6 @@ packages: engines: {node: '>=4'} hasBin: true - json-bigint@1.0.0: - resolution: {integrity: sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==} - json-parse-even-better-errors@2.3.1: resolution: {integrity: 
sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==} @@ -3576,9 +2967,6 @@ packages: json-schema-traverse@1.0.0: resolution: {integrity: sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==} - json-stringify-safe@5.0.1: - resolution: {integrity: sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==} - json5@2.2.3: resolution: {integrity: sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==} engines: {node: '>=6'} @@ -3806,24 +3194,12 @@ packages: resolution: {integrity: sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==} engines: {node: '>=8'} - lodash.camelcase@4.3.0: - resolution: {integrity: sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==} - lodash.defaults@4.2.0: resolution: {integrity: sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==} lodash.isarguments@3.1.0: resolution: {integrity: sha512-chi4NHZlZqZD18a0imDHnZPrDeBbTtVN7GXMwuGdRH9qotxAjYs3aVLKc7zNOG9eddR5Ksd8rvFEBc9SsggPpg==} - lodash.isobject@3.0.2: - resolution: {integrity: sha512-3/Qptq2vr7WeJbB4KHUSKlq8Pl7ASXi3UG6CMbBm8WRtXi8+GHm7mKaU3urfpSEzWe2wCIChs6/sdocUsTKJiA==} - - lodash.isplainobject@4.0.6: - resolution: {integrity: sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==} - - lodash.isstring@4.0.1: - resolution: {integrity: sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==} - lodash.memoize@4.1.2: resolution: {integrity: sha512-t7j+NzmgnQzTAYXcsHYLgimltOV1MXHtlOWf6GjL9Kj8GK5FInw5JotxvbOs+IvV1/Dzo04/fCGfLVs7aXb4Ag==} @@ -3836,10 +3212,6 @@ packages: lodash@4.17.21: resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==} - log-symbols@4.1.0: - 
resolution: {integrity: sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg==} - engines: {node: '>=10'} - logform@2.6.0: resolution: {integrity: sha512-1ulHeNPp6k/LD8H91o7VYFBng5i1BDE7HoKxVbZiGFidS1Rj65qcywLxX+pVfAPoQJEjRdvKcusKwOupHCVOVQ==} engines: {node: '>= 12.0.0'} @@ -3855,9 +3227,6 @@ packages: logsnag@1.0.0: resolution: {integrity: sha512-HMzjh75OR5EVY7Be4Rw8TcDTIY5UPsrXF1HvQ6EzDi21x5cQcDzi4Ts0Y/ruPCbxKY2KG17YjeeTzErXFewFBg==} - long@5.2.3: - resolution: {integrity: sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==} - loose-envify@1.4.0: resolution: {integrity: sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==} hasBin: true @@ -4180,10 +3549,6 @@ packages: resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==} engines: {node: '>=6'} - open@8.4.2: - resolution: {integrity: sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==} - engines: {node: '>=12'} - openai@3.3.0: resolution: {integrity: sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==} @@ -4212,10 +3577,6 @@ packages: option@0.2.4: resolution: {integrity: sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==} - ora@5.4.1: - resolution: {integrity: sha512-5b6Y85tPxZZ7QytO+BQzysW31HJku27cRIlkbAXaNx+BdcVi+LlRFmVXzeF6a7JCwJpyw5c4b+YSVImQIrBpuQ==} - engines: {node: '>=10'} - p-finally@1.0.0: resolution: {integrity: sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow==} engines: {node: '>=4'} @@ -4359,9 +3720,6 @@ packages: resolution: {integrity: sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==} engines: {node: '>=8.6'} - pino-abstract-transport@1.2.0: - resolution: {integrity: 
sha512-Guhh8EZfPCfH+PMXAb6rKOjGQEoy0xlAIn+irODG5kgfYV+BQ0rGYYWTIel3P5mmyXqkYkPmdIkywsn6QKUR1Q==} - pirates@4.0.6: resolution: {integrity: sha512-saLsH7WeYYPiD25LDuLRRY/i+6HaPYr6G1OUlN39otzkSTxKnubR9RTxS3/Kk50s1g2JTgFwWQDQyplC5/SHZg==} engines: {node: '>= 6'} @@ -4426,10 +3784,6 @@ packages: proto-list@1.2.4: resolution: {integrity: sha512-vtK/94akxsTMhe0/cbfpR+syPuszcuwhqVjJq26CuNDgFGj682oRBXOP5MJpv2r7JtE8MsiepGIqvvOTBwn2vA==} - protobufjs@7.4.0: - resolution: {integrity: sha512-mRUWCc3KUU4w1jU8sGxICXH/gNS94DvI1gxqDvBzhj1JpcsimQkYiOJfwsPUykUI5ZaspFbSgmBLER8IrQ3tqw==} - engines: {node: '>=12.0.0'} - proxy-addr@2.0.7: resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==} engines: {node: '>= 0.10'} @@ -4583,10 +3937,6 @@ packages: resolution: {integrity: sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw==} hasBin: true - restore-cursor@3.1.0: - resolution: {integrity: sha512-l+sSefzHpj5qimhFSE5a8nufZYAM3sBSVMAPtYkmC+4EH2anSGaEMXSD0izRQbu9nfyQ9y5JrVmp7E8oZrUjvA==} - engines: {node: '>=8'} - retry@0.13.1: resolution: {integrity: sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==} engines: {node: '>= 4'} @@ -4657,10 +4007,6 @@ packages: resolution: {integrity: sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==} engines: {node: '>= 0.8.0'} - serialize-error@8.1.0: - resolution: {integrity: sha512-3NnuWfM6vBYoy5gZFvHiYsVbafvI9vZv/+jlIigFn4oP4zjNPK3LhcY0xSCgeb1a5L8jO71Mit9LlNoi2UfDDQ==} - engines: {node: '>=10'} - serve-static@1.15.0: resolution: {integrity: sha512-XGuRDNjXUijsUL0vl6nSD7cwURuzEgglbOaFuZM9g3kwDXOWVTck0jLzjPzGD+TazWbboZYu52/9/XPdUgne9g==} engines: {node: '>= 0.8.0'} @@ -4962,10 +4308,6 @@ packages: resolution: {integrity: sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==} engines: {node: '>=4'} - type-fest@0.20.2: - 
resolution: {integrity: sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==} - engines: {node: '>=10'} - type-fest@0.21.3: resolution: {integrity: sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==} engines: {node: '>=10'} @@ -5072,9 +4414,6 @@ packages: walker@1.0.8: resolution: {integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==} - wcwidth@1.0.1: - resolution: {integrity: sha512-XHPEwS0q6TaxcvG85+8EYkbiCux2XtWG2mkc47Ng2A77BQu9+DqIOJldST4HgPkuea7dvKSj5VgX3P1d4rW8Tg==} - web-streams-polyfill@3.3.3: resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==} engines: {node: '>= 8'} @@ -6014,83 +5353,6 @@ snapshots: shell-quote: 1.8.1 yargs: 17.7.2 - '@grpc/grpc-js@1.12.2': - dependencies: - '@grpc/proto-loader': 0.7.13 - '@js-sdsl/ordered-map': 4.4.2 - - '@grpc/proto-loader@0.7.13': - dependencies: - lodash.camelcase: 4.3.0 - long: 5.2.3 - protobufjs: 7.4.0 - yargs: 17.7.2 - - '@hyperdx/instrumentation-exception@0.1.0(@opentelemetry/api@1.9.0)': - dependencies: - '@hyperdx/instrumentation-sentry-node': 0.1.0(@opentelemetry/api@1.9.0) - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@sentry/core': 8.26.0 - '@sentry/types': 8.26.0 - '@sentry/utils': 8.26.0 - json-stringify-safe: 5.0.1 - shimmer: 1.2.1 - tslib: 2.6.3 - transitivePeerDependencies: - - supports-color - - '@hyperdx/instrumentation-sentry-node@0.1.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - json-stringify-safe: 5.0.1 - shimmer: 1.2.1 - tslib: 
2.6.3 - transitivePeerDependencies: - - supports-color - - '@hyperdx/node-opentelemetry@0.8.1': - dependencies: - '@hyperdx/instrumentation-exception': 0.1.0(@opentelemetry/api@1.9.0) - '@hyperdx/instrumentation-sentry-node': 0.1.0(@opentelemetry/api@1.9.0) - '@opentelemetry/api': 1.9.0 - '@opentelemetry/api-logs': 0.51.1 - '@opentelemetry/auto-instrumentations-node': 0.46.1(@opentelemetry/api@1.9.0) - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/exporter-logs-otlp-http': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/exporter-metrics-otlp-proto': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/exporter-trace-otlp-proto': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-http': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-runtime-node': 0.4.0(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-logs': 0.51.1(@opentelemetry/api-logs@0.51.1)(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-metrics': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-node': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-trace-base': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - cli-spinners: 2.9.2 - json-stringify-safe: 5.0.1 - lodash.isobject: 3.0.2 - lodash.isplainobject: 4.0.6 - lodash.isstring: 4.0.1 - node-fetch: 2.7.0 - open: 8.4.2 - ora: 5.4.1 - pino-abstract-transport: 1.2.0 - semver: 7.6.2 - shimmer: 1.2.1 - tslib: 2.6.3 - winston-transport: 4.8.0 - transitivePeerDependencies: - - encoding - - supports-color - '@ioredis/commands@1.2.0': {} '@isaacs/cliui@8.0.2': @@ -6300,8 +5562,6 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.4.15 - '@js-sdsl/ordered-map@4.4.2': {} - 
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))': dependencies: ansi-styles: 5.2.0 @@ -6339,39 +5599,12 @@ snapshots: - langchain - openai - '@logtail/core@0.4.21': - dependencies: - '@logtail/tools': 0.4.21 - '@logtail/types': 0.4.20 - serialize-error: 8.1.0 - - '@logtail/node@0.4.21': - dependencies: - '@logtail/core': 0.4.21 - '@logtail/types': 0.4.20 - '@msgpack/msgpack': 2.8.0 - '@types/stack-trace': 0.0.29 - cross-fetch: 3.1.8 - minimatch: 3.1.2 - serialize-error: 8.1.0 - stack-trace: 0.0.10 - transitivePeerDependencies: - - encoding - - '@logtail/tools@0.4.21': - dependencies: - '@logtail/types': 0.4.20 - - '@logtail/types@0.4.20': {} - '@mixmark-io/domino@2.2.0': {} '@mongodb-js/saslprep@1.1.7': dependencies: sparse-bitfield: 3.0.3 - '@msgpack/msgpack@2.8.0': {} - '@msgpackr-extract/msgpackr-extract-darwin-arm64@3.0.3': optional: true @@ -6398,215 +5631,21 @@ snapshots: '@one-ini/wasm@0.1.1': {} - '@opentelemetry/api-logs@0.51.1': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/api-logs@0.52.1': dependencies: '@opentelemetry/api': 1.9.0 '@opentelemetry/api@1.9.0': {} - '@opentelemetry/auto-instrumentations-node@0.46.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-amqplib': 0.37.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-aws-lambda': 
0.41.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-aws-sdk': 0.41.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-bunyan': 0.38.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-cassandra-driver': 0.38.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-connect': 0.36.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-cucumber': 0.6.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-dataloader': 0.9.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-dns': 0.36.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-express': 0.39.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-fastify': 0.36.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-fs': 0.12.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-generic-pool': 0.36.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-graphql': 0.40.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-grpc': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-hapi': 0.38.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-http': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-ioredis': 0.40.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-knex': 0.36.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-koa': 0.40.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-lru-memoizer': 0.37.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-memcached': 0.36.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-mongodb': 0.43.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-mongoose': 0.38.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-mysql': 0.38.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-mysql2': 0.38.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-nestjs-core': 0.37.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-net': 
0.36.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-pg': 0.41.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-pino': 0.39.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-redis': 0.39.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-redis-4': 0.39.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-restify': 0.38.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-router': 0.37.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-socket.io': 0.39.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-tedious': 0.10.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-undici': 0.2.0(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation-winston': 0.37.0(@opentelemetry/api@1.9.0) - '@opentelemetry/resource-detector-alibaba-cloud': 0.28.10(@opentelemetry/api@1.9.0) - '@opentelemetry/resource-detector-aws': 1.7.0(@opentelemetry/api@1.9.0) - '@opentelemetry/resource-detector-azure': 0.2.12(@opentelemetry/api@1.9.0) - '@opentelemetry/resource-detector-container': 0.3.11(@opentelemetry/api@1.9.0) - '@opentelemetry/resource-detector-gcp': 0.29.13(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-node': 0.51.1(@opentelemetry/api@1.9.0) - transitivePeerDependencies: - - encoding - - supports-color - - '@opentelemetry/context-async-hooks@1.24.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/context-async-hooks@1.25.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 - '@opentelemetry/core@1.24.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/semantic-conventions': 1.24.1 - '@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 '@opentelemetry/semantic-conventions': 1.25.1 - '@opentelemetry/core@1.26.0(@opentelemetry/api@1.9.0)': - dependencies: - 
'@opentelemetry/api': 1.9.0 - '@opentelemetry/semantic-conventions': 1.27.0 - - '@opentelemetry/exporter-logs-otlp-http@0.51.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/api-logs': 0.51.1 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-exporter-base': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-transformer': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-logs': 0.51.1(@opentelemetry/api-logs@0.51.1)(@opentelemetry/api@1.9.0) - - '@opentelemetry/exporter-metrics-otlp-http@0.51.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-exporter-base': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-transformer': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-metrics': 1.24.1(@opentelemetry/api@1.9.0) - - '@opentelemetry/exporter-metrics-otlp-proto@0.51.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/exporter-metrics-otlp-http': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-exporter-base': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-proto-exporter-base': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-transformer': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-metrics': 1.24.1(@opentelemetry/api@1.9.0) - - '@opentelemetry/exporter-trace-otlp-grpc@0.51.1(@opentelemetry/api@1.9.0)': - dependencies: - '@grpc/grpc-js': 1.12.2 - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-grpc-exporter-base': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-transformer': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 
1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-trace-base': 1.24.1(@opentelemetry/api@1.9.0) - - '@opentelemetry/exporter-trace-otlp-http@0.51.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-exporter-base': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-transformer': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-trace-base': 1.24.1(@opentelemetry/api@1.9.0) - - '@opentelemetry/exporter-trace-otlp-proto@0.51.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-exporter-base': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-proto-exporter-base': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-transformer': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-trace-base': 1.24.1(@opentelemetry/api@1.9.0) - - '@opentelemetry/exporter-zipkin@1.24.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-trace-base': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.24.1 - - '@opentelemetry/instrumentation-amqplib@0.37.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-aws-lambda@0.41.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - 
'@opentelemetry/propagator-aws-xray': 1.26.0(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@types/aws-lambda': 8.10.122 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-aws-sdk@0.41.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/propagation-utils': 0.30.12(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-bunyan@0.38.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/api-logs': 0.51.1 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@types/bunyan': 1.8.9 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-cassandra-driver@0.38.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-connect@0.36.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@types/connect': 3.4.36 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-connect@0.38.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6617,39 +5656,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-cucumber@0.6.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - 
'@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-dataloader@0.9.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-dns@0.36.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - semver: 7.6.2 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-express@0.39.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-express@0.41.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6659,15 +5665,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-fastify@0.36.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-fastify@0.38.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6677,14 +5674,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-fs@0.12.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - 
'@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-fs@0.14.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6693,21 +5682,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-generic-pool@0.36.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-graphql@0.40.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-graphql@0.42.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6715,23 +5689,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-grpc@0.51.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.24.1 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-hapi@0.38.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-hapi@0.40.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6741,16 +5698,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-http@0.51.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - 
'@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.24.1 - semver: 7.6.2 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-http@0.52.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6761,15 +5708,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-ioredis@0.40.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/redis-common': 0.36.2 - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-ioredis@0.42.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6779,25 +5717,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-knex@0.36.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-koa@0.40.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@types/koa': 2.14.0 - '@types/koa__router': 12.0.3 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-koa@0.42.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6807,31 +5726,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-lru-memoizer@0.37.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - 
'@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-memcached@0.36.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@types/memcached': 2.2.10 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-mongodb@0.43.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-metrics': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-mongodb@0.46.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6841,15 +5735,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-mongoose@0.38.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-mongoose@0.40.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6859,15 +5744,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-mysql2@0.38.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@opentelemetry/sql-common': 0.40.1(@opentelemetry/api@1.9.0) - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-mysql2@0.40.0(@opentelemetry/api@1.9.0)': dependencies: 
'@opentelemetry/api': 1.9.0 @@ -6877,15 +5753,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-mysql@0.38.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@types/mysql': 2.15.22 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-mysql@0.40.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6895,14 +5762,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-nestjs-core@0.37.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-nestjs-core@0.39.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6911,25 +5770,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-net@0.36.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-pg@0.41.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@opentelemetry/sql-common': 0.40.1(@opentelemetry/api@1.9.0) - '@types/pg': 8.6.1 - '@types/pg-pool': 2.0.4 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-pg@0.43.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6941,22 +5781,6 @@ snapshots: transitivePeerDependencies: - supports-color - 
'@opentelemetry/instrumentation-pino@0.39.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-redis-4@0.39.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/redis-common': 0.36.2 - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation-redis-4@0.41.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -6966,72 +5790,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/instrumentation-redis@0.39.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/redis-common': 0.36.2 - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-restify@0.38.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-router@0.37.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-runtime-node@0.4.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - transitivePeerDependencies: - - supports-color - - 
'@opentelemetry/instrumentation-socket.io@0.39.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-tedious@0.10.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - '@types/tedious': 4.0.14 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-undici@0.2.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - transitivePeerDependencies: - - supports-color - - '@opentelemetry/instrumentation-winston@0.37.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/api-logs': 0.51.1 - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation@0.46.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -7044,18 +5802,6 @@ snapshots: - supports-color optional: true - '@opentelemetry/instrumentation@0.51.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/api-logs': 0.51.1 - '@types/shimmer': 1.0.5 - import-in-the-middle: 1.7.4 - require-in-the-middle: 7.3.0 - semver: 7.6.2 - shimmer: 1.2.1 - transitivePeerDependencies: - - supports-color - '@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -7068,120 +5814,14 @@ snapshots: transitivePeerDependencies: - supports-color - '@opentelemetry/otlp-exporter-base@0.51.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - 
'@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - - '@opentelemetry/otlp-grpc-exporter-base@0.51.1(@opentelemetry/api@1.9.0)': - dependencies: - '@grpc/grpc-js': 1.12.2 - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-exporter-base': 0.51.1(@opentelemetry/api@1.9.0) - protobufjs: 7.4.0 - - '@opentelemetry/otlp-proto-exporter-base@0.51.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/otlp-exporter-base': 0.51.1(@opentelemetry/api@1.9.0) - protobufjs: 7.4.0 - - '@opentelemetry/otlp-transformer@0.51.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/api-logs': 0.51.1 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-logs': 0.51.1(@opentelemetry/api-logs@0.51.1)(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-metrics': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-trace-base': 1.24.1(@opentelemetry/api@1.9.0) - - '@opentelemetry/propagation-utils@0.30.12(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - - '@opentelemetry/propagator-aws-xray@1.26.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.26.0(@opentelemetry/api@1.9.0) - - '@opentelemetry/propagator-b3@1.24.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - - '@opentelemetry/propagator-jaeger@1.24.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/redis-common@0.36.2': {} - '@opentelemetry/resource-detector-alibaba-cloud@0.28.10(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/resources': 
1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - - '@opentelemetry/resource-detector-aws@1.7.0(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.27.0 - - '@opentelemetry/resource-detector-azure@0.2.12(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.27.0 - - '@opentelemetry/resource-detector-container@0.3.11(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.25.1 - - '@opentelemetry/resource-detector-gcp@0.29.13(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.27.0 - gcp-metadata: 6.1.0 - transitivePeerDependencies: - - encoding - - supports-color - - '@opentelemetry/resources@1.24.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.24.1 - '@opentelemetry/resources@1.25.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 '@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0) '@opentelemetry/semantic-conventions': 1.25.1 - '@opentelemetry/sdk-logs@0.51.1(@opentelemetry/api-logs@0.51.1)(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/api-logs': 0.51.1 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 
1.24.1(@opentelemetry/api@1.9.0) - - '@opentelemetry/sdk-metrics@1.24.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0) - lodash.merge: 4.6.2 - '@opentelemetry/sdk-metrics@1.25.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -7189,32 +5829,6 @@ snapshots: '@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0) lodash.merge: 4.6.2 - '@opentelemetry/sdk-node@0.51.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/api-logs': 0.51.1 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/exporter-trace-otlp-grpc': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/exporter-trace-otlp-http': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/exporter-trace-otlp-proto': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/exporter-zipkin': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/instrumentation': 0.51.1(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-logs': 0.51.1(@opentelemetry/api-logs@0.51.1)(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-metrics': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-trace-base': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-trace-node': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.24.1 - transitivePeerDependencies: - - supports-color - - '@opentelemetry/sdk-trace-base@1.24.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/resources': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/semantic-conventions': 1.24.1 - '@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -7222,22 +5836,8 @@ snapshots: '@opentelemetry/resources': 
1.25.1(@opentelemetry/api@1.9.0) '@opentelemetry/semantic-conventions': 1.25.1 - '@opentelemetry/sdk-trace-node@1.24.1(@opentelemetry/api@1.9.0)': - dependencies: - '@opentelemetry/api': 1.9.0 - '@opentelemetry/context-async-hooks': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/core': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/propagator-b3': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/propagator-jaeger': 1.24.1(@opentelemetry/api@1.9.0) - '@opentelemetry/sdk-trace-base': 1.24.1(@opentelemetry/api@1.9.0) - semver: 7.6.2 - - '@opentelemetry/semantic-conventions@1.24.1': {} - '@opentelemetry/semantic-conventions@1.25.1': {} - '@opentelemetry/semantic-conventions@1.27.0': {} - '@opentelemetry/sql-common@0.40.1(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -7262,29 +5862,6 @@ snapshots: transitivePeerDependencies: - supports-color - '@protobufjs/aspromise@1.1.2': {} - - '@protobufjs/base64@1.1.2': {} - - '@protobufjs/codegen@2.0.4': {} - - '@protobufjs/eventemitter@1.1.0': {} - - '@protobufjs/fetch@1.1.0': - dependencies: - '@protobufjs/aspromise': 1.1.2 - '@protobufjs/inquire': 1.1.0 - - '@protobufjs/float@1.0.2': {} - - '@protobufjs/inquire@1.1.0': {} - - '@protobufjs/path@1.1.2': {} - - '@protobufjs/pool@1.1.0': {} - - '@protobufjs/utf8@1.1.0': {} - '@puppeteer/browsers@2.2.3': dependencies: debug: 4.3.4 @@ -7926,12 +6503,6 @@ snapshots: '@tsconfig/recommended@1.0.6': {} - '@types/accepts@1.3.7': - dependencies: - '@types/node': 20.14.1 - - '@types/aws-lambda@8.10.122': {} - '@types/babel__core@7.20.5': dependencies: '@babel/parser': 7.24.6 @@ -7958,10 +6529,6 @@ snapshots: '@types/connect': 3.4.38 '@types/node': 20.14.1 - '@types/bunyan@1.8.9': - dependencies: - '@types/node': 20.14.1 - '@types/connect@3.4.36': dependencies: '@types/node': 20.14.1 @@ -7970,15 +6537,6 @@ snapshots: dependencies: '@types/node': 20.14.1 - '@types/content-disposition@0.5.8': {} - - '@types/cookies@0.9.0': - dependencies: - 
'@types/connect': 3.4.38 - '@types/express': 4.17.21 - '@types/keygrip': 1.0.6 - '@types/node': 20.14.1 - '@types/cors@2.8.17': dependencies: '@types/node': 20.14.1 @@ -8009,8 +6567,6 @@ snapshots: dependencies: '@types/node': 20.14.1 - '@types/http-assert@1.5.6': {} - '@types/http-errors@2.0.4': {} '@types/istanbul-lib-coverage@2.0.6': {} @@ -8028,31 +6584,6 @@ snapshots: expect: 29.7.0 pretty-format: 29.7.0 - '@types/keygrip@1.0.6': {} - - '@types/koa-compose@3.2.8': - dependencies: - '@types/koa': 2.14.0 - - '@types/koa@2.14.0': - dependencies: - '@types/accepts': 1.3.7 - '@types/content-disposition': 0.5.8 - '@types/cookies': 0.9.0 - '@types/http-assert': 1.5.6 - '@types/http-errors': 2.0.4 - '@types/keygrip': 1.0.6 - '@types/koa-compose': 3.2.8 - '@types/node': 20.14.1 - - '@types/koa__router@12.0.3': - dependencies: - '@types/koa': 2.14.0 - - '@types/memcached@2.2.10': - dependencies: - '@types/node': 20.14.1 - '@types/mime@1.3.5': {} '@types/mysql@2.15.22': @@ -8105,14 +6636,8 @@ snapshots: '@types/shimmer@1.0.5': {} - '@types/stack-trace@0.0.29': {} - '@types/stack-utils@2.0.3': {} - '@types/tedious@4.0.14': - dependencies: - '@types/node': 20.14.1 - '@types/triple-beam@1.3.5': {} '@types/uuid@9.0.8': {} @@ -8363,8 +6888,6 @@ snapshots: basic-ftp@5.0.5: {} - bignumber.js@9.1.2: {} - bin-links@4.0.4: dependencies: cmd-shim: 6.0.3 @@ -8376,12 +6899,6 @@ snapshots: binary-search@1.3.6: {} - bl@4.1.0: - dependencies: - buffer: 5.7.1 - inherits: 2.0.4 - readable-stream: 3.6.2 - bluebird@3.4.7: {} body-parser@1.20.2: @@ -8544,20 +7061,12 @@ snapshots: cjs-module-lexer@1.3.1: {} - cli-cursor@3.1.0: - dependencies: - restore-cursor: 3.1.0 - - cli-spinners@2.9.2: {} - cliui@8.0.1: dependencies: string-width: 4.2.3 strip-ansi: 6.0.1 wrap-ansi: 7.0.0 - clone@1.0.4: {} - cluster-key-slot@1.1.2: {} cmd-shim@6.0.3: {} @@ -8742,18 +7251,12 @@ snapshots: deepmerge@4.3.1: {} - defaults@1.0.4: - dependencies: - clone: 1.0.4 - define-data-property@1.1.4: dependencies: 
es-define-property: 1.0.0 es-errors: 1.3.0 gopd: 1.0.1 - define-lazy-prop@2.0.0: {} - degenerator@5.0.1: dependencies: ast-types: 0.13.4 @@ -8965,8 +7468,6 @@ snapshots: transitivePeerDependencies: - supports-color - extend@3.0.2: {} - extract-zip@2.0.1: dependencies: debug: 4.3.4 @@ -9087,25 +7588,6 @@ snapshots: function-bind@1.1.2: {} - gaxios@6.7.1: - dependencies: - extend: 3.0.2 - https-proxy-agent: 7.0.5 - is-stream: 2.0.1 - node-fetch: 2.7.0 - uuid: 9.0.1 - transitivePeerDependencies: - - encoding - - supports-color - - gcp-metadata@6.1.0: - dependencies: - gaxios: 6.7.1 - json-bigint: 1.0.0 - transitivePeerDependencies: - - encoding - - supports-color - generic-pool@3.9.0: {} gensync@1.0.0-beta.2: {} @@ -9325,13 +7807,6 @@ snapshots: module-details-from-path: 1.0.3 optional: true - import-in-the-middle@1.7.4: - dependencies: - acorn: 8.12.0 - acorn-import-attributes: 1.9.5(acorn@8.12.0) - cjs-module-lexer: 1.3.1 - module-details-from-path: 1.0.3 - import-local@3.1.0: dependencies: pkg-dir: 4.2.0 @@ -9385,8 +7860,6 @@ snapshots: dependencies: hasown: 2.0.2 - is-docker@2.2.1: {} - is-extglob@2.1.1: {} is-fullwidth-code-point@3.0.0: {} @@ -9397,8 +7870,6 @@ snapshots: dependencies: is-extglob: 2.1.1 - is-interactive@1.0.0: {} - is-number@7.0.0: {} is-plain-obj@2.1.0: {} @@ -9407,12 +7878,6 @@ snapshots: is-stream@2.0.1: {} - is-unicode-supported@0.1.0: {} - - is-wsl@2.2.0: - dependencies: - is-docker: 2.2.1 - isarray@1.0.0: {} isexe@2.0.0: {} @@ -9827,18 +8292,12 @@ snapshots: jsesc@2.5.2: {} - json-bigint@1.0.0: - dependencies: - bignumber.js: 9.1.2 - json-parse-even-better-errors@2.3.1: {} json-schema-to-zod@2.3.0: {} json-schema-traverse@1.0.0: {} - json-stringify-safe@5.0.1: {} - json5@2.2.3: {} jsonfile@6.1.0: @@ -9934,18 +8393,10 @@ snapshots: dependencies: p-locate: 4.1.0 - lodash.camelcase@4.3.0: {} - lodash.defaults@4.2.0: {} lodash.isarguments@3.1.0: {} - lodash.isobject@3.0.2: {} - - lodash.isplainobject@4.0.6: {} - - lodash.isstring@4.0.1: {} - 
lodash.memoize@4.1.2: {} lodash.merge@4.6.2: {} @@ -9954,11 +8405,6 @@ snapshots: lodash@4.17.21: {} - log-symbols@4.1.0: - dependencies: - chalk: 4.1.2 - is-unicode-supported: 0.1.0 - logform@2.6.0: dependencies: '@colors/colors': 1.6.0 @@ -9985,8 +8431,6 @@ snapshots: transitivePeerDependencies: - encoding - long@5.2.3: {} - loose-envify@1.4.0: dependencies: js-tokens: 4.0.0 @@ -10312,12 +8756,6 @@ snapshots: dependencies: mimic-fn: 2.1.0 - open@8.4.2: - dependencies: - define-lazy-prop: 2.0.0 - is-docker: 2.2.1 - is-wsl: 2.2.0 - openai@3.3.0: dependencies: axios: 0.26.1 @@ -10356,18 +8794,6 @@ snapshots: option@0.2.4: {} - ora@5.4.1: - dependencies: - bl: 4.1.0 - chalk: 4.1.2 - cli-cursor: 3.1.0 - cli-spinners: 2.9.2 - is-interactive: 1.0.0 - is-unicode-supported: 0.1.0 - log-symbols: 4.1.0 - strip-ansi: 6.0.1 - wcwidth: 1.0.1 - p-finally@1.0.0: {} p-limit@2.3.0: @@ -10519,11 +8945,6 @@ snapshots: picomatch@2.3.1: {} - pino-abstract-transport@1.2.0: - dependencies: - readable-stream: 4.5.2 - split2: 4.2.0 - pirates@4.0.6: {} pkg-dir@4.2.0: @@ -10592,21 +9013,6 @@ snapshots: proto-list@1.2.4: {} - protobufjs@7.4.0: - dependencies: - '@protobufjs/aspromise': 1.1.2 - '@protobufjs/base64': 1.1.2 - '@protobufjs/codegen': 2.0.4 - '@protobufjs/eventemitter': 1.1.0 - '@protobufjs/fetch': 1.1.0 - '@protobufjs/float': 1.0.2 - '@protobufjs/inquire': 1.1.0 - '@protobufjs/path': 1.1.2 - '@protobufjs/pool': 1.1.0 - '@protobufjs/utf8': 1.1.0 - '@types/node': 20.14.1 - long: 5.2.3 - proxy-addr@2.0.7: dependencies: forwarded: 0.2.0 @@ -10792,11 +9198,6 @@ snapshots: path-parse: 1.0.7 supports-preserve-symlinks-flag: 1.0.0 - restore-cursor@3.1.0: - dependencies: - onetime: 5.1.2 - signal-exit: 3.0.7 - retry@0.13.1: {} rimraf@5.0.7: @@ -10865,10 +9266,6 @@ snapshots: transitivePeerDependencies: - supports-color - serialize-error@8.1.0: - dependencies: - type-fest: 0.20.2 - serve-static@1.15.0: dependencies: encodeurl: 1.0.2 @@ -11178,8 +9575,6 @@ snapshots: type-detect@4.0.8: {} - 
type-fest@0.20.2: {} - type-fest@0.21.3: {} type-is@1.6.18: @@ -11265,10 +9660,6 @@ snapshots: dependencies: makeerror: 1.0.12 - wcwidth@1.0.1: - dependencies: - defaults: 1.0.4 - web-streams-polyfill@3.3.3: {} web-streams-polyfill@4.0.0-beta.3: {} From aaddbdc1bc2d841b48a341ece5c201ca7b794e67 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 16:47:07 -0800 Subject: [PATCH 40/51] Update map.ts --- apps/api/src/controllers/v1/map.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts index 31db5d58..ec7f68c8 100644 --- a/apps/api/src/controllers/v1/map.ts +++ b/apps/api/src/controllers/v1/map.ts @@ -133,7 +133,7 @@ export async function getMapResults({ // Parallelize sitemap fetch with serper search const [sitemap, ...searchResults] = await Promise.all([ - ignoreSitemap ? null : crawler.tryGetSitemap(), + ignoreSitemap ? null : crawler.tryGetSitemap(true), ...(cachedResult ? [] : pagePromises), ]); From 6fbfeafe3832294fb469cf730ba592c814d5393d Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 16:51:13 -0800 Subject: [PATCH 41/51] Nick: fixed map settings --- apps/api/src/controllers/v1/map.ts | 2 +- apps/api/src/lib/timeout.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts index ec7f68c8..9a0a5eb6 100644 --- a/apps/api/src/controllers/v1/map.ts +++ b/apps/api/src/controllers/v1/map.ts @@ -199,7 +199,7 @@ export async function getMapResults({ links = removeDuplicateUrls(links); } - const linksToReturn = links.slice(0, limit); + const linksToReturn = crawlerOptions.sitemapOnly ? 
links : links.slice(0, limit); return { success: true, diff --git a/apps/api/src/lib/timeout.ts b/apps/api/src/lib/timeout.ts index fd0e5ade..46d34a5a 100644 --- a/apps/api/src/lib/timeout.ts +++ b/apps/api/src/lib/timeout.ts @@ -1 +1 @@ -export const axiosTimeout = 3000; \ No newline at end of file +export const axiosTimeout = 5000; \ No newline at end of file From aa26dbe74ee81689f242663d39c44520479d158b Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 20 Nov 2024 17:03:04 -0800 Subject: [PATCH 42/51] Nick: map e2e tests --- apps/api/src/__tests__/e2e_map/index.test.ts | 117 +++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 apps/api/src/__tests__/e2e_map/index.test.ts diff --git a/apps/api/src/__tests__/e2e_map/index.test.ts b/apps/api/src/__tests__/e2e_map/index.test.ts new file mode 100644 index 00000000..b065dff1 --- /dev/null +++ b/apps/api/src/__tests__/e2e_map/index.test.ts @@ -0,0 +1,117 @@ +import request from "supertest"; +import dotenv from "dotenv"; + +dotenv.config(); +const TEST_URL = "http://127.0.0.1:3002"; + +describe("E2E Tests for Map API Routes", () => { + it.concurrent( + "(feat-search)should return links containing 'smart-crawl'", + async () => { + const response = await request(TEST_URL) + .post("/v1/map") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + url: "https://firecrawl.dev", + sitemapOnly: false, + search: "smart-crawl", + }); + + console.log(response.body); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("links"); + expect(response.body.links.length).toBeGreaterThan(0); + expect(response.body.links[0]).toContain("firecrawl.dev/smart-crawl"); + }, + 60000 + ); + + it.concurrent( + "(feat-subdomains) should return mapped links for firecrawl.dev with subdomains included", + async () => { + const response = await request(TEST_URL) + .post("/v1/map") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + 
.set("Content-Type", "application/json") + .send({ + url: "https://firecrawl.dev", + sitemapOnly: false, + includeSubdomains: true, + }); + + console.log(response.body); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("links"); + expect(response.body.links.length).toBeGreaterThan(0); + expect(response.body.links[response.body.links.length - 1]).toContain( + "docs.firecrawl.dev" + ); + }, + 60000 + ); + + it.concurrent( + "(feat-sitemap-only) should return mapped links for firecrawl.dev with sitemap only", + async () => { + const response = await request(TEST_URL) + .post("/v1/map") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + url: "https://firecrawl.dev", + sitemapOnly: true, + }); + + console.log(response.body); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("links"); + expect(response.body.links.length).toBeGreaterThan(0); + expect(response.body.links[response.body.links.length - 1]).not.toContain( + "docs.firecrawl.dev" + ); + }, + 60000 + ); + + it.concurrent( + "(feat-limit) should return mapped links for firecrawl.dev with a limit", + async () => { + const response = await request(TEST_URL) + .post("/v1/map") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + url: "https://firecrawl.dev", + sitemapOnly: false, + limit: 10, + }); + + console.log(response.body); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("links"); + expect(response.body.links.length).toBeLessThanOrEqual(10); + }, + 60000 + ); + + it.concurrent( + "(feat-sitemap-large) should return more than 1900 links for geekflare sitemap", + async () => { + const response = await request(TEST_URL) + .post("/v1/map") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + url: 
"https://geekflare.com/sitemap_index.xml", + sitemapOnly: true, + }); + + console.log(response.body); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("links"); + expect(response.body.links.length).toBeGreaterThan(1900); + }, + 60000 + ); +}); From 95bea6a39173dbd4e3bbf0bb0d4a1d799d78fc5b Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 24 Nov 2024 19:34:56 -0800 Subject: [PATCH 43/51] Nick: re-ranker safety + unit tests --- apps/api/src/lib/ranker.test.ts | 68 +++++++++++++++++++++++++++++++++ apps/api/src/lib/ranker.ts | 42 +++++++++++++------- 2 files changed, 96 insertions(+), 14 deletions(-) create mode 100644 apps/api/src/lib/ranker.test.ts diff --git a/apps/api/src/lib/ranker.test.ts b/apps/api/src/lib/ranker.test.ts new file mode 100644 index 00000000..6d17a08b --- /dev/null +++ b/apps/api/src/lib/ranker.test.ts @@ -0,0 +1,68 @@ +import { performRanking } from './ranker'; + +describe('performRanking', () => { + it('should rank links based on similarity to search query', async () => { + const linksWithContext = [ + 'url: https://example.com/dogs, title: All about dogs, description: Learn about different dog breeds', + 'url: https://example.com/cats, title: Cat care guide, description: Everything about cats', + 'url: https://example.com/pets, title: General pet care, description: Care for all types of pets' + ]; + + const links = [ + 'https://example.com/dogs', + 'https://example.com/cats', + 'https://example.com/pets' + ]; + + const searchQuery = 'cats training'; + + const result = await performRanking(linksWithContext, links, searchQuery); + + // Should return array of objects with link, linkWithContext, score, originalIndex + expect(result).toBeInstanceOf(Array); + expect(result.length).toBe(3); + + // First result should be the cats page since the query is about cats + expect(result[0].link).toBe('https://example.com/cats'); + + // Each result should have required properties + result.forEach(item => { + 
expect(item).toHaveProperty('link'); + expect(item).toHaveProperty('linkWithContext'); + expect(item).toHaveProperty('score'); + expect(item).toHaveProperty('originalIndex'); + expect(typeof item.score).toBe('number'); + expect(item.score).toBeGreaterThanOrEqual(0); + expect(item.score).toBeLessThanOrEqual(1); + }); + + // Scores should be in descending order + for (let i = 1; i < result.length; i++) { + expect(result[i].score).toBeLessThanOrEqual(result[i-1].score); + } + }); + + it('should handle empty inputs', async () => { + const result = await performRanking([], [], ''); + expect(result).toEqual([]); + }); + + it('should maintain original order for equal scores', async () => { + const linksWithContext = [ + 'url: https://example.com/1, title: Similar content A, description: test', + 'url: https://example.com/2, title: Similar content B, description: test' + ]; + + const links = [ + 'https://example.com/1', + 'https://example.com/2' + ]; + + const searchQuery = 'test'; + + const result = await performRanking(linksWithContext, links, searchQuery); + + // If scores are equal, original order should be maintained + expect(result[0].originalIndex).toBeLessThan(result[1].originalIndex); + }); +}); diff --git a/apps/api/src/lib/ranker.ts b/apps/api/src/lib/ranker.ts index 9a200f49..e7fa235c 100644 --- a/apps/api/src/lib/ranker.ts +++ b/apps/api/src/lib/ranker.ts @@ -42,29 +42,43 @@ const textToVector = (searchQuery: string, text: string): number[] => { async function performRanking(linksWithContext: string[], links: string[], searchQuery: string) { try { + // Handle invalid inputs + if (!searchQuery || !linksWithContext.length || !links.length) { + return []; + } + + // TODO: sanitize the search query (e.g. strip null characters); it is currently used unchanged + const sanitizedQuery = searchQuery; + // Generate embeddings for the search query - const queryEmbedding = await getEmbedding(searchQuery); + const queryEmbedding = await getEmbedding(sanitizedQuery); // Generate embeddings for each link and 
calculate similarity const linksAndScores = await Promise.all(linksWithContext.map(async (linkWithContext, index) => { - const linkEmbedding = await getEmbedding(linkWithContext); - - // console.log("linkEmbedding", linkEmbedding); - // const linkVector = textToVector(searchQuery, linkWithContext); - const score = cosineSimilarity(queryEmbedding, linkEmbedding); - // console.log("score", score); - return { - link: links[index], // Use corresponding link from links array - linkWithContext, - score, - originalIndex: index // Store original position - }; + try { + const linkEmbedding = await getEmbedding(linkWithContext); + const score = cosineSimilarity(queryEmbedding, linkEmbedding); + + return { + link: links[index], + linkWithContext, + score, + originalIndex: index + }; + } catch (err) { + // If embedding fails for a link, return with score 0 + return { + link: links[index], + linkWithContext, + score: 0, + originalIndex: index + }; + } })); // Sort links based on similarity scores while preserving original order for equal scores linksAndScores.sort((a, b) => { const scoreDiff = b.score - a.score; - // If scores are equal, maintain original order return scoreDiff === 0 ? 
a.originalIndex - b.originalIndex : scoreDiff; }); From b693c6c23bc151d732951867a0f10d1d78a4a991 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 24 Nov 2024 19:36:18 -0800 Subject: [PATCH 44/51] Update extract.ts --- apps/api/src/controllers/v1/extract.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 0654860c..e527b5b6 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -93,14 +93,20 @@ export async function extractController( // } if (req.body.prompt) { + // Get similarity scores between the search query and each link's context const linksAndScores : { link: string, linkWithContext: string, score: number, originalIndex: number }[] = await performRanking(mappedLinksRerank, mappedLinks.map(l => l.url), mapUrl); + mappedLinks = linksAndScores + // Only keep links that have a similarity score above the threshold .filter(x => x.score > SCORE_THRESHOLD) + // Map back to the original link objects .map(x => mappedLinks.find(link => link.url === x.link)) + // Remove any undefined links, links without URLs, and blocked URLs .filter((x): x is MapDocument => x !== undefined && x.url !== undefined && !isUrlBlocked(x.url)) + // Limit the number of results .slice(0, MAX_RANKING_LIMIT); - // console.log("linksAndScores", linksAndScores); - // console.log("linksAndScores", linksAndScores.length); + + // TODO: handle case where no links are returned } return mappedLinks.map(x => x.url) as string[]; From 30def84c0a14855bfa44048d624c17aab0125dda Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 24 Nov 2024 19:44:51 -0800 Subject: [PATCH 45/51] Nick: scrape timeout + warnings --- apps/api/src/controllers/v1/extract.ts | 10 ++++++---- apps/api/src/controllers/v1/types.ts | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts 
index e527b5b6..477ad959 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -128,7 +128,7 @@ export async function extractController( // Scrape all links in parallel const scrapePromises = links.map(async (url) => { const origin = req.body.origin || "api"; - const timeout = req.body.timeout ?? 30000; + const timeout = Math.floor((req.body.timeout || 40000) * 0.7) || 30000; // Use 70% of total timeout for individual scrapes const jobId = crypto.randomUUID(); const jobPriority = await getJobPriority({ @@ -153,10 +153,8 @@ export async function extractController( jobPriority ); - const totalWait = 0; - try { - const doc = await waitForJob(jobId, timeout + totalWait); + const doc = await waitForJob(jobId, timeout); await getScrapeQueue().remove(jobId); if (earlyReturn) { return null; @@ -216,10 +214,13 @@ export async function extractController( // console.log("completions.extract", completions.extract); let data: any; + let warning = completions.warning ?? ""; try { data = JSON.parse(completions.extract); } catch (e) { + logger.warn(`ExtractController: Error parsing JSON: ${e}`); data = completions.extract; + warning = "JSON could not be parsed correctly. 
Returning raw LLM output..."; } logJob({ @@ -241,5 +242,6 @@ export async function extractController( success: true, data: data, scrape_id: id, + warning: warning }); } \ No newline at end of file diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 3883559a..f354c640 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -163,7 +163,7 @@ export const extractV1Options = z.object({ includeSubdomains: z.boolean().default(true), allowExternalLinks: z.boolean().default(false), origin: z.string().optional().default("api"), - timeout: z.number().int().positive().finite().safe().default(60000), + timeout: z.number().int().positive().finite().safe().default(60000) }).strict(strictMessage) export type ExtractV1Options = z.infer; From d817aa744f0c232717b7c52294f00062c5f36452 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 24 Nov 2024 19:46:31 -0800 Subject: [PATCH 46/51] Update v1.ts --- apps/api/src/routes/v1.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts index e6055a99..048e1efc 100644 --- a/apps/api/src/routes/v1.ts +++ b/apps/api/src/routes/v1.ts @@ -99,7 +99,7 @@ function idempotencyMiddleware(req: Request, res: Response, next: NextFunction) function blocklistMiddleware(req: Request, res: Response, next: NextFunction) { if (typeof req.body.url === "string" && isUrlBlocked(req.body.url)) { if (!res.headersSent) { - return res.status(403).json({ success: false, error: "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." }); + return res.status(403).json({ success: false, error: "URL is blocked intentionally. Firecrawl currently does not support social media scraping due to policy restrictions." 
}); } } next(); @@ -183,7 +183,6 @@ v1Router.post( "/extract", authMiddleware(RateLimiterMode.Scrape), checkCreditsMiddleware(1), - blocklistMiddleware, wrap(extractController) ); From 18b864eacebbe42ea3fc5cb81c599d36bee821a8 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 24 Nov 2024 19:48:13 -0800 Subject: [PATCH 47/51] Update index.ts --- apps/api/src/scraper/scrapeURL/engines/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/scraper/scrapeURL/engines/index.ts b/apps/api/src/scraper/scrapeURL/engines/index.ts index d9168669..d2305d9c 100644 --- a/apps/api/src/scraper/scrapeURL/engines/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/index.ts @@ -16,7 +16,7 @@ const usePlaywright = process.env.PLAYWRIGHT_MICROSERVICE_URL !== '' && process. const useCache = process.env.CACHE_REDIS_URL !== '' && process.env.CACHE_REDIS_URL !== undefined; export const engines: Engine[] = [ - ...(useCache ? [ "cache" as const ] : []), + // ...(useCache ? [ "cache" as const ] : []), ...(useFireEngine ? [ "fire-engine;chrome-cdp" as const, "fire-engine;playwright" as const, "fire-engine;tlsclient" as const ] : []), ...(useScrapingBee ? [ "scrapingbee" as const, "scrapingbeeLoad" as const ] : []), ...(usePlaywright ? 
[ "playwright" as const ] : []), From a18614cd00745f5e19156ed4138938a605401ab4 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 24 Nov 2024 19:48:57 -0800 Subject: [PATCH 48/51] Update queue-jobs.ts --- apps/api/src/services/queue-jobs.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/services/queue-jobs.ts b/apps/api/src/services/queue-jobs.ts index d59056bb..bc2debfe 100644 --- a/apps/api/src/services/queue-jobs.ts +++ b/apps/api/src/services/queue-jobs.ts @@ -109,6 +109,6 @@ export function waitForJob(jobId: string, timeout: number): Promise } } } - }, 100); + }, 250); }) } From 2513efc9718ec92f8d8e26fb4738f3ec257a1e15 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 24 Nov 2024 20:31:38 -0800 Subject: [PATCH 49/51] Update extract.ts --- apps/api/src/controllers/v1/extract.ts | 59 ++++++++++++++++++-------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 477ad959..8202a513 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -30,7 +30,9 @@ const redis = new Redis(process.env.REDIS_URL!); const MAX_EXTRACT_LIMIT = 100; const MAX_RANKING_LIMIT = 10; -const SCORE_THRESHOLD = 0.75; +const INITIAL_SCORE_THRESHOLD = 0.75; +const FALLBACK_SCORE_THRESHOLD = 0.5; +const MIN_REQUIRED_LINKS = 3; /** * Extracts data from the provided URLs based on the request parameters. 
@@ -94,19 +96,28 @@ export async function extractController( if (req.body.prompt) { // Get similarity scores between the search query and each link's context - const linksAndScores : { link: string, linkWithContext: string, score: number, originalIndex: number }[] = await performRanking(mappedLinksRerank, mappedLinks.map(l => l.url), mapUrl); + const linksAndScores = await performRanking(mappedLinksRerank, mappedLinks.map(l => l.url), mapUrl); - mappedLinks = linksAndScores - // Only keep links that have a similarity score above the threshold - .filter(x => x.score > SCORE_THRESHOLD) - // Map back to the original link objects - .map(x => mappedLinks.find(link => link.url === x.link)) - // Remove any undefined links, links without URLs, and blocked URLs - .filter((x): x is MapDocument => x !== undefined && x.url !== undefined && !isUrlBlocked(x.url)) - // Limit the number of results - .slice(0, MAX_RANKING_LIMIT); + // First try with high threshold + let filteredLinks = filterAndProcessLinks(mappedLinks, linksAndScores, INITIAL_SCORE_THRESHOLD); + + // If we don't have enough high-quality links, try with lower threshold + if (filteredLinks.length < MIN_REQUIRED_LINKS) { + logger.info(`Only found ${filteredLinks.length} links with score > ${INITIAL_SCORE_THRESHOLD}. Trying lower threshold...`); + filteredLinks = filterAndProcessLinks(mappedLinks, linksAndScores, FALLBACK_SCORE_THRESHOLD); + + if (filteredLinks.length === 0) { + // If still no results, take top N results regardless of score + logger.warn(`No links found with score > ${FALLBACK_SCORE_THRESHOLD}. 
Taking top ${MIN_REQUIRED_LINKS} results.`); + filteredLinks = linksAndScores + .sort((a, b) => b.score - a.score) + .slice(0, MIN_REQUIRED_LINKS) + .map(x => mappedLinks.find(link => link.url === x.link)) + .filter((x): x is MapDocument => x !== undefined && x.url !== undefined && !isUrlBlocked(x.url)); + } + } - // TODO: handle case where no links are returned + mappedLinks = filteredLinks.slice(0, MAX_RANKING_LIMIT); } return mappedLinks.map(x => x.url) as string[]; @@ -124,8 +135,14 @@ export async function extractController( const processedUrls = await Promise.all(urlPromises); links.push(...processedUrls.flat()); - // console.log("links", links.length); - // Scrape all links in parallel + if (links.length === 0) { + return res.status(400).json({ + success: false, + error: "No valid URLs found to scrape. Try adjusting your search criteria or including more URLs." + }); + } + + // Scrape all links in parallel with retries const scrapePromises = links.map(async (url) => { const origin = req.body.origin || "api"; const timeout = Math.floor((req.body.timeout || 40000) * 0.7) || 30000; // Use 70% of total timeout for individual scrapes @@ -210,9 +227,6 @@ export async function extractController( // Optionally, you could notify an admin or add to a retry queue here }); - - // console.log("completions.extract", completions.extract); - let data: any; let warning = completions.warning ?? 
""; try { @@ -244,4 +258,15 @@ export async function extractController( scrape_id: id, warning: warning }); +} + +function filterAndProcessLinks( + mappedLinks: MapDocument[], + linksAndScores: { link: string, linkWithContext: string, score: number, originalIndex: number }[], + threshold: number +): MapDocument[] { + return linksAndScores + .filter(x => x.score > threshold) + .map(x => mappedLinks.find(link => link.url === x.link)) + .filter((x): x is MapDocument => x !== undefined && x.url !== undefined && !isUrlBlocked(x.url)); } \ No newline at end of file From 8a26f08b1487184eeace21289449d046680736f3 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 24 Nov 2024 20:37:58 -0800 Subject: [PATCH 50/51] Update extract.ts --- apps/api/src/controllers/v1/extract.ts | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 8202a513..d462e651 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -32,7 +32,7 @@ const MAX_EXTRACT_LIMIT = 100; const MAX_RANKING_LIMIT = 10; const INITIAL_SCORE_THRESHOLD = 0.75; const FALLBACK_SCORE_THRESHOLD = 0.5; -const MIN_REQUIRED_LINKS = 3; +const MIN_REQUIRED_LINKS = 1; /** * Extracts data from the provided URLs based on the request parameters. @@ -103,6 +103,7 @@ export async function extractController( // If we don't have enough high-quality links, try with lower threshold if (filteredLinks.length < MIN_REQUIRED_LINKS) { + console.log(`Only found ${filteredLinks.length} links with score > ${INITIAL_SCORE_THRESHOLD}. Trying lower threshold...`); logger.info(`Only found ${filteredLinks.length} links with score > ${INITIAL_SCORE_THRESHOLD}. 
Trying lower threshold...`); filteredLinks = filterAndProcessLinks(mappedLinks, linksAndScores, FALLBACK_SCORE_THRESHOLD); @@ -227,15 +228,8 @@ export async function extractController( // Optionally, you could notify an admin or add to a retry queue here }); - let data: any; - let warning = completions.warning ?? ""; - try { - data = JSON.parse(completions.extract); - } catch (e) { - logger.warn(`ExtractController: Error parsing JSON: ${e}`); - data = completions.extract; - warning = "JSON could not be parsed correctly. Returning raw LLM output..."; - } + let data = completions.extract ?? {}; + let warning = completions.warning; logJob({ job_id: id, From 5522d6af7d2c0f3de094df93ae92b056115763c3 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Tue, 26 Nov 2024 15:01:42 -0300 Subject: [PATCH 51/51] Update extract.ts --- apps/api/src/controllers/v1/extract.ts | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index d462e651..e5f2595c 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -103,7 +103,6 @@ export async function extractController( // If we don't have enough high-quality links, try with lower threshold if (filteredLinks.length < MIN_REQUIRED_LINKS) { - console.log(`Only found ${filteredLinks.length} links with score > ${INITIAL_SCORE_THRESHOLD}. Trying lower threshold...`); logger.info(`Only found ${filteredLinks.length} links with score > ${INITIAL_SCORE_THRESHOLD}. Trying lower threshold...`); filteredLinks = filterAndProcessLinks(mappedLinks, linksAndScores, FALLBACK_SCORE_THRESHOLD); @@ -208,24 +207,17 @@ export async function extractController( logger.child({ method: "extractController/generateOpenAICompletions" }), { mode: "llm", - systemPrompt: "Only use the provided content to answer the question.", + systemPrompt: "Always prioritize using the provided content to answer the question. 
Do not make up an answer. Be concise and follow the schema if provided.", prompt: req.body.prompt, schema: req.body.schema, }, docs.map(x => buildDocument(x)).join('\n') ); - // console.log("completions", completions); - - // if(req.body.extract && req.body.formats.includes("extract")) { - // creditsToBeBilled = 5; - // } - // TODO: change this later // While on beta, we're billing 5 credits per link discovered/scraped. billTeam(req.auth.team_id, req.acuc?.sub_id, links.length * 5).catch(error => { logger.error(`Failed to bill team ${req.auth.team_id} for ${links.length * 5} credits: ${error}`); - // Optionally, you could notify an admin or add to a retry queue here }); let data = completions.extract ?? {}; @@ -254,6 +246,13 @@ export async function extractController( }); } +/** + * Filters links based on their similarity score to the search query. + * @param mappedLinks - The list of mapped links to filter. + * @param linksAndScores - The list of links and their similarity scores. + * @param threshold - The score threshold to filter by. + * @returns The filtered list of links. + */ function filterAndProcessLinks( mappedLinks: MapDocument[], linksAndScores: { link: string, linkWithContext: string, score: number, originalIndex: number }[],