SELFHOST FIXES (#1207)

* fix(extract): construct OpenAI on demand

Fixes a hard crash when the API key is not specified in a self-hosted environment.

* fix(ci): try sleeping

* fix(ci): override host

* fix(ci): wait for server to start

* Support /extract and /crawl for self-hosted (FIR-1097) (#1137)

* Support /extract for self-hosted

This returns the job response from Redis rather than Supabase when DB auth is disabled (self-hosted mode).

* Use getJob for extract and use correct types

* fix(v1/crawl-status): only poll DB for total count if DB is enabled

* feat(snips): TEST_SUITE_SELF_HOSTED

* fix(ci/test-server-self-host): use pr trigger

* fix(scrapeURL): f-e mocking in selfhosted env

* fix(snips): do not try to eval json format on selfhost

* fix(scrapeURL): further f-e mocking

* fix(snips): don't timeout on hard fail polling

* fix(v1/extract-status): fix-up the db-agnostic impl

Unfortunately, the functions had to be separated because the schemas
were too divergent :(

* fix(snips): boost screenshot delay

* feat(ci): test with openai

* feat(ci): extract, search testing

* fix(ci): matrix

* fix(ci): bleh

* Update: fix default google search (#1174)

* fix log title

* search should always work

* asd

* fix ci

---------

Co-authored-by: Nick Roth <nlr06886@gmail.com>
Co-authored-by: William <sdustusun@gmail.com>
This commit is contained in:
Gergő Móricz 2025-02-20 00:41:22 +01:00 committed by GitHub
parent c1ca64fc07
commit 2200f084f3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
24 changed files with 608 additions and 262 deletions

View File

@ -1,22 +1,35 @@
name: Self-hosted Server Test Suite
on:
workflow_dispatch:
pull_request:
branches:
- main
paths:
- apps/api/**
env:
PORT: 3002
REDIS_URL: redis://localhost:6379
HOST: 0.0.0.0
ENV: ${{ secrets.ENV }}
TEST_SUITE_SELF_HOSTED: true
jobs:
test:
name: Run tests
strategy:
matrix:
openai: [true, false]
serper: [true, false]
runs-on: ubuntu-latest
services:
redis:
image: redis
ports:
- 6379:6379
env:
OPENAI_API_KEY: ${{ matrix.openai == true && secrets.OPENAI_API_KEY || '' }}
SERPER_API_KEY: ${{ matrix.serper == true && secrets.SERPER_API_KEY || '' }}
steps:
- uses: actions/checkout@v3
- name: Install pnpm
@ -40,6 +53,8 @@ jobs:
run: npm run workers > worker.log 2>&1 &
working-directory: ./apps/api
id: start_workers
- name: Wait for server
run: pnpx wait-on tcp:3002 -t 15s
- name: Run snippet tests
run: |
npm run test:snips
@ -50,7 +65,7 @@ jobs:
- uses: actions/upload-artifact@v4
if: always()
with:
name: Logs
name: Logs (openai ${{ matrix.openai }}, serper ${{ matrix.serper }})
path: |
./apps/api/api.log
./apps/api/worker.log

View File

@ -56,6 +56,7 @@
"typescript": "^5.4.2"
},
"dependencies": {
"jsdom": "^26.0.0",
"@anthropic-ai/sdk": "^0.24.3",
"@apidevtools/json-schema-ref-parser": "^11.7.3",
"@brillout/import": "^0.2.2",

286
apps/api/pnpm-lock.yaml generated
View File

@ -125,6 +125,9 @@ importers:
joplin-turndown-plugin-gfm:
specifier: ^1.0.12
version: 1.0.12
jsdom:
specifier: ^26.0.0
version: 26.0.0
json-schema-to-zod:
specifier: ^2.3.0
version: 2.3.0
@ -136,7 +139,7 @@ importers:
version: 2.9.0
langchain:
specifier: ^0.2.8
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
languagedetect:
specifier: ^2.0.0
version: 2.0.0
@ -332,6 +335,9 @@ packages:
resolution: {integrity: sha512-WApSdLdXEBb/1FUPca2lteASewEfpjEYJ8oXZP+0gExK5qSfsEKBKcA+WjY6Q4wvXwyv0+W6Kvc372pSceib9w==}
engines: {node: '>= 16'}
'@asamuzakjp/css-color@2.8.3':
resolution: {integrity: sha512-GIc76d9UI1hCvOATjZPyHFmE5qhRccp3/zGfMPapK3jBi+yocEzp6BBB0UnfRYP9NP4FANqUZYb0hnfs3TM3hw==}
'@aws-crypto/crc32@3.0.0':
resolution: {integrity: sha512-IzSgsrxUcsrejQbPVilIKy16kAT52EwB6zSaI+M3xxIhKh5+aldEyvI+z6erM7TCLB2BJsFrtHjp6/4/sr+3dA==}
@ -685,6 +691,34 @@ packages:
resolution: {integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==}
engines: {node: '>=12'}
'@csstools/color-helpers@5.0.1':
resolution: {integrity: sha512-MKtmkA0BX87PKaO1NFRTFH+UnkgnmySQOvNxJubsadusqPEC2aJ9MOQiMceZJJ6oitUl/i0L6u0M1IrmAOmgBA==}
engines: {node: '>=18'}
'@csstools/css-calc@2.1.1':
resolution: {integrity: sha512-rL7kaUnTkL9K+Cvo2pnCieqNpTKgQzy5f+N+5Iuko9HAoasP+xgprVh7KN/MaJVvVL1l0EzQq2MoqBHKSrDrag==}
engines: {node: '>=18'}
peerDependencies:
'@csstools/css-parser-algorithms': ^3.0.4
'@csstools/css-tokenizer': ^3.0.3
'@csstools/css-color-parser@3.0.7':
resolution: {integrity: sha512-nkMp2mTICw32uE5NN+EsJ4f5N+IGFeCFu4bGpiKgb2Pq/7J/MpyLBeQ5ry4KKtRFZaYs6sTmcMYrSRIyj5DFKA==}
engines: {node: '>=18'}
peerDependencies:
'@csstools/css-parser-algorithms': ^3.0.4
'@csstools/css-tokenizer': ^3.0.3
'@csstools/css-parser-algorithms@3.0.4':
resolution: {integrity: sha512-Up7rBoV77rv29d3uKHUIVubz1BTcgyUK72IvCQAbfbMv584xHcGKCKbWh7i8hPrRJ7qU4Y8IO3IY9m+iTB7P3A==}
engines: {node: '>=18'}
peerDependencies:
'@csstools/css-tokenizer': ^3.0.3
'@csstools/css-tokenizer@3.0.3':
resolution: {integrity: sha512-UJnjoFsmxfKUdNYdWgOB0mWUypuLvAfQPH1+pyvRJs6euowbFkFC6P13w1l8mJyi3vxYMxc9kld5jZEGRQs6bw==}
engines: {node: '>=18'}
'@dabh/diagnostics@2.0.3':
resolution: {integrity: sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA==}
@ -1715,6 +1749,10 @@ packages:
resolution: {integrity: sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==}
engines: {node: '>= 14'}
agent-base@7.1.3:
resolution: {integrity: sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==}
engines: {node: '>= 14'}
agentkeepalive@4.5.0:
resolution: {integrity: sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==}
engines: {node: '>= 8.0.0'}
@ -2141,6 +2179,10 @@ packages:
resolution: {integrity: sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==}
engines: {node: '>= 6'}
cssstyle@4.2.1:
resolution: {integrity: sha512-9+vem03dMXG7gDmZ62uqmRiMRNtinIZ9ZyuF6BdxzfOD+FdN5hretzynkn0ReS2DO2GSw76RWHs0UmJPI2zUjw==}
engines: {node: '>=18'}
csv-parse@5.5.6:
resolution: {integrity: sha512-uNpm30m/AGSkLxxy7d9yRXpJQFrZzVWLFBkS+6ngPcZkw/5k3L/jjFuj7tVnEpRn+QgmiXr21nDlhCiUK4ij2A==}
@ -2152,6 +2194,10 @@ packages:
resolution: {integrity: sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==}
engines: {node: '>= 14'}
data-urls@5.0.0:
resolution: {integrity: sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==}
engines: {node: '>=18'}
date-fns@3.6.0:
resolution: {integrity: sha512-fRHTG8g/Gif+kSh50gaGEdToemgfj74aRX3swtiouboip5JDLAyDE9F11nHMIcvOaXeOC6D7SpNhi7uFyB7Uww==}
@ -2197,6 +2243,9 @@ packages:
resolution: {integrity: sha512-9iE1PgSik9HeIIw2JO94IidnE3eBoQrFJ3w7sFuzSX4DpmZ3v5sZpUiV5Swcf6mQEF+Y0ru8Neo+p+nyh2J+hQ==}
engines: {node: '>=10'}
decimal.js@10.5.0:
resolution: {integrity: sha512-8vDa8Qxvr/+d94hSh5P3IJwI5t8/c0KsMp+g8bNw9cY2icONa5aPfvKeieW1WlG0WQYwwhJ7mjui2xtiePQSXw==}
dedent@1.5.3:
resolution: {integrity: sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==}
peerDependencies:
@ -2510,6 +2559,10 @@ packages:
resolution: {integrity: sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==}
engines: {node: '>= 6'}
form-data@4.0.1:
resolution: {integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==}
engines: {node: '>= 6'}
formdata-node@4.4.1:
resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==}
engines: {node: '>= 12.20'}
@ -2647,6 +2700,10 @@ packages:
resolution: {integrity: sha512-oWv4T4yJ52iKrufjnyZPkrN0CH3QnrUqdB6In1g5Fe1mia8GmF36gnfNySxoZtxD5+NmYw1EElVXiBk93UeskA==}
engines: {node: '>=12'}
html-encoding-sniffer@4.0.0:
resolution: {integrity: sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==}
engines: {node: '>=18'}
html-escaper@2.0.2:
resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==}
@ -2686,6 +2743,10 @@ packages:
resolution: {integrity: sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==}
engines: {node: '>= 14'}
https-proxy-agent@7.0.6:
resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==}
engines: {node: '>= 14'}
human-signals@2.1.0:
resolution: {integrity: sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==}
engines: {node: '>=10.17.0'}
@ -2798,6 +2859,9 @@ packages:
resolution: {integrity: sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==}
engines: {node: '>=8'}
is-potential-custom-element-name@1.0.1:
resolution: {integrity: sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==}
is-retry-allowed@2.2.0:
resolution: {integrity: sha512-XVm7LOeLpTW4jV19QSH38vkswxoLud8sQ57YwJVTPWdiaI9I8keEhGFpBlslyVsgdQy4Opg8QOLb8YRgsyZiQg==}
engines: {node: '>=10'}
@ -3012,6 +3076,15 @@ packages:
jsbn@1.1.0:
resolution: {integrity: sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==}
jsdom@26.0.0:
resolution: {integrity: sha512-BZYDGVAIriBWTpIxYzrXjv3E/4u8+/pSG5bQdIYCbNCGOvsPkDQfTVLAIXAf9ETdCpduCVTkDe2NNZ8NIwUVzw==}
engines: {node: '>=18'}
peerDependencies:
canvas: ^3.0.0
peerDependenciesMeta:
canvas:
optional: true
jsesc@2.5.2:
resolution: {integrity: sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==}
engines: {node: '>=4'}
@ -3298,6 +3371,9 @@ packages:
resolution: {integrity: sha512-CQl19J/g+Hbjbv4Y3mFNNXFEL/5t/KCg8POCuUqd4rMKjGG+j1ybER83hxV58zL+dFI1PTkt3GNFSHRt+d8qEQ==}
engines: {node: 14 || >=16.14}
lru-cache@10.4.3:
resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==}
lru-cache@5.1.1:
resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==}
@ -3588,6 +3664,9 @@ packages:
resolution: {integrity: sha512-1MQz1Ed8z2yckoBeSfkQHHO9K1yDRxxtotKSJ9yvcTUUxSvfvzEq5GwBrjjHEpMlq/k5gvXdmJ1SbYxWtpNoVg==}
engines: {node: '>=8'}
nwsapi@2.2.16:
resolution: {integrity: sha512-F1I/bimDpj3ncaNDhfyMWuFqmQDBwDB0Fogc2qpL3BWvkQteFD/8BzWuIRl83rq0DXfm8SGt/HFhLXZyljTXcQ==}
object-assign@4.1.1:
resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==}
engines: {node: '>=0.10.0'}
@ -3697,6 +3776,9 @@ packages:
parse5@7.1.2:
resolution: {integrity: sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==}
parse5@7.2.1:
resolution: {integrity: sha512-BuBYQYlv1ckiPdQi/ohiivi9Sagc9JG+Ozs0r7b/0iK3sKmrb0b9FdWdBbOdx6hBCM/F9Ir82ofnBhtZOjCRPQ==}
parseley@0.12.1:
resolution: {integrity: sha512-e6qHKe3a9HWr0oMRVDTRhKce+bRO8VGQR3NyVwcjwrbhMmFCX9KszEV35+rn4AdilFAq9VPxP/Fe1wC9Qjd2lw==}
@ -4015,6 +4097,9 @@ packages:
resolution: {integrity: sha512-s+pyvQeIKIZ0dx5iJiQk1tPLJAWln39+MI5jtM8wnyws+G5azk+dMnMX0qfbqNetKKNgcWWOdi0sfm+FbQbgdQ==}
engines: {node: '>=10.0.0'}
rrweb-cssom@0.8.0:
resolution: {integrity: sha512-guoltQEx+9aMf2gDZ0s62EcV8lsXR+0w8915TC3ITdn2YueuNjdAYh/levpU9nFaoChh9RUS5ZdQMrKfVEN9tw==}
rusha@0.8.14:
resolution: {integrity: sha512-cLgakCUf6PedEu15t8kbsjnwIFFR2D4RfL+W3iWFJ4iac7z4B0ZI8fxy4R3J956kAI68HclCFGL8MPoUVC3qVA==}
@ -4034,6 +4119,10 @@ packages:
sax@1.4.1:
resolution: {integrity: sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==}
saxes@6.0.0:
resolution: {integrity: sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==}
engines: {node: '>=v12.22.7'}
scheduler@0.23.2:
resolution: {integrity: sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==}
@ -4260,6 +4349,9 @@ packages:
resolution: {integrity: sha512-SzRP5LQ6Ts2G5NyAa/jg16s8e3R7rfdFjizy1zeoecYWw+nGL+YA1xZvW/+iJmidBGSdLkuvdwTYEyJEb+EiUw==}
engines: {node: '>=0.2.6'}
symbol-tree@3.2.4:
resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==}
systeminformation@5.22.11:
resolution: {integrity: sha512-aLws5yi4KCHTb0BVvbodQY5bY8eW4asMRDTxTW46hqw9lGjACX6TlLdJrkdoHYRB0qs+MekqEq1zG7WDnWE8Ug==}
engines: {node: '>=8.0.0'}
@ -4315,6 +4407,10 @@ packages:
resolution: {integrity: sha512-r0eojU4bI8MnHr8c5bNo7lJDdI2qXlWWJk6a9EAFG7vbhTjElYhBVS3/miuE0uOuoLdb8Mc/rVfsmm6eo5o9GA==}
hasBin: true
tough-cookie@5.1.1:
resolution: {integrity: sha512-Ek7HndSVkp10hmHP9V4qZO1u+pn1RU5sI0Fw+jCU3lyvuMZcgqsNgc6CmJJZyByK4Vm/qotGRJlfgAX8q+4JiA==}
engines: {node: '>=16'}
tr46@0.0.3:
resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
@ -4322,6 +4418,10 @@ packages:
resolution: {integrity: sha512-2lv/66T7e5yNyhAAC4NaKe5nVavzuGJQVVtRYLyQ2OI8tsJ61PMLlelehb0wi2Hx6+hT/OJUWZcw8MjlSRnxvw==}
engines: {node: '>=14'}
tr46@5.0.0:
resolution: {integrity: sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==}
engines: {node: '>=18'}
triple-beam@1.4.1:
resolution: {integrity: sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg==}
engines: {node: '>= 14.0.0'}
@ -4483,6 +4583,10 @@ packages:
resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==}
engines: {node: '>= 0.8'}
w3c-xmlserializer@5.0.0:
resolution: {integrity: sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==}
engines: {node: '>=18'}
walker@1.0.8:
resolution: {integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==}
@ -4505,13 +4609,25 @@ packages:
resolution: {integrity: sha512-p41ogyeMUrw3jWclHWTQg1k05DSVXPLcVxRTYsXUk+ZooOCZLcoYgPZ/HL/D/N+uQPOtcp1me1WhBEaX02mhWg==}
engines: {node: '>=12'}
whatwg-encoding@3.1.1:
resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==}
engines: {node: '>=18'}
whatwg-fetch@3.6.20:
resolution: {integrity: sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==}
whatwg-mimetype@4.0.0:
resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==}
engines: {node: '>=18'}
whatwg-url@13.0.0:
resolution: {integrity: sha512-9WWbymnqj57+XEuqADHrCJ2eSXzn8WXIW/YSGaZtb2WKAInQ6CHfaUUcTyyver0p8BDg5StLQq8h1vtZuwmOig==}
engines: {node: '>=16'}
whatwg-url@14.1.1:
resolution: {integrity: sha512-mDGf9diDad/giZ/Sm9Xi2YcyzaFpbdLpJPr+E9fSkyQ7KpQD4SdFcugkRQYzhmfI4KeV4Qpnn2sKPdo+kmsgRQ==}
engines: {node: '>=18'}
whatwg-url@5.0.0:
resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==}
@ -4583,6 +4699,10 @@ packages:
utf-8-validate:
optional: true
xml-name-validator@5.0.0:
resolution: {integrity: sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==}
engines: {node: '>=18'}
xml2js@0.6.2:
resolution: {integrity: sha512-T4rieHaC1EXcES0Kxxj4JWgaUQHDk+qwHcYOCFHfiwKz7tOVPLq7Hjq9dM1WCMhylqMEfP7hMcOIChvotiZegA==}
engines: {node: '>=4.0.0'}
@ -4595,6 +4715,9 @@ packages:
resolution: {integrity: sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==}
engines: {node: '>=4.0'}
xmlchars@2.2.0:
resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==}
xtend@4.0.2:
resolution: {integrity: sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==}
engines: {node: '>=0.4'}
@ -4675,6 +4798,14 @@ snapshots:
'@types/json-schema': 7.0.15
js-yaml: 4.1.0
'@asamuzakjp/css-color@2.8.3':
dependencies:
'@csstools/css-calc': 2.1.1(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)
'@csstools/css-color-parser': 3.0.7(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)
'@csstools/css-parser-algorithms': 3.0.4(@csstools/css-tokenizer@3.0.3)
'@csstools/css-tokenizer': 3.0.3
lru-cache: 10.4.3
'@aws-crypto/crc32@3.0.0':
dependencies:
'@aws-crypto/util': 3.0.0
@ -5413,6 +5544,26 @@ snapshots:
dependencies:
'@jridgewell/trace-mapping': 0.3.9
'@csstools/color-helpers@5.0.1': {}
'@csstools/css-calc@2.1.1(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)':
dependencies:
'@csstools/css-parser-algorithms': 3.0.4(@csstools/css-tokenizer@3.0.3)
'@csstools/css-tokenizer': 3.0.3
'@csstools/css-color-parser@3.0.7(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)':
dependencies:
'@csstools/color-helpers': 5.0.1
'@csstools/css-calc': 2.1.1(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)
'@csstools/css-parser-algorithms': 3.0.4(@csstools/css-tokenizer@3.0.3)
'@csstools/css-tokenizer': 3.0.3
'@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3)':
dependencies:
'@csstools/css-tokenizer': 3.0.3
'@csstools/css-tokenizer@3.0.3': {}
'@dabh/diagnostics@2.0.3':
dependencies:
colorspace: 1.1.4
@ -5642,13 +5793,13 @@ snapshots:
'@jsdevtools/ono@7.1.3': {}
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
dependencies:
ansi-styles: 5.2.0
camelcase: 6.3.0
decamelize: 1.2.0
js-tiktoken: 1.0.12
langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
langsmith: 0.1.34(7lljbsleilzgkaubvlq4ipicvq)
ml-distance: 4.0.1
mustache: 4.2.0
p-queue: 6.6.2
@ -5660,9 +5811,9 @@ snapshots:
- langchain
- openai
'@langchain/openai@0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
'@langchain/openai@0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
dependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
js-tiktoken: 1.0.12
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
zod: 3.23.8
@ -5671,9 +5822,9 @@ snapshots:
- encoding
- langchain
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
dependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
js-tiktoken: 1.0.12
transitivePeerDependencies:
- langchain
@ -6811,6 +6962,8 @@ snapshots:
transitivePeerDependencies:
- supports-color
agent-base@7.1.3: {}
agentkeepalive@4.5.0:
dependencies:
humanize-ms: 1.2.1
@ -7321,12 +7474,22 @@ snapshots:
css-what@6.1.0: {}
cssstyle@4.2.1:
dependencies:
'@asamuzakjp/css-color': 2.8.3
rrweb-cssom: 0.8.0
csv-parse@5.5.6: {}
data-uri-to-buffer@4.0.1: {}
data-uri-to-buffer@6.0.2: {}
data-urls@5.0.0:
dependencies:
whatwg-mimetype: 4.0.0
whatwg-url: 14.1.1
date-fns@3.6.0: {}
debug@2.6.9:
@ -7351,6 +7514,8 @@ snapshots:
decamelize@4.0.0: {}
decimal.js@10.5.0: {}
dedent@1.5.3: {}
deepmerge@4.3.1: {}
@ -7661,6 +7826,12 @@ snapshots:
combined-stream: 1.0.8
mime-types: 2.1.35
form-data@4.0.1:
dependencies:
asynckit: 0.4.0
combined-stream: 1.0.8
mime-types: 2.1.35
formdata-node@4.4.1:
dependencies:
node-domexception: 1.0.0
@ -7795,6 +7966,10 @@ snapshots:
dependencies:
whatwg-encoding: 2.0.0
html-encoding-sniffer@4.0.0:
dependencies:
whatwg-encoding: 3.1.1
html-escaper@2.0.2: {}
html-to-text@9.0.5:
@ -7875,6 +8050,13 @@ snapshots:
transitivePeerDependencies:
- supports-color
https-proxy-agent@7.0.6:
dependencies:
agent-base: 7.1.3
debug: 4.3.5
transitivePeerDependencies:
- supports-color
human-signals@2.1.0: {}
humanize-ms@1.2.1:
@ -7984,6 +8166,8 @@ snapshots:
is-plain-obj@2.1.0: {}
is-potential-custom-element-name@1.0.1: {}
is-retry-allowed@2.2.0: {}
is-stream@2.0.1: {}
@ -8400,6 +8584,34 @@ snapshots:
jsbn@1.1.0: {}
jsdom@26.0.0:
dependencies:
cssstyle: 4.2.1
data-urls: 5.0.0
decimal.js: 10.5.0
form-data: 4.0.1
html-encoding-sniffer: 4.0.0
http-proxy-agent: 7.0.2
https-proxy-agent: 7.0.6
is-potential-custom-element-name: 1.0.1
nwsapi: 2.2.16
parse5: 7.2.1
rrweb-cssom: 0.8.0
saxes: 6.0.0
symbol-tree: 3.2.4
tough-cookie: 5.1.1
w3c-xmlserializer: 5.0.0
webidl-conversions: 7.0.0
whatwg-encoding: 3.1.1
whatwg-mimetype: 4.0.0
whatwg-url: 14.1.1
ws: 8.18.0
xml-name-validator: 5.0.0
transitivePeerDependencies:
- bufferutil
- supports-color
- utf-8-validate
jsesc@2.5.2: {}
json-parse-even-better-errors@2.3.1: {}
@ -8435,17 +8647,17 @@ snapshots:
kuler@2.0.0: {}
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
dependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
'@langchain/openai': 0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
'@langchain/openai': 0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
binary-extensions: 2.3.0
js-tiktoken: 1.0.12
js-yaml: 4.1.0
jsonpointer: 5.0.1
langchainhub: 0.0.11
langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
langsmith: 0.1.34(7lljbsleilzgkaubvlq4ipicvq)
ml-distance: 4.0.1
openapi-types: 12.1.3
p-retry: 4.6.2
@ -8463,6 +8675,7 @@ snapshots:
handlebars: 4.7.8
html-to-text: 9.0.5
ioredis: 5.4.1
jsdom: 26.0.0
mammoth: 1.7.2
mongodb: 6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3)
pdf-parse: 1.1.1
@ -8475,7 +8688,7 @@ snapshots:
langchainhub@0.0.11: {}
langsmith@0.1.34(npkyd6f7wyl3urgrzoxaktl5a4):
langsmith@0.1.34(7lljbsleilzgkaubvlq4ipicvq):
dependencies:
'@types/uuid': 9.0.8
commander: 10.0.1
@ -8484,8 +8697,8 @@ snapshots:
p-retry: 4.6.2
uuid: 9.0.1
optionalDependencies:
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
languagedetect@2.0.0: {}
@ -8554,6 +8767,8 @@ snapshots:
lru-cache@10.3.0: {}
lru-cache@10.4.3: {}
lru-cache@5.1.1:
dependencies:
yallist: 3.1.1
@ -8849,6 +9064,8 @@ snapshots:
num-sort@2.1.0: {}
nwsapi@2.2.16: {}
object-assign@4.1.1: {}
object-inspect@1.13.1: {}
@ -8979,6 +9196,10 @@ snapshots:
dependencies:
entities: 4.5.0
parse5@7.2.1:
dependencies:
entities: 4.5.0
parseley@0.12.1:
dependencies:
leac: 0.6.0
@ -9321,6 +9542,8 @@ snapshots:
robots-parser@3.0.1: {}
rrweb-cssom@0.8.0: {}
rusha@0.8.14: {}
safe-buffer@5.1.2: {}
@ -9333,6 +9556,10 @@ snapshots:
sax@1.4.1: {}
saxes@6.0.0:
dependencies:
xmlchars: 2.2.0
scheduler@0.23.2:
dependencies:
loose-envify: 1.4.0
@ -9583,6 +9810,8 @@ snapshots:
sylvester@0.0.12: {}
symbol-tree@3.2.4: {}
systeminformation@5.22.11: {}
tar-fs@3.0.5:
@ -9640,12 +9869,20 @@ snapshots:
touch@3.1.1: {}
tough-cookie@5.1.1:
dependencies:
tldts: 6.1.75
tr46@0.0.3: {}
tr46@4.1.1:
dependencies:
punycode: 2.3.1
tr46@5.0.0:
dependencies:
punycode: 2.3.1
triple-beam@1.4.1: {}
ts-jest@29.1.4(@babel/core@7.24.6)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.6))(jest@29.7.0(@types/node@20.14.1)(ts-node@10.9.2(@types/node@20.14.1)(typescript@5.4.5)))(typescript@5.4.5):
@ -9777,6 +10014,10 @@ snapshots:
vary@1.1.2: {}
w3c-xmlserializer@5.0.0:
dependencies:
xml-name-validator: 5.0.0
walker@1.0.8:
dependencies:
makeerror: 1.0.12
@ -9793,13 +10034,24 @@ snapshots:
dependencies:
iconv-lite: 0.6.3
whatwg-encoding@3.1.1:
dependencies:
iconv-lite: 0.6.3
whatwg-fetch@3.6.20: {}
whatwg-mimetype@4.0.0: {}
whatwg-url@13.0.0:
dependencies:
tr46: 4.1.1
webidl-conversions: 7.0.0
whatwg-url@14.1.1:
dependencies:
tr46: 5.0.0
webidl-conversions: 7.0.0
whatwg-url@5.0.0:
dependencies:
tr46: 0.0.3
@ -9868,6 +10120,8 @@ snapshots:
ws@8.18.0: {}
xml-name-validator@5.0.0: {}
xml2js@0.6.2:
dependencies:
sax: 1.4.1
@ -9877,6 +10131,8 @@ snapshots:
xmlbuilder@11.0.1: {}
xmlchars@2.2.0: {}
xtend@4.0.2: {}
y18n@5.0.8: {}

View File

@ -30,7 +30,7 @@ async function batchScrape(body: BatchScrapeRequestInput): ReturnType<typeof bat
x = await batchScrapeStatus(bss.body.id);
expect(x.statusCode).toBe(200);
expect(typeof x.body.status).toBe("string");
} while (x.body.status !== "completed")
} while (x.body.status === "scraping");
expectBatchScrapeToSucceed(x);
return x;
@ -53,40 +53,51 @@ function expectBatchScrapeToSucceed(response: Awaited<ReturnType<typeof batchScr
}
describe("Batch scrape tests", () => {
describe("JSON format", () => {
it.concurrent("works", async () => {
const response = await batchScrape({
urls: ["http://firecrawl.dev"],
formats: ["json"],
jsonOptions: {
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
schema: {
type: "object",
properties: {
company_mission: {
type: "string",
},
supports_sso: {
type: "boolean",
},
is_open_source: {
type: "boolean",
urls: ["http://firecrawl.dev"]
});
expect(response.body.data[0]).toHaveProperty("markdown");
expect(response.body.data[0].markdown).toContain("Firecrawl");
}, 30000);
if (!process.env.TEST_SUITE_SELF_HOSTED) {
describe("JSON format", () => {
it.concurrent("works", async () => {
const response = await batchScrape({
urls: ["http://firecrawl.dev"],
formats: ["json"],
jsonOptions: {
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
schema: {
type: "object",
properties: {
company_mission: {
type: "string",
},
supports_sso: {
type: "boolean",
},
is_open_source: {
type: "boolean",
},
},
required: ["company_mission", "supports_sso", "is_open_source"],
},
},
required: ["company_mission", "supports_sso", "is_open_source"],
},
},
});
expect(response.body.data[0]).toHaveProperty("json");
expect(response.body.data[0].json).toHaveProperty("company_mission");
expect(typeof response.body.data[0].json.company_mission).toBe("string");
expect(response.body.data[0].json).toHaveProperty("supports_sso");
expect(response.body.data[0].json.supports_sso).toBe(false);
expect(typeof response.body.data[0].json.supports_sso).toBe("boolean");
expect(response.body.data[0].json).toHaveProperty("is_open_source");
expect(response.body.data[0].json.is_open_source).toBe(true);
expect(typeof response.body.data[0].json.is_open_source).toBe("boolean");
}, 30000);
});
expect(response.body.data[0]).toHaveProperty("json");
expect(response.body.data[0].json).toHaveProperty("company_mission");
expect(typeof response.body.data[0].json.company_mission).toBe("string");
expect(response.body.data[0].json).toHaveProperty("supports_sso");
expect(response.body.data[0].json.supports_sso).toBe(false);
expect(typeof response.body.data[0].json.supports_sso).toBe("boolean");
expect(response.body.data[0].json).toHaveProperty("is_open_source");
expect(response.body.data[0].json.is_open_source).toBe(true);
expect(typeof response.body.data[0].json.is_open_source).toBe("boolean");
}, 30000);
});
}
});

View File

@ -30,7 +30,7 @@ async function crawl(body: CrawlRequestInput): ReturnType<typeof crawlStatus> {
x = await crawlStatus(cs.body.id);
expect(x.statusCode).toBe(200);
expect(typeof x.body.status).toBe("string");
} while (x.body.status !== "completed")
} while (x.body.status === "scraping");
expectCrawlToSucceed(x);
return x;

View File

@ -30,7 +30,7 @@ async function extract(body: ExtractRequestInput): Promise<ExtractResponse> {
x = await extractStatus(es.body.id);
expect(x.statusCode).toBe(200);
expect(typeof x.body.status).toBe("string");
} while (x.body.status !== "completed");
} while (x.body.status === "processing");
expectExtractToSucceed(x);
return x.body;
@ -51,31 +51,37 @@ function expectExtractToSucceed(response: Awaited<ReturnType<typeof extractStatu
}
describe("Extract tests", () => {
it.concurrent("works", async () => {
const res = await extract({
urls: ["https://firecrawl.dev"],
schema: {
"type": "object",
"properties": {
"company_mission": {
"type": "string"
if (!process.env.TEST_SUITE_SELF_HOSTED || process.env.OPENAI_API_KEY) {
it.concurrent("works", async () => {
const res = await extract({
urls: ["https://firecrawl.dev"],
schema: {
"type": "object",
"properties": {
"company_mission": {
"type": "string"
},
"is_open_source": {
"type": "boolean"
}
},
"is_open_source": {
"type": "boolean"
}
"required": [
"company_mission",
"is_open_source"
]
},
"required": [
"company_mission",
"is_open_source"
]
},
origin: "api-sdk",
});
origin: "api-sdk",
});
expect(res.data).toHaveProperty("company_mission");
expect(typeof res.data.company_mission).toBe("string")
expect(res.data).toHaveProperty("is_open_source");
expect(typeof res.data.is_open_source).toBe("boolean");
expect(res.data.is_open_source).toBe(true);
}, 60000);
expect(res.data).toHaveProperty("company_mission");
expect(typeof res.data.company_mission).toBe("string")
expect(res.data).toHaveProperty("is_open_source");
expect(typeof res.data.is_open_source).toBe("boolean");
expect(res.data.is_open_source).toBe(true);
}, 60000);
} else {
it.concurrent("dummy test", () => {
expect(true).toBe(true);
});
}
});

View File

@ -41,41 +41,13 @@ describe("Scrape tests", () => {
);
}, 10000);
describe("Ad blocking (f-e dependant)", () => {
it.concurrent("blocks ads by default", async () => {
const response = await scrape({
url: "https://canyoublockit.com/testing/",
});
it("works", async () => {
const response = await scrape({
url: "http://firecrawl.dev"
});
expect(response.markdown).not.toContain(".g.doubleclick.net/");
}, 10000);
it.concurrent("doesn't block ads if explicitly disabled", async () => {
const response = await scrape({
url: "https://canyoublockit.com/testing/",
blockAds: false,
});
expect(response.markdown).toContain(".g.doubleclick.net/");
}, 10000);
});
describe("Location API (f-e dependant)", () => {
it.concurrent("works without specifying an explicit location", async () => {
const response = await scrape({
url: "https://iplocation.com",
});
}, 10000);
it.concurrent("works with country US", async () => {
const response = await scrape({
url: "https://iplocation.com",
location: { country: "US" },
});
expect(response.markdown).toContain("| Country | United States |");
}, 10000);
});
expect(response.markdown).toContain("Firecrawl");
}, 10000);
describe("JSON scrape support", () => {
it.concurrent("returns parseable JSON", async () => {
@ -89,82 +61,122 @@ describe("Scrape tests", () => {
}, 25000); // TODO: mock and shorten
});
describe("Screenshot", () => {
it.concurrent("screenshot format works", async () => {
const response = await scrape({
url: "http://firecrawl.dev",
formats: ["screenshot"]
});
expect(typeof response.screenshot).toBe("string");
}, 15000);
if (!process.env.TEST_SUITE_SELF_HOSTED) {
describe("Ad blocking (f-e dependant)", () => {
it.concurrent("blocks ads by default", async () => {
const response = await scrape({
url: "https://canyoublockit.com/testing/",
});
it.concurrent("screenshot@fullPage format works", async () => {
const response = await scrape({
url: "http://firecrawl.dev",
formats: ["screenshot@fullPage"]
});
expect(typeof response.screenshot).toBe("string");
}, 15000);
});
expect(response.markdown).not.toContain(".g.doubleclick.net/");
}, 10000);
describe("JSON format", () => {
it.concurrent("works", async () => {
const response = await scrape({
url: "http://firecrawl.dev",
formats: ["json"],
jsonOptions: {
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
schema: {
type: "object",
properties: {
company_mission: {
type: "string",
},
supports_sso: {
type: "boolean",
},
is_open_source: {
type: "boolean",
it.concurrent("doesn't block ads if explicitly disabled", async () => {
const response = await scrape({
url: "https://canyoublockit.com/testing/",
blockAds: false,
});
expect(response.markdown).toContain(".g.doubleclick.net/");
}, 10000);
});
describe("Location API (f-e dependant)", () => {
it.concurrent("works without specifying an explicit location", async () => {
const response = await scrape({
url: "https://iplocation.com",
});
}, 10000);
it.concurrent("works with country US", async () => {
const response = await scrape({
url: "https://iplocation.com",
location: { country: "US" },
});
expect(response.markdown).toContain("| Country | United States |");
}, 10000);
});
describe("Screenshot (f-e/sb dependant)", () => {
it.concurrent("screenshot format works", async () => {
const response = await scrape({
url: "http://firecrawl.dev",
formats: ["screenshot"]
});
expect(typeof response.screenshot).toBe("string");
}, 30000);
it.concurrent("screenshot@fullPage format works", async () => {
const response = await scrape({
url: "http://firecrawl.dev",
formats: ["screenshot@fullPage"]
});
expect(typeof response.screenshot).toBe("string");
}, 30000);
});
describe("Proxy API (f-e dependant)", () => {
it.concurrent("undefined works", async () => {
await scrape({
url: "http://firecrawl.dev",
});
}, 15000);
it.concurrent("basic works", async () => {
await scrape({
url: "http://firecrawl.dev",
proxy: "basic",
});
}, 15000);
it.concurrent("stealth works", async () => {
await scrape({
url: "http://firecrawl.dev",
proxy: "stealth",
});
}, 15000);
});
}
if (!process.env.TEST_SUITE_SELF_HOSTED || process.env.OPENAI_API_KEY) {
describe("JSON format", () => {
it.concurrent("works", async () => {
const response = await scrape({
url: "http://firecrawl.dev",
formats: ["json"],
jsonOptions: {
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
schema: {
type: "object",
properties: {
company_mission: {
type: "string",
},
supports_sso: {
type: "boolean",
},
is_open_source: {
type: "boolean",
},
},
required: ["company_mission", "supports_sso", "is_open_source"],
},
required: ["company_mission", "supports_sso", "is_open_source"],
},
},
});
expect(response).toHaveProperty("json");
expect(response.json).toHaveProperty("company_mission");
expect(typeof response.json.company_mission).toBe("string");
expect(response.json).toHaveProperty("supports_sso");
expect(response.json.supports_sso).toBe(false);
expect(typeof response.json.supports_sso).toBe("boolean");
expect(response.json).toHaveProperty("is_open_source");
expect(response.json.is_open_source).toBe(true);
expect(typeof response.json.is_open_source).toBe("boolean");
}, 30000);
});
describe("Proxy API (f-e dependant)", () => {
it.concurrent("undefined works", async () => {
await scrape({
url: "http://firecrawl.dev",
});
}, 15000);
it.concurrent("basic works", async () => {
await scrape({
url: "http://firecrawl.dev",
proxy: "basic",
});
}, 15000);
it.concurrent("stealth works", async () => {
await scrape({
url: "http://firecrawl.dev",
proxy: "stealth",
});
}, 15000);
});
});
expect(response).toHaveProperty("json");
expect(response.json).toHaveProperty("company_mission");
expect(typeof response.json.company_mission).toBe("string");
expect(response.json).toHaveProperty("supports_sso");
expect(response.json.supports_sso).toBe(false);
expect(typeof response.json.supports_sso).toBe("boolean");
expect(response.json).toHaveProperty("is_open_source");
expect(response.json.is_open_source).toBe(true);
expect(typeof response.json.is_open_source).toBe("boolean");
}, 30000);
});
}
});

View File

@ -27,8 +27,8 @@ async function search(body: SearchRequestInput): Promise<Document> {
return raw.body.data;
}
describe("Scrape tests", () => {
it("works", async () => {
describe("Search tests", () => {
it.concurrent("works", async () => {
await search({
query: "firecrawl"
});

View File

@ -13,13 +13,13 @@ import {
getDoneJobsOrderedLength,
isCrawlKickoffFinished,
} from "../../lib/crawl-redis";
import { getScrapeQueue } from "../../services/queue-service";
import { getScrapeQueue, QueueFunction } from "../../services/queue-service";
import {
supabaseGetJobById,
supabaseGetJobsById,
} from "../../lib/supabase-jobs";
import { configDotenv } from "dotenv";
import type { Job, JobState } from "bullmq";
import type { Job, JobState, Queue } from "bullmq";
import { logger } from "../../lib/logger";
import { supabase_service } from "../../services/supabase";
import { getConcurrencyLimitedJobs } from "../../lib/concurrency-limit";
@ -245,7 +245,7 @@ export async function crawlStatusController(
let totalCount = jobIDs.length;
if (totalCount === 0) {
if (totalCount === 0 && process.env.USE_DB_AUTHENTICATION === "true") {
const x = await supabase_service
.from('firecrawl_jobs')
.select('*', { count: 'exact', head: true })

View File

@ -1,7 +1,34 @@
import { Response } from "express";
import { supabaseGetJobsById } from "../../lib/supabase-jobs";
import { RequestWithAuth } from "./types";
import { getExtract, getExtractExpiry } from "../../lib/extract/extract-redis";
import { DBJob, PseudoJob } from "./crawl-status";
import { getExtractQueue } from "../../services/queue-service";
import { ExtractResult } from "../../lib/extract/extraction-service";
import { supabaseGetJobById } from "../../lib/supabase-jobs";
/**
 * Looks up an extract job by ID, combining what the BullMQ extract queue and
 * the database know about it.
 *
 * The queue is always queried. The database is only queried when
 * `USE_DB_AUTHENTICATION` is exactly `"true"`; otherwise the DB side resolves
 * to `null` and only the queue job is used.
 *
 * When both sources have the job, the database `docs` win for the result,
 * while state, scrape options, timestamp and failure reason come from the
 * queue job.
 *
 * @param id - ID of the extract job.
 * @returns A `PseudoJob` view of the job, or `null` when neither source has it.
 */
export async function getExtractJob(id: string): Promise<PseudoJob<ExtractResult> | null> {
  const [bullJob, dbJob] = await Promise.all([
    getExtractQueue().getJob(id),
    (process.env.USE_DB_AUTHENTICATION === "true" ? supabaseGetJobById(id) : null) as Promise<DBJob | null>,
  ]);

  if (!bullJob && !dbJob) return null;

  // Prefer the persisted result; fall back to the queue job's return value.
  const data = dbJob?.docs ?? bullJob?.returnvalue?.data;

  const job: PseudoJob<any> = {
    id,
    // getState is an instance method of the queue job. It must be bound, otherwise
    // `job.getState()` would run it with the pseudo job as `this` instead of bullJob.
    getState: bullJob ? bullJob.getState.bind(bullJob) : (() => dbJob!.success ? "completed" : "failed"),
    returnvalue: data,
    data: {
      scrapeOptions: bullJob ? bullJob.data.scrapeOptions : dbJob!.page_options,
    },
    timestamp: bullJob ? bullJob.timestamp : new Date(dbJob!.date_added).valueOf(),
    // Normalise empty/nullish failure reasons to undefined.
    failedReason: (bullJob ? bullJob.failedReason : dbJob!.message) || undefined,
  };

  return job;
}
export async function extractStatusController(
req: RequestWithAuth<{ jobId: string }, any, any>,
@ -16,24 +43,29 @@ export async function extractStatusController(
});
}
let data: any[] = [];
let data: ExtractResult | [] = [];
if (extract.status === "completed") {
const jobData = await supabaseGetJobsById([req.params.jobId]);
if (!jobData || jobData.length === 0) {
const jobData = await getExtractJob(req.params.jobId);
if (!jobData) {
return res.status(404).json({
success: false,
error: "Job not found",
});
}
data = jobData[0].docs;
if (!jobData.returnvalue) {
// if we got in the split-second where the redis is updated but the bull isn't
// just pretend it's still processing - MG
extract.status = "processing";
} else {
data = jobData.returnvalue ?? [];
}
}
// console.log(extract.sources);
return res.status(200).json({
success: extract.status === "failed" ? false : true,
data: data,
data,
status: extract.status,
error: extract?.error ?? undefined,
expiresAt: (await getExtractExpiry(req.params.jobId)).toISOString(),

View File

@ -7,7 +7,6 @@ import {
} from "../build-prompts";
import OpenAI from "openai";
import { logger } from "../../../lib/logger";
const openai = new OpenAI();
export async function analyzeSchemaAndPrompt(
urls: string[],
@ -40,6 +39,7 @@ export async function analyzeSchemaAndPrompt(
const model = "gpt-4o";
const openai = new OpenAI();
const result = await openai.beta.chat.completions.parse({
model: model,
messages: [

View File

@ -48,7 +48,7 @@ interface ExtractServiceOptions {
cacheKey?: string;
}
interface ExtractResult {
export interface ExtractResult {
success: boolean;
data?: any;
extractId: string;

View File

@ -3,10 +3,6 @@ import { Document } from "../../../controllers/v1/types";
import { logger } from "../../logger";
import OpenAI from "openai";
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
const pinecone = new Pinecone({
apiKey: process.env.PINECONE_API_KEY!,
});
@ -27,6 +23,10 @@ export interface PageMetadata {
}
async function getEmbedding(text: string) {
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
const embedding = await openai.embeddings.create({
model: "text-embedding-3-small",
input: text,

View File

@ -1,9 +1,5 @@
import OpenAI from "openai";
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
interface Message {
role: "system" | "user" | "assistant";
content: string;
@ -19,6 +15,10 @@ interface GenerateTextOptions {
export async function generateText(options: GenerateTextOptions) {
const { model, messages, temperature = 0.7, maxTokens } = options;
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
const completion = await openai.chat.completions.create({
model,
messages,

View File

@ -1,14 +1,13 @@
import axios from "axios";
import { configDotenv } from "dotenv";
import OpenAI from "openai";
configDotenv();
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
async function getEmbedding(text: string) {
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
const embedding = await openai.embeddings.create({
model: "text-embedding-3-small",
input: text,

View File

@ -10,6 +10,7 @@ import {
UnsupportedFileError,
} from "../../error";
import { MockState } from "../../lib/mock";
import { fireEngineURL } from "./scrape";
const successSchema = z.object({
jobId: z.string(),
@ -85,8 +86,6 @@ export async function fireEngineCheckStatus(
jobId: string,
mock: MockState | null,
): Promise<FireEngineCheckStatusSuccess> {
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
const status = await Sentry.startSpan(
{
name: "fire-engine: Check status",

View File

@ -3,14 +3,13 @@ import * as Sentry from "@sentry/node";
import { robustFetch } from "../../lib/fetch";
import { MockState } from "../../lib/mock";
import { fireEngineURL } from "./scrape";
export async function fireEngineDelete(
logger: Logger,
jobId: string,
mock: MockState | null,
) {
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
await Sentry.startSpan(
{
name: "fire-engine: Delete scrape",

View File

@ -65,6 +65,8 @@ const schema = z.object({
processing: z.boolean(),
});
// Base URL used for fire-engine requests. Read once at module load from
// FIRE_ENGINE_BETA_URL; when that variable is unset (null/undefined) a fixed
// placeholder string is used instead, so importers always get a string.
export const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL ?? "<mock-fire-engine-url>";
export async function fireEngineScrape<
Engine extends
| FireEngineScrapeRequestChromeCDP
@ -75,10 +77,6 @@ export async function fireEngineScrape<
request: FireEngineScrapeRequestCommon & Engine,
mock: MockState | null,
): Promise<z.infer<typeof schema>> {
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
// TODO: retries
const scrapeRequest = await Sentry.startSpan(
{
name: "fire-engine: Scrape",

View File

@ -310,7 +310,12 @@ export function buildFallbackList(meta: Meta): {
engine: Engine;
unsupportedFeatures: Set<FeatureFlag>;
}[] {
const _engines = [...engines];
const _engines: Engine[] = [
...engines,
// enable fire-engine in self-hosted testing environment when mocks are supplied
...((!useFireEngine && meta.mock !== null) ? ["fire-engine;chrome-cdp", "fire-engine;playwright", "fire-engine;tlsclient"] as Engine[] : [])
];
if (meta.internalOptions.useCache !== true) {
const cacheIndex = _engines.indexOf("cache");

View File

@ -2,6 +2,7 @@ import { Logger } from "winston";
import { z, ZodError } from "zod";
import * as Sentry from "@sentry/node";
import { MockState, saveMock } from "./mock";
import { fireEngineURL } from "../engines/fire-engine/scrape";
export type RobustFetchParams<Schema extends z.Schema<any>> = {
url: string;
@ -126,14 +127,13 @@ export async function robustFetch<
const makeRequestTypeId = (
request: (typeof mock)["requests"][number]["options"],
) => {
let trueUrl = (process.env.FIRE_ENGINE_BETA_URL && request.url.startsWith(process.env.FIRE_ENGINE_BETA_URL))
? request.url.replace(process.env.FIRE_ENGINE_BETA_URL, "<fire-engine>")
let trueUrl = request.url.startsWith(fireEngineURL)
? request.url.replace(fireEngineURL, "<fire-engine>")
: request.url;
let out = trueUrl + ";" + request.method;
if (
process.env.FIRE_ENGINE_BETA_URL &&
(trueUrl.startsWith("<fire-engine>")) &&
trueUrl.startsWith("<fire-engine>") &&
request.method === "POST"
) {
out += "f-e;" + request.body?.engine + ";" + request.body?.url;

View File

@ -29,9 +29,7 @@ export async function fireEngineMap(
});
if (!process.env.FIRE_ENGINE_BETA_URL) {
console.warn(
"(v1/map Beta) Results might differ from cloud offering currently.",
);
logger.warn("(v1/map Beta) Results might differ from cloud offering currently.");
return [];
}

View File

@ -1,21 +1,18 @@
import axios from "axios";
import * as cheerio from "cheerio"; // TODO: rustify
import { JSDOM } from 'jsdom';
import * as querystring from "querystring";
import { SearchResult } from "../../src/lib/entities";
import { logger } from "../../src/lib/logger";
import https from 'https';
const _useragent_list = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0",
];
/**
 * Picks a pseudo-random integer between `min` and `max`, both ends inclusive
 * (for integer arguments).
 *
 * @param min Lower bound of the range.
 * @param max Upper bound of the range.
 * @returns `min` plus a random whole offset in `0..(max - min)`.
 */
const getRandomInt = (min: number, max: number): number => {
  // Number of distinct values in the closed range.
  const span = max - min + 1;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
};
function get_useragent(): string {
return _useragent_list[Math.floor(Math.random() * _useragent_list.length)];
/**
 * Assembles a randomized User-Agent string made of four space-separated
 * product tokens: `Lynx/x.y.z libwww-FM/x.y SSL-MM/x.y OpenSSL/x.y.z`.
 *
 * Each version component is drawn with `getRandomInt`, so successive calls
 * generally return different strings.
 *
 * @returns The assembled User-Agent header value.
 */
export function get_useragent(): string {
  // Array elements are evaluated left to right, so the random draws happen in
  // the order Lynx, libwww-FM, SSL-MM, OpenSSL.
  const lynxToken =
    "Lynx/" + [getRandomInt(2, 3), getRandomInt(8, 9), getRandomInt(0, 2)].join(".");
  const libwwwToken =
    "libwww-FM/" + [getRandomInt(2, 3), getRandomInt(13, 15)].join(".");
  const sslMmToken =
    "SSL-MM/" + [getRandomInt(1, 2), getRandomInt(3, 5)].join(".");
  const opensslToken =
    "OpenSSL/" + [getRandomInt(1, 3), getRandomInt(0, 4), getRandomInt(0, 9)].join(".");

  return [lynxToken, libwwwToken, sslMmToken, opensslToken].join(" ");
}
async function _req(
@ -31,9 +28,10 @@ async function _req(
) {
const params = {
q: term,
num: results, // Number of results to return
num: results+2, // Number of results to return
hl: lang,
gl: country,
safe: "active",
start: start,
};
if (tbs) {
@ -42,18 +40,25 @@ async function _req(
if (filter) {
params["filter"] = filter;
}
var agent = get_useragent();
try {
const resp = await axios.get("https://www.google.com/search", {
headers: {
"User-Agent": get_useragent(),
"User-Agent": agent,
"Accept": "*/*"
},
params: params,
proxy: proxies,
timeout: timeout,
httpsAgent: new https.Agent({
rejectUnauthorized: true
}),
withCredentials: true
});
return resp;
} catch (error) {
if (error.response && error.response.status === 429) {
logger.warn("Google Search: Too many requests, try again later.", error.response);
throw new Error("Google Search: Too many requests, try again later.");
}
throw error;
@ -100,34 +105,42 @@ export async function googleSearch(
tbs,
filter,
);
const $ = cheerio.load(resp.data);
const result_block = $("div.g");
const dom = new JSDOM(resp.data);
const document = dom.window.document;
const result_block = document.querySelectorAll("div.ezO2md");
let new_results = 0;
let unique = true;
let fetched_results = 0;
const fetched_links = new Set<string>();
if (result_block.length === 0) {
start += 1;
attempts += 1;
} else {
attempts = 0; // Reset attempts if we have results
attempts = 0;
}
result_block.each((index, element) => {
const linkElement = $(element).find("a");
const link =
linkElement && linkElement.attr("href")
? linkElement.attr("href")
: null;
const title = $(element).find("h3");
const ogImage = $(element).find("img").eq(1).attr("src");
const description_box = $(element).find(
"div[style='-webkit-line-clamp:2']",
);
const answerBox = $(element).find(".mod").text();
if (description_box) {
const description = description_box.text();
if (link && title && description) {
start += 1;
results.push(new SearchResult(link, title.text(), description));
for (const result of result_block) {
const link_tag = result.querySelector("a[href]") as HTMLAnchorElement;
const title_tag = link_tag ? link_tag.querySelector("span.CVA68e") : null;
const description_tag = result.querySelector("span.FrIlee");
if (link_tag && title_tag && description_tag) {
const link = decodeURIComponent(link_tag.href.split("&")[0].replace("/url?q=", ""));
if (fetched_links.has(link) && unique) continue;
fetched_links.add(link);
const title = title_tag.textContent || "";
const description = description_tag.textContent || "";
fetched_results++;
new_results++;
if (link && title && description) {
start += 1
results.push(new SearchResult(link, title, description));
}
if (fetched_results >= num_results) break;
}
}
});
}
await new Promise((resolve) =>
setTimeout(resolve, sleep_interval * 1000),
);

View File

@ -64,7 +64,7 @@ export async function search({
timeout,
);
} catch (error) {
logger.error(`Error in search function: ${error}`);
logger.error(`Error in search function`, { error });
return [];
}
}

View File

@ -2,6 +2,8 @@ import { Queue } from "bullmq";
import { logger } from "../lib/logger";
import IORedis from "ioredis";
export type QueueFunction = () => Queue<any, any, string, any, any, string>;
let scrapeQueue: Queue;
let extractQueue: Queue;
let loggingQueue: Queue;