mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-11 20:58:59 +08:00
SELFHOST FIXES (#1207)
* fix(extract): construct OpenAI on demand Fixes hard-crash if api key not specified in a self-hosting environment. * fix(ci): try sleeping * fix(ci): override host * fix(ci): wait for server to start * Support /extract and /crawl for self-hosted (FIR-1097) (#1137) * Support /extract for self-hosted This returns the job response from redis rather than supabase when db auth is disabled (self hosted mode) * Use getJob for extract and use correct types * fix(v1/crawl-status): only poll DB for total count if DB is enabled * feat(snips): TEST_SUITE_SELF_HOSTED * fix(ci/test-server-self-host): use pr trigger * fix(scrapeURL): f-e mocking in selfhosted env * fix(snips): do not try to eval json format on selfhost * fix(scrapeURL): further f-e mocking * fix(snips): don't timeout on hard fail polling * fix(v1/extract-status): fix-up the db-agnostic impl unfortunately had to separate the functions since the schema was too divergent :( * fix(snips): boost screenshot delay * feat(ci): test with openai * feat(ci): extract, search testing * fix(ci): matrix * fix(ci): bleh * Update: fix default google search (#1174) * fix log title * search should always work * asd * fix ci --------- Co-authored-by: Nick Roth <nlr06886@gmail.com> Co-authored-by: William <sdustusun@gmail.com>
This commit is contained in:
parent
c1ca64fc07
commit
2200f084f3
19
.github/workflows/test-server-self-host.yml
vendored
19
.github/workflows/test-server-self-host.yml
vendored
@ -1,22 +1,35 @@
|
||||
name: Self-hosted Server Test Suite
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- apps/api/**
|
||||
|
||||
env:
|
||||
PORT: 3002
|
||||
REDIS_URL: redis://localhost:6379
|
||||
HOST: 0.0.0.0
|
||||
ENV: ${{ secrets.ENV }}
|
||||
TEST_SUITE_SELF_HOSTED: true
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: Run tests
|
||||
strategy:
|
||||
matrix:
|
||||
openai: [true, false]
|
||||
serper: [true, false]
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
redis:
|
||||
image: redis
|
||||
ports:
|
||||
- 6379:6379
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ matrix.openai == true && secrets.OPENAI_API_KEY || '' }}
|
||||
SERPER_API_KEY: ${{ matrix.serper == true && secrets.SERPER_API_KEY || '' }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install pnpm
|
||||
@ -40,6 +53,8 @@ jobs:
|
||||
run: npm run workers > worker.log 2>&1 &
|
||||
working-directory: ./apps/api
|
||||
id: start_workers
|
||||
- name: Wait for server
|
||||
run: pnpx wait-on tcp:3002 -t 15s
|
||||
- name: Run snippet tests
|
||||
run: |
|
||||
npm run test:snips
|
||||
@ -50,7 +65,7 @@ jobs:
|
||||
- uses: actions/upload-artifact@v4
|
||||
if: always()
|
||||
with:
|
||||
name: Logs
|
||||
name: Logs (openai ${{ matrix.openai }}, serper ${{ matrix.serper }})
|
||||
path: |
|
||||
./apps/api/api.log
|
||||
./apps/api/worker.log
|
||||
|
@ -56,6 +56,7 @@
|
||||
"typescript": "^5.4.2"
|
||||
},
|
||||
"dependencies": {
|
||||
"jsdom": "^26.0.0",
|
||||
"@anthropic-ai/sdk": "^0.24.3",
|
||||
"@apidevtools/json-schema-ref-parser": "^11.7.3",
|
||||
"@brillout/import": "^0.2.2",
|
||||
|
286
apps/api/pnpm-lock.yaml
generated
286
apps/api/pnpm-lock.yaml
generated
@ -125,6 +125,9 @@ importers:
|
||||
joplin-turndown-plugin-gfm:
|
||||
specifier: ^1.0.12
|
||||
version: 1.0.12
|
||||
jsdom:
|
||||
specifier: ^26.0.0
|
||||
version: 26.0.0
|
||||
json-schema-to-zod:
|
||||
specifier: ^2.3.0
|
||||
version: 2.3.0
|
||||
@ -136,7 +139,7 @@ importers:
|
||||
version: 2.9.0
|
||||
langchain:
|
||||
specifier: ^0.2.8
|
||||
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||
languagedetect:
|
||||
specifier: ^2.0.0
|
||||
version: 2.0.0
|
||||
@ -332,6 +335,9 @@ packages:
|
||||
resolution: {integrity: sha512-WApSdLdXEBb/1FUPca2lteASewEfpjEYJ8oXZP+0gExK5qSfsEKBKcA+WjY6Q4wvXwyv0+W6Kvc372pSceib9w==}
|
||||
engines: {node: '>= 16'}
|
||||
|
||||
'@asamuzakjp/css-color@2.8.3':
|
||||
resolution: {integrity: sha512-GIc76d9UI1hCvOATjZPyHFmE5qhRccp3/zGfMPapK3jBi+yocEzp6BBB0UnfRYP9NP4FANqUZYb0hnfs3TM3hw==}
|
||||
|
||||
'@aws-crypto/crc32@3.0.0':
|
||||
resolution: {integrity: sha512-IzSgsrxUcsrejQbPVilIKy16kAT52EwB6zSaI+M3xxIhKh5+aldEyvI+z6erM7TCLB2BJsFrtHjp6/4/sr+3dA==}
|
||||
|
||||
@ -685,6 +691,34 @@ packages:
|
||||
resolution: {integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==}
|
||||
engines: {node: '>=12'}
|
||||
|
||||
'@csstools/color-helpers@5.0.1':
|
||||
resolution: {integrity: sha512-MKtmkA0BX87PKaO1NFRTFH+UnkgnmySQOvNxJubsadusqPEC2aJ9MOQiMceZJJ6oitUl/i0L6u0M1IrmAOmgBA==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
'@csstools/css-calc@2.1.1':
|
||||
resolution: {integrity: sha512-rL7kaUnTkL9K+Cvo2pnCieqNpTKgQzy5f+N+5Iuko9HAoasP+xgprVh7KN/MaJVvVL1l0EzQq2MoqBHKSrDrag==}
|
||||
engines: {node: '>=18'}
|
||||
peerDependencies:
|
||||
'@csstools/css-parser-algorithms': ^3.0.4
|
||||
'@csstools/css-tokenizer': ^3.0.3
|
||||
|
||||
'@csstools/css-color-parser@3.0.7':
|
||||
resolution: {integrity: sha512-nkMp2mTICw32uE5NN+EsJ4f5N+IGFeCFu4bGpiKgb2Pq/7J/MpyLBeQ5ry4KKtRFZaYs6sTmcMYrSRIyj5DFKA==}
|
||||
engines: {node: '>=18'}
|
||||
peerDependencies:
|
||||
'@csstools/css-parser-algorithms': ^3.0.4
|
||||
'@csstools/css-tokenizer': ^3.0.3
|
||||
|
||||
'@csstools/css-parser-algorithms@3.0.4':
|
||||
resolution: {integrity: sha512-Up7rBoV77rv29d3uKHUIVubz1BTcgyUK72IvCQAbfbMv584xHcGKCKbWh7i8hPrRJ7qU4Y8IO3IY9m+iTB7P3A==}
|
||||
engines: {node: '>=18'}
|
||||
peerDependencies:
|
||||
'@csstools/css-tokenizer': ^3.0.3
|
||||
|
||||
'@csstools/css-tokenizer@3.0.3':
|
||||
resolution: {integrity: sha512-UJnjoFsmxfKUdNYdWgOB0mWUypuLvAfQPH1+pyvRJs6euowbFkFC6P13w1l8mJyi3vxYMxc9kld5jZEGRQs6bw==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
'@dabh/diagnostics@2.0.3':
|
||||
resolution: {integrity: sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA==}
|
||||
|
||||
@ -1715,6 +1749,10 @@ packages:
|
||||
resolution: {integrity: sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==}
|
||||
engines: {node: '>= 14'}
|
||||
|
||||
agent-base@7.1.3:
|
||||
resolution: {integrity: sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==}
|
||||
engines: {node: '>= 14'}
|
||||
|
||||
agentkeepalive@4.5.0:
|
||||
resolution: {integrity: sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==}
|
||||
engines: {node: '>= 8.0.0'}
|
||||
@ -2141,6 +2179,10 @@ packages:
|
||||
resolution: {integrity: sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==}
|
||||
engines: {node: '>= 6'}
|
||||
|
||||
cssstyle@4.2.1:
|
||||
resolution: {integrity: sha512-9+vem03dMXG7gDmZ62uqmRiMRNtinIZ9ZyuF6BdxzfOD+FdN5hretzynkn0ReS2DO2GSw76RWHs0UmJPI2zUjw==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
csv-parse@5.5.6:
|
||||
resolution: {integrity: sha512-uNpm30m/AGSkLxxy7d9yRXpJQFrZzVWLFBkS+6ngPcZkw/5k3L/jjFuj7tVnEpRn+QgmiXr21nDlhCiUK4ij2A==}
|
||||
|
||||
@ -2152,6 +2194,10 @@ packages:
|
||||
resolution: {integrity: sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==}
|
||||
engines: {node: '>= 14'}
|
||||
|
||||
data-urls@5.0.0:
|
||||
resolution: {integrity: sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
date-fns@3.6.0:
|
||||
resolution: {integrity: sha512-fRHTG8g/Gif+kSh50gaGEdToemgfj74aRX3swtiouboip5JDLAyDE9F11nHMIcvOaXeOC6D7SpNhi7uFyB7Uww==}
|
||||
|
||||
@ -2197,6 +2243,9 @@ packages:
|
||||
resolution: {integrity: sha512-9iE1PgSik9HeIIw2JO94IidnE3eBoQrFJ3w7sFuzSX4DpmZ3v5sZpUiV5Swcf6mQEF+Y0ru8Neo+p+nyh2J+hQ==}
|
||||
engines: {node: '>=10'}
|
||||
|
||||
decimal.js@10.5.0:
|
||||
resolution: {integrity: sha512-8vDa8Qxvr/+d94hSh5P3IJwI5t8/c0KsMp+g8bNw9cY2icONa5aPfvKeieW1WlG0WQYwwhJ7mjui2xtiePQSXw==}
|
||||
|
||||
dedent@1.5.3:
|
||||
resolution: {integrity: sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==}
|
||||
peerDependencies:
|
||||
@ -2510,6 +2559,10 @@ packages:
|
||||
resolution: {integrity: sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==}
|
||||
engines: {node: '>= 6'}
|
||||
|
||||
form-data@4.0.1:
|
||||
resolution: {integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==}
|
||||
engines: {node: '>= 6'}
|
||||
|
||||
formdata-node@4.4.1:
|
||||
resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==}
|
||||
engines: {node: '>= 12.20'}
|
||||
@ -2647,6 +2700,10 @@ packages:
|
||||
resolution: {integrity: sha512-oWv4T4yJ52iKrufjnyZPkrN0CH3QnrUqdB6In1g5Fe1mia8GmF36gnfNySxoZtxD5+NmYw1EElVXiBk93UeskA==}
|
||||
engines: {node: '>=12'}
|
||||
|
||||
html-encoding-sniffer@4.0.0:
|
||||
resolution: {integrity: sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
html-escaper@2.0.2:
|
||||
resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==}
|
||||
|
||||
@ -2686,6 +2743,10 @@ packages:
|
||||
resolution: {integrity: sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==}
|
||||
engines: {node: '>= 14'}
|
||||
|
||||
https-proxy-agent@7.0.6:
|
||||
resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==}
|
||||
engines: {node: '>= 14'}
|
||||
|
||||
human-signals@2.1.0:
|
||||
resolution: {integrity: sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==}
|
||||
engines: {node: '>=10.17.0'}
|
||||
@ -2798,6 +2859,9 @@ packages:
|
||||
resolution: {integrity: sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==}
|
||||
engines: {node: '>=8'}
|
||||
|
||||
is-potential-custom-element-name@1.0.1:
|
||||
resolution: {integrity: sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==}
|
||||
|
||||
is-retry-allowed@2.2.0:
|
||||
resolution: {integrity: sha512-XVm7LOeLpTW4jV19QSH38vkswxoLud8sQ57YwJVTPWdiaI9I8keEhGFpBlslyVsgdQy4Opg8QOLb8YRgsyZiQg==}
|
||||
engines: {node: '>=10'}
|
||||
@ -3012,6 +3076,15 @@ packages:
|
||||
jsbn@1.1.0:
|
||||
resolution: {integrity: sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==}
|
||||
|
||||
jsdom@26.0.0:
|
||||
resolution: {integrity: sha512-BZYDGVAIriBWTpIxYzrXjv3E/4u8+/pSG5bQdIYCbNCGOvsPkDQfTVLAIXAf9ETdCpduCVTkDe2NNZ8NIwUVzw==}
|
||||
engines: {node: '>=18'}
|
||||
peerDependencies:
|
||||
canvas: ^3.0.0
|
||||
peerDependenciesMeta:
|
||||
canvas:
|
||||
optional: true
|
||||
|
||||
jsesc@2.5.2:
|
||||
resolution: {integrity: sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==}
|
||||
engines: {node: '>=4'}
|
||||
@ -3298,6 +3371,9 @@ packages:
|
||||
resolution: {integrity: sha512-CQl19J/g+Hbjbv4Y3mFNNXFEL/5t/KCg8POCuUqd4rMKjGG+j1ybER83hxV58zL+dFI1PTkt3GNFSHRt+d8qEQ==}
|
||||
engines: {node: 14 || >=16.14}
|
||||
|
||||
lru-cache@10.4.3:
|
||||
resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==}
|
||||
|
||||
lru-cache@5.1.1:
|
||||
resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==}
|
||||
|
||||
@ -3588,6 +3664,9 @@ packages:
|
||||
resolution: {integrity: sha512-1MQz1Ed8z2yckoBeSfkQHHO9K1yDRxxtotKSJ9yvcTUUxSvfvzEq5GwBrjjHEpMlq/k5gvXdmJ1SbYxWtpNoVg==}
|
||||
engines: {node: '>=8'}
|
||||
|
||||
nwsapi@2.2.16:
|
||||
resolution: {integrity: sha512-F1I/bimDpj3ncaNDhfyMWuFqmQDBwDB0Fogc2qpL3BWvkQteFD/8BzWuIRl83rq0DXfm8SGt/HFhLXZyljTXcQ==}
|
||||
|
||||
object-assign@4.1.1:
|
||||
resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
@ -3697,6 +3776,9 @@ packages:
|
||||
parse5@7.1.2:
|
||||
resolution: {integrity: sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==}
|
||||
|
||||
parse5@7.2.1:
|
||||
resolution: {integrity: sha512-BuBYQYlv1ckiPdQi/ohiivi9Sagc9JG+Ozs0r7b/0iK3sKmrb0b9FdWdBbOdx6hBCM/F9Ir82ofnBhtZOjCRPQ==}
|
||||
|
||||
parseley@0.12.1:
|
||||
resolution: {integrity: sha512-e6qHKe3a9HWr0oMRVDTRhKce+bRO8VGQR3NyVwcjwrbhMmFCX9KszEV35+rn4AdilFAq9VPxP/Fe1wC9Qjd2lw==}
|
||||
|
||||
@ -4015,6 +4097,9 @@ packages:
|
||||
resolution: {integrity: sha512-s+pyvQeIKIZ0dx5iJiQk1tPLJAWln39+MI5jtM8wnyws+G5azk+dMnMX0qfbqNetKKNgcWWOdi0sfm+FbQbgdQ==}
|
||||
engines: {node: '>=10.0.0'}
|
||||
|
||||
rrweb-cssom@0.8.0:
|
||||
resolution: {integrity: sha512-guoltQEx+9aMf2gDZ0s62EcV8lsXR+0w8915TC3ITdn2YueuNjdAYh/levpU9nFaoChh9RUS5ZdQMrKfVEN9tw==}
|
||||
|
||||
rusha@0.8.14:
|
||||
resolution: {integrity: sha512-cLgakCUf6PedEu15t8kbsjnwIFFR2D4RfL+W3iWFJ4iac7z4B0ZI8fxy4R3J956kAI68HclCFGL8MPoUVC3qVA==}
|
||||
|
||||
@ -4034,6 +4119,10 @@ packages:
|
||||
sax@1.4.1:
|
||||
resolution: {integrity: sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==}
|
||||
|
||||
saxes@6.0.0:
|
||||
resolution: {integrity: sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==}
|
||||
engines: {node: '>=v12.22.7'}
|
||||
|
||||
scheduler@0.23.2:
|
||||
resolution: {integrity: sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==}
|
||||
|
||||
@ -4260,6 +4349,9 @@ packages:
|
||||
resolution: {integrity: sha512-SzRP5LQ6Ts2G5NyAa/jg16s8e3R7rfdFjizy1zeoecYWw+nGL+YA1xZvW/+iJmidBGSdLkuvdwTYEyJEb+EiUw==}
|
||||
engines: {node: '>=0.2.6'}
|
||||
|
||||
symbol-tree@3.2.4:
|
||||
resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==}
|
||||
|
||||
systeminformation@5.22.11:
|
||||
resolution: {integrity: sha512-aLws5yi4KCHTb0BVvbodQY5bY8eW4asMRDTxTW46hqw9lGjACX6TlLdJrkdoHYRB0qs+MekqEq1zG7WDnWE8Ug==}
|
||||
engines: {node: '>=8.0.0'}
|
||||
@ -4315,6 +4407,10 @@ packages:
|
||||
resolution: {integrity: sha512-r0eojU4bI8MnHr8c5bNo7lJDdI2qXlWWJk6a9EAFG7vbhTjElYhBVS3/miuE0uOuoLdb8Mc/rVfsmm6eo5o9GA==}
|
||||
hasBin: true
|
||||
|
||||
tough-cookie@5.1.1:
|
||||
resolution: {integrity: sha512-Ek7HndSVkp10hmHP9V4qZO1u+pn1RU5sI0Fw+jCU3lyvuMZcgqsNgc6CmJJZyByK4Vm/qotGRJlfgAX8q+4JiA==}
|
||||
engines: {node: '>=16'}
|
||||
|
||||
tr46@0.0.3:
|
||||
resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
|
||||
|
||||
@ -4322,6 +4418,10 @@ packages:
|
||||
resolution: {integrity: sha512-2lv/66T7e5yNyhAAC4NaKe5nVavzuGJQVVtRYLyQ2OI8tsJ61PMLlelehb0wi2Hx6+hT/OJUWZcw8MjlSRnxvw==}
|
||||
engines: {node: '>=14'}
|
||||
|
||||
tr46@5.0.0:
|
||||
resolution: {integrity: sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
triple-beam@1.4.1:
|
||||
resolution: {integrity: sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg==}
|
||||
engines: {node: '>= 14.0.0'}
|
||||
@ -4483,6 +4583,10 @@ packages:
|
||||
resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==}
|
||||
engines: {node: '>= 0.8'}
|
||||
|
||||
w3c-xmlserializer@5.0.0:
|
||||
resolution: {integrity: sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
walker@1.0.8:
|
||||
resolution: {integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==}
|
||||
|
||||
@ -4505,13 +4609,25 @@ packages:
|
||||
resolution: {integrity: sha512-p41ogyeMUrw3jWclHWTQg1k05DSVXPLcVxRTYsXUk+ZooOCZLcoYgPZ/HL/D/N+uQPOtcp1me1WhBEaX02mhWg==}
|
||||
engines: {node: '>=12'}
|
||||
|
||||
whatwg-encoding@3.1.1:
|
||||
resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
whatwg-fetch@3.6.20:
|
||||
resolution: {integrity: sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==}
|
||||
|
||||
whatwg-mimetype@4.0.0:
|
||||
resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
whatwg-url@13.0.0:
|
||||
resolution: {integrity: sha512-9WWbymnqj57+XEuqADHrCJ2eSXzn8WXIW/YSGaZtb2WKAInQ6CHfaUUcTyyver0p8BDg5StLQq8h1vtZuwmOig==}
|
||||
engines: {node: '>=16'}
|
||||
|
||||
whatwg-url@14.1.1:
|
||||
resolution: {integrity: sha512-mDGf9diDad/giZ/Sm9Xi2YcyzaFpbdLpJPr+E9fSkyQ7KpQD4SdFcugkRQYzhmfI4KeV4Qpnn2sKPdo+kmsgRQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
whatwg-url@5.0.0:
|
||||
resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==}
|
||||
|
||||
@ -4583,6 +4699,10 @@ packages:
|
||||
utf-8-validate:
|
||||
optional: true
|
||||
|
||||
xml-name-validator@5.0.0:
|
||||
resolution: {integrity: sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
xml2js@0.6.2:
|
||||
resolution: {integrity: sha512-T4rieHaC1EXcES0Kxxj4JWgaUQHDk+qwHcYOCFHfiwKz7tOVPLq7Hjq9dM1WCMhylqMEfP7hMcOIChvotiZegA==}
|
||||
engines: {node: '>=4.0.0'}
|
||||
@ -4595,6 +4715,9 @@ packages:
|
||||
resolution: {integrity: sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==}
|
||||
engines: {node: '>=4.0'}
|
||||
|
||||
xmlchars@2.2.0:
|
||||
resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==}
|
||||
|
||||
xtend@4.0.2:
|
||||
resolution: {integrity: sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==}
|
||||
engines: {node: '>=0.4'}
|
||||
@ -4675,6 +4798,14 @@ snapshots:
|
||||
'@types/json-schema': 7.0.15
|
||||
js-yaml: 4.1.0
|
||||
|
||||
'@asamuzakjp/css-color@2.8.3':
|
||||
dependencies:
|
||||
'@csstools/css-calc': 2.1.1(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)
|
||||
'@csstools/css-color-parser': 3.0.7(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)
|
||||
'@csstools/css-parser-algorithms': 3.0.4(@csstools/css-tokenizer@3.0.3)
|
||||
'@csstools/css-tokenizer': 3.0.3
|
||||
lru-cache: 10.4.3
|
||||
|
||||
'@aws-crypto/crc32@3.0.0':
|
||||
dependencies:
|
||||
'@aws-crypto/util': 3.0.0
|
||||
@ -5413,6 +5544,26 @@ snapshots:
|
||||
dependencies:
|
||||
'@jridgewell/trace-mapping': 0.3.9
|
||||
|
||||
'@csstools/color-helpers@5.0.1': {}
|
||||
|
||||
'@csstools/css-calc@2.1.1(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)':
|
||||
dependencies:
|
||||
'@csstools/css-parser-algorithms': 3.0.4(@csstools/css-tokenizer@3.0.3)
|
||||
'@csstools/css-tokenizer': 3.0.3
|
||||
|
||||
'@csstools/css-color-parser@3.0.7(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)':
|
||||
dependencies:
|
||||
'@csstools/color-helpers': 5.0.1
|
||||
'@csstools/css-calc': 2.1.1(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)
|
||||
'@csstools/css-parser-algorithms': 3.0.4(@csstools/css-tokenizer@3.0.3)
|
||||
'@csstools/css-tokenizer': 3.0.3
|
||||
|
||||
'@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3)':
|
||||
dependencies:
|
||||
'@csstools/css-tokenizer': 3.0.3
|
||||
|
||||
'@csstools/css-tokenizer@3.0.3': {}
|
||||
|
||||
'@dabh/diagnostics@2.0.3':
|
||||
dependencies:
|
||||
colorspace: 1.1.4
|
||||
@ -5642,13 +5793,13 @@ snapshots:
|
||||
|
||||
'@jsdevtools/ono@7.1.3': {}
|
||||
|
||||
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
||||
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
||||
dependencies:
|
||||
ansi-styles: 5.2.0
|
||||
camelcase: 6.3.0
|
||||
decamelize: 1.2.0
|
||||
js-tiktoken: 1.0.12
|
||||
langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
|
||||
langsmith: 0.1.34(7lljbsleilzgkaubvlq4ipicvq)
|
||||
ml-distance: 4.0.1
|
||||
mustache: 4.2.0
|
||||
p-queue: 6.6.2
|
||||
@ -5660,9 +5811,9 @@ snapshots:
|
||||
- langchain
|
||||
- openai
|
||||
|
||||
'@langchain/openai@0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
|
||||
'@langchain/openai@0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
|
||||
dependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
js-tiktoken: 1.0.12
|
||||
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
|
||||
zod: 3.23.8
|
||||
@ -5671,9 +5822,9 @@ snapshots:
|
||||
- encoding
|
||||
- langchain
|
||||
|
||||
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
||||
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
||||
dependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
js-tiktoken: 1.0.12
|
||||
transitivePeerDependencies:
|
||||
- langchain
|
||||
@ -6811,6 +6962,8 @@ snapshots:
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
agent-base@7.1.3: {}
|
||||
|
||||
agentkeepalive@4.5.0:
|
||||
dependencies:
|
||||
humanize-ms: 1.2.1
|
||||
@ -7321,12 +7474,22 @@ snapshots:
|
||||
|
||||
css-what@6.1.0: {}
|
||||
|
||||
cssstyle@4.2.1:
|
||||
dependencies:
|
||||
'@asamuzakjp/css-color': 2.8.3
|
||||
rrweb-cssom: 0.8.0
|
||||
|
||||
csv-parse@5.5.6: {}
|
||||
|
||||
data-uri-to-buffer@4.0.1: {}
|
||||
|
||||
data-uri-to-buffer@6.0.2: {}
|
||||
|
||||
data-urls@5.0.0:
|
||||
dependencies:
|
||||
whatwg-mimetype: 4.0.0
|
||||
whatwg-url: 14.1.1
|
||||
|
||||
date-fns@3.6.0: {}
|
||||
|
||||
debug@2.6.9:
|
||||
@ -7351,6 +7514,8 @@ snapshots:
|
||||
|
||||
decamelize@4.0.0: {}
|
||||
|
||||
decimal.js@10.5.0: {}
|
||||
|
||||
dedent@1.5.3: {}
|
||||
|
||||
deepmerge@4.3.1: {}
|
||||
@ -7661,6 +7826,12 @@ snapshots:
|
||||
combined-stream: 1.0.8
|
||||
mime-types: 2.1.35
|
||||
|
||||
form-data@4.0.1:
|
||||
dependencies:
|
||||
asynckit: 0.4.0
|
||||
combined-stream: 1.0.8
|
||||
mime-types: 2.1.35
|
||||
|
||||
formdata-node@4.4.1:
|
||||
dependencies:
|
||||
node-domexception: 1.0.0
|
||||
@ -7795,6 +7966,10 @@ snapshots:
|
||||
dependencies:
|
||||
whatwg-encoding: 2.0.0
|
||||
|
||||
html-encoding-sniffer@4.0.0:
|
||||
dependencies:
|
||||
whatwg-encoding: 3.1.1
|
||||
|
||||
html-escaper@2.0.2: {}
|
||||
|
||||
html-to-text@9.0.5:
|
||||
@ -7875,6 +8050,13 @@ snapshots:
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
https-proxy-agent@7.0.6:
|
||||
dependencies:
|
||||
agent-base: 7.1.3
|
||||
debug: 4.3.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
human-signals@2.1.0: {}
|
||||
|
||||
humanize-ms@1.2.1:
|
||||
@ -7984,6 +8166,8 @@ snapshots:
|
||||
|
||||
is-plain-obj@2.1.0: {}
|
||||
|
||||
is-potential-custom-element-name@1.0.1: {}
|
||||
|
||||
is-retry-allowed@2.2.0: {}
|
||||
|
||||
is-stream@2.0.1: {}
|
||||
@ -8400,6 +8584,34 @@ snapshots:
|
||||
|
||||
jsbn@1.1.0: {}
|
||||
|
||||
jsdom@26.0.0:
|
||||
dependencies:
|
||||
cssstyle: 4.2.1
|
||||
data-urls: 5.0.0
|
||||
decimal.js: 10.5.0
|
||||
form-data: 4.0.1
|
||||
html-encoding-sniffer: 4.0.0
|
||||
http-proxy-agent: 7.0.2
|
||||
https-proxy-agent: 7.0.6
|
||||
is-potential-custom-element-name: 1.0.1
|
||||
nwsapi: 2.2.16
|
||||
parse5: 7.2.1
|
||||
rrweb-cssom: 0.8.0
|
||||
saxes: 6.0.0
|
||||
symbol-tree: 3.2.4
|
||||
tough-cookie: 5.1.1
|
||||
w3c-xmlserializer: 5.0.0
|
||||
webidl-conversions: 7.0.0
|
||||
whatwg-encoding: 3.1.1
|
||||
whatwg-mimetype: 4.0.0
|
||||
whatwg-url: 14.1.1
|
||||
ws: 8.18.0
|
||||
xml-name-validator: 5.0.0
|
||||
transitivePeerDependencies:
|
||||
- bufferutil
|
||||
- supports-color
|
||||
- utf-8-validate
|
||||
|
||||
jsesc@2.5.2: {}
|
||||
|
||||
json-parse-even-better-errors@2.3.1: {}
|
||||
@ -8435,17 +8647,17 @@ snapshots:
|
||||
|
||||
kuler@2.0.0: {}
|
||||
|
||||
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
|
||||
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
|
||||
dependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
'@langchain/openai': 0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
|
||||
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
'@langchain/openai': 0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
|
||||
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
binary-extensions: 2.3.0
|
||||
js-tiktoken: 1.0.12
|
||||
js-yaml: 4.1.0
|
||||
jsonpointer: 5.0.1
|
||||
langchainhub: 0.0.11
|
||||
langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
|
||||
langsmith: 0.1.34(7lljbsleilzgkaubvlq4ipicvq)
|
||||
ml-distance: 4.0.1
|
||||
openapi-types: 12.1.3
|
||||
p-retry: 4.6.2
|
||||
@ -8463,6 +8675,7 @@ snapshots:
|
||||
handlebars: 4.7.8
|
||||
html-to-text: 9.0.5
|
||||
ioredis: 5.4.1
|
||||
jsdom: 26.0.0
|
||||
mammoth: 1.7.2
|
||||
mongodb: 6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3)
|
||||
pdf-parse: 1.1.1
|
||||
@ -8475,7 +8688,7 @@ snapshots:
|
||||
|
||||
langchainhub@0.0.11: {}
|
||||
|
||||
langsmith@0.1.34(npkyd6f7wyl3urgrzoxaktl5a4):
|
||||
langsmith@0.1.34(7lljbsleilzgkaubvlq4ipicvq):
|
||||
dependencies:
|
||||
'@types/uuid': 9.0.8
|
||||
commander: 10.0.1
|
||||
@ -8484,8 +8697,8 @@ snapshots:
|
||||
p-retry: 4.6.2
|
||||
uuid: 9.0.1
|
||||
optionalDependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
|
||||
|
||||
languagedetect@2.0.0: {}
|
||||
@ -8554,6 +8767,8 @@ snapshots:
|
||||
|
||||
lru-cache@10.3.0: {}
|
||||
|
||||
lru-cache@10.4.3: {}
|
||||
|
||||
lru-cache@5.1.1:
|
||||
dependencies:
|
||||
yallist: 3.1.1
|
||||
@ -8849,6 +9064,8 @@ snapshots:
|
||||
|
||||
num-sort@2.1.0: {}
|
||||
|
||||
nwsapi@2.2.16: {}
|
||||
|
||||
object-assign@4.1.1: {}
|
||||
|
||||
object-inspect@1.13.1: {}
|
||||
@ -8979,6 +9196,10 @@ snapshots:
|
||||
dependencies:
|
||||
entities: 4.5.0
|
||||
|
||||
parse5@7.2.1:
|
||||
dependencies:
|
||||
entities: 4.5.0
|
||||
|
||||
parseley@0.12.1:
|
||||
dependencies:
|
||||
leac: 0.6.0
|
||||
@ -9321,6 +9542,8 @@ snapshots:
|
||||
|
||||
robots-parser@3.0.1: {}
|
||||
|
||||
rrweb-cssom@0.8.0: {}
|
||||
|
||||
rusha@0.8.14: {}
|
||||
|
||||
safe-buffer@5.1.2: {}
|
||||
@ -9333,6 +9556,10 @@ snapshots:
|
||||
|
||||
sax@1.4.1: {}
|
||||
|
||||
saxes@6.0.0:
|
||||
dependencies:
|
||||
xmlchars: 2.2.0
|
||||
|
||||
scheduler@0.23.2:
|
||||
dependencies:
|
||||
loose-envify: 1.4.0
|
||||
@ -9583,6 +9810,8 @@ snapshots:
|
||||
|
||||
sylvester@0.0.12: {}
|
||||
|
||||
symbol-tree@3.2.4: {}
|
||||
|
||||
systeminformation@5.22.11: {}
|
||||
|
||||
tar-fs@3.0.5:
|
||||
@ -9640,12 +9869,20 @@ snapshots:
|
||||
|
||||
touch@3.1.1: {}
|
||||
|
||||
tough-cookie@5.1.1:
|
||||
dependencies:
|
||||
tldts: 6.1.75
|
||||
|
||||
tr46@0.0.3: {}
|
||||
|
||||
tr46@4.1.1:
|
||||
dependencies:
|
||||
punycode: 2.3.1
|
||||
|
||||
tr46@5.0.0:
|
||||
dependencies:
|
||||
punycode: 2.3.1
|
||||
|
||||
triple-beam@1.4.1: {}
|
||||
|
||||
ts-jest@29.1.4(@babel/core@7.24.6)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.6))(jest@29.7.0(@types/node@20.14.1)(ts-node@10.9.2(@types/node@20.14.1)(typescript@5.4.5)))(typescript@5.4.5):
|
||||
@ -9777,6 +10014,10 @@ snapshots:
|
||||
|
||||
vary@1.1.2: {}
|
||||
|
||||
w3c-xmlserializer@5.0.0:
|
||||
dependencies:
|
||||
xml-name-validator: 5.0.0
|
||||
|
||||
walker@1.0.8:
|
||||
dependencies:
|
||||
makeerror: 1.0.12
|
||||
@ -9793,13 +10034,24 @@ snapshots:
|
||||
dependencies:
|
||||
iconv-lite: 0.6.3
|
||||
|
||||
whatwg-encoding@3.1.1:
|
||||
dependencies:
|
||||
iconv-lite: 0.6.3
|
||||
|
||||
whatwg-fetch@3.6.20: {}
|
||||
|
||||
whatwg-mimetype@4.0.0: {}
|
||||
|
||||
whatwg-url@13.0.0:
|
||||
dependencies:
|
||||
tr46: 4.1.1
|
||||
webidl-conversions: 7.0.0
|
||||
|
||||
whatwg-url@14.1.1:
|
||||
dependencies:
|
||||
tr46: 5.0.0
|
||||
webidl-conversions: 7.0.0
|
||||
|
||||
whatwg-url@5.0.0:
|
||||
dependencies:
|
||||
tr46: 0.0.3
|
||||
@ -9868,6 +10120,8 @@ snapshots:
|
||||
|
||||
ws@8.18.0: {}
|
||||
|
||||
xml-name-validator@5.0.0: {}
|
||||
|
||||
xml2js@0.6.2:
|
||||
dependencies:
|
||||
sax: 1.4.1
|
||||
@ -9877,6 +10131,8 @@ snapshots:
|
||||
|
||||
xmlbuilder@11.0.1: {}
|
||||
|
||||
xmlchars@2.2.0: {}
|
||||
|
||||
xtend@4.0.2: {}
|
||||
|
||||
y18n@5.0.8: {}
|
||||
|
@ -30,7 +30,7 @@ async function batchScrape(body: BatchScrapeRequestInput): ReturnType<typeof bat
|
||||
x = await batchScrapeStatus(bss.body.id);
|
||||
expect(x.statusCode).toBe(200);
|
||||
expect(typeof x.body.status).toBe("string");
|
||||
} while (x.body.status !== "completed")
|
||||
} while (x.body.status === "scraping");
|
||||
|
||||
expectBatchScrapeToSucceed(x);
|
||||
return x;
|
||||
@ -53,40 +53,51 @@ function expectBatchScrapeToSucceed(response: Awaited<ReturnType<typeof batchScr
|
||||
}
|
||||
|
||||
describe("Batch scrape tests", () => {
|
||||
describe("JSON format", () => {
|
||||
it.concurrent("works", async () => {
|
||||
const response = await batchScrape({
|
||||
urls: ["http://firecrawl.dev"],
|
||||
formats: ["json"],
|
||||
jsonOptions: {
|
||||
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
|
||||
schema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
company_mission: {
|
||||
type: "string",
|
||||
},
|
||||
supports_sso: {
|
||||
type: "boolean",
|
||||
},
|
||||
is_open_source: {
|
||||
type: "boolean",
|
||||
urls: ["http://firecrawl.dev"]
|
||||
});
|
||||
|
||||
expect(response.body.data[0]).toHaveProperty("markdown");
|
||||
expect(response.body.data[0].markdown).toContain("Firecrawl");
|
||||
}, 30000);
|
||||
|
||||
if (!process.env.TEST_SUITE_SELF_HOSTED) {
|
||||
describe("JSON format", () => {
|
||||
it.concurrent("works", async () => {
|
||||
const response = await batchScrape({
|
||||
urls: ["http://firecrawl.dev"],
|
||||
formats: ["json"],
|
||||
jsonOptions: {
|
||||
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
|
||||
schema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
company_mission: {
|
||||
type: "string",
|
||||
},
|
||||
supports_sso: {
|
||||
type: "boolean",
|
||||
},
|
||||
is_open_source: {
|
||||
type: "boolean",
|
||||
},
|
||||
},
|
||||
required: ["company_mission", "supports_sso", "is_open_source"],
|
||||
},
|
||||
},
|
||||
required: ["company_mission", "supports_sso", "is_open_source"],
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(response.body.data[0]).toHaveProperty("json");
|
||||
expect(response.body.data[0].json).toHaveProperty("company_mission");
|
||||
expect(typeof response.body.data[0].json.company_mission).toBe("string");
|
||||
expect(response.body.data[0].json).toHaveProperty("supports_sso");
|
||||
expect(response.body.data[0].json.supports_sso).toBe(false);
|
||||
expect(typeof response.body.data[0].json.supports_sso).toBe("boolean");
|
||||
expect(response.body.data[0].json).toHaveProperty("is_open_source");
|
||||
expect(response.body.data[0].json.is_open_source).toBe(true);
|
||||
expect(typeof response.body.data[0].json.is_open_source).toBe("boolean");
|
||||
}, 30000);
|
||||
});
|
||||
|
||||
expect(response.body.data[0]).toHaveProperty("json");
|
||||
expect(response.body.data[0].json).toHaveProperty("company_mission");
|
||||
expect(typeof response.body.data[0].json.company_mission).toBe("string");
|
||||
expect(response.body.data[0].json).toHaveProperty("supports_sso");
|
||||
expect(response.body.data[0].json.supports_sso).toBe(false);
|
||||
expect(typeof response.body.data[0].json.supports_sso).toBe("boolean");
|
||||
expect(response.body.data[0].json).toHaveProperty("is_open_source");
|
||||
expect(response.body.data[0].json.is_open_source).toBe(true);
|
||||
expect(typeof response.body.data[0].json.is_open_source).toBe("boolean");
|
||||
}, 30000);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
@ -30,7 +30,7 @@ async function crawl(body: CrawlRequestInput): ReturnType<typeof crawlStatus> {
|
||||
x = await crawlStatus(cs.body.id);
|
||||
expect(x.statusCode).toBe(200);
|
||||
expect(typeof x.body.status).toBe("string");
|
||||
} while (x.body.status !== "completed")
|
||||
} while (x.body.status === "scraping");
|
||||
|
||||
expectCrawlToSucceed(x);
|
||||
return x;
|
||||
|
@ -30,7 +30,7 @@ async function extract(body: ExtractRequestInput): Promise<ExtractResponse> {
|
||||
x = await extractStatus(es.body.id);
|
||||
expect(x.statusCode).toBe(200);
|
||||
expect(typeof x.body.status).toBe("string");
|
||||
} while (x.body.status !== "completed");
|
||||
} while (x.body.status === "processing");
|
||||
|
||||
expectExtractToSucceed(x);
|
||||
return x.body;
|
||||
@ -51,31 +51,37 @@ function expectExtractToSucceed(response: Awaited<ReturnType<typeof extractStatu
|
||||
}
|
||||
|
||||
describe("Extract tests", () => {
|
||||
it.concurrent("works", async () => {
|
||||
const res = await extract({
|
||||
urls: ["https://firecrawl.dev"],
|
||||
schema: {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"company_mission": {
|
||||
"type": "string"
|
||||
if (!process.env.TEST_SUITE_SELF_HOSTED || process.env.OPENAI_API_KEY) {
|
||||
it.concurrent("works", async () => {
|
||||
const res = await extract({
|
||||
urls: ["https://firecrawl.dev"],
|
||||
schema: {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"company_mission": {
|
||||
"type": "string"
|
||||
},
|
||||
"is_open_source": {
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"is_open_source": {
|
||||
"type": "boolean"
|
||||
}
|
||||
"required": [
|
||||
"company_mission",
|
||||
"is_open_source"
|
||||
]
|
||||
},
|
||||
"required": [
|
||||
"company_mission",
|
||||
"is_open_source"
|
||||
]
|
||||
},
|
||||
origin: "api-sdk",
|
||||
});
|
||||
origin: "api-sdk",
|
||||
});
|
||||
|
||||
expect(res.data).toHaveProperty("company_mission");
|
||||
expect(typeof res.data.company_mission).toBe("string")
|
||||
expect(res.data).toHaveProperty("is_open_source");
|
||||
expect(typeof res.data.is_open_source).toBe("boolean");
|
||||
expect(res.data.is_open_source).toBe(true);
|
||||
}, 60000);
|
||||
expect(res.data).toHaveProperty("company_mission");
|
||||
expect(typeof res.data.company_mission).toBe("string")
|
||||
expect(res.data).toHaveProperty("is_open_source");
|
||||
expect(typeof res.data.is_open_source).toBe("boolean");
|
||||
expect(res.data.is_open_source).toBe(true);
|
||||
}, 60000);
|
||||
} else {
|
||||
it.concurrent("dummy test", () => {
|
||||
expect(true).toBe(true);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
@ -41,41 +41,13 @@ describe("Scrape tests", () => {
|
||||
);
|
||||
}, 10000);
|
||||
|
||||
describe("Ad blocking (f-e dependant)", () => {
|
||||
it.concurrent("blocks ads by default", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://canyoublockit.com/testing/",
|
||||
});
|
||||
it("works", async () => {
|
||||
const response = await scrape({
|
||||
url: "http://firecrawl.dev"
|
||||
});
|
||||
|
||||
expect(response.markdown).not.toContain(".g.doubleclick.net/");
|
||||
}, 10000);
|
||||
|
||||
it.concurrent("doesn't block ads if explicitly disabled", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://canyoublockit.com/testing/",
|
||||
blockAds: false,
|
||||
});
|
||||
|
||||
expect(response.markdown).toContain(".g.doubleclick.net/");
|
||||
}, 10000);
|
||||
});
|
||||
|
||||
describe("Location API (f-e dependant)", () => {
|
||||
it.concurrent("works without specifying an explicit location", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://iplocation.com",
|
||||
});
|
||||
}, 10000);
|
||||
|
||||
it.concurrent("works with country US", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://iplocation.com",
|
||||
location: { country: "US" },
|
||||
});
|
||||
|
||||
expect(response.markdown).toContain("| Country | United States |");
|
||||
}, 10000);
|
||||
});
|
||||
expect(response.markdown).toContain("Firecrawl");
|
||||
}, 10000);
|
||||
|
||||
describe("JSON scrape support", () => {
|
||||
it.concurrent("returns parseable JSON", async () => {
|
||||
@ -89,82 +61,122 @@ describe("Scrape tests", () => {
|
||||
}, 25000); // TODO: mock and shorten
|
||||
});
|
||||
|
||||
describe("Screenshot", () => {
|
||||
it.concurrent("screenshot format works", async () => {
|
||||
const response = await scrape({
|
||||
url: "http://firecrawl.dev",
|
||||
formats: ["screenshot"]
|
||||
});
|
||||
|
||||
expect(typeof response.screenshot).toBe("string");
|
||||
}, 15000);
|
||||
if (!process.env.TEST_SUITE_SELF_HOSTED) {
|
||||
describe("Ad blocking (f-e dependant)", () => {
|
||||
it.concurrent("blocks ads by default", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://canyoublockit.com/testing/",
|
||||
});
|
||||
|
||||
it.concurrent("screenshot@fullPage format works", async () => {
|
||||
const response = await scrape({
|
||||
url: "http://firecrawl.dev",
|
||||
formats: ["screenshot@fullPage"]
|
||||
});
|
||||
|
||||
expect(typeof response.screenshot).toBe("string");
|
||||
}, 15000);
|
||||
});
|
||||
expect(response.markdown).not.toContain(".g.doubleclick.net/");
|
||||
}, 10000);
|
||||
|
||||
describe("JSON format", () => {
|
||||
it.concurrent("works", async () => {
|
||||
const response = await scrape({
|
||||
url: "http://firecrawl.dev",
|
||||
formats: ["json"],
|
||||
jsonOptions: {
|
||||
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
|
||||
schema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
company_mission: {
|
||||
type: "string",
|
||||
},
|
||||
supports_sso: {
|
||||
type: "boolean",
|
||||
},
|
||||
is_open_source: {
|
||||
type: "boolean",
|
||||
it.concurrent("doesn't block ads if explicitly disabled", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://canyoublockit.com/testing/",
|
||||
blockAds: false,
|
||||
});
|
||||
|
||||
expect(response.markdown).toContain(".g.doubleclick.net/");
|
||||
}, 10000);
|
||||
});
|
||||
|
||||
describe("Location API (f-e dependant)", () => {
|
||||
it.concurrent("works without specifying an explicit location", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://iplocation.com",
|
||||
});
|
||||
}, 10000);
|
||||
|
||||
it.concurrent("works with country US", async () => {
|
||||
const response = await scrape({
|
||||
url: "https://iplocation.com",
|
||||
location: { country: "US" },
|
||||
});
|
||||
|
||||
expect(response.markdown).toContain("| Country | United States |");
|
||||
}, 10000);
|
||||
});
|
||||
|
||||
describe("Screenshot (f-e/sb dependant)", () => {
|
||||
it.concurrent("screenshot format works", async () => {
|
||||
const response = await scrape({
|
||||
url: "http://firecrawl.dev",
|
||||
formats: ["screenshot"]
|
||||
});
|
||||
|
||||
expect(typeof response.screenshot).toBe("string");
|
||||
}, 30000);
|
||||
|
||||
it.concurrent("screenshot@fullPage format works", async () => {
|
||||
const response = await scrape({
|
||||
url: "http://firecrawl.dev",
|
||||
formats: ["screenshot@fullPage"]
|
||||
});
|
||||
|
||||
expect(typeof response.screenshot).toBe("string");
|
||||
}, 30000);
|
||||
});
|
||||
|
||||
describe("Proxy API (f-e dependant)", () => {
|
||||
it.concurrent("undefined works", async () => {
|
||||
await scrape({
|
||||
url: "http://firecrawl.dev",
|
||||
});
|
||||
}, 15000);
|
||||
|
||||
it.concurrent("basic works", async () => {
|
||||
await scrape({
|
||||
url: "http://firecrawl.dev",
|
||||
proxy: "basic",
|
||||
});
|
||||
}, 15000);
|
||||
|
||||
it.concurrent("stealth works", async () => {
|
||||
await scrape({
|
||||
url: "http://firecrawl.dev",
|
||||
proxy: "stealth",
|
||||
});
|
||||
}, 15000);
|
||||
});
|
||||
}
|
||||
|
||||
if (!process.env.TEST_SUITE_SELF_HOSTED || process.env.OPENAI_API_KEY) {
|
||||
describe("JSON format", () => {
|
||||
it.concurrent("works", async () => {
|
||||
const response = await scrape({
|
||||
url: "http://firecrawl.dev",
|
||||
formats: ["json"],
|
||||
jsonOptions: {
|
||||
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
|
||||
schema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
company_mission: {
|
||||
type: "string",
|
||||
},
|
||||
supports_sso: {
|
||||
type: "boolean",
|
||||
},
|
||||
is_open_source: {
|
||||
type: "boolean",
|
||||
},
|
||||
},
|
||||
required: ["company_mission", "supports_sso", "is_open_source"],
|
||||
},
|
||||
required: ["company_mission", "supports_sso", "is_open_source"],
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(response).toHaveProperty("json");
|
||||
expect(response.json).toHaveProperty("company_mission");
|
||||
expect(typeof response.json.company_mission).toBe("string");
|
||||
expect(response.json).toHaveProperty("supports_sso");
|
||||
expect(response.json.supports_sso).toBe(false);
|
||||
expect(typeof response.json.supports_sso).toBe("boolean");
|
||||
expect(response.json).toHaveProperty("is_open_source");
|
||||
expect(response.json.is_open_source).toBe(true);
|
||||
expect(typeof response.json.is_open_source).toBe("boolean");
|
||||
}, 30000);
|
||||
});
|
||||
|
||||
describe("Proxy API (f-e dependant)", () => {
|
||||
it.concurrent("undefined works", async () => {
|
||||
await scrape({
|
||||
url: "http://firecrawl.dev",
|
||||
});
|
||||
}, 15000);
|
||||
|
||||
it.concurrent("basic works", async () => {
|
||||
await scrape({
|
||||
url: "http://firecrawl.dev",
|
||||
proxy: "basic",
|
||||
});
|
||||
}, 15000);
|
||||
|
||||
it.concurrent("stealth works", async () => {
|
||||
await scrape({
|
||||
url: "http://firecrawl.dev",
|
||||
proxy: "stealth",
|
||||
});
|
||||
}, 15000);
|
||||
});
|
||||
});
|
||||
|
||||
expect(response).toHaveProperty("json");
|
||||
expect(response.json).toHaveProperty("company_mission");
|
||||
expect(typeof response.json.company_mission).toBe("string");
|
||||
expect(response.json).toHaveProperty("supports_sso");
|
||||
expect(response.json.supports_sso).toBe(false);
|
||||
expect(typeof response.json.supports_sso).toBe("boolean");
|
||||
expect(response.json).toHaveProperty("is_open_source");
|
||||
expect(response.json.is_open_source).toBe(true);
|
||||
expect(typeof response.json.is_open_source).toBe("boolean");
|
||||
}, 30000);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
@ -27,8 +27,8 @@ async function search(body: SearchRequestInput): Promise<Document> {
|
||||
return raw.body.data;
|
||||
}
|
||||
|
||||
describe("Scrape tests", () => {
|
||||
it("works", async () => {
|
||||
describe("Search tests", () => {
|
||||
it.concurrent("works", async () => {
|
||||
await search({
|
||||
query: "firecrawl"
|
||||
});
|
||||
|
@ -13,13 +13,13 @@ import {
|
||||
getDoneJobsOrderedLength,
|
||||
isCrawlKickoffFinished,
|
||||
} from "../../lib/crawl-redis";
|
||||
import { getScrapeQueue } from "../../services/queue-service";
|
||||
import { getScrapeQueue, QueueFunction } from "../../services/queue-service";
|
||||
import {
|
||||
supabaseGetJobById,
|
||||
supabaseGetJobsById,
|
||||
} from "../../lib/supabase-jobs";
|
||||
import { configDotenv } from "dotenv";
|
||||
import type { Job, JobState } from "bullmq";
|
||||
import type { Job, JobState, Queue } from "bullmq";
|
||||
import { logger } from "../../lib/logger";
|
||||
import { supabase_service } from "../../services/supabase";
|
||||
import { getConcurrencyLimitedJobs } from "../../lib/concurrency-limit";
|
||||
@ -245,7 +245,7 @@ export async function crawlStatusController(
|
||||
|
||||
let totalCount = jobIDs.length;
|
||||
|
||||
if (totalCount === 0) {
|
||||
if (totalCount === 0 && process.env.USE_DB_AUTHENTICATION === "true") {
|
||||
const x = await supabase_service
|
||||
.from('firecrawl_jobs')
|
||||
.select('*', { count: 'exact', head: true })
|
||||
|
@ -1,7 +1,34 @@
|
||||
import { Response } from "express";
|
||||
import { supabaseGetJobsById } from "../../lib/supabase-jobs";
|
||||
import { RequestWithAuth } from "./types";
|
||||
import { getExtract, getExtractExpiry } from "../../lib/extract/extract-redis";
|
||||
import { DBJob, PseudoJob } from "./crawl-status";
|
||||
import { getExtractQueue } from "../../services/queue-service";
|
||||
import { ExtractResult } from "../../lib/extract/extraction-service";
|
||||
import { supabaseGetJobById } from "../../lib/supabase-jobs";
|
||||
|
||||
/**
 * Looks up an extract job by ID and normalises it into a `PseudoJob`.
 *
 * The job is fetched from the extract queue and, only when
 * `USE_DB_AUTHENTICATION` is `"true"`, also via `supabaseGetJobById`.
 * When both sources have the job, the database `docs` take precedence over
 * the queue job's `returnvalue.data`; the remaining fields prefer the queue job.
 *
 * @param id - ID of the extract job to look up.
 * @returns The merged pseudo-job, or `null` when neither source knows the ID.
 */
export async function getExtractJob(id: string): Promise<PseudoJob<ExtractResult> | null> {
  const [bullJob, dbJob] = await Promise.all([
    getExtractQueue().getJob(id),
    // With DB authentication disabled there is no database lookup; Promise.all
    // passes the plain `null` through unchanged.
    (process.env.USE_DB_AUTHENTICATION === "true" ? supabaseGetJobById(id) : null) as Promise<DBJob | null>,
  ]);

  if (!bullJob && !dbJob) return null;

  const data = dbJob?.docs ?? bullJob?.returnvalue?.data;

  const job: PseudoJob<any> = {
    id,
    // Bind to the queue job: `getState` is an instance method, and calling it
    // through this pseudo-job object would otherwise run it with the wrong `this`.
    getState: bullJob
      ? bullJob.getState.bind(bullJob)
      : () => (dbJob!.success ? "completed" : "failed"),
    returnvalue: data,
    data: {
      scrapeOptions: bullJob ? bullJob.data.scrapeOptions : dbJob!.page_options,
    },
    timestamp: bullJob ? bullJob.timestamp : new Date(dbJob!.date_added).valueOf(),
    failedReason: (bullJob ? bullJob.failedReason : dbJob!.message) || undefined,
  };

  return job;
}
|
||||
|
||||
export async function extractStatusController(
|
||||
req: RequestWithAuth<{ jobId: string }, any, any>,
|
||||
@ -16,24 +43,29 @@ export async function extractStatusController(
|
||||
});
|
||||
}
|
||||
|
||||
let data: any[] = [];
|
||||
let data: ExtractResult | [] = [];
|
||||
|
||||
if (extract.status === "completed") {
|
||||
const jobData = await supabaseGetJobsById([req.params.jobId]);
|
||||
if (!jobData || jobData.length === 0) {
|
||||
const jobData = await getExtractJob(req.params.jobId);
|
||||
if (!jobData) {
|
||||
return res.status(404).json({
|
||||
success: false,
|
||||
error: "Job not found",
|
||||
});
|
||||
}
|
||||
|
||||
data = jobData[0].docs;
|
||||
if (!jobData.returnvalue) {
|
||||
// if we got in the split-second where the redis is updated but the bull isn't
|
||||
// just pretend it's still processing - MG
|
||||
extract.status = "processing";
|
||||
} else {
|
||||
data = jobData.returnvalue ?? [];
|
||||
}
|
||||
}
|
||||
|
||||
// console.log(extract.sources);
|
||||
return res.status(200).json({
|
||||
success: extract.status === "failed" ? false : true,
|
||||
data: data,
|
||||
data,
|
||||
status: extract.status,
|
||||
error: extract?.error ?? undefined,
|
||||
expiresAt: (await getExtractExpiry(req.params.jobId)).toISOString(),
|
||||
|
@ -7,7 +7,6 @@ import {
|
||||
} from "../build-prompts";
|
||||
import OpenAI from "openai";
|
||||
import { logger } from "../../../lib/logger";
|
||||
const openai = new OpenAI();
|
||||
|
||||
export async function analyzeSchemaAndPrompt(
|
||||
urls: string[],
|
||||
@ -40,6 +39,7 @@ export async function analyzeSchemaAndPrompt(
|
||||
|
||||
const model = "gpt-4o";
|
||||
|
||||
const openai = new OpenAI();
|
||||
const result = await openai.beta.chat.completions.parse({
|
||||
model: model,
|
||||
messages: [
|
||||
|
@ -48,7 +48,7 @@ interface ExtractServiceOptions {
|
||||
cacheKey?: string;
|
||||
}
|
||||
|
||||
interface ExtractResult {
|
||||
export interface ExtractResult {
|
||||
success: boolean;
|
||||
data?: any;
|
||||
extractId: string;
|
||||
|
@ -3,10 +3,6 @@ import { Document } from "../../../controllers/v1/types";
|
||||
import { logger } from "../../logger";
|
||||
import OpenAI from "openai";
|
||||
|
||||
const openai = new OpenAI({
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
});
|
||||
|
||||
const pinecone = new Pinecone({
|
||||
apiKey: process.env.PINECONE_API_KEY!,
|
||||
});
|
||||
@ -27,6 +23,10 @@ export interface PageMetadata {
|
||||
}
|
||||
|
||||
async function getEmbedding(text: string) {
|
||||
const openai = new OpenAI({
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
});
|
||||
|
||||
const embedding = await openai.embeddings.create({
|
||||
model: "text-embedding-3-small",
|
||||
input: text,
|
||||
|
@ -1,9 +1,5 @@
|
||||
import OpenAI from "openai";
|
||||
|
||||
const openai = new OpenAI({
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
});
|
||||
|
||||
interface Message {
|
||||
role: "system" | "user" | "assistant";
|
||||
content: string;
|
||||
@ -19,6 +15,10 @@ interface GenerateTextOptions {
|
||||
export async function generateText(options: GenerateTextOptions) {
|
||||
const { model, messages, temperature = 0.7, maxTokens } = options;
|
||||
|
||||
const openai = new OpenAI({
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
});
|
||||
|
||||
const completion = await openai.chat.completions.create({
|
||||
model,
|
||||
messages,
|
||||
|
@ -1,14 +1,13 @@
|
||||
import axios from "axios";
|
||||
import { configDotenv } from "dotenv";
|
||||
import OpenAI from "openai";
|
||||
|
||||
configDotenv();
|
||||
|
||||
const openai = new OpenAI({
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
});
|
||||
|
||||
async function getEmbedding(text: string) {
|
||||
const openai = new OpenAI({
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
});
|
||||
|
||||
const embedding = await openai.embeddings.create({
|
||||
model: "text-embedding-3-small",
|
||||
input: text,
|
||||
|
@ -10,6 +10,7 @@ import {
|
||||
UnsupportedFileError,
|
||||
} from "../../error";
|
||||
import { MockState } from "../../lib/mock";
|
||||
import { fireEngineURL } from "./scrape";
|
||||
|
||||
const successSchema = z.object({
|
||||
jobId: z.string(),
|
||||
@ -85,8 +86,6 @@ export async function fireEngineCheckStatus(
|
||||
jobId: string,
|
||||
mock: MockState | null,
|
||||
): Promise<FireEngineCheckStatusSuccess> {
|
||||
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
|
||||
|
||||
const status = await Sentry.startSpan(
|
||||
{
|
||||
name: "fire-engine: Check status",
|
||||
|
@ -3,14 +3,13 @@ import * as Sentry from "@sentry/node";
|
||||
|
||||
import { robustFetch } from "../../lib/fetch";
|
||||
import { MockState } from "../../lib/mock";
|
||||
import { fireEngineURL } from "./scrape";
|
||||
|
||||
export async function fireEngineDelete(
|
||||
logger: Logger,
|
||||
jobId: string,
|
||||
mock: MockState | null,
|
||||
) {
|
||||
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
|
||||
|
||||
await Sentry.startSpan(
|
||||
{
|
||||
name: "fire-engine: Delete scrape",
|
||||
|
@ -65,6 +65,8 @@ const schema = z.object({
|
||||
processing: z.boolean(),
|
||||
});
|
||||
|
||||
export const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL ?? "<mock-fire-engine-url>";
|
||||
|
||||
export async function fireEngineScrape<
|
||||
Engine extends
|
||||
| FireEngineScrapeRequestChromeCDP
|
||||
@ -75,10 +77,6 @@ export async function fireEngineScrape<
|
||||
request: FireEngineScrapeRequestCommon & Engine,
|
||||
mock: MockState | null,
|
||||
): Promise<z.infer<typeof schema>> {
|
||||
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
|
||||
|
||||
// TODO: retries
|
||||
|
||||
const scrapeRequest = await Sentry.startSpan(
|
||||
{
|
||||
name: "fire-engine: Scrape",
|
||||
|
@ -310,7 +310,12 @@ export function buildFallbackList(meta: Meta): {
|
||||
engine: Engine;
|
||||
unsupportedFeatures: Set<FeatureFlag>;
|
||||
}[] {
|
||||
const _engines = [...engines];
|
||||
const _engines: Engine[] = [
|
||||
...engines,
|
||||
|
||||
// enable fire-engine in self-hosted testing environment when mocks are supplied
|
||||
...((!useFireEngine && meta.mock !== null) ? ["fire-engine;chrome-cdp", "fire-engine;playwright", "fire-engine;tlsclient"] as Engine[] : [])
|
||||
];
|
||||
|
||||
if (meta.internalOptions.useCache !== true) {
|
||||
const cacheIndex = _engines.indexOf("cache");
|
||||
|
@ -2,6 +2,7 @@ import { Logger } from "winston";
|
||||
import { z, ZodError } from "zod";
|
||||
import * as Sentry from "@sentry/node";
|
||||
import { MockState, saveMock } from "./mock";
|
||||
import { fireEngineURL } from "../engines/fire-engine/scrape";
|
||||
|
||||
export type RobustFetchParams<Schema extends z.Schema<any>> = {
|
||||
url: string;
|
||||
@ -126,14 +127,13 @@ export async function robustFetch<
|
||||
const makeRequestTypeId = (
|
||||
request: (typeof mock)["requests"][number]["options"],
|
||||
) => {
|
||||
let trueUrl = (process.env.FIRE_ENGINE_BETA_URL && request.url.startsWith(process.env.FIRE_ENGINE_BETA_URL))
|
||||
? request.url.replace(process.env.FIRE_ENGINE_BETA_URL, "<fire-engine>")
|
||||
let trueUrl = request.url.startsWith(fireEngineURL)
|
||||
? request.url.replace(fireEngineURL, "<fire-engine>")
|
||||
: request.url;
|
||||
|
||||
let out = trueUrl + ";" + request.method;
|
||||
if (
|
||||
process.env.FIRE_ENGINE_BETA_URL &&
|
||||
(trueUrl.startsWith("<fire-engine>")) &&
|
||||
trueUrl.startsWith("<fire-engine>") &&
|
||||
request.method === "POST"
|
||||
) {
|
||||
out += "f-e;" + request.body?.engine + ";" + request.body?.url;
|
||||
|
@ -29,9 +29,7 @@ export async function fireEngineMap(
|
||||
});
|
||||
|
||||
if (!process.env.FIRE_ENGINE_BETA_URL) {
|
||||
console.warn(
|
||||
"(v1/map Beta) Results might differ from cloud offering currently.",
|
||||
);
|
||||
logger.warn("(v1/map Beta) Results might differ from cloud offering currently.");
|
||||
return [];
|
||||
}
|
||||
|
||||
|
@ -1,21 +1,18 @@
|
||||
import axios from "axios";
|
||||
import * as cheerio from "cheerio"; // TODO: rustify
|
||||
import { JSDOM } from 'jsdom';
|
||||
import * as querystring from "querystring";
|
||||
import { SearchResult } from "../../src/lib/entities";
|
||||
import { logger } from "../../src/lib/logger";
|
||||
import https from 'https';
|
||||
|
||||
const _useragent_list = [
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0",
|
||||
];
|
||||
const getRandomInt = (min: number, max: number): number => Math.floor(Math.random() * (max - min + 1)) + min;
|
||||
|
||||
function get_useragent(): string {
|
||||
return _useragent_list[Math.floor(Math.random() * _useragent_list.length)];
|
||||
export function get_useragent(): string {
|
||||
const lynx_version = `Lynx/${getRandomInt(2, 3)}.${getRandomInt(8, 9)}.${getRandomInt(0, 2)}`;
|
||||
const libwww_version = `libwww-FM/${getRandomInt(2, 3)}.${getRandomInt(13, 15)}`;
|
||||
const ssl_mm_version = `SSL-MM/${getRandomInt(1, 2)}.${getRandomInt(3, 5)}`;
|
||||
const openssl_version = `OpenSSL/${getRandomInt(1, 3)}.${getRandomInt(0, 4)}.${getRandomInt(0, 9)}`;
|
||||
return `${lynx_version} ${libwww_version} ${ssl_mm_version} ${openssl_version}`;
|
||||
}
|
||||
|
||||
async function _req(
|
||||
@ -31,9 +28,10 @@ async function _req(
|
||||
) {
|
||||
const params = {
|
||||
q: term,
|
||||
num: results, // Number of results to return
|
||||
num: results+2, // Number of results to return
|
||||
hl: lang,
|
||||
gl: country,
|
||||
safe: "active",
|
||||
start: start,
|
||||
};
|
||||
if (tbs) {
|
||||
@ -42,18 +40,25 @@ async function _req(
|
||||
if (filter) {
|
||||
params["filter"] = filter;
|
||||
}
|
||||
var agent = get_useragent();
|
||||
try {
|
||||
const resp = await axios.get("https://www.google.com/search", {
|
||||
headers: {
|
||||
"User-Agent": get_useragent(),
|
||||
"User-Agent": agent,
|
||||
"Accept": "*/*"
|
||||
},
|
||||
params: params,
|
||||
proxy: proxies,
|
||||
timeout: timeout,
|
||||
httpsAgent: new https.Agent({
|
||||
rejectUnauthorized: true
|
||||
}),
|
||||
withCredentials: true
|
||||
});
|
||||
return resp;
|
||||
} catch (error) {
|
||||
if (error.response && error.response.status === 429) {
|
||||
logger.warn("Google Search: Too many requests, try again later.", error.response);
|
||||
throw new Error("Google Search: Too many requests, try again later.");
|
||||
}
|
||||
throw error;
|
||||
@ -100,34 +105,42 @@ export async function googleSearch(
|
||||
tbs,
|
||||
filter,
|
||||
);
|
||||
const $ = cheerio.load(resp.data);
|
||||
const result_block = $("div.g");
|
||||
const dom = new JSDOM(resp.data);
|
||||
const document = dom.window.document;
|
||||
const result_block = document.querySelectorAll("div.ezO2md");
|
||||
let new_results = 0;
|
||||
let unique = true;
|
||||
let fetched_results = 0;
|
||||
|
||||
const fetched_links = new Set<string>();
|
||||
if (result_block.length === 0) {
|
||||
start += 1;
|
||||
attempts += 1;
|
||||
} else {
|
||||
attempts = 0; // Reset attempts if we have results
|
||||
attempts = 0;
|
||||
}
|
||||
result_block.each((index, element) => {
|
||||
const linkElement = $(element).find("a");
|
||||
const link =
|
||||
linkElement && linkElement.attr("href")
|
||||
? linkElement.attr("href")
|
||||
: null;
|
||||
const title = $(element).find("h3");
|
||||
const ogImage = $(element).find("img").eq(1).attr("src");
|
||||
const description_box = $(element).find(
|
||||
"div[style='-webkit-line-clamp:2']",
|
||||
);
|
||||
const answerBox = $(element).find(".mod").text();
|
||||
if (description_box) {
|
||||
const description = description_box.text();
|
||||
if (link && title && description) {
|
||||
start += 1;
|
||||
results.push(new SearchResult(link, title.text(), description));
|
||||
|
||||
for (const result of result_block) {
|
||||
const link_tag = result.querySelector("a[href]") as HTMLAnchorElement;
|
||||
const title_tag = link_tag ? link_tag.querySelector("span.CVA68e") : null;
|
||||
const description_tag = result.querySelector("span.FrIlee");
|
||||
|
||||
if (link_tag && title_tag && description_tag) {
|
||||
const link = decodeURIComponent(link_tag.href.split("&")[0].replace("/url?q=", ""));
|
||||
if (fetched_links.has(link) && unique) continue;
|
||||
fetched_links.add(link);
|
||||
const title = title_tag.textContent || "";
|
||||
const description = description_tag.textContent || "";
|
||||
fetched_results++;
|
||||
new_results++;
|
||||
if (link && title && description) {
|
||||
start += 1
|
||||
results.push(new SearchResult(link, title, description));
|
||||
}
|
||||
if (fetched_results >= num_results) break;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, sleep_interval * 1000),
|
||||
);
|
||||
|
@ -64,7 +64,7 @@ export async function search({
|
||||
timeout,
|
||||
);
|
||||
} catch (error) {
|
||||
logger.error(`Error in search function: ${error}`);
|
||||
logger.error(`Error in search function`, { error });
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
@ -2,6 +2,8 @@ import { Queue } from "bullmq";
|
||||
import { logger } from "../lib/logger";
|
||||
import IORedis from "ioredis";
|
||||
|
||||
export type QueueFunction = () => Queue<any, any, string, any, any, string>;
|
||||
|
||||
let scrapeQueue: Queue;
|
||||
let extractQueue: Queue;
|
||||
let loggingQueue: Queue;
|
||||
|
Loading…
x
Reference in New Issue
Block a user