mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 02:35:55 +08:00
Merge branch 'main' of https://github.com/mendableai/firecrawl
This commit is contained in:
commit
2151ca846c
@ -1,20 +0,0 @@
|
|||||||
name: Clean Every 30 Minutes Before 24h Completed Jobs
|
|
||||||
on:
|
|
||||||
schedule:
|
|
||||||
- cron: '30 * * * *'
|
|
||||||
|
|
||||||
env:
|
|
||||||
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
clean-jobs:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Send GET request to clean jobs
|
|
||||||
run: |
|
|
||||||
response=$(curl --write-out '%{http_code}' --silent --output /dev/null --max-time 180 https://api.firecrawl.dev/admin/${{ secrets.BULL_AUTH_KEY }}/clean-before-24h-complete-jobs)
|
|
||||||
if [ "$response" -ne 200 ]; then
|
|
||||||
echo "Failed to clean jobs. Response: $response"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "Successfully cleaned jobs. Response: $response"
|
|
3
.github/workflows/deploy-image-staging.yml
vendored
3
.github/workflows/deploy-image-staging.yml
vendored
@ -4,9 +4,6 @@ env:
|
|||||||
DOTNET_VERSION: '6.0.x'
|
DOTNET_VERSION: '6.0.x'
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- mog/webscraper-refactor
|
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
5
.github/workflows/deploy-image.yml
vendored
5
.github/workflows/deploy-image.yml
vendored
@ -2,12 +2,13 @@ name: Deploy Images to GHCR
|
|||||||
|
|
||||||
env:
|
env:
|
||||||
DOTNET_VERSION: '6.0.x'
|
DOTNET_VERSION: '6.0.x'
|
||||||
SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
|
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- main
|
- main
|
||||||
|
paths:
|
||||||
|
- apps/api/**
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
@ -29,5 +30,5 @@ jobs:
|
|||||||
|
|
||||||
- name: 'Build Inventory Image'
|
- name: 'Build Inventory Image'
|
||||||
run: |
|
run: |
|
||||||
docker build . --tag ghcr.io/mendableai/firecrawl:latest --secret id=SENTRY_AUTH_TOKEN
|
docker build . --tag ghcr.io/mendableai/firecrawl:latest
|
||||||
docker push ghcr.io/mendableai/firecrawl:latest
|
docker push ghcr.io/mendableai/firecrawl:latest
|
32
.github/workflows/publish-js-sdk.yml
vendored
Normal file
32
.github/workflows/publish-js-sdk.yml
vendored
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
name: Publish JS SDK
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
paths:
|
||||||
|
- apps/js-sdk/firecrawl/package.json
|
||||||
|
|
||||||
|
env:
|
||||||
|
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
publish:
|
||||||
|
name: Publish
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v3
|
||||||
|
with:
|
||||||
|
node-version: "20"
|
||||||
|
- name: Authenticate
|
||||||
|
run: echo "//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}" > ~/.npmrc
|
||||||
|
- name: Publish
|
||||||
|
run: |
|
||||||
|
npm publish
|
||||||
|
sed -i 's/"name": "@mendable\/firecrawl-js"/"name": "@mendable\/firecrawl"/g' package.json
|
||||||
|
npm publish
|
||||||
|
sed -i 's/"name": "@mendable\/firecrawl-js"/"name": "firecrawl"/g' package.json
|
||||||
|
npm publish
|
||||||
|
working-directory: ./apps/js-sdk/firecrawl
|
30
.github/workflows/test-js-sdk.yml
vendored
Normal file
30
.github/workflows/test-js-sdk.yml
vendored
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
name: JS SDK Test Suite
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
paths:
|
||||||
|
- apps/js-sdk/firecrawl/**
|
||||||
|
|
||||||
|
env:
|
||||||
|
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
name: Run tests
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v3
|
||||||
|
with:
|
||||||
|
node-version: "20"
|
||||||
|
cache: "npm"
|
||||||
|
cache-dependency-path: './apps/js-sdk/firecrawl/package-lock.json'
|
||||||
|
- name: Install dependencies
|
||||||
|
run: npm install
|
||||||
|
working-directory: ./apps/js-sdk/firecrawl
|
||||||
|
- name: Run tests
|
||||||
|
run: npm run test
|
||||||
|
working-directory: ./apps/js-sdk/firecrawl
|
138
.github/workflows/test-server-self-host.yml
vendored
Normal file
138
.github/workflows/test-server-self-host.yml
vendored
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
name: Self-hosted Server Test Suite
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
paths:
|
||||||
|
- apps/api/**
|
||||||
|
- apps/playwright-service-ts/**
|
||||||
|
|
||||||
|
env:
|
||||||
|
PORT: 3002
|
||||||
|
REDIS_URL: redis://localhost:6379
|
||||||
|
HOST: 0.0.0.0
|
||||||
|
ENV: ${{ secrets.ENV }}
|
||||||
|
TEST_SUITE_SELF_HOSTED: true
|
||||||
|
USE_GO_MARKDOWN_PARSER: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
name: Run tests
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
ai: ["openai", "no-ai"]
|
||||||
|
search: ["searxng", "google"]
|
||||||
|
engine: ["playwright", "fetch"]
|
||||||
|
proxy: ["proxy", "no-proxy"]
|
||||||
|
fail-fast: false
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
services:
|
||||||
|
redis:
|
||||||
|
image: redis
|
||||||
|
ports:
|
||||||
|
- 6379:6379
|
||||||
|
env:
|
||||||
|
OPENAI_API_KEY: ${{ matrix.ai == 'openai' && secrets.OPENAI_API_KEY || '' }}
|
||||||
|
SEARXNG_ENDPOINT: ${{ matrix.search == 'searxng' && 'http://localhost:3434' || '' }}
|
||||||
|
PLAYWRIGHT_MICROSERVICE_URL: ${{ matrix.engine == 'playwright' && 'http://localhost:3003/scrape' || '' }}
|
||||||
|
PROXY_SERVER: ${{ matrix.proxy == 'proxy' && secrets.PROXY_SERVER || '' }}
|
||||||
|
PROXY_USERNAME: ${{ matrix.proxy == 'proxy' && secrets.PROXY_USERNAME || '' }}
|
||||||
|
PROXY_PASSWORD: ${{ matrix.proxy == 'proxy' && secrets.PROXY_PASSWORD || '' }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- name: Install pnpm
|
||||||
|
uses: pnpm/action-setup@v4
|
||||||
|
with:
|
||||||
|
version: 10
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v3
|
||||||
|
with:
|
||||||
|
node-version: "20"
|
||||||
|
cache: "pnpm"
|
||||||
|
cache-dependency-path: './apps/api/pnpm-lock.yaml'
|
||||||
|
- name: Install dependencies
|
||||||
|
run: pnpm install
|
||||||
|
working-directory: ./apps/api
|
||||||
|
- name: Install Playwright dependencies
|
||||||
|
if: matrix.engine == 'playwright'
|
||||||
|
run: |
|
||||||
|
pnpm install
|
||||||
|
pnpm exec playwright install-deps
|
||||||
|
pnpm exec playwright install
|
||||||
|
working-directory: ./apps/playwright-service-ts
|
||||||
|
- name: Set up Go
|
||||||
|
uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: '1.19'
|
||||||
|
cache-dependency-path: ./apps/api/sharedLibs/go-html-to-md/go.sum
|
||||||
|
- name: Build go-html-to-md
|
||||||
|
run: |
|
||||||
|
go mod tidy
|
||||||
|
go build -o html-to-markdown.so -buildmode=c-shared html-to-markdown.go
|
||||||
|
chmod +x html-to-markdown.so
|
||||||
|
working-directory: ./apps/api/sharedLibs/go-html-to-md
|
||||||
|
- name: Set up SearXNG
|
||||||
|
if: matrix.search == 'searxng'
|
||||||
|
run: |
|
||||||
|
mkdir searxng
|
||||||
|
|
||||||
|
echo "use_default_settings: true
|
||||||
|
search:
|
||||||
|
formats: [html, json, csv]
|
||||||
|
server:
|
||||||
|
secret_key: 'fcsecret'" > searxng/settings.yml
|
||||||
|
|
||||||
|
docker run -d -p 3434:8080 -v "${PWD}/searxng:/etc/searxng" --name searxng searxng/searxng
|
||||||
|
pnpx wait-on tcp:3434 -t 30s
|
||||||
|
working-directory: ./
|
||||||
|
- name: Start server
|
||||||
|
run: npm start > api.log 2>&1 &
|
||||||
|
working-directory: ./apps/api
|
||||||
|
- name: Start worker
|
||||||
|
run: npm run workers > worker.log 2>&1 &
|
||||||
|
working-directory: ./apps/api
|
||||||
|
- name: Start playwright
|
||||||
|
if: matrix.engine == 'playwright'
|
||||||
|
run: npm run dev > playwright.log 2>&1 &
|
||||||
|
working-directory: ./apps/playwright-service-ts
|
||||||
|
env:
|
||||||
|
PORT: 3003
|
||||||
|
- name: Wait for server
|
||||||
|
run: pnpx wait-on tcp:3002 -t 15s
|
||||||
|
- name: Wait for playwright
|
||||||
|
if: matrix.engine == 'playwright'
|
||||||
|
run: pnpx wait-on tcp:3003 -t 15s
|
||||||
|
- name: Run snippet tests
|
||||||
|
run: |
|
||||||
|
npm run test:snips
|
||||||
|
working-directory: ./apps/api
|
||||||
|
- name: Kill instances
|
||||||
|
if: always()
|
||||||
|
run: pkill -9 node
|
||||||
|
- name: Kill SearXNG
|
||||||
|
if: always() && matrix.search == 'searxng'
|
||||||
|
run: |
|
||||||
|
docker logs searxng > searxng/searxng.log 2>&1
|
||||||
|
docker kill searxng
|
||||||
|
working-directory: ./
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
if: always()
|
||||||
|
with:
|
||||||
|
name: Logs (${{ matrix.ai }}, ${{ matrix.search }}, ${{ matrix.engine }}, ${{ matrix.proxy }})
|
||||||
|
path: |
|
||||||
|
./apps/api/api.log
|
||||||
|
./apps/api/worker.log
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
if: always() && matrix.playwright
|
||||||
|
with:
|
||||||
|
name: Playwright Logs (${{ matrix.ai }}, ${{ matrix.search }}, ${{ matrix.proxy }})
|
||||||
|
path: |
|
||||||
|
./apps/playwright-service-ts/playwright.log
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
if: always() && matrix.search == 'searxng'
|
||||||
|
with:
|
||||||
|
name: SearXNG (${{ matrix.ai }}, ${{ matrix.engine }}, ${{ matrix.proxy }})
|
||||||
|
path: |
|
||||||
|
./searxng/searxng.log
|
||||||
|
./searxng/settings.yml
|
@ -1,8 +1,11 @@
|
|||||||
name: CI/CD
|
name: Server Test Suite
|
||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
branches:
|
branches:
|
||||||
- main
|
- main
|
||||||
|
paths:
|
||||||
|
- apps/api/**
|
||||||
# schedule:
|
# schedule:
|
||||||
# - cron: '0 */4 * * *'
|
# - cron: '0 */4 * * *'
|
||||||
|
|
||||||
@ -29,10 +32,11 @@ env:
|
|||||||
USE_DB_AUTHENTICATION: ${{ secrets.USE_DB_AUTHENTICATION }}
|
USE_DB_AUTHENTICATION: ${{ secrets.USE_DB_AUTHENTICATION }}
|
||||||
SERPER_API_KEY: ${{ secrets.SERPER_API_KEY }}
|
SERPER_API_KEY: ${{ secrets.SERPER_API_KEY }}
|
||||||
ENV: ${{ secrets.ENV }}
|
ENV: ${{ secrets.ENV }}
|
||||||
|
USE_GO_MARKDOWN_PARSER: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
pre-deploy:
|
test:
|
||||||
name: Pre-deploy checks
|
name: Run tests
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
services:
|
services:
|
||||||
redis:
|
redis:
|
||||||
@ -47,15 +51,30 @@ jobs:
|
|||||||
oauth-client-id: ${{ secrets.TS_OAUTH_CLIENT_ID }}
|
oauth-client-id: ${{ secrets.TS_OAUTH_CLIENT_ID }}
|
||||||
oauth-secret: ${{ secrets.TS_OAUTH_SECRET }}
|
oauth-secret: ${{ secrets.TS_OAUTH_SECRET }}
|
||||||
tags: tag:ci
|
tags: tag:ci
|
||||||
|
- name: Install pnpm
|
||||||
|
uses: pnpm/action-setup@v4
|
||||||
|
with:
|
||||||
|
version: 10
|
||||||
- name: Set up Node.js
|
- name: Set up Node.js
|
||||||
uses: actions/setup-node@v3
|
uses: actions/setup-node@v3
|
||||||
with:
|
with:
|
||||||
node-version: "20"
|
node-version: "20"
|
||||||
- name: Install pnpm
|
cache: "pnpm"
|
||||||
run: npm install -g pnpm
|
cache-dependency-path: './apps/api/pnpm-lock.yaml'
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: pnpm install
|
run: pnpm install
|
||||||
working-directory: ./apps/api
|
working-directory: ./apps/api
|
||||||
|
- name: Set up Go
|
||||||
|
uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: '1.19'
|
||||||
|
cache-dependency-path: ./apps/api/sharedLibs/go-html-to-md/go.sum
|
||||||
|
- name: Build go-html-to-md
|
||||||
|
run: |
|
||||||
|
go mod tidy
|
||||||
|
go build -o html-to-markdown.so -buildmode=c-shared html-to-markdown.go
|
||||||
|
chmod +x html-to-markdown.so
|
||||||
|
working-directory: ./apps/api/sharedLibs/go-html-to-md
|
||||||
- name: Start the application
|
- name: Start the application
|
||||||
run: npm start &
|
run: npm start &
|
||||||
working-directory: ./apps/api
|
working-directory: ./apps/api
|
@ -95,7 +95,7 @@ curl -X POST https://api.firecrawl.dev/v1/crawl \
|
|||||||
-H 'Authorization: Bearer fc-YOUR_API_KEY' \
|
-H 'Authorization: Bearer fc-YOUR_API_KEY' \
|
||||||
-d '{
|
-d '{
|
||||||
"url": "https://docs.firecrawl.dev",
|
"url": "https://docs.firecrawl.dev",
|
||||||
"limit": 100,
|
"limit": 10,
|
||||||
"scrapeOptions": {
|
"scrapeOptions": {
|
||||||
"formats": ["markdown", "html"]
|
"formats": ["markdown", "html"]
|
||||||
}
|
}
|
||||||
|
100
SELF_HOST.md
100
SELF_HOST.md
@ -34,62 +34,72 @@ Self-hosting Firecrawl is ideal for those who need full control over their scrap
|
|||||||
|
|
||||||
2. Set environment variables
|
2. Set environment variables
|
||||||
|
|
||||||
Create an `.env` in the root directory you can copy over the template in `apps/api/.env.example`
|
Create an `.env` in the root directory using the template below.
|
||||||
|
|
||||||
To start, we won't set up authentication or any optional subservices (pdf parsing, JS blocking support, AI features)
|
|
||||||
|
|
||||||
`.env:`
|
`.env:`
|
||||||
```
|
```
|
||||||
# ===== Required ENVS ======
|
# ===== Required ENVS ======
|
||||||
NUM_WORKERS_PER_QUEUE=8
|
|
||||||
PORT=3002
|
PORT=3002
|
||||||
HOST=0.0.0.0
|
HOST=0.0.0.0
|
||||||
REDIS_URL=redis://redis:6379
|
|
||||||
REDIS_RATE_LIMIT_URL=redis://redis:6379
|
|
||||||
|
|
||||||
## To turn on DB authentication, you need to set up Supabase.
|
# To turn on DB authentication, you need to set up Supabase.
|
||||||
USE_DB_AUTHENTICATION=false
|
USE_DB_AUTHENTICATION=false
|
||||||
|
|
||||||
# ===== Optional ENVS ======
|
# ===== Optional ENVS ======
|
||||||
|
|
||||||
# Supabase Setup (used to support DB authentication, advanced logging, etc.)
|
## === AI features (JSON format on scrape, /extract API) ===
|
||||||
SUPABASE_ANON_TOKEN=
|
# Provide your OpenAI API key here to enable AI features
|
||||||
SUPABASE_URL=
|
# OPENAI_API_KEY=
|
||||||
SUPABASE_SERVICE_TOKEN=
|
|
||||||
|
|
||||||
# Other Optionals
|
## === Proxy ===
|
||||||
TEST_API_KEY= # use if you've set up authentication and want to test with a real API key
|
# PROXY_SERVER can be a full URL (e.g. http://0.1.2.3:1234) or just an IP and port combo (e.g. 0.1.2.3:1234)
|
||||||
SCRAPING_BEE_API_KEY= # use if you'd like to use as a fallback scraper
|
# Do not uncomment PROXY_USERNAME and PROXY_PASSWORD if your proxy is unauthenticated
|
||||||
OPENAI_API_KEY= # add for LLM-dependent features (e.g., image alt generation)
|
# PROXY_SERVER=
|
||||||
BULL_AUTH_KEY= @
|
# PROXY_USERNAME=
|
||||||
PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback
|
# PROXY_PASSWORD=
|
||||||
LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
|
|
||||||
SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
|
## === /search API ===
|
||||||
POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs
|
# By default, the /search API will use Google search.
|
||||||
POSTHOG_HOST= # set if you'd like to send posthog events like job logs
|
|
||||||
|
# You can specify a SearXNG server with the JSON format enabled, if you'd like to use that instead of direct Google.
|
||||||
|
# You can also customize the engines and categories parameters, but the defaults should also work just fine.
|
||||||
|
# SEARXNG_ENDPOINT=http://your.searxng.server
|
||||||
|
# SEARXNG_ENGINES=
|
||||||
|
# SEARXNG_CATEGORIES=
|
||||||
|
|
||||||
|
## === Other ===
|
||||||
|
|
||||||
|
# Supabase Setup (used to support DB authentication, advanced logging, etc.)
|
||||||
|
# SUPABASE_ANON_TOKEN=
|
||||||
|
# SUPABASE_URL=
|
||||||
|
# SUPABASE_SERVICE_TOKEN=
|
||||||
|
|
||||||
|
# Use if you've set up authentication and want to test with a real API key
|
||||||
|
# TEST_API_KEY=
|
||||||
|
|
||||||
|
# You can add this to enable ScrapingBee as a fallback scraping engine.
|
||||||
|
# SCRAPING_BEE_API_KEY=
|
||||||
|
|
||||||
|
# This key lets you access the queue admin panel. Change this if your deployment is publicly accessible.
|
||||||
|
BULL_AUTH_KEY=CHANGEME
|
||||||
|
|
||||||
|
# This is now autoconfigured by the docker-compose.yaml. You shouldn't need to set it.
|
||||||
|
# PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/scrape
|
||||||
|
# REDIS_URL=redis://redis:6379
|
||||||
|
# REDIS_RATE_LIMIT_URL=redis://redis:6379
|
||||||
|
|
||||||
|
# Set if you have a llamaparse key you'd like to use to parse pdfs
|
||||||
|
# LLAMAPARSE_API_KEY=
|
||||||
|
|
||||||
|
# Set if you'd like to send server health status messages to Slack
|
||||||
|
# SLACK_WEBHOOK_URL=
|
||||||
|
|
||||||
|
# Set if you'd like to send posthog events like job logs
|
||||||
|
# POSTHOG_API_KEY=
|
||||||
|
# POSTHOG_HOST=
|
||||||
```
|
```
|
||||||
|
|
||||||
3. *(Optional) Running with TypeScript Playwright Service*
|
3. Build and run the Docker containers:
|
||||||
|
|
||||||
* Update the `docker-compose.yml` file to change the Playwright service:
|
|
||||||
|
|
||||||
```plaintext
|
|
||||||
build: apps/playwright-service
|
|
||||||
```
|
|
||||||
TO
|
|
||||||
```plaintext
|
|
||||||
build: apps/playwright-service-ts
|
|
||||||
```
|
|
||||||
|
|
||||||
* Set the `PLAYWRIGHT_MICROSERVICE_URL` in your `.env` file:
|
|
||||||
|
|
||||||
```plaintext
|
|
||||||
PLAYWRIGHT_MICROSERVICE_URL=http://localhost:3000/scrape
|
|
||||||
```
|
|
||||||
|
|
||||||
* Don't forget to set the proxy server in your `.env` file as needed.
|
|
||||||
|
|
||||||
4. Build and run the Docker containers:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose build
|
docker compose build
|
||||||
@ -98,9 +108,9 @@ POSTHOG_HOST= # set if you'd like to send posthog events like job logs
|
|||||||
|
|
||||||
This will run a local instance of Firecrawl which can be accessed at `http://localhost:3002`.
|
This will run a local instance of Firecrawl which can be accessed at `http://localhost:3002`.
|
||||||
|
|
||||||
You should be able to see the Bull Queue Manager UI on `http://localhost:3002/admin/@/queues`.
|
You should be able to see the Bull Queue Manager UI on `http://localhost:3002/admin/CHANGEME/queues`.
|
||||||
|
|
||||||
5. *(Optional)* Test the API
|
4. *(Optional)* Test the API
|
||||||
|
|
||||||
If you’d like to test the crawl endpoint, you can run this:
|
If you’d like to test the crawl endpoint, you can run this:
|
||||||
|
|
||||||
@ -108,7 +118,7 @@ If you’d like to test the crawl endpoint, you can run this:
|
|||||||
curl -X POST http://localhost:3002/v1/crawl \
|
curl -X POST http://localhost:3002/v1/crawl \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
-d '{
|
-d '{
|
||||||
"url": "https://mendable.ai"
|
"url": "https://firecrawl.dev"
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -19,8 +19,7 @@ RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile
|
|||||||
|
|
||||||
RUN apt-get clean && apt-get update -qq && apt-get install -y ca-certificates && update-ca-certificates
|
RUN apt-get clean && apt-get update -qq && apt-get install -y ca-certificates && update-ca-certificates
|
||||||
RUN pnpm install
|
RUN pnpm install
|
||||||
RUN --mount=type=secret,id=SENTRY_AUTH_TOKEN \
|
RUN pnpm run build
|
||||||
bash -c 'export SENTRY_AUTH_TOKEN="$(cat /run/secrets/SENTRY_AUTH_TOKEN)"; if [ -z $SENTRY_AUTH_TOKEN ]; then pnpm run build:nosentry; else pnpm run build; fi'
|
|
||||||
|
|
||||||
# Install Go
|
# Install Go
|
||||||
FROM golang:1.19 AS go-base
|
FROM golang:1.19 AS go-base
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
"format": "prettier --write \"src/**/*.(js|ts)\"",
|
"format": "prettier --write \"src/**/*.(js|ts)\"",
|
||||||
"flyio": "node dist/src/index.js",
|
"flyio": "node dist/src/index.js",
|
||||||
"start:dev": "nodemon --exec ts-node src/index.ts",
|
"start:dev": "nodemon --exec ts-node src/index.ts",
|
||||||
"build": "tsc && pnpm sentry:sourcemaps",
|
"build": "tsc",
|
||||||
"build:nosentry": "tsc",
|
"build:nosentry": "tsc",
|
||||||
"test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'",
|
"test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'",
|
||||||
"test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
|
"test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
|
||||||
@ -56,6 +56,7 @@
|
|||||||
"typescript": "^5.4.2"
|
"typescript": "^5.4.2"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"jsdom": "^26.0.0",
|
||||||
"@anthropic-ai/sdk": "^0.24.3",
|
"@anthropic-ai/sdk": "^0.24.3",
|
||||||
"@apidevtools/json-schema-ref-parser": "^11.7.3",
|
"@apidevtools/json-schema-ref-parser": "^11.7.3",
|
||||||
"@brillout/import": "^0.2.2",
|
"@brillout/import": "^0.2.2",
|
||||||
|
286
apps/api/pnpm-lock.yaml
generated
286
apps/api/pnpm-lock.yaml
generated
@ -125,6 +125,9 @@ importers:
|
|||||||
joplin-turndown-plugin-gfm:
|
joplin-turndown-plugin-gfm:
|
||||||
specifier: ^1.0.12
|
specifier: ^1.0.12
|
||||||
version: 1.0.12
|
version: 1.0.12
|
||||||
|
jsdom:
|
||||||
|
specifier: ^26.0.0
|
||||||
|
version: 26.0.0
|
||||||
json-schema-to-zod:
|
json-schema-to-zod:
|
||||||
specifier: ^2.3.0
|
specifier: ^2.3.0
|
||||||
version: 2.3.0
|
version: 2.3.0
|
||||||
@ -136,7 +139,7 @@ importers:
|
|||||||
version: 2.9.0
|
version: 2.9.0
|
||||||
langchain:
|
langchain:
|
||||||
specifier: ^0.2.8
|
specifier: ^0.2.8
|
||||||
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||||
languagedetect:
|
languagedetect:
|
||||||
specifier: ^2.0.0
|
specifier: ^2.0.0
|
||||||
version: 2.0.0
|
version: 2.0.0
|
||||||
@ -332,6 +335,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-WApSdLdXEBb/1FUPca2lteASewEfpjEYJ8oXZP+0gExK5qSfsEKBKcA+WjY6Q4wvXwyv0+W6Kvc372pSceib9w==}
|
resolution: {integrity: sha512-WApSdLdXEBb/1FUPca2lteASewEfpjEYJ8oXZP+0gExK5qSfsEKBKcA+WjY6Q4wvXwyv0+W6Kvc372pSceib9w==}
|
||||||
engines: {node: '>= 16'}
|
engines: {node: '>= 16'}
|
||||||
|
|
||||||
|
'@asamuzakjp/css-color@2.8.3':
|
||||||
|
resolution: {integrity: sha512-GIc76d9UI1hCvOATjZPyHFmE5qhRccp3/zGfMPapK3jBi+yocEzp6BBB0UnfRYP9NP4FANqUZYb0hnfs3TM3hw==}
|
||||||
|
|
||||||
'@aws-crypto/crc32@3.0.0':
|
'@aws-crypto/crc32@3.0.0':
|
||||||
resolution: {integrity: sha512-IzSgsrxUcsrejQbPVilIKy16kAT52EwB6zSaI+M3xxIhKh5+aldEyvI+z6erM7TCLB2BJsFrtHjp6/4/sr+3dA==}
|
resolution: {integrity: sha512-IzSgsrxUcsrejQbPVilIKy16kAT52EwB6zSaI+M3xxIhKh5+aldEyvI+z6erM7TCLB2BJsFrtHjp6/4/sr+3dA==}
|
||||||
|
|
||||||
@ -685,6 +691,34 @@ packages:
|
|||||||
resolution: {integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==}
|
resolution: {integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==}
|
||||||
engines: {node: '>=12'}
|
engines: {node: '>=12'}
|
||||||
|
|
||||||
|
'@csstools/color-helpers@5.0.1':
|
||||||
|
resolution: {integrity: sha512-MKtmkA0BX87PKaO1NFRTFH+UnkgnmySQOvNxJubsadusqPEC2aJ9MOQiMceZJJ6oitUl/i0L6u0M1IrmAOmgBA==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
|
'@csstools/css-calc@2.1.1':
|
||||||
|
resolution: {integrity: sha512-rL7kaUnTkL9K+Cvo2pnCieqNpTKgQzy5f+N+5Iuko9HAoasP+xgprVh7KN/MaJVvVL1l0EzQq2MoqBHKSrDrag==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
peerDependencies:
|
||||||
|
'@csstools/css-parser-algorithms': ^3.0.4
|
||||||
|
'@csstools/css-tokenizer': ^3.0.3
|
||||||
|
|
||||||
|
'@csstools/css-color-parser@3.0.7':
|
||||||
|
resolution: {integrity: sha512-nkMp2mTICw32uE5NN+EsJ4f5N+IGFeCFu4bGpiKgb2Pq/7J/MpyLBeQ5ry4KKtRFZaYs6sTmcMYrSRIyj5DFKA==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
peerDependencies:
|
||||||
|
'@csstools/css-parser-algorithms': ^3.0.4
|
||||||
|
'@csstools/css-tokenizer': ^3.0.3
|
||||||
|
|
||||||
|
'@csstools/css-parser-algorithms@3.0.4':
|
||||||
|
resolution: {integrity: sha512-Up7rBoV77rv29d3uKHUIVubz1BTcgyUK72IvCQAbfbMv584xHcGKCKbWh7i8hPrRJ7qU4Y8IO3IY9m+iTB7P3A==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
peerDependencies:
|
||||||
|
'@csstools/css-tokenizer': ^3.0.3
|
||||||
|
|
||||||
|
'@csstools/css-tokenizer@3.0.3':
|
||||||
|
resolution: {integrity: sha512-UJnjoFsmxfKUdNYdWgOB0mWUypuLvAfQPH1+pyvRJs6euowbFkFC6P13w1l8mJyi3vxYMxc9kld5jZEGRQs6bw==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
'@dabh/diagnostics@2.0.3':
|
'@dabh/diagnostics@2.0.3':
|
||||||
resolution: {integrity: sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA==}
|
resolution: {integrity: sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA==}
|
||||||
|
|
||||||
@ -1715,6 +1749,10 @@ packages:
|
|||||||
resolution: {integrity: sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==}
|
resolution: {integrity: sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==}
|
||||||
engines: {node: '>= 14'}
|
engines: {node: '>= 14'}
|
||||||
|
|
||||||
|
agent-base@7.1.3:
|
||||||
|
resolution: {integrity: sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==}
|
||||||
|
engines: {node: '>= 14'}
|
||||||
|
|
||||||
agentkeepalive@4.5.0:
|
agentkeepalive@4.5.0:
|
||||||
resolution: {integrity: sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==}
|
resolution: {integrity: sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==}
|
||||||
engines: {node: '>= 8.0.0'}
|
engines: {node: '>= 8.0.0'}
|
||||||
@ -2141,6 +2179,10 @@ packages:
|
|||||||
resolution: {integrity: sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==}
|
resolution: {integrity: sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==}
|
||||||
engines: {node: '>= 6'}
|
engines: {node: '>= 6'}
|
||||||
|
|
||||||
|
cssstyle@4.2.1:
|
||||||
|
resolution: {integrity: sha512-9+vem03dMXG7gDmZ62uqmRiMRNtinIZ9ZyuF6BdxzfOD+FdN5hretzynkn0ReS2DO2GSw76RWHs0UmJPI2zUjw==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
csv-parse@5.5.6:
|
csv-parse@5.5.6:
|
||||||
resolution: {integrity: sha512-uNpm30m/AGSkLxxy7d9yRXpJQFrZzVWLFBkS+6ngPcZkw/5k3L/jjFuj7tVnEpRn+QgmiXr21nDlhCiUK4ij2A==}
|
resolution: {integrity: sha512-uNpm30m/AGSkLxxy7d9yRXpJQFrZzVWLFBkS+6ngPcZkw/5k3L/jjFuj7tVnEpRn+QgmiXr21nDlhCiUK4ij2A==}
|
||||||
|
|
||||||
@ -2152,6 +2194,10 @@ packages:
|
|||||||
resolution: {integrity: sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==}
|
resolution: {integrity: sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==}
|
||||||
engines: {node: '>= 14'}
|
engines: {node: '>= 14'}
|
||||||
|
|
||||||
|
data-urls@5.0.0:
|
||||||
|
resolution: {integrity: sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
date-fns@3.6.0:
|
date-fns@3.6.0:
|
||||||
resolution: {integrity: sha512-fRHTG8g/Gif+kSh50gaGEdToemgfj74aRX3swtiouboip5JDLAyDE9F11nHMIcvOaXeOC6D7SpNhi7uFyB7Uww==}
|
resolution: {integrity: sha512-fRHTG8g/Gif+kSh50gaGEdToemgfj74aRX3swtiouboip5JDLAyDE9F11nHMIcvOaXeOC6D7SpNhi7uFyB7Uww==}
|
||||||
|
|
||||||
@ -2197,6 +2243,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-9iE1PgSik9HeIIw2JO94IidnE3eBoQrFJ3w7sFuzSX4DpmZ3v5sZpUiV5Swcf6mQEF+Y0ru8Neo+p+nyh2J+hQ==}
|
resolution: {integrity: sha512-9iE1PgSik9HeIIw2JO94IidnE3eBoQrFJ3w7sFuzSX4DpmZ3v5sZpUiV5Swcf6mQEF+Y0ru8Neo+p+nyh2J+hQ==}
|
||||||
engines: {node: '>=10'}
|
engines: {node: '>=10'}
|
||||||
|
|
||||||
|
decimal.js@10.5.0:
|
||||||
|
resolution: {integrity: sha512-8vDa8Qxvr/+d94hSh5P3IJwI5t8/c0KsMp+g8bNw9cY2icONa5aPfvKeieW1WlG0WQYwwhJ7mjui2xtiePQSXw==}
|
||||||
|
|
||||||
dedent@1.5.3:
|
dedent@1.5.3:
|
||||||
resolution: {integrity: sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==}
|
resolution: {integrity: sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==}
|
||||||
peerDependencies:
|
peerDependencies:
|
||||||
@ -2510,6 +2559,10 @@ packages:
|
|||||||
resolution: {integrity: sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==}
|
resolution: {integrity: sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==}
|
||||||
engines: {node: '>= 6'}
|
engines: {node: '>= 6'}
|
||||||
|
|
||||||
|
form-data@4.0.1:
|
||||||
|
resolution: {integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==}
|
||||||
|
engines: {node: '>= 6'}
|
||||||
|
|
||||||
formdata-node@4.4.1:
|
formdata-node@4.4.1:
|
||||||
resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==}
|
resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==}
|
||||||
engines: {node: '>= 12.20'}
|
engines: {node: '>= 12.20'}
|
||||||
@ -2647,6 +2700,10 @@ packages:
|
|||||||
resolution: {integrity: sha512-oWv4T4yJ52iKrufjnyZPkrN0CH3QnrUqdB6In1g5Fe1mia8GmF36gnfNySxoZtxD5+NmYw1EElVXiBk93UeskA==}
|
resolution: {integrity: sha512-oWv4T4yJ52iKrufjnyZPkrN0CH3QnrUqdB6In1g5Fe1mia8GmF36gnfNySxoZtxD5+NmYw1EElVXiBk93UeskA==}
|
||||||
engines: {node: '>=12'}
|
engines: {node: '>=12'}
|
||||||
|
|
||||||
|
html-encoding-sniffer@4.0.0:
|
||||||
|
resolution: {integrity: sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
html-escaper@2.0.2:
|
html-escaper@2.0.2:
|
||||||
resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==}
|
resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==}
|
||||||
|
|
||||||
@ -2686,6 +2743,10 @@ packages:
|
|||||||
resolution: {integrity: sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==}
|
resolution: {integrity: sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==}
|
||||||
engines: {node: '>= 14'}
|
engines: {node: '>= 14'}
|
||||||
|
|
||||||
|
https-proxy-agent@7.0.6:
|
||||||
|
resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==}
|
||||||
|
engines: {node: '>= 14'}
|
||||||
|
|
||||||
human-signals@2.1.0:
|
human-signals@2.1.0:
|
||||||
resolution: {integrity: sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==}
|
resolution: {integrity: sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==}
|
||||||
engines: {node: '>=10.17.0'}
|
engines: {node: '>=10.17.0'}
|
||||||
@ -2798,6 +2859,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==}
|
resolution: {integrity: sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==}
|
||||||
engines: {node: '>=8'}
|
engines: {node: '>=8'}
|
||||||
|
|
||||||
|
is-potential-custom-element-name@1.0.1:
|
||||||
|
resolution: {integrity: sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==}
|
||||||
|
|
||||||
is-retry-allowed@2.2.0:
|
is-retry-allowed@2.2.0:
|
||||||
resolution: {integrity: sha512-XVm7LOeLpTW4jV19QSH38vkswxoLud8sQ57YwJVTPWdiaI9I8keEhGFpBlslyVsgdQy4Opg8QOLb8YRgsyZiQg==}
|
resolution: {integrity: sha512-XVm7LOeLpTW4jV19QSH38vkswxoLud8sQ57YwJVTPWdiaI9I8keEhGFpBlslyVsgdQy4Opg8QOLb8YRgsyZiQg==}
|
||||||
engines: {node: '>=10'}
|
engines: {node: '>=10'}
|
||||||
@ -3012,6 +3076,15 @@ packages:
|
|||||||
jsbn@1.1.0:
|
jsbn@1.1.0:
|
||||||
resolution: {integrity: sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==}
|
resolution: {integrity: sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==}
|
||||||
|
|
||||||
|
jsdom@26.0.0:
|
||||||
|
resolution: {integrity: sha512-BZYDGVAIriBWTpIxYzrXjv3E/4u8+/pSG5bQdIYCbNCGOvsPkDQfTVLAIXAf9ETdCpduCVTkDe2NNZ8NIwUVzw==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
peerDependencies:
|
||||||
|
canvas: ^3.0.0
|
||||||
|
peerDependenciesMeta:
|
||||||
|
canvas:
|
||||||
|
optional: true
|
||||||
|
|
||||||
jsesc@2.5.2:
|
jsesc@2.5.2:
|
||||||
resolution: {integrity: sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==}
|
resolution: {integrity: sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==}
|
||||||
engines: {node: '>=4'}
|
engines: {node: '>=4'}
|
||||||
@ -3298,6 +3371,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-CQl19J/g+Hbjbv4Y3mFNNXFEL/5t/KCg8POCuUqd4rMKjGG+j1ybER83hxV58zL+dFI1PTkt3GNFSHRt+d8qEQ==}
|
resolution: {integrity: sha512-CQl19J/g+Hbjbv4Y3mFNNXFEL/5t/KCg8POCuUqd4rMKjGG+j1ybER83hxV58zL+dFI1PTkt3GNFSHRt+d8qEQ==}
|
||||||
engines: {node: 14 || >=16.14}
|
engines: {node: 14 || >=16.14}
|
||||||
|
|
||||||
|
lru-cache@10.4.3:
|
||||||
|
resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==}
|
||||||
|
|
||||||
lru-cache@5.1.1:
|
lru-cache@5.1.1:
|
||||||
resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==}
|
resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==}
|
||||||
|
|
||||||
@ -3588,6 +3664,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-1MQz1Ed8z2yckoBeSfkQHHO9K1yDRxxtotKSJ9yvcTUUxSvfvzEq5GwBrjjHEpMlq/k5gvXdmJ1SbYxWtpNoVg==}
|
resolution: {integrity: sha512-1MQz1Ed8z2yckoBeSfkQHHO9K1yDRxxtotKSJ9yvcTUUxSvfvzEq5GwBrjjHEpMlq/k5gvXdmJ1SbYxWtpNoVg==}
|
||||||
engines: {node: '>=8'}
|
engines: {node: '>=8'}
|
||||||
|
|
||||||
|
nwsapi@2.2.16:
|
||||||
|
resolution: {integrity: sha512-F1I/bimDpj3ncaNDhfyMWuFqmQDBwDB0Fogc2qpL3BWvkQteFD/8BzWuIRl83rq0DXfm8SGt/HFhLXZyljTXcQ==}
|
||||||
|
|
||||||
object-assign@4.1.1:
|
object-assign@4.1.1:
|
||||||
resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==}
|
resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==}
|
||||||
engines: {node: '>=0.10.0'}
|
engines: {node: '>=0.10.0'}
|
||||||
@ -3697,6 +3776,9 @@ packages:
|
|||||||
parse5@7.1.2:
|
parse5@7.1.2:
|
||||||
resolution: {integrity: sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==}
|
resolution: {integrity: sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==}
|
||||||
|
|
||||||
|
parse5@7.2.1:
|
||||||
|
resolution: {integrity: sha512-BuBYQYlv1ckiPdQi/ohiivi9Sagc9JG+Ozs0r7b/0iK3sKmrb0b9FdWdBbOdx6hBCM/F9Ir82ofnBhtZOjCRPQ==}
|
||||||
|
|
||||||
parseley@0.12.1:
|
parseley@0.12.1:
|
||||||
resolution: {integrity: sha512-e6qHKe3a9HWr0oMRVDTRhKce+bRO8VGQR3NyVwcjwrbhMmFCX9KszEV35+rn4AdilFAq9VPxP/Fe1wC9Qjd2lw==}
|
resolution: {integrity: sha512-e6qHKe3a9HWr0oMRVDTRhKce+bRO8VGQR3NyVwcjwrbhMmFCX9KszEV35+rn4AdilFAq9VPxP/Fe1wC9Qjd2lw==}
|
||||||
|
|
||||||
@ -4015,6 +4097,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-s+pyvQeIKIZ0dx5iJiQk1tPLJAWln39+MI5jtM8wnyws+G5azk+dMnMX0qfbqNetKKNgcWWOdi0sfm+FbQbgdQ==}
|
resolution: {integrity: sha512-s+pyvQeIKIZ0dx5iJiQk1tPLJAWln39+MI5jtM8wnyws+G5azk+dMnMX0qfbqNetKKNgcWWOdi0sfm+FbQbgdQ==}
|
||||||
engines: {node: '>=10.0.0'}
|
engines: {node: '>=10.0.0'}
|
||||||
|
|
||||||
|
rrweb-cssom@0.8.0:
|
||||||
|
resolution: {integrity: sha512-guoltQEx+9aMf2gDZ0s62EcV8lsXR+0w8915TC3ITdn2YueuNjdAYh/levpU9nFaoChh9RUS5ZdQMrKfVEN9tw==}
|
||||||
|
|
||||||
rusha@0.8.14:
|
rusha@0.8.14:
|
||||||
resolution: {integrity: sha512-cLgakCUf6PedEu15t8kbsjnwIFFR2D4RfL+W3iWFJ4iac7z4B0ZI8fxy4R3J956kAI68HclCFGL8MPoUVC3qVA==}
|
resolution: {integrity: sha512-cLgakCUf6PedEu15t8kbsjnwIFFR2D4RfL+W3iWFJ4iac7z4B0ZI8fxy4R3J956kAI68HclCFGL8MPoUVC3qVA==}
|
||||||
|
|
||||||
@ -4034,6 +4119,10 @@ packages:
|
|||||||
sax@1.4.1:
|
sax@1.4.1:
|
||||||
resolution: {integrity: sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==}
|
resolution: {integrity: sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==}
|
||||||
|
|
||||||
|
saxes@6.0.0:
|
||||||
|
resolution: {integrity: sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==}
|
||||||
|
engines: {node: '>=v12.22.7'}
|
||||||
|
|
||||||
scheduler@0.23.2:
|
scheduler@0.23.2:
|
||||||
resolution: {integrity: sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==}
|
resolution: {integrity: sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==}
|
||||||
|
|
||||||
@ -4260,6 +4349,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-SzRP5LQ6Ts2G5NyAa/jg16s8e3R7rfdFjizy1zeoecYWw+nGL+YA1xZvW/+iJmidBGSdLkuvdwTYEyJEb+EiUw==}
|
resolution: {integrity: sha512-SzRP5LQ6Ts2G5NyAa/jg16s8e3R7rfdFjizy1zeoecYWw+nGL+YA1xZvW/+iJmidBGSdLkuvdwTYEyJEb+EiUw==}
|
||||||
engines: {node: '>=0.2.6'}
|
engines: {node: '>=0.2.6'}
|
||||||
|
|
||||||
|
symbol-tree@3.2.4:
|
||||||
|
resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==}
|
||||||
|
|
||||||
systeminformation@5.22.11:
|
systeminformation@5.22.11:
|
||||||
resolution: {integrity: sha512-aLws5yi4KCHTb0BVvbodQY5bY8eW4asMRDTxTW46hqw9lGjACX6TlLdJrkdoHYRB0qs+MekqEq1zG7WDnWE8Ug==}
|
resolution: {integrity: sha512-aLws5yi4KCHTb0BVvbodQY5bY8eW4asMRDTxTW46hqw9lGjACX6TlLdJrkdoHYRB0qs+MekqEq1zG7WDnWE8Ug==}
|
||||||
engines: {node: '>=8.0.0'}
|
engines: {node: '>=8.0.0'}
|
||||||
@ -4315,6 +4407,10 @@ packages:
|
|||||||
resolution: {integrity: sha512-r0eojU4bI8MnHr8c5bNo7lJDdI2qXlWWJk6a9EAFG7vbhTjElYhBVS3/miuE0uOuoLdb8Mc/rVfsmm6eo5o9GA==}
|
resolution: {integrity: sha512-r0eojU4bI8MnHr8c5bNo7lJDdI2qXlWWJk6a9EAFG7vbhTjElYhBVS3/miuE0uOuoLdb8Mc/rVfsmm6eo5o9GA==}
|
||||||
hasBin: true
|
hasBin: true
|
||||||
|
|
||||||
|
tough-cookie@5.1.1:
|
||||||
|
resolution: {integrity: sha512-Ek7HndSVkp10hmHP9V4qZO1u+pn1RU5sI0Fw+jCU3lyvuMZcgqsNgc6CmJJZyByK4Vm/qotGRJlfgAX8q+4JiA==}
|
||||||
|
engines: {node: '>=16'}
|
||||||
|
|
||||||
tr46@0.0.3:
|
tr46@0.0.3:
|
||||||
resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
|
resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
|
||||||
|
|
||||||
@ -4322,6 +4418,10 @@ packages:
|
|||||||
resolution: {integrity: sha512-2lv/66T7e5yNyhAAC4NaKe5nVavzuGJQVVtRYLyQ2OI8tsJ61PMLlelehb0wi2Hx6+hT/OJUWZcw8MjlSRnxvw==}
|
resolution: {integrity: sha512-2lv/66T7e5yNyhAAC4NaKe5nVavzuGJQVVtRYLyQ2OI8tsJ61PMLlelehb0wi2Hx6+hT/OJUWZcw8MjlSRnxvw==}
|
||||||
engines: {node: '>=14'}
|
engines: {node: '>=14'}
|
||||||
|
|
||||||
|
tr46@5.0.0:
|
||||||
|
resolution: {integrity: sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
triple-beam@1.4.1:
|
triple-beam@1.4.1:
|
||||||
resolution: {integrity: sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg==}
|
resolution: {integrity: sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg==}
|
||||||
engines: {node: '>= 14.0.0'}
|
engines: {node: '>= 14.0.0'}
|
||||||
@ -4483,6 +4583,10 @@ packages:
|
|||||||
resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==}
|
resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==}
|
||||||
engines: {node: '>= 0.8'}
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
w3c-xmlserializer@5.0.0:
|
||||||
|
resolution: {integrity: sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
walker@1.0.8:
|
walker@1.0.8:
|
||||||
resolution: {integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==}
|
resolution: {integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==}
|
||||||
|
|
||||||
@ -4505,13 +4609,25 @@ packages:
|
|||||||
resolution: {integrity: sha512-p41ogyeMUrw3jWclHWTQg1k05DSVXPLcVxRTYsXUk+ZooOCZLcoYgPZ/HL/D/N+uQPOtcp1me1WhBEaX02mhWg==}
|
resolution: {integrity: sha512-p41ogyeMUrw3jWclHWTQg1k05DSVXPLcVxRTYsXUk+ZooOCZLcoYgPZ/HL/D/N+uQPOtcp1me1WhBEaX02mhWg==}
|
||||||
engines: {node: '>=12'}
|
engines: {node: '>=12'}
|
||||||
|
|
||||||
|
whatwg-encoding@3.1.1:
|
||||||
|
resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
whatwg-fetch@3.6.20:
|
whatwg-fetch@3.6.20:
|
||||||
resolution: {integrity: sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==}
|
resolution: {integrity: sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==}
|
||||||
|
|
||||||
|
whatwg-mimetype@4.0.0:
|
||||||
|
resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
whatwg-url@13.0.0:
|
whatwg-url@13.0.0:
|
||||||
resolution: {integrity: sha512-9WWbymnqj57+XEuqADHrCJ2eSXzn8WXIW/YSGaZtb2WKAInQ6CHfaUUcTyyver0p8BDg5StLQq8h1vtZuwmOig==}
|
resolution: {integrity: sha512-9WWbymnqj57+XEuqADHrCJ2eSXzn8WXIW/YSGaZtb2WKAInQ6CHfaUUcTyyver0p8BDg5StLQq8h1vtZuwmOig==}
|
||||||
engines: {node: '>=16'}
|
engines: {node: '>=16'}
|
||||||
|
|
||||||
|
whatwg-url@14.1.1:
|
||||||
|
resolution: {integrity: sha512-mDGf9diDad/giZ/Sm9Xi2YcyzaFpbdLpJPr+E9fSkyQ7KpQD4SdFcugkRQYzhmfI4KeV4Qpnn2sKPdo+kmsgRQ==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
whatwg-url@5.0.0:
|
whatwg-url@5.0.0:
|
||||||
resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==}
|
resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==}
|
||||||
|
|
||||||
@ -4583,6 +4699,10 @@ packages:
|
|||||||
utf-8-validate:
|
utf-8-validate:
|
||||||
optional: true
|
optional: true
|
||||||
|
|
||||||
|
xml-name-validator@5.0.0:
|
||||||
|
resolution: {integrity: sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
|
||||||
xml2js@0.6.2:
|
xml2js@0.6.2:
|
||||||
resolution: {integrity: sha512-T4rieHaC1EXcES0Kxxj4JWgaUQHDk+qwHcYOCFHfiwKz7tOVPLq7Hjq9dM1WCMhylqMEfP7hMcOIChvotiZegA==}
|
resolution: {integrity: sha512-T4rieHaC1EXcES0Kxxj4JWgaUQHDk+qwHcYOCFHfiwKz7tOVPLq7Hjq9dM1WCMhylqMEfP7hMcOIChvotiZegA==}
|
||||||
engines: {node: '>=4.0.0'}
|
engines: {node: '>=4.0.0'}
|
||||||
@ -4595,6 +4715,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==}
|
resolution: {integrity: sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==}
|
||||||
engines: {node: '>=4.0'}
|
engines: {node: '>=4.0'}
|
||||||
|
|
||||||
|
xmlchars@2.2.0:
|
||||||
|
resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==}
|
||||||
|
|
||||||
xtend@4.0.2:
|
xtend@4.0.2:
|
||||||
resolution: {integrity: sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==}
|
resolution: {integrity: sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==}
|
||||||
engines: {node: '>=0.4'}
|
engines: {node: '>=0.4'}
|
||||||
@ -4675,6 +4798,14 @@ snapshots:
|
|||||||
'@types/json-schema': 7.0.15
|
'@types/json-schema': 7.0.15
|
||||||
js-yaml: 4.1.0
|
js-yaml: 4.1.0
|
||||||
|
|
||||||
|
'@asamuzakjp/css-color@2.8.3':
|
||||||
|
dependencies:
|
||||||
|
'@csstools/css-calc': 2.1.1(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)
|
||||||
|
'@csstools/css-color-parser': 3.0.7(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)
|
||||||
|
'@csstools/css-parser-algorithms': 3.0.4(@csstools/css-tokenizer@3.0.3)
|
||||||
|
'@csstools/css-tokenizer': 3.0.3
|
||||||
|
lru-cache: 10.4.3
|
||||||
|
|
||||||
'@aws-crypto/crc32@3.0.0':
|
'@aws-crypto/crc32@3.0.0':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@aws-crypto/util': 3.0.0
|
'@aws-crypto/util': 3.0.0
|
||||||
@ -5413,6 +5544,26 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
'@jridgewell/trace-mapping': 0.3.9
|
'@jridgewell/trace-mapping': 0.3.9
|
||||||
|
|
||||||
|
'@csstools/color-helpers@5.0.1': {}
|
||||||
|
|
||||||
|
'@csstools/css-calc@2.1.1(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)':
|
||||||
|
dependencies:
|
||||||
|
'@csstools/css-parser-algorithms': 3.0.4(@csstools/css-tokenizer@3.0.3)
|
||||||
|
'@csstools/css-tokenizer': 3.0.3
|
||||||
|
|
||||||
|
'@csstools/css-color-parser@3.0.7(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)':
|
||||||
|
dependencies:
|
||||||
|
'@csstools/color-helpers': 5.0.1
|
||||||
|
'@csstools/css-calc': 2.1.1(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)
|
||||||
|
'@csstools/css-parser-algorithms': 3.0.4(@csstools/css-tokenizer@3.0.3)
|
||||||
|
'@csstools/css-tokenizer': 3.0.3
|
||||||
|
|
||||||
|
'@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3)':
|
||||||
|
dependencies:
|
||||||
|
'@csstools/css-tokenizer': 3.0.3
|
||||||
|
|
||||||
|
'@csstools/css-tokenizer@3.0.3': {}
|
||||||
|
|
||||||
'@dabh/diagnostics@2.0.3':
|
'@dabh/diagnostics@2.0.3':
|
||||||
dependencies:
|
dependencies:
|
||||||
colorspace: 1.1.4
|
colorspace: 1.1.4
|
||||||
@ -5642,13 +5793,13 @@ snapshots:
|
|||||||
|
|
||||||
'@jsdevtools/ono@7.1.3': {}
|
'@jsdevtools/ono@7.1.3': {}
|
||||||
|
|
||||||
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
||||||
dependencies:
|
dependencies:
|
||||||
ansi-styles: 5.2.0
|
ansi-styles: 5.2.0
|
||||||
camelcase: 6.3.0
|
camelcase: 6.3.0
|
||||||
decamelize: 1.2.0
|
decamelize: 1.2.0
|
||||||
js-tiktoken: 1.0.12
|
js-tiktoken: 1.0.12
|
||||||
langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
|
langsmith: 0.1.34(7lljbsleilzgkaubvlq4ipicvq)
|
||||||
ml-distance: 4.0.1
|
ml-distance: 4.0.1
|
||||||
mustache: 4.2.0
|
mustache: 4.2.0
|
||||||
p-queue: 6.6.2
|
p-queue: 6.6.2
|
||||||
@ -5660,9 +5811,9 @@ snapshots:
|
|||||||
- langchain
|
- langchain
|
||||||
- openai
|
- openai
|
||||||
|
|
||||||
'@langchain/openai@0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
|
'@langchain/openai@0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||||
js-tiktoken: 1.0.12
|
js-tiktoken: 1.0.12
|
||||||
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
|
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
|
||||||
zod: 3.23.8
|
zod: 3.23.8
|
||||||
@ -5671,9 +5822,9 @@ snapshots:
|
|||||||
- encoding
|
- encoding
|
||||||
- langchain
|
- langchain
|
||||||
|
|
||||||
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
||||||
dependencies:
|
dependencies:
|
||||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||||
js-tiktoken: 1.0.12
|
js-tiktoken: 1.0.12
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- langchain
|
- langchain
|
||||||
@ -6811,6 +6962,8 @@ snapshots:
|
|||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- supports-color
|
- supports-color
|
||||||
|
|
||||||
|
agent-base@7.1.3: {}
|
||||||
|
|
||||||
agentkeepalive@4.5.0:
|
agentkeepalive@4.5.0:
|
||||||
dependencies:
|
dependencies:
|
||||||
humanize-ms: 1.2.1
|
humanize-ms: 1.2.1
|
||||||
@ -7321,12 +7474,22 @@ snapshots:
|
|||||||
|
|
||||||
css-what@6.1.0: {}
|
css-what@6.1.0: {}
|
||||||
|
|
||||||
|
cssstyle@4.2.1:
|
||||||
|
dependencies:
|
||||||
|
'@asamuzakjp/css-color': 2.8.3
|
||||||
|
rrweb-cssom: 0.8.0
|
||||||
|
|
||||||
csv-parse@5.5.6: {}
|
csv-parse@5.5.6: {}
|
||||||
|
|
||||||
data-uri-to-buffer@4.0.1: {}
|
data-uri-to-buffer@4.0.1: {}
|
||||||
|
|
||||||
data-uri-to-buffer@6.0.2: {}
|
data-uri-to-buffer@6.0.2: {}
|
||||||
|
|
||||||
|
data-urls@5.0.0:
|
||||||
|
dependencies:
|
||||||
|
whatwg-mimetype: 4.0.0
|
||||||
|
whatwg-url: 14.1.1
|
||||||
|
|
||||||
date-fns@3.6.0: {}
|
date-fns@3.6.0: {}
|
||||||
|
|
||||||
debug@2.6.9:
|
debug@2.6.9:
|
||||||
@ -7351,6 +7514,8 @@ snapshots:
|
|||||||
|
|
||||||
decamelize@4.0.0: {}
|
decamelize@4.0.0: {}
|
||||||
|
|
||||||
|
decimal.js@10.5.0: {}
|
||||||
|
|
||||||
dedent@1.5.3: {}
|
dedent@1.5.3: {}
|
||||||
|
|
||||||
deepmerge@4.3.1: {}
|
deepmerge@4.3.1: {}
|
||||||
@ -7661,6 +7826,12 @@ snapshots:
|
|||||||
combined-stream: 1.0.8
|
combined-stream: 1.0.8
|
||||||
mime-types: 2.1.35
|
mime-types: 2.1.35
|
||||||
|
|
||||||
|
form-data@4.0.1:
|
||||||
|
dependencies:
|
||||||
|
asynckit: 0.4.0
|
||||||
|
combined-stream: 1.0.8
|
||||||
|
mime-types: 2.1.35
|
||||||
|
|
||||||
formdata-node@4.4.1:
|
formdata-node@4.4.1:
|
||||||
dependencies:
|
dependencies:
|
||||||
node-domexception: 1.0.0
|
node-domexception: 1.0.0
|
||||||
@ -7795,6 +7966,10 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
whatwg-encoding: 2.0.0
|
whatwg-encoding: 2.0.0
|
||||||
|
|
||||||
|
html-encoding-sniffer@4.0.0:
|
||||||
|
dependencies:
|
||||||
|
whatwg-encoding: 3.1.1
|
||||||
|
|
||||||
html-escaper@2.0.2: {}
|
html-escaper@2.0.2: {}
|
||||||
|
|
||||||
html-to-text@9.0.5:
|
html-to-text@9.0.5:
|
||||||
@ -7875,6 +8050,13 @@ snapshots:
|
|||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- supports-color
|
- supports-color
|
||||||
|
|
||||||
|
https-proxy-agent@7.0.6:
|
||||||
|
dependencies:
|
||||||
|
agent-base: 7.1.3
|
||||||
|
debug: 4.3.5
|
||||||
|
transitivePeerDependencies:
|
||||||
|
- supports-color
|
||||||
|
|
||||||
human-signals@2.1.0: {}
|
human-signals@2.1.0: {}
|
||||||
|
|
||||||
humanize-ms@1.2.1:
|
humanize-ms@1.2.1:
|
||||||
@ -7984,6 +8166,8 @@ snapshots:
|
|||||||
|
|
||||||
is-plain-obj@2.1.0: {}
|
is-plain-obj@2.1.0: {}
|
||||||
|
|
||||||
|
is-potential-custom-element-name@1.0.1: {}
|
||||||
|
|
||||||
is-retry-allowed@2.2.0: {}
|
is-retry-allowed@2.2.0: {}
|
||||||
|
|
||||||
is-stream@2.0.1: {}
|
is-stream@2.0.1: {}
|
||||||
@ -8400,6 +8584,34 @@ snapshots:
|
|||||||
|
|
||||||
jsbn@1.1.0: {}
|
jsbn@1.1.0: {}
|
||||||
|
|
||||||
|
jsdom@26.0.0:
|
||||||
|
dependencies:
|
||||||
|
cssstyle: 4.2.1
|
||||||
|
data-urls: 5.0.0
|
||||||
|
decimal.js: 10.5.0
|
||||||
|
form-data: 4.0.1
|
||||||
|
html-encoding-sniffer: 4.0.0
|
||||||
|
http-proxy-agent: 7.0.2
|
||||||
|
https-proxy-agent: 7.0.6
|
||||||
|
is-potential-custom-element-name: 1.0.1
|
||||||
|
nwsapi: 2.2.16
|
||||||
|
parse5: 7.2.1
|
||||||
|
rrweb-cssom: 0.8.0
|
||||||
|
saxes: 6.0.0
|
||||||
|
symbol-tree: 3.2.4
|
||||||
|
tough-cookie: 5.1.1
|
||||||
|
w3c-xmlserializer: 5.0.0
|
||||||
|
webidl-conversions: 7.0.0
|
||||||
|
whatwg-encoding: 3.1.1
|
||||||
|
whatwg-mimetype: 4.0.0
|
||||||
|
whatwg-url: 14.1.1
|
||||||
|
ws: 8.18.0
|
||||||
|
xml-name-validator: 5.0.0
|
||||||
|
transitivePeerDependencies:
|
||||||
|
- bufferutil
|
||||||
|
- supports-color
|
||||||
|
- utf-8-validate
|
||||||
|
|
||||||
jsesc@2.5.2: {}
|
jsesc@2.5.2: {}
|
||||||
|
|
||||||
json-parse-even-better-errors@2.3.1: {}
|
json-parse-even-better-errors@2.3.1: {}
|
||||||
@ -8435,17 +8647,17 @@ snapshots:
|
|||||||
|
|
||||||
kuler@2.0.0: {}
|
kuler@2.0.0: {}
|
||||||
|
|
||||||
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
|
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
|
||||||
dependencies:
|
dependencies:
|
||||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||||
'@langchain/openai': 0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
|
'@langchain/openai': 0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
|
||||||
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||||
binary-extensions: 2.3.0
|
binary-extensions: 2.3.0
|
||||||
js-tiktoken: 1.0.12
|
js-tiktoken: 1.0.12
|
||||||
js-yaml: 4.1.0
|
js-yaml: 4.1.0
|
||||||
jsonpointer: 5.0.1
|
jsonpointer: 5.0.1
|
||||||
langchainhub: 0.0.11
|
langchainhub: 0.0.11
|
||||||
langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
|
langsmith: 0.1.34(7lljbsleilzgkaubvlq4ipicvq)
|
||||||
ml-distance: 4.0.1
|
ml-distance: 4.0.1
|
||||||
openapi-types: 12.1.3
|
openapi-types: 12.1.3
|
||||||
p-retry: 4.6.2
|
p-retry: 4.6.2
|
||||||
@ -8463,6 +8675,7 @@ snapshots:
|
|||||||
handlebars: 4.7.8
|
handlebars: 4.7.8
|
||||||
html-to-text: 9.0.5
|
html-to-text: 9.0.5
|
||||||
ioredis: 5.4.1
|
ioredis: 5.4.1
|
||||||
|
jsdom: 26.0.0
|
||||||
mammoth: 1.7.2
|
mammoth: 1.7.2
|
||||||
mongodb: 6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3)
|
mongodb: 6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3)
|
||||||
pdf-parse: 1.1.1
|
pdf-parse: 1.1.1
|
||||||
@ -8475,7 +8688,7 @@ snapshots:
|
|||||||
|
|
||||||
langchainhub@0.0.11: {}
|
langchainhub@0.0.11: {}
|
||||||
|
|
||||||
langsmith@0.1.34(npkyd6f7wyl3urgrzoxaktl5a4):
|
langsmith@0.1.34(7lljbsleilzgkaubvlq4ipicvq):
|
||||||
dependencies:
|
dependencies:
|
||||||
'@types/uuid': 9.0.8
|
'@types/uuid': 9.0.8
|
||||||
commander: 10.0.1
|
commander: 10.0.1
|
||||||
@ -8484,8 +8697,8 @@ snapshots:
|
|||||||
p-retry: 4.6.2
|
p-retry: 4.6.2
|
||||||
uuid: 9.0.1
|
uuid: 9.0.1
|
||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||||
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||||
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
|
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
|
||||||
|
|
||||||
languagedetect@2.0.0: {}
|
languagedetect@2.0.0: {}
|
||||||
@ -8554,6 +8767,8 @@ snapshots:
|
|||||||
|
|
||||||
lru-cache@10.3.0: {}
|
lru-cache@10.3.0: {}
|
||||||
|
|
||||||
|
lru-cache@10.4.3: {}
|
||||||
|
|
||||||
lru-cache@5.1.1:
|
lru-cache@5.1.1:
|
||||||
dependencies:
|
dependencies:
|
||||||
yallist: 3.1.1
|
yallist: 3.1.1
|
||||||
@ -8849,6 +9064,8 @@ snapshots:
|
|||||||
|
|
||||||
num-sort@2.1.0: {}
|
num-sort@2.1.0: {}
|
||||||
|
|
||||||
|
nwsapi@2.2.16: {}
|
||||||
|
|
||||||
object-assign@4.1.1: {}
|
object-assign@4.1.1: {}
|
||||||
|
|
||||||
object-inspect@1.13.1: {}
|
object-inspect@1.13.1: {}
|
||||||
@ -8979,6 +9196,10 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
entities: 4.5.0
|
entities: 4.5.0
|
||||||
|
|
||||||
|
parse5@7.2.1:
|
||||||
|
dependencies:
|
||||||
|
entities: 4.5.0
|
||||||
|
|
||||||
parseley@0.12.1:
|
parseley@0.12.1:
|
||||||
dependencies:
|
dependencies:
|
||||||
leac: 0.6.0
|
leac: 0.6.0
|
||||||
@ -9321,6 +9542,8 @@ snapshots:
|
|||||||
|
|
||||||
robots-parser@3.0.1: {}
|
robots-parser@3.0.1: {}
|
||||||
|
|
||||||
|
rrweb-cssom@0.8.0: {}
|
||||||
|
|
||||||
rusha@0.8.14: {}
|
rusha@0.8.14: {}
|
||||||
|
|
||||||
safe-buffer@5.1.2: {}
|
safe-buffer@5.1.2: {}
|
||||||
@ -9333,6 +9556,10 @@ snapshots:
|
|||||||
|
|
||||||
sax@1.4.1: {}
|
sax@1.4.1: {}
|
||||||
|
|
||||||
|
saxes@6.0.0:
|
||||||
|
dependencies:
|
||||||
|
xmlchars: 2.2.0
|
||||||
|
|
||||||
scheduler@0.23.2:
|
scheduler@0.23.2:
|
||||||
dependencies:
|
dependencies:
|
||||||
loose-envify: 1.4.0
|
loose-envify: 1.4.0
|
||||||
@ -9583,6 +9810,8 @@ snapshots:
|
|||||||
|
|
||||||
sylvester@0.0.12: {}
|
sylvester@0.0.12: {}
|
||||||
|
|
||||||
|
symbol-tree@3.2.4: {}
|
||||||
|
|
||||||
systeminformation@5.22.11: {}
|
systeminformation@5.22.11: {}
|
||||||
|
|
||||||
tar-fs@3.0.5:
|
tar-fs@3.0.5:
|
||||||
@ -9640,12 +9869,20 @@ snapshots:
|
|||||||
|
|
||||||
touch@3.1.1: {}
|
touch@3.1.1: {}
|
||||||
|
|
||||||
|
tough-cookie@5.1.1:
|
||||||
|
dependencies:
|
||||||
|
tldts: 6.1.75
|
||||||
|
|
||||||
tr46@0.0.3: {}
|
tr46@0.0.3: {}
|
||||||
|
|
||||||
tr46@4.1.1:
|
tr46@4.1.1:
|
||||||
dependencies:
|
dependencies:
|
||||||
punycode: 2.3.1
|
punycode: 2.3.1
|
||||||
|
|
||||||
|
tr46@5.0.0:
|
||||||
|
dependencies:
|
||||||
|
punycode: 2.3.1
|
||||||
|
|
||||||
triple-beam@1.4.1: {}
|
triple-beam@1.4.1: {}
|
||||||
|
|
||||||
ts-jest@29.1.4(@babel/core@7.24.6)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.6))(jest@29.7.0(@types/node@20.14.1)(ts-node@10.9.2(@types/node@20.14.1)(typescript@5.4.5)))(typescript@5.4.5):
|
ts-jest@29.1.4(@babel/core@7.24.6)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.6))(jest@29.7.0(@types/node@20.14.1)(ts-node@10.9.2(@types/node@20.14.1)(typescript@5.4.5)))(typescript@5.4.5):
|
||||||
@ -9777,6 +10014,10 @@ snapshots:
|
|||||||
|
|
||||||
vary@1.1.2: {}
|
vary@1.1.2: {}
|
||||||
|
|
||||||
|
w3c-xmlserializer@5.0.0:
|
||||||
|
dependencies:
|
||||||
|
xml-name-validator: 5.0.0
|
||||||
|
|
||||||
walker@1.0.8:
|
walker@1.0.8:
|
||||||
dependencies:
|
dependencies:
|
||||||
makeerror: 1.0.12
|
makeerror: 1.0.12
|
||||||
@ -9793,13 +10034,24 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
iconv-lite: 0.6.3
|
iconv-lite: 0.6.3
|
||||||
|
|
||||||
|
whatwg-encoding@3.1.1:
|
||||||
|
dependencies:
|
||||||
|
iconv-lite: 0.6.3
|
||||||
|
|
||||||
whatwg-fetch@3.6.20: {}
|
whatwg-fetch@3.6.20: {}
|
||||||
|
|
||||||
|
whatwg-mimetype@4.0.0: {}
|
||||||
|
|
||||||
whatwg-url@13.0.0:
|
whatwg-url@13.0.0:
|
||||||
dependencies:
|
dependencies:
|
||||||
tr46: 4.1.1
|
tr46: 4.1.1
|
||||||
webidl-conversions: 7.0.0
|
webidl-conversions: 7.0.0
|
||||||
|
|
||||||
|
whatwg-url@14.1.1:
|
||||||
|
dependencies:
|
||||||
|
tr46: 5.0.0
|
||||||
|
webidl-conversions: 7.0.0
|
||||||
|
|
||||||
whatwg-url@5.0.0:
|
whatwg-url@5.0.0:
|
||||||
dependencies:
|
dependencies:
|
||||||
tr46: 0.0.3
|
tr46: 0.0.3
|
||||||
@ -9868,6 +10120,8 @@ snapshots:
|
|||||||
|
|
||||||
ws@8.18.0: {}
|
ws@8.18.0: {}
|
||||||
|
|
||||||
|
xml-name-validator@5.0.0: {}
|
||||||
|
|
||||||
xml2js@0.6.2:
|
xml2js@0.6.2:
|
||||||
dependencies:
|
dependencies:
|
||||||
sax: 1.4.1
|
sax: 1.4.1
|
||||||
@ -9877,6 +10131,8 @@ snapshots:
|
|||||||
|
|
||||||
xmlbuilder@11.0.1: {}
|
xmlbuilder@11.0.1: {}
|
||||||
|
|
||||||
|
xmlchars@2.2.0: {}
|
||||||
|
|
||||||
xtend@4.0.2: {}
|
xtend@4.0.2: {}
|
||||||
|
|
||||||
y18n@5.0.8: {}
|
y18n@5.0.8: {}
|
||||||
|
@ -30,7 +30,7 @@ async function batchScrape(body: BatchScrapeRequestInput): ReturnType<typeof bat
|
|||||||
x = await batchScrapeStatus(bss.body.id);
|
x = await batchScrapeStatus(bss.body.id);
|
||||||
expect(x.statusCode).toBe(200);
|
expect(x.statusCode).toBe(200);
|
||||||
expect(typeof x.body.status).toBe("string");
|
expect(typeof x.body.status).toBe("string");
|
||||||
} while (x.body.status !== "completed")
|
} while (x.body.status === "scraping");
|
||||||
|
|
||||||
expectBatchScrapeToSucceed(x);
|
expectBatchScrapeToSucceed(x);
|
||||||
return x;
|
return x;
|
||||||
@ -53,40 +53,51 @@ function expectBatchScrapeToSucceed(response: Awaited<ReturnType<typeof batchScr
|
|||||||
}
|
}
|
||||||
|
|
||||||
describe("Batch scrape tests", () => {
|
describe("Batch scrape tests", () => {
|
||||||
describe("JSON format", () => {
|
|
||||||
it.concurrent("works", async () => {
|
it.concurrent("works", async () => {
|
||||||
const response = await batchScrape({
|
const response = await batchScrape({
|
||||||
urls: ["http://firecrawl.dev"],
|
urls: ["http://firecrawl.dev"]
|
||||||
formats: ["json"],
|
|
||||||
jsonOptions: {
|
|
||||||
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
|
|
||||||
schema: {
|
|
||||||
type: "object",
|
|
||||||
properties: {
|
|
||||||
company_mission: {
|
|
||||||
type: "string",
|
|
||||||
},
|
|
||||||
supports_sso: {
|
|
||||||
type: "boolean",
|
|
||||||
},
|
|
||||||
is_open_source: {
|
|
||||||
type: "boolean",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
required: ["company_mission", "supports_sso", "is_open_source"],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(response.body.data[0]).toHaveProperty("json");
|
expect(response.body.data[0]).toHaveProperty("markdown");
|
||||||
expect(response.body.data[0].json).toHaveProperty("company_mission");
|
expect(response.body.data[0].markdown).toContain("Firecrawl");
|
||||||
expect(typeof response.body.data[0].json.company_mission).toBe("string");
|
|
||||||
expect(response.body.data[0].json).toHaveProperty("supports_sso");
|
|
||||||
expect(response.body.data[0].json.supports_sso).toBe(false);
|
|
||||||
expect(typeof response.body.data[0].json.supports_sso).toBe("boolean");
|
|
||||||
expect(response.body.data[0].json).toHaveProperty("is_open_source");
|
|
||||||
expect(response.body.data[0].json.is_open_source).toBe(true);
|
|
||||||
expect(typeof response.body.data[0].json.is_open_source).toBe("boolean");
|
|
||||||
}, 30000);
|
}, 30000);
|
||||||
});
|
|
||||||
|
if (!process.env.TEST_SUITE_SELF_HOSTED) {
|
||||||
|
describe("JSON format", () => {
|
||||||
|
it.concurrent("works", async () => {
|
||||||
|
const response = await batchScrape({
|
||||||
|
urls: ["http://firecrawl.dev"],
|
||||||
|
formats: ["json"],
|
||||||
|
jsonOptions: {
|
||||||
|
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
|
||||||
|
schema: {
|
||||||
|
type: "object",
|
||||||
|
properties: {
|
||||||
|
company_mission: {
|
||||||
|
type: "string",
|
||||||
|
},
|
||||||
|
supports_sso: {
|
||||||
|
type: "boolean",
|
||||||
|
},
|
||||||
|
is_open_source: {
|
||||||
|
type: "boolean",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
required: ["company_mission", "supports_sso", "is_open_source"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(response.body.data[0]).toHaveProperty("json");
|
||||||
|
expect(response.body.data[0].json).toHaveProperty("company_mission");
|
||||||
|
expect(typeof response.body.data[0].json.company_mission).toBe("string");
|
||||||
|
expect(response.body.data[0].json).toHaveProperty("supports_sso");
|
||||||
|
expect(response.body.data[0].json.supports_sso).toBe(false);
|
||||||
|
expect(typeof response.body.data[0].json.supports_sso).toBe("boolean");
|
||||||
|
expect(response.body.data[0].json).toHaveProperty("is_open_source");
|
||||||
|
expect(response.body.data[0].json.is_open_source).toBe(true);
|
||||||
|
expect(typeof response.body.data[0].json.is_open_source).toBe("boolean");
|
||||||
|
}, 30000);
|
||||||
|
});
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
@ -30,7 +30,7 @@ async function crawl(body: CrawlRequestInput): ReturnType<typeof crawlStatus> {
|
|||||||
x = await crawlStatus(cs.body.id);
|
x = await crawlStatus(cs.body.id);
|
||||||
expect(x.statusCode).toBe(200);
|
expect(x.statusCode).toBe(200);
|
||||||
expect(typeof x.body.status).toBe("string");
|
expect(typeof x.body.status).toBe("string");
|
||||||
} while (x.body.status !== "completed")
|
} while (x.body.status === "scraping");
|
||||||
|
|
||||||
expectCrawlToSucceed(x);
|
expectCrawlToSucceed(x);
|
||||||
return x;
|
return x;
|
||||||
|
@ -30,7 +30,7 @@ async function extract(body: ExtractRequestInput): Promise<ExtractResponse> {
|
|||||||
x = await extractStatus(es.body.id);
|
x = await extractStatus(es.body.id);
|
||||||
expect(x.statusCode).toBe(200);
|
expect(x.statusCode).toBe(200);
|
||||||
expect(typeof x.body.status).toBe("string");
|
expect(typeof x.body.status).toBe("string");
|
||||||
} while (x.body.status !== "completed");
|
} while (x.body.status === "processing");
|
||||||
|
|
||||||
expectExtractToSucceed(x);
|
expectExtractToSucceed(x);
|
||||||
return x.body;
|
return x.body;
|
||||||
@ -51,31 +51,37 @@ function expectExtractToSucceed(response: Awaited<ReturnType<typeof extractStatu
|
|||||||
}
|
}
|
||||||
|
|
||||||
describe("Extract tests", () => {
|
describe("Extract tests", () => {
|
||||||
it.concurrent("works", async () => {
|
if (!process.env.TEST_SUITE_SELF_HOSTED || process.env.OPENAI_API_KEY) {
|
||||||
const res = await extract({
|
it.concurrent("works", async () => {
|
||||||
urls: ["https://firecrawl.dev"],
|
const res = await extract({
|
||||||
schema: {
|
urls: ["https://firecrawl.dev"],
|
||||||
"type": "object",
|
schema: {
|
||||||
"properties": {
|
"type": "object",
|
||||||
"company_mission": {
|
"properties": {
|
||||||
"type": "string"
|
"company_mission": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"is_open_source": {
|
||||||
|
"type": "boolean"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"is_open_source": {
|
"required": [
|
||||||
"type": "boolean"
|
"company_mission",
|
||||||
}
|
"is_open_source"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"required": [
|
origin: "api-sdk",
|
||||||
"company_mission",
|
});
|
||||||
"is_open_source"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
origin: "api-sdk",
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(res.data).toHaveProperty("company_mission");
|
expect(res.data).toHaveProperty("company_mission");
|
||||||
expect(typeof res.data.company_mission).toBe("string")
|
expect(typeof res.data.company_mission).toBe("string")
|
||||||
expect(res.data).toHaveProperty("is_open_source");
|
expect(res.data).toHaveProperty("is_open_source");
|
||||||
expect(typeof res.data.is_open_source).toBe("boolean");
|
expect(typeof res.data.is_open_source).toBe("boolean");
|
||||||
expect(res.data.is_open_source).toBe(true);
|
expect(res.data.is_open_source).toBe(true);
|
||||||
}, 60000);
|
}, 60000);
|
||||||
|
} else {
|
||||||
|
it.concurrent("dummy test", () => {
|
||||||
|
expect(true).toBe(true);
|
||||||
|
});
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
@ -21,7 +21,7 @@ function expectMapToSucceed(response: Awaited<ReturnType<typeof map>>) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
describe("Map tests", () => {
|
describe("Map tests", () => {
|
||||||
it("basic map succeeds", async () => {
|
it.concurrent("basic map succeeds", async () => {
|
||||||
const response = await map({
|
const response = await map({
|
||||||
url: "http://firecrawl.dev",
|
url: "http://firecrawl.dev",
|
||||||
});
|
});
|
||||||
@ -29,7 +29,7 @@ describe("Map tests", () => {
|
|||||||
expectMapToSucceed(response);
|
expectMapToSucceed(response);
|
||||||
}, 10000);
|
}, 10000);
|
||||||
|
|
||||||
it("times out properly", async () => {
|
it.concurrent("times out properly", async () => {
|
||||||
const response = await map({
|
const response = await map({
|
||||||
url: "http://firecrawl.dev",
|
url: "http://firecrawl.dev",
|
||||||
timeout: 1
|
timeout: 1
|
||||||
@ -40,14 +40,15 @@ describe("Map tests", () => {
|
|||||||
expect(response.body.error).toBe("Request timed out");
|
expect(response.body.error).toBe("Request timed out");
|
||||||
}, 10000);
|
}, 10000);
|
||||||
|
|
||||||
it("handles query parameters correctly", async () => {
|
it.concurrent("handles query parameters correctly", async () => {
|
||||||
let response = await map({
|
let response = await map({
|
||||||
url: "https://www.hfea.gov.uk",
|
url: "https://www.hfea.gov.uk",
|
||||||
sitemapOnly: true,
|
sitemapOnly: true,
|
||||||
|
useMock: "map-query-params",
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(response.statusCode).toBe(200);
|
expect(response.statusCode).toBe(200);
|
||||||
expect(response.body.success).toBe(true);
|
expect(response.body.success).toBe(true);
|
||||||
expect(response.body.links.some(x => x.match(/^https:\/\/www\.hfea\.gov\.uk\/choose-a-clinic\/clinic-search\/results\/?\?options=\d+$/))).toBe(true);
|
expect(response.body.links.some(x => x.match(/^https:\/\/www\.hfea\.gov\.uk\/choose-a-clinic\/clinic-search\/results\/?\?options=\d+$/))).toBe(true);
|
||||||
}, 300000);
|
}, 60000);
|
||||||
});
|
});
|
||||||
|
51
apps/api/src/__tests__/snips/mocks/map-query-params.json
Normal file
51
apps/api/src/__tests__/snips/mocks/map-query-params.json
Normal file
File diff suppressed because one or more lines are too long
@ -26,7 +26,7 @@ async function scrape(body: ScrapeRequestInput): Promise<Document> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
describe("Scrape tests", () => {
|
describe("Scrape tests", () => {
|
||||||
it("mocking works properly", async () => {
|
it.concurrent("mocking works properly", async () => {
|
||||||
// depends on falsified mock mocking-works-properly
|
// depends on falsified mock mocking-works-properly
|
||||||
// this test will fail if mock is bypassed with real data -- firecrawl.dev will never have
|
// this test will fail if mock is bypassed with real data -- firecrawl.dev will never have
|
||||||
// that as its actual markdown output
|
// that as its actual markdown output
|
||||||
@ -41,41 +41,34 @@ describe("Scrape tests", () => {
|
|||||||
);
|
);
|
||||||
}, 10000);
|
}, 10000);
|
||||||
|
|
||||||
describe("Ad blocking (f-e dependant)", () => {
|
it.concurrent("works", async () => {
|
||||||
it.concurrent("blocks ads by default", async () => {
|
const response = await scrape({
|
||||||
|
url: "http://firecrawl.dev"
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(response.markdown).toContain("Firecrawl");
|
||||||
|
}, 10000);
|
||||||
|
|
||||||
|
if (process.env.TEST_SUITE_SELF_HOSTED && process.env.PROXY_SERVER) {
|
||||||
|
it.concurrent("self-hosted proxy works", async () => {
|
||||||
const response = await scrape({
|
const response = await scrape({
|
||||||
url: "https://canyoublockit.com/testing/",
|
url: "https://icanhazip.com"
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(response.markdown).not.toContain(".g.doubleclick.net/");
|
expect(response.markdown?.trim()).toBe(process.env.PROXY_SERVER!.split("://").slice(-1)[0].split(":")[0]);
|
||||||
}, 10000);
|
});
|
||||||
|
}
|
||||||
|
|
||||||
it.concurrent("doesn't block ads if explicitly disabled", async () => {
|
if (!process.env.TEST_SUITE_SELF_HOSTED || process.env.PLAYWRIGHT_MICROSERVICE_URL) {
|
||||||
|
it.concurrent("waitFor works", async () => {
|
||||||
const response = await scrape({
|
const response = await scrape({
|
||||||
url: "https://canyoublockit.com/testing/",
|
url: "http://firecrawl.dev",
|
||||||
blockAds: false,
|
waitFor: 2000,
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(response.markdown).toContain(".g.doubleclick.net/");
|
expect(response.markdown).toContain("Firecrawl");
|
||||||
}, 10000);
|
}, 15000);
|
||||||
});
|
}
|
||||||
|
|
||||||
describe("Location API (f-e dependant)", () => {
|
|
||||||
it.concurrent("works without specifying an explicit location", async () => {
|
|
||||||
const response = await scrape({
|
|
||||||
url: "https://iplocation.com",
|
|
||||||
});
|
|
||||||
}, 10000);
|
|
||||||
|
|
||||||
it.concurrent("works with country US", async () => {
|
|
||||||
const response = await scrape({
|
|
||||||
url: "https://iplocation.com",
|
|
||||||
location: { country: "US" },
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(response.markdown).toContain("| Country | United States |");
|
|
||||||
}, 10000);
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("JSON scrape support", () => {
|
describe("JSON scrape support", () => {
|
||||||
it.concurrent("returns parseable JSON", async () => {
|
it.concurrent("returns parseable JSON", async () => {
|
||||||
@ -89,82 +82,132 @@ describe("Scrape tests", () => {
|
|||||||
}, 25000); // TODO: mock and shorten
|
}, 25000); // TODO: mock and shorten
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("Screenshot", () => {
|
if (!process.env.TEST_SUITE_SELF_HOSTED) {
|
||||||
it.concurrent("screenshot format works", async () => {
|
describe("Ad blocking (f-e dependant)", () => {
|
||||||
const response = await scrape({
|
it.concurrent("blocks ads by default", async () => {
|
||||||
url: "http://firecrawl.dev",
|
const response = await scrape({
|
||||||
formats: ["screenshot"]
|
url: "https://www.allrecipes.com/recipe/18185/yum/",
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(typeof response.screenshot).toBe("string");
|
expect(response.markdown).not.toContain(".g.doubleclick.net/");
|
||||||
}, 15000);
|
}, 10000);
|
||||||
|
|
||||||
it.concurrent("screenshot@fullPage format works", async () => {
|
it.concurrent("doesn't block ads if explicitly disabled", async () => {
|
||||||
const response = await scrape({
|
const response = await scrape({
|
||||||
url: "http://firecrawl.dev",
|
url: "https://www.allrecipes.com/recipe/18185/yum/",
|
||||||
formats: ["screenshot@fullPage"]
|
blockAds: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(typeof response.screenshot).toBe("string");
|
expect(response.markdown).toContain(".g.doubleclick.net/");
|
||||||
}, 15000);
|
}, 10000);
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("JSON format", () => {
|
describe("Location API (f-e dependant)", () => {
|
||||||
it.concurrent("works", async () => {
|
it.concurrent("works without specifying an explicit location", async () => {
|
||||||
const response = await scrape({
|
const response = await scrape({
|
||||||
url: "http://firecrawl.dev",
|
url: "https://iplocation.com",
|
||||||
formats: ["json"],
|
});
|
||||||
jsonOptions: {
|
}, 10000);
|
||||||
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
|
|
||||||
schema: {
|
it.concurrent("works with country US", async () => {
|
||||||
type: "object",
|
const response = await scrape({
|
||||||
properties: {
|
url: "https://iplocation.com",
|
||||||
company_mission: {
|
location: { country: "US" },
|
||||||
type: "string",
|
});
|
||||||
},
|
|
||||||
supports_sso: {
|
expect(response.markdown).toContain("| Country | United States |");
|
||||||
type: "boolean",
|
}, 10000);
|
||||||
},
|
});
|
||||||
is_open_source: {
|
|
||||||
type: "boolean",
|
describe("Screenshot (f-e/sb dependant)", () => {
|
||||||
|
it.concurrent("screenshot format works", async () => {
|
||||||
|
const response = await scrape({
|
||||||
|
url: "http://firecrawl.dev",
|
||||||
|
formats: ["screenshot"]
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(typeof response.screenshot).toBe("string");
|
||||||
|
}, 30000);
|
||||||
|
|
||||||
|
it.concurrent("screenshot@fullPage format works", async () => {
|
||||||
|
const response = await scrape({
|
||||||
|
url: "http://firecrawl.dev",
|
||||||
|
formats: ["screenshot@fullPage"]
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(typeof response.screenshot).toBe("string");
|
||||||
|
}, 30000);
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("Proxy API (f-e dependant)", () => {
|
||||||
|
it.concurrent("undefined works", async () => {
|
||||||
|
await scrape({
|
||||||
|
url: "http://firecrawl.dev",
|
||||||
|
});
|
||||||
|
}, 15000);
|
||||||
|
|
||||||
|
it.concurrent("basic works", async () => {
|
||||||
|
await scrape({
|
||||||
|
url: "http://firecrawl.dev",
|
||||||
|
proxy: "basic",
|
||||||
|
});
|
||||||
|
}, 15000);
|
||||||
|
|
||||||
|
it.concurrent("stealth works", async () => {
|
||||||
|
await scrape({
|
||||||
|
url: "http://firecrawl.dev",
|
||||||
|
proxy: "stealth",
|
||||||
|
});
|
||||||
|
}, 15000);
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("PDF (f-e dependant)", () => {
|
||||||
|
it.concurrent("works for PDFs behind anti-bot", async () => {
|
||||||
|
const response = await scrape({
|
||||||
|
url: "https://www.researchgate.net/profile/Amir-Leshem/publication/220732050_Robust_adaptive_beamforming_based_on_jointly_estimating_covariance_matrix_and_steering_vector/links/0c96052d2fd8f0a84b000000/Robust-adaptive-beamforming-based-on-jointly-estimating-covariance-matrix-and-steering-vector.pdf"
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(response.markdown).toContain("Robust adaptive beamforming based on jointly estimating covariance matrix");
|
||||||
|
}, 60000);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!process.env.TEST_SUITE_SELF_HOSTED || process.env.OPENAI_API_KEY) {
|
||||||
|
describe("JSON format", () => {
|
||||||
|
it.concurrent("works", async () => {
|
||||||
|
const response = await scrape({
|
||||||
|
url: "http://firecrawl.dev",
|
||||||
|
formats: ["json"],
|
||||||
|
jsonOptions: {
|
||||||
|
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
|
||||||
|
schema: {
|
||||||
|
type: "object",
|
||||||
|
properties: {
|
||||||
|
company_mission: {
|
||||||
|
type: "string",
|
||||||
|
},
|
||||||
|
supports_sso: {
|
||||||
|
type: "boolean",
|
||||||
|
},
|
||||||
|
is_open_source: {
|
||||||
|
type: "boolean",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
|
required: ["company_mission", "supports_sso", "is_open_source"],
|
||||||
},
|
},
|
||||||
required: ["company_mission", "supports_sso", "is_open_source"],
|
|
||||||
},
|
},
|
||||||
},
|
});
|
||||||
});
|
|
||||||
|
|
||||||
expect(response).toHaveProperty("json");
|
expect(response).toHaveProperty("json");
|
||||||
expect(response.json).toHaveProperty("company_mission");
|
expect(response.json).toHaveProperty("company_mission");
|
||||||
expect(typeof response.json.company_mission).toBe("string");
|
expect(typeof response.json.company_mission).toBe("string");
|
||||||
expect(response.json).toHaveProperty("supports_sso");
|
expect(response.json).toHaveProperty("supports_sso");
|
||||||
expect(response.json.supports_sso).toBe(false);
|
expect(response.json.supports_sso).toBe(false);
|
||||||
expect(typeof response.json.supports_sso).toBe("boolean");
|
expect(typeof response.json.supports_sso).toBe("boolean");
|
||||||
expect(response.json).toHaveProperty("is_open_source");
|
expect(response.json).toHaveProperty("is_open_source");
|
||||||
expect(response.json.is_open_source).toBe(true);
|
expect(response.json.is_open_source).toBe(true);
|
||||||
expect(typeof response.json.is_open_source).toBe("boolean");
|
expect(typeof response.json.is_open_source).toBe("boolean");
|
||||||
}, 30000);
|
}, 30000);
|
||||||
});
|
});
|
||||||
|
}
|
||||||
describe("Proxy API (f-e dependant)", () => {
|
|
||||||
it.concurrent("undefined works", async () => {
|
|
||||||
await scrape({
|
|
||||||
url: "http://firecrawl.dev",
|
|
||||||
});
|
|
||||||
}, 15000);
|
|
||||||
|
|
||||||
it.concurrent("basic works", async () => {
|
|
||||||
await scrape({
|
|
||||||
url: "http://firecrawl.dev",
|
|
||||||
proxy: "basic",
|
|
||||||
});
|
|
||||||
}, 15000);
|
|
||||||
|
|
||||||
it.concurrent("stealth works", async () => {
|
|
||||||
await scrape({
|
|
||||||
url: "http://firecrawl.dev",
|
|
||||||
proxy: "stealth",
|
|
||||||
});
|
|
||||||
}, 15000);
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
@ -27,10 +27,10 @@ async function search(body: SearchRequestInput): Promise<Document> {
|
|||||||
return raw.body.data;
|
return raw.body.data;
|
||||||
}
|
}
|
||||||
|
|
||||||
describe("Scrape tests", () => {
|
describe("Search tests", () => {
|
||||||
it("works", async () => {
|
it.concurrent("works", async () => {
|
||||||
await search({
|
await search({
|
||||||
query: "firecrawl"
|
query: "firecrawl"
|
||||||
});
|
});
|
||||||
}, 15000);
|
}, 60000);
|
||||||
});
|
});
|
||||||
|
@ -13,13 +13,13 @@ import {
|
|||||||
getDoneJobsOrderedLength,
|
getDoneJobsOrderedLength,
|
||||||
isCrawlKickoffFinished,
|
isCrawlKickoffFinished,
|
||||||
} from "../../lib/crawl-redis";
|
} from "../../lib/crawl-redis";
|
||||||
import { getScrapeQueue } from "../../services/queue-service";
|
import { getScrapeQueue, QueueFunction } from "../../services/queue-service";
|
||||||
import {
|
import {
|
||||||
supabaseGetJobById,
|
supabaseGetJobById,
|
||||||
supabaseGetJobsById,
|
supabaseGetJobsById,
|
||||||
} from "../../lib/supabase-jobs";
|
} from "../../lib/supabase-jobs";
|
||||||
import { configDotenv } from "dotenv";
|
import { configDotenv } from "dotenv";
|
||||||
import type { Job, JobState } from "bullmq";
|
import type { Job, JobState, Queue } from "bullmq";
|
||||||
import { logger } from "../../lib/logger";
|
import { logger } from "../../lib/logger";
|
||||||
import { supabase_service } from "../../services/supabase";
|
import { supabase_service } from "../../services/supabase";
|
||||||
import { getConcurrencyLimitedJobs } from "../../lib/concurrency-limit";
|
import { getConcurrencyLimitedJobs } from "../../lib/concurrency-limit";
|
||||||
@ -245,7 +245,7 @@ export async function crawlStatusController(
|
|||||||
|
|
||||||
let totalCount = jobIDs.length;
|
let totalCount = jobIDs.length;
|
||||||
|
|
||||||
if (totalCount === 0) {
|
if (totalCount === 0 && process.env.USE_DB_AUTHENTICATION === "true") {
|
||||||
const x = await supabase_service
|
const x = await supabase_service
|
||||||
.from('firecrawl_jobs')
|
.from('firecrawl_jobs')
|
||||||
.select('*', { count: 'exact', head: true })
|
.select('*', { count: 'exact', head: true })
|
||||||
|
@ -1,7 +1,34 @@
|
|||||||
import { Response } from "express";
|
import { Response } from "express";
|
||||||
import { supabaseGetJobsById } from "../../lib/supabase-jobs";
|
|
||||||
import { RequestWithAuth } from "./types";
|
import { RequestWithAuth } from "./types";
|
||||||
import { getExtract, getExtractExpiry } from "../../lib/extract/extract-redis";
|
import { getExtract, getExtractExpiry } from "../../lib/extract/extract-redis";
|
||||||
|
import { DBJob, PseudoJob } from "./crawl-status";
|
||||||
|
import { getExtractQueue } from "../../services/queue-service";
|
||||||
|
import { ExtractResult } from "../../lib/extract/extraction-service";
|
||||||
|
import { supabaseGetJobById } from "../../lib/supabase-jobs";
|
||||||
|
|
||||||
|
export async function getExtractJob(id: string): Promise<PseudoJob<ExtractResult> | null> {
|
||||||
|
const [bullJob, dbJob] = await Promise.all([
|
||||||
|
getExtractQueue().getJob(id),
|
||||||
|
(process.env.USE_DB_AUTHENTICATION === "true" ? supabaseGetJobById(id) : null) as Promise<DBJob | null>,
|
||||||
|
]);
|
||||||
|
|
||||||
|
if (!bullJob && !dbJob) return null;
|
||||||
|
|
||||||
|
const data = dbJob?.docs ?? bullJob?.returnvalue?.data;
|
||||||
|
|
||||||
|
const job: PseudoJob<any> = {
|
||||||
|
id,
|
||||||
|
getState: bullJob ? bullJob.getState : (() => dbJob!.success ? "completed" : "failed"),
|
||||||
|
returnvalue: data,
|
||||||
|
data: {
|
||||||
|
scrapeOptions: bullJob ? bullJob.data.scrapeOptions : dbJob!.page_options,
|
||||||
|
},
|
||||||
|
timestamp: bullJob ? bullJob.timestamp : new Date(dbJob!.date_added).valueOf(),
|
||||||
|
failedReason: (bullJob ? bullJob.failedReason : dbJob!.message) || undefined,
|
||||||
|
}
|
||||||
|
|
||||||
|
return job;
|
||||||
|
}
|
||||||
|
|
||||||
export async function extractStatusController(
|
export async function extractStatusController(
|
||||||
req: RequestWithAuth<{ jobId: string }, any, any>,
|
req: RequestWithAuth<{ jobId: string }, any, any>,
|
||||||
@ -16,24 +43,29 @@ export async function extractStatusController(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
let data: any[] = [];
|
let data: ExtractResult | [] = [];
|
||||||
|
|
||||||
if (extract.status === "completed") {
|
if (extract.status === "completed") {
|
||||||
const jobData = await supabaseGetJobsById([req.params.jobId]);
|
const jobData = await getExtractJob(req.params.jobId);
|
||||||
if (!jobData || jobData.length === 0) {
|
if (!jobData) {
|
||||||
return res.status(404).json({
|
return res.status(404).json({
|
||||||
success: false,
|
success: false,
|
||||||
error: "Job not found",
|
error: "Job not found",
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
data = jobData[0].docs;
|
if (!jobData.returnvalue) {
|
||||||
|
// if we got in the split-second where the redis is updated but the bull isn't
|
||||||
|
// just pretend it's still processing - MG
|
||||||
|
extract.status = "processing";
|
||||||
|
} else {
|
||||||
|
data = jobData.returnvalue ?? [];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// console.log(extract.sources);
|
|
||||||
return res.status(200).json({
|
return res.status(200).json({
|
||||||
success: extract.status === "failed" ? false : true,
|
success: extract.status === "failed" ? false : true,
|
||||||
data: data,
|
data,
|
||||||
status: extract.status,
|
status: extract.status,
|
||||||
error: extract?.error ?? undefined,
|
error: extract?.error ?? undefined,
|
||||||
expiresAt: (await getExtractExpiry(req.params.jobId)).toISOString(),
|
expiresAt: (await getExtractExpiry(req.params.jobId)).toISOString(),
|
||||||
|
@ -5,6 +5,7 @@ import {
|
|||||||
mapRequestSchema,
|
mapRequestSchema,
|
||||||
RequestWithAuth,
|
RequestWithAuth,
|
||||||
scrapeOptions,
|
scrapeOptions,
|
||||||
|
TimeoutSignal,
|
||||||
} from "./types";
|
} from "./types";
|
||||||
import { crawlToCrawler, StoredCrawl } from "../../lib/crawl-redis";
|
import { crawlToCrawler, StoredCrawl } from "../../lib/crawl-redis";
|
||||||
import { MapResponse, MapRequest } from "./types";
|
import { MapResponse, MapRequest } from "./types";
|
||||||
@ -53,6 +54,8 @@ export async function getMapResults({
|
|||||||
origin,
|
origin,
|
||||||
includeMetadata = false,
|
includeMetadata = false,
|
||||||
allowExternalLinks,
|
allowExternalLinks,
|
||||||
|
abort = new AbortController().signal, // noop
|
||||||
|
mock,
|
||||||
}: {
|
}: {
|
||||||
url: string;
|
url: string;
|
||||||
search?: string;
|
search?: string;
|
||||||
@ -65,6 +68,8 @@ export async function getMapResults({
|
|||||||
origin?: string;
|
origin?: string;
|
||||||
includeMetadata?: boolean;
|
includeMetadata?: boolean;
|
||||||
allowExternalLinks?: boolean;
|
allowExternalLinks?: boolean;
|
||||||
|
abort?: AbortSignal;
|
||||||
|
mock?: string;
|
||||||
}): Promise<MapResult> {
|
}): Promise<MapResult> {
|
||||||
const id = uuidv4();
|
const id = uuidv4();
|
||||||
let links: string[] = [url];
|
let links: string[] = [url];
|
||||||
@ -87,8 +92,8 @@ export async function getMapResults({
|
|||||||
const crawler = crawlToCrawler(id, sc);
|
const crawler = crawlToCrawler(id, sc);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
sc.robots = await crawler.getRobotsTxt();
|
sc.robots = await crawler.getRobotsTxt(false, abort);
|
||||||
await crawler.importRobotsTxt(sc.robots);
|
crawler.importRobotsTxt(sc.robots);
|
||||||
} catch (_) {}
|
} catch (_) {}
|
||||||
|
|
||||||
// If sitemapOnly is true, only get links from sitemap
|
// If sitemapOnly is true, only get links from sitemap
|
||||||
@ -102,6 +107,8 @@ export async function getMapResults({
|
|||||||
true,
|
true,
|
||||||
true,
|
true,
|
||||||
30000,
|
30000,
|
||||||
|
abort,
|
||||||
|
mock,
|
||||||
);
|
);
|
||||||
if (sitemap > 0) {
|
if (sitemap > 0) {
|
||||||
links = links
|
links = links
|
||||||
@ -144,7 +151,7 @@ export async function getMapResults({
|
|||||||
return fireEngineMap(mapUrl, {
|
return fireEngineMap(mapUrl, {
|
||||||
numResults: resultsPerPage,
|
numResults: resultsPerPage,
|
||||||
page: page,
|
page: page,
|
||||||
});
|
}, abort);
|
||||||
};
|
};
|
||||||
|
|
||||||
pagePromises = Array.from({ length: maxPages }, (_, i) =>
|
pagePromises = Array.from({ length: maxPages }, (_, i) =>
|
||||||
@ -157,7 +164,7 @@ export async function getMapResults({
|
|||||||
|
|
||||||
// Parallelize sitemap index query with search results
|
// Parallelize sitemap index query with search results
|
||||||
const [sitemapIndexResult, ...searchResults] = await Promise.all([
|
const [sitemapIndexResult, ...searchResults] = await Promise.all([
|
||||||
querySitemapIndex(url),
|
querySitemapIndex(url, abort),
|
||||||
...(cachedResult ? [] : pagePromises),
|
...(cachedResult ? [] : pagePromises),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
@ -178,6 +185,7 @@ export async function getMapResults({
|
|||||||
true,
|
true,
|
||||||
false,
|
false,
|
||||||
30000,
|
30000,
|
||||||
|
abort,
|
||||||
);
|
);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
logger.warn("tryGetSitemap threw an error", { error: e });
|
logger.warn("tryGetSitemap threw an error", { error: e });
|
||||||
@ -277,6 +285,7 @@ export async function mapController(
|
|||||||
req.body = mapRequestSchema.parse(req.body);
|
req.body = mapRequestSchema.parse(req.body);
|
||||||
|
|
||||||
let result: Awaited<ReturnType<typeof getMapResults>>;
|
let result: Awaited<ReturnType<typeof getMapResults>>;
|
||||||
|
const abort = new AbortController();
|
||||||
try {
|
try {
|
||||||
result = await Promise.race([
|
result = await Promise.race([
|
||||||
getMapResults({
|
getMapResults({
|
||||||
@ -289,13 +298,18 @@ export async function mapController(
|
|||||||
origin: req.body.origin,
|
origin: req.body.origin,
|
||||||
teamId: req.auth.team_id,
|
teamId: req.auth.team_id,
|
||||||
plan: req.auth.plan,
|
plan: req.auth.plan,
|
||||||
|
abort: abort.signal,
|
||||||
|
mock: req.body.useMock,
|
||||||
}),
|
}),
|
||||||
...(req.body.timeout !== undefined ? [
|
...(req.body.timeout !== undefined ? [
|
||||||
new Promise((resolve, reject) => setTimeout(() => reject("timeout"), req.body.timeout))
|
new Promise((resolve, reject) => setTimeout(() => {
|
||||||
|
abort.abort(new TimeoutSignal());
|
||||||
|
reject(new TimeoutSignal());
|
||||||
|
}, req.body.timeout))
|
||||||
] : []),
|
] : []),
|
||||||
]) as any;
|
]) as any;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
if (error === "timeout") {
|
if (error instanceof TimeoutSignal || error === "timeout") {
|
||||||
return res.status(408).json({
|
return res.status(408).json({
|
||||||
success: false,
|
success: false,
|
||||||
error: "Request timed out",
|
error: "Request timed out",
|
||||||
|
@ -501,6 +501,7 @@ export const mapRequestSchema = crawlerOptions
|
|||||||
sitemapOnly: z.boolean().default(false),
|
sitemapOnly: z.boolean().default(false),
|
||||||
limit: z.number().min(1).max(5000).default(5000),
|
limit: z.number().min(1).max(5000).default(5000),
|
||||||
timeout: z.number().positive().finite().optional(),
|
timeout: z.number().positive().finite().optional(),
|
||||||
|
useMock: z.string().optional(),
|
||||||
})
|
})
|
||||||
.strict(strictMessage);
|
.strict(strictMessage);
|
||||||
|
|
||||||
@ -1004,3 +1005,9 @@ export const generateLLMsTextRequestSchema = z.object({
|
|||||||
export type GenerateLLMsTextRequest = z.infer<
|
export type GenerateLLMsTextRequest = z.infer<
|
||||||
typeof generateLLMsTextRequestSchema
|
typeof generateLLMsTextRequestSchema
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
export class TimeoutSignal extends Error {
|
||||||
|
constructor() {
|
||||||
|
super("Operation timed out")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,38 +1,10 @@
|
|||||||
import { CONCURRENCY_LIMIT } from "../services/rate-limiter";
|
|
||||||
import { redisConnection } from "../services/queue-service";
|
import { redisConnection } from "../services/queue-service";
|
||||||
import { PlanType } from "../types";
|
import type { JobsOptions } from "bullmq";
|
||||||
import type { Job, JobsOptions } from "bullmq";
|
|
||||||
|
|
||||||
const constructKey = (team_id: string) => "concurrency-limiter:" + team_id;
|
const constructKey = (team_id: string) => "concurrency-limiter:" + team_id;
|
||||||
const constructQueueKey = (team_id: string) =>
|
const constructQueueKey = (team_id: string) =>
|
||||||
"concurrency-limit-queue:" + team_id;
|
"concurrency-limit-queue:" + team_id;
|
||||||
|
|
||||||
export function calculateJobTimeToRun(
|
|
||||||
job: ConcurrencyLimitedJob
|
|
||||||
): number {
|
|
||||||
let jobTimeToRun = 86400000; // 24h (crawl)
|
|
||||||
|
|
||||||
if (job.data.scrapeOptions) {
|
|
||||||
if (job.data.scrapeOptions.timeout) {
|
|
||||||
jobTimeToRun = job.data.scrapeOptions.timeout;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (job.data.scrapeOptions.waitFor) {
|
|
||||||
jobTimeToRun += job.data.scrapeOptions.waitFor;
|
|
||||||
}
|
|
||||||
|
|
||||||
(job.data.scrapeOptions.actions ?? []).forEach(x => {
|
|
||||||
if (x.type === "wait" && x.milliseconds) {
|
|
||||||
jobTimeToRun += x.milliseconds;
|
|
||||||
} else {
|
|
||||||
jobTimeToRun += 1000;
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
return jobTimeToRun;
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function cleanOldConcurrencyLimitEntries(
|
export async function cleanOldConcurrencyLimitEntries(
|
||||||
team_id: string,
|
team_id: string,
|
||||||
now: number = Date.now(),
|
now: number = Date.now(),
|
||||||
|
@ -7,7 +7,6 @@ import {
|
|||||||
} from "../build-prompts";
|
} from "../build-prompts";
|
||||||
import OpenAI from "openai";
|
import OpenAI from "openai";
|
||||||
import { logger } from "../../../lib/logger";
|
import { logger } from "../../../lib/logger";
|
||||||
const openai = new OpenAI();
|
|
||||||
|
|
||||||
export async function analyzeSchemaAndPrompt(
|
export async function analyzeSchemaAndPrompt(
|
||||||
urls: string[],
|
urls: string[],
|
||||||
@ -40,6 +39,7 @@ export async function analyzeSchemaAndPrompt(
|
|||||||
|
|
||||||
const model = "gpt-4o";
|
const model = "gpt-4o";
|
||||||
|
|
||||||
|
const openai = new OpenAI();
|
||||||
const result = await openai.beta.chat.completions.parse({
|
const result = await openai.beta.chat.completions.parse({
|
||||||
model: model,
|
model: model,
|
||||||
messages: [
|
messages: [
|
||||||
|
@ -48,7 +48,7 @@ interface ExtractServiceOptions {
|
|||||||
cacheKey?: string;
|
cacheKey?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface ExtractResult {
|
export interface ExtractResult {
|
||||||
success: boolean;
|
success: boolean;
|
||||||
data?: any;
|
data?: any;
|
||||||
extractId: string;
|
extractId: string;
|
||||||
|
@ -3,10 +3,6 @@ import { Document } from "../../../controllers/v1/types";
|
|||||||
import { logger } from "../../logger";
|
import { logger } from "../../logger";
|
||||||
import OpenAI from "openai";
|
import OpenAI from "openai";
|
||||||
|
|
||||||
const openai = new OpenAI({
|
|
||||||
apiKey: process.env.OPENAI_API_KEY,
|
|
||||||
});
|
|
||||||
|
|
||||||
const pinecone = new Pinecone({
|
const pinecone = new Pinecone({
|
||||||
apiKey: process.env.PINECONE_API_KEY!,
|
apiKey: process.env.PINECONE_API_KEY!,
|
||||||
});
|
});
|
||||||
@ -27,6 +23,10 @@ export interface PageMetadata {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function getEmbedding(text: string) {
|
async function getEmbedding(text: string) {
|
||||||
|
const openai = new OpenAI({
|
||||||
|
apiKey: process.env.OPENAI_API_KEY,
|
||||||
|
});
|
||||||
|
|
||||||
const embedding = await openai.embeddings.create({
|
const embedding = await openai.embeddings.create({
|
||||||
model: "text-embedding-3-small",
|
model: "text-embedding-3-small",
|
||||||
input: text,
|
input: text,
|
||||||
|
@ -1,9 +1,5 @@
|
|||||||
import OpenAI from "openai";
|
import OpenAI from "openai";
|
||||||
|
|
||||||
const openai = new OpenAI({
|
|
||||||
apiKey: process.env.OPENAI_API_KEY,
|
|
||||||
});
|
|
||||||
|
|
||||||
interface Message {
|
interface Message {
|
||||||
role: "system" | "user" | "assistant";
|
role: "system" | "user" | "assistant";
|
||||||
content: string;
|
content: string;
|
||||||
@ -19,6 +15,10 @@ interface GenerateTextOptions {
|
|||||||
export async function generateText(options: GenerateTextOptions) {
|
export async function generateText(options: GenerateTextOptions) {
|
||||||
const { model, messages, temperature = 0.7, maxTokens } = options;
|
const { model, messages, temperature = 0.7, maxTokens } = options;
|
||||||
|
|
||||||
|
const openai = new OpenAI({
|
||||||
|
apiKey: process.env.OPENAI_API_KEY,
|
||||||
|
});
|
||||||
|
|
||||||
const completion = await openai.chat.completions.create({
|
const completion = await openai.chat.completions.create({
|
||||||
model,
|
model,
|
||||||
messages,
|
messages,
|
||||||
|
@ -1,14 +1,13 @@
|
|||||||
import axios from "axios";
|
|
||||||
import { configDotenv } from "dotenv";
|
import { configDotenv } from "dotenv";
|
||||||
import OpenAI from "openai";
|
import OpenAI from "openai";
|
||||||
|
|
||||||
configDotenv();
|
configDotenv();
|
||||||
|
|
||||||
const openai = new OpenAI({
|
|
||||||
apiKey: process.env.OPENAI_API_KEY,
|
|
||||||
});
|
|
||||||
|
|
||||||
async function getEmbedding(text: string) {
|
async function getEmbedding(text: string) {
|
||||||
|
const openai = new OpenAI({
|
||||||
|
apiKey: process.env.OPENAI_API_KEY,
|
||||||
|
});
|
||||||
|
|
||||||
const embedding = await openai.embeddings.create({
|
const embedding = await openai.embeddings.create({
|
||||||
model: "text-embedding-3-small",
|
model: "text-embedding-3-small",
|
||||||
input: text,
|
input: text,
|
||||||
|
@ -9,6 +9,7 @@ import { logger as _logger } from "../../lib/logger";
|
|||||||
import https from "https";
|
import https from "https";
|
||||||
import { redisConnection } from "../../services/queue-service";
|
import { redisConnection } from "../../services/queue-service";
|
||||||
import { extractLinks } from "../../lib/html-transformer";
|
import { extractLinks } from "../../lib/html-transformer";
|
||||||
|
import { TimeoutSignal } from "../../controllers/v1/types";
|
||||||
export class WebCrawler {
|
export class WebCrawler {
|
||||||
private jobId: string;
|
private jobId: string;
|
||||||
private initialUrl: string;
|
private initialUrl: string;
|
||||||
@ -182,7 +183,7 @@ export class WebCrawler {
|
|||||||
.slice(0, limit);
|
.slice(0, limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
public async getRobotsTxt(skipTlsVerification = false): Promise<string> {
|
public async getRobotsTxt(skipTlsVerification = false, abort?: AbortSignal): Promise<string> {
|
||||||
let extraArgs = {};
|
let extraArgs = {};
|
||||||
if (skipTlsVerification) {
|
if (skipTlsVerification) {
|
||||||
extraArgs["httpsAgent"] = new https.Agent({
|
extraArgs["httpsAgent"] = new https.Agent({
|
||||||
@ -191,6 +192,7 @@ export class WebCrawler {
|
|||||||
}
|
}
|
||||||
const response = await axios.get(this.robotsTxtUrl, {
|
const response = await axios.get(this.robotsTxtUrl, {
|
||||||
timeout: axiosTimeout,
|
timeout: axiosTimeout,
|
||||||
|
signal: abort,
|
||||||
...extraArgs,
|
...extraArgs,
|
||||||
});
|
});
|
||||||
return response.data;
|
return response.data;
|
||||||
@ -205,6 +207,8 @@ export class WebCrawler {
|
|||||||
fromMap: boolean = false,
|
fromMap: boolean = false,
|
||||||
onlySitemap: boolean = false,
|
onlySitemap: boolean = false,
|
||||||
timeout: number = 120000,
|
timeout: number = 120000,
|
||||||
|
abort?: AbortSignal,
|
||||||
|
mock?: string,
|
||||||
): Promise<number> {
|
): Promise<number> {
|
||||||
this.logger.debug(`Fetching sitemap links from ${this.initialUrl}`, {
|
this.logger.debug(`Fetching sitemap links from ${this.initialUrl}`, {
|
||||||
method: "tryGetSitemap",
|
method: "tryGetSitemap",
|
||||||
@ -260,10 +264,10 @@ export class WebCrawler {
|
|||||||
try {
|
try {
|
||||||
let count = (await Promise.race([
|
let count = (await Promise.race([
|
||||||
Promise.all([
|
Promise.all([
|
||||||
this.tryFetchSitemapLinks(this.initialUrl, _urlsHandler),
|
this.tryFetchSitemapLinks(this.initialUrl, _urlsHandler, abort, mock),
|
||||||
...this.robots
|
...this.robots
|
||||||
.getSitemaps()
|
.getSitemaps()
|
||||||
.map((x) => this.tryFetchSitemapLinks(x, _urlsHandler)),
|
.map((x) => this.tryFetchSitemapLinks(x, _urlsHandler, abort, mock)),
|
||||||
]).then((results) => results.reduce((a, x) => a + x, 0)),
|
]).then((results) => results.reduce((a, x) => a + x, 0)),
|
||||||
timeoutPromise,
|
timeoutPromise,
|
||||||
])) as number;
|
])) as number;
|
||||||
@ -555,6 +559,8 @@ export class WebCrawler {
|
|||||||
private async tryFetchSitemapLinks(
|
private async tryFetchSitemapLinks(
|
||||||
url: string,
|
url: string,
|
||||||
urlsHandler: (urls: string[]) => unknown,
|
urlsHandler: (urls: string[]) => unknown,
|
||||||
|
abort?: AbortSignal,
|
||||||
|
mock?: string,
|
||||||
): Promise<number> {
|
): Promise<number> {
|
||||||
const sitemapUrl = url.endsWith(".xml")
|
const sitemapUrl = url.endsWith(".xml")
|
||||||
? url
|
? url
|
||||||
@ -569,13 +575,19 @@ export class WebCrawler {
|
|||||||
this.logger,
|
this.logger,
|
||||||
this.jobId,
|
this.jobId,
|
||||||
this.sitemapsHit,
|
this.sitemapsHit,
|
||||||
|
abort,
|
||||||
|
mock,
|
||||||
);
|
);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this.logger.debug(`Failed to fetch sitemap from ${sitemapUrl}`, {
|
if (error instanceof TimeoutSignal) {
|
||||||
method: "tryFetchSitemapLinks",
|
throw error;
|
||||||
sitemapUrl,
|
} else {
|
||||||
error,
|
this.logger.debug(`Failed to fetch sitemap from ${sitemapUrl}`, {
|
||||||
});
|
method: "tryFetchSitemapLinks",
|
||||||
|
sitemapUrl,
|
||||||
|
error,
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If this is a subdomain, also try to get sitemap from the main domain
|
// If this is a subdomain, also try to get sitemap from the main domain
|
||||||
@ -611,20 +623,30 @@ export class WebCrawler {
|
|||||||
this.logger,
|
this.logger,
|
||||||
this.jobId,
|
this.jobId,
|
||||||
this.sitemapsHit,
|
this.sitemapsHit,
|
||||||
|
abort,
|
||||||
|
mock,
|
||||||
);
|
);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this.logger.debug(
|
if (error instanceof TimeoutSignal) {
|
||||||
`Failed to fetch main domain sitemap from ${mainDomainSitemapUrl}`,
|
throw error;
|
||||||
{ method: "tryFetchSitemapLinks", mainDomainSitemapUrl, error },
|
} else {
|
||||||
);
|
this.logger.debug(
|
||||||
|
`Failed to fetch main domain sitemap from ${mainDomainSitemapUrl}`,
|
||||||
|
{ method: "tryFetchSitemapLinks", mainDomainSitemapUrl, error },
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this.logger.debug(`Error processing main domain sitemap`, {
|
if (error instanceof TimeoutSignal) {
|
||||||
method: "tryFetchSitemapLinks",
|
throw error;
|
||||||
url,
|
} else {
|
||||||
error,
|
this.logger.debug(`Error processing main domain sitemap`, {
|
||||||
});
|
method: "tryFetchSitemapLinks",
|
||||||
|
url,
|
||||||
|
error,
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If no sitemap found yet, try the baseUrl as a last resort
|
// If no sitemap found yet, try the baseUrl as a last resort
|
||||||
@ -636,22 +658,30 @@ export class WebCrawler {
|
|||||||
this.logger,
|
this.logger,
|
||||||
this.jobId,
|
this.jobId,
|
||||||
this.sitemapsHit,
|
this.sitemapsHit,
|
||||||
|
abort,
|
||||||
|
mock,
|
||||||
);
|
);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this.logger.debug(`Failed to fetch sitemap from ${baseUrlSitemap}`, {
|
if (error instanceof TimeoutSignal) {
|
||||||
method: "tryFetchSitemapLinks",
|
throw error;
|
||||||
sitemapUrl: baseUrlSitemap,
|
|
||||||
error,
|
|
||||||
});
|
|
||||||
if (error instanceof AxiosError && error.response?.status === 404) {
|
|
||||||
// ignore 404
|
|
||||||
} else {
|
} else {
|
||||||
sitemapCount += await getLinksFromSitemap(
|
this.logger.debug(`Failed to fetch sitemap from ${baseUrlSitemap}`, {
|
||||||
{ sitemapUrl: baseUrlSitemap, urlsHandler, mode: "fire-engine" },
|
method: "tryFetchSitemapLinks",
|
||||||
this.logger,
|
sitemapUrl: baseUrlSitemap,
|
||||||
this.jobId,
|
error,
|
||||||
this.sitemapsHit,
|
});
|
||||||
);
|
if (error instanceof AxiosError && error.response?.status === 404) {
|
||||||
|
// ignore 404
|
||||||
|
} else {
|
||||||
|
sitemapCount += await getLinksFromSitemap(
|
||||||
|
{ sitemapUrl: baseUrlSitemap, urlsHandler, mode: "fire-engine" },
|
||||||
|
this.logger,
|
||||||
|
this.jobId,
|
||||||
|
this.sitemapsHit,
|
||||||
|
abort,
|
||||||
|
mock,
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -12,10 +12,11 @@ import { supabase_service } from "../../services/supabase";
|
|||||||
*/
|
*/
|
||||||
import { withAuth } from "../../lib/withAuth";
|
import { withAuth } from "../../lib/withAuth";
|
||||||
|
|
||||||
async function querySitemapIndexFunction(url: string) {
|
async function querySitemapIndexFunction(url: string, abort?: AbortSignal) {
|
||||||
const originUrl = normalizeUrlOnlyHostname(url);
|
const originUrl = normalizeUrlOnlyHostname(url);
|
||||||
|
|
||||||
for (let attempt = 1; attempt <= 3; attempt++) {
|
for (let attempt = 1; attempt <= 3; attempt++) {
|
||||||
|
abort?.throwIfAborted();
|
||||||
try {
|
try {
|
||||||
const { data, error } = await supabase_service
|
const { data, error } = await supabase_service
|
||||||
.from("crawl_maps")
|
.from("crawl_maps")
|
||||||
|
@ -1,8 +1,7 @@
|
|||||||
import { axiosTimeout } from "../../lib/timeout";
|
|
||||||
import { parseStringPromise } from "xml2js";
|
import { parseStringPromise } from "xml2js";
|
||||||
import { WebCrawler } from "./crawler";
|
import { WebCrawler } from "./crawler";
|
||||||
import { scrapeURL } from "../scrapeURL";
|
import { scrapeURL } from "../scrapeURL";
|
||||||
import { scrapeOptions } from "../../controllers/v1/types";
|
import { scrapeOptions, TimeoutSignal } from "../../controllers/v1/types";
|
||||||
import type { Logger } from "winston";
|
import type { Logger } from "winston";
|
||||||
const useFireEngine =
|
const useFireEngine =
|
||||||
process.env.FIRE_ENGINE_BETA_URL !== "" &&
|
process.env.FIRE_ENGINE_BETA_URL !== "" &&
|
||||||
@ -20,6 +19,8 @@ export async function getLinksFromSitemap(
|
|||||||
logger: Logger,
|
logger: Logger,
|
||||||
crawlId: string,
|
crawlId: string,
|
||||||
sitemapsHit: Set<string>,
|
sitemapsHit: Set<string>,
|
||||||
|
abort?: AbortSignal,
|
||||||
|
mock?: string,
|
||||||
): Promise<number> {
|
): Promise<number> {
|
||||||
if (sitemapsHit.size >= 20) {
|
if (sitemapsHit.size >= 20) {
|
||||||
return 0;
|
return 0;
|
||||||
@ -38,13 +39,14 @@ export async function getLinksFromSitemap(
|
|||||||
const response = await scrapeURL(
|
const response = await scrapeURL(
|
||||||
"sitemap;" + crawlId,
|
"sitemap;" + crawlId,
|
||||||
sitemapUrl,
|
sitemapUrl,
|
||||||
scrapeOptions.parse({ formats: ["rawHtml"] }),
|
scrapeOptions.parse({ formats: ["rawHtml"], useMock: mock }),
|
||||||
{
|
{
|
||||||
forceEngine: [
|
forceEngine: [
|
||||||
"fetch",
|
"fetch",
|
||||||
...((mode === "fire-engine" && useFireEngine) ? ["fire-engine;tlsclient" as const] : []),
|
...((mode === "fire-engine" && useFireEngine) ? ["fire-engine;tlsclient" as const] : []),
|
||||||
],
|
],
|
||||||
v0DisableJsDom: true
|
v0DisableJsDom: true,
|
||||||
|
abort,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -69,14 +71,18 @@ export async function getLinksFromSitemap(
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Request failed for sitemap fetch`, {
|
if (error instanceof TimeoutSignal) {
|
||||||
method: "getLinksFromSitemap",
|
throw error;
|
||||||
mode,
|
} else {
|
||||||
sitemapUrl,
|
logger.error(`Request failed for sitemap fetch`, {
|
||||||
error,
|
method: "getLinksFromSitemap",
|
||||||
});
|
mode,
|
||||||
|
sitemapUrl,
|
||||||
|
error,
|
||||||
|
});
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const parsed = await parseStringPromise(content);
|
const parsed = await parseStringPromise(content);
|
||||||
@ -90,7 +96,7 @@ export async function getLinksFromSitemap(
|
|||||||
.map((sitemap) => sitemap.loc[0].trim());
|
.map((sitemap) => sitemap.loc[0].trim());
|
||||||
|
|
||||||
const sitemapPromises: Promise<number>[] = sitemapUrls.map((sitemapUrl) =>
|
const sitemapPromises: Promise<number>[] = sitemapUrls.map((sitemapUrl) =>
|
||||||
getLinksFromSitemap({ sitemapUrl, urlsHandler, mode }, logger, crawlId, sitemapsHit),
|
getLinksFromSitemap({ sitemapUrl, urlsHandler, mode }, logger, crawlId, sitemapsHit, abort, mock),
|
||||||
);
|
);
|
||||||
|
|
||||||
const results = await Promise.all(sitemapPromises);
|
const results = await Promise.all(sitemapPromises);
|
||||||
@ -114,6 +120,8 @@ export async function getLinksFromSitemap(
|
|||||||
logger,
|
logger,
|
||||||
crawlId,
|
crawlId,
|
||||||
sitemapsHit,
|
sitemapsHit,
|
||||||
|
abort,
|
||||||
|
mock,
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
count += (await Promise.all(sitemapPromises)).reduce(
|
count += (await Promise.all(sitemapPromises)).reduce(
|
||||||
@ -151,56 +159,3 @@ export async function getLinksFromSitemap(
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
export const fetchSitemapData = async (
|
|
||||||
url: string,
|
|
||||||
timeout?: number,
|
|
||||||
): Promise<SitemapEntry[] | null> => {
|
|
||||||
const sitemapUrl = url.endsWith("/sitemap.xml") ? url : `${url}/sitemap.xml`;
|
|
||||||
try {
|
|
||||||
const fetchResponse = await scrapeURL(
|
|
||||||
"sitemap",
|
|
||||||
sitemapUrl,
|
|
||||||
scrapeOptions.parse({
|
|
||||||
formats: ["rawHtml"],
|
|
||||||
timeout: timeout || axiosTimeout,
|
|
||||||
}),
|
|
||||||
{ forceEngine: "fetch" },
|
|
||||||
);
|
|
||||||
|
|
||||||
if (
|
|
||||||
fetchResponse.success &&
|
|
||||||
fetchResponse.document.metadata.statusCode >= 200 &&
|
|
||||||
fetchResponse.document.metadata.statusCode < 300
|
|
||||||
) {
|
|
||||||
const xml = fetchResponse.document.rawHtml!;
|
|
||||||
const parsedXml = await parseStringPromise(xml);
|
|
||||||
|
|
||||||
const sitemapData: SitemapEntry[] = [];
|
|
||||||
if (parsedXml.urlset && parsedXml.urlset.url) {
|
|
||||||
for (const urlElement of parsedXml.urlset.url) {
|
|
||||||
const sitemapEntry: SitemapEntry = { loc: urlElement.loc[0] };
|
|
||||||
if (urlElement.lastmod) sitemapEntry.lastmod = urlElement.lastmod[0];
|
|
||||||
if (urlElement.changefreq)
|
|
||||||
sitemapEntry.changefreq = urlElement.changefreq[0];
|
|
||||||
if (urlElement.priority)
|
|
||||||
sitemapEntry.priority = Number(urlElement.priority[0]);
|
|
||||||
sitemapData.push(sitemapEntry);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return sitemapData;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
} catch (error) {
|
|
||||||
// Error handling for failed sitemap fetch
|
|
||||||
}
|
|
||||||
return [];
|
|
||||||
};
|
|
||||||
|
|
||||||
export interface SitemapEntry {
|
|
||||||
loc: string;
|
|
||||||
lastmod?: string;
|
|
||||||
changefreq?: string;
|
|
||||||
priority?: number;
|
|
||||||
}
|
|
||||||
|
@ -7,6 +7,7 @@ import {
|
|||||||
InsecureConnectionError,
|
InsecureConnectionError,
|
||||||
makeSecureDispatcher,
|
makeSecureDispatcher,
|
||||||
} from "../utils/safeFetch";
|
} from "../utils/safeFetch";
|
||||||
|
import { MockState, saveMock } from "../../lib/mock";
|
||||||
|
|
||||||
export async function scrapeURLWithFetch(
|
export async function scrapeURLWithFetch(
|
||||||
meta: Meta,
|
meta: Meta,
|
||||||
@ -14,44 +15,95 @@ export async function scrapeURLWithFetch(
|
|||||||
): Promise<EngineScrapeResult> {
|
): Promise<EngineScrapeResult> {
|
||||||
const timeout = timeToRun ?? 300000;
|
const timeout = timeToRun ?? 300000;
|
||||||
|
|
||||||
let response: undici.Response;
|
const mockOptions = {
|
||||||
try {
|
url: meta.url,
|
||||||
response = await Promise.race([
|
|
||||||
undici.fetch(meta.url, {
|
// irrelevant
|
||||||
dispatcher: await makeSecureDispatcher(meta.url),
|
method: "GET",
|
||||||
redirect: "follow",
|
ignoreResponse: false,
|
||||||
headers: meta.options.headers,
|
ignoreFailure: false,
|
||||||
}),
|
tryCount: 1,
|
||||||
(async () => {
|
};
|
||||||
await new Promise((resolve) =>
|
|
||||||
setTimeout(() => resolve(null), timeout),
|
let response: {
|
||||||
|
url: string;
|
||||||
|
body: string,
|
||||||
|
status: number;
|
||||||
|
headers: any;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (meta.mock !== null) {
|
||||||
|
const makeRequestTypeId = (
|
||||||
|
request: MockState["requests"][number]["options"],
|
||||||
|
) => request.url + ";" + request.method;
|
||||||
|
|
||||||
|
const thisId = makeRequestTypeId(mockOptions);
|
||||||
|
const matchingMocks = meta.mock.requests
|
||||||
|
.filter((x) => makeRequestTypeId(x.options) === thisId)
|
||||||
|
.sort((a, b) => a.time - b.time);
|
||||||
|
const nextI = meta.mock.tracker[thisId] ?? 0;
|
||||||
|
meta.mock.tracker[thisId] = nextI + 1;
|
||||||
|
|
||||||
|
if (!matchingMocks[nextI]) {
|
||||||
|
throw new Error("Failed to mock request -- no mock targets found.");
|
||||||
|
}
|
||||||
|
|
||||||
|
response = {
|
||||||
|
...matchingMocks[nextI].result,
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
const x = await Promise.race([
|
||||||
|
undici.fetch(meta.url, {
|
||||||
|
dispatcher: await makeSecureDispatcher(meta.url),
|
||||||
|
redirect: "follow",
|
||||||
|
headers: meta.options.headers,
|
||||||
|
signal: meta.internalOptions.abort,
|
||||||
|
}),
|
||||||
|
(async () => {
|
||||||
|
await new Promise((resolve) =>
|
||||||
|
setTimeout(() => resolve(null), timeout),
|
||||||
|
);
|
||||||
|
throw new TimeoutError(
|
||||||
|
"Fetch was unable to scrape the page before timing out",
|
||||||
|
{ cause: { timeout } },
|
||||||
|
);
|
||||||
|
})(),
|
||||||
|
]);
|
||||||
|
|
||||||
|
response = {
|
||||||
|
url: x.url,
|
||||||
|
body: await x.text(),
|
||||||
|
status: x.status,
|
||||||
|
headers: [...x.headers],
|
||||||
|
};
|
||||||
|
|
||||||
|
if (meta.mock === null) {
|
||||||
|
await saveMock(
|
||||||
|
mockOptions,
|
||||||
|
response,
|
||||||
);
|
);
|
||||||
throw new TimeoutError(
|
}
|
||||||
"Fetch was unable to scrape the page before timing out",
|
} catch (error) {
|
||||||
{ cause: { timeout } },
|
if (
|
||||||
);
|
error instanceof TypeError &&
|
||||||
})(),
|
error.cause instanceof InsecureConnectionError
|
||||||
]);
|
) {
|
||||||
} catch (error) {
|
throw error.cause;
|
||||||
if (
|
} else {
|
||||||
error instanceof TypeError &&
|
throw error;
|
||||||
error.cause instanceof InsecureConnectionError
|
}
|
||||||
) {
|
|
||||||
throw error.cause;
|
|
||||||
} else {
|
|
||||||
throw error;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
specialtyScrapeCheck(
|
await specialtyScrapeCheck(
|
||||||
meta.logger.child({ method: "scrapeURLWithFetch/specialtyScrapeCheck" }),
|
meta.logger.child({ method: "scrapeURLWithFetch/specialtyScrapeCheck" }),
|
||||||
Object.fromEntries(response.headers as any),
|
Object.fromEntries(response.headers as any),
|
||||||
);
|
);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
url: response.url,
|
url: response.url,
|
||||||
html: await response.text(),
|
html: response.body,
|
||||||
statusCode: response.status,
|
statusCode: response.status,
|
||||||
// TODO: error?
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -10,6 +10,7 @@ import {
|
|||||||
UnsupportedFileError,
|
UnsupportedFileError,
|
||||||
} from "../../error";
|
} from "../../error";
|
||||||
import { MockState } from "../../lib/mock";
|
import { MockState } from "../../lib/mock";
|
||||||
|
import { fireEngineURL } from "./scrape";
|
||||||
|
|
||||||
const successSchema = z.object({
|
const successSchema = z.object({
|
||||||
jobId: z.string(),
|
jobId: z.string(),
|
||||||
@ -84,9 +85,8 @@ export async function fireEngineCheckStatus(
|
|||||||
logger: Logger,
|
logger: Logger,
|
||||||
jobId: string,
|
jobId: string,
|
||||||
mock: MockState | null,
|
mock: MockState | null,
|
||||||
|
abort?: AbortSignal,
|
||||||
): Promise<FireEngineCheckStatusSuccess> {
|
): Promise<FireEngineCheckStatusSuccess> {
|
||||||
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
|
|
||||||
|
|
||||||
const status = await Sentry.startSpan(
|
const status = await Sentry.startSpan(
|
||||||
{
|
{
|
||||||
name: "fire-engine: Check status",
|
name: "fire-engine: Check status",
|
||||||
|
@ -3,14 +3,13 @@ import * as Sentry from "@sentry/node";
|
|||||||
|
|
||||||
import { robustFetch } from "../../lib/fetch";
|
import { robustFetch } from "../../lib/fetch";
|
||||||
import { MockState } from "../../lib/mock";
|
import { MockState } from "../../lib/mock";
|
||||||
|
import { fireEngineURL } from "./scrape";
|
||||||
|
|
||||||
export async function fireEngineDelete(
|
export async function fireEngineDelete(
|
||||||
logger: Logger,
|
logger: Logger,
|
||||||
jobId: string,
|
jobId: string,
|
||||||
mock: MockState | null,
|
mock: MockState | null,
|
||||||
) {
|
) {
|
||||||
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
|
|
||||||
|
|
||||||
await Sentry.startSpan(
|
await Sentry.startSpan(
|
||||||
{
|
{
|
||||||
name: "fire-engine: Delete scrape",
|
name: "fire-engine: Delete scrape",
|
||||||
|
@ -24,8 +24,9 @@ import * as Sentry from "@sentry/node";
|
|||||||
import { Action } from "../../../../lib/entities";
|
import { Action } from "../../../../lib/entities";
|
||||||
import { specialtyScrapeCheck } from "../utils/specialtyHandler";
|
import { specialtyScrapeCheck } from "../utils/specialtyHandler";
|
||||||
import { fireEngineDelete } from "./delete";
|
import { fireEngineDelete } from "./delete";
|
||||||
import { MockState, saveMock } from "../../lib/mock";
|
import { MockState } from "../../lib/mock";
|
||||||
import { getInnerJSON } from "../../../../lib/html-transformer";
|
import { getInnerJSON } from "../../../../lib/html-transformer";
|
||||||
|
import { TimeoutSignal } from "../../../../controllers/v1/types";
|
||||||
|
|
||||||
// This function does not take `Meta` on purpose. It may not access any
|
// This function does not take `Meta` on purpose. It may not access any
|
||||||
// meta values to construct the request -- that must be done by the
|
// meta values to construct the request -- that must be done by the
|
||||||
@ -40,6 +41,7 @@ async function performFireEngineScrape<
|
|||||||
request: FireEngineScrapeRequestCommon & Engine,
|
request: FireEngineScrapeRequestCommon & Engine,
|
||||||
timeout: number,
|
timeout: number,
|
||||||
mock: MockState | null,
|
mock: MockState | null,
|
||||||
|
abort?: AbortSignal,
|
||||||
): Promise<FireEngineCheckStatusSuccess> {
|
): Promise<FireEngineCheckStatusSuccess> {
|
||||||
const scrape = await fireEngineScrape(
|
const scrape = await fireEngineScrape(
|
||||||
logger.child({ method: "fireEngineScrape" }),
|
logger.child({ method: "fireEngineScrape" }),
|
||||||
@ -84,6 +86,7 @@ async function performFireEngineScrape<
|
|||||||
logger.child({ method: "fireEngineCheckStatus" }),
|
logger.child({ method: "fireEngineCheckStatus" }),
|
||||||
scrape.jobId,
|
scrape.jobId,
|
||||||
mock,
|
mock,
|
||||||
|
abort,
|
||||||
);
|
);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
if (error instanceof StillProcessingError) {
|
if (error instanceof StillProcessingError) {
|
||||||
@ -107,6 +110,16 @@ async function performFireEngineScrape<
|
|||||||
jobId: scrape.jobId,
|
jobId: scrape.jobId,
|
||||||
});
|
});
|
||||||
throw error;
|
throw error;
|
||||||
|
} else if (error instanceof TimeoutSignal) {
|
||||||
|
fireEngineDelete(
|
||||||
|
logger.child({
|
||||||
|
method: "performFireEngineScrape/fireEngineDelete",
|
||||||
|
afterError: error,
|
||||||
|
}),
|
||||||
|
scrape.jobId,
|
||||||
|
mock,
|
||||||
|
);
|
||||||
|
throw error;
|
||||||
} else {
|
} else {
|
||||||
Sentry.captureException(error);
|
Sentry.captureException(error);
|
||||||
errors.push(error);
|
errors.push(error);
|
||||||
@ -120,11 +133,12 @@ async function performFireEngineScrape<
|
|||||||
await new Promise((resolve) => setTimeout(resolve, 250));
|
await new Promise((resolve) => setTimeout(resolve, 250));
|
||||||
}
|
}
|
||||||
|
|
||||||
specialtyScrapeCheck(
|
await specialtyScrapeCheck(
|
||||||
logger.child({
|
logger.child({
|
||||||
method: "performFireEngineScrape/specialtyScrapeCheck",
|
method: "performFireEngineScrape/specialtyScrapeCheck",
|
||||||
}),
|
}),
|
||||||
status.responseHeaders,
|
status.responseHeaders,
|
||||||
|
status,
|
||||||
);
|
);
|
||||||
|
|
||||||
const contentType = (Object.entries(status.responseHeaders ?? {}).find(
|
const contentType = (Object.entries(status.responseHeaders ?? {}).find(
|
||||||
@ -219,6 +233,7 @@ export async function scrapeURLWithFireEngineChromeCDP(
|
|||||||
request,
|
request,
|
||||||
timeout,
|
timeout,
|
||||||
meta.mock,
|
meta.mock,
|
||||||
|
meta.internalOptions.abort,
|
||||||
);
|
);
|
||||||
|
|
||||||
if (
|
if (
|
||||||
@ -298,6 +313,7 @@ export async function scrapeURLWithFireEnginePlaywright(
|
|||||||
request,
|
request,
|
||||||
timeout,
|
timeout,
|
||||||
meta.mock,
|
meta.mock,
|
||||||
|
meta.internalOptions.abort,
|
||||||
);
|
);
|
||||||
|
|
||||||
if (!response.url) {
|
if (!response.url) {
|
||||||
@ -353,6 +369,7 @@ export async function scrapeURLWithFireEngineTLSClient(
|
|||||||
request,
|
request,
|
||||||
timeout,
|
timeout,
|
||||||
meta.mock,
|
meta.mock,
|
||||||
|
meta.internalOptions.abort,
|
||||||
);
|
);
|
||||||
|
|
||||||
if (!response.url) {
|
if (!response.url) {
|
||||||
|
@ -65,6 +65,8 @@ const schema = z.object({
|
|||||||
processing: z.boolean(),
|
processing: z.boolean(),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
export const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL ?? "<mock-fire-engine-url>";
|
||||||
|
|
||||||
export async function fireEngineScrape<
|
export async function fireEngineScrape<
|
||||||
Engine extends
|
Engine extends
|
||||||
| FireEngineScrapeRequestChromeCDP
|
| FireEngineScrapeRequestChromeCDP
|
||||||
@ -74,11 +76,8 @@ export async function fireEngineScrape<
|
|||||||
logger: Logger,
|
logger: Logger,
|
||||||
request: FireEngineScrapeRequestCommon & Engine,
|
request: FireEngineScrapeRequestCommon & Engine,
|
||||||
mock: MockState | null,
|
mock: MockState | null,
|
||||||
|
abort?: AbortSignal,
|
||||||
): Promise<z.infer<typeof schema>> {
|
): Promise<z.infer<typeof schema>> {
|
||||||
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;
|
|
||||||
|
|
||||||
// TODO: retries
|
|
||||||
|
|
||||||
const scrapeRequest = await Sentry.startSpan(
|
const scrapeRequest = await Sentry.startSpan(
|
||||||
{
|
{
|
||||||
name: "fire-engine: Scrape",
|
name: "fire-engine: Scrape",
|
||||||
@ -103,6 +102,7 @@ export async function fireEngineScrape<
|
|||||||
schema,
|
schema,
|
||||||
tryCount: 3,
|
tryCount: 3,
|
||||||
mock,
|
mock,
|
||||||
|
abort,
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
@ -310,7 +310,12 @@ export function buildFallbackList(meta: Meta): {
|
|||||||
engine: Engine;
|
engine: Engine;
|
||||||
unsupportedFeatures: Set<FeatureFlag>;
|
unsupportedFeatures: Set<FeatureFlag>;
|
||||||
}[] {
|
}[] {
|
||||||
const _engines = [...engines];
|
const _engines: Engine[] = [
|
||||||
|
...engines,
|
||||||
|
|
||||||
|
// enable fire-engine in self-hosted testing environment when mocks are supplied
|
||||||
|
...((!useFireEngine && meta.mock !== null) ? ["fire-engine;chrome-cdp", "fire-engine;playwright", "fire-engine;tlsclient"] as Engine[] : [])
|
||||||
|
];
|
||||||
|
|
||||||
if (meta.internalOptions.useCache !== true) {
|
if (meta.internalOptions.useCache !== true) {
|
||||||
const cacheIndex = _engines.indexOf("cache");
|
const cacheIndex = _engines.indexOf("cache");
|
||||||
|
@ -7,9 +7,10 @@ import * as Sentry from "@sentry/node";
|
|||||||
import escapeHtml from "escape-html";
|
import escapeHtml from "escape-html";
|
||||||
import PdfParse from "pdf-parse";
|
import PdfParse from "pdf-parse";
|
||||||
import { downloadFile, fetchFileToBuffer } from "../utils/downloadFile";
|
import { downloadFile, fetchFileToBuffer } from "../utils/downloadFile";
|
||||||
import { RemoveFeatureError, UnsupportedFileError } from "../../error";
|
import { PDFAntibotError, RemoveFeatureError, UnsupportedFileError } from "../../error";
|
||||||
import { readFile, unlink } from "node:fs/promises";
|
import { readFile, unlink } from "node:fs/promises";
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
|
import type { Response } from "undici";
|
||||||
|
|
||||||
type PDFProcessorResult = { html: string; markdown?: string };
|
type PDFProcessorResult = { html: string; markdown?: string };
|
||||||
|
|
||||||
@ -75,22 +76,49 @@ export async function scrapePDF(
|
|||||||
timeToRun: number | undefined,
|
timeToRun: number | undefined,
|
||||||
): Promise<EngineScrapeResult> {
|
): Promise<EngineScrapeResult> {
|
||||||
if (!meta.options.parsePDF) {
|
if (!meta.options.parsePDF) {
|
||||||
const file = await fetchFileToBuffer(meta.url, {
|
if (meta.pdfPrefetch !== undefined && meta.pdfPrefetch !== null) {
|
||||||
headers: meta.options.headers,
|
const content = (await readFile(meta.pdfPrefetch.filePath)).toString("base64");
|
||||||
});
|
return {
|
||||||
const content = file.buffer.toString("base64");
|
url: meta.pdfPrefetch.url ?? meta.url,
|
||||||
return {
|
statusCode: meta.pdfPrefetch.status,
|
||||||
url: file.response.url,
|
|
||||||
statusCode: file.response.status,
|
|
||||||
|
|
||||||
html: content,
|
html: content,
|
||||||
markdown: content,
|
markdown: content,
|
||||||
};
|
};
|
||||||
|
} else {
|
||||||
|
const file = await fetchFileToBuffer(meta.url, {
|
||||||
|
headers: meta.options.headers,
|
||||||
|
});
|
||||||
|
|
||||||
|
const ct = file.response.headers.get("Content-Type");
|
||||||
|
if (ct && !ct.includes("application/pdf")) { // if downloaded file wasn't a PDF
|
||||||
|
throw new PDFAntibotError();
|
||||||
|
}
|
||||||
|
|
||||||
|
const content = file.buffer.toString("base64");
|
||||||
|
return {
|
||||||
|
url: file.response.url,
|
||||||
|
statusCode: file.response.status,
|
||||||
|
|
||||||
|
html: content,
|
||||||
|
markdown: content,
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const { response, tempFilePath } = await downloadFile(meta.id, meta.url, {
|
const { response, tempFilePath } = (meta.pdfPrefetch !== undefined && meta.pdfPrefetch !== null)
|
||||||
headers: meta.options.headers,
|
? { response: meta.pdfPrefetch, tempFilePath: meta.pdfPrefetch.filePath }
|
||||||
});
|
: await downloadFile(meta.id, meta.url, {
|
||||||
|
headers: meta.options.headers,
|
||||||
|
});
|
||||||
|
|
||||||
|
if ((response as any).headers) { // if downloadFile was used
|
||||||
|
const r: Response = response as any;
|
||||||
|
const ct = r.headers.get("Content-Type");
|
||||||
|
if (ct && !ct.includes("application/pdf")) { // if downloaded file wasn't a PDF
|
||||||
|
throw new PDFAntibotError();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let result: PDFProcessorResult | null = null;
|
let result: PDFProcessorResult | null = null;
|
||||||
|
|
||||||
@ -142,7 +170,7 @@ export async function scrapePDF(
|
|||||||
await unlink(tempFilePath);
|
await unlink(tempFilePath);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
url: response.url,
|
url: response.url ?? meta.url,
|
||||||
statusCode: response.status,
|
statusCode: response.status,
|
||||||
html: result?.html ?? "",
|
html: result?.html ?? "",
|
||||||
markdown: result?.markdown ?? "",
|
markdown: result?.markdown ?? "",
|
||||||
|
@ -72,7 +72,7 @@ export function scrapeURLWithScrapingBee(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
specialtyScrapeCheck(
|
await specialtyScrapeCheck(
|
||||||
meta.logger.child({
|
meta.logger.child({
|
||||||
method: "scrapeURLWithScrapingBee/specialtyScrapeCheck",
|
method: "scrapeURLWithScrapingBee/specialtyScrapeCheck",
|
||||||
}),
|
}),
|
||||||
|
@ -43,14 +43,24 @@ export function makeSecureDispatcher(
|
|||||||
url: string,
|
url: string,
|
||||||
options?: undici.Agent.Options,
|
options?: undici.Agent.Options,
|
||||||
) {
|
) {
|
||||||
const agent = new undici.Agent({
|
const agentOpts: undici.Agent.Options = {
|
||||||
connect: {
|
connect: {
|
||||||
rejectUnauthorized: false, // bypass SSL failures -- this is fine
|
rejectUnauthorized: false, // bypass SSL failures -- this is fine
|
||||||
// lookup: secureLookup,
|
// lookup: secureLookup,
|
||||||
},
|
},
|
||||||
maxRedirections: 5000,
|
maxRedirections: 5000,
|
||||||
...options,
|
...options,
|
||||||
});
|
};
|
||||||
|
|
||||||
|
const agent = process.env.PROXY_SERVER
|
||||||
|
? new undici.ProxyAgent({
|
||||||
|
uri: process.env.PROXY_SERVER.includes("://") ? process.env.PROXY_SERVER : ("http://" + process.env.PROXY_SERVER),
|
||||||
|
token: process.env.PROXY_USERNAME
|
||||||
|
? `Basic ${Buffer.from(process.env.PROXY_USERNAME + ":" + (process.env.PROXY_PASSWORD ?? "")).toString("base64")}`
|
||||||
|
: undefined,
|
||||||
|
...agentOpts,
|
||||||
|
})
|
||||||
|
: new undici.Agent(agentOpts);
|
||||||
|
|
||||||
agent.on("connect", (_, targets) => {
|
agent.on("connect", (_, targets) => {
|
||||||
const client: undici.Client = targets.slice(-1)[0] as undici.Client;
|
const client: undici.Client = targets.slice(-1)[0] as undici.Client;
|
||||||
|
@ -1,9 +1,30 @@
|
|||||||
import { Logger } from "winston";
|
import { Logger } from "winston";
|
||||||
import { AddFeatureError } from "../../error";
|
import { AddFeatureError } from "../../error";
|
||||||
|
import { FireEngineCheckStatusSuccess } from "../fire-engine/checkStatus";
|
||||||
|
import path from "path";
|
||||||
|
import os from "os";
|
||||||
|
import { writeFile } from "fs/promises";
|
||||||
|
import { Meta } from "../..";
|
||||||
|
|
||||||
export function specialtyScrapeCheck(
|
async function feResToPdfPrefetch(feRes: FireEngineCheckStatusSuccess | undefined): Promise<Meta["pdfPrefetch"]> {
|
||||||
|
if (!feRes?.file) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
const filePath = path.join(os.tmpdir(), `tempFile-${crypto.randomUUID()}.pdf`);
|
||||||
|
await writeFile(filePath, Buffer.from(feRes.file.content, "base64"))
|
||||||
|
|
||||||
|
return {
|
||||||
|
status: feRes.pageStatusCode,
|
||||||
|
url: feRes.url,
|
||||||
|
filePath,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function specialtyScrapeCheck(
|
||||||
logger: Logger,
|
logger: Logger,
|
||||||
headers: Record<string, string> | undefined,
|
headers: Record<string, string> | undefined,
|
||||||
|
feRes?: FireEngineCheckStatusSuccess,
|
||||||
) {
|
) {
|
||||||
const contentType = (Object.entries(headers ?? {}).find(
|
const contentType = (Object.entries(headers ?? {}).find(
|
||||||
(x) => x[0].toLowerCase() === "content-type",
|
(x) => x[0].toLowerCase() === "content-type",
|
||||||
@ -18,7 +39,7 @@ export function specialtyScrapeCheck(
|
|||||||
contentType.startsWith("application/pdf;")
|
contentType.startsWith("application/pdf;")
|
||||||
) {
|
) {
|
||||||
// .pdf
|
// .pdf
|
||||||
throw new AddFeatureError(["pdf"]);
|
throw new AddFeatureError(["pdf"], await feResToPdfPrefetch(feRes));
|
||||||
} else if (
|
} else if (
|
||||||
contentType ===
|
contentType ===
|
||||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document" ||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document" ||
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import { EngineResultsTracker } from ".";
|
import { EngineResultsTracker, Meta } from ".";
|
||||||
import { Engine, FeatureFlag } from "./engines";
|
import { Engine, FeatureFlag } from "./engines";
|
||||||
|
|
||||||
export class EngineError extends Error {
|
export class EngineError extends Error {
|
||||||
@ -28,10 +28,12 @@ export class NoEnginesLeftError extends Error {
|
|||||||
|
|
||||||
export class AddFeatureError extends Error {
|
export class AddFeatureError extends Error {
|
||||||
public featureFlags: FeatureFlag[];
|
public featureFlags: FeatureFlag[];
|
||||||
|
public pdfPrefetch: Meta["pdfPrefetch"];
|
||||||
|
|
||||||
constructor(featureFlags: FeatureFlag[]) {
|
constructor(featureFlags: FeatureFlag[], pdfPrefetch?: Meta["pdfPrefetch"]) {
|
||||||
super("New feature flags have been discovered: " + featureFlags.join(", "));
|
super("New feature flags have been discovered: " + featureFlags.join(", "));
|
||||||
this.featureFlags = featureFlags;
|
this.featureFlags = featureFlags;
|
||||||
|
this.pdfPrefetch = pdfPrefetch;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -72,3 +74,9 @@ export class UnsupportedFileError extends Error {
|
|||||||
this.reason = reason;
|
this.reason = reason;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export class PDFAntibotError extends Error {
|
||||||
|
constructor() {
|
||||||
|
super("PDF scrape was prevented by anti-bot")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import { Logger } from "winston";
|
import { Logger } from "winston";
|
||||||
import * as Sentry from "@sentry/node";
|
import * as Sentry from "@sentry/node";
|
||||||
|
|
||||||
import { Document, ScrapeOptions } from "../../controllers/v1/types";
|
import { Document, ScrapeOptions, TimeoutSignal } from "../../controllers/v1/types";
|
||||||
import { logger as _logger } from "../../lib/logger";
|
import { logger as _logger } from "../../lib/logger";
|
||||||
import {
|
import {
|
||||||
buildFallbackList,
|
buildFallbackList,
|
||||||
@ -16,6 +16,7 @@ import {
|
|||||||
AddFeatureError,
|
AddFeatureError,
|
||||||
EngineError,
|
EngineError,
|
||||||
NoEnginesLeftError,
|
NoEnginesLeftError,
|
||||||
|
PDFAntibotError,
|
||||||
RemoveFeatureError,
|
RemoveFeatureError,
|
||||||
SiteError,
|
SiteError,
|
||||||
TimeoutError,
|
TimeoutError,
|
||||||
@ -49,6 +50,11 @@ export type Meta = {
|
|||||||
logs: any[];
|
logs: any[];
|
||||||
featureFlags: Set<FeatureFlag>;
|
featureFlags: Set<FeatureFlag>;
|
||||||
mock: MockState | null;
|
mock: MockState | null;
|
||||||
|
pdfPrefetch: {
|
||||||
|
filePath: string;
|
||||||
|
url?: string;
|
||||||
|
status: number;
|
||||||
|
} | null | undefined; // undefined: no prefetch yet, null: prefetch came back empty
|
||||||
};
|
};
|
||||||
|
|
||||||
function buildFeatureFlags(
|
function buildFeatureFlags(
|
||||||
@ -151,6 +157,7 @@ async function buildMetaObject(
|
|||||||
options.useMock !== undefined
|
options.useMock !== undefined
|
||||||
? await loadMock(options.useMock, _logger)
|
? await loadMock(options.useMock, _logger)
|
||||||
: null,
|
: null,
|
||||||
|
pdfPrefetch: undefined,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -165,6 +172,7 @@ export type InternalOptions = {
|
|||||||
disableSmartWaitCache?: boolean; // Passed along to fire-engine
|
disableSmartWaitCache?: boolean; // Passed along to fire-engine
|
||||||
isBackgroundIndex?: boolean;
|
isBackgroundIndex?: boolean;
|
||||||
fromCache?: boolean; // Indicates if the document was retrieved from cache
|
fromCache?: boolean; // Indicates if the document was retrieved from cache
|
||||||
|
abort?: AbortSignal;
|
||||||
};
|
};
|
||||||
|
|
||||||
export type EngineResultsTracker = {
|
export type EngineResultsTracker = {
|
||||||
@ -222,6 +230,7 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
|
|||||||
: undefined;
|
: undefined;
|
||||||
|
|
||||||
for (const { engine, unsupportedFeatures } of fallbackList) {
|
for (const { engine, unsupportedFeatures } of fallbackList) {
|
||||||
|
meta.internalOptions.abort?.throwIfAborted();
|
||||||
const startedAt = Date.now();
|
const startedAt = Date.now();
|
||||||
try {
|
try {
|
||||||
meta.logger.info("Scraping via " + engine + "...");
|
meta.logger.info("Scraping via " + engine + "...");
|
||||||
@ -307,6 +316,10 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
|
|||||||
throw error;
|
throw error;
|
||||||
} else if (error instanceof UnsupportedFileError) {
|
} else if (error instanceof UnsupportedFileError) {
|
||||||
throw error;
|
throw error;
|
||||||
|
} else if (error instanceof PDFAntibotError) {
|
||||||
|
throw error;
|
||||||
|
} else if (error instanceof TimeoutSignal) {
|
||||||
|
throw error;
|
||||||
} else {
|
} else {
|
||||||
Sentry.captureException(error);
|
Sentry.captureException(error);
|
||||||
meta.logger.warn(
|
meta.logger.warn(
|
||||||
@ -390,6 +403,9 @@ export async function scrapeURL(
|
|||||||
meta.featureFlags = new Set(
|
meta.featureFlags = new Set(
|
||||||
[...meta.featureFlags].concat(error.featureFlags),
|
[...meta.featureFlags].concat(error.featureFlags),
|
||||||
);
|
);
|
||||||
|
if (error.pdfPrefetch) {
|
||||||
|
meta.pdfPrefetch = error.pdfPrefetch;
|
||||||
|
}
|
||||||
} else if (
|
} else if (
|
||||||
error instanceof RemoveFeatureError &&
|
error instanceof RemoveFeatureError &&
|
||||||
meta.internalOptions.forceEngine === undefined
|
meta.internalOptions.forceEngine === undefined
|
||||||
@ -404,6 +420,21 @@ export async function scrapeURL(
|
|||||||
(x) => !error.featureFlags.includes(x),
|
(x) => !error.featureFlags.includes(x),
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
|
} else if (
|
||||||
|
error instanceof PDFAntibotError &&
|
||||||
|
meta.internalOptions.forceEngine === undefined
|
||||||
|
) {
|
||||||
|
if (meta.pdfPrefetch !== undefined) {
|
||||||
|
meta.logger.error("PDF was prefetched and still blocked by antibot, failing");
|
||||||
|
throw error;
|
||||||
|
} else {
|
||||||
|
meta.logger.debug("PDF was blocked by anti-bot, prefetching with chrome-cdp");
|
||||||
|
meta.featureFlags = new Set(
|
||||||
|
[...meta.featureFlags].filter(
|
||||||
|
(x) => x !== "pdf",
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
@ -433,6 +464,8 @@ export async function scrapeURL(
|
|||||||
meta.logger.warn("scrapeURL: Tried to scrape unsupported file", {
|
meta.logger.warn("scrapeURL: Tried to scrape unsupported file", {
|
||||||
error,
|
error,
|
||||||
});
|
});
|
||||||
|
} else if (error instanceof TimeoutSignal) {
|
||||||
|
throw error;
|
||||||
} else {
|
} else {
|
||||||
Sentry.captureException(error);
|
Sentry.captureException(error);
|
||||||
meta.logger.error("scrapeURL: Unexpected error happened", { error });
|
meta.logger.error("scrapeURL: Unexpected error happened", { error });
|
||||||
|
@ -2,6 +2,8 @@ import { Logger } from "winston";
|
|||||||
import { z, ZodError } from "zod";
|
import { z, ZodError } from "zod";
|
||||||
import * as Sentry from "@sentry/node";
|
import * as Sentry from "@sentry/node";
|
||||||
import { MockState, saveMock } from "./mock";
|
import { MockState, saveMock } from "./mock";
|
||||||
|
import { TimeoutSignal } from "../../../controllers/v1/types";
|
||||||
|
import { fireEngineURL } from "../engines/fire-engine/scrape";
|
||||||
|
|
||||||
export type RobustFetchParams<Schema extends z.Schema<any>> = {
|
export type RobustFetchParams<Schema extends z.Schema<any>> = {
|
||||||
url: string;
|
url: string;
|
||||||
@ -17,6 +19,7 @@ export type RobustFetchParams<Schema extends z.Schema<any>> = {
|
|||||||
tryCount?: number;
|
tryCount?: number;
|
||||||
tryCooldown?: number;
|
tryCooldown?: number;
|
||||||
mock: MockState | null;
|
mock: MockState | null;
|
||||||
|
abort?: AbortSignal;
|
||||||
};
|
};
|
||||||
|
|
||||||
export async function robustFetch<
|
export async function robustFetch<
|
||||||
@ -35,7 +38,10 @@ export async function robustFetch<
|
|||||||
tryCount = 1,
|
tryCount = 1,
|
||||||
tryCooldown,
|
tryCooldown,
|
||||||
mock,
|
mock,
|
||||||
|
abort,
|
||||||
}: RobustFetchParams<Schema>): Promise<Output> {
|
}: RobustFetchParams<Schema>): Promise<Output> {
|
||||||
|
abort?.throwIfAborted();
|
||||||
|
|
||||||
const params = {
|
const params = {
|
||||||
url,
|
url,
|
||||||
logger,
|
logger,
|
||||||
@ -47,6 +53,7 @@ export async function robustFetch<
|
|||||||
ignoreFailure,
|
ignoreFailure,
|
||||||
tryCount,
|
tryCount,
|
||||||
tryCooldown,
|
tryCooldown,
|
||||||
|
abort,
|
||||||
};
|
};
|
||||||
|
|
||||||
let response: {
|
let response: {
|
||||||
@ -70,6 +77,7 @@ export async function robustFetch<
|
|||||||
: {}),
|
: {}),
|
||||||
...(headers !== undefined ? headers : {}),
|
...(headers !== undefined ? headers : {}),
|
||||||
},
|
},
|
||||||
|
signal: abort,
|
||||||
...(body instanceof FormData
|
...(body instanceof FormData
|
||||||
? {
|
? {
|
||||||
body,
|
body,
|
||||||
@ -81,7 +89,9 @@ export async function robustFetch<
|
|||||||
: {}),
|
: {}),
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
if (!ignoreFailure) {
|
if (error instanceof TimeoutSignal) {
|
||||||
|
throw error;
|
||||||
|
} else if (!ignoreFailure) {
|
||||||
Sentry.captureException(error);
|
Sentry.captureException(error);
|
||||||
if (tryCount > 1) {
|
if (tryCount > 1) {
|
||||||
logger.debug(
|
logger.debug(
|
||||||
@ -126,14 +136,13 @@ export async function robustFetch<
|
|||||||
const makeRequestTypeId = (
|
const makeRequestTypeId = (
|
||||||
request: (typeof mock)["requests"][number]["options"],
|
request: (typeof mock)["requests"][number]["options"],
|
||||||
) => {
|
) => {
|
||||||
let trueUrl = (process.env.FIRE_ENGINE_BETA_URL && request.url.startsWith(process.env.FIRE_ENGINE_BETA_URL))
|
let trueUrl = request.url.startsWith(fireEngineURL)
|
||||||
? request.url.replace(process.env.FIRE_ENGINE_BETA_URL, "<fire-engine>")
|
? request.url.replace(fireEngineURL, "<fire-engine>")
|
||||||
: request.url;
|
: request.url;
|
||||||
|
|
||||||
let out = trueUrl + ";" + request.method;
|
let out = trueUrl + ";" + request.method;
|
||||||
if (
|
if (
|
||||||
process.env.FIRE_ENGINE_BETA_URL &&
|
trueUrl.startsWith("<fire-engine>") &&
|
||||||
(trueUrl.startsWith("<fire-engine>")) &&
|
|
||||||
request.method === "POST"
|
request.method === "POST"
|
||||||
) {
|
) {
|
||||||
out += "f-e;" + request.body?.engine + ";" + request.body?.url;
|
out += "f-e;" + request.body?.engine + ";" + request.body?.url;
|
||||||
|
@ -305,6 +305,7 @@ export async function performLLMExtract(
|
|||||||
document: Document,
|
document: Document,
|
||||||
): Promise<Document> {
|
): Promise<Document> {
|
||||||
if (meta.options.formats.includes("extract")) {
|
if (meta.options.formats.includes("extract")) {
|
||||||
|
meta.internalOptions.abort?.throwIfAborted();
|
||||||
const { extract, warning } = await generateOpenAICompletions(
|
const { extract, warning } = await generateOpenAICompletions(
|
||||||
meta.logger.child({
|
meta.logger.child({
|
||||||
method: "performLLMExtract/generateOpenAICompletions",
|
method: "performLLMExtract/generateOpenAICompletions",
|
||||||
|
@ -16,6 +16,7 @@ export async function fireEngineMap(
|
|||||||
numResults: number;
|
numResults: number;
|
||||||
page?: number;
|
page?: number;
|
||||||
},
|
},
|
||||||
|
abort?: AbortSignal,
|
||||||
): Promise<SearchResult[]> {
|
): Promise<SearchResult[]> {
|
||||||
try {
|
try {
|
||||||
let data = JSON.stringify({
|
let data = JSON.stringify({
|
||||||
@ -29,9 +30,7 @@ export async function fireEngineMap(
|
|||||||
});
|
});
|
||||||
|
|
||||||
if (!process.env.FIRE_ENGINE_BETA_URL) {
|
if (!process.env.FIRE_ENGINE_BETA_URL) {
|
||||||
console.warn(
|
logger.warn("(v1/map Beta) Results might differ from cloud offering currently.");
|
||||||
"(v1/map Beta) Results might differ from cloud offering currently.",
|
|
||||||
);
|
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -42,6 +41,7 @@ export async function fireEngineMap(
|
|||||||
"X-Disable-Cache": "true",
|
"X-Disable-Cache": "true",
|
||||||
},
|
},
|
||||||
body: data,
|
body: data,
|
||||||
|
signal: abort,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (response.ok) {
|
if (response.ok) {
|
||||||
|
@ -1,21 +1,18 @@
|
|||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
import * as cheerio from "cheerio"; // TODO: rustify
|
import { JSDOM } from 'jsdom';
|
||||||
import * as querystring from "querystring";
|
import * as querystring from "querystring";
|
||||||
import { SearchResult } from "../../src/lib/entities";
|
import { SearchResult } from "../../src/lib/entities";
|
||||||
import { logger } from "../../src/lib/logger";
|
import { logger } from "../../src/lib/logger";
|
||||||
|
import https from 'https';
|
||||||
|
|
||||||
const _useragent_list = [
|
const getRandomInt = (min: number, max: number): number => Math.floor(Math.random() * (max - min + 1)) + min;
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62",
|
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0",
|
|
||||||
];
|
|
||||||
|
|
||||||
function get_useragent(): string {
|
export function get_useragent(): string {
|
||||||
return _useragent_list[Math.floor(Math.random() * _useragent_list.length)];
|
const lynx_version = `Lynx/${getRandomInt(2, 3)}.${getRandomInt(8, 9)}.${getRandomInt(0, 2)}`;
|
||||||
|
const libwww_version = `libwww-FM/${getRandomInt(2, 3)}.${getRandomInt(13, 15)}`;
|
||||||
|
const ssl_mm_version = `SSL-MM/${getRandomInt(1, 2)}.${getRandomInt(3, 5)}`;
|
||||||
|
const openssl_version = `OpenSSL/${getRandomInt(1, 3)}.${getRandomInt(0, 4)}.${getRandomInt(0, 9)}`;
|
||||||
|
return `${lynx_version} ${libwww_version} ${ssl_mm_version} ${openssl_version}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function _req(
|
async function _req(
|
||||||
@ -31,9 +28,10 @@ async function _req(
|
|||||||
) {
|
) {
|
||||||
const params = {
|
const params = {
|
||||||
q: term,
|
q: term,
|
||||||
num: results, // Number of results to return
|
num: results+2, // Number of results to return
|
||||||
hl: lang,
|
hl: lang,
|
||||||
gl: country,
|
gl: country,
|
||||||
|
safe: "active",
|
||||||
start: start,
|
start: start,
|
||||||
};
|
};
|
||||||
if (tbs) {
|
if (tbs) {
|
||||||
@ -42,18 +40,25 @@ async function _req(
|
|||||||
if (filter) {
|
if (filter) {
|
||||||
params["filter"] = filter;
|
params["filter"] = filter;
|
||||||
}
|
}
|
||||||
|
var agent = get_useragent();
|
||||||
try {
|
try {
|
||||||
const resp = await axios.get("https://www.google.com/search", {
|
const resp = await axios.get("https://www.google.com/search", {
|
||||||
headers: {
|
headers: {
|
||||||
"User-Agent": get_useragent(),
|
"User-Agent": agent,
|
||||||
|
"Accept": "*/*"
|
||||||
},
|
},
|
||||||
params: params,
|
params: params,
|
||||||
proxy: proxies,
|
proxy: proxies,
|
||||||
timeout: timeout,
|
timeout: timeout,
|
||||||
|
httpsAgent: new https.Agent({
|
||||||
|
rejectUnauthorized: true
|
||||||
|
}),
|
||||||
|
withCredentials: true
|
||||||
});
|
});
|
||||||
return resp;
|
return resp;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
if (error.response && error.response.status === 429) {
|
if (error.response && error.response.status === 429) {
|
||||||
|
logger.warn("Google Search: Too many requests, try again later.", error.response);
|
||||||
throw new Error("Google Search: Too many requests, try again later.");
|
throw new Error("Google Search: Too many requests, try again later.");
|
||||||
}
|
}
|
||||||
throw error;
|
throw error;
|
||||||
@ -100,34 +105,42 @@ export async function googleSearch(
|
|||||||
tbs,
|
tbs,
|
||||||
filter,
|
filter,
|
||||||
);
|
);
|
||||||
const $ = cheerio.load(resp.data);
|
const dom = new JSDOM(resp.data);
|
||||||
const result_block = $("div.g");
|
const document = dom.window.document;
|
||||||
|
const result_block = document.querySelectorAll("div.ezO2md");
|
||||||
|
let new_results = 0;
|
||||||
|
let unique = true;
|
||||||
|
let fetched_results = 0;
|
||||||
|
|
||||||
|
const fetched_links = new Set<string>();
|
||||||
if (result_block.length === 0) {
|
if (result_block.length === 0) {
|
||||||
start += 1;
|
start += 1;
|
||||||
attempts += 1;
|
attempts += 1;
|
||||||
} else {
|
} else {
|
||||||
attempts = 0; // Reset attempts if we have results
|
attempts = 0;
|
||||||
}
|
}
|
||||||
result_block.each((index, element) => {
|
|
||||||
const linkElement = $(element).find("a");
|
for (const result of result_block) {
|
||||||
const link =
|
const link_tag = result.querySelector("a[href]") as HTMLAnchorElement;
|
||||||
linkElement && linkElement.attr("href")
|
const title_tag = link_tag ? link_tag.querySelector("span.CVA68e") : null;
|
||||||
? linkElement.attr("href")
|
const description_tag = result.querySelector("span.FrIlee");
|
||||||
: null;
|
|
||||||
const title = $(element).find("h3");
|
if (link_tag && title_tag && description_tag) {
|
||||||
const ogImage = $(element).find("img").eq(1).attr("src");
|
const link = decodeURIComponent(link_tag.href.split("&")[0].replace("/url?q=", ""));
|
||||||
const description_box = $(element).find(
|
if (fetched_links.has(link) && unique) continue;
|
||||||
"div[style='-webkit-line-clamp:2']",
|
fetched_links.add(link);
|
||||||
);
|
const title = title_tag.textContent || "";
|
||||||
const answerBox = $(element).find(".mod").text();
|
const description = description_tag.textContent || "";
|
||||||
if (description_box) {
|
fetched_results++;
|
||||||
const description = description_box.text();
|
new_results++;
|
||||||
if (link && title && description) {
|
if (link && title && description) {
|
||||||
start += 1;
|
start += 1
|
||||||
results.push(new SearchResult(link, title.text(), description));
|
results.push(new SearchResult(link, title, description));
|
||||||
|
}
|
||||||
|
if (fetched_results >= num_results) break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
|
||||||
await new Promise((resolve) =>
|
await new Promise((resolve) =>
|
||||||
setTimeout(resolve, sleep_interval * 1000),
|
setTimeout(resolve, sleep_interval * 1000),
|
||||||
);
|
);
|
||||||
|
@ -4,6 +4,7 @@ import { googleSearch } from "./googlesearch";
|
|||||||
import { fireEngineMap } from "./fireEngine";
|
import { fireEngineMap } from "./fireEngine";
|
||||||
import { searchapi_search } from "./searchapi";
|
import { searchapi_search } from "./searchapi";
|
||||||
import { serper_search } from "./serper";
|
import { serper_search } from "./serper";
|
||||||
|
import { searxng_search } from "./searxng";
|
||||||
|
|
||||||
export async function search({
|
export async function search({
|
||||||
query,
|
query,
|
||||||
@ -51,6 +52,16 @@ export async function search({
|
|||||||
location,
|
location,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
if (process.env.SEARXNG_ENDPOINT) {
|
||||||
|
return await searxng_search(query, {
|
||||||
|
num_results,
|
||||||
|
tbs,
|
||||||
|
filter,
|
||||||
|
lang,
|
||||||
|
country,
|
||||||
|
location,
|
||||||
|
});
|
||||||
|
}
|
||||||
return await googleSearch(
|
return await googleSearch(
|
||||||
query,
|
query,
|
||||||
advanced,
|
advanced,
|
||||||
@ -64,7 +75,7 @@ export async function search({
|
|||||||
timeout,
|
timeout,
|
||||||
);
|
);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Error in search function: ${error}`);
|
logger.error(`Error in search function`, { error });
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
64
apps/api/src/search/searxng.ts
Normal file
64
apps/api/src/search/searxng.ts
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
import axios from "axios";
|
||||||
|
import dotenv from "dotenv";
|
||||||
|
import { SearchResult } from "../../src/lib/entities";
|
||||||
|
import { logger } from "../lib/logger"
|
||||||
|
|
||||||
|
dotenv.config();
|
||||||
|
|
||||||
|
interface SearchOptions {
|
||||||
|
tbs?: string;
|
||||||
|
filter?: string;
|
||||||
|
lang?: string;
|
||||||
|
country?: string;
|
||||||
|
location?: string;
|
||||||
|
num_results: number;
|
||||||
|
page?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function searxng_search(
|
||||||
|
q: string,
|
||||||
|
options: SearchOptions,
|
||||||
|
): Promise<SearchResult[]> {
|
||||||
|
const params = {
|
||||||
|
q: q,
|
||||||
|
language: options.lang,
|
||||||
|
// gl: options.country, //not possible with SearXNG
|
||||||
|
// location: options.location, //not possible with SearXNG
|
||||||
|
// num: options.num_results, //not possible with SearXNG
|
||||||
|
engines: process.env.SEARXNG_ENGINES || "",
|
||||||
|
categories: process.env.SEARXNG_CATEGORIES || "general",
|
||||||
|
pageno: options.page ?? 1,
|
||||||
|
format: "json"
|
||||||
|
};
|
||||||
|
|
||||||
|
const url = process.env.SEARXNG_ENDPOINT!;
|
||||||
|
// Remove trailing slash if it exists
|
||||||
|
const cleanedUrl = url.endsWith('/') ? url.slice(0, -1) : url;
|
||||||
|
|
||||||
|
// Concatenate "/search" to the cleaned URL
|
||||||
|
const finalUrl = cleanedUrl + "/search";
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await axios.get(finalUrl, {
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
params: params,
|
||||||
|
});
|
||||||
|
|
||||||
|
const data = response.data;
|
||||||
|
|
||||||
|
if (data && Array.isArray(data.results)) {
|
||||||
|
return data.results.map((a: any) => ({
|
||||||
|
url: a.url,
|
||||||
|
title: a.title,
|
||||||
|
description: a.content,
|
||||||
|
}));
|
||||||
|
} else {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
logger.error(`There was an error searching for content`, { error });
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
}
|
@ -1,10 +1,8 @@
|
|||||||
import { Job, JobsOptions } from "bullmq";
|
|
||||||
import { getScrapeQueue } from "./queue-service";
|
import { getScrapeQueue } from "./queue-service";
|
||||||
import { v4 as uuidv4 } from "uuid";
|
import { v4 as uuidv4 } from "uuid";
|
||||||
import { NotificationType, PlanType, WebScraperOptions } from "../types";
|
import { PlanType, WebScraperOptions } from "../types";
|
||||||
import * as Sentry from "@sentry/node";
|
import * as Sentry from "@sentry/node";
|
||||||
import {
|
import {
|
||||||
calculateJobTimeToRun,
|
|
||||||
cleanOldConcurrencyLimitEntries,
|
cleanOldConcurrencyLimitEntries,
|
||||||
getConcurrencyLimitActiveJobs,
|
getConcurrencyLimitActiveJobs,
|
||||||
getConcurrencyQueueJobsCount,
|
getConcurrencyQueueJobsCount,
|
||||||
@ -13,7 +11,6 @@ import {
|
|||||||
} from "../lib/concurrency-limit";
|
} from "../lib/concurrency-limit";
|
||||||
import { logger } from "../lib/logger";
|
import { logger } from "../lib/logger";
|
||||||
import { getConcurrencyLimitMax } from "./rate-limiter";
|
import { getConcurrencyLimitMax } from "./rate-limiter";
|
||||||
import { sendNotificationWithCustomDays } from "./notification/email_notification";
|
|
||||||
|
|
||||||
async function _addScrapeJobToConcurrencyQueue(
|
async function _addScrapeJobToConcurrencyQueue(
|
||||||
webScraperOptions: any,
|
webScraperOptions: any,
|
||||||
@ -44,15 +41,7 @@ export async function _addScrapeJobToBullMQ(
|
|||||||
webScraperOptions.team_id &&
|
webScraperOptions.team_id &&
|
||||||
webScraperOptions.plan
|
webScraperOptions.plan
|
||||||
) {
|
) {
|
||||||
await pushConcurrencyLimitActiveJob(webScraperOptions.team_id, jobId, calculateJobTimeToRun({
|
await pushConcurrencyLimitActiveJob(webScraperOptions.team_id, jobId, 60 * 1000); // 60s default timeout
|
||||||
id: jobId,
|
|
||||||
opts: {
|
|
||||||
...options,
|
|
||||||
priority: jobPriority,
|
|
||||||
jobId,
|
|
||||||
},
|
|
||||||
data: webScraperOptions,
|
|
||||||
}));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
await getScrapeQueue().add(jobId, webScraperOptions, {
|
await getScrapeQueue().add(jobId, webScraperOptions, {
|
||||||
|
@ -2,6 +2,8 @@ import { Queue } from "bullmq";
|
|||||||
import { logger } from "../lib/logger";
|
import { logger } from "../lib/logger";
|
||||||
import IORedis from "ioredis";
|
import IORedis from "ioredis";
|
||||||
|
|
||||||
|
export type QueueFunction = () => Queue<any, any, string, any, any, string>;
|
||||||
|
|
||||||
let scrapeQueue: Queue;
|
let scrapeQueue: Queue;
|
||||||
let extractQueue: Queue;
|
let extractQueue: Queue;
|
||||||
let loggingQueue: Queue;
|
let loggingQueue: Queue;
|
||||||
|
@ -52,7 +52,6 @@ import { configDotenv } from "dotenv";
|
|||||||
import { scrapeOptions } from "../controllers/v1/types";
|
import { scrapeOptions } from "../controllers/v1/types";
|
||||||
import { getRateLimiterPoints } from "./rate-limiter";
|
import { getRateLimiterPoints } from "./rate-limiter";
|
||||||
import {
|
import {
|
||||||
calculateJobTimeToRun,
|
|
||||||
cleanOldConcurrencyLimitEntries,
|
cleanOldConcurrencyLimitEntries,
|
||||||
pushConcurrencyLimitActiveJob,
|
pushConcurrencyLimitActiveJob,
|
||||||
removeConcurrencyLimitActiveJob,
|
removeConcurrencyLimitActiveJob,
|
||||||
@ -247,6 +246,11 @@ const processJobInternal = async (token: string, job: Job & { id: string }) => {
|
|||||||
extendInterval: jobLockExtendInterval,
|
extendInterval: jobLockExtendInterval,
|
||||||
extensionTime: jobLockExtensionTime,
|
extensionTime: jobLockExtensionTime,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (job.data?.mode !== "kickoff" && job.data?.team_id) {
|
||||||
|
await pushConcurrencyLimitActiveJob(job.data.team_id, job.id, 60 * 1000); // 60s lock renew, just like in the queue
|
||||||
|
}
|
||||||
|
|
||||||
await job.extendLock(token, jobLockExtensionTime);
|
await job.extendLock(token, jobLockExtensionTime);
|
||||||
}, jobLockExtendInterval);
|
}, jobLockExtendInterval);
|
||||||
|
|
||||||
@ -597,7 +601,7 @@ const workerFun = async (
|
|||||||
// we are 1 under the limit, assuming the job insertion logic never over-inserts. - MG
|
// we are 1 under the limit, assuming the job insertion logic never over-inserts. - MG
|
||||||
const nextJob = await takeConcurrencyLimitedJob(job.data.team_id);
|
const nextJob = await takeConcurrencyLimitedJob(job.data.team_id);
|
||||||
if (nextJob !== null) {
|
if (nextJob !== null) {
|
||||||
await pushConcurrencyLimitActiveJob(job.data.team_id, nextJob.id, calculateJobTimeToRun(nextJob));
|
await pushConcurrencyLimitActiveJob(job.data.team_id, nextJob.id, 60 * 1000); // 60s initial timeout
|
||||||
|
|
||||||
await queue.add(
|
await queue.add(
|
||||||
nextJob.id,
|
nextJob.id,
|
||||||
|
@ -535,7 +535,7 @@ export default class FirecrawlApp {
|
|||||||
const response: AxiosResponse = await axios.post(
|
const response: AxiosResponse = await axios.post(
|
||||||
this.apiUrl + `/v1/scrape`,
|
this.apiUrl + `/v1/scrape`,
|
||||||
jsonData,
|
jsonData,
|
||||||
{ headers }
|
{ headers, timeout: params?.timeout !== undefined ? (params.timeout + 5000) : undefined },
|
||||||
);
|
);
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const responseData = response.data;
|
const responseData = response.data;
|
||||||
@ -1262,7 +1262,7 @@ export default class FirecrawlApp {
|
|||||||
data: any,
|
data: any,
|
||||||
headers: AxiosRequestHeaders
|
headers: AxiosRequestHeaders
|
||||||
): Promise<AxiosResponse> {
|
): Promise<AxiosResponse> {
|
||||||
return axios.post(url, data, { headers });
|
return axios.post(url, data, { headers, timeout: (data?.timeout ? (data.timeout + 5000) : undefined) });
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
3
apps/playwright-service-ts/.dockerignore
Normal file
3
apps/playwright-service-ts/.dockerignore
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
/node_modules/
|
||||||
|
/dist/
|
||||||
|
.env
|
@ -1,6 +1,6 @@
|
|||||||
import express, { Request, Response } from 'express';
|
import express, { Request, Response } from 'express';
|
||||||
import bodyParser from 'body-parser';
|
import bodyParser from 'body-parser';
|
||||||
import { chromium, Browser, BrowserContext, Route, Request as PlaywrightRequest } from 'playwright';
|
import { chromium, Browser, BrowserContext, Route, Request as PlaywrightRequest, Page } from 'playwright';
|
||||||
import dotenv from 'dotenv';
|
import dotenv from 'dotenv';
|
||||||
import UserAgent from 'user-agents';
|
import UserAgent from 'user-agents';
|
||||||
import { getError } from './helpers/get_error';
|
import { getError } from './helpers/get_error';
|
||||||
@ -119,7 +119,7 @@ const isValidUrl = (urlString: string): boolean => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const scrapePage = async (page: any, url: string, waitUntil: 'load' | 'networkidle', waitAfterLoad: number, timeout: number, checkSelector: string | undefined) => {
|
const scrapePage = async (page: Page, url: string, waitUntil: 'load' | 'networkidle', waitAfterLoad: number, timeout: number, checkSelector: string | undefined) => {
|
||||||
console.log(`Navigating to ${url} with waitUntil: ${waitUntil} and timeout: ${timeout}ms`);
|
console.log(`Navigating to ${url} with waitUntil: ${waitUntil} and timeout: ${timeout}ms`);
|
||||||
const response = await page.goto(url, { waitUntil, timeout });
|
const response = await page.goto(url, { waitUntil, timeout });
|
||||||
|
|
||||||
@ -135,9 +135,19 @@ const scrapePage = async (page: any, url: string, waitUntil: 'load' | 'networkid
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let headers = null, content = await page.content();
|
||||||
|
if (response) {
|
||||||
|
headers = await response.allHeaders();
|
||||||
|
const ct = Object.entries(headers).find(x => x[0].toLowerCase() === "content-type");
|
||||||
|
if (ct && (ct[1].includes("application/json") || ct[1].includes("text/plain"))) {
|
||||||
|
content = (await response.body()).toString("utf8"); // TODO: determine real encoding
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
content: await page.content(),
|
content,
|
||||||
status: response ? response.status() : null,
|
status: response ? response.status() : null,
|
||||||
|
headers,
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -175,40 +185,35 @@ app.post('/scrape', async (req: Request, res: Response) => {
|
|||||||
await page.setExtraHTTPHeaders(headers);
|
await page.setExtraHTTPHeaders(headers);
|
||||||
}
|
}
|
||||||
|
|
||||||
let pageContent;
|
let result: Awaited<ReturnType<typeof scrapePage>>;
|
||||||
let pageStatusCode: number | null = null;
|
|
||||||
try {
|
try {
|
||||||
// Strategy 1: Normal
|
// Strategy 1: Normal
|
||||||
console.log('Attempting strategy 1: Normal load');
|
console.log('Attempting strategy 1: Normal load');
|
||||||
const result = await scrapePage(page, url, 'load', wait_after_load, timeout, check_selector);
|
result = await scrapePage(page, url, 'load', wait_after_load, timeout, check_selector);
|
||||||
pageContent = result.content;
|
|
||||||
pageStatusCode = result.status;
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log('Strategy 1 failed, attempting strategy 2: Wait until networkidle');
|
console.log('Strategy 1 failed, attempting strategy 2: Wait until networkidle');
|
||||||
try {
|
try {
|
||||||
// Strategy 2: Wait until networkidle
|
// Strategy 2: Wait until networkidle
|
||||||
const result = await scrapePage(page, url, 'networkidle', wait_after_load, timeout, check_selector);
|
result = await scrapePage(page, url, 'networkidle', wait_after_load, timeout, check_selector);
|
||||||
pageContent = result.content;
|
|
||||||
pageStatusCode = result.status;
|
|
||||||
} catch (finalError) {
|
} catch (finalError) {
|
||||||
await page.close();
|
await page.close();
|
||||||
return res.status(500).json({ error: 'An error occurred while fetching the page.' });
|
return res.status(500).json({ error: 'An error occurred while fetching the page.' });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const pageError = pageStatusCode !== 200 ? getError(pageStatusCode) : undefined;
|
const pageError = result.status !== 200 ? getError(result.status) : undefined;
|
||||||
|
|
||||||
if (!pageError) {
|
if (!pageError) {
|
||||||
console.log(`✅ Scrape successful!`);
|
console.log(`✅ Scrape successful!`);
|
||||||
} else {
|
} else {
|
||||||
console.log(`🚨 Scrape failed with status code: ${pageStatusCode} ${pageError}`);
|
console.log(`🚨 Scrape failed with status code: ${result.status} ${pageError}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
await page.close();
|
await page.close();
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
content: pageContent,
|
content: result.content,
|
||||||
pageStatusCode,
|
pageStatusCode: result.status,
|
||||||
...(pageError && { pageError })
|
...(pageError && { pageError })
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
"user-agents": "^1.1.410"
|
"user-agents": "^1.1.410"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
"@types/body-parser": "^1.19.5",
|
||||||
"@types/express": "^4.17.21",
|
"@types/express": "^4.17.21",
|
||||||
"@types/node": "^20.14.9",
|
"@types/node": "^20.14.9",
|
||||||
"@types/user-agents": "^1.0.4",
|
"@types/user-agents": "^1.0.4",
|
||||||
|
873
apps/playwright-service-ts/pnpm-lock.yaml
generated
Normal file
873
apps/playwright-service-ts/pnpm-lock.yaml
generated
Normal file
@ -0,0 +1,873 @@
|
|||||||
|
lockfileVersion: '9.0'
|
||||||
|
|
||||||
|
settings:
|
||||||
|
autoInstallPeers: true
|
||||||
|
excludeLinksFromLockfile: false
|
||||||
|
|
||||||
|
importers:
|
||||||
|
|
||||||
|
.:
|
||||||
|
dependencies:
|
||||||
|
body-parser:
|
||||||
|
specifier: ^1.20.2
|
||||||
|
version: 1.20.3
|
||||||
|
dotenv:
|
||||||
|
specifier: ^16.4.5
|
||||||
|
version: 16.4.7
|
||||||
|
express:
|
||||||
|
specifier: ^4.19.2
|
||||||
|
version: 4.21.2
|
||||||
|
playwright:
|
||||||
|
specifier: ^1.45.0
|
||||||
|
version: 1.49.1
|
||||||
|
user-agents:
|
||||||
|
specifier: ^1.1.410
|
||||||
|
version: 1.1.455
|
||||||
|
devDependencies:
|
||||||
|
'@types/body-parser':
|
||||||
|
specifier: ^1.19.5
|
||||||
|
version: 1.19.5
|
||||||
|
'@types/express':
|
||||||
|
specifier: ^4.17.21
|
||||||
|
version: 4.17.21
|
||||||
|
'@types/node':
|
||||||
|
specifier: ^20.14.9
|
||||||
|
version: 20.17.10
|
||||||
|
'@types/user-agents':
|
||||||
|
specifier: ^1.0.4
|
||||||
|
version: 1.0.4
|
||||||
|
ts-node:
|
||||||
|
specifier: ^10.9.2
|
||||||
|
version: 10.9.2(@types/node@20.17.10)(typescript@5.7.2)
|
||||||
|
typescript:
|
||||||
|
specifier: ^5.5.2
|
||||||
|
version: 5.7.2
|
||||||
|
|
||||||
|
packages:
|
||||||
|
|
||||||
|
'@cspotcode/source-map-support@0.8.1':
|
||||||
|
resolution: {integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==}
|
||||||
|
engines: {node: '>=12'}
|
||||||
|
|
||||||
|
'@jridgewell/resolve-uri@3.1.2':
|
||||||
|
resolution: {integrity: sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==}
|
||||||
|
engines: {node: '>=6.0.0'}
|
||||||
|
|
||||||
|
'@jridgewell/sourcemap-codec@1.5.0':
|
||||||
|
resolution: {integrity: sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==}
|
||||||
|
|
||||||
|
'@jridgewell/trace-mapping@0.3.9':
|
||||||
|
resolution: {integrity: sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==}
|
||||||
|
|
||||||
|
'@tsconfig/node10@1.0.11':
|
||||||
|
resolution: {integrity: sha512-DcRjDCujK/kCk/cUe8Xz8ZSpm8mS3mNNpta+jGCA6USEDfktlNvm1+IuZ9eTcDbNk41BHwpHHeW+N1lKCz4zOw==}
|
||||||
|
|
||||||
|
'@tsconfig/node12@1.0.11':
|
||||||
|
resolution: {integrity: sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==}
|
||||||
|
|
||||||
|
'@tsconfig/node14@1.0.3':
|
||||||
|
resolution: {integrity: sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==}
|
||||||
|
|
||||||
|
'@tsconfig/node16@1.0.4':
|
||||||
|
resolution: {integrity: sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==}
|
||||||
|
|
||||||
|
'@types/body-parser@1.19.5':
|
||||||
|
resolution: {integrity: sha512-fB3Zu92ucau0iQ0JMCFQE7b/dv8Ot07NI3KaZIkIUNXq82k4eBAqUaneXfleGY9JWskeS9y+u0nXMyspcuQrCg==}
|
||||||
|
|
||||||
|
'@types/connect@3.4.38':
|
||||||
|
resolution: {integrity: sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug==}
|
||||||
|
|
||||||
|
'@types/express-serve-static-core@4.19.6':
|
||||||
|
resolution: {integrity: sha512-N4LZ2xG7DatVqhCZzOGb1Yi5lMbXSZcmdLDe9EzSndPV2HpWYWzRbaerl2n27irrm94EPpprqa8KpskPT085+A==}
|
||||||
|
|
||||||
|
'@types/express@4.17.21':
|
||||||
|
resolution: {integrity: sha512-ejlPM315qwLpaQlQDTjPdsUFSc6ZsP4AN6AlWnogPjQ7CVi7PYF3YVz+CY3jE2pwYf7E/7HlDAN0rV2GxTG0HQ==}
|
||||||
|
|
||||||
|
'@types/http-errors@2.0.4':
|
||||||
|
resolution: {integrity: sha512-D0CFMMtydbJAegzOyHjtiKPLlvnm3iTZyZRSZoLq2mRhDdmLfIWOCYPfQJ4cu2erKghU++QvjcUjp/5h7hESpA==}
|
||||||
|
|
||||||
|
'@types/mime@1.3.5':
|
||||||
|
resolution: {integrity: sha512-/pyBZWSLD2n0dcHE3hq8s8ZvcETHtEuF+3E7XVt0Ig2nvsVQXdghHVcEkIWjy9A0wKfTn97a/PSDYohKIlnP/w==}
|
||||||
|
|
||||||
|
'@types/node@20.17.10':
|
||||||
|
resolution: {integrity: sha512-/jrvh5h6NXhEauFFexRin69nA0uHJ5gwk4iDivp/DeoEua3uwCUto6PC86IpRITBOs4+6i2I56K5x5b6WYGXHA==}
|
||||||
|
|
||||||
|
'@types/qs@6.9.17':
|
||||||
|
resolution: {integrity: sha512-rX4/bPcfmvxHDv0XjfJELTTr+iB+tn032nPILqHm5wbthUUUuVtNGGqzhya9XUxjTP8Fpr0qYgSZZKxGY++svQ==}
|
||||||
|
|
||||||
|
'@types/range-parser@1.2.7':
|
||||||
|
resolution: {integrity: sha512-hKormJbkJqzQGhziax5PItDUTMAM9uE2XXQmM37dyd4hVM+5aVl7oVxMVUiVQn2oCQFN/LKCZdvSM0pFRqbSmQ==}
|
||||||
|
|
||||||
|
'@types/send@0.17.4':
|
||||||
|
resolution: {integrity: sha512-x2EM6TJOybec7c52BX0ZspPodMsQUd5L6PRwOunVyVUhXiBSKf3AezDL8Dgvgt5o0UfKNfuA0eMLr2wLT4AiBA==}
|
||||||
|
|
||||||
|
'@types/serve-static@1.15.7':
|
||||||
|
resolution: {integrity: sha512-W8Ym+h8nhuRwaKPaDw34QUkwsGi6Rc4yYqvKFo5rm2FUEhCFbzVWrxXUxuKK8TASjWsysJY0nsmNCGhCOIsrOw==}
|
||||||
|
|
||||||
|
'@types/user-agents@1.0.4':
|
||||||
|
resolution: {integrity: sha512-AjeFc4oX5WPPflgKfRWWJfkEk7Wu82fnj1rROPsiqFt6yElpdGFg8Srtm/4PU4rA9UiDUZlruGPgcwTMQlwq4w==}
|
||||||
|
|
||||||
|
accepts@1.3.8:
|
||||||
|
resolution: {integrity: sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==}
|
||||||
|
engines: {node: '>= 0.6'}
|
||||||
|
|
||||||
|
acorn-walk@8.3.4:
|
||||||
|
resolution: {integrity: sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==}
|
||||||
|
engines: {node: '>=0.4.0'}
|
||||||
|
|
||||||
|
acorn@8.14.0:
|
||||||
|
resolution: {integrity: sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA==}
|
||||||
|
engines: {node: '>=0.4.0'}
|
||||||
|
hasBin: true
|
||||||
|
|
||||||
|
arg@4.1.3:
|
||||||
|
resolution: {integrity: sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==}
|
||||||
|
|
||||||
|
array-flatten@1.1.1:
|
||||||
|
resolution: {integrity: sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==}
|
||||||
|
|
||||||
|
body-parser@1.20.3:
|
||||||
|
resolution: {integrity: sha512-7rAxByjUMqQ3/bHJy7D6OGXvx/MMc4IqBn/X0fcM1QUcAItpZrBEYhWGem+tzXH90c+G01ypMcYJBO9Y30203g==}
|
||||||
|
engines: {node: '>= 0.8', npm: 1.2.8000 || >= 1.4.16}
|
||||||
|
|
||||||
|
bytes@3.1.2:
|
||||||
|
resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==}
|
||||||
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
call-bind-apply-helpers@1.0.1:
|
||||||
|
resolution: {integrity: sha512-BhYE+WDaywFg2TBWYNXAE+8B1ATnThNBqXHP5nQu0jWJdVvY2hvkpyB3qOmtmDePiS5/BDQ8wASEWGMWRG148g==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
call-bound@1.0.3:
|
||||||
|
resolution: {integrity: sha512-YTd+6wGlNlPxSuri7Y6X8tY2dmm12UMH66RpKMhiX6rsk5wXXnYgbUcOt8kiS31/AjfoTOvCsE+w8nZQLQnzHA==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
content-disposition@0.5.4:
|
||||||
|
resolution: {integrity: sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==}
|
||||||
|
engines: {node: '>= 0.6'}
|
||||||
|
|
||||||
|
content-type@1.0.5:
|
||||||
|
resolution: {integrity: sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==}
|
||||||
|
engines: {node: '>= 0.6'}
|
||||||
|
|
||||||
|
cookie-signature@1.0.6:
|
||||||
|
resolution: {integrity: sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==}
|
||||||
|
|
||||||
|
cookie@0.7.1:
|
||||||
|
resolution: {integrity: sha512-6DnInpx7SJ2AK3+CTUE/ZM0vWTUboZCegxhC2xiIydHR9jNuTAASBrfEpHhiGOZw/nX51bHt6YQl8jsGo4y/0w==}
|
||||||
|
engines: {node: '>= 0.6'}
|
||||||
|
|
||||||
|
create-require@1.1.1:
|
||||||
|
resolution: {integrity: sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==}
|
||||||
|
|
||||||
|
debug@2.6.9:
|
||||||
|
resolution: {integrity: sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==}
|
||||||
|
peerDependencies:
|
||||||
|
supports-color: '*'
|
||||||
|
peerDependenciesMeta:
|
||||||
|
supports-color:
|
||||||
|
optional: true
|
||||||
|
|
||||||
|
depd@2.0.0:
|
||||||
|
resolution: {integrity: sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==}
|
||||||
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
destroy@1.2.0:
|
||||||
|
resolution: {integrity: sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg==}
|
||||||
|
engines: {node: '>= 0.8', npm: 1.2.8000 || >= 1.4.16}
|
||||||
|
|
||||||
|
diff@4.0.2:
|
||||||
|
resolution: {integrity: sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==}
|
||||||
|
engines: {node: '>=0.3.1'}
|
||||||
|
|
||||||
|
dotenv@16.4.7:
|
||||||
|
resolution: {integrity: sha512-47qPchRCykZC03FhkYAhrvwU4xDBFIj1QPqaarj6mdM/hgUzfPHcpkHJOn3mJAufFeeAxAzeGsr5X0M4k6fLZQ==}
|
||||||
|
engines: {node: '>=12'}
|
||||||
|
|
||||||
|
dunder-proto@1.0.1:
|
||||||
|
resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
ee-first@1.1.1:
|
||||||
|
resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==}
|
||||||
|
|
||||||
|
encodeurl@1.0.2:
|
||||||
|
resolution: {integrity: sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==}
|
||||||
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
encodeurl@2.0.0:
|
||||||
|
resolution: {integrity: sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==}
|
||||||
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
es-define-property@1.0.1:
|
||||||
|
resolution: {integrity: sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
es-errors@1.3.0:
|
||||||
|
resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
es-object-atoms@1.0.0:
|
||||||
|
resolution: {integrity: sha512-MZ4iQ6JwHOBQjahnjwaC1ZtIBH+2ohjamzAO3oaHcXYup7qxjF2fixyH+Q71voWHeOkI2q/TnJao/KfXYIZWbw==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
escape-html@1.0.3:
|
||||||
|
resolution: {integrity: sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==}
|
||||||
|
|
||||||
|
etag@1.8.1:
|
||||||
|
resolution: {integrity: sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==}
|
||||||
|
engines: {node: '>= 0.6'}
|
||||||
|
|
||||||
|
express@4.21.2:
|
||||||
|
resolution: {integrity: sha512-28HqgMZAmih1Czt9ny7qr6ek2qddF4FclbMzwhCREB6OFfH+rXAnuNCwo1/wFvrtbgsQDb4kSbX9de9lFbrXnA==}
|
||||||
|
engines: {node: '>= 0.10.0'}
|
||||||
|
|
||||||
|
finalhandler@1.3.1:
|
||||||
|
resolution: {integrity: sha512-6BN9trH7bp3qvnrRyzsBz+g3lZxTNZTbVO2EV1CS0WIcDbawYVdYvGflME/9QP0h0pYlCDBCTjYa9nZzMDpyxQ==}
|
||||||
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
forwarded@0.2.0:
|
||||||
|
resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==}
|
||||||
|
engines: {node: '>= 0.6'}
|
||||||
|
|
||||||
|
fresh@0.5.2:
|
||||||
|
resolution: {integrity: sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==}
|
||||||
|
engines: {node: '>= 0.6'}
|
||||||
|
|
||||||
|
fsevents@2.3.2:
|
||||||
|
resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==}
|
||||||
|
engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
|
||||||
|
os: [darwin]
|
||||||
|
|
||||||
|
function-bind@1.1.2:
|
||||||
|
resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==}
|
||||||
|
|
||||||
|
get-intrinsic@1.2.6:
|
||||||
|
resolution: {integrity: sha512-qxsEs+9A+u85HhllWJJFicJfPDhRmjzoYdl64aMWW9yRIJmSyxdn8IEkuIM530/7T+lv0TIHd8L6Q/ra0tEoeA==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
gopd@1.2.0:
|
||||||
|
resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
has-symbols@1.1.0:
|
||||||
|
resolution: {integrity: sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
hasown@2.0.2:
|
||||||
|
resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
http-errors@2.0.0:
|
||||||
|
resolution: {integrity: sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==}
|
||||||
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
iconv-lite@0.4.24:
|
||||||
|
resolution: {integrity: sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==}
|
||||||
|
engines: {node: '>=0.10.0'}
|
||||||
|
|
||||||
|
inherits@2.0.4:
|
||||||
|
resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==}
|
||||||
|
|
||||||
|
ipaddr.js@1.9.1:
|
||||||
|
resolution: {integrity: sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==}
|
||||||
|
engines: {node: '>= 0.10'}
|
||||||
|
|
||||||
|
lodash.clonedeep@4.5.0:
|
||||||
|
resolution: {integrity: sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ==}
|
||||||
|
|
||||||
|
make-error@1.3.6:
|
||||||
|
resolution: {integrity: sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==}
|
||||||
|
|
||||||
|
math-intrinsics@1.1.0:
|
||||||
|
resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
media-typer@0.3.0:
|
||||||
|
resolution: {integrity: sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==}
|
||||||
|
engines: {node: '>= 0.6'}
|
||||||
|
|
||||||
|
merge-descriptors@1.0.3:
|
||||||
|
resolution: {integrity: sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==}
|
||||||
|
|
||||||
|
methods@1.1.2:
|
||||||
|
resolution: {integrity: sha512-iclAHeNqNm68zFtnZ0e+1L2yUIdvzNoauKU4WBA3VvH/vPFieF7qfRlwUZU+DA9P9bPXIS90ulxoUoCH23sV2w==}
|
||||||
|
engines: {node: '>= 0.6'}
|
||||||
|
|
||||||
|
mime-db@1.52.0:
|
||||||
|
resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==}
|
||||||
|
engines: {node: '>= 0.6'}
|
||||||
|
|
||||||
|
mime-types@2.1.35:
|
||||||
|
resolution: {integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==}
|
||||||
|
engines: {node: '>= 0.6'}
|
||||||
|
|
||||||
|
mime@1.6.0:
|
||||||
|
resolution: {integrity: sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==}
|
||||||
|
engines: {node: '>=4'}
|
||||||
|
hasBin: true
|
||||||
|
|
||||||
|
ms@2.0.0:
|
||||||
|
resolution: {integrity: sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==}
|
||||||
|
|
||||||
|
ms@2.1.3:
|
||||||
|
resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==}
|
||||||
|
|
||||||
|
negotiator@0.6.3:
|
||||||
|
resolution: {integrity: sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg==}
|
||||||
|
engines: {node: '>= 0.6'}
|
||||||
|
|
||||||
|
object-inspect@1.13.3:
|
||||||
|
resolution: {integrity: sha512-kDCGIbxkDSXE3euJZZXzc6to7fCrKHNI/hSRQnRuQ+BWjFNzZwiFF8fj/6o2t2G9/jTj8PSIYTfCLelLZEeRpA==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
on-finished@2.4.1:
|
||||||
|
resolution: {integrity: sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==}
|
||||||
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
parseurl@1.3.3:
|
||||||
|
resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==}
|
||||||
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
path-to-regexp@0.1.12:
|
||||||
|
resolution: {integrity: sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==}
|
||||||
|
|
||||||
|
playwright-core@1.49.1:
|
||||||
|
resolution: {integrity: sha512-BzmpVcs4kE2CH15rWfzpjzVGhWERJfmnXmniSyKeRZUs9Ws65m+RGIi7mjJK/euCegfn3i7jvqWeWyHe9y3Vgg==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
hasBin: true
|
||||||
|
|
||||||
|
playwright@1.49.1:
|
||||||
|
resolution: {integrity: sha512-VYL8zLoNTBxVOrJBbDuRgDWa3i+mfQgDTrL8Ah9QXZ7ax4Dsj0MSq5bYgytRnDVVe+njoKnfsYkH3HzqVj5UZA==}
|
||||||
|
engines: {node: '>=18'}
|
||||||
|
hasBin: true
|
||||||
|
|
||||||
|
proxy-addr@2.0.7:
|
||||||
|
resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==}
|
||||||
|
engines: {node: '>= 0.10'}
|
||||||
|
|
||||||
|
qs@6.13.0:
|
||||||
|
resolution: {integrity: sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==}
|
||||||
|
engines: {node: '>=0.6'}
|
||||||
|
|
||||||
|
range-parser@1.2.1:
|
||||||
|
resolution: {integrity: sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==}
|
||||||
|
engines: {node: '>= 0.6'}
|
||||||
|
|
||||||
|
raw-body@2.5.2:
|
||||||
|
resolution: {integrity: sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA==}
|
||||||
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
safe-buffer@5.2.1:
|
||||||
|
resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==}
|
||||||
|
|
||||||
|
safer-buffer@2.1.2:
|
||||||
|
resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==}
|
||||||
|
|
||||||
|
send@0.19.0:
|
||||||
|
resolution: {integrity: sha512-dW41u5VfLXu8SJh5bwRmyYUbAoSB3c9uQh6L8h/KtsFREPWpbX1lrljJo186Jc4nmci/sGUZ9a0a0J2zgfq2hw==}
|
||||||
|
engines: {node: '>= 0.8.0'}
|
||||||
|
|
||||||
|
serve-static@1.16.2:
|
||||||
|
resolution: {integrity: sha512-VqpjJZKadQB/PEbEwvFdO43Ax5dFBZ2UECszz8bQ7pi7wt//PWe1P6MN7eCnjsatYtBT6EuiClbjSWP2WrIoTw==}
|
||||||
|
engines: {node: '>= 0.8.0'}
|
||||||
|
|
||||||
|
setprototypeof@1.2.0:
|
||||||
|
resolution: {integrity: sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==}
|
||||||
|
|
||||||
|
side-channel-list@1.0.0:
|
||||||
|
resolution: {integrity: sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
side-channel-map@1.0.1:
|
||||||
|
resolution: {integrity: sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
side-channel-weakmap@1.0.2:
|
||||||
|
resolution: {integrity: sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
side-channel@1.1.0:
|
||||||
|
resolution: {integrity: sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==}
|
||||||
|
engines: {node: '>= 0.4'}
|
||||||
|
|
||||||
|
statuses@2.0.1:
|
||||||
|
resolution: {integrity: sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==}
|
||||||
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
toidentifier@1.0.1:
|
||||||
|
resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==}
|
||||||
|
engines: {node: '>=0.6'}
|
||||||
|
|
||||||
|
ts-node@10.9.2:
|
||||||
|
resolution: {integrity: sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==}
|
||||||
|
hasBin: true
|
||||||
|
peerDependencies:
|
||||||
|
'@swc/core': '>=1.2.50'
|
||||||
|
'@swc/wasm': '>=1.2.50'
|
||||||
|
'@types/node': '*'
|
||||||
|
typescript: '>=2.7'
|
||||||
|
peerDependenciesMeta:
|
||||||
|
'@swc/core':
|
||||||
|
optional: true
|
||||||
|
'@swc/wasm':
|
||||||
|
optional: true
|
||||||
|
|
||||||
|
type-is@1.6.18:
|
||||||
|
resolution: {integrity: sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==}
|
||||||
|
engines: {node: '>= 0.6'}
|
||||||
|
|
||||||
|
typescript@5.7.2:
|
||||||
|
resolution: {integrity: sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==}
|
||||||
|
engines: {node: '>=14.17'}
|
||||||
|
hasBin: true
|
||||||
|
|
||||||
|
undici-types@6.19.8:
|
||||||
|
resolution: {integrity: sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==}
|
||||||
|
|
||||||
|
unpipe@1.0.0:
|
||||||
|
resolution: {integrity: sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==}
|
||||||
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
user-agents@1.1.455:
|
||||||
|
resolution: {integrity: sha512-C5FfBiUlxZAYI+nsxg2iUcVrC0CxjawRZMxoUA9Z5MUm1mC0phPvs7iPe9ksKVaZrsyNLivDeIUxJvHFuCXyLw==}
|
||||||
|
|
||||||
|
utils-merge@1.0.1:
|
||||||
|
resolution: {integrity: sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==}
|
||||||
|
engines: {node: '>= 0.4.0'}
|
||||||
|
|
||||||
|
v8-compile-cache-lib@3.0.1:
|
||||||
|
resolution: {integrity: sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==}
|
||||||
|
|
||||||
|
vary@1.1.2:
|
||||||
|
resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==}
|
||||||
|
engines: {node: '>= 0.8'}
|
||||||
|
|
||||||
|
yn@3.1.1:
|
||||||
|
resolution: {integrity: sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==}
|
||||||
|
engines: {node: '>=6'}
|
||||||
|
|
||||||
|
snapshots:
|
||||||
|
|
||||||
|
'@cspotcode/source-map-support@0.8.1':
|
||||||
|
dependencies:
|
||||||
|
'@jridgewell/trace-mapping': 0.3.9
|
||||||
|
|
||||||
|
'@jridgewell/resolve-uri@3.1.2': {}
|
||||||
|
|
||||||
|
'@jridgewell/sourcemap-codec@1.5.0': {}
|
||||||
|
|
||||||
|
'@jridgewell/trace-mapping@0.3.9':
|
||||||
|
dependencies:
|
||||||
|
'@jridgewell/resolve-uri': 3.1.2
|
||||||
|
'@jridgewell/sourcemap-codec': 1.5.0
|
||||||
|
|
||||||
|
'@tsconfig/node10@1.0.11': {}
|
||||||
|
|
||||||
|
'@tsconfig/node12@1.0.11': {}
|
||||||
|
|
||||||
|
'@tsconfig/node14@1.0.3': {}
|
||||||
|
|
||||||
|
'@tsconfig/node16@1.0.4': {}
|
||||||
|
|
||||||
|
'@types/body-parser@1.19.5':
|
||||||
|
dependencies:
|
||||||
|
'@types/connect': 3.4.38
|
||||||
|
'@types/node': 20.17.10
|
||||||
|
|
||||||
|
'@types/connect@3.4.38':
|
||||||
|
dependencies:
|
||||||
|
'@types/node': 20.17.10
|
||||||
|
|
||||||
|
'@types/express-serve-static-core@4.19.6':
|
||||||
|
dependencies:
|
||||||
|
'@types/node': 20.17.10
|
||||||
|
'@types/qs': 6.9.17
|
||||||
|
'@types/range-parser': 1.2.7
|
||||||
|
'@types/send': 0.17.4
|
||||||
|
|
||||||
|
'@types/express@4.17.21':
|
||||||
|
dependencies:
|
||||||
|
'@types/body-parser': 1.19.5
|
||||||
|
'@types/express-serve-static-core': 4.19.6
|
||||||
|
'@types/qs': 6.9.17
|
||||||
|
'@types/serve-static': 1.15.7
|
||||||
|
|
||||||
|
'@types/http-errors@2.0.4': {}
|
||||||
|
|
||||||
|
'@types/mime@1.3.5': {}
|
||||||
|
|
||||||
|
'@types/node@20.17.10':
|
||||||
|
dependencies:
|
||||||
|
undici-types: 6.19.8
|
||||||
|
|
||||||
|
'@types/qs@6.9.17': {}
|
||||||
|
|
||||||
|
'@types/range-parser@1.2.7': {}
|
||||||
|
|
||||||
|
'@types/send@0.17.4':
|
||||||
|
dependencies:
|
||||||
|
'@types/mime': 1.3.5
|
||||||
|
'@types/node': 20.17.10
|
||||||
|
|
||||||
|
'@types/serve-static@1.15.7':
|
||||||
|
dependencies:
|
||||||
|
'@types/http-errors': 2.0.4
|
||||||
|
'@types/node': 20.17.10
|
||||||
|
'@types/send': 0.17.4
|
||||||
|
|
||||||
|
'@types/user-agents@1.0.4': {}
|
||||||
|
|
||||||
|
accepts@1.3.8:
|
||||||
|
dependencies:
|
||||||
|
mime-types: 2.1.35
|
||||||
|
negotiator: 0.6.3
|
||||||
|
|
||||||
|
acorn-walk@8.3.4:
|
||||||
|
dependencies:
|
||||||
|
acorn: 8.14.0
|
||||||
|
|
||||||
|
acorn@8.14.0: {}
|
||||||
|
|
||||||
|
arg@4.1.3: {}
|
||||||
|
|
||||||
|
array-flatten@1.1.1: {}
|
||||||
|
|
||||||
|
body-parser@1.20.3:
|
||||||
|
dependencies:
|
||||||
|
bytes: 3.1.2
|
||||||
|
content-type: 1.0.5
|
||||||
|
debug: 2.6.9
|
||||||
|
depd: 2.0.0
|
||||||
|
destroy: 1.2.0
|
||||||
|
http-errors: 2.0.0
|
||||||
|
iconv-lite: 0.4.24
|
||||||
|
on-finished: 2.4.1
|
||||||
|
qs: 6.13.0
|
||||||
|
raw-body: 2.5.2
|
||||||
|
type-is: 1.6.18
|
||||||
|
unpipe: 1.0.0
|
||||||
|
transitivePeerDependencies:
|
||||||
|
- supports-color
|
||||||
|
|
||||||
|
bytes@3.1.2: {}
|
||||||
|
|
||||||
|
call-bind-apply-helpers@1.0.1:
|
||||||
|
dependencies:
|
||||||
|
es-errors: 1.3.0
|
||||||
|
function-bind: 1.1.2
|
||||||
|
|
||||||
|
call-bound@1.0.3:
|
||||||
|
dependencies:
|
||||||
|
call-bind-apply-helpers: 1.0.1
|
||||||
|
get-intrinsic: 1.2.6
|
||||||
|
|
||||||
|
content-disposition@0.5.4:
|
||||||
|
dependencies:
|
||||||
|
safe-buffer: 5.2.1
|
||||||
|
|
||||||
|
content-type@1.0.5: {}
|
||||||
|
|
||||||
|
cookie-signature@1.0.6: {}
|
||||||
|
|
||||||
|
cookie@0.7.1: {}
|
||||||
|
|
||||||
|
create-require@1.1.1: {}
|
||||||
|
|
||||||
|
debug@2.6.9:
|
||||||
|
dependencies:
|
||||||
|
ms: 2.0.0
|
||||||
|
|
||||||
|
depd@2.0.0: {}
|
||||||
|
|
||||||
|
destroy@1.2.0: {}
|
||||||
|
|
||||||
|
diff@4.0.2: {}
|
||||||
|
|
||||||
|
dotenv@16.4.7: {}
|
||||||
|
|
||||||
|
dunder-proto@1.0.1:
|
||||||
|
dependencies:
|
||||||
|
call-bind-apply-helpers: 1.0.1
|
||||||
|
es-errors: 1.3.0
|
||||||
|
gopd: 1.2.0
|
||||||
|
|
||||||
|
ee-first@1.1.1: {}
|
||||||
|
|
||||||
|
encodeurl@1.0.2: {}
|
||||||
|
|
||||||
|
encodeurl@2.0.0: {}
|
||||||
|
|
||||||
|
es-define-property@1.0.1: {}
|
||||||
|
|
||||||
|
es-errors@1.3.0: {}
|
||||||
|
|
||||||
|
es-object-atoms@1.0.0:
|
||||||
|
dependencies:
|
||||||
|
es-errors: 1.3.0
|
||||||
|
|
||||||
|
escape-html@1.0.3: {}
|
||||||
|
|
||||||
|
etag@1.8.1: {}
|
||||||
|
|
||||||
|
express@4.21.2:
|
||||||
|
dependencies:
|
||||||
|
accepts: 1.3.8
|
||||||
|
array-flatten: 1.1.1
|
||||||
|
body-parser: 1.20.3
|
||||||
|
content-disposition: 0.5.4
|
||||||
|
content-type: 1.0.5
|
||||||
|
cookie: 0.7.1
|
||||||
|
cookie-signature: 1.0.6
|
||||||
|
debug: 2.6.9
|
||||||
|
depd: 2.0.0
|
||||||
|
encodeurl: 2.0.0
|
||||||
|
escape-html: 1.0.3
|
||||||
|
etag: 1.8.1
|
||||||
|
finalhandler: 1.3.1
|
||||||
|
fresh: 0.5.2
|
||||||
|
http-errors: 2.0.0
|
||||||
|
merge-descriptors: 1.0.3
|
||||||
|
methods: 1.1.2
|
||||||
|
on-finished: 2.4.1
|
||||||
|
parseurl: 1.3.3
|
||||||
|
path-to-regexp: 0.1.12
|
||||||
|
proxy-addr: 2.0.7
|
||||||
|
qs: 6.13.0
|
||||||
|
range-parser: 1.2.1
|
||||||
|
safe-buffer: 5.2.1
|
||||||
|
send: 0.19.0
|
||||||
|
serve-static: 1.16.2
|
||||||
|
setprototypeof: 1.2.0
|
||||||
|
statuses: 2.0.1
|
||||||
|
type-is: 1.6.18
|
||||||
|
utils-merge: 1.0.1
|
||||||
|
vary: 1.1.2
|
||||||
|
transitivePeerDependencies:
|
||||||
|
- supports-color
|
||||||
|
|
||||||
|
finalhandler@1.3.1:
|
||||||
|
dependencies:
|
||||||
|
debug: 2.6.9
|
||||||
|
encodeurl: 2.0.0
|
||||||
|
escape-html: 1.0.3
|
||||||
|
on-finished: 2.4.1
|
||||||
|
parseurl: 1.3.3
|
||||||
|
statuses: 2.0.1
|
||||||
|
unpipe: 1.0.0
|
||||||
|
transitivePeerDependencies:
|
||||||
|
- supports-color
|
||||||
|
|
||||||
|
forwarded@0.2.0: {}
|
||||||
|
|
||||||
|
fresh@0.5.2: {}
|
||||||
|
|
||||||
|
fsevents@2.3.2:
|
||||||
|
optional: true
|
||||||
|
|
||||||
|
function-bind@1.1.2: {}
|
||||||
|
|
||||||
|
get-intrinsic@1.2.6:
|
||||||
|
dependencies:
|
||||||
|
call-bind-apply-helpers: 1.0.1
|
||||||
|
dunder-proto: 1.0.1
|
||||||
|
es-define-property: 1.0.1
|
||||||
|
es-errors: 1.3.0
|
||||||
|
es-object-atoms: 1.0.0
|
||||||
|
function-bind: 1.1.2
|
||||||
|
gopd: 1.2.0
|
||||||
|
has-symbols: 1.1.0
|
||||||
|
hasown: 2.0.2
|
||||||
|
math-intrinsics: 1.1.0
|
||||||
|
|
||||||
|
gopd@1.2.0: {}
|
||||||
|
|
||||||
|
has-symbols@1.1.0: {}
|
||||||
|
|
||||||
|
hasown@2.0.2:
|
||||||
|
dependencies:
|
||||||
|
function-bind: 1.1.2
|
||||||
|
|
||||||
|
http-errors@2.0.0:
|
||||||
|
dependencies:
|
||||||
|
depd: 2.0.0
|
||||||
|
inherits: 2.0.4
|
||||||
|
setprototypeof: 1.2.0
|
||||||
|
statuses: 2.0.1
|
||||||
|
toidentifier: 1.0.1
|
||||||
|
|
||||||
|
iconv-lite@0.4.24:
|
||||||
|
dependencies:
|
||||||
|
safer-buffer: 2.1.2
|
||||||
|
|
||||||
|
inherits@2.0.4: {}
|
||||||
|
|
||||||
|
ipaddr.js@1.9.1: {}
|
||||||
|
|
||||||
|
lodash.clonedeep@4.5.0: {}
|
||||||
|
|
||||||
|
make-error@1.3.6: {}
|
||||||
|
|
||||||
|
math-intrinsics@1.1.0: {}
|
||||||
|
|
||||||
|
media-typer@0.3.0: {}
|
||||||
|
|
||||||
|
merge-descriptors@1.0.3: {}
|
||||||
|
|
||||||
|
methods@1.1.2: {}
|
||||||
|
|
||||||
|
mime-db@1.52.0: {}
|
||||||
|
|
||||||
|
mime-types@2.1.35:
|
||||||
|
dependencies:
|
||||||
|
mime-db: 1.52.0
|
||||||
|
|
||||||
|
mime@1.6.0: {}
|
||||||
|
|
||||||
|
ms@2.0.0: {}
|
||||||
|
|
||||||
|
ms@2.1.3: {}
|
||||||
|
|
||||||
|
negotiator@0.6.3: {}
|
||||||
|
|
||||||
|
object-inspect@1.13.3: {}
|
||||||
|
|
||||||
|
on-finished@2.4.1:
|
||||||
|
dependencies:
|
||||||
|
ee-first: 1.1.1
|
||||||
|
|
||||||
|
parseurl@1.3.3: {}
|
||||||
|
|
||||||
|
path-to-regexp@0.1.12: {}
|
||||||
|
|
||||||
|
playwright-core@1.49.1: {}
|
||||||
|
|
||||||
|
playwright@1.49.1:
|
||||||
|
dependencies:
|
||||||
|
playwright-core: 1.49.1
|
||||||
|
optionalDependencies:
|
||||||
|
fsevents: 2.3.2
|
||||||
|
|
||||||
|
proxy-addr@2.0.7:
|
||||||
|
dependencies:
|
||||||
|
forwarded: 0.2.0
|
||||||
|
ipaddr.js: 1.9.1
|
||||||
|
|
||||||
|
qs@6.13.0:
|
||||||
|
dependencies:
|
||||||
|
side-channel: 1.1.0
|
||||||
|
|
||||||
|
range-parser@1.2.1: {}
|
||||||
|
|
||||||
|
raw-body@2.5.2:
|
||||||
|
dependencies:
|
||||||
|
bytes: 3.1.2
|
||||||
|
http-errors: 2.0.0
|
||||||
|
iconv-lite: 0.4.24
|
||||||
|
unpipe: 1.0.0
|
||||||
|
|
||||||
|
safe-buffer@5.2.1: {}
|
||||||
|
|
||||||
|
safer-buffer@2.1.2: {}
|
||||||
|
|
||||||
|
send@0.19.0:
|
||||||
|
dependencies:
|
||||||
|
debug: 2.6.9
|
||||||
|
depd: 2.0.0
|
||||||
|
destroy: 1.2.0
|
||||||
|
encodeurl: 1.0.2
|
||||||
|
escape-html: 1.0.3
|
||||||
|
etag: 1.8.1
|
||||||
|
fresh: 0.5.2
|
||||||
|
http-errors: 2.0.0
|
||||||
|
mime: 1.6.0
|
||||||
|
ms: 2.1.3
|
||||||
|
on-finished: 2.4.1
|
||||||
|
range-parser: 1.2.1
|
||||||
|
statuses: 2.0.1
|
||||||
|
transitivePeerDependencies:
|
||||||
|
- supports-color
|
||||||
|
|
||||||
|
serve-static@1.16.2:
|
||||||
|
dependencies:
|
||||||
|
encodeurl: 2.0.0
|
||||||
|
escape-html: 1.0.3
|
||||||
|
parseurl: 1.3.3
|
||||||
|
send: 0.19.0
|
||||||
|
transitivePeerDependencies:
|
||||||
|
- supports-color
|
||||||
|
|
||||||
|
setprototypeof@1.2.0: {}
|
||||||
|
|
||||||
|
side-channel-list@1.0.0:
|
||||||
|
dependencies:
|
||||||
|
es-errors: 1.3.0
|
||||||
|
object-inspect: 1.13.3
|
||||||
|
|
||||||
|
side-channel-map@1.0.1:
|
||||||
|
dependencies:
|
||||||
|
call-bound: 1.0.3
|
||||||
|
es-errors: 1.3.0
|
||||||
|
get-intrinsic: 1.2.6
|
||||||
|
object-inspect: 1.13.3
|
||||||
|
|
||||||
|
side-channel-weakmap@1.0.2:
|
||||||
|
dependencies:
|
||||||
|
call-bound: 1.0.3
|
||||||
|
es-errors: 1.3.0
|
||||||
|
get-intrinsic: 1.2.6
|
||||||
|
object-inspect: 1.13.3
|
||||||
|
side-channel-map: 1.0.1
|
||||||
|
|
||||||
|
side-channel@1.1.0:
|
||||||
|
dependencies:
|
||||||
|
es-errors: 1.3.0
|
||||||
|
object-inspect: 1.13.3
|
||||||
|
side-channel-list: 1.0.0
|
||||||
|
side-channel-map: 1.0.1
|
||||||
|
side-channel-weakmap: 1.0.2
|
||||||
|
|
||||||
|
statuses@2.0.1: {}
|
||||||
|
|
||||||
|
toidentifier@1.0.1: {}
|
||||||
|
|
||||||
|
ts-node@10.9.2(@types/node@20.17.10)(typescript@5.7.2):
|
||||||
|
dependencies:
|
||||||
|
'@cspotcode/source-map-support': 0.8.1
|
||||||
|
'@tsconfig/node10': 1.0.11
|
||||||
|
'@tsconfig/node12': 1.0.11
|
||||||
|
'@tsconfig/node14': 1.0.3
|
||||||
|
'@tsconfig/node16': 1.0.4
|
||||||
|
'@types/node': 20.17.10
|
||||||
|
acorn: 8.14.0
|
||||||
|
acorn-walk: 8.3.4
|
||||||
|
arg: 4.1.3
|
||||||
|
create-require: 1.1.1
|
||||||
|
diff: 4.0.2
|
||||||
|
make-error: 1.3.6
|
||||||
|
typescript: 5.7.2
|
||||||
|
v8-compile-cache-lib: 3.0.1
|
||||||
|
yn: 3.1.1
|
||||||
|
|
||||||
|
type-is@1.6.18:
|
||||||
|
dependencies:
|
||||||
|
media-typer: 0.3.0
|
||||||
|
mime-types: 2.1.35
|
||||||
|
|
||||||
|
typescript@5.7.2: {}
|
||||||
|
|
||||||
|
undici-types@6.19.8: {}
|
||||||
|
|
||||||
|
unpipe@1.0.0: {}
|
||||||
|
|
||||||
|
user-agents@1.1.455:
|
||||||
|
dependencies:
|
||||||
|
lodash.clonedeep: 4.5.0
|
||||||
|
|
||||||
|
utils-merge@1.0.1: {}
|
||||||
|
|
||||||
|
v8-compile-cache-lib@3.0.1: {}
|
||||||
|
|
||||||
|
vary@1.1.2: {}
|
||||||
|
|
||||||
|
yn@3.1.1: {}
|
@ -104,6 +104,8 @@ async def root(body: UrlModel):
|
|||||||
json_compatible_item_data = {
|
json_compatible_item_data = {
|
||||||
"content": page_content,
|
"content": page_content,
|
||||||
"pageStatusCode": page_status_code,
|
"pageStatusCode": page_status_code,
|
||||||
"pageError": page_error
|
}
|
||||||
}
|
|
||||||
|
if page_error is not None:
|
||||||
|
json_compatible_item_data["pageError"] = page_error
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
return JSONResponse(content=json_compatible_item_data)
|
@ -145,6 +145,7 @@ class FirecrawlApp:
|
|||||||
f'{self.api_url}{endpoint}',
|
f'{self.api_url}{endpoint}',
|
||||||
headers=headers,
|
headers=headers,
|
||||||
json=scrape_params,
|
json=scrape_params,
|
||||||
|
timeout=(scrape_params["timeout"] + 5000 if "timeout" in scrape_params else None),
|
||||||
)
|
)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
try:
|
try:
|
||||||
@ -433,7 +434,7 @@ class FirecrawlApp:
|
|||||||
else:
|
else:
|
||||||
self._handle_error(response, 'map')
|
self._handle_error(response, 'map')
|
||||||
|
|
||||||
def batch_scrape_urls(self, urls: list[str],
|
def batch_scrape_urls(self, urls: List[str],
|
||||||
params: Optional[Dict[str, Any]] = None,
|
params: Optional[Dict[str, Any]] = None,
|
||||||
poll_interval: Optional[int] = 2,
|
poll_interval: Optional[int] = 2,
|
||||||
idempotency_key: Optional[str] = None) -> Any:
|
idempotency_key: Optional[str] = None) -> Any:
|
||||||
@ -441,7 +442,7 @@ class FirecrawlApp:
|
|||||||
Initiate a batch scrape job for the specified URLs using the Firecrawl API.
|
Initiate a batch scrape job for the specified URLs using the Firecrawl API.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
urls (list[str]): The URLs to scrape.
|
urls (List[str]): The URLs to scrape.
|
||||||
params (Optional[Dict[str, Any]]): Additional parameters for the scraper.
|
params (Optional[Dict[str, Any]]): Additional parameters for the scraper.
|
||||||
poll_interval (Optional[int]): Time in seconds between status checks when waiting for job completion. Defaults to 2 seconds.
|
poll_interval (Optional[int]): Time in seconds between status checks when waiting for job completion. Defaults to 2 seconds.
|
||||||
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
|
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
|
||||||
@ -476,12 +477,12 @@ class FirecrawlApp:
|
|||||||
self._handle_error(response, 'start batch scrape job')
|
self._handle_error(response, 'start batch scrape job')
|
||||||
|
|
||||||
|
|
||||||
def async_batch_scrape_urls(self, urls: list[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> Dict[str, Any]:
|
def async_batch_scrape_urls(self, urls: List[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Initiate a crawl job asynchronously.
|
Initiate a crawl job asynchronously.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
urls (list[str]): The URLs to scrape.
|
urls (List[str]): The URLs to scrape.
|
||||||
params (Optional[Dict[str, Any]]): Additional parameters for the scraper.
|
params (Optional[Dict[str, Any]]): Additional parameters for the scraper.
|
||||||
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
|
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
|
||||||
|
|
||||||
@ -505,12 +506,12 @@ class FirecrawlApp:
|
|||||||
else:
|
else:
|
||||||
self._handle_error(response, 'start batch scrape job')
|
self._handle_error(response, 'start batch scrape job')
|
||||||
|
|
||||||
def batch_scrape_urls_and_watch(self, urls: list[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> 'CrawlWatcher':
|
def batch_scrape_urls_and_watch(self, urls: List[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> 'CrawlWatcher':
|
||||||
"""
|
"""
|
||||||
Initiate a batch scrape job and return a CrawlWatcher to monitor the job via WebSocket.
|
Initiate a batch scrape job and return a CrawlWatcher to monitor the job via WebSocket.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
urls (list[str]): The URLs to scrape.
|
urls (List[str]): The URLs to scrape.
|
||||||
params (Optional[Dict[str, Any]]): Additional parameters for the scraper.
|
params (Optional[Dict[str, Any]]): Additional parameters for the scraper.
|
||||||
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
|
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
|
||||||
|
|
||||||
@ -925,7 +926,7 @@ class FirecrawlApp:
|
|||||||
requests.RequestException: If the request fails after the specified retries.
|
requests.RequestException: If the request fails after the specified retries.
|
||||||
"""
|
"""
|
||||||
for attempt in range(retries):
|
for attempt in range(retries):
|
||||||
response = requests.post(url, headers=headers, json=data)
|
response = requests.post(url, headers=headers, json=data, timeout=((data["timeout"] + 5000) if "timeout" in data else None))
|
||||||
if response.status_code == 502:
|
if response.status_code == 502:
|
||||||
time.sleep(backoff_factor * (2 ** attempt))
|
time.sleep(backoff_factor * (2 ** attempt))
|
||||||
else:
|
else:
|
||||||
|
@ -13,13 +13,13 @@ x-common-service: &common-service
|
|||||||
|
|
||||||
services:
|
services:
|
||||||
playwright-service:
|
playwright-service:
|
||||||
build: apps/playwright-service
|
build: apps/playwright-service-ts
|
||||||
environment:
|
environment:
|
||||||
- PORT=3000
|
PORT: 3000
|
||||||
- PROXY_SERVER=${PROXY_SERVER}
|
PROXY_SERVER: ${PROXY_SERVER}
|
||||||
- PROXY_USERNAME=${PROXY_USERNAME}
|
PROXY_USERNAME: ${PROXY_USERNAME}
|
||||||
- PROXY_PASSWORD=${PROXY_PASSWORD}
|
PROXY_PASSWORD: ${PROXY_PASSWORD}
|
||||||
- BLOCK_MEDIA=${BLOCK_MEDIA}
|
BLOCK_MEDIA: ${BLOCK_MEDIA}
|
||||||
networks:
|
networks:
|
||||||
- backend
|
- backend
|
||||||
|
|
||||||
@ -28,7 +28,7 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
REDIS_URL: ${REDIS_URL:-redis://redis:6379}
|
REDIS_URL: ${REDIS_URL:-redis://redis:6379}
|
||||||
REDIS_RATE_LIMIT_URL: ${REDIS_URL:-redis://redis:6379}
|
REDIS_RATE_LIMIT_URL: ${REDIS_URL:-redis://redis:6379}
|
||||||
PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000}
|
PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000/scrape}
|
||||||
USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION}
|
USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION}
|
||||||
PORT: ${PORT:-3002}
|
PORT: ${PORT:-3002}
|
||||||
NUM_WORKERS_PER_QUEUE: ${NUM_WORKERS_PER_QUEUE}
|
NUM_WORKERS_PER_QUEUE: ${NUM_WORKERS_PER_QUEUE}
|
||||||
@ -51,6 +51,9 @@ services:
|
|||||||
SERPER_API_KEY: ${SERPER_API_KEY}
|
SERPER_API_KEY: ${SERPER_API_KEY}
|
||||||
SEARCHAPI_API_KEY: ${SEARCHAPI_API_KEY}
|
SEARCHAPI_API_KEY: ${SEARCHAPI_API_KEY}
|
||||||
LOGGING_LEVEL: ${LOGGING_LEVEL}
|
LOGGING_LEVEL: ${LOGGING_LEVEL}
|
||||||
|
PROXY_SERVER: ${PROXY_SERVER}
|
||||||
|
PROXY_USERNAME: ${PROXY_USERNAME}
|
||||||
|
PROXY_PASSWORD: ${PROXY_PASSWORD}
|
||||||
FLY_PROCESS_GROUP: app
|
FLY_PROCESS_GROUP: app
|
||||||
depends_on:
|
depends_on:
|
||||||
- redis
|
- redis
|
||||||
@ -64,7 +67,7 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
REDIS_URL: ${REDIS_URL:-redis://redis:6379}
|
REDIS_URL: ${REDIS_URL:-redis://redis:6379}
|
||||||
REDIS_RATE_LIMIT_URL: ${REDIS_URL:-redis://redis:6379}
|
REDIS_RATE_LIMIT_URL: ${REDIS_URL:-redis://redis:6379}
|
||||||
PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000}
|
PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000/scrape}
|
||||||
USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION}
|
USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION}
|
||||||
PORT: ${PORT:-3002}
|
PORT: ${PORT:-3002}
|
||||||
NUM_WORKERS_PER_QUEUE: ${NUM_WORKERS_PER_QUEUE}
|
NUM_WORKERS_PER_QUEUE: ${NUM_WORKERS_PER_QUEUE}
|
||||||
@ -85,6 +88,9 @@ services:
|
|||||||
HOST: ${HOST:-0.0.0.0}
|
HOST: ${HOST:-0.0.0.0}
|
||||||
SELF_HOSTED_WEBHOOK_URL: ${SELF_HOSTED_WEBHOOK_URL}
|
SELF_HOSTED_WEBHOOK_URL: ${SELF_HOSTED_WEBHOOK_URL}
|
||||||
LOGGING_LEVEL: ${LOGGING_LEVEL}
|
LOGGING_LEVEL: ${LOGGING_LEVEL}
|
||||||
|
PROXY_SERVER: ${PROXY_SERVER}
|
||||||
|
PROXY_USERNAME: ${PROXY_USERNAME}
|
||||||
|
PROXY_PASSWORD: ${PROXY_PASSWORD}
|
||||||
FLY_PROCESS_GROUP: worker
|
FLY_PROCESS_GROUP: worker
|
||||||
depends_on:
|
depends_on:
|
||||||
- redis
|
- redis
|
||||||
|
Loading…
x
Reference in New Issue
Block a user