Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl (synced 2025-08-12 17:59:00 +08:00)

Commit 2151ca846c: Merge branch 'main' of https://github.com/mendableai/firecrawl
@@ -1,20 +0,0 @@
name: Clean Every 30 Minutes Before 24h Completed Jobs

on:
  schedule:
    - cron: '30 * * * *'

env:
  BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}

jobs:
  clean-jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Send GET request to clean jobs
        run: |
          response=$(curl --write-out '%{http_code}' --silent --output /dev/null --max-time 180 https://api.firecrawl.dev/admin/${{ secrets.BULL_AUTH_KEY }}/clean-before-24h-complete-jobs)
          if [ "$response" -ne 200 ]; then
            echo "Failed to clean jobs. Response: $response"
            exit 1
          fi
          echo "Successfully cleaned jobs. Response: $response"
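For reference, the deleted workflow amounts to a single authenticated GET against the admin API. A minimal sketch of invoking the same route by hand, using the `/admin/<key>/clean-before-24h-complete-jobs` path from the workflow and assuming `BULL_AUTH_KEY` is exported in the current shell:

```bash
# Hit the cleanup route the workflow used to call on a schedule.
# Assumes BULL_AUTH_KEY is exported in the current shell.
response=$(curl --write-out '%{http_code}' --silent --output /dev/null --max-time 180 \
  "https://api.firecrawl.dev/admin/${BULL_AUTH_KEY}/clean-before-24h-complete-jobs")
if [ "$response" -ne 200 ]; then
  echo "Failed to clean jobs. Response: $response" >&2
  exit 1
fi
echo "Successfully cleaned jobs. Response: $response"
```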
3  .github/workflows/deploy-image-staging.yml  (vendored)

@@ -4,9 +4,6 @@ env:

  DOTNET_VERSION: '6.0.x'

on:
  push:
    branches:
      - mog/webscraper-refactor
  workflow_dispatch:

jobs:
5  .github/workflows/deploy-image.yml  (vendored)

@@ -2,12 +2,13 @@ name: Deploy Images to GHCR

env:
  DOTNET_VERSION: '6.0.x'
  SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}

on:
  push:
    branches:
      - main
    paths:
      - apps/api/**
  workflow_dispatch:

jobs:
@@ -29,5 +30,5 @@ jobs:

      - name: 'Build Inventory Image'
        run: |
          docker build . --tag ghcr.io/mendableai/firecrawl:latest --secret id=SENTRY_AUTH_TOKEN
          docker build . --tag ghcr.io/mendableai/firecrawl:latest
          docker push ghcr.io/mendableai/firecrawl:latest
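For a local smoke test of the published image, a sketch along these lines should work; the registry path and tag come from the workflow above, while the port mapping is an assumption based on the API's default `PORT=3002`:

```bash
# Pull the image the workflow pushes and run it locally.
# The 3002 port mapping is an assumption, not part of the workflow itself.
docker pull ghcr.io/mendableai/firecrawl:latest
docker run --rm -p 3002:3002 ghcr.io/mendableai/firecrawl:latest
```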
32  .github/workflows/publish-js-sdk.yml  (vendored, new file)

@@ -0,0 +1,32 @@
name: Publish JS SDK

on:
  push:
    branches:
      - main
    paths:
      - apps/js-sdk/firecrawl/package.json

env:
  TEST_API_KEY: ${{ secrets.TEST_API_KEY }}

jobs:
  publish:
    name: Publish
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Node.js
        uses: actions/setup-node@v3
        with:
          node-version: "20"
      - name: Authenticate
        run: echo "//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}" > ~/.npmrc
      - name: Publish
        run: |
          npm publish
          sed -i 's/"name": "@mendable\/firecrawl-js"/"name": "@mendable\/firecrawl"/g' package.json
          npm publish
          sed -i 's/"name": "@mendable\/firecrawl-js"/"name": "firecrawl"/g' package.json
          npm publish
        working-directory: ./apps/js-sdk/firecrawl
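The publish step ships the same build under three npm names by rewriting `package.json`'s `name` field between `npm publish` calls. As written, the second `sed` still searches for the original `@mendable/firecrawl-js` name, which the first `sed` has already rewritten, so it would not match; a hypothetical variant that targets the current name each time:

```bash
# Publish under three names by rewriting the *current* "name" before each publish.
cd apps/js-sdk/firecrawl
npm publish   # publishes @mendable/firecrawl-js
sed -i 's/"name": "@mendable\/firecrawl-js"/"name": "@mendable\/firecrawl"/' package.json
npm publish   # publishes @mendable/firecrawl
sed -i 's/"name": "@mendable\/firecrawl"/"name": "firecrawl"/' package.json
npm publish   # publishes firecrawl
```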
30  .github/workflows/test-js-sdk.yml  (vendored, new file)

@@ -0,0 +1,30 @@
name: JS SDK Test Suite

on:
  pull_request:
    branches:
      - main
    paths:
      - apps/js-sdk/firecrawl/**

env:
  TEST_API_KEY: ${{ secrets.TEST_API_KEY }}

jobs:
  test:
    name: Run tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Node.js
        uses: actions/setup-node@v3
        with:
          node-version: "20"
          cache: "npm"
          cache-dependency-path: './apps/js-sdk/firecrawl/package-lock.json'
      - name: Install dependencies
        run: npm install
        working-directory: ./apps/js-sdk/firecrawl
      - name: Run tests
        run: npm run test
        working-directory: ./apps/js-sdk/firecrawl
138  .github/workflows/test-server-self-host.yml  (vendored, new file)

@@ -0,0 +1,138 @@
name: Self-hosted Server Test Suite

on:
  pull_request:
    branches:
      - main
    paths:
      - apps/api/**
      - apps/playwright-service-ts/**

env:
  PORT: 3002
  REDIS_URL: redis://localhost:6379
  HOST: 0.0.0.0
  ENV: ${{ secrets.ENV }}
  TEST_SUITE_SELF_HOSTED: true
  USE_GO_MARKDOWN_PARSER: true

jobs:
  test:
    name: Run tests
    strategy:
      matrix:
        ai: ["openai", "no-ai"]
        search: ["searxng", "google"]
        engine: ["playwright", "fetch"]
        proxy: ["proxy", "no-proxy"]
      fail-fast: false
    runs-on: ubuntu-latest
    services:
      redis:
        image: redis
        ports:
          - 6379:6379
    env:
      OPENAI_API_KEY: ${{ matrix.ai == 'openai' && secrets.OPENAI_API_KEY || '' }}
      SEARXNG_ENDPOINT: ${{ matrix.search == 'searxng' && 'http://localhost:3434' || '' }}
      PLAYWRIGHT_MICROSERVICE_URL: ${{ matrix.engine == 'playwright' && 'http://localhost:3003/scrape' || '' }}
      PROXY_SERVER: ${{ matrix.proxy == 'proxy' && secrets.PROXY_SERVER || '' }}
      PROXY_USERNAME: ${{ matrix.proxy == 'proxy' && secrets.PROXY_USERNAME || '' }}
      PROXY_PASSWORD: ${{ matrix.proxy == 'proxy' && secrets.PROXY_PASSWORD || '' }}
    steps:
      - uses: actions/checkout@v3
      - name: Install pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 10
      - name: Set up Node.js
        uses: actions/setup-node@v3
        with:
          node-version: "20"
          cache: "pnpm"
          cache-dependency-path: './apps/api/pnpm-lock.yaml'
      - name: Install dependencies
        run: pnpm install
        working-directory: ./apps/api
      - name: Install Playwright dependencies
        if: matrix.engine == 'playwright'
        run: |
          pnpm install
          pnpm exec playwright install-deps
          pnpm exec playwright install
        working-directory: ./apps/playwright-service-ts
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.19'
          cache-dependency-path: ./apps/api/sharedLibs/go-html-to-md/go.sum
      - name: Build go-html-to-md
        run: |
          go mod tidy
          go build -o html-to-markdown.so -buildmode=c-shared html-to-markdown.go
          chmod +x html-to-markdown.so
        working-directory: ./apps/api/sharedLibs/go-html-to-md
      - name: Set up SearXNG
        if: matrix.search == 'searxng'
        run: |
          mkdir searxng

          echo "use_default_settings: true
          search:
            formats: [html, json, csv]
          server:
            secret_key: 'fcsecret'" > searxng/settings.yml

          docker run -d -p 3434:8080 -v "${PWD}/searxng:/etc/searxng" --name searxng searxng/searxng
          pnpx wait-on tcp:3434 -t 30s
        working-directory: ./
      - name: Start server
        run: npm start > api.log 2>&1 &
        working-directory: ./apps/api
      - name: Start worker
        run: npm run workers > worker.log 2>&1 &
        working-directory: ./apps/api
      - name: Start playwright
        if: matrix.engine == 'playwright'
        run: npm run dev > playwright.log 2>&1 &
        working-directory: ./apps/playwright-service-ts
        env:
          PORT: 3003
      - name: Wait for server
        run: pnpx wait-on tcp:3002 -t 15s
      - name: Wait for playwright
        if: matrix.engine == 'playwright'
        run: pnpx wait-on tcp:3003 -t 15s
      - name: Run snippet tests
        run: |
          npm run test:snips
        working-directory: ./apps/api
      - name: Kill instances
        if: always()
        run: pkill -9 node
      - name: Kill SearXNG
        if: always() && matrix.search == 'searxng'
        run: |
          docker logs searxng > searxng/searxng.log 2>&1
          docker kill searxng
        working-directory: ./
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: Logs (${{ matrix.ai }}, ${{ matrix.search }}, ${{ matrix.engine }}, ${{ matrix.proxy }})
          path: |
            ./apps/api/api.log
            ./apps/api/worker.log
      - uses: actions/upload-artifact@v4
        if: always() && matrix.playwright
        with:
          name: Playwright Logs (${{ matrix.ai }}, ${{ matrix.search }}, ${{ matrix.proxy }})
          path: |
            ./apps/playwright-service-ts/playwright.log
      - uses: actions/upload-artifact@v4
        if: always() && matrix.search == 'searxng'
        with:
          name: SearXNG (${{ matrix.ai }}, ${{ matrix.engine }}, ${{ matrix.proxy }})
          path: |
            ./searxng/searxng.log
            ./searxng/settings.yml
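To reproduce one cell of the matrix locally, the SearXNG half of the setup can be stood up the same way the job does it. This sketch uses the same image, port, and settings file as the workflow and assumes `docker` and `pnpx` are available:

```bash
# Throwaway SearXNG with JSON output enabled, mirroring the CI step.
mkdir -p searxng
cat > searxng/settings.yml <<'EOF'
use_default_settings: true
search:
  formats: [html, json, csv]
server:
  secret_key: 'fcsecret'
EOF
docker run -d -p 3434:8080 -v "${PWD}/searxng:/etc/searxng" --name searxng searxng/searxng
pnpx wait-on tcp:3434 -t 30s  # block until the container accepts connections
```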
@@ -1,8 +1,11 @@
name: CI/CD
name: Server Test Suite

on:
  pull_request:
    branches:
      - main
    paths:
      - apps/api/**
  # schedule:
  #   - cron: '0 */4 * * *'

@@ -29,10 +32,11 @@ env:
  USE_DB_AUTHENTICATION: ${{ secrets.USE_DB_AUTHENTICATION }}
  SERPER_API_KEY: ${{ secrets.SERPER_API_KEY }}
  ENV: ${{ secrets.ENV }}
  USE_GO_MARKDOWN_PARSER: true

jobs:
  pre-deploy:
    name: Pre-deploy checks
  test:
    name: Run tests
    runs-on: ubuntu-latest
    services:
      redis:
@@ -47,15 +51,30 @@ jobs:
          oauth-client-id: ${{ secrets.TS_OAUTH_CLIENT_ID }}
          oauth-secret: ${{ secrets.TS_OAUTH_SECRET }}
          tags: tag:ci
      - name: Install pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 10
      - name: Set up Node.js
        uses: actions/setup-node@v3
        with:
          node-version: "20"
      - name: Install pnpm
        run: npm install -g pnpm
          cache: "pnpm"
          cache-dependency-path: './apps/api/pnpm-lock.yaml'
      - name: Install dependencies
        run: pnpm install
        working-directory: ./apps/api
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.19'
          cache-dependency-path: ./apps/api/sharedLibs/go-html-to-md/go.sum
      - name: Build go-html-to-md
        run: |
          go mod tidy
          go build -o html-to-markdown.so -buildmode=c-shared html-to-markdown.go
          chmod +x html-to-markdown.so
        working-directory: ./apps/api/sharedLibs/go-html-to-md
      - name: Start the application
        run: npm start &
        working-directory: ./apps/api
@@ -95,7 +95,7 @@ curl -X POST https://api.firecrawl.dev/v1/crawl \
    -H 'Authorization: Bearer fc-YOUR_API_KEY' \
    -d '{
      "url": "https://docs.firecrawl.dev",
      "limit": 100,
      "limit": 10,
      "scrapeOptions": {
        "formats": ["markdown", "html"]
      }
100  SELF_HOST.md

@@ -34,62 +34,72 @@ Self-hosting Firecrawl is ideal for those who need full control over their scrap

2. Set environment variables

   Create an `.env` in the root directory; you can copy over the template in `apps/api/.env.example`.

   To start, we won't set up authentication or any optional subservices (pdf parsing, JS blocking support, AI features).
   Create an `.env` in the root directory using the template below.

   `.env:`
   ```
   # ===== Required ENVS ======
   NUM_WORKERS_PER_QUEUE=8
   PORT=3002
   HOST=0.0.0.0
   REDIS_URL=redis://redis:6379
   REDIS_RATE_LIMIT_URL=redis://redis:6379

   ## To turn on DB authentication, you need to set up Supabase.
   # To turn on DB authentication, you need to set up Supabase.
   USE_DB_AUTHENTICATION=false

   # ===== Optional ENVS ======

   # Supabase Setup (used to support DB authentication, advanced logging, etc.)
   SUPABASE_ANON_TOKEN=
   SUPABASE_URL=
   SUPABASE_SERVICE_TOKEN=
   ## === AI features (JSON format on scrape, /extract API) ===
   # Provide your OpenAI API key here to enable AI features
   # OPENAI_API_KEY=

   # Other Optionals
   TEST_API_KEY= # use if you've set up authentication and want to test with a real API key
   SCRAPING_BEE_API_KEY= # use if you'd like to use as a fallback scraper
   OPENAI_API_KEY= # add for LLM-dependent features (e.g., image alt generation)
   BULL_AUTH_KEY= @
   PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback
   LLAMAPARSE_API_KEY= # set if you have a llamaparse key you'd like to use to parse pdfs
   SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
   POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs
   POSTHOG_HOST= # set if you'd like to send posthog events like job logs
   ## === Proxy ===
   # PROXY_SERVER can be a full URL (e.g. http://0.1.2.3:1234) or just an IP and port combo (e.g. 0.1.2.3:1234)
   # Do not uncomment PROXY_USERNAME and PROXY_PASSWORD if your proxy is unauthenticated
   # PROXY_SERVER=
   # PROXY_USERNAME=
   # PROXY_PASSWORD=

   ## === /search API ===
   # By default, the /search API will use Google search.

   # You can specify a SearXNG server with the JSON format enabled, if you'd like to use that instead of direct Google.
   # You can also customize the engines and categories parameters, but the defaults should also work just fine.
   # SEARXNG_ENDPOINT=http://your.searxng.server
   # SEARXNG_ENGINES=
   # SEARXNG_CATEGORIES=

   ## === Other ===

   # Supabase Setup (used to support DB authentication, advanced logging, etc.)
   # SUPABASE_ANON_TOKEN=
   # SUPABASE_URL=
   # SUPABASE_SERVICE_TOKEN=

   # Use if you've set up authentication and want to test with a real API key
   # TEST_API_KEY=

   # You can add this to enable ScrapingBee as a fallback scraping engine.
   # SCRAPING_BEE_API_KEY=

   # This key lets you access the queue admin panel. Change this if your deployment is publicly accessible.
   BULL_AUTH_KEY=CHANGEME

   # This is now autoconfigured by the docker-compose.yaml. You shouldn't need to set it.
   # PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/scrape
   # REDIS_URL=redis://redis:6379
   # REDIS_RATE_LIMIT_URL=redis://redis:6379

   # Set if you have a llamaparse key you'd like to use to parse pdfs
   # LLAMAPARSE_API_KEY=

   # Set if you'd like to send server health status messages to Slack
   # SLACK_WEBHOOK_URL=

   # Set if you'd like to send posthog events like job logs
   # POSTHOG_API_KEY=
   # POSTHOG_HOST=
   ```
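Since the admin panel path embeds `BULL_AUTH_KEY`, a quick probe confirms the key is wired up once the stack is running; this sketch assumes the `CHANGEME` default from the template above:

```bash
# 200 means the admin panel is reachable with the configured key.
curl -s -o /dev/null -w '%{http_code}\n' \
  "http://localhost:3002/admin/CHANGEME/queues"
```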
3. *(Optional) Running with TypeScript Playwright Service*

   * Update the `docker-compose.yml` file to change the Playwright service:

     ```plaintext
     build: apps/playwright-service
     ```
     to
     ```plaintext
     build: apps/playwright-service-ts
     ```

   * Set the `PLAYWRIGHT_MICROSERVICE_URL` in your `.env` file:

     ```plaintext
     PLAYWRIGHT_MICROSERVICE_URL=http://localhost:3000/scrape
     ```

   * Don't forget to set the proxy server in your `.env` file as needed.

4. Build and run the Docker containers:
3. Build and run the Docker containers:

   ```bash
   docker compose build
@@ -98,9 +108,9 @@ POSTHOG_HOST= # set if you'd like to send posthog events like job logs

This will run a local instance of Firecrawl which can be accessed at `http://localhost:3002`.

You should be able to see the Bull Queue Manager UI on `http://localhost:3002/admin/@/queues`.
You should be able to see the Bull Queue Manager UI on `http://localhost:3002/admin/CHANGEME/queues`.

5. *(Optional)* Test the API
4. *(Optional)* Test the API

If you’d like to test the crawl endpoint, you can run this:

@@ -108,7 +118,7 @@ If you’d like to test the crawl endpoint, you can run this:
   curl -X POST http://localhost:3002/v1/crawl \
     -H 'Content-Type: application/json' \
     -d '{
       "url": "https://mendable.ai"
       "url": "https://firecrawl.dev"
     }'
   ```
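Crawls are asynchronous, so the POST above returns a job rather than the scraped pages. A sketch of polling the job, assuming the self-hosted instance from this guide (no auth), that the endpoint returns an `id` field as the cloud API does, and that `jq` is installed:

```bash
# Kick off a crawl, capture the job id, then check its status.
id=$(curl -s -X POST http://localhost:3002/v1/crawl \
  -H 'Content-Type: application/json' \
  -d '{"url": "https://firecrawl.dev"}' | jq -r .id)
curl -s "http://localhost:3002/v1/crawl/$id" | jq .status
```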
@@ -19,8 +19,7 @@ RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile

RUN apt-get clean && apt-get update -qq && apt-get install -y ca-certificates && update-ca-certificates
RUN pnpm install
RUN --mount=type=secret,id=SENTRY_AUTH_TOKEN \
    bash -c 'export SENTRY_AUTH_TOKEN="$(cat /run/secrets/SENTRY_AUTH_TOKEN)"; if [ -z $SENTRY_AUTH_TOKEN ]; then pnpm run build:nosentry; else pnpm run build; fi'
RUN pnpm run build

# Install Go
FROM golang:1.19 AS go-base
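The `RUN --mount=type=secret` line consumes a BuildKit secret: the token is readable at `/run/secrets/SENTRY_AUTH_TOKEN` during that step only and never lands in an image layer. A sketch of supplying it at build time, assuming BuildKit and an exported `SENTRY_AUTH_TOKEN` (the local tag is hypothetical):

```bash
# Pass the Sentry token as a BuildKit secret; if it is empty, the Dockerfile
# above falls back to the build:nosentry script.
export DOCKER_BUILDKIT=1
docker build . \
  --tag firecrawl-api:local \
  --secret id=SENTRY_AUTH_TOKEN,env=SENTRY_AUTH_TOKEN
```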
@@ -9,7 +9,7 @@
    "format": "prettier --write \"src/**/*.(js|ts)\"",
    "flyio": "node dist/src/index.js",
    "start:dev": "nodemon --exec ts-node src/index.ts",
    "build": "tsc && pnpm sentry:sourcemaps",
    "build": "tsc",
    "build:nosentry": "tsc",
    "test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'",
    "test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
@@ -56,6 +56,7 @@
    "typescript": "^5.4.2"
  },
  "dependencies": {
    "jsdom": "^26.0.0",
    "@anthropic-ai/sdk": "^0.24.3",
    "@apidevtools/json-schema-ref-parser": "^11.7.3",
    "@brillout/import": "^0.2.2",
286
apps/api/pnpm-lock.yaml
generated
286
apps/api/pnpm-lock.yaml
generated
@ -125,6 +125,9 @@ importers:
|
||||
joplin-turndown-plugin-gfm:
|
||||
specifier: ^1.0.12
|
||||
version: 1.0.12
|
||||
jsdom:
|
||||
specifier: ^26.0.0
|
||||
version: 26.0.0
|
||||
json-schema-to-zod:
|
||||
specifier: ^2.3.0
|
||||
version: 2.3.0
|
||||
@ -136,7 +139,7 @@ importers:
|
||||
version: 2.9.0
|
||||
langchain:
|
||||
specifier: ^0.2.8
|
||||
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||
version: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||
languagedetect:
|
||||
specifier: ^2.0.0
|
||||
version: 2.0.0
|
||||
@ -332,6 +335,9 @@ packages:
|
||||
resolution: {integrity: sha512-WApSdLdXEBb/1FUPca2lteASewEfpjEYJ8oXZP+0gExK5qSfsEKBKcA+WjY6Q4wvXwyv0+W6Kvc372pSceib9w==}
|
||||
engines: {node: '>= 16'}
|
||||
|
||||
'@asamuzakjp/css-color@2.8.3':
|
||||
resolution: {integrity: sha512-GIc76d9UI1hCvOATjZPyHFmE5qhRccp3/zGfMPapK3jBi+yocEzp6BBB0UnfRYP9NP4FANqUZYb0hnfs3TM3hw==}
|
||||
|
||||
'@aws-crypto/crc32@3.0.0':
|
||||
resolution: {integrity: sha512-IzSgsrxUcsrejQbPVilIKy16kAT52EwB6zSaI+M3xxIhKh5+aldEyvI+z6erM7TCLB2BJsFrtHjp6/4/sr+3dA==}
|
||||
|
||||
@ -685,6 +691,34 @@ packages:
|
||||
resolution: {integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==}
|
||||
engines: {node: '>=12'}
|
||||
|
||||
'@csstools/color-helpers@5.0.1':
|
||||
resolution: {integrity: sha512-MKtmkA0BX87PKaO1NFRTFH+UnkgnmySQOvNxJubsadusqPEC2aJ9MOQiMceZJJ6oitUl/i0L6u0M1IrmAOmgBA==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
'@csstools/css-calc@2.1.1':
|
||||
resolution: {integrity: sha512-rL7kaUnTkL9K+Cvo2pnCieqNpTKgQzy5f+N+5Iuko9HAoasP+xgprVh7KN/MaJVvVL1l0EzQq2MoqBHKSrDrag==}
|
||||
engines: {node: '>=18'}
|
||||
peerDependencies:
|
||||
'@csstools/css-parser-algorithms': ^3.0.4
|
||||
'@csstools/css-tokenizer': ^3.0.3
|
||||
|
||||
'@csstools/css-color-parser@3.0.7':
|
||||
resolution: {integrity: sha512-nkMp2mTICw32uE5NN+EsJ4f5N+IGFeCFu4bGpiKgb2Pq/7J/MpyLBeQ5ry4KKtRFZaYs6sTmcMYrSRIyj5DFKA==}
|
||||
engines: {node: '>=18'}
|
||||
peerDependencies:
|
||||
'@csstools/css-parser-algorithms': ^3.0.4
|
||||
'@csstools/css-tokenizer': ^3.0.3
|
||||
|
||||
'@csstools/css-parser-algorithms@3.0.4':
|
||||
resolution: {integrity: sha512-Up7rBoV77rv29d3uKHUIVubz1BTcgyUK72IvCQAbfbMv584xHcGKCKbWh7i8hPrRJ7qU4Y8IO3IY9m+iTB7P3A==}
|
||||
engines: {node: '>=18'}
|
||||
peerDependencies:
|
||||
'@csstools/css-tokenizer': ^3.0.3
|
||||
|
||||
'@csstools/css-tokenizer@3.0.3':
|
||||
resolution: {integrity: sha512-UJnjoFsmxfKUdNYdWgOB0mWUypuLvAfQPH1+pyvRJs6euowbFkFC6P13w1l8mJyi3vxYMxc9kld5jZEGRQs6bw==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
'@dabh/diagnostics@2.0.3':
|
||||
resolution: {integrity: sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA==}
|
||||
|
||||
@ -1715,6 +1749,10 @@ packages:
|
||||
resolution: {integrity: sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==}
|
||||
engines: {node: '>= 14'}
|
||||
|
||||
agent-base@7.1.3:
|
||||
resolution: {integrity: sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==}
|
||||
engines: {node: '>= 14'}
|
||||
|
||||
agentkeepalive@4.5.0:
|
||||
resolution: {integrity: sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==}
|
||||
engines: {node: '>= 8.0.0'}
|
||||
@ -2141,6 +2179,10 @@ packages:
|
||||
resolution: {integrity: sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==}
|
||||
engines: {node: '>= 6'}
|
||||
|
||||
cssstyle@4.2.1:
|
||||
resolution: {integrity: sha512-9+vem03dMXG7gDmZ62uqmRiMRNtinIZ9ZyuF6BdxzfOD+FdN5hretzynkn0ReS2DO2GSw76RWHs0UmJPI2zUjw==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
csv-parse@5.5.6:
|
||||
resolution: {integrity: sha512-uNpm30m/AGSkLxxy7d9yRXpJQFrZzVWLFBkS+6ngPcZkw/5k3L/jjFuj7tVnEpRn+QgmiXr21nDlhCiUK4ij2A==}
|
||||
|
||||
@ -2152,6 +2194,10 @@ packages:
|
||||
resolution: {integrity: sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==}
|
||||
engines: {node: '>= 14'}
|
||||
|
||||
data-urls@5.0.0:
|
||||
resolution: {integrity: sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
date-fns@3.6.0:
|
||||
resolution: {integrity: sha512-fRHTG8g/Gif+kSh50gaGEdToemgfj74aRX3swtiouboip5JDLAyDE9F11nHMIcvOaXeOC6D7SpNhi7uFyB7Uww==}
|
||||
|
||||
@ -2197,6 +2243,9 @@ packages:
|
||||
resolution: {integrity: sha512-9iE1PgSik9HeIIw2JO94IidnE3eBoQrFJ3w7sFuzSX4DpmZ3v5sZpUiV5Swcf6mQEF+Y0ru8Neo+p+nyh2J+hQ==}
|
||||
engines: {node: '>=10'}
|
||||
|
||||
decimal.js@10.5.0:
|
||||
resolution: {integrity: sha512-8vDa8Qxvr/+d94hSh5P3IJwI5t8/c0KsMp+g8bNw9cY2icONa5aPfvKeieW1WlG0WQYwwhJ7mjui2xtiePQSXw==}
|
||||
|
||||
dedent@1.5.3:
|
||||
resolution: {integrity: sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==}
|
||||
peerDependencies:
|
||||
@ -2510,6 +2559,10 @@ packages:
|
||||
resolution: {integrity: sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==}
|
||||
engines: {node: '>= 6'}
|
||||
|
||||
form-data@4.0.1:
|
||||
resolution: {integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==}
|
||||
engines: {node: '>= 6'}
|
||||
|
||||
formdata-node@4.4.1:
|
||||
resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==}
|
||||
engines: {node: '>= 12.20'}
|
||||
@ -2647,6 +2700,10 @@ packages:
|
||||
resolution: {integrity: sha512-oWv4T4yJ52iKrufjnyZPkrN0CH3QnrUqdB6In1g5Fe1mia8GmF36gnfNySxoZtxD5+NmYw1EElVXiBk93UeskA==}
|
||||
engines: {node: '>=12'}
|
||||
|
||||
html-encoding-sniffer@4.0.0:
|
||||
resolution: {integrity: sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
html-escaper@2.0.2:
|
||||
resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==}
|
||||
|
||||
@ -2686,6 +2743,10 @@ packages:
|
||||
resolution: {integrity: sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==}
|
||||
engines: {node: '>= 14'}
|
||||
|
||||
https-proxy-agent@7.0.6:
|
||||
resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==}
|
||||
engines: {node: '>= 14'}
|
||||
|
||||
human-signals@2.1.0:
|
||||
resolution: {integrity: sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==}
|
||||
engines: {node: '>=10.17.0'}
|
||||
@ -2798,6 +2859,9 @@ packages:
|
||||
resolution: {integrity: sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==}
|
||||
engines: {node: '>=8'}
|
||||
|
||||
is-potential-custom-element-name@1.0.1:
|
||||
resolution: {integrity: sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==}
|
||||
|
||||
is-retry-allowed@2.2.0:
|
||||
resolution: {integrity: sha512-XVm7LOeLpTW4jV19QSH38vkswxoLud8sQ57YwJVTPWdiaI9I8keEhGFpBlslyVsgdQy4Opg8QOLb8YRgsyZiQg==}
|
||||
engines: {node: '>=10'}
|
||||
@ -3012,6 +3076,15 @@ packages:
|
||||
jsbn@1.1.0:
|
||||
resolution: {integrity: sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==}
|
||||
|
||||
jsdom@26.0.0:
|
||||
resolution: {integrity: sha512-BZYDGVAIriBWTpIxYzrXjv3E/4u8+/pSG5bQdIYCbNCGOvsPkDQfTVLAIXAf9ETdCpduCVTkDe2NNZ8NIwUVzw==}
|
||||
engines: {node: '>=18'}
|
||||
peerDependencies:
|
||||
canvas: ^3.0.0
|
||||
peerDependenciesMeta:
|
||||
canvas:
|
||||
optional: true
|
||||
|
||||
jsesc@2.5.2:
|
||||
resolution: {integrity: sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==}
|
||||
engines: {node: '>=4'}
|
||||
@ -3298,6 +3371,9 @@ packages:
|
||||
resolution: {integrity: sha512-CQl19J/g+Hbjbv4Y3mFNNXFEL/5t/KCg8POCuUqd4rMKjGG+j1ybER83hxV58zL+dFI1PTkt3GNFSHRt+d8qEQ==}
|
||||
engines: {node: 14 || >=16.14}
|
||||
|
||||
lru-cache@10.4.3:
|
||||
resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==}
|
||||
|
||||
lru-cache@5.1.1:
|
||||
resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==}
|
||||
|
||||
@ -3588,6 +3664,9 @@ packages:
|
||||
resolution: {integrity: sha512-1MQz1Ed8z2yckoBeSfkQHHO9K1yDRxxtotKSJ9yvcTUUxSvfvzEq5GwBrjjHEpMlq/k5gvXdmJ1SbYxWtpNoVg==}
|
||||
engines: {node: '>=8'}
|
||||
|
||||
nwsapi@2.2.16:
|
||||
resolution: {integrity: sha512-F1I/bimDpj3ncaNDhfyMWuFqmQDBwDB0Fogc2qpL3BWvkQteFD/8BzWuIRl83rq0DXfm8SGt/HFhLXZyljTXcQ==}
|
||||
|
||||
object-assign@4.1.1:
|
||||
resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
@ -3697,6 +3776,9 @@ packages:
|
||||
parse5@7.1.2:
|
||||
resolution: {integrity: sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==}
|
||||
|
||||
parse5@7.2.1:
|
||||
resolution: {integrity: sha512-BuBYQYlv1ckiPdQi/ohiivi9Sagc9JG+Ozs0r7b/0iK3sKmrb0b9FdWdBbOdx6hBCM/F9Ir82ofnBhtZOjCRPQ==}
|
||||
|
||||
parseley@0.12.1:
|
||||
resolution: {integrity: sha512-e6qHKe3a9HWr0oMRVDTRhKce+bRO8VGQR3NyVwcjwrbhMmFCX9KszEV35+rn4AdilFAq9VPxP/Fe1wC9Qjd2lw==}
|
||||
|
||||
@ -4015,6 +4097,9 @@ packages:
|
||||
resolution: {integrity: sha512-s+pyvQeIKIZ0dx5iJiQk1tPLJAWln39+MI5jtM8wnyws+G5azk+dMnMX0qfbqNetKKNgcWWOdi0sfm+FbQbgdQ==}
|
||||
engines: {node: '>=10.0.0'}
|
||||
|
||||
rrweb-cssom@0.8.0:
|
||||
resolution: {integrity: sha512-guoltQEx+9aMf2gDZ0s62EcV8lsXR+0w8915TC3ITdn2YueuNjdAYh/levpU9nFaoChh9RUS5ZdQMrKfVEN9tw==}
|
||||
|
||||
rusha@0.8.14:
|
||||
resolution: {integrity: sha512-cLgakCUf6PedEu15t8kbsjnwIFFR2D4RfL+W3iWFJ4iac7z4B0ZI8fxy4R3J956kAI68HclCFGL8MPoUVC3qVA==}
|
||||
|
||||
@ -4034,6 +4119,10 @@ packages:
|
||||
sax@1.4.1:
|
||||
resolution: {integrity: sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==}
|
||||
|
||||
saxes@6.0.0:
|
||||
resolution: {integrity: sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==}
|
||||
engines: {node: '>=v12.22.7'}
|
||||
|
||||
scheduler@0.23.2:
|
||||
resolution: {integrity: sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==}
|
||||
|
||||
@ -4260,6 +4349,9 @@ packages:
|
||||
resolution: {integrity: sha512-SzRP5LQ6Ts2G5NyAa/jg16s8e3R7rfdFjizy1zeoecYWw+nGL+YA1xZvW/+iJmidBGSdLkuvdwTYEyJEb+EiUw==}
|
||||
engines: {node: '>=0.2.6'}
|
||||
|
||||
symbol-tree@3.2.4:
|
||||
resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==}
|
||||
|
||||
systeminformation@5.22.11:
|
||||
resolution: {integrity: sha512-aLws5yi4KCHTb0BVvbodQY5bY8eW4asMRDTxTW46hqw9lGjACX6TlLdJrkdoHYRB0qs+MekqEq1zG7WDnWE8Ug==}
|
||||
engines: {node: '>=8.0.0'}
|
||||
@ -4315,6 +4407,10 @@ packages:
|
||||
resolution: {integrity: sha512-r0eojU4bI8MnHr8c5bNo7lJDdI2qXlWWJk6a9EAFG7vbhTjElYhBVS3/miuE0uOuoLdb8Mc/rVfsmm6eo5o9GA==}
|
||||
hasBin: true
|
||||
|
||||
tough-cookie@5.1.1:
|
||||
resolution: {integrity: sha512-Ek7HndSVkp10hmHP9V4qZO1u+pn1RU5sI0Fw+jCU3lyvuMZcgqsNgc6CmJJZyByK4Vm/qotGRJlfgAX8q+4JiA==}
|
||||
engines: {node: '>=16'}
|
||||
|
||||
tr46@0.0.3:
|
||||
resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
|
||||
|
||||
@ -4322,6 +4418,10 @@ packages:
|
||||
resolution: {integrity: sha512-2lv/66T7e5yNyhAAC4NaKe5nVavzuGJQVVtRYLyQ2OI8tsJ61PMLlelehb0wi2Hx6+hT/OJUWZcw8MjlSRnxvw==}
|
||||
engines: {node: '>=14'}
|
||||
|
||||
tr46@5.0.0:
|
||||
resolution: {integrity: sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
triple-beam@1.4.1:
|
||||
resolution: {integrity: sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg==}
|
||||
engines: {node: '>= 14.0.0'}
|
||||
@ -4483,6 +4583,10 @@ packages:
|
||||
resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==}
|
||||
engines: {node: '>= 0.8'}
|
||||
|
||||
w3c-xmlserializer@5.0.0:
|
||||
resolution: {integrity: sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
walker@1.0.8:
|
||||
resolution: {integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==}
|
||||
|
||||
@ -4505,13 +4609,25 @@ packages:
|
||||
resolution: {integrity: sha512-p41ogyeMUrw3jWclHWTQg1k05DSVXPLcVxRTYsXUk+ZooOCZLcoYgPZ/HL/D/N+uQPOtcp1me1WhBEaX02mhWg==}
|
||||
engines: {node: '>=12'}
|
||||
|
||||
whatwg-encoding@3.1.1:
|
||||
resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
whatwg-fetch@3.6.20:
|
||||
resolution: {integrity: sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==}
|
||||
|
||||
whatwg-mimetype@4.0.0:
|
||||
resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
whatwg-url@13.0.0:
|
||||
resolution: {integrity: sha512-9WWbymnqj57+XEuqADHrCJ2eSXzn8WXIW/YSGaZtb2WKAInQ6CHfaUUcTyyver0p8BDg5StLQq8h1vtZuwmOig==}
|
||||
engines: {node: '>=16'}
|
||||
|
||||
whatwg-url@14.1.1:
|
||||
resolution: {integrity: sha512-mDGf9diDad/giZ/Sm9Xi2YcyzaFpbdLpJPr+E9fSkyQ7KpQD4SdFcugkRQYzhmfI4KeV4Qpnn2sKPdo+kmsgRQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
whatwg-url@5.0.0:
|
||||
resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==}
|
||||
|
||||
@ -4583,6 +4699,10 @@ packages:
|
||||
utf-8-validate:
|
||||
optional: true
|
||||
|
||||
xml-name-validator@5.0.0:
|
||||
resolution: {integrity: sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
xml2js@0.6.2:
|
||||
resolution: {integrity: sha512-T4rieHaC1EXcES0Kxxj4JWgaUQHDk+qwHcYOCFHfiwKz7tOVPLq7Hjq9dM1WCMhylqMEfP7hMcOIChvotiZegA==}
|
||||
engines: {node: '>=4.0.0'}
|
||||
@ -4595,6 +4715,9 @@ packages:
|
||||
resolution: {integrity: sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==}
|
||||
engines: {node: '>=4.0'}
|
||||
|
||||
xmlchars@2.2.0:
|
||||
resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==}
|
||||
|
||||
xtend@4.0.2:
|
||||
resolution: {integrity: sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==}
|
||||
engines: {node: '>=0.4'}
|
||||
@ -4675,6 +4798,14 @@ snapshots:
|
||||
'@types/json-schema': 7.0.15
|
||||
js-yaml: 4.1.0
|
||||
|
||||
'@asamuzakjp/css-color@2.8.3':
|
||||
dependencies:
|
||||
'@csstools/css-calc': 2.1.1(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)
|
||||
'@csstools/css-color-parser': 3.0.7(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)
|
||||
'@csstools/css-parser-algorithms': 3.0.4(@csstools/css-tokenizer@3.0.3)
|
||||
'@csstools/css-tokenizer': 3.0.3
|
||||
lru-cache: 10.4.3
|
||||
|
||||
'@aws-crypto/crc32@3.0.0':
|
||||
dependencies:
|
||||
'@aws-crypto/util': 3.0.0
|
||||
@ -5413,6 +5544,26 @@ snapshots:
|
||||
dependencies:
|
||||
'@jridgewell/trace-mapping': 0.3.9
|
||||
|
||||
'@csstools/color-helpers@5.0.1': {}
|
||||
|
||||
'@csstools/css-calc@2.1.1(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)':
|
||||
dependencies:
|
||||
'@csstools/css-parser-algorithms': 3.0.4(@csstools/css-tokenizer@3.0.3)
|
||||
'@csstools/css-tokenizer': 3.0.3
|
||||
|
||||
'@csstools/css-color-parser@3.0.7(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)':
|
||||
dependencies:
|
||||
'@csstools/color-helpers': 5.0.1
|
||||
'@csstools/css-calc': 2.1.1(@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3))(@csstools/css-tokenizer@3.0.3)
|
||||
'@csstools/css-parser-algorithms': 3.0.4(@csstools/css-tokenizer@3.0.3)
|
||||
'@csstools/css-tokenizer': 3.0.3
|
||||
|
||||
'@csstools/css-parser-algorithms@3.0.4(@csstools/css-tokenizer@3.0.3)':
|
||||
dependencies:
|
||||
'@csstools/css-tokenizer': 3.0.3
|
||||
|
||||
'@csstools/css-tokenizer@3.0.3': {}
|
||||
|
||||
'@dabh/diagnostics@2.0.3':
|
||||
dependencies:
|
||||
colorspace: 1.1.4
|
||||
@ -5642,13 +5793,13 @@ snapshots:
|
||||
|
||||
'@jsdevtools/ono@7.1.3': {}
|
||||
|
||||
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
||||
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
||||
dependencies:
|
||||
ansi-styles: 5.2.0
|
||||
camelcase: 6.3.0
|
||||
decamelize: 1.2.0
|
||||
js-tiktoken: 1.0.12
|
||||
langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
|
||||
langsmith: 0.1.34(7lljbsleilzgkaubvlq4ipicvq)
|
||||
ml-distance: 4.0.1
|
||||
mustache: 4.2.0
|
||||
p-queue: 6.6.2
|
||||
@ -5660,9 +5811,9 @@ snapshots:
|
||||
- langchain
|
||||
- openai
|
||||
|
||||
'@langchain/openai@0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
|
||||
'@langchain/openai@0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
|
||||
dependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
js-tiktoken: 1.0.12
|
||||
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)
|
||||
zod: 3.23.8
|
||||
@ -5671,9 +5822,9 @@ snapshots:
|
||||
- encoding
|
||||
- langchain
|
||||
|
||||
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
||||
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))':
|
||||
dependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
js-tiktoken: 1.0.12
|
||||
transitivePeerDependencies:
|
||||
- langchain
|
||||
@ -6811,6 +6962,8 @@ snapshots:
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
agent-base@7.1.3: {}
|
||||
|
||||
agentkeepalive@4.5.0:
|
||||
dependencies:
|
||||
humanize-ms: 1.2.1
|
||||
@ -7321,12 +7474,22 @@ snapshots:
|
||||
|
||||
css-what@6.1.0: {}
|
||||
|
||||
cssstyle@4.2.1:
|
||||
dependencies:
|
||||
'@asamuzakjp/css-color': 2.8.3
|
||||
rrweb-cssom: 0.8.0
|
||||
|
||||
csv-parse@5.5.6: {}
|
||||
|
||||
data-uri-to-buffer@4.0.1: {}
|
||||
|
||||
data-uri-to-buffer@6.0.2: {}
|
||||
|
||||
data-urls@5.0.0:
|
||||
dependencies:
|
||||
whatwg-mimetype: 4.0.0
|
||||
whatwg-url: 14.1.1
|
||||
|
||||
date-fns@3.6.0: {}
|
||||
|
||||
debug@2.6.9:
|
||||
@ -7351,6 +7514,8 @@ snapshots:
|
||||
|
||||
decamelize@4.0.0: {}
|
||||
|
||||
decimal.js@10.5.0: {}
|
||||
|
||||
dedent@1.5.3: {}
|
||||
|
||||
deepmerge@4.3.1: {}
|
||||
@ -7661,6 +7826,12 @@ snapshots:
|
||||
combined-stream: 1.0.8
|
||||
mime-types: 2.1.35
|
||||
|
||||
form-data@4.0.1:
|
||||
dependencies:
|
||||
asynckit: 0.4.0
|
||||
combined-stream: 1.0.8
|
||||
mime-types: 2.1.35
|
||||
|
||||
formdata-node@4.4.1:
|
||||
dependencies:
|
||||
node-domexception: 1.0.0
|
||||
@ -7795,6 +7966,10 @@ snapshots:
|
||||
dependencies:
|
||||
whatwg-encoding: 2.0.0
|
||||
|
||||
html-encoding-sniffer@4.0.0:
|
||||
dependencies:
|
||||
whatwg-encoding: 3.1.1
|
||||
|
||||
html-escaper@2.0.2: {}
|
||||
|
||||
html-to-text@9.0.5:
|
||||
@ -7875,6 +8050,13 @@ snapshots:
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
https-proxy-agent@7.0.6:
|
||||
dependencies:
|
||||
agent-base: 7.1.3
|
||||
debug: 4.3.5
|
||||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
human-signals@2.1.0: {}
|
||||
|
||||
humanize-ms@1.2.1:
|
||||
@ -7984,6 +8166,8 @@ snapshots:
|
||||
|
||||
is-plain-obj@2.1.0: {}
|
||||
|
||||
is-potential-custom-element-name@1.0.1: {}
|
||||
|
||||
is-retry-allowed@2.2.0: {}
|
||||
|
||||
is-stream@2.0.1: {}
|
||||
@ -8400,6 +8584,34 @@ snapshots:
|
||||
|
||||
jsbn@1.1.0: {}
|
||||
|
||||
jsdom@26.0.0:
|
||||
dependencies:
|
||||
cssstyle: 4.2.1
|
||||
data-urls: 5.0.0
|
||||
decimal.js: 10.5.0
|
||||
form-data: 4.0.1
|
||||
html-encoding-sniffer: 4.0.0
|
||||
http-proxy-agent: 7.0.2
|
||||
https-proxy-agent: 7.0.6
|
||||
is-potential-custom-element-name: 1.0.1
|
||||
nwsapi: 2.2.16
|
||||
parse5: 7.2.1
|
||||
rrweb-cssom: 0.8.0
|
||||
saxes: 6.0.0
|
||||
symbol-tree: 3.2.4
|
||||
tough-cookie: 5.1.1
|
||||
w3c-xmlserializer: 5.0.0
|
||||
webidl-conversions: 7.0.0
|
||||
whatwg-encoding: 3.1.1
|
||||
whatwg-mimetype: 4.0.0
|
||||
whatwg-url: 14.1.1
|
||||
ws: 8.18.0
|
||||
xml-name-validator: 5.0.0
|
||||
transitivePeerDependencies:
|
||||
- bufferutil
|
||||
- supports-color
|
||||
- utf-8-validate
|
||||
|
||||
jsesc@2.5.2: {}
|
||||
|
||||
json-parse-even-better-errors@2.3.1: {}
|
||||
@ -8435,17 +8647,17 @@ snapshots:
|
||||
|
||||
kuler@2.0.0: {}
|
||||
|
||||
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
|
||||
langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
|
||||
dependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
'@langchain/openai': 0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
|
||||
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
'@langchain/openai': 0.2.1(encoding@0.1.13)(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
|
||||
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
binary-extensions: 2.3.0
|
||||
js-tiktoken: 1.0.12
|
||||
js-yaml: 4.1.0
|
||||
jsonpointer: 5.0.1
|
||||
langchainhub: 0.0.11
|
||||
langsmith: 0.1.34(npkyd6f7wyl3urgrzoxaktl5a4)
|
||||
langsmith: 0.1.34(7lljbsleilzgkaubvlq4ipicvq)
|
||||
ml-distance: 4.0.1
|
||||
openapi-types: 12.1.3
|
||||
p-retry: 4.6.2
|
||||
@ -8463,6 +8675,7 @@ snapshots:
|
||||
handlebars: 4.7.8
|
||||
html-to-text: 9.0.5
|
||||
ioredis: 5.4.1
|
||||
jsdom: 26.0.0
|
||||
mammoth: 1.7.2
|
||||
mongodb: 6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3)
|
||||
pdf-parse: 1.1.1
|
||||
@ -8475,7 +8688,7 @@ snapshots:
|
||||
|
||||
langchainhub@0.0.11: {}
|
||||
|
||||
langsmith@0.1.34(npkyd6f7wyl3urgrzoxaktl5a4):
|
||||
langsmith@0.1.34(7lljbsleilzgkaubvlq4ipicvq):
|
||||
dependencies:
|
||||
'@types/uuid': 9.0.8
|
||||
commander: 10.0.1
|
||||
@ -8484,8 +8697,8 @@ snapshots:
|
||||
p-retry: 4.6.2
|
||||
uuid: 9.0.1
|
||||
optionalDependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))
|
||||
langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@pinecone-database/pinecone@4.0.0)(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(encoding@0.1.13)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(jsdom@26.0.0)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(encoding@0.1.13)(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||
openai: 4.57.0(encoding@0.1.13)(zod@3.23.8)

languagedetect@2.0.0: {}
@ -8554,6 +8767,8 @@ snapshots:

lru-cache@10.3.0: {}

lru-cache@10.4.3: {}

lru-cache@5.1.1:
dependencies:
yallist: 3.1.1
@ -8849,6 +9064,8 @@ snapshots:

num-sort@2.1.0: {}

nwsapi@2.2.16: {}

object-assign@4.1.1: {}

object-inspect@1.13.1: {}
@ -8979,6 +9196,10 @@ snapshots:
dependencies:
entities: 4.5.0

parse5@7.2.1:
dependencies:
entities: 4.5.0

parseley@0.12.1:
dependencies:
leac: 0.6.0
@ -9321,6 +9542,8 @@ snapshots:

robots-parser@3.0.1: {}

rrweb-cssom@0.8.0: {}

rusha@0.8.14: {}

safe-buffer@5.1.2: {}
@ -9333,6 +9556,10 @@ snapshots:

sax@1.4.1: {}

saxes@6.0.0:
dependencies:
xmlchars: 2.2.0

scheduler@0.23.2:
dependencies:
loose-envify: 1.4.0
@ -9583,6 +9810,8 @@ snapshots:

sylvester@0.0.12: {}

symbol-tree@3.2.4: {}

systeminformation@5.22.11: {}

tar-fs@3.0.5:
@ -9640,12 +9869,20 @@ snapshots:

touch@3.1.1: {}

tough-cookie@5.1.1:
dependencies:
tldts: 6.1.75

tr46@0.0.3: {}

tr46@4.1.1:
dependencies:
punycode: 2.3.1

tr46@5.0.0:
dependencies:
punycode: 2.3.1

triple-beam@1.4.1: {}
ts-jest@29.1.4(@babel/core@7.24.6)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.24.6))(jest@29.7.0(@types/node@20.14.1)(ts-node@10.9.2(@types/node@20.14.1)(typescript@5.4.5)))(typescript@5.4.5):
@ -9777,6 +10014,10 @@ snapshots:

vary@1.1.2: {}

w3c-xmlserializer@5.0.0:
dependencies:
xml-name-validator: 5.0.0

walker@1.0.8:
dependencies:
makeerror: 1.0.12
@ -9793,13 +10034,24 @@ snapshots:
dependencies:
iconv-lite: 0.6.3

whatwg-encoding@3.1.1:
dependencies:
iconv-lite: 0.6.3

whatwg-fetch@3.6.20: {}

whatwg-mimetype@4.0.0: {}

whatwg-url@13.0.0:
dependencies:
tr46: 4.1.1
webidl-conversions: 7.0.0

whatwg-url@14.1.1:
dependencies:
tr46: 5.0.0
webidl-conversions: 7.0.0

whatwg-url@5.0.0:
dependencies:
tr46: 0.0.3
@ -9868,6 +10120,8 @@ snapshots:

ws@8.18.0: {}

xml-name-validator@5.0.0: {}

xml2js@0.6.2:
dependencies:
sax: 1.4.1
@ -9877,6 +10131,8 @@ snapshots:

xmlbuilder@11.0.1: {}

xmlchars@2.2.0: {}

xtend@4.0.2: {}

y18n@5.0.8: {}
@ -30,7 +30,7 @@ async function batchScrape(body: BatchScrapeRequestInput): ReturnType<typeof bat
x = await batchScrapeStatus(bss.body.id);
expect(x.statusCode).toBe(200);
expect(typeof x.body.status).toBe("string");
} while (x.body.status !== "completed")
} while (x.body.status === "scraping");

expectBatchScrapeToSucceed(x);
return x;
@ -53,40 +53,51 @@ function expectBatchScrapeToSucceed(response: Awaited<ReturnType<typeof batchScr
}

describe("Batch scrape tests", () => {
describe("JSON format", () => {
it.concurrent("works", async () => {
const response = await batchScrape({
urls: ["http://firecrawl.dev"],
formats: ["json"],
jsonOptions: {
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
schema: {
type: "object",
properties: {
company_mission: {
type: "string",
},
supports_sso: {
type: "boolean",
},
is_open_source: {
type: "boolean",
},
},
required: ["company_mission", "supports_sso", "is_open_source"],
},
},
urls: ["http://firecrawl.dev"]
});

expect(response.body.data[0]).toHaveProperty("json");
expect(response.body.data[0].json).toHaveProperty("company_mission");
expect(typeof response.body.data[0].json.company_mission).toBe("string");
expect(response.body.data[0].json).toHaveProperty("supports_sso");
expect(response.body.data[0].json.supports_sso).toBe(false);
expect(typeof response.body.data[0].json.supports_sso).toBe("boolean");
expect(response.body.data[0].json).toHaveProperty("is_open_source");
expect(response.body.data[0].json.is_open_source).toBe(true);
expect(typeof response.body.data[0].json.is_open_source).toBe("boolean");
expect(response.body.data[0]).toHaveProperty("markdown");
expect(response.body.data[0].markdown).toContain("Firecrawl");
}, 30000);
});

if (!process.env.TEST_SUITE_SELF_HOSTED) {
describe("JSON format", () => {
it.concurrent("works", async () => {
const response = await batchScrape({
urls: ["http://firecrawl.dev"],
formats: ["json"],
jsonOptions: {
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
schema: {
type: "object",
properties: {
company_mission: {
type: "string",
},
supports_sso: {
type: "boolean",
},
is_open_source: {
type: "boolean",
},
},
required: ["company_mission", "supports_sso", "is_open_source"],
},
},
});

expect(response.body.data[0]).toHaveProperty("json");
expect(response.body.data[0].json).toHaveProperty("company_mission");
expect(typeof response.body.data[0].json.company_mission).toBe("string");
expect(response.body.data[0].json).toHaveProperty("supports_sso");
expect(response.body.data[0].json.supports_sso).toBe(false);
expect(typeof response.body.data[0].json.supports_sso).toBe("boolean");
expect(response.body.data[0].json).toHaveProperty("is_open_source");
expect(response.body.data[0].json.is_open_source).toBe(true);
expect(typeof response.body.data[0].json.is_open_source).toBe("boolean");
}, 30000);
});
}
});
@ -30,7 +30,7 @@ async function crawl(body: CrawlRequestInput): ReturnType<typeof crawlStatus> {
x = await crawlStatus(cs.body.id);
expect(x.statusCode).toBe(200);
expect(typeof x.body.status).toBe("string");
} while (x.body.status !== "completed")
} while (x.body.status === "scraping");

expectCrawlToSucceed(x);
return x;
@ -30,7 +30,7 @@ async function extract(body: ExtractRequestInput): Promise<ExtractResponse> {
x = await extractStatus(es.body.id);
expect(x.statusCode).toBe(200);
expect(typeof x.body.status).toBe("string");
} while (x.body.status !== "completed");
} while (x.body.status === "processing");

expectExtractToSucceed(x);
return x.body;
@ -51,31 +51,37 @@ function expectExtractToSucceed(response: Awaited<ReturnType<typeof extractStatu
}

describe("Extract tests", () => {
it.concurrent("works", async () => {
const res = await extract({
urls: ["https://firecrawl.dev"],
schema: {
"type": "object",
"properties": {
"company_mission": {
"type": "string"
if (!process.env.TEST_SUITE_SELF_HOSTED || process.env.OPENAI_API_KEY) {
it.concurrent("works", async () => {
const res = await extract({
urls: ["https://firecrawl.dev"],
schema: {
"type": "object",
"properties": {
"company_mission": {
"type": "string"
},
"is_open_source": {
"type": "boolean"
}
},
"is_open_source": {
"type": "boolean"
}
"required": [
"company_mission",
"is_open_source"
]
},
"required": [
"company_mission",
"is_open_source"
]
},
origin: "api-sdk",
});
origin: "api-sdk",
});

expect(res.data).toHaveProperty("company_mission");
expect(typeof res.data.company_mission).toBe("string")
expect(res.data).toHaveProperty("is_open_source");
expect(typeof res.data.is_open_source).toBe("boolean");
expect(res.data.is_open_source).toBe(true);
}, 60000);
expect(res.data).toHaveProperty("company_mission");
expect(typeof res.data.company_mission).toBe("string")
expect(res.data).toHaveProperty("is_open_source");
expect(typeof res.data.is_open_source).toBe("boolean");
expect(res.data.is_open_source).toBe(true);
}, 60000);
} else {
it.concurrent("dummy test", () => {
expect(true).toBe(true);
});
}
});
@ -21,7 +21,7 @@ function expectMapToSucceed(response: Awaited<ReturnType<typeof map>>) {
}

describe("Map tests", () => {
it("basic map succeeds", async () => {
it.concurrent("basic map succeeds", async () => {
const response = await map({
url: "http://firecrawl.dev",
});
@ -29,7 +29,7 @@ describe("Map tests", () => {
expectMapToSucceed(response);
}, 10000);

it("times out properly", async () => {
it.concurrent("times out properly", async () => {
const response = await map({
url: "http://firecrawl.dev",
timeout: 1
@ -40,14 +40,15 @@ describe("Map tests", () => {
expect(response.body.error).toBe("Request timed out");
}, 10000);

it("handles query parameters correctly", async () => {
it.concurrent("handles query parameters correctly", async () => {
let response = await map({
url: "https://www.hfea.gov.uk",
sitemapOnly: true,
useMock: "map-query-params",
});

expect(response.statusCode).toBe(200);
expect(response.body.success).toBe(true);
expect(response.body.links.some(x => x.match(/^https:\/\/www\.hfea\.gov\.uk\/choose-a-clinic\/clinic-search\/results\/?\?options=\d+$/))).toBe(true);
}, 300000);
}, 60000);
});
51
apps/api/src/__tests__/snips/mocks/map-query-params.json
Normal file
File diff suppressed because one or more lines are too long
@ -26,7 +26,7 @@ async function scrape(body: ScrapeRequestInput): Promise<Document> {
}

describe("Scrape tests", () => {
it("mocking works properly", async () => {
it.concurrent("mocking works properly", async () => {
// depends on falsified mock mocking-works-properly
// this test will fail if mock is bypassed with real data -- firecrawl.dev will never have
// that as its actual markdown output
@ -41,41 +41,34 @@ describe("Scrape tests", () => {
);
}, 10000);

describe("Ad blocking (f-e dependant)", () => {
it.concurrent("blocks ads by default", async () => {
it.concurrent("works", async () => {
const response = await scrape({
url: "http://firecrawl.dev"
});

expect(response.markdown).toContain("Firecrawl");
}, 10000);

if (process.env.TEST_SUITE_SELF_HOSTED && process.env.PROXY_SERVER) {
it.concurrent("self-hosted proxy works", async () => {
const response = await scrape({
url: "https://canyoublockit.com/testing/",
url: "https://icanhazip.com"
});

expect(response.markdown).not.toContain(".g.doubleclick.net/");
}, 10000);
expect(response.markdown?.trim()).toBe(process.env.PROXY_SERVER!.split("://").slice(-1)[0].split(":")[0]);
});
}

it.concurrent("doesn't block ads if explicitly disabled", async () => {
if (!process.env.TEST_SUITE_SELF_HOSTED || process.env.PLAYWRIGHT_MICROSERVICE_URL) {
it.concurrent("waitFor works", async () => {
const response = await scrape({
url: "https://canyoublockit.com/testing/",
blockAds: false,
url: "http://firecrawl.dev",
waitFor: 2000,
});

expect(response.markdown).toContain(".g.doubleclick.net/");
}, 10000);
});

describe("Location API (f-e dependant)", () => {
it.concurrent("works without specifying an explicit location", async () => {
const response = await scrape({
url: "https://iplocation.com",
});
}, 10000);

it.concurrent("works with country US", async () => {
const response = await scrape({
url: "https://iplocation.com",
location: { country: "US" },
});

expect(response.markdown).toContain("| Country | United States |");
}, 10000);
});
expect(response.markdown).toContain("Firecrawl");
}, 15000);
}

describe("JSON scrape support", () => {
it.concurrent("returns parseable JSON", async () => {
@ -89,82 +82,132 @@ describe("Scrape tests", () => {
}, 25000); // TODO: mock and shorten
});

describe("Screenshot", () => {
it.concurrent("screenshot format works", async () => {
const response = await scrape({
url: "http://firecrawl.dev",
formats: ["screenshot"]
});
if (!process.env.TEST_SUITE_SELF_HOSTED) {
describe("Ad blocking (f-e dependant)", () => {
it.concurrent("blocks ads by default", async () => {
const response = await scrape({
url: "https://www.allrecipes.com/recipe/18185/yum/",
});

expect(typeof response.screenshot).toBe("string");
}, 15000);
expect(response.markdown).not.toContain(".g.doubleclick.net/");
}, 10000);

it.concurrent("screenshot@fullPage format works", async () => {
const response = await scrape({
url: "http://firecrawl.dev",
formats: ["screenshot@fullPage"]
});
it.concurrent("doesn't block ads if explicitly disabled", async () => {
const response = await scrape({
url: "https://www.allrecipes.com/recipe/18185/yum/",
blockAds: false,
});

expect(typeof response.screenshot).toBe("string");
}, 15000);
});
expect(response.markdown).toContain(".g.doubleclick.net/");
}, 10000);
});

describe("JSON format", () => {
it.concurrent("works", async () => {
const response = await scrape({
url: "http://firecrawl.dev",
formats: ["json"],
jsonOptions: {
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
schema: {
type: "object",
properties: {
company_mission: {
type: "string",
},
supports_sso: {
type: "boolean",
},
is_open_source: {
type: "boolean",
describe("Location API (f-e dependant)", () => {
it.concurrent("works without specifying an explicit location", async () => {
const response = await scrape({
url: "https://iplocation.com",
});
}, 10000);

it.concurrent("works with country US", async () => {
const response = await scrape({
url: "https://iplocation.com",
location: { country: "US" },
});

expect(response.markdown).toContain("| Country | United States |");
}, 10000);
});

describe("Screenshot (f-e/sb dependant)", () => {
it.concurrent("screenshot format works", async () => {
const response = await scrape({
url: "http://firecrawl.dev",
formats: ["screenshot"]
});

expect(typeof response.screenshot).toBe("string");
}, 30000);

it.concurrent("screenshot@fullPage format works", async () => {
const response = await scrape({
url: "http://firecrawl.dev",
formats: ["screenshot@fullPage"]
});

expect(typeof response.screenshot).toBe("string");
}, 30000);
});

describe("Proxy API (f-e dependant)", () => {
it.concurrent("undefined works", async () => {
await scrape({
url: "http://firecrawl.dev",
});
}, 15000);

it.concurrent("basic works", async () => {
await scrape({
url: "http://firecrawl.dev",
proxy: "basic",
});
}, 15000);

it.concurrent("stealth works", async () => {
await scrape({
url: "http://firecrawl.dev",
proxy: "stealth",
});
}, 15000);
});

describe("PDF (f-e dependant)", () => {
it.concurrent("works for PDFs behind anti-bot", async () => {
const response = await scrape({
url: "https://www.researchgate.net/profile/Amir-Leshem/publication/220732050_Robust_adaptive_beamforming_based_on_jointly_estimating_covariance_matrix_and_steering_vector/links/0c96052d2fd8f0a84b000000/Robust-adaptive-beamforming-based-on-jointly-estimating-covariance-matrix-and-steering-vector.pdf"
});

expect(response.markdown).toContain("Robust adaptive beamforming based on jointly estimating covariance matrix");
}, 60000);
});
}

if (!process.env.TEST_SUITE_SELF_HOSTED || process.env.OPENAI_API_KEY) {
describe("JSON format", () => {
it.concurrent("works", async () => {
const response = await scrape({
url: "http://firecrawl.dev",
formats: ["json"],
jsonOptions: {
prompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source.",
schema: {
type: "object",
properties: {
company_mission: {
type: "string",
},
supports_sso: {
type: "boolean",
},
is_open_source: {
type: "boolean",
},
},
required: ["company_mission", "supports_sso", "is_open_source"],
},
required: ["company_mission", "supports_sso", "is_open_source"],
},
},
});
});

expect(response).toHaveProperty("json");
expect(response.json).toHaveProperty("company_mission");
expect(typeof response.json.company_mission).toBe("string");
expect(response.json).toHaveProperty("supports_sso");
expect(response.json.supports_sso).toBe(false);
expect(typeof response.json.supports_sso).toBe("boolean");
expect(response.json).toHaveProperty("is_open_source");
expect(response.json.is_open_source).toBe(true);
expect(typeof response.json.is_open_source).toBe("boolean");
}, 30000);
});

describe("Proxy API (f-e dependant)", () => {
it.concurrent("undefined works", async () => {
await scrape({
url: "http://firecrawl.dev",
});
}, 15000);

it.concurrent("basic works", async () => {
await scrape({
url: "http://firecrawl.dev",
proxy: "basic",
});
}, 15000);

it.concurrent("stealth works", async () => {
await scrape({
url: "http://firecrawl.dev",
proxy: "stealth",
});
}, 15000);
});
expect(response).toHaveProperty("json");
expect(response.json).toHaveProperty("company_mission");
expect(typeof response.json.company_mission).toBe("string");
expect(response.json).toHaveProperty("supports_sso");
expect(response.json.supports_sso).toBe(false);
expect(typeof response.json.supports_sso).toBe("boolean");
expect(response.json).toHaveProperty("is_open_source");
expect(response.json.is_open_source).toBe(true);
expect(typeof response.json.is_open_source).toBe("boolean");
}, 30000);
});
}
});
@ -27,10 +27,10 @@ async function search(body: SearchRequestInput): Promise<Document> {
return raw.body.data;
}

describe("Scrape tests", () => {
it("works", async () => {
describe("Search tests", () => {
it.concurrent("works", async () => {
await search({
query: "firecrawl"
});
}, 15000);
}, 60000);
});
@ -13,13 +13,13 @@ import {
getDoneJobsOrderedLength,
isCrawlKickoffFinished,
} from "../../lib/crawl-redis";
import { getScrapeQueue } from "../../services/queue-service";
import { getScrapeQueue, QueueFunction } from "../../services/queue-service";
import {
supabaseGetJobById,
supabaseGetJobsById,
} from "../../lib/supabase-jobs";
import { configDotenv } from "dotenv";
import type { Job, JobState } from "bullmq";
import type { Job, JobState, Queue } from "bullmq";
import { logger } from "../../lib/logger";
import { supabase_service } from "../../services/supabase";
import { getConcurrencyLimitedJobs } from "../../lib/concurrency-limit";
@ -245,7 +245,7 @@ export async function crawlStatusController(

let totalCount = jobIDs.length;

if (totalCount === 0) {
if (totalCount === 0 && process.env.USE_DB_AUTHENTICATION === "true") {
const x = await supabase_service
.from('firecrawl_jobs')
.select('*', { count: 'exact', head: true })
@ -1,7 +1,34 @@
import { Response } from "express";
import { supabaseGetJobsById } from "../../lib/supabase-jobs";
import { RequestWithAuth } from "./types";
import { getExtract, getExtractExpiry } from "../../lib/extract/extract-redis";
import { DBJob, PseudoJob } from "./crawl-status";
import { getExtractQueue } from "../../services/queue-service";
import { ExtractResult } from "../../lib/extract/extraction-service";
import { supabaseGetJobById } from "../../lib/supabase-jobs";

export async function getExtractJob(id: string): Promise<PseudoJob<ExtractResult> | null> {
const [bullJob, dbJob] = await Promise.all([
getExtractQueue().getJob(id),
(process.env.USE_DB_AUTHENTICATION === "true" ? supabaseGetJobById(id) : null) as Promise<DBJob | null>,
]);

if (!bullJob && !dbJob) return null;

const data = dbJob?.docs ?? bullJob?.returnvalue?.data;

const job: PseudoJob<any> = {
id,
getState: bullJob ? bullJob.getState : (() => dbJob!.success ? "completed" : "failed"),
returnvalue: data,
data: {
scrapeOptions: bullJob ? bullJob.data.scrapeOptions : dbJob!.page_options,
},
timestamp: bullJob ? bullJob.timestamp : new Date(dbJob!.date_added).valueOf(),
failedReason: (bullJob ? bullJob.failedReason : dbJob!.message) || undefined,
}

return job;
}

export async function extractStatusController(
req: RequestWithAuth<{ jobId: string }, any, any>,
@ -16,24 +43,29 @@ export async function extractStatusController(
});
}

let data: any[] = [];
let data: ExtractResult | [] = [];

if (extract.status === "completed") {
const jobData = await supabaseGetJobsById([req.params.jobId]);
if (!jobData || jobData.length === 0) {
const jobData = await getExtractJob(req.params.jobId);
if (!jobData) {
return res.status(404).json({
success: false,
error: "Job not found",
});
}

data = jobData[0].docs;
if (!jobData.returnvalue) {
// if we got in the split-second where the redis is updated but the bull isn't
// just pretend it's still processing - MG
extract.status = "processing";
} else {
data = jobData.returnvalue ?? [];
}
}

// console.log(extract.sources);
return res.status(200).json({
success: extract.status === "failed" ? false : true,
data: data,
data,
status: extract.status,
error: extract?.error ?? undefined,
expiresAt: (await getExtractExpiry(req.params.jobId)).toISOString(),
@ -5,6 +5,7 @@ import {
mapRequestSchema,
RequestWithAuth,
scrapeOptions,
TimeoutSignal,
} from "./types";
import { crawlToCrawler, StoredCrawl } from "../../lib/crawl-redis";
import { MapResponse, MapRequest } from "./types";
@ -53,6 +54,8 @@ export async function getMapResults({
origin,
includeMetadata = false,
allowExternalLinks,
abort = new AbortController().signal, // noop
mock,
}: {
url: string;
search?: string;
@ -65,6 +68,8 @@ export async function getMapResults({
origin?: string;
includeMetadata?: boolean;
allowExternalLinks?: boolean;
abort?: AbortSignal;
mock?: string;
}): Promise<MapResult> {
const id = uuidv4();
let links: string[] = [url];
@ -87,8 +92,8 @@ export async function getMapResults({
const crawler = crawlToCrawler(id, sc);

try {
sc.robots = await crawler.getRobotsTxt();
await crawler.importRobotsTxt(sc.robots);
sc.robots = await crawler.getRobotsTxt(false, abort);
crawler.importRobotsTxt(sc.robots);
} catch (_) {}

// If sitemapOnly is true, only get links from sitemap
@ -102,6 +107,8 @@ export async function getMapResults({
true,
true,
30000,
abort,
mock,
);
if (sitemap > 0) {
links = links
@ -144,7 +151,7 @@ export async function getMapResults({
return fireEngineMap(mapUrl, {
numResults: resultsPerPage,
page: page,
});
}, abort);
};

pagePromises = Array.from({ length: maxPages }, (_, i) =>
@ -157,7 +164,7 @@ export async function getMapResults({

// Parallelize sitemap index query with search results
const [sitemapIndexResult, ...searchResults] = await Promise.all([
querySitemapIndex(url),
querySitemapIndex(url, abort),
...(cachedResult ? [] : pagePromises),
]);

@ -178,6 +185,7 @@ export async function getMapResults({
true,
false,
30000,
abort,
);
} catch (e) {
logger.warn("tryGetSitemap threw an error", { error: e });
@ -277,6 +285,7 @@ export async function mapController(
req.body = mapRequestSchema.parse(req.body);

let result: Awaited<ReturnType<typeof getMapResults>>;
const abort = new AbortController();
try {
result = await Promise.race([
getMapResults({
@ -289,13 +298,18 @@ export async function mapController(
origin: req.body.origin,
teamId: req.auth.team_id,
plan: req.auth.plan,
abort: abort.signal,
mock: req.body.useMock,
}),
...(req.body.timeout !== undefined ? [
new Promise((resolve, reject) => setTimeout(() => reject("timeout"), req.body.timeout))
new Promise((resolve, reject) => setTimeout(() => {
abort.abort(new TimeoutSignal());
reject(new TimeoutSignal());
}, req.body.timeout))
] : []),
]) as any;
} catch (error) {
if (error === "timeout") {
if (error instanceof TimeoutSignal || error === "timeout") {
return res.status(408).json({
success: false,
error: "Request timed out",
@ -501,6 +501,7 @@ export const mapRequestSchema = crawlerOptions
sitemapOnly: z.boolean().default(false),
limit: z.number().min(1).max(5000).default(5000),
timeout: z.number().positive().finite().optional(),
useMock: z.string().optional(),
})
.strict(strictMessage);

@ -1004,3 +1005,9 @@ export const generateLLMsTextRequestSchema = z.object({
export type GenerateLLMsTextRequest = z.infer<
typeof generateLLMsTextRequestSchema
>;

export class TimeoutSignal extends Error {
constructor() {
super("Operation timed out")
}
}
@ -1,38 +1,10 @@
import { CONCURRENCY_LIMIT } from "../services/rate-limiter";
import { redisConnection } from "../services/queue-service";
import { PlanType } from "../types";
import type { Job, JobsOptions } from "bullmq";
import type { JobsOptions } from "bullmq";

const constructKey = (team_id: string) => "concurrency-limiter:" + team_id;
const constructQueueKey = (team_id: string) =>
"concurrency-limit-queue:" + team_id;

export function calculateJobTimeToRun(
job: ConcurrencyLimitedJob
): number {
let jobTimeToRun = 86400000; // 24h (crawl)

if (job.data.scrapeOptions) {
if (job.data.scrapeOptions.timeout) {
jobTimeToRun = job.data.scrapeOptions.timeout;
}

if (job.data.scrapeOptions.waitFor) {
jobTimeToRun += job.data.scrapeOptions.waitFor;
}

(job.data.scrapeOptions.actions ?? []).forEach(x => {
if (x.type === "wait" && x.milliseconds) {
jobTimeToRun += x.milliseconds;
} else {
jobTimeToRun += 1000;
}
})
}

return jobTimeToRun;
}

export async function cleanOldConcurrencyLimitEntries(
team_id: string,
now: number = Date.now(),
@ -7,7 +7,6 @@ import {
} from "../build-prompts";
import OpenAI from "openai";
import { logger } from "../../../lib/logger";
const openai = new OpenAI();

export async function analyzeSchemaAndPrompt(
urls: string[],
@ -40,6 +39,7 @@ export async function analyzeSchemaAndPrompt(

const model = "gpt-4o";

const openai = new OpenAI();
const result = await openai.beta.chat.completions.parse({
model: model,
messages: [
@ -48,7 +48,7 @@ interface ExtractServiceOptions {
cacheKey?: string;
}

interface ExtractResult {
export interface ExtractResult {
success: boolean;
data?: any;
extractId: string;
@ -3,10 +3,6 @@ import { Document } from "../../../controllers/v1/types";
import { logger } from "../../logger";
import OpenAI from "openai";

const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});

const pinecone = new Pinecone({
apiKey: process.env.PINECONE_API_KEY!,
});
@ -27,6 +23,10 @@ export interface PageMetadata {
}

async function getEmbedding(text: string) {
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});

const embedding = await openai.embeddings.create({
model: "text-embedding-3-small",
input: text,
@ -1,9 +1,5 @@
import OpenAI from "openai";

const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});

interface Message {
role: "system" | "user" | "assistant";
content: string;
@ -19,6 +15,10 @@ interface GenerateTextOptions {
export async function generateText(options: GenerateTextOptions) {
const { model, messages, temperature = 0.7, maxTokens } = options;

const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});

const completion = await openai.chat.completions.create({
model,
messages,
@ -1,14 +1,13 @@
import axios from "axios";
import { configDotenv } from "dotenv";
import OpenAI from "openai";

configDotenv();

const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});

async function getEmbedding(text: string) {
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});

const embedding = await openai.embeddings.create({
model: "text-embedding-3-small",
input: text,
@ -9,6 +9,7 @@ import { logger as _logger } from "../../lib/logger";
import https from "https";
import { redisConnection } from "../../services/queue-service";
import { extractLinks } from "../../lib/html-transformer";
import { TimeoutSignal } from "../../controllers/v1/types";
export class WebCrawler {
private jobId: string;
private initialUrl: string;
@ -182,7 +183,7 @@ export class WebCrawler {
.slice(0, limit);
}

public async getRobotsTxt(skipTlsVerification = false): Promise<string> {
public async getRobotsTxt(skipTlsVerification = false, abort?: AbortSignal): Promise<string> {
let extraArgs = {};
if (skipTlsVerification) {
extraArgs["httpsAgent"] = new https.Agent({
@ -191,6 +192,7 @@ export class WebCrawler {
}
const response = await axios.get(this.robotsTxtUrl, {
timeout: axiosTimeout,
signal: abort,
...extraArgs,
});
return response.data;
@ -205,6 +207,8 @@ export class WebCrawler {
fromMap: boolean = false,
onlySitemap: boolean = false,
timeout: number = 120000,
abort?: AbortSignal,
mock?: string,
): Promise<number> {
this.logger.debug(`Fetching sitemap links from ${this.initialUrl}`, {
method: "tryGetSitemap",
@ -260,10 +264,10 @@ export class WebCrawler {
try {
let count = (await Promise.race([
Promise.all([
this.tryFetchSitemapLinks(this.initialUrl, _urlsHandler),
this.tryFetchSitemapLinks(this.initialUrl, _urlsHandler, abort, mock),
...this.robots
.getSitemaps()
.map((x) => this.tryFetchSitemapLinks(x, _urlsHandler)),
.map((x) => this.tryFetchSitemapLinks(x, _urlsHandler, abort, mock)),
]).then((results) => results.reduce((a, x) => a + x, 0)),
timeoutPromise,
])) as number;
@ -555,6 +559,8 @@ export class WebCrawler {
private async tryFetchSitemapLinks(
url: string,
urlsHandler: (urls: string[]) => unknown,
abort?: AbortSignal,
mock?: string,
): Promise<number> {
const sitemapUrl = url.endsWith(".xml")
? url
@ -569,13 +575,19 @@ export class WebCrawler {
this.logger,
this.jobId,
this.sitemapsHit,
abort,
mock,
);
} catch (error) {
this.logger.debug(`Failed to fetch sitemap from ${sitemapUrl}`, {
method: "tryFetchSitemapLinks",
sitemapUrl,
error,
});
if (error instanceof TimeoutSignal) {
throw error;
} else {
this.logger.debug(`Failed to fetch sitemap from ${sitemapUrl}`, {
method: "tryFetchSitemapLinks",
sitemapUrl,
error,
});
}
}

// If this is a subdomain, also try to get sitemap from the main domain
@ -611,20 +623,30 @@ export class WebCrawler {
this.logger,
this.jobId,
this.sitemapsHit,
abort,
mock,
);
} catch (error) {
this.logger.debug(
`Failed to fetch main domain sitemap from ${mainDomainSitemapUrl}`,
{ method: "tryFetchSitemapLinks", mainDomainSitemapUrl, error },
);
if (error instanceof TimeoutSignal) {
throw error;
} else {
this.logger.debug(
`Failed to fetch main domain sitemap from ${mainDomainSitemapUrl}`,
{ method: "tryFetchSitemapLinks", mainDomainSitemapUrl, error },
);
}
}
}
} catch (error) {
this.logger.debug(`Error processing main domain sitemap`, {
method: "tryFetchSitemapLinks",
url,
error,
});
if (error instanceof TimeoutSignal) {
throw error;
} else {
this.logger.debug(`Error processing main domain sitemap`, {
method: "tryFetchSitemapLinks",
url,
error,
});
}
}

// If no sitemap found yet, try the baseUrl as a last resort
@ -636,22 +658,30 @@ export class WebCrawler {
this.logger,
this.jobId,
this.sitemapsHit,
abort,
mock,
);
} catch (error) {
this.logger.debug(`Failed to fetch sitemap from ${baseUrlSitemap}`, {
method: "tryFetchSitemapLinks",
sitemapUrl: baseUrlSitemap,
error,
});
if (error instanceof AxiosError && error.response?.status === 404) {
// ignore 404
if (error instanceof TimeoutSignal) {
throw error;
} else {
sitemapCount += await getLinksFromSitemap(
{ sitemapUrl: baseUrlSitemap, urlsHandler, mode: "fire-engine" },
this.logger,
this.jobId,
this.sitemapsHit,
);
this.logger.debug(`Failed to fetch sitemap from ${baseUrlSitemap}`, {
method: "tryFetchSitemapLinks",
sitemapUrl: baseUrlSitemap,
error,
});
if (error instanceof AxiosError && error.response?.status === 404) {
// ignore 404
} else {
sitemapCount += await getLinksFromSitemap(
{ sitemapUrl: baseUrlSitemap, urlsHandler, mode: "fire-engine" },
this.logger,
this.jobId,
this.sitemapsHit,
abort,
mock,
);
}
}
}
}
@ -12,10 +12,11 @@ import { supabase_service } from "../../services/supabase";
*/
import { withAuth } from "../../lib/withAuth";

async function querySitemapIndexFunction(url: string) {
async function querySitemapIndexFunction(url: string, abort?: AbortSignal) {
const originUrl = normalizeUrlOnlyHostname(url);

for (let attempt = 1; attempt <= 3; attempt++) {
abort?.throwIfAborted();
try {
const { data, error } = await supabase_service
.from("crawl_maps")
@ -1,8 +1,7 @@
import { axiosTimeout } from "../../lib/timeout";
import { parseStringPromise } from "xml2js";
import { WebCrawler } from "./crawler";
import { scrapeURL } from "../scrapeURL";
import { scrapeOptions } from "../../controllers/v1/types";
import { scrapeOptions, TimeoutSignal } from "../../controllers/v1/types";
import type { Logger } from "winston";
const useFireEngine =
process.env.FIRE_ENGINE_BETA_URL !== "" &&
@ -20,6 +19,8 @@ export async function getLinksFromSitemap(
logger: Logger,
crawlId: string,
sitemapsHit: Set<string>,
abort?: AbortSignal,
mock?: string,
): Promise<number> {
if (sitemapsHit.size >= 20) {
return 0;
@ -38,13 +39,14 @@ export async function getLinksFromSitemap(
const response = await scrapeURL(
"sitemap;" + crawlId,
sitemapUrl,
scrapeOptions.parse({ formats: ["rawHtml"] }),
scrapeOptions.parse({ formats: ["rawHtml"], useMock: mock }),
{
forceEngine: [
"fetch",
...((mode === "fire-engine" && useFireEngine) ? ["fire-engine;tlsclient" as const] : []),
],
v0DisableJsDom: true
v0DisableJsDom: true,
abort,
},
);

@ -69,14 +71,18 @@ export async function getLinksFromSitemap(
return 0;
}
} catch (error) {
logger.error(`Request failed for sitemap fetch`, {
method: "getLinksFromSitemap",
mode,
sitemapUrl,
error,
});
if (error instanceof TimeoutSignal) {
throw error;
} else {
logger.error(`Request failed for sitemap fetch`, {
method: "getLinksFromSitemap",
mode,
sitemapUrl,
error,
});

return 0;
return 0;
}
}

const parsed = await parseStringPromise(content);
@ -90,7 +96,7 @@ export async function getLinksFromSitemap(
.map((sitemap) => sitemap.loc[0].trim());

const sitemapPromises: Promise<number>[] = sitemapUrls.map((sitemapUrl) =>
getLinksFromSitemap({ sitemapUrl, urlsHandler, mode }, logger, crawlId, sitemapsHit),
getLinksFromSitemap({ sitemapUrl, urlsHandler, mode }, logger, crawlId, sitemapsHit, abort, mock),
);

const results = await Promise.all(sitemapPromises);
@ -114,6 +120,8 @@ export async function getLinksFromSitemap(
logger,
crawlId,
sitemapsHit,
abort,
mock,
),
);
count += (await Promise.all(sitemapPromises)).reduce(
@ -151,56 +159,3 @@ export async function getLinksFromSitemap(

return 0;
}

export const fetchSitemapData = async (
url: string,
timeout?: number,
): Promise<SitemapEntry[] | null> => {
const sitemapUrl = url.endsWith("/sitemap.xml") ? url : `${url}/sitemap.xml`;
try {
const fetchResponse = await scrapeURL(
"sitemap",
sitemapUrl,
scrapeOptions.parse({
formats: ["rawHtml"],
timeout: timeout || axiosTimeout,
}),
{ forceEngine: "fetch" },
);

if (
fetchResponse.success &&
fetchResponse.document.metadata.statusCode >= 200 &&
fetchResponse.document.metadata.statusCode < 300
) {
const xml = fetchResponse.document.rawHtml!;
const parsedXml = await parseStringPromise(xml);

const sitemapData: SitemapEntry[] = [];
if (parsedXml.urlset && parsedXml.urlset.url) {
for (const urlElement of parsedXml.urlset.url) {
const sitemapEntry: SitemapEntry = { loc: urlElement.loc[0] };
if (urlElement.lastmod) sitemapEntry.lastmod = urlElement.lastmod[0];
if (urlElement.changefreq)
sitemapEntry.changefreq = urlElement.changefreq[0];
if (urlElement.priority)
sitemapEntry.priority = Number(urlElement.priority[0]);
sitemapData.push(sitemapEntry);
}
}

return sitemapData;
}
return null;
} catch (error) {
// Error handling for failed sitemap fetch
}
return [];
};

export interface SitemapEntry {
loc: string;
lastmod?: string;
changefreq?: string;
priority?: number;
}
@ -7,6 +7,7 @@ import {
InsecureConnectionError,
makeSecureDispatcher,
} from "../utils/safeFetch";
import { MockState, saveMock } from "../../lib/mock";

export async function scrapeURLWithFetch(
meta: Meta,
@ -14,44 +15,95 @@ export async function scrapeURLWithFetch(
): Promise<EngineScrapeResult> {
const timeout = timeToRun ?? 300000;

let response: undici.Response;
try {
response = await Promise.race([
undici.fetch(meta.url, {
dispatcher: await makeSecureDispatcher(meta.url),
redirect: "follow",
headers: meta.options.headers,
}),
(async () => {
await new Promise((resolve) =>
setTimeout(() => resolve(null), timeout),
const mockOptions = {
url: meta.url,

// irrelevant
method: "GET",
ignoreResponse: false,
ignoreFailure: false,
tryCount: 1,
};

let response: {
url: string;
body: string,
status: number;
headers: any;
};

if (meta.mock !== null) {
const makeRequestTypeId = (
request: MockState["requests"][number]["options"],
) => request.url + ";" + request.method;

const thisId = makeRequestTypeId(mockOptions);
const matchingMocks = meta.mock.requests
.filter((x) => makeRequestTypeId(x.options) === thisId)
.sort((a, b) => a.time - b.time);
const nextI = meta.mock.tracker[thisId] ?? 0;
meta.mock.tracker[thisId] = nextI + 1;

if (!matchingMocks[nextI]) {
throw new Error("Failed to mock request -- no mock targets found.");
}

response = {
...matchingMocks[nextI].result,
};
} else {
try {
const x = await Promise.race([
undici.fetch(meta.url, {
dispatcher: await makeSecureDispatcher(meta.url),
redirect: "follow",
headers: meta.options.headers,
signal: meta.internalOptions.abort,
}),
(async () => {
await new Promise((resolve) =>
setTimeout(() => resolve(null), timeout),
);
throw new TimeoutError(
"Fetch was unable to scrape the page before timing out",
{ cause: { timeout } },
);
})(),
]);

response = {
url: x.url,
body: await x.text(),
status: x.status,
headers: [...x.headers],
};

if (meta.mock === null) {
await saveMock(
mockOptions,
response,
);
throw new TimeoutError(
"Fetch was unable to scrape the page before timing out",
{ cause: { timeout } },
);
})(),
]);
} catch (error) {
if (
error instanceof TypeError &&
error.cause instanceof InsecureConnectionError
) {
throw error.cause;
} else {
throw error;
}
} catch (error) {
if (
error instanceof TypeError &&
error.cause instanceof InsecureConnectionError
) {
throw error.cause;
} else {
throw error;
}
}
}

specialtyScrapeCheck(
await specialtyScrapeCheck(
meta.logger.child({ method: "scrapeURLWithFetch/specialtyScrapeCheck" }),
Object.fromEntries(response.headers as any),
);

return {
url: response.url,
html: await response.text(),
html: response.body,
statusCode: response.status,
// TODO: error?
};
}
@ -10,6 +10,7 @@ import {
UnsupportedFileError,
} from "../../error";
import { MockState } from "../../lib/mock";
import { fireEngineURL } from "./scrape";

const successSchema = z.object({
jobId: z.string(),
@ -84,9 +85,8 @@ export async function fireEngineCheckStatus(
logger: Logger,
jobId: string,
mock: MockState | null,
abort?: AbortSignal,
): Promise<FireEngineCheckStatusSuccess> {
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;

const status = await Sentry.startSpan(
{
name: "fire-engine: Check status",
@ -3,14 +3,13 @@ import * as Sentry from "@sentry/node";

import { robustFetch } from "../../lib/fetch";
import { MockState } from "../../lib/mock";
import { fireEngineURL } from "./scrape";

export async function fireEngineDelete(
logger: Logger,
jobId: string,
mock: MockState | null,
) {
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;

await Sentry.startSpan(
{
name: "fire-engine: Delete scrape",
@ -24,8 +24,9 @@ import * as Sentry from "@sentry/node";
import { Action } from "../../../../lib/entities";
import { specialtyScrapeCheck } from "../utils/specialtyHandler";
import { fireEngineDelete } from "./delete";
import { MockState, saveMock } from "../../lib/mock";
import { MockState } from "../../lib/mock";
import { getInnerJSON } from "../../../../lib/html-transformer";
import { TimeoutSignal } from "../../../../controllers/v1/types";

// This function does not take `Meta` on purpose. It may not access any
// meta values to construct the request -- that must be done by the
@ -40,6 +41,7 @@ async function performFireEngineScrape<
request: FireEngineScrapeRequestCommon & Engine,
timeout: number,
mock: MockState | null,
abort?: AbortSignal,
): Promise<FireEngineCheckStatusSuccess> {
const scrape = await fireEngineScrape(
logger.child({ method: "fireEngineScrape" }),
@ -84,6 +86,7 @@ async function performFireEngineScrape<
logger.child({ method: "fireEngineCheckStatus" }),
scrape.jobId,
mock,
abort,
);
} catch (error) {
if (error instanceof StillProcessingError) {
@ -107,6 +110,16 @@ async function performFireEngineScrape<
jobId: scrape.jobId,
});
throw error;
} else if (error instanceof TimeoutSignal) {
fireEngineDelete(
logger.child({
method: "performFireEngineScrape/fireEngineDelete",
afterError: error,
}),
scrape.jobId,
mock,
);
throw error;
} else {
Sentry.captureException(error);
errors.push(error);
@ -120,11 +133,12 @@ async function performFireEngineScrape<
await new Promise((resolve) => setTimeout(resolve, 250));
}

specialtyScrapeCheck(
await specialtyScrapeCheck(
logger.child({
method: "performFireEngineScrape/specialtyScrapeCheck",
}),
status.responseHeaders,
status,
);

const contentType = (Object.entries(status.responseHeaders ?? {}).find(
@ -219,6 +233,7 @@ export async function scrapeURLWithFireEngineChromeCDP(
request,
timeout,
meta.mock,
meta.internalOptions.abort,
);

if (
@ -298,6 +313,7 @@ export async function scrapeURLWithFireEnginePlaywright(
request,
timeout,
meta.mock,
meta.internalOptions.abort,
);

if (!response.url) {
@ -353,6 +369,7 @@ export async function scrapeURLWithFireEngineTLSClient(
request,
timeout,
meta.mock,
meta.internalOptions.abort,
);

if (!response.url) {
@ -65,6 +65,8 @@ const schema = z.object({
processing: z.boolean(),
});

export const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL ?? "<mock-fire-engine-url>";

export async function fireEngineScrape<
Engine extends
| FireEngineScrapeRequestChromeCDP
@ -74,11 +76,8 @@ export async function fireEngineScrape<
logger: Logger,
request: FireEngineScrapeRequestCommon & Engine,
mock: MockState | null,
abort?: AbortSignal,
): Promise<z.infer<typeof schema>> {
const fireEngineURL = process.env.FIRE_ENGINE_BETA_URL!;

// TODO: retries

const scrapeRequest = await Sentry.startSpan(
{
name: "fire-engine: Scrape",
@ -103,6 +102,7 @@ export async function fireEngineScrape<
schema,
tryCount: 3,
mock,
abort,
});
},
);
@ -310,7 +310,12 @@ export function buildFallbackList(meta: Meta): {
engine: Engine;
unsupportedFeatures: Set<FeatureFlag>;
}[] {
const _engines = [...engines];
const _engines: Engine[] = [
...engines,

// enable fire-engine in self-hosted testing environment when mocks are supplied
...((!useFireEngine && meta.mock !== null) ? ["fire-engine;chrome-cdp", "fire-engine;playwright", "fire-engine;tlsclient"] as Engine[] : [])
];

if (meta.internalOptions.useCache !== true) {
const cacheIndex = _engines.indexOf("cache");
@ -7,9 +7,10 @@ import * as Sentry from "@sentry/node";
import escapeHtml from "escape-html";
import PdfParse from "pdf-parse";
import { downloadFile, fetchFileToBuffer } from "../utils/downloadFile";
import { RemoveFeatureError, UnsupportedFileError } from "../../error";
import { PDFAntibotError, RemoveFeatureError, UnsupportedFileError } from "../../error";
import { readFile, unlink } from "node:fs/promises";
import path from "node:path";
import type { Response } from "undici";

type PDFProcessorResult = { html: string; markdown?: string };

@ -75,22 +76,49 @@ export async function scrapePDF(
timeToRun: number | undefined,
): Promise<EngineScrapeResult> {
if (!meta.options.parsePDF) {
const file = await fetchFileToBuffer(meta.url, {
headers: meta.options.headers,
});
const content = file.buffer.toString("base64");
return {
url: file.response.url,
statusCode: file.response.status,
if (meta.pdfPrefetch !== undefined && meta.pdfPrefetch !== null) {
const content = (await readFile(meta.pdfPrefetch.filePath)).toString("base64");
return {
url: meta.pdfPrefetch.url ?? meta.url,
statusCode: meta.pdfPrefetch.status,

html: content,
markdown: content,
};
html: content,
markdown: content,
};
} else {
const file = await fetchFileToBuffer(meta.url, {
headers: meta.options.headers,
});

const ct = file.response.headers.get("Content-Type");
if (ct && !ct.includes("application/pdf")) { // if downloaded file wasn't a PDF
throw new PDFAntibotError();
}

const content = file.buffer.toString("base64");
return {
url: file.response.url,
statusCode: file.response.status,

html: content,
markdown: content,
};
}
}

const { response, tempFilePath } = await downloadFile(meta.id, meta.url, {
headers: meta.options.headers,
});
const { response, tempFilePath } = (meta.pdfPrefetch !== undefined && meta.pdfPrefetch !== null)
? { response: meta.pdfPrefetch, tempFilePath: meta.pdfPrefetch.filePath }
: await downloadFile(meta.id, meta.url, {
headers: meta.options.headers,
});

if ((response as any).headers) { // if downloadFile was used
const r: Response = response as any;
const ct = r.headers.get("Content-Type");
if (ct && !ct.includes("application/pdf")) { // if downloaded file wasn't a PDF
throw new PDFAntibotError();
}
}

let result: PDFProcessorResult | null = null;

@ -142,7 +170,7 @@ export async function scrapePDF(
await unlink(tempFilePath);

return {
url: response.url,
url: response.url ?? meta.url,
statusCode: response.status,
html: result?.html ?? "",
markdown: result?.markdown ?? "",
@ -72,7 +72,7 @@ export function scrapeURLWithScrapingBee(
});
}

specialtyScrapeCheck(
await specialtyScrapeCheck(
meta.logger.child({
method: "scrapeURLWithScrapingBee/specialtyScrapeCheck",
}),
@ -43,14 +43,24 @@ export function makeSecureDispatcher(
url: string,
options?: undici.Agent.Options,
) {
const agent = new undici.Agent({
const agentOpts: undici.Agent.Options = {
connect: {
rejectUnauthorized: false, // bypass SSL failures -- this is fine
// lookup: secureLookup,
},
maxRedirections: 5000,
...options,
});
};

const agent = process.env.PROXY_SERVER
? new undici.ProxyAgent({
uri: process.env.PROXY_SERVER.includes("://") ? process.env.PROXY_SERVER : ("http://" + process.env.PROXY_SERVER),
token: process.env.PROXY_USERNAME
? `Basic ${Buffer.from(process.env.PROXY_USERNAME + ":" + (process.env.PROXY_PASSWORD ?? "")).toString("base64")}`
: undefined,
...agentOpts,
})
: new undici.Agent(agentOpts);

agent.on("connect", (_, targets) => {
const client: undici.Client = targets.slice(-1)[0] as undici.Client;
@ -1,9 +1,30 @@
import { Logger } from "winston";
import { AddFeatureError } from "../../error";
import { FireEngineCheckStatusSuccess } from "../fire-engine/checkStatus";
import path from "path";
import os from "os";
import { writeFile } from "fs/promises";
import { Meta } from "../..";

export function specialtyScrapeCheck(
async function feResToPdfPrefetch(feRes: FireEngineCheckStatusSuccess | undefined): Promise<Meta["pdfPrefetch"]> {
  if (!feRes?.file) {
    return null;
  }

  const filePath = path.join(os.tmpdir(), `tempFile-${crypto.randomUUID()}.pdf`);
  await writeFile(filePath, Buffer.from(feRes.file.content, "base64"));

  return {
    status: feRes.pageStatusCode,
    url: feRes.url,
    filePath,
  };
}

export async function specialtyScrapeCheck(
  logger: Logger,
  headers: Record<string, string> | undefined,
  feRes?: FireEngineCheckStatusSuccess,
) {
  const contentType = (Object.entries(headers ?? {}).find(
    (x) => x[0].toLowerCase() === "content-type",
@ -18,7 +39,7 @@ export function specialtyScrapeCheck(
  contentType.startsWith("application/pdf;")
) {
  // .pdf
  throw new AddFeatureError(["pdf"]);
  throw new AddFeatureError(["pdf"], await feResToPdfPrefetch(feRes));
} else if (
  contentType ===
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document" ||
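The temp-file step inside feResToPdfPrefetch is worth seeing in isolation — a hedged sketch (the helper name is made up; note the diff uses the global crypto.randomUUID(), available in modern Node, while this sketch imports randomUUID explicitly to stay self-contained):

import os from "os";
import path from "path";
import { randomUUID } from "crypto";
import { writeFile } from "fs/promises";

// Decode a base64 PDF payload into a uniquely-named file in the OS temp dir,
// so a later engine pass can pick it up without re-downloading.
async function stashBase64Pdf(contentB64: string): Promise<string> {
  const filePath = path.join(os.tmpdir(), `tempFile-${randomUUID()}.pdf`);
  await writeFile(filePath, Buffer.from(contentB64, "base64"));
  return filePath;
}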
@ -1,4 +1,4 @@
import { EngineResultsTracker } from ".";
import { EngineResultsTracker, Meta } from ".";
import { Engine, FeatureFlag } from "./engines";

export class EngineError extends Error {
@ -28,10 +28,12 @@ export class NoEnginesLeftError extends Error {

export class AddFeatureError extends Error {
  public featureFlags: FeatureFlag[];
  public pdfPrefetch: Meta["pdfPrefetch"];

  constructor(featureFlags: FeatureFlag[]) {
  constructor(featureFlags: FeatureFlag[], pdfPrefetch?: Meta["pdfPrefetch"]) {
    super("New feature flags have been discovered: " + featureFlags.join(", "));
    this.featureFlags = featureFlags;
    this.pdfPrefetch = pdfPrefetch;
  }
}

@ -72,3 +74,9 @@ export class UnsupportedFileError extends Error {
    this.reason = reason;
  }
}

export class PDFAntibotError extends Error {
  constructor() {
    super("PDF scrape was prevented by anti-bot");
  }
}
@ -1,7 +1,7 @@
import { Logger } from "winston";
import * as Sentry from "@sentry/node";

import { Document, ScrapeOptions } from "../../controllers/v1/types";
import { Document, ScrapeOptions, TimeoutSignal } from "../../controllers/v1/types";
import { logger as _logger } from "../../lib/logger";
import {
  buildFallbackList,
@ -16,6 +16,7 @@ import {
  AddFeatureError,
  EngineError,
  NoEnginesLeftError,
  PDFAntibotError,
  RemoveFeatureError,
  SiteError,
  TimeoutError,
@ -49,6 +50,11 @@ export type Meta = {
  logs: any[];
  featureFlags: Set<FeatureFlag>;
  mock: MockState | null;
  pdfPrefetch: {
    filePath: string;
    url?: string;
    status: number;
  } | null | undefined; // undefined: no prefetch yet, null: prefetch came back empty
};

function buildFeatureFlags(
@ -151,6 +157,7 @@ async function buildMetaObject(
  options.useMock !== undefined
    ? await loadMock(options.useMock, _logger)
    : null,
  pdfPrefetch: undefined,
};
}

@ -165,6 +172,7 @@ export type InternalOptions = {
  disableSmartWaitCache?: boolean; // Passed along to fire-engine
  isBackgroundIndex?: boolean;
  fromCache?: boolean; // Indicates if the document was retrieved from cache
  abort?: AbortSignal;
};

export type EngineResultsTracker = {
@ -222,6 +230,7 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
  : undefined;

for (const { engine, unsupportedFeatures } of fallbackList) {
  meta.internalOptions.abort?.throwIfAborted();
  const startedAt = Date.now();
  try {
    meta.logger.info("Scraping via " + engine + "...");
@ -307,6 +316,10 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
  throw error;
} else if (error instanceof UnsupportedFileError) {
  throw error;
} else if (error instanceof PDFAntibotError) {
  throw error;
} else if (error instanceof TimeoutSignal) {
  throw error;
} else {
  Sentry.captureException(error);
  meta.logger.warn(
@ -390,6 +403,9 @@ export async function scrapeURL(
  meta.featureFlags = new Set(
    [...meta.featureFlags].concat(error.featureFlags),
  );
  if (error.pdfPrefetch) {
    meta.pdfPrefetch = error.pdfPrefetch;
  }
} else if (
  error instanceof RemoveFeatureError &&
  meta.internalOptions.forceEngine === undefined
@ -404,6 +420,21 @@ export async function scrapeURL(
    (x) => !error.featureFlags.includes(x),
  ),
);
} else if (
  error instanceof PDFAntibotError &&
  meta.internalOptions.forceEngine === undefined
) {
  if (meta.pdfPrefetch !== undefined) {
    meta.logger.error("PDF was prefetched and still blocked by antibot, failing");
    throw error;
  } else {
    meta.logger.debug("PDF was blocked by anti-bot, prefetching with chrome-cdp");
    meta.featureFlags = new Set(
      [...meta.featureFlags].filter(
        (x) => x !== "pdf",
      ),
    );
  }
} else {
  throw error;
}
@ -433,6 +464,8 @@ export async function scrapeURL(
  meta.logger.warn("scrapeURL: Tried to scrape unsupported file", {
    error,
  });
} else if (error instanceof TimeoutSignal) {
  throw error;
} else {
  Sentry.captureException(error);
  meta.logger.error("scrapeURL: Unexpected error happened", { error });
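In effect, the retry loop above treats PDFAntibotError as a one-shot signal. A hedged restatement of just that branch, with simplified types (this is not the exact control flow, only the decision it encodes):

// Fail if a prefetch already happened; otherwise drop the "pdf" flag so the
// next pass routes the URL through chrome-cdp, which prefetches the file.
function handlePdfAntibot(meta: {
  pdfPrefetch: { filePath: string } | null | undefined;
  featureFlags: Set<string>;
}): "fail" | "retry" {
  if (meta.pdfPrefetch !== undefined) {
    return "fail"; // prefetched and still blocked by anti-bot
  }
  meta.featureFlags = new Set([...meta.featureFlags].filter((x) => x !== "pdf"));
  return "retry";
}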
@ -2,6 +2,8 @@ import { Logger } from "winston";
import { z, ZodError } from "zod";
import * as Sentry from "@sentry/node";
import { MockState, saveMock } from "./mock";
import { TimeoutSignal } from "../../../controllers/v1/types";
import { fireEngineURL } from "../engines/fire-engine/scrape";

export type RobustFetchParams<Schema extends z.Schema<any>> = {
  url: string;
@ -17,6 +19,7 @@ export type RobustFetchParams<Schema extends z.Schema<any>> = {
  tryCount?: number;
  tryCooldown?: number;
  mock: MockState | null;
  abort?: AbortSignal;
};

export async function robustFetch<
@ -35,7 +38,10 @@ export async function robustFetch<
  tryCount = 1,
  tryCooldown,
  mock,
  abort,
}: RobustFetchParams<Schema>): Promise<Output> {
  abort?.throwIfAborted();

  const params = {
    url,
    logger,
@ -47,6 +53,7 @@ export async function robustFetch<
  ignoreFailure,
  tryCount,
  tryCooldown,
  abort,
};

let response: {
@ -70,6 +77,7 @@ export async function robustFetch<
    : {}),
  ...(headers !== undefined ? headers : {}),
},
signal: abort,
...(body instanceof FormData
  ? {
      body,
@ -81,7 +89,9 @@ export async function robustFetch<
    : {}),
});
} catch (error) {
  if (!ignoreFailure) {
  if (error instanceof TimeoutSignal) {
    throw error;
  } else if (!ignoreFailure) {
    Sentry.captureException(error);
    if (tryCount > 1) {
      logger.debug(
@ -126,14 +136,13 @@ export async function robustFetch<
const makeRequestTypeId = (
  request: (typeof mock)["requests"][number]["options"],
) => {
  let trueUrl = (process.env.FIRE_ENGINE_BETA_URL && request.url.startsWith(process.env.FIRE_ENGINE_BETA_URL))
    ? request.url.replace(process.env.FIRE_ENGINE_BETA_URL, "<fire-engine>")
  let trueUrl = request.url.startsWith(fireEngineURL)
    ? request.url.replace(fireEngineURL, "<fire-engine>")
    : request.url;

  let out = trueUrl + ";" + request.method;
  if (
    process.env.FIRE_ENGINE_BETA_URL &&
    (trueUrl.startsWith("<fire-engine>")) &&
    trueUrl.startsWith("<fire-engine>") &&
    request.method === "POST"
  ) {
    out += "f-e;" + request.body?.engine + ";" + request.body?.url;
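The abort plumbing above reduces to a small, reusable pattern: check the signal before doing any work, then hand the same signal to fetch. A minimal sketch (fetchWithAbort is an illustrative name; AbortSignal.timeout() and throwIfAborted() are standard in modern Node):

async function fetchWithAbort(url: string, abort?: AbortSignal): Promise<Response> {
  abort?.throwIfAborted(); // fail fast if the deadline already passed
  return await fetch(url, { signal: abort });
}

// e.g.: await fetchWithAbort("https://example.com", AbortSignal.timeout(30_000));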
@ -305,6 +305,7 @@ export async function performLLMExtract(
  document: Document,
): Promise<Document> {
  if (meta.options.formats.includes("extract")) {
    meta.internalOptions.abort?.throwIfAborted();
    const { extract, warning } = await generateOpenAICompletions(
      meta.logger.child({
        method: "performLLMExtract/generateOpenAICompletions",
@ -16,6 +16,7 @@ export async function fireEngineMap(
    numResults: number;
    page?: number;
  },
  abort?: AbortSignal,
): Promise<SearchResult[]> {
  try {
    let data = JSON.stringify({
@ -29,9 +30,7 @@ export async function fireEngineMap(
    });

    if (!process.env.FIRE_ENGINE_BETA_URL) {
      console.warn(
        "(v1/map Beta) Results might differ from cloud offering currently.",
      );
      logger.warn("(v1/map Beta) Results might differ from cloud offering currently.");
      return [];
    }

@ -42,6 +41,7 @@ export async function fireEngineMap(
    "X-Disable-Cache": "true",
  },
  body: data,
  signal: abort,
});

if (response.ok) {
@ -1,21 +1,18 @@
import axios from "axios";
import * as cheerio from "cheerio"; // TODO: rustify
import { JSDOM } from 'jsdom';
import * as querystring from "querystring";
import { SearchResult } from "../../src/lib/entities";
import { logger } from "../../src/lib/logger";
import https from 'https';

const _useragent_list = [
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0",
];
const getRandomInt = (min: number, max: number): number => Math.floor(Math.random() * (max - min + 1)) + min;

function get_useragent(): string {
  return _useragent_list[Math.floor(Math.random() * _useragent_list.length)];
export function get_useragent(): string {
  const lynx_version = `Lynx/${getRandomInt(2, 3)}.${getRandomInt(8, 9)}.${getRandomInt(0, 2)}`;
  const libwww_version = `libwww-FM/${getRandomInt(2, 3)}.${getRandomInt(13, 15)}`;
  const ssl_mm_version = `SSL-MM/${getRandomInt(1, 2)}.${getRandomInt(3, 5)}`;
  const openssl_version = `OpenSSL/${getRandomInt(1, 3)}.${getRandomInt(0, 4)}.${getRandomInt(0, 9)}`;
  return `${lynx_version} ${libwww_version} ${ssl_mm_version} ${openssl_version}`;
}

async function _req(
@ -31,9 +28,10 @@ async function _req(
) {
  const params = {
    q: term,
    num: results, // Number of results to return
    num: results + 2, // Number of results to return
    hl: lang,
    gl: country,
    safe: "active",
    start: start,
  };
  if (tbs) {
@ -42,18 +40,25 @@ async function _req(
  if (filter) {
    params["filter"] = filter;
  }
  var agent = get_useragent();
  try {
    const resp = await axios.get("https://www.google.com/search", {
      headers: {
        "User-Agent": get_useragent(),
        "User-Agent": agent,
        "Accept": "*/*"
      },
      params: params,
      proxy: proxies,
      timeout: timeout,
      httpsAgent: new https.Agent({
        rejectUnauthorized: true
      }),
      withCredentials: true
    });
    return resp;
  } catch (error) {
    if (error.response && error.response.status === 429) {
      logger.warn("Google Search: Too many requests, try again later.", error.response);
      throw new Error("Google Search: Too many requests, try again later.");
    }
    throw error;
@ -100,34 +105,42 @@ export async function googleSearch(
  tbs,
  filter,
);
const $ = cheerio.load(resp.data);
const result_block = $("div.g");
const dom = new JSDOM(resp.data);
const document = dom.window.document;
const result_block = document.querySelectorAll("div.ezO2md");
let new_results = 0;
let unique = true;
let fetched_results = 0;

const fetched_links = new Set<string>();
if (result_block.length === 0) {
  start += 1;
  attempts += 1;
} else {
  attempts = 0; // Reset attempts if we have results
  attempts = 0;
}
result_block.each((index, element) => {
  const linkElement = $(element).find("a");
  const link =
    linkElement && linkElement.attr("href")
      ? linkElement.attr("href")
      : null;
  const title = $(element).find("h3");
  const ogImage = $(element).find("img").eq(1).attr("src");
  const description_box = $(element).find(
    "div[style='-webkit-line-clamp:2']",
  );
  const answerBox = $(element).find(".mod").text();
  if (description_box) {
    const description = description_box.text();
    if (link && title && description) {
      start += 1;
      results.push(new SearchResult(link, title.text(), description));

for (const result of result_block) {
  const link_tag = result.querySelector("a[href]") as HTMLAnchorElement;
  const title_tag = link_tag ? link_tag.querySelector("span.CVA68e") : null;
  const description_tag = result.querySelector("span.FrIlee");

  if (link_tag && title_tag && description_tag) {
    const link = decodeURIComponent(link_tag.href.split("&")[0].replace("/url?q=", ""));
    if (fetched_links.has(link) && unique) continue;
    fetched_links.add(link);
    const title = title_tag.textContent || "";
    const description = description_tag.textContent || "";
    fetched_results++;
    new_results++;
    if (link && title && description) {
      start += 1;
      results.push(new SearchResult(link, title, description));
    }
    if (fetched_results >= num_results) break;
  }
}
});
}

await new Promise((resolve) =>
  setTimeout(resolve, sleep_interval * 1000),
);
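The new user-agent strategy is simple enough to restate in miniature — a hedged sketch (randInt/lynxUserAgent are illustrative names; the generator emits Lynx-style strings such as "Lynx/2.8.1 libwww-FM/2.14 SSL-MM/1.4 OpenSSL/1.2.3", presumably so Google serves the lightweight no-JS markup that the "div.ezO2md" / "span.CVA68e" / "span.FrIlee" selectors above target):

const randInt = (min: number, max: number): number =>
  Math.floor(Math.random() * (max - min + 1)) + min;

const lynxUserAgent = (): string =>
  `Lynx/${randInt(2, 3)}.${randInt(8, 9)}.${randInt(0, 2)} ` +
  `libwww-FM/${randInt(2, 3)}.${randInt(13, 15)} ` +
  `SSL-MM/${randInt(1, 2)}.${randInt(3, 5)} ` +
  `OpenSSL/${randInt(1, 3)}.${randInt(0, 4)}.${randInt(0, 9)}`;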
@ -4,6 +4,7 @@ import { googleSearch } from "./googlesearch";
import { fireEngineMap } from "./fireEngine";
import { searchapi_search } from "./searchapi";
import { serper_search } from "./serper";
import { searxng_search } from "./searxng";

export async function search({
  query,
@ -51,6 +52,16 @@ export async function search({
    location,
  });
}
if (process.env.SEARXNG_ENDPOINT) {
  return await searxng_search(query, {
    num_results,
    tbs,
    filter,
    lang,
    country,
    location,
  });
}
return await googleSearch(
  query,
  advanced,
@ -64,7 +75,7 @@ export async function search({
  timeout,
);
} catch (error) {
  logger.error(`Error in search function: ${error}`);
  logger.error(`Error in search function`, { error });
  return [];
}
}
64
apps/api/src/search/searxng.ts
Normal file
@ -0,0 +1,64 @@
import axios from "axios";
import dotenv from "dotenv";
import { SearchResult } from "../../src/lib/entities";
import { logger } from "../lib/logger";

dotenv.config();

interface SearchOptions {
  tbs?: string;
  filter?: string;
  lang?: string;
  country?: string;
  location?: string;
  num_results: number;
  page?: number;
}

export async function searxng_search(
  q: string,
  options: SearchOptions,
): Promise<SearchResult[]> {
  const params = {
    q: q,
    language: options.lang,
    // gl: options.country, // not possible with SearXNG
    // location: options.location, // not possible with SearXNG
    // num: options.num_results, // not possible with SearXNG
    engines: process.env.SEARXNG_ENGINES || "",
    categories: process.env.SEARXNG_CATEGORIES || "general",
    pageno: options.page ?? 1,
    format: "json"
  };

  const url = process.env.SEARXNG_ENDPOINT!;
  // Remove trailing slash if it exists
  const cleanedUrl = url.endsWith('/') ? url.slice(0, -1) : url;

  // Concatenate "/search" to the cleaned URL
  const finalUrl = cleanedUrl + "/search";

  try {
    const response = await axios.get(finalUrl, {
      headers: {
        "Content-Type": "application/json",
      },
      params: params,
    });

    const data = response.data;

    if (data && Array.isArray(data.results)) {
      return data.results.map((a: any) => ({
        url: a.url,
        title: a.title,
        description: a.content,
      }));
    } else {
      return [];
    }
  } catch (error) {
    logger.error(`There was an error searching for content`, { error });
    return [];
  }
}
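A hedged usage sketch for the new SearXNG backend — the endpoint and engine list below are illustrative values, not shipped defaults, and the target SearXNG instance must have its JSON output format enabled for format: "json" to work:

async function demo() {
  process.env.SEARXNG_ENDPOINT = "http://localhost:8080"; // illustrative
  process.env.SEARXNG_ENGINES = "google,duckduckgo";      // illustrative
  const results = await searxng_search("firecrawl", { num_results: 5 });
  for (const r of results) {
    console.log(`${r.title} -> ${r.url}`);
  }
}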
@ -1,10 +1,8 @@
import { Job, JobsOptions } from "bullmq";
import { getScrapeQueue } from "./queue-service";
import { v4 as uuidv4 } from "uuid";
import { NotificationType, PlanType, WebScraperOptions } from "../types";
import { PlanType, WebScraperOptions } from "../types";
import * as Sentry from "@sentry/node";
import {
  calculateJobTimeToRun,
  cleanOldConcurrencyLimitEntries,
  getConcurrencyLimitActiveJobs,
  getConcurrencyQueueJobsCount,
@ -13,7 +11,6 @@ import {
} from "../lib/concurrency-limit";
import { logger } from "../lib/logger";
import { getConcurrencyLimitMax } from "./rate-limiter";
import { sendNotificationWithCustomDays } from "./notification/email_notification";

async function _addScrapeJobToConcurrencyQueue(
  webScraperOptions: any,
@ -44,15 +41,7 @@ export async function _addScrapeJobToBullMQ(
  webScraperOptions.team_id &&
  webScraperOptions.plan
) {
  await pushConcurrencyLimitActiveJob(webScraperOptions.team_id, jobId, calculateJobTimeToRun({
    id: jobId,
    opts: {
      ...options,
      priority: jobPriority,
      jobId,
    },
    data: webScraperOptions,
  }));
  await pushConcurrencyLimitActiveJob(webScraperOptions.team_id, jobId, 60 * 1000); // 60s default timeout
}

await getScrapeQueue().add(jobId, webScraperOptions, {
@ -2,6 +2,8 @@ import { Queue } from "bullmq";
import { logger } from "../lib/logger";
import IORedis from "ioredis";

export type QueueFunction = () => Queue<any, any, string, any, any, string>;

let scrapeQueue: Queue;
let extractQueue: Queue;
let loggingQueue: Queue;
@ -52,7 +52,6 @@ import { configDotenv } from "dotenv";
import { scrapeOptions } from "../controllers/v1/types";
import { getRateLimiterPoints } from "./rate-limiter";
import {
  calculateJobTimeToRun,
  cleanOldConcurrencyLimitEntries,
  pushConcurrencyLimitActiveJob,
  removeConcurrencyLimitActiveJob,
@ -247,6 +246,11 @@ const processJobInternal = async (token: string, job: Job & { id: string }) => {
  extendInterval: jobLockExtendInterval,
  extensionTime: jobLockExtensionTime,
});

if (job.data?.mode !== "kickoff" && job.data?.team_id) {
  await pushConcurrencyLimitActiveJob(job.data.team_id, job.id, 60 * 1000); // 60s lock renew, just like in the queue
}

await job.extendLock(token, jobLockExtensionTime);
}, jobLockExtendInterval);

@ -597,7 +601,7 @@ const workerFun = async (
  // we are 1 under the limit, assuming the job insertion logic never over-inserts. - MG
  const nextJob = await takeConcurrencyLimitedJob(job.data.team_id);
  if (nextJob !== null) {
    await pushConcurrencyLimitActiveJob(job.data.team_id, nextJob.id, calculateJobTimeToRun(nextJob));
    await pushConcurrencyLimitActiveJob(job.data.team_id, nextJob.id, 60 * 1000); // 60s initial timeout

    await queue.add(
      nextJob.id,
@ -535,7 +535,7 @@ export default class FirecrawlApp {
  const response: AxiosResponse = await axios.post(
    this.apiUrl + `/v1/scrape`,
    jsonData,
    { headers }
    { headers, timeout: params?.timeout !== undefined ? (params.timeout + 5000) : undefined },
  );
  if (response.status === 200) {
    const responseData = response.data;
@ -1262,7 +1262,7 @@ export default class FirecrawlApp {
  data: any,
  headers: AxiosRequestHeaders
): Promise<AxiosResponse> {
  return axios.post(url, data, { headers });
  return axios.post(url, data, { headers, timeout: (data?.timeout ? (data.timeout + 5000) : undefined) });
}

/**
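The SDK change above is a client-side timeout pad: give axios a deadline slightly longer than the server-side one, so the API gets a chance to answer (or time out cleanly) first. In isolation — a minimal sketch, with postScrape as an illustrative name and the 5000 ms pad taken from the diff:

import axios from "axios";

async function postScrape(apiUrl: string, jsonData: { url: string; timeout?: number }) {
  return axios.post(`${apiUrl}/v1/scrape`, jsonData, {
    // pad the server-side timeout by 5 s; no client timeout if none was requested
    timeout: jsonData.timeout !== undefined ? jsonData.timeout + 5000 : undefined,
  });
}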
3
apps/playwright-service-ts/.dockerignore
Normal file
@ -0,0 +1,3 @@
/node_modules/
/dist/
.env
@ -1,6 +1,6 @@
import express, { Request, Response } from 'express';
import bodyParser from 'body-parser';
import { chromium, Browser, BrowserContext, Route, Request as PlaywrightRequest } from 'playwright';
import { chromium, Browser, BrowserContext, Route, Request as PlaywrightRequest, Page } from 'playwright';
import dotenv from 'dotenv';
import UserAgent from 'user-agents';
import { getError } from './helpers/get_error';
@ -119,7 +119,7 @@ const isValidUrl = (urlString: string): boolean => {
  }
};

const scrapePage = async (page: any, url: string, waitUntil: 'load' | 'networkidle', waitAfterLoad: number, timeout: number, checkSelector: string | undefined) => {
const scrapePage = async (page: Page, url: string, waitUntil: 'load' | 'networkidle', waitAfterLoad: number, timeout: number, checkSelector: string | undefined) => {
  console.log(`Navigating to ${url} with waitUntil: ${waitUntil} and timeout: ${timeout}ms`);
  const response = await page.goto(url, { waitUntil, timeout });

@ -135,9 +135,19 @@ const scrapePage = async (page: Page, url: string, waitUntil: 'load' | 'networkid
  }
}

let headers = null, content = await page.content();
if (response) {
  headers = await response.allHeaders();
  const ct = Object.entries(headers).find(x => x[0].toLowerCase() === "content-type");
  if (ct && (ct[1].includes("application/json") || ct[1].includes("text/plain"))) {
    content = (await response.body()).toString("utf8"); // TODO: determine real encoding
  }
}

return {
  content: await page.content(),
  content,
  status: response ? response.status() : null,
  headers,
};
};

@ -175,40 +185,35 @@ app.post('/scrape', async (req: Request, res: Response) => {
  await page.setExtraHTTPHeaders(headers);
}

let pageContent;
let pageStatusCode: number | null = null;
let result: Awaited<ReturnType<typeof scrapePage>>;
try {
  // Strategy 1: Normal
  console.log('Attempting strategy 1: Normal load');
  const result = await scrapePage(page, url, 'load', wait_after_load, timeout, check_selector);
  pageContent = result.content;
  pageStatusCode = result.status;
  result = await scrapePage(page, url, 'load', wait_after_load, timeout, check_selector);
} catch (error) {
  console.log('Strategy 1 failed, attempting strategy 2: Wait until networkidle');
  try {
    // Strategy 2: Wait until networkidle
    const result = await scrapePage(page, url, 'networkidle', wait_after_load, timeout, check_selector);
    pageContent = result.content;
    pageStatusCode = result.status;
    result = await scrapePage(page, url, 'networkidle', wait_after_load, timeout, check_selector);
  } catch (finalError) {
    await page.close();
    return res.status(500).json({ error: 'An error occurred while fetching the page.' });
  }
}

const pageError = pageStatusCode !== 200 ? getError(pageStatusCode) : undefined;
const pageError = result.status !== 200 ? getError(result.status) : undefined;

if (!pageError) {
  console.log(`✅ Scrape successful!`);
} else {
  console.log(`🚨 Scrape failed with status code: ${pageStatusCode} ${pageError}`);
  console.log(`🚨 Scrape failed with status code: ${result.status} ${pageError}`);
}

await page.close();

res.json({
  content: pageContent,
  pageStatusCode,
  content: result.content,
  pageStatusCode: result.status,
  ...(pageError && { pageError })
});
});
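The content-type branch added above can be read as a standalone helper: for JSON and plain-text responses, return the raw body rather than Playwright's HTML serialization of the page. A hedged sketch (pageBody is an illustrative name; allHeaders() and body() are Playwright's documented Response methods, with header names lowercased):

import { Page } from "playwright";

const pageBody = async (page: Page, url: string) => {
  const response = await page.goto(url, { waitUntil: "load" });
  let content = await page.content();
  if (response) {
    const ct = (await response.allHeaders())["content-type"] ?? "";
    if (ct.includes("application/json") || ct.includes("text/plain")) {
      content = (await response.body()).toString("utf8"); // raw body, not the DOM
    }
  }
  return { content, status: response ? response.status() : null };
};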
@ -19,6 +19,7 @@
  "user-agents": "^1.1.410"
},
"devDependencies": {
  "@types/body-parser": "^1.19.5",
  "@types/express": "^4.17.21",
  "@types/node": "^20.14.9",
  "@types/user-agents": "^1.0.4",
873
apps/playwright-service-ts/pnpm-lock.yaml
generated
Normal file
@ -0,0 +1,873 @@
lockfileVersion: '9.0'

settings:
  autoInstallPeers: true
  excludeLinksFromLockfile: false

importers:

  .:
    dependencies:
      body-parser:
        specifier: ^1.20.2
        version: 1.20.3
      dotenv:
        specifier: ^16.4.5
        version: 16.4.7
      express:
        specifier: ^4.19.2
        version: 4.21.2
      playwright:
        specifier: ^1.45.0
        version: 1.49.1
      user-agents:
        specifier: ^1.1.410
        version: 1.1.455
    devDependencies:
      '@types/body-parser':
        specifier: ^1.19.5
        version: 1.19.5
      '@types/express':
        specifier: ^4.17.21
        version: 4.17.21
      '@types/node':
        specifier: ^20.14.9
        version: 20.17.10
      '@types/user-agents':
        specifier: ^1.0.4
        version: 1.0.4
      ts-node:
        specifier: ^10.9.2
        version: 10.9.2(@types/node@20.17.10)(typescript@5.7.2)
      typescript:
        specifier: ^5.5.2
        version: 5.7.2
@ -104,6 +104,8 @@ async def root(body: UrlModel):
    json_compatible_item_data = {
        "content": page_content,
        "pageStatusCode": page_status_code,
        "pageError": page_error
    }
    }

    if page_error is not None:
        json_compatible_item_data["pageError"] = page_error
    return JSONResponse(content=json_compatible_item_data)
@ -145,6 +145,7 @@ class FirecrawlApp:
|
||||
f'{self.api_url}{endpoint}',
|
||||
headers=headers,
|
||||
json=scrape_params,
|
||||
timeout=(scrape_params["timeout"] + 5000 if "timeout" in scrape_params else None),
|
||||
)
|
||||
if response.status_code == 200:
|
||||
try:
|
||||
@ -433,7 +434,7 @@ class FirecrawlApp:
|
||||
else:
|
||||
self._handle_error(response, 'map')
|
||||
|
||||
def batch_scrape_urls(self, urls: list[str],
|
||||
def batch_scrape_urls(self, urls: List[str],
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
poll_interval: Optional[int] = 2,
|
||||
idempotency_key: Optional[str] = None) -> Any:
|
||||
@ -441,7 +442,7 @@ class FirecrawlApp:
        Initiate a batch scrape job for the specified URLs using the Firecrawl API.

        Args:
            urls (list[str]): The URLs to scrape.
            urls (List[str]): The URLs to scrape.
            params (Optional[Dict[str, Any]]): Additional parameters for the scraper.
            poll_interval (Optional[int]): Time in seconds between status checks when waiting for job completion. Defaults to 2 seconds.
            idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
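The list[str] → List[str] swap here and in the hunks below trades the builtin generic for typing.List, presumably to keep the SDK importable on Python 3.8, where subscripting the builtin list raises a TypeError. A hedged usage sketch of the method itself (the API key and params shape are placeholders, not values from this diff):

    from firecrawl import FirecrawlApp

    app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key
    job = app.batch_scrape_urls(
        ["https://example.com", "https://firecrawl.dev"],
        params={"formats": ["markdown"]},  # assumed params shape
        poll_interval=2,                   # seconds between status checks
    )
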
@ -476,12 +477,12 @@ class FirecrawlApp:
            self._handle_error(response, 'start batch scrape job')


    def async_batch_scrape_urls(self, urls: list[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> Dict[str, Any]:
    def async_batch_scrape_urls(self, urls: List[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> Dict[str, Any]:
        """
        Initiate a crawl job asynchronously.

        Args:
            urls (list[str]): The URLs to scrape.
            urls (List[str]): The URLs to scrape.
            params (Optional[Dict[str, Any]]): Additional parameters for the scraper.
            idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.

@ -505,12 +506,12 @@ class FirecrawlApp:
        else:
            self._handle_error(response, 'start batch scrape job')

    def batch_scrape_urls_and_watch(self, urls: list[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> 'CrawlWatcher':
    def batch_scrape_urls_and_watch(self, urls: List[str], params: Optional[Dict[str, Any]] = None, idempotency_key: Optional[str] = None) -> 'CrawlWatcher':
        """
        Initiate a batch scrape job and return a CrawlWatcher to monitor the job via WebSocket.

        Args:
            urls (list[str]): The URLs to scrape.
            urls (List[str]): The URLs to scrape.
            params (Optional[Dict[str, Any]]): Additional parameters for the scraper.
            idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.

@ -925,7 +926,7 @@ class FirecrawlApp:
            requests.RequestException: If the request fails after the specified retries.
        """
        for attempt in range(retries):
            response = requests.post(url, headers=headers, json=data)
            response = requests.post(url, headers=headers, json=data, timeout=((data["timeout"] + 5000) if "timeout" in data else None))
            if response.status_code == 502:
                time.sleep(backoff_factor * (2 ** attempt))
            else:
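The retry loop above sleeps backoff_factor * 2 ** attempt seconds after each 502, i.e. classic exponential backoff, and now also forwards the padded timeout. A self-contained sketch of the same pattern (the function name and retry budget are illustrative, not the SDK's exact helper):

    import time
    import requests

    def post_with_backoff(url, retries=3, backoff_factor=0.5, **kwargs):
        # retry only on 502 Bad Gateway, doubling the delay each attempt
        for attempt in range(retries):
            response = requests.post(url, **kwargs)
            if response.status_code == 502:
                time.sleep(backoff_factor * (2 ** attempt))  # 0.5s, 1s, 2s, ...
            else:
                return response
        return response  # hand back the last 502 once the budget is spent
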
@ -13,13 +13,13 @@ x-common-service: &common-service

services:
  playwright-service:
    build: apps/playwright-service
    build: apps/playwright-service-ts
    environment:
      - PORT=3000
      - PROXY_SERVER=${PROXY_SERVER}
      - PROXY_USERNAME=${PROXY_USERNAME}
      - PROXY_PASSWORD=${PROXY_PASSWORD}
      - BLOCK_MEDIA=${BLOCK_MEDIA}
      PORT: 3000
      PROXY_SERVER: ${PROXY_SERVER}
      PROXY_USERNAME: ${PROXY_USERNAME}
      PROXY_PASSWORD: ${PROXY_PASSWORD}
      BLOCK_MEDIA: ${BLOCK_MEDIA}
    networks:
      - backend

@ -28,7 +28,7 @@ services:
    environment:
      REDIS_URL: ${REDIS_URL:-redis://redis:6379}
      REDIS_RATE_LIMIT_URL: ${REDIS_URL:-redis://redis:6379}
      PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000}
      PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000/scrape}
      USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION}
      PORT: ${PORT:-3002}
      NUM_WORKERS_PER_QUEUE: ${NUM_WORKERS_PER_QUEUE}
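Both the API and worker services now default PLAYWRIGHT_MICROSERVICE_URL to the /scrape route of the rewritten TypeScript playwright service instead of its bare root. A hedged sketch of how a consumer of that variable would call it (the JSON body mirrors the UrlModel route shown earlier, but its exact shape is an assumption):

    import os
    import requests

    # compose now defaults the URL with the /scrape path included
    url = os.getenv("PLAYWRIGHT_MICROSERVICE_URL", "http://playwright-service:3000/scrape")
    resp = requests.post(url, json={"url": "https://example.com"})  # assumed body shape
    print(resp.json().get("pageStatusCode"))
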
@ -51,6 +51,9 @@ services:
      SERPER_API_KEY: ${SERPER_API_KEY}
      SEARCHAPI_API_KEY: ${SEARCHAPI_API_KEY}
      LOGGING_LEVEL: ${LOGGING_LEVEL}
      PROXY_SERVER: ${PROXY_SERVER}
      PROXY_USERNAME: ${PROXY_USERNAME}
      PROXY_PASSWORD: ${PROXY_PASSWORD}
      FLY_PROCESS_GROUP: app
    depends_on:
      - redis
@ -64,7 +67,7 @@ services:
    environment:
      REDIS_URL: ${REDIS_URL:-redis://redis:6379}
      REDIS_RATE_LIMIT_URL: ${REDIS_URL:-redis://redis:6379}
      PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000}
      PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000/scrape}
      USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION}
      PORT: ${PORT:-3002}
      NUM_WORKERS_PER_QUEUE: ${NUM_WORKERS_PER_QUEUE}
@ -85,6 +88,9 @@ services:
      HOST: ${HOST:-0.0.0.0}
      SELF_HOSTED_WEBHOOK_URL: ${SELF_HOSTED_WEBHOOK_URL}
      LOGGING_LEVEL: ${LOGGING_LEVEL}
      PROXY_SERVER: ${PROXY_SERVER}
      PROXY_USERNAME: ${PROXY_USERNAME}
      PROXY_PASSWORD: ${PROXY_PASSWORD}
      FLY_PROCESS_GROUP: worker
    depends_on:
      - redis