Merge remote-tracking branch 'origin/v1/node-sdk' into v1/python-sdk

2025-08-05 18:40:45 +08:00 · 2024-08-21 12:09:53 -03:00 · 2024-08-21 12:09:53 -03:00 · af0e47a30e
commit af0e47a30e
parent 0b8df5e264 e9d6ca197e
118 changed files with 15145 additions and 2520 deletions
--- a/.github/workflows/check-redis.yml
+++ b/.github/workflows/check-redis.yml
@ -1,20 +0,0 @@
 name: Check Redis
 on:
  schedule:
    - cron: '*/5 * * * *'
 env:
  BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
 jobs:
  clean-jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Send GET request to check queues
        run: |
          response=$(curl --write-out '%{http_code}' --silent --output /dev/null --max-time 180 https://api.firecrawl.dev/admin/${{ secrets.BULL_AUTH_KEY }}/redis-health)
          if [ "$response" -ne 200 ]; then
            echo "Failed to check queues. Response: $response"
            exit 1
          fi
          echo "Successfully checked queues. Response: $response"
--- a/.github/workflows/fly-direct.yml
+++ b/.github/workflows/fly-direct.yml
@ -1,7 +1,7 @@
 name: Fly Deploy Direct
 on:
  schedule:
-    - cron: '0 */6 * * *'
+    - cron: '0 */2 * * *'
 env:
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
--- a/.github/workflows/fly.yml
+++ b/.github/workflows/fly.yml
@ -169,6 +169,41 @@ jobs:
        run: npm run test
        working-directory: ./apps/js-sdk/firecrawl
  go-sdk-tests:
    name: Go SDK Tests
    needs: pre-deploy-e2e-tests
    runs-on: ubuntu-latest
    services:
      redis:
        image: redis
        ports:
          - 6379:6379
    steps:
      - uses: actions/checkout@v3
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version-file: "go.mod"
      - name: Install pnpm
        run: npm install -g pnpm
      - name: Install dependencies
        run: pnpm install
        working-directory: ./apps/api
      - name: Start the application
        run: npm start &
        working-directory: ./apps/api
        id: start_app
      - name: Start workers
        run: npm run workers &
        working-directory: ./apps/api
        id: start_workers
      - name: Install dependencies for Go SDK
        run: go mod tidy
        working-directory: ./apps/go-sdk
      - name: Run tests for Go SDK
        run: go test -v ./... -timeout 180s
        working-directory: ./apps/go-sdk/firecrawl
  deploy:
    name: Deploy app
    runs-on: ubuntu-latest
--- a/.gitmodules
+++ b/.gitmodules
@ -0,0 +1,6 @@
 [submodule "apps/go-sdk/firecrawl"]
 	path = apps/go-sdk/firecrawl
 	url = https://github.com/mendableai/firecrawl-go
 [submodule "apps/go-sdk/examples"]
 	path = apps/go-sdk/examples
 	url = https://github.com/mendableai/firecrawl-go-examples
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -44,7 +44,6 @@ BULL_AUTH_KEY= @
 LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
 PLAYWRIGHT_MICROSERVICE_URL=  # set if you'd like to run a playwright fallback
 LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
 SERPER_API_KEY= #Set if you have a serper key you'd like to use as a search api
 SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
 POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs
 POSTHOG_HOST= # set if you'd like to send posthog events like job logs
--- a/SELF_HOST.md
+++ b/SELF_HOST.md
@ -1,36 +1,76 @@
-## Self-hosting Firecrawl
+# Self-hosting Firecrawl
-_We're currently working on a more in-depth guide on how to self-host, but in the meantime, here is a simplified version._
+#### Contributor?
-Refer to [CONTRIBUTING.md](https://github.com/mendableai/firecrawl/blob/main/CONTRIBUTING.md) for instructions on how to run it locally.
+Welcome to [Firecrawl](https://firecrawl.dev) 🔥! Here are some instructions on how to set up the project locally so you can run it on your own and contribute.
-## Getting Started
+If you're contributing, note that the process is similar to other open-source repos, i.e., fork Firecrawl, make changes, run tests, PR.
-First, clone this repository and copy the example env file from the API folder `.env.example` to `.env`.
+If you have any questions or would like help getting on board, join our Discord community [here](https://discord.gg/gSmWdAkdwd) for more information or submit an issue on GitHub [here](https://github.com/mendableai/firecrawl/issues/new/choose)!
-### Steps
+## Why?
-1.  Clone the repository:
+Self-hosting Firecrawl is particularly beneficial for organizations with stringent security policies that require data to remain within controlled environments. Here are some key reasons to consider self-hosting:
-    ```bash
+- **Enhanced Security and Compliance:** By self-hosting, you ensure that all data handling and processing complies with internal and external regulations, keeping sensitive information within your secure infrastructure. Note that Firecrawl is a Mendable product and relies on SOC2 Type2 certification, which means that the platform adheres to high industry standards for managing data security.
-    git clone https://github.com/mendableai/firecrawl.git
+- **Customizable Services:** Self-hosting allows you to tailor the services, such as the Playwright service, to meet specific needs or handle particular use cases that may not be supported by the standard cloud offering.
-    cd firecrawl
+- **Learning and Community Contribution:** By setting up and maintaining your own instance, you gain a deeper understanding of how Firecrawl works, which can also lead to more meaningful contributions to the project.
    cp ./apps/api/.env.example ./.env
    ```
-2.  For running the simplest version of FireCrawl, edit the `USE_DB_AUTHENTICATION` in `.env` to not use the database authentication:
+### Considerations
-    ```plaintext
+However, there are some limitations and additional responsibilities to be aware of:
    USE_DB_AUTHENTICATION=false
    ```
-3.  Update the Redis URL in the .env file to align with the Docker configuration:
+1. **Limited Access to Fire-engine:** Currently, self-hosted instances of Firecrawl do not have access to Fire-engine, which includes advanced features for handling IP blocks, robot detection mechanisms, and more. This means that while you can manage basic scraping tasks, more complex scenarios might require additional configuration or might not be supported.
 2. **Manual Configuration Required:** If you need to use scraping methods beyond the basic fetch and Playwright options, you will need to manually configure these in the `.env` file. This requires a deeper understanding of the technologies and might involve more setup time.
-    ```plaintext
+Self-hosting Firecrawl is ideal for those who need full control over their scraping and data processing environments but comes with the trade-off of additional maintenance and configuration efforts.
    REDIS_URL=redis://redis:6379
    ```
-4.  #### Option: Running with TypeScript Playwright Service
+## Steps
 1. First, start by installing the dependencies
 - Docker [instructions](https://docs.docker.com/get-docker/)
 2. Set environment variables
 Create a `.env` file in the root directory. You can copy over the template from `apps/api/.env.example`.
 To start, we won't set up authentication or any optional sub-services (PDF parsing, JS blocking support, AI features).
 `.env:`
 ```
 # ===== Required ENVS ======
 NUM_WORKERS_PER_QUEUE=8
 PORT=3002
 HOST=0.0.0.0
 REDIS_URL=redis://redis:6379
 REDIS_RATE_LIMIT_URL=redis://redis:6379
 ## To turn on DB authentication, you need to set up supabase.
 USE_DB_AUTHENTICATION=false
 # ===== Optional ENVS ======
 # Supabase Setup (used to support DB authentication, advanced logging, etc.)
 SUPABASE_ANON_TOKEN=
 SUPABASE_URL=
 SUPABASE_SERVICE_TOKEN=
 # Other Optionals
 TEST_API_KEY= # use if you've set up authentication and want to test with a real API key
 SCRAPING_BEE_API_KEY= # set if you'd like to use ScrapingBee to handle JS blocking
 OPENAI_API_KEY= # add for LLM-dependent features (image alt generation, etc.)
 BULL_AUTH_KEY= @
 LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
 PLAYWRIGHT_MICROSERVICE_URL=  # set if you'd like to run a playwright fallback
 LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
 SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
 POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs
 POSTHOG_HOST= # set if you'd like to send posthog events like job logs
 ```
 3.  *(Optional) Running with TypeScript Playwright Service*
    *   Update the `docker-compose.yml` file to change the Playwright service:
@ -49,16 +89,91 @@ First, clone this repository and copy the example env file from the API folder `
        ```
    *   Don't forget to set the proxy server in your `.env` file as needed.
-5.  Build and run the Docker containers:
+
 4.  Build and run the Docker containers:
    ```bash
    docker compose build
    docker compose up
    ```
 This will run a local instance of Firecrawl which can be accessed at `http://localhost:3002`.
 You should be able to see the Bull Queue Manager UI on `http://localhost:3002/admin/@/queues`.
 5. *(Optional)* Test the API
 If you’d like to test the crawl endpoint, you can run this:
  ```bash
  curl -X POST http://localhost:3002/v0/crawl \
      -H 'Content-Type: application/json' \
      -d '{
        "url": "https://mendable.ai"
      }'
  ```   
 ## Troubleshooting
 This section provides solutions to common issues you might encounter while setting up or running your self-hosted instance of Firecrawl.
 ### Supabase client is not configured
 **Symptom:**
 ```bash
 [YYYY-MM-DDTHH:MM:SS.SSSz]ERROR - Attempted to access Supabase client when it's not configured.
 [YYYY-MM-DDTHH:MM:SS.SSSz]ERROR - Error inserting scrape event: Error: Supabase client is not configured.
 ```
 **Explanation:**
 This error occurs because the Supabase client setup is not completed. You should be able to scrape and crawl with no problems. Right now it's not possible to configure Supabase in self-hosted instances.
 ### You're bypassing authentication
 **Symptom:**
 ```bash
 [YYYY-MM-DDTHH:MM:SS.SSSz]WARN - You're bypassing authentication
 ```
 **Explanation:**
 This warning occurs because the Supabase client setup is not completed. You should be able to scrape and crawl with no problems. Right now it's not possible to configure Supabase in self-hosted instances.
 ### Docker containers fail to start
 **Symptom:**
 Docker containers exit unexpectedly or fail to start.
 **Solution:**
 Check the Docker logs for any error messages using the command:
 ```bash
 docker logs [container_name]
 ```
 - Ensure all required environment variables are set correctly in the .env file.
 - Verify that all Docker services defined in docker-compose.yml are correctly configured and the necessary images are available.
 ### Connection issues with Redis
 **Symptom:**
 Errors related to connecting to Redis, such as timeouts or "Connection refused".
 **Solution:**
 - Ensure that the Redis service is up and running in your Docker environment.
 - Verify that the REDIS_URL and REDIS_RATE_LIMIT_URL in your .env file point to the correct Redis instance, and ensure that they match the URL used in the `docker-compose.yaml` file (`redis://redis:6379`).
 - Check network settings and firewall rules that may block the connection to the Redis port.
 ### API endpoint does not respond
 **Symptom:**
 API requests to the Firecrawl instance timeout or return no response.
 **Solution:**
 - Ensure that the Firecrawl service is running by checking the Docker container status.
 - Verify that the PORT and HOST settings in your .env file are correct and that no other service is using the same port.
 - Check the network configuration to ensure that the host is accessible from the client making the API request.
 By addressing these common issues, you can ensure a smoother setup and operation of your self-hosted Firecrawl instance.
 ## Install Firecrawl on a Kubernetes Cluster (Simple Version)
 Read the [examples/kubernetes-cluster-install/README.md](https://github.com/mendableai/firecrawl/blob/main/examples/kubernetes-cluster-install/README.md) for instructions on how to install Firecrawl on a Kubernetes Cluster.
--- a/apps/api/.env.example
+++ b/apps/api/.env.example
@ -2,8 +2,8 @@
 NUM_WORKERS_PER_QUEUE=8 
 PORT=3002
 HOST=0.0.0.0
-REDIS_URL=redis://localhost:6379
+REDIS_URL=redis://redis:6379 #for self-hosting using docker, use redis://redis:6379. For running locally, use redis://localhost:6379
-REDIS_RATE_LIMIT_URL=redis://localhost:6379
+REDIS_RATE_LIMIT_URL=redis://redis:6379 #for self-hosting using docker, use redis://redis:6379. For running locally, use redis://localhost:6379
 PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/html
 ## To turn on DB authentication, you need to set up supabase.
@ -17,18 +17,27 @@ SUPABASE_URL=
 SUPABASE_SERVICE_TOKEN=
 # Other Optionals
-TEST_API_KEY= # use if you've set up authentication and want to test with a real API key
+# use if you've set up authentication and want to test with a real API key
-RATE_LIMIT_TEST_API_KEY_SCRAPE= # set if you'd like to test the scraping rate limit
+TEST_API_KEY=
-RATE_LIMIT_TEST_API_KEY_CRAWL= # set if you'd like to test the crawling rate limit
+# set if you'd like to test the scraping rate limit
-SCRAPING_BEE_API_KEY= #Set if you'd like to use scraping Be to handle JS blocking
+RATE_LIMIT_TEST_API_KEY_SCRAPE=
-OPENAI_API_KEY= # add for LLM dependednt features (image alt generation, etc.)
+# set if you'd like to test the crawling rate limit
-BULL_AUTH_KEY= @
+RATE_LIMIT_TEST_API_KEY_CRAWL=
-LOGTAIL_KEY= # Use if you're configuring basic logging with logtail
+# set if you'd like to use ScrapingBee to handle JS blocking
-LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
+SCRAPING_BEE_API_KEY=
-SERPER_API_KEY= #Set if you have a serper key you'd like to use as a search api
+# add for LLM-dependent features (image alt generation, etc.)
-SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
+OPENAI_API_KEY=
-POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs
+BULL_AUTH_KEY=@
-POSTHOG_HOST= # set if you'd like to send posthog events like job logs
+# use if you're configuring basic logging with logtail
 LOGTAIL_KEY=
 # set if you have a llamaparse key you'd like to use to parse pdfs
 LLAMAPARSE_API_KEY=
 # set if you'd like to send slack server health status messages
 SLACK_WEBHOOK_URL=
 # set if you'd like to send posthog events like job logs
 POSTHOG_API_KEY=
 # set if you'd like to send posthog events like job logs
 POSTHOG_HOST=
 STRIPE_PRICE_ID_STANDARD=
 STRIPE_PRICE_ID_SCALE=
@ -43,7 +52,8 @@ STRIPE_PRICE_ID_GROWTH_YEARLY=
 HYPERDX_API_KEY=
 HDX_NODE_BETA_MODE=1
-FIRE_ENGINE_BETA_URL= # set if you'd like to use the fire engine closed beta
+# set if you'd like to use the fire engine closed beta
 FIRE_ENGINE_BETA_URL=
 # Proxy Settings for Playwright (Alternatively, you can use a proxy service like oxylabs, which rotates IPs for you on every request)
 PROXY_SERVER=
--- a/apps/api/.gitignore
+++ b/apps/api/.gitignore
@ -6,3 +6,5 @@ dump.rdb
 /mongo-data
 /.next/
 .rdb
--- a/apps/api/fly.staging.toml
+++ b/apps/api/fly.staging.toml
@ -24,8 +24,8 @@ kill_timeout = '30s'
 [http_service.concurrency]
  type = "requests"
-  hard_limit = 100
+  # hard_limit = 100
-  soft_limit = 50
+  soft_limit = 100
 [[http_service.checks]]
  grace_period = "10s"
@ -51,12 +51,13 @@ kill_timeout = '30s'
  [services.concurrency]
    type = 'connections'
-    hard_limit = 25
+    # hard_limit = 25
-    soft_limit = 20
+    soft_limit = 100
 [[vm]]
-  size = 'performance-1x'
+  size = 'performance-2x'
  processes = ['app','worker']
  memory = 8192
--- a/apps/api/fly.toml
+++ b/apps/api/fly.toml
@ -24,8 +24,8 @@ kill_timeout = '30s'
 [http_service.concurrency]
  type = "requests"
-  hard_limit = 200
+  # hard_limit = 200
-  soft_limit = 75
+  soft_limit = 200
 [[http_service.checks]]
  grace_period = "20s"
@ -50,8 +50,8 @@ kill_timeout = '30s'
  [services.concurrency]
    type = 'connections'
-    hard_limit = 30
+    # hard_limit = 30
-    soft_limit = 12
+    soft_limit = 200
 [[vm]]
  size = 'performance-4x'
--- a/apps/api/openapi-v0.json
+++ b/apps/api/openapi-v0.json
@ -0,0 +1,924 @@
 {
  "openapi": "3.0.0",
  "info": {
    "title": "Firecrawl API",
    "version": "0.0.0",
    "description": "API for interacting with Firecrawl services to perform web scraping and crawling tasks.",
    "contact": {
      "name": "Firecrawl Support",
      "url": "https://firecrawl.dev/support",
      "email": "support@firecrawl.dev"
    }
  },
  "servers": [
    {
      "url": "https://api.firecrawl.dev/v0"
    }
  ],
  "paths": {
    "/scrape": {
      "post": {
        "summary": "Scrape a single URL and optionally extract information using an LLM",
        "operationId": "scrapeAndExtractFromUrl",
        "tags": ["Scraping"],
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri",
                    "description": "The URL to scrape"
                  },
                  "pageOptions": {
                    "type": "object",
                    "properties": {
                      "headers": {
                        "type": "object",
                        "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
                      },
                      "includeHtml": {
                        "type": "boolean",
                        "description": "Include the HTML version of the content on page. Will output a html key in the response.",
                        "default": false
                      },
                      "includeRawHtml": {
                        "type": "boolean",
                        "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
                        "default": false
                      },
                      "onlyIncludeTags": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
                      },
                      "onlyMainContent": {
                        "type": "boolean",
                        "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
                        "default": false
                      },
                      "removeTags": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
                      },
                      "replaceAllPathsWithAbsolutePaths": {
                        "type": "boolean",
                        "description": "Replace all relative paths with absolute paths for images and links",
                        "default": false
                      },
                      "screenshot": {
                        "type": "boolean",
                        "description": "Include a screenshot of the top of the page that you are scraping.",
                        "default": false
                      },
                      "fullPageScreenshot": {
                        "type": "boolean",
                        "description": "Include a full page screenshot of the page that you are scraping.",
                        "default": false
                      },
                      "waitFor": {
                        "type": "integer",
                        "description": "Wait x amount of milliseconds for the page to load to fetch content",
                        "default": 0
                      }
                    }
                  },
                  "extractorOptions": {
                    "type": "object",
                    "description": "Options for extraction of structured information from the page content. Note: LLM-based extraction is not performed by default and only occurs when explicitly configured. The 'markdown' mode simply returns the scraped markdown and is the default mode for scraping.",
                    "default": {},
                    "properties": {
                      "mode": {
                        "type": "string",
                        "enum": ["markdown", "llm-extraction", "llm-extraction-from-raw-html", "llm-extraction-from-markdown"],
                        "description": "The extraction mode to use. 'markdown': Returns the scraped markdown content, does not perform LLM extraction. 'llm-extraction': Extracts information from the cleaned and parsed content using LLM. 'llm-extraction-from-raw-html': Extracts information directly from the raw HTML using LLM. 'llm-extraction-from-markdown': Extracts information from the markdown content using LLM."
                      },
                      "extractionPrompt": {
                        "type": "string",
                        "description": "A prompt describing what information to extract from the page, applicable for LLM extraction modes."
                      },
                      "extractionSchema": {
                        "type": "object",
                        "additionalProperties": true,
                        "description": "The schema for the data to be extracted, required only for LLM extraction modes.",
                        "required": [
                          "company_mission",
                          "supports_sso",
                          "is_open_source"
                        ]
                      }
                    }
                  },
                  "timeout": {
                    "type": "integer",
                    "description": "Timeout in milliseconds for the request",
                    "default": 30000
                  }
                },
                "required": ["url"]
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ScrapeResponse"
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/crawl": {
      "post": {
        "summary": "Crawl multiple URLs based on options",
        "operationId": "crawlUrls",
        "tags": ["Crawling"],
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri",
                    "description": "The base URL to start crawling from"
                  },
                  "crawlerOptions": {
                    "type": "object",
                    "properties": {
                      "includes": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "URL patterns to include"
                      },
                      "excludes": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "URL patterns to exclude"
                      },
                      "generateImgAltText": {
                        "type": "boolean",
                        "description": "Generate alt text for images using LLMs (must have a paid plan)",
                        "default": false
                      },
                      "returnOnlyUrls": {
                        "type": "boolean",
                        "description": "If true, returns only the URLs as a list on the crawl status. Attention: the return response will be a list of URLs inside the data, not a list of documents.",
                        "default": false
                      },
                      "maxDepth": {
                        "type": "integer",
                        "description": "Maximum depth to crawl relative to the entered URL. A maxDepth of 0 scrapes only the entered URL. A maxDepth of 1 scrapes the entered URL and all pages one level deep. A maxDepth of 2 scrapes the entered URL and all pages up to two levels deep. Higher values follow the same pattern."
                      },
                      "mode": {
                        "type": "string",
                        "enum": ["default", "fast"],
                         "description": "The crawling mode to use. Fast mode crawls websites without a sitemap 4x faster, but may not be as accurate and shouldn't be used on heavily JS-rendered websites.",
                        "default": "default"
                      },
                      "ignoreSitemap": {
                        "type": "boolean",
                        "description": "Ignore the website sitemap when crawling",
                        "default": false
                      },
                      "limit": {
                        "type": "integer",
                        "description": "Maximum number of pages to crawl",
                        "default": 10000
                      },
                      "allowBackwardCrawling": {
                        "type": "boolean",
                        "description": "Enables the crawler to navigate from a specific URL to previously linked pages. For instance, from 'example.com/product/123' back to 'example.com/product'",
                        "default": false
                      },
                      "allowExternalContentLinks": {
                        "type": "boolean",
                        "description": "Allows the crawler to follow links to external websites.",
                        "default": false
                      }
                    }
                  },
                  "pageOptions": {
                    "type": "object",
                    "properties": {
                      "headers": {
                        "type": "object",
                        "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
                      },
                      "includeHtml": {
                        "type": "boolean",
                        "description": "Include the HTML version of the content on page. Will output a html key in the response.",
                        "default": false
                      },
                      "includeRawHtml": {
                        "type": "boolean",
                        "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
                        "default": false
                      },
                      "onlyIncludeTags": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
                      },
                      "onlyMainContent": {
                        "type": "boolean",
                        "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
                        "default": false
                      },
                      "removeTags": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
                      },
                      "replaceAllPathsWithAbsolutePaths": {
                        "type": "boolean",
                        "description": "Replace all relative paths with absolute paths for images and links",
                        "default": false
                      },
                      "screenshot": {
                        "type": "boolean",
                        "description": "Include a screenshot of the top of the page that you are scraping.",
                        "default": false
                      },
                      "fullPageScreenshot": {
                        "type": "boolean",
                        "description": "Include a full page screenshot of the page that you are scraping.",
                        "default": false
                      },
                      "waitFor": {
                        "type": "integer",
                        "description": "Wait x amount of milliseconds for the page to load to fetch content",
                        "default": 0
                      }
                    }
                  }
                },
                "required": ["url"]
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CrawlResponse"
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/search": {
      "post": {
        "summary": "Search for a keyword in Google, returns top page results with markdown content for each page",
        "operationId": "searchGoogle",
        "tags": ["Search"],
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "query": {
                    "type": "string",
                    "format": "uri",
                    "description": "The query to search for"
                  },
                  "pageOptions": {
                    "type": "object",
                    "properties": {
                      "onlyMainContent": {
                        "type": "boolean",
                        "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
                        "default": false
                      },
                      "fetchPageContent": {
                        "type": "boolean",
                        "description": "Fetch the content of each page. If false, defaults to a basic fast serp API.",
                        "default": true
                      },
                      "includeHtml": {
                        "type": "boolean",
                        "description": "Include the HTML version of the content on page. Will output a html key in the response.",
                        "default": false
                      },
                      "includeRawHtml": {
                        "type": "boolean",
                        "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
                        "default": false
                      }
                    }
                  },
                  "searchOptions": {
                    "type": "object",
                    "properties": {
                      "limit": {
                        "type": "integer",
                        "description": "Maximum number of results. Max is 20 during beta."
                      }
                    }
                  }
                },
                "required": ["query"]
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/SearchResponse"
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/crawl/status/{jobId}": {
      "get": {
        "tags": ["Crawl"],
        "summary": "Get the status of a crawl job",
        "operationId": "getCrawlStatus",
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "parameters": [
          {
            "name": "jobId",
            "in": "path",
            "description": "ID of the crawl job",
            "required": true,
            "schema": {
              "type": "string"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "status": {
                      "type": "string",
                      "description": "Status of the job (completed, active, failed, paused)"
                    },
                    "current": {
                      "type": "integer",
                      "description": "Current page number"
                    },
                    "total": {
                      "type": "integer",
                      "description": "Total number of pages"
                    },
                    "data": {
                      "type": "array",
                      "items": {
                        "$ref": "#/components/schemas/CrawlStatusResponseObj"
                      },
                      "description": "Data returned from the job (null when it is in progress)"
                    },
                    "partial_data": {
                      "type": "array",
                      "items": {
                        "$ref": "#/components/schemas/CrawlStatusResponseObj"
                      },
                      "description": "Partial documents returned as it is being crawled (streaming). **This feature is currently in alpha - expect breaking changes** When a page is ready, it will append to the partial_data array, so there is no need to wait for the entire website to be crawled. When the crawl is done, partial_data will become empty and the result will be available in `data`. There is a max of 50 items in the array response. The oldest item (top of the array) will be removed when the new item is added to the array."
                    }
                  }
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/crawl/cancel/{jobId}": {
      "delete": {
        "tags": ["Crawl"],
        "summary": "Cancel a crawl job",
        "operationId": "cancelCrawlJob",
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "parameters": [
          {
            "name": "jobId",
            "in": "path",
            "description": "ID of the crawl job",
            "required": true,
            "schema": {
              "type": "string"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "status": {
                      "type": "string",
                      "description": "Returns cancelled."
                    }
                  }
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "securitySchemes": {
      "bearerAuth": {
        "type": "http",
        "scheme": "bearer"
      }
    },
    "schemas": {
      "ScrapeResponse": {
        "type": "object",
        "properties": {
          "success": {
            "type": "boolean"
          },
          "data": {
            "type": "object",
            "properties": {
              "markdown": {
                "type": "string"
              },
              "content": {
                "type": "string"
              },
              "html": {
                "type": "string",
                "nullable": true,
                "description": "HTML version of the content on page if `includeHtml` is true"
              },
              "rawHtml": {
                "type": "string",
                "nullable": true,
                "description": "Raw HTML content of the page if `includeRawHtml` is true"
              },
              "metadata": {
                "type": "object",
                "properties": {
                  "title": {
                    "type": "string"
                  },
                  "description": {
                    "type": "string"
                  },
                  "language": {
                    "type": "string",
                    "nullable": true
                  },
                  "sourceURL": {
                    "type": "string",
                    "format": "uri"
                  },
                  "<any other metadata> ": {
                    "type": "string"
                  },
                  "pageStatusCode": {
                    "type": "integer",
                    "description": "The status code of the page"
                  },
                  "pageError": {
                    "type": "string",
                    "nullable": true,
                    "description": "The error message of the page"
                  }
                }
              },
              "llm_extraction": {
                "type": "object",
                "description": "Displayed when using LLM Extraction. Extracted data from the page following the schema defined.",
                "nullable": true
              },
              "warning": {
                "type": "string",
                "nullable": true,
                "description": "Can be displayed when using LLM Extraction. Warning message will let you know any issues with the extraction."
              }
            }
          }
        }
      },
      "CrawlStatusResponseObj": {
        "type": "object",
        "properties": {
          "markdown": {
            "type": "string"
          },
          "content": {
            "type": "string"
          },
          "html": {
            "type": "string",
            "nullable": true,
            "description": "HTML version of the content on page if `includeHtml` is true"
          },
          "rawHtml": {
            "type": "string",
            "nullable": true,
            "description": "Raw HTML content of the page if `includeRawHtml` is true"
          },
          "index": {
            "type": "integer",
            "description": "The number of the page that was crawled. This is useful for `partial_data` so you know which page the data is from." 
          },
          "metadata": {
            "type": "object",
            "properties": {
              "title": {
                "type": "string"
              },
              "description": {
                "type": "string"
              },
              "language": {
                "type": "string",
                "nullable": true
              },
              "sourceURL": {
                "type": "string",
                "format": "uri"
              },
              "<any other metadata> ": {
                "type": "string"
              },
              "pageStatusCode": {
                "type": "integer",
                "description": "The status code of the page"
              },
              "pageError": {
                "type": "string",
                "nullable": true,
                "description": "The error message of the page"
              }
            }
          }
        }
      },
      "SearchResponse": {
        "type": "object",
        "properties": {
          "success": {
            "type": "boolean"
          },
          "data": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "url": {
                  "type": "string"
                },
                "markdown": {
                  "type": "string"
                },
                "content": {
                  "type": "string"
                },
                "metadata": {
                  "type": "object",
                  "properties": {
                    "title": {
                      "type": "string"
                    },
                    "description": {
                      "type": "string"
                    },
                    "language": {
                      "type": "string",
                      "nullable": true
                    },
                    "sourceURL": {
                      "type": "string",
                      "format": "uri"
                    }
                  }
                }
              }
            }
          }
        }
      },
      "CrawlResponse": {
        "type": "object",
        "properties": {
          "jobId": {
            "type": "string"
          }
        }
      }
    }
  },
  "security": [
    {
      "bearerAuth": []
    }
  ]
 }
--- a/apps/api/openapi.json
+++ b/apps/api/openapi.json
@ -18,8 +18,8 @@
  "paths": {
    "/scrape": {
      "post": {
-        "summary": "Scrape a single URL and optionally extract information using an LLM",
+        "summary": "Scrape a single URL",
-        "operationId": "scrapeAndExtractFromUrl",
+        "operationId": "scrape",
        "tags": ["Scraping"],
        "security": [
          {
@ -38,89 +38,47 @@
                    "format": "uri",
                    "description": "The URL to scrape"
                  },
-                  "pageOptions": {
+                  "formats": {
-                    "type": "object",
+                    "type": "array",
-                    "properties": {
+                    "items": {
                      "type": "string",
                      "enum": ["markdown", "html", "rawHtml", "links", "screenshot", "screenshot@fullPage"]
                    },
                    "description": "Specific formats to return.\n\n - markdown: The page in Markdown format.\n - html: The page's HTML, trimmed to include only meaningful content.\n - rawHtml: The page's original HTML.\n - links: The links on the page.\n - screenshot: A screenshot of the top of the page.\n - screenshot@fullPage: A screenshot of the full page. (overridden by screenshot if present)",
                    "default": ["markdown"]
                  },
                  "headers": {
                    "type": "object",
                    "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
                  },
-                      "includeHtml": {
+                  "includeTags": {
                        "type": "boolean",
                        "description": "Include the HTML version of the content on page. Will output a html key in the response.",
                        "default": false
                      },
                      "includeRawHtml": {
                        "type": "boolean",
                        "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
                        "default": false
                      },
                      "onlyIncludeTags": {
                    "type": "array",
                    "items": {
                      "type": "string"
                    },
                    "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
                  },
-                      "onlyMainContent": {
+                  "excludeTags": {
                        "type": "boolean",
                        "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
                        "default": false
                      },
                      "removeTags": {
                    "type": "array",
                    "items": {
                      "type": "string"
                    },
                    "description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
                  },
-                      "replaceAllPathsWithAbsolutePaths": {
+                  "onlyMainContent": {
                    "type": "boolean",
-                        "description": "Replace all relative paths with absolute paths for images and links",
+                    "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
-                        "default": false
+                    "default": true
                      },
                      "screenshot": {
                        "type": "boolean",
                        "description": "Include a screenshot of the top of the page that you are scraping.",
                        "default": false
                      },
                      "waitFor": {
                        "type": "integer",
                        "description": "Wait x amount of milliseconds for the page to load to fetch content",
                        "default": 0
                      }
                    }
                  },
                  "extractorOptions": {
                    "type": "object",
                    "description": "Options for extraction of structured information from the page content. Note: LLM-based extraction is not performed by default and only occurs when explicitly configured. The 'markdown' mode simply returns the scraped markdown and is the default mode for scraping.",
                    "default": {},
                    "properties": {
                      "mode": {
                        "type": "string",
                        "enum": ["markdown", "llm-extraction", "llm-extraction-from-raw-html", "llm-extraction-from-markdown"],
                        "description": "The extraction mode to use. 'markdown': Returns the scraped markdown content, does not perform LLM extraction. 'llm-extraction': Extracts information from the cleaned and parsed content using LLM. 'llm-extraction-from-raw-html': Extracts information directly from the raw HTML using LLM. 'llm-extraction-from-markdown': Extracts information from the markdown content using LLM."
                      },
                      "extractionPrompt": {
                        "type": "string",
                        "description": "A prompt describing what information to extract from the page, applicable for LLM extraction modes."
                      },
                      "extractionSchema": {
                        "type": "object",
                        "additionalProperties": true,
                        "description": "The schema for the data to be extracted, required only for LLM extraction modes.",
                        "required": [
                          "company_mission",
                          "supports_sso",
                          "is_open_source"
                        ]
                      }
                    }
                  },
                  "timeout": {
                    "type": "integer",
                    "description": "Timeout in milliseconds for the request",
                    "default": 30000
                  },
                  "waitFor": {
                    "type": "integer",
                    "description": "Wait x amount of milliseconds for the page to load to fetch content",
                    "default": 0
                  }
                },
                "required": ["url"]
@ -317,6 +275,11 @@
                        "description": "Include a screenshot of the top of the page that you are scraping.",
                        "default": false
                      },
                      "fullPageScreenshot": {
                        "type": "boolean",
                        "description": "Include a full page screenshot of the page that you are scraping.",
                        "default": false
                      },
                      "waitFor": {
                        "type": "integer",
                        "description": "Wait x amount of milliseconds for the page to load to fetch content",
@ -731,24 +694,42 @@
          "success": {
            "type": "boolean"
          },
          "warning": {
            "type": "string",
            "nullable": true,
            "description": "Warning message to let you know of any issues."
          },
          "data": {
            "type": "object",
            "properties": {
              "markdown": {
-                "type": "string"
+                "type": "string",
-              },
+                "nullable": true,
-              "content": {
+                "description": "Markdown content of the page if the `markdown` format was specified (default)"
                "type": "string"
              },
              "html": {
                "type": "string",
                "nullable": true,
-                "description": "HTML version of the content on page if `includeHtml`  is true"
+                "description": "HTML version of the content on page if the `html` format was specified"
              },
              "rawHtml": {
                "type": "string",
                "nullable": true,
-                "description": "Raw HTML content of the page if `includeRawHtml`  is true"
+                "description": "Raw HTML content of the page if the `rawHtml` format was specified"
              },
              "links": {
                "type": "array",
                "items": {
                  "type": "string",
                  "format": "uri"
                },
                "nullable": true,
                "description": "Links on the page if the `links` format was specified"
              },
              "screenshot": {
                "type": "string",
                "nullable": true,
                "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullPage` format was specified"
              },
              "metadata": {
                "type": "object",
@ -770,27 +751,16 @@
                  "<any other metadata> ": {
                    "type": "string"
                  },
-                  "pageStatusCode": {
+                  "statusCode": {
                    "type": "integer",
                    "description": "The status code of the page"
                  },
-                  "pageError": {
+                  "error": {
                    "type": "string",
                    "nullable": true,
                    "description": "The error message of the page"
                  }
                }
              },
              "llm_extraction": {
                "type": "object",
                "description": "Displayed when using LLM Extraction. Extracted data from the page following the schema defined.",
                "nullable": true
              },
              "warning": {
                "type": "string",
                "nullable": true,
                "description": "Can be displayed when using LLM Extraction. Warning message will let you know any issues with the extraction."
              }
            }
          }
@ -800,24 +770,33 @@
        "type": "object",
        "properties": {
          "markdown": {
-            "type": "string"
+            "type": "string",
-          },
+            "nullable": true,
-          "content": {
+            "description": "Markdown content of the page if the `markdown` format was specified (default)"
            "type": "string"
          },
          "html": {
            "type": "string",
            "nullable": true,
-            "description": "HTML version of the content on page if `includeHtml`  is true"
+            "description": "HTML version of the content on page if the `html` format was specified"
          },
          "rawHtml": {
            "type": "string",
            "nullable": true,
-            "description": "Raw HTML content of the page if `includeRawHtml`  is true"
+            "description": "Raw HTML content of the page if the `rawHtml` format was specified"
          },
-          "index": {
+          "links": {
-            "type": "integer",
+            "type": "array",
-            "description": "The number of the page that was crawled. This is useful for `partial_data` so you know which page the data is from." 
+            "items": {
              "type": "string",
              "format": "uri"
            },
            "nullable": true,
            "description": "Links on the page if the `links` format was specified"
          },
          "screenshot": {
            "type": "string",
            "nullable": true,
            "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullPage` format was specified"
          },
          "metadata": {
            "type": "object",
@ -839,11 +818,11 @@
              "<any other metadata> ": {
                "type": "string"
              },
-              "pageStatusCode": {
+              "statusCode": {
                "type": "integer",
                "description": "The status code of the page"
              },
-              "pageError": {
+              "error": {
                "type": "string",
                "nullable": true,
                "description": "The error message of the page"
@ -861,16 +840,34 @@
          "data": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "url": {
                  "type": "string"
                },
              "markdown": {
-                  "type": "string"
+                "type": "string",
                "nullable": true,
                "description": "Markdown content of the page if the `markdown` format was specified (default)"
              },
-                "content": {
+              "html": {
-                  "type": "string"
+                "type": "string",
                "nullable": true,
                "description": "HTML version of the content on page if the `html` format was specified"
              },
              "rawHtml": {
                "type": "string",
                "nullable": true,
                "description": "Raw HTML content of the page if the `rawHtml` format was specified"
              },
              "links": {
                "type": "array",
                "items": {
                  "type": "string",
                  "format": "uri"
                },
                "nullable": true,
                "description": "Links on the page if the `links` format was specified"
              },
              "screenshot": {
                "type": "string",
                "nullable": true,
                "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullPage` format was specified"
              },
              "metadata": {
                "type": "object",
@ -888,7 +885,18 @@
                  "sourceURL": {
                    "type": "string",
                    "format": "uri"
-                    }
+                  },
                  "<any other metadata> ": {
                    "type": "string"
                  },
                  "statusCode": {
                    "type": "integer",
                    "description": "The status code of the page"
                  },
                  "error": {
                    "type": "string",
                    "nullable": true,
                    "description": "The error message of the page"
                  }
                }
              }
@ -899,8 +907,15 @@
      "CrawlResponse": {
        "type": "object",
        "properties": {
-          "jobId": {
+          "success": {
            "type": "boolean"
          },
          "id": {
            "type": "string"
          },
          "url": {
            "type": "string",
            "format": "uri"
          }
        }
      }
--- a/apps/api/package.json
+++ b/apps/api/package.json
@ -57,6 +57,8 @@
    "@nangohq/node": "^0.40.8",
    "@sentry/node": "^8.13.0",
    "@supabase/supabase-js": "^2.44.2",
    "@types/express-ws": "^3.0.4",
    "@types/ws": "^8.5.12",
    "ajv": "^8.16.0",
    "async": "^3.2.5",
    "async-mutex": "^0.5.0",
@ -71,6 +73,7 @@
    "date-fns": "^3.6.0",
    "dotenv": "^16.3.1",
    "express-rate-limit": "^7.3.1",
    "express-ws": "^5.0.2",
    "form-data": "^4.0.0",
    "glob": "^10.4.2",
    "gpt3-tokenizer": "^1.1.5",
@ -93,6 +96,7 @@
    "promptable": "^0.0.10",
    "puppeteer": "^22.12.1",
    "rate-limiter-flexible": "2.4.2",
    "redlock": "5.0.0-beta.2",
    "resend": "^3.4.0",
    "robots-parser": "^3.0.1",
    "scrapingbee": "^1.7.4",
@ -104,8 +108,9 @@
    "unstructured-client": "^0.11.3",
    "uuid": "^10.0.0",
    "wordpos": "^2.1.0",
    "ws": "^8.18.0",
    "xml2js": "^0.6.2",
-    "zod": "^3.23.4",
+    "zod": "^3.23.8",
    "zod-to-json-schema": "^3.23.1"
  },
  "nodemonConfig": {
--- a/apps/api/pnpm-lock.yaml
+++ b/apps/api/pnpm-lock.yaml
@ -41,6 +41,12 @@ importers:
      '@supabase/supabase-js':
        specifier: ^2.44.2
        version: 2.44.2
      '@types/express-ws':
        specifier: ^3.0.4
        version: 3.0.4
      '@types/ws':
        specifier: ^8.5.12
        version: 8.5.12
      ajv:
        specifier: ^8.16.0
        version: 8.16.0
@ -83,6 +89,9 @@ importers:
      express-rate-limit:
        specifier: ^7.3.1
        version: 7.3.1(express@4.19.2)
      express-ws:
        specifier: ^5.0.2
        version: 5.0.2(express@4.19.2)
      form-data:
        specifier: ^4.0.0
        version: 4.0.0
@ -106,7 +115,7 @@ importers:
        version: 0.0.28
      langchain:
        specifier: ^0.2.8
-        version: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1)
+        version: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
      languagedetect:
        specifier: ^2.0.0
        version: 2.0.0
@ -149,6 +158,9 @@ importers:
      rate-limiter-flexible:
        specifier: 2.4.2
        version: 2.4.2
      redlock:
        specifier: 5.0.0-beta.2
        version: 5.0.0-beta.2
      resend:
        specifier: ^3.4.0
        version: 3.4.0
@ -182,11 +194,14 @@ importers:
      wordpos:
        specifier: ^2.1.0
        version: 2.1.0
      ws:
        specifier: ^8.18.0
        version: 8.18.0
      xml2js:
        specifier: ^0.6.2
        version: 0.6.2
      zod:
-        specifier: ^3.23.4
+        specifier: ^3.23.8
        version: 3.23.8
      zod-to-json-schema:
        specifier: ^3.23.1
@ -1556,6 +1571,9 @@ packages:
  '@types/express-serve-static-core@4.19.3':
    resolution: {integrity: sha512-KOzM7MhcBFlmnlr/fzISFF5vGWVSvN6fTd4T+ExOt08bA/dA5kpSzY52nMsI1KDFmUREpJelPYyuslLRSjjgCg==}
  '@types/express-ws@3.0.4':
    resolution: {integrity: sha512-Yjj18CaivG5KndgcvzttWe8mPFinPCHJC2wvyQqVzA7hqeufM8EtWMj6mpp5omg3s8XALUexhOu8aXAyi/DyJQ==}
  '@types/express@4.17.21':
    resolution: {integrity: sha512-ejlPM315qwLpaQlQDTjPdsUFSc6ZsP4AN6AlWnogPjQ7CVi7PYF3YVz+CY3jE2pwYf7E/7HlDAN0rV2GxTG0HQ==}
@ -1658,8 +1676,8 @@ packages:
  '@types/whatwg-url@11.0.5':
    resolution: {integrity: sha512-coYR071JRaHa+xoEvvYqvnIHaVqaYrLPbsufM9BF63HkwI5Lgmy2QR8Q5K/lYDYo5AK82wOvSOS0UsLTpTG7uQ==}
-  '@types/ws@8.5.10':
+  '@types/ws@8.5.12':
-    resolution: {integrity: sha512-vmQSUcfalpIq0R9q7uTo2lXs6eGIpt9wtnLdMv9LVpIjCA/+ufZRozlVoVelIYixx1ugCBKDhn89vnsEGOCx9A==}
+    resolution: {integrity: sha512-3tPRkv1EtkDpzlgyKyI8pGsGZAGPEaXeu0DOj5DI25Ja91bdAYddYHbADRYVrZMRbfW+1l5YwXVDKohDJNQxkQ==}
  '@types/yargs-parser@21.0.3':
    resolution: {integrity: sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==}
@ -2413,6 +2431,12 @@ packages:
    peerDependencies:
      express: 4 || 5 || ^5.0.0-beta.1
  express-ws@5.0.2:
    resolution: {integrity: sha512-0uvmuk61O9HXgLhGl3QhNSEtRsQevtmbL94/eILaliEADZBHZOQUAiHFrGPrgsjikohyrmSG5g+sCfASTt0lkQ==}
    engines: {node: '>=4.5.0'}
    peerDependencies:
      express: ^4.0.0 || ^5.0.0-alpha.1
  express@4.19.2:
    resolution: {integrity: sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==}
    engines: {node: '>= 0.10.0'}
@ -3950,6 +3974,10 @@ packages:
  redis@4.6.14:
    resolution: {integrity: sha512-GrNg/e33HtsQwNXL7kJT+iNFPSwE1IPmd7wzV3j4f2z0EYxZfZE7FVTmUysgAtqQQtg5NXF5SNLR9OdO/UHOfw==}
  redlock@5.0.0-beta.2:
    resolution: {integrity: sha512-2RDWXg5jgRptDrB1w9O/JgSZC0j7y4SlaXnor93H/UJm/QyDiFgBKNtrh0TI6oCXqYSaSoXxFh6Sd3VtYfhRXw==}
    engines: {node: '>=12'}
  regenerator-runtime@0.14.1:
    resolution: {integrity: sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw==}
@ -4540,8 +4568,20 @@ packages:
    resolution: {integrity: sha512-+QU2zd6OTD8XWIJCbffaiQeH9U73qIqafo1x6V1snCWYGJf6cVE0cDR4D8xRzcEnfI21IFrUPzPGtcPf8AC+Rw==}
    engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0}
-  ws@8.17.1:
+  ws@7.5.10:
-    resolution: {integrity: sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==}
+    resolution: {integrity: sha512-+dbF1tHwZpXcbOJdVOkzLDxZP1ailvSxM6ZweXTegylPny803bFhA+vqBYw4s31NSAk4S2Qz+AKXK9a4wkdjcQ==}
    engines: {node: '>=8.3.0'}
    peerDependencies:
      bufferutil: ^4.0.1
      utf-8-validate: ^5.0.2
    peerDependenciesMeta:
      bufferutil:
        optional: true
      utf-8-validate:
        optional: true
  ws@8.18.0:
    resolution: {integrity: sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==}
    engines: {node: '>=10.0.0'}
    peerDependencies:
      bufferutil: ^4.0.1
@ -5178,13 +5218,13 @@ snapshots:
  '@js-sdsl/ordered-map@4.4.2': {}
-  '@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)':
+  '@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)':
    dependencies:
      ansi-styles: 5.2.0
      camelcase: 6.3.0
      decamelize: 1.2.0
      js-tiktoken: 1.0.12
-      langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)
+      langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
      ml-distance: 4.0.1
      mustache: 4.2.0
      p-queue: 6.6.2
@ -5196,9 +5236,9 @@ snapshots:
      - langchain
      - openai
-  '@langchain/openai@0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))':
+  '@langchain/openai@0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
    dependencies:
-      '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)
+      '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
      js-tiktoken: 1.0.12
      openai: 4.52.2
      zod: 3.23.8
@ -5207,9 +5247,9 @@ snapshots:
      - encoding
      - langchain
-  '@langchain/textsplitters@0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)':
+  '@langchain/textsplitters@0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)':
    dependencies:
-      '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)
+      '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
      js-tiktoken: 1.0.12
    transitivePeerDependencies:
      - langchain
@ -6367,8 +6407,8 @@ snapshots:
    dependencies:
      '@supabase/node-fetch': 2.6.15
      '@types/phoenix': 1.6.5
-      '@types/ws': 8.5.10
+      '@types/ws': 8.5.12
-      ws: 8.17.1
+      ws: 8.18.0
    transitivePeerDependencies:
      - bufferutil
      - utf-8-validate
@ -6465,6 +6505,12 @@ snapshots:
      '@types/range-parser': 1.2.7
      '@types/send': 0.17.4
  '@types/express-ws@3.0.4':
    dependencies:
      '@types/express': 4.17.21
      '@types/express-serve-static-core': 4.19.3
      '@types/ws': 8.5.12
  '@types/express@4.17.21':
    dependencies:
      '@types/body-parser': 1.19.5
@ -6588,7 +6634,7 @@ snapshots:
    dependencies:
      '@types/webidl-conversions': 7.0.3
-  '@types/ws@8.5.10':
+  '@types/ws@8.5.12':
    dependencies:
      '@types/node': 20.14.1
@ -7329,6 +7375,14 @@ snapshots:
    dependencies:
      express: 4.19.2
  express-ws@5.0.2(express@4.19.2):
    dependencies:
      express: 4.19.2
      ws: 7.5.10
    transitivePeerDependencies:
      - bufferutil
      - utf-8-validate
  express@4.19.2:
    dependencies:
      accepts: 1.3.8
@ -8241,17 +8295,17 @@ snapshots:
  kleur@3.0.3: {}
-  langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1):
+  langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
    dependencies:
-      '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)
+      '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
-      '@langchain/openai': 0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))
+      '@langchain/openai': 0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
-      '@langchain/textsplitters': 0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)
+      '@langchain/textsplitters': 0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
      binary-extensions: 2.3.0
      js-tiktoken: 1.0.12
      js-yaml: 4.1.0
      jsonpointer: 5.0.1
      langchainhub: 0.0.11
-      langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)
+      langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
      ml-distance: 4.0.1
      openapi-types: 12.1.3
      p-retry: 4.6.2
@ -8271,14 +8325,14 @@ snapshots:
      pdf-parse: 1.1.1
      puppeteer: 22.12.1(typescript@5.4.5)
      redis: 4.6.14
-      ws: 8.17.1
+      ws: 8.18.0
    transitivePeerDependencies:
      - encoding
      - openai
  langchainhub@0.0.11: {}
-  langsmith@0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2):
+  langsmith@0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2):
    dependencies:
      '@types/uuid': 9.0.8
      commander: 10.0.1
@ -8287,8 +8341,8 @@ snapshots:
      p-retry: 4.6.2
      uuid: 9.0.1
    optionalDependencies:
-      '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)
+      '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
-      langchain: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1)
+      langchain: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
      openai: 4.52.2
  languagedetect@2.0.0: {}
@ -8992,7 +9046,7 @@ snapshots:
      chromium-bidi: 0.5.24(devtools-protocol@0.0.1299070)
      debug: 4.3.5
      devtools-protocol: 0.0.1299070
-      ws: 8.17.1
+      ws: 8.18.0
    transitivePeerDependencies:
      - bufferutil
      - supports-color
@ -9098,6 +9152,10 @@ snapshots:
      '@redis/search': 1.1.6(@redis/client@1.5.16)
      '@redis/time-series': 1.0.5(@redis/client@1.5.16)
  redlock@5.0.0-beta.2:
    dependencies:
      node-abort-controller: 3.1.1
  regenerator-runtime@0.14.1: {}
  require-directory@2.1.1: {}
@ -9670,7 +9728,9 @@ snapshots:
      imurmurhash: 0.1.4
      signal-exit: 4.1.0
-  ws@8.17.1: {}
+  ws@7.5.10: {}
  ws@8.18.0: {}
  xml2js@0.6.2:
    dependencies:
--- a/apps/api/src/tests/e2e_v1_withAuth/index.test.ts
+++ b/apps/api/src/tests/e2e_v1_withAuth/index.test.ts
@ -0,0 +1,609 @@
 import request from "supertest";
 import dotenv from "dotenv";
 import {
  ScrapeOptions,
  ScrapeRequest,
  ScrapeResponseRequestTest,
 } from "../../controllers/v1/types";
 // Load variables from a local .env file into process.env; the tests below read
 // TEST_API_KEY from it.
 dotenv.config();
 // Base URL every request in this suite is sent to.
 // NOTE(review): presumably the API server must already be listening here before
 // the suite starts — confirm against the CI workflow.
 const TEST_URL = "http://127.0.0.1:3002";
 describe("E2E Tests for v1 API Routes", () => {
  // Switch the DB-authentication flag on before any test runs and remove it again
  // once the suite has finished.
  // NOTE(review): requests go to TEST_URL over HTTP; presumably the server reads
  // this flag from its own environment — confirm it is set there as well.
  beforeAll(() => {
    process.env["USE_DB_AUTHENTICATION"] = "true";
  });
  afterAll(() => {
    delete process.env["USE_DB_AUTHENTICATION"];
  });
  describe("GET /is-production", () => {
    // Sent without an Authorization header; only the status code and the presence
    // of the `isProduction` field are checked, not its value.
    it.concurrent("should return the production status", async () => {
      const productionStatus: ScrapeResponseRequestTest = await request(
        TEST_URL
      ).get("/is-production");

      const { statusCode, body } = productionStatus;
      expect(statusCode).toBe(200);
      expect(body).toHaveProperty("isProduction");
    });
  });
  describe("POST /v1/scrape", () => {
    it.concurrent("should require authorization", async () => {
      // No Authorization header is attached, so the request must be rejected.
      const anonymousAttempt: ScrapeResponseRequestTest = await request(
        TEST_URL
      ).post("/v1/scrape");

      const { statusCode } = anonymousAttempt;
      expect(statusCode).toBe(401);
    });
    it.concurrent("should throw error for blocklisted URL", async () => {
      // A facebook.com URL is expected to be refused with 403 and a fixed message.
      const blockedTarget: ScrapeRequest = {
        url: "https://facebook.com/fake-test",
      };
      const expectedMessage =
        "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.";

      const blockedReply = await request(TEST_URL)
        .post("/v1/scrape")
        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
        .set("Content-Type", "application/json")
        .send(blockedTarget);

      const { statusCode, body } = blockedReply;
      expect(statusCode).toBe(403);
      expect(body.error).toBe(expectedMessage);
    });
    it.concurrent(
      "should return an error response with an invalid API key",
      async () => {
        // Well-formed request body, but the bearer token is not a real key.
        const payload = { url: "https://firecrawl.dev" };

        const rejected: ScrapeResponseRequestTest = await request(TEST_URL)
          .post("/v1/scrape")
          .set("Authorization", "Bearer invalid-api-key")
          .set("Content-Type", "application/json")
          .send(payload);

        expect(rejected.statusCode).toBe(401);
      }
    );
    it.concurrent(
      "should return a successful response with a valid API key",
      async () => {
        // Request without `formats`: the assertions below expect markdown and
        // metadata, but neither a `content` nor an `html` field.
        const defaultScrape: ScrapeRequest = { url: "https://roastmywebsite.ai" };

        const reply: ScrapeResponseRequestTest = await request(TEST_URL)
          .post("/v1/scrape")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
          .set("Content-Type", "application/json")
          .send(defaultScrape);

        expect(reply.statusCode).toBe(200);

        if (!("data" in reply.body)) {
          throw new Error("Expected response body to have 'data' property");
        }
        const { data } = reply.body;

        // Shape of the returned document.
        expect(data).not.toHaveProperty("content");
        expect(data).toHaveProperty("markdown");
        expect(data).toHaveProperty("metadata");
        expect(data).not.toHaveProperty("html");
        expect(data.markdown).toContain("_Roast_");

        // Page metadata; the same description text is expected in both the plain
        // and the Open Graph field.
        const { metadata } = data;
        const siteDescription =
          "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️";

        expect(metadata.error).toBeUndefined();
        expect(metadata.title).toBe("Roast My Website");
        expect(metadata.description).toBe(siteDescription);
        expect(metadata.keywords).toBe(
          "Roast My Website,Roast,Website,GitHub,Firecrawl"
        );
        expect(metadata.robots).toBe("follow, index");
        expect(metadata.ogTitle).toBe("Roast My Website");
        expect(metadata.ogDescription).toBe(siteDescription);
        expect(metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
        expect(metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
        expect(metadata.ogLocaleAlternate).toStrictEqual([]);
        expect(metadata.ogSiteName).toBe("Roast My Website");
        expect(metadata.sourceURL).toBe("https://roastmywebsite.ai");
        expect(metadata.statusCode).toBe(200);
      },
      30000
    ); // 30 seconds timeout
    it.concurrent(
      "should return a successful response with a valid API key and the html format requested",
      async () => {
        // Asking for both formats must yield `markdown` and `html` side by side.
        const scrapeRequest: ScrapeRequest = {
          url: "https://roastmywebsite.ai",
          formats: ["markdown", "html"],
        };

        const response: ScrapeResponseRequestTest = await request(TEST_URL)
          .post("/v1/scrape")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
          .set("Content-Type", "application/json")
          .send(scrapeRequest);

        expect(response.statusCode).toBe(200);
        expect(response.body).toHaveProperty("data");
        if (!("data" in response.body)) {
          throw new Error("Expected response body to have 'data' property");
        }

        expect(response.body.data).toHaveProperty("markdown");
        expect(response.body.data).toHaveProperty("html");
        expect(response.body.data).toHaveProperty("metadata");
        expect(response.body.data.markdown).toContain("_Roast_");
        expect(response.body.data.html).toContain("<h1");
        expect(response.body.data.metadata.statusCode).toBe(200);
        expect(response.body.data.metadata.error).toBeUndefined();
      },
      30000
    );
    it.concurrent('should return a successful response for a valid scrape with PDF file', async () => {
      // The URL carries an explicit `.pdf` extension.
      const scrapeRequest: ScrapeRequest = {
        url: "https://arxiv.org/pdf/astro-ph/9301001.pdf"
      };

      // The awaited response already carries the scraped document, so the
      // assertions run as soon as it arrives.
      const response: ScrapeResponseRequestTest = await request(TEST_URL)
        .post('/v1/scrape')
        .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
        .set('Content-Type', 'application/json')
        .send(scrapeRequest);

      expect(response.statusCode).toBe(200);
      expect(response.body).toHaveProperty('data');
      if (!("data" in response.body)) {
        throw new Error("Expected response body to have 'data' property");
      }
      expect(response.body.data).toHaveProperty('markdown');
      expect(response.body.data).toHaveProperty('metadata');
      expect(response.body.data.markdown).toContain('Broad Line Radio Galaxy');
      expect(response.body.data.metadata.statusCode).toBe(200);
      expect(response.body.data.metadata.error).toBeUndefined();
    }, 60000);
      it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
        // Same arXiv document as the `.pdf` case, addressed without the extension.
        const scrapeRequest: ScrapeRequest = {
          url: "https://arxiv.org/pdf/astro-ph/9301001"
        };

        // The awaited response already carries the scraped document, so the
        // assertions run as soon as it arrives.
        const response: ScrapeResponseRequestTest = await request(TEST_URL)
          .post('/v1/scrape')
          .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
          .set('Content-Type', 'application/json')
          .send(scrapeRequest);

        expect(response.statusCode).toBe(200);
        expect(response.body).toHaveProperty('data');
        if (!("data" in response.body)) {
          throw new Error("Expected response body to have 'data' property");
        }
        expect(response.body.data).toHaveProperty('markdown');
        expect(response.body.data).toHaveProperty('metadata');
        expect(response.body.data.markdown).toContain('Broad Line Radio Galaxy');
        expect(response.body.data.metadata.statusCode).toBe(200);
        expect(response.body.data.metadata.error).toBeUndefined();
      }, 60000);
      it.concurrent("should return a successful response with a valid API key with excludeTags option", async () => {
        // Baseline: scrape the page with nothing excluded to prove the nav link and
        // the footer text are present to begin with.
        const baselineRequest: ScrapeRequest = {
          url: "https://www.scrapethissite.com/",
          onlyMainContent: false // default is true
        };
        const baselineResponse: ScrapeResponseRequestTest = await request(TEST_URL)
          .post("/v1/scrape")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
          .set("Content-Type", "application/json")
          .send(baselineRequest);
        expect(baselineResponse.statusCode).toBe(200);
        expect(baselineResponse.body).toHaveProperty("data");
        if (!("data" in baselineResponse.body)) {
          throw new Error("Expected response body to have 'data' property");
        }
        expect(baselineResponse.body.data).toHaveProperty("markdown");
        expect(baselineResponse.body.data).toHaveProperty("metadata");
        expect(baselineResponse.body.data).not.toHaveProperty("html");
        expect(baselineResponse.body.data.markdown).toContain("[FAQ](/faq/)"); // .nav
        expect(baselineResponse.body.data.markdown).toContain("Hartley Brody 2023"); // #footer

        // Same page again, now excluding the `.nav`, `#footer` and `strong` selectors.
        const scrapeRequestWithExcludeTags: ScrapeRequest = {
          url: "https://www.scrapethissite.com/",
          excludeTags: ['.nav', '#footer', 'strong'],
          onlyMainContent: false // default is true
        };
        const response: ScrapeResponseRequestTest = await request(TEST_URL)
          .post("/v1/scrape")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
          .set("Content-Type", "application/json")
          .send(scrapeRequestWithExcludeTags);
        expect(response.statusCode).toBe(200);
        expect(response.body).toHaveProperty("data");
        if (!("data" in response.body)) {
          throw new Error("Expected response body to have 'data' property");
        }
        expect(response.body.data).toHaveProperty("markdown");
        expect(response.body.data).toHaveProperty("metadata");
        expect(response.body.data).not.toHaveProperty("html");
        expect(response.body.data.markdown).not.toContain("Hartley Brody 2023"); // #footer
        expect(response.body.data.markdown).not.toContain("[FAQ](/faq/)"); // .nav
      }, 30000);
      it.concurrent('should return a successful response for a scrape with 400 page', async () => {
        // The API call itself is expected to succeed (200); the target page's own
        // status is reported in `metadata.statusCode`. The awaited response already
        // carries that document, so the assertions run as soon as it arrives.
        const response: ScrapeResponseRequestTest = await request(TEST_URL)
          .post('/v1/scrape')
          .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
          .set('Content-Type', 'application/json')
          .send({ url: 'https://httpstat.us/400' });

        expect(response.statusCode).toBe(200);
        expect(response.body).toHaveProperty('data');
        if (!("data" in response.body)) {
          throw new Error("Expected response body to have 'data' property");
        }
        expect(response.body.data).toHaveProperty('markdown');
        expect(response.body.data).toHaveProperty('metadata');
        expect(response.body.data.metadata.statusCode).toBe(400);
      }, 60000);
      it.concurrent('should return a successful response for a scrape with 401 page', async () => {
        // The API call itself is expected to succeed (200); the target page's own
        // status is reported in `metadata.statusCode`. The awaited response already
        // carries that document, so the assertions run as soon as it arrives.
        const response: ScrapeResponseRequestTest = await request(TEST_URL)
          .post('/v1/scrape')
          .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
          .set('Content-Type', 'application/json')
          .send({ url: 'https://httpstat.us/401' });

        expect(response.statusCode).toBe(200);
        expect(response.body).toHaveProperty('data');
        if (!("data" in response.body)) {
          throw new Error("Expected response body to have 'data' property");
        }
        expect(response.body.data).toHaveProperty('markdown');
        expect(response.body.data).toHaveProperty('metadata');
        expect(response.body.data.metadata.statusCode).toBe(401);
      }, 60000);
      it.concurrent('should return a successful response for a scrape with 403 page', async () => {
        const response: ScrapeResponseRequestTest = await request(TEST_URL)
          .post('/v1/scrape')
          .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
          .set('Content-Type', 'application/json')
          .send({ url: 'https://httpstat.us/403' });
        await new Promise((r) => setTimeout(r, 5000));
        expect(response.statusCode).toBe(200);
        expect(response.body).toHaveProperty('data');
        if (!("data" in response.body)) {
          throw new Error("Expected response body to have 'data' property");
        }
        expect(response.body.data).toHaveProperty('markdown');
        expect(response.body.data).toHaveProperty('metadata');
        expect(response.body.data.metadata.statusCode).toBe(403);
      }, 60000);
      it.concurrent('should return a successful response for a scrape with 404 page', async () => {
        const response: ScrapeResponseRequestTest = await request(TEST_URL)
          .post('/v1/scrape')
          .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
          .set('Content-Type', 'application/json')
          .send({ url: 'https://httpstat.us/404' });
        await new Promise((r) => setTimeout(r, 5000));
        expect(response.statusCode).toBe(200);
        expect(response.body).toHaveProperty('data');
        if (!("data" in response.body)) {
          throw new Error("Expected response body to have 'data' property");
        }
        expect(response.body.data).toHaveProperty('markdown');
        expect(response.body.data).toHaveProperty('metadata');
        expect(response.body.data.metadata.statusCode).toBe(404);
      }, 60000);
      it.concurrent('should return a successful response for a scrape with 405 page', async () => {
        const response: ScrapeResponseRequestTest = await request(TEST_URL)
          .post('/v1/scrape')
          .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
          .set('Content-Type', 'application/json')
          .send({ url: 'https://httpstat.us/405' });
        await new Promise((r) => setTimeout(r, 5000));
        expect(response.statusCode).toBe(200);
        expect(response.body).toHaveProperty('data');
        if (!("data" in response.body)) {
          throw new Error("Expected response body to have 'data' property");
        }
        expect(response.body.data).toHaveProperty('markdown');
        expect(response.body.data).toHaveProperty('metadata');
        expect(response.body.data.metadata.statusCode).toBe(405);
      }, 60000);
      it.concurrent('should return a successful response for a scrape with 500 page', async () => {
        const response: ScrapeResponseRequestTest = await request(TEST_URL)
          .post('/v1/scrape')
          .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
          .set('Content-Type', 'application/json')
          .send({ url: 'https://httpstat.us/500' });
        await new Promise((r) => setTimeout(r, 5000));
        expect(response.statusCode).toBe(200);
        expect(response.body).toHaveProperty('data');
        if (!("data" in response.body)) {
          throw new Error("Expected response body to have 'data' property");
        }
        expect(response.body.data).toHaveProperty('markdown');
        expect(response.body.data).toHaveProperty('metadata');
        expect(response.body.data.metadata.statusCode).toBe(500);
      }, 60000);
      // Sends a scrape request carrying `timeout: 1000` (presumably
      // milliseconds; confirm against the API schema) and expects the API to
      // answer with HTTP 408.
      it.concurrent(
        "should return a timeout error when scraping takes longer than the specified timeout",
        async () => {
          const payload = { url: "https://firecrawl.dev", timeout: 1000 };
          const authHeader = `Bearer ${process.env.TEST_API_KEY}`;

          const result: ScrapeResponseRequestTest = await request(TEST_URL)
            .post("/v1/scrape")
            .set("Authorization", authHeader)
            .set("Content-Type", "application/json")
            .send(payload);

          expect(result.statusCode).toBe(408);
        },
        3000
      );
      // Requests the "html" and "rawHtml" formats and verifies that both are
      // returned together with metadata, and that no markdown is included.
      it.concurrent(
        "should return a successful response with a valid API key and includeHtml set to true",
        async () => {
          const payload: ScrapeRequest = {
            url: "https://roastmywebsite.ai",
            formats: ["html", "rawHtml"],
          };
          const authHeader = `Bearer ${process.env.TEST_API_KEY}`;

          const result: ScrapeResponseRequestTest = await request(TEST_URL)
            .post("/v1/scrape")
            .set("Authorization", authHeader)
            .set("Content-Type", "application/json")
            .send(payload);

          expect(result.statusCode).toBe(200);
          expect(result.body).toHaveProperty("data");
          // Narrow the body type before reading `data`.
          if (!("data" in result.body)) {
            throw new Error("Expected response body to have 'data' property");
          }

          const scraped = result.body.data;
          expect(scraped).not.toHaveProperty("markdown");
          expect(scraped).toHaveProperty("html");
          expect(scraped).toHaveProperty("rawHtml");
          expect(scraped).toHaveProperty("metadata");
          expect(scraped.html).toContain("<h1");
          expect(scraped.rawHtml).toContain("<html");
          expect(scraped.metadata.statusCode).toBe(200);
          expect(scraped.metadata.error).toBeUndefined();
        },
        30000
      );
      // Requests markdown only with `waitFor: 5000` and verifies that the
      // markdown contains "PagerDuty" while html, links and rawHtml are absent.
      it.concurrent(
        "should return a successful response with waitFor",
        async () => {
          const payload: ScrapeRequest = {
            url: "https://ycombinator.com/companies",
            formats: ["markdown"],
            waitFor: 5000,
          };
          const authHeader = `Bearer ${process.env.TEST_API_KEY}`;

          const result: ScrapeResponseRequestTest = await request(TEST_URL)
            .post("/v1/scrape")
            .set("Authorization", authHeader)
            .set("Content-Type", "application/json")
            .send(payload);

          expect(result.statusCode).toBe(200);
          expect(result.body).toHaveProperty("data");
          // Narrow the body type before reading `data`.
          if (!("data" in result.body)) {
            throw new Error("Expected response body to have 'data' property");
          }

          const scraped = result.body.data;
          expect(scraped).toHaveProperty("markdown");
          expect(scraped).not.toHaveProperty("html");
          expect(scraped).not.toHaveProperty("links");
          expect(scraped).not.toHaveProperty("rawHtml");
          expect(scraped).toHaveProperty("metadata");
          expect(scraped.markdown).toContain("PagerDuty");
          expect(scraped.metadata.statusCode).toBe(200);
          expect(scraped.metadata.error).toBeUndefined();
        },
        30000
      );
      // Requests the "links" format only and verifies that the returned links
      // include https://firecrawl.dev while html and rawHtml are absent.
      it.concurrent(
        "should return a successful response with a valid links on page",
        async () => {
          const payload: ScrapeRequest = {
            url: "https://roastmywebsite.ai",
            formats: ["links"],
          };
          const authHeader = `Bearer ${process.env.TEST_API_KEY}`;

          const result: ScrapeResponseRequestTest = await request(TEST_URL)
            .post("/v1/scrape")
            .set("Authorization", authHeader)
            .set("Content-Type", "application/json")
            .send(payload);

          expect(result.statusCode).toBe(200);
          expect(result.body).toHaveProperty("data");
          // Narrow the body type before reading `data`.
          if (!("data" in result.body)) {
            throw new Error("Expected response body to have 'data' property");
          }

          const scraped = result.body.data;
          expect(scraped).not.toHaveProperty("html");
          expect(scraped).not.toHaveProperty("rawHtml");
          expect(scraped).toHaveProperty("links");
          expect(scraped).toHaveProperty("metadata");
          expect(scraped.links).toContain("https://firecrawl.dev");
          expect(scraped.metadata.statusCode).toBe(200);
          expect(scraped.metadata.error).toBeUndefined();
        },
        30000
      );
  });
 // End-to-end tests for the map endpoint: authorization handling, successful
 // link discovery (with and without search / includeSubdomains), and
 // rejection of an invalid URL.
 describe("POST /v1/map", () => {
  // Path of the endpoint exercised by every test in this suite.
  const MAP_ENDPOINT = "/v1/map";

  /**
   * POSTs `payload` as JSON to the map endpoint.
   *
   * @param payload - request body to send
   * @param token - bearer token; defaults to the TEST_API_KEY environment variable
   * @returns the awaited HTTP response
   */
  const postMap = async (
    payload: object,
    token: string | undefined = process.env.TEST_API_KEY
  ): Promise<ScrapeResponseRequestTest> => {
    const result: ScrapeResponseRequestTest = await request(TEST_URL)
      .post(MAP_ENDPOINT)
      .set("Authorization", `Bearer ${token}`)
      .set("Content-Type", "application/json")
      .send(payload);
    return result;
  };

  /**
   * Asserts that `result` is a 200 response whose body has `success: true`
   * and a non-empty `links` array.
   *
   * @returns the `links` array for further assertions
   * @throws Error when the body has no `links` property
   */
  const expectNonEmptyLinks = (result: ScrapeResponseRequestTest): unknown[] => {
    expect(result.statusCode).toBe(200);
    expect(result.body).toHaveProperty("success", true);
    expect(result.body).toHaveProperty("links");
    // Narrow the body type before reading `links`.
    if (!("links" in result.body)) {
      throw new Error("Expected response body to have 'links' property");
    }
    const found = result.body.links as unknown[];
    expect(Array.isArray(found)).toBe(true);
    expect(found.length).toBeGreaterThan(0);
    return found;
  };

  it.concurrent("should require authorization", async () => {
    // No Authorization header and no body are sent here.
    const result: ScrapeResponseRequestTest = await request(TEST_URL).post(
      MAP_ENDPOINT
    );
    expect(result.statusCode).toBe(401);
  });

  it.concurrent("should return an error response with an invalid API key", async () => {
    const result = await postMap({ url: "https://firecrawl.dev" }, "invalid-api-key");
    expect(result.statusCode).toBe(401);
  });

  it.concurrent("should return a successful response with a valid API key", async () => {
    const result = await postMap({ url: "https://roastmywebsite.ai" });
    expectNonEmptyLinks(result);
  });

  it.concurrent("should return a successful response with a valid API key and search", async () => {
    const result = await postMap({
      url: "https://usemotion.com",
      search: "pricing"
    });
    const found = expectNonEmptyLinks(result);
    expect(found[0]).toContain("usemotion.com/pricing");
  });

  it.concurrent("should return a successful response with a valid API key and search and allowSubdomains", async () => {
    const result = await postMap({
      url: "https://firecrawl.dev",
      search: "docs",
      includeSubdomains: true
    });
    const found = expectNonEmptyLinks(result);
    expect(found[0]).toContain("docs.firecrawl.dev");
  });

  it.concurrent("should return a successful response with a valid API key and search and allowSubdomains and www", async () => {
    const result = await postMap({
      url: "https://www.firecrawl.dev",
      search: "docs",
      includeSubdomains: true
    });
    const found = expectNonEmptyLinks(result);
    expect(found[0]).toContain("docs.firecrawl.dev");
  }, 10000);

  it.concurrent("should return a successful response with a valid API key and search and not allowSubdomains and www", async () => {
    const result = await postMap({
      url: "https://www.firecrawl.dev",
      search: "docs",
      includeSubdomains: false
    });
    const found = expectNonEmptyLinks(result);
    expect(found[0]).not.toContain("docs.firecrawl.dev");
  });

  it.concurrent("should return an error for invalid URL", async () => {
    const result = await postMap({
      url: "invalid-url",
      includeSubdomains: true,
      search: "test",
    });
    expect(result.statusCode).toBe(400);
    expect(result.body).toHaveProperty("success", false);
    expect(result.body).toHaveProperty("error");
  });
 });
 });
--- a/apps/api/src/tests/e2e_withAuth/index.test.ts
+++ b/apps/api/src/tests/e2e_withAuth/index.test.ts
@ -1,11 +1,15 @@
 import request from "supertest";
 import dotenv from "dotenv";
-import { FirecrawlCrawlResponse, FirecrawlCrawlStatusResponse, FirecrawlScrapeResponse } from "../../types";
+import {
  FirecrawlCrawlResponse,
  FirecrawlCrawlStatusResponse,
  FirecrawlScrapeResponse,
 } from "../../types";
 dotenv.config();
 const TEST_URL = "http://127.0.0.1:3002";
-describe("E2E Tests for API Routes", () => {
+describe("E2E Tests for v0 API Routes", () => {
  beforeAll(() => {
    process.env.USE_DB_AUTHENTICATION = "true";
  });
@ -24,20 +28,27 @@ describe("E2E Tests for API Routes", () => {
  describe("POST /v0/scrape", () => {
    it.concurrent("should require authorization", async () => {
-      const response: FirecrawlScrapeResponse = await request(TEST_URL).post("/v0/scrape");
+      const response: FirecrawlScrapeResponse = await request(TEST_URL).post(
        "/v0/scrape"
      );
      expect(response.statusCode).toBe(401);
    });
-    it.concurrent("should return an error response with an invalid API key", async () => {
+    it.concurrent(
      "should return an error response with an invalid API key",
      async () => {
        const response: FirecrawlScrapeResponse = await request(TEST_URL)
          .post("/v0/scrape")
          .set("Authorization", `Bearer invalid-api-key`)
          .set("Content-Type", "application/json")
          .send({ url: "https://firecrawl.dev" });
        expect(response.statusCode).toBe(401);
-    });
+      }
    );
-    it.concurrent("should return a successful response with a valid API key", async () => {
+    it.concurrent(
      "should return a successful response with a valid API key",
      async () => {
        const response: FirecrawlScrapeResponse = await request(TEST_URL)
          .post("/v0/scrape")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -52,21 +63,36 @@ describe("E2E Tests for API Routes", () => {
        expect(response.body.data.content).toContain("_Roast_");
        expect(response.body.data.metadata.pageError).toBeUndefined();
        expect(response.body.data.metadata.title).toBe("Roast My Website");
-      expect(response.body.data.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
+        expect(response.body.data.metadata.description).toBe(
-      expect(response.body.data.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
+          "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️"
        );
        expect(response.body.data.metadata.keywords).toBe(
          "Roast My Website,Roast,Website,GitHub,Firecrawl"
        );
        expect(response.body.data.metadata.robots).toBe("follow, index");
        expect(response.body.data.metadata.ogTitle).toBe("Roast My Website");
-      expect(response.body.data.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
+        expect(response.body.data.metadata.ogDescription).toBe(
-      expect(response.body.data.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
+          "Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️"
-      expect(response.body.data.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
+        );
        expect(response.body.data.metadata.ogUrl).toBe(
          "https://www.roastmywebsite.ai"
        );
        expect(response.body.data.metadata.ogImage).toBe(
          "https://www.roastmywebsite.ai/og.png"
        );
        expect(response.body.data.metadata.ogLocaleAlternate).toStrictEqual([]);
        expect(response.body.data.metadata.ogSiteName).toBe("Roast My Website");
-      expect(response.body.data.metadata.sourceURL).toBe("https://roastmywebsite.ai");
+        expect(response.body.data.metadata.sourceURL).toBe(
          "https://roastmywebsite.ai"
        );
        expect(response.body.data.metadata.pageStatusCode).toBe(200);
-    }, 30000); // 30 seconds timeout
+      },
      30000
    ); // 30 seconds timeout
-
+    it.concurrent(
-    it.concurrent("should return a successful response with a valid API key and includeHtml set to true", async () => {
+      "should return a successful response with a valid API key and includeHtml set to true",
      async () => {
        const response: FirecrawlScrapeResponse = await request(TEST_URL)
          .post("/v0/scrape")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -86,44 +112,61 @@ describe("E2E Tests for API Routes", () => {
        expect(response.body.data.html).toContain("<h1");
        expect(response.body.data.metadata.pageStatusCode).toBe(200);
        expect(response.body.data.metadata.pageError).toBeUndefined();
-    }, 30000); // 30 seconds timeout
+      },
      30000
    ); // 30 seconds timeout
-   it.concurrent('should return a successful response for a valid scrape with PDF file', async () => {
+    it.concurrent(
      "should return a successful response for a valid scrape with PDF file",
      async () => {
        const response: FirecrawlScrapeResponse = await request(TEST_URL)
-        .post('/v0/scrape')
+          .post("/v0/scrape")
-        .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
-        .set('Content-Type', 'application/json')
+          .set("Content-Type", "application/json")
-        .send({ url: 'https://arxiv.org/pdf/astro-ph/9301001.pdf' });
+          .send({ url: "https://arxiv.org/pdf/astro-ph/9301001.pdf" });
        await new Promise((r) => setTimeout(r, 6000));
        expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
+        expect(response.body).toHaveProperty("data");
-      expect(response.body.data).toHaveProperty('content');
+        expect(response.body.data).toHaveProperty("content");
-      expect(response.body.data).toHaveProperty('metadata');
+        expect(response.body.data).toHaveProperty("metadata");
-      expect(response.body.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
+        expect(response.body.data.content).toContain(
          "We present spectrophotometric observations of the Broad Line Radio Galaxy"
        );
        expect(response.body.data.metadata.pageStatusCode).toBe(200);
        expect(response.body.data.metadata.pageError).toBeUndefined();
-    }, 60000); // 60 seconds
+      },
      60000
    ); // 60 seconds
-    it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
+    it.concurrent(
      "should return a successful response for a valid scrape with PDF file without explicit .pdf extension",
      async () => {
        const response: FirecrawlScrapeResponse = await request(TEST_URL)
-        .post('/v0/scrape')
+          .post("/v0/scrape")
-        .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
-        .set('Content-Type', 'application/json')
+          .set("Content-Type", "application/json")
-        .send({ url: 'https://arxiv.org/pdf/astro-ph/9301001' });
+          .send({ url: "https://arxiv.org/pdf/astro-ph/9301001" });
        await new Promise((r) => setTimeout(r, 6000));
        expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
+        expect(response.body).toHaveProperty("data");
-      expect(response.body.data).toHaveProperty('content');
+        expect(response.body.data).toHaveProperty("content");
-      expect(response.body.data).toHaveProperty('metadata');
+        expect(response.body.data).toHaveProperty("metadata");
-      expect(response.body.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
+        expect(response.body.data.content).toContain(
          "We present spectrophotometric observations of the Broad Line Radio Galaxy"
        );
        expect(response.body.data.metadata.pageStatusCode).toBe(200);
        expect(response.body.data.metadata.pageError).toBeUndefined();
-    }, 60000); // 60 seconds
+      },
      60000
    ); // 60 seconds
-    it.concurrent("should return a successful response with a valid API key with removeTags option", async () => {
+    it.concurrent(
-      const responseWithoutRemoveTags: FirecrawlScrapeResponse = await request(TEST_URL)
+      "should return a successful response with a valid API key with removeTags option",
      async () => {
        const responseWithoutRemoveTags: FirecrawlScrapeResponse =
          await request(TEST_URL)
            .post("/v0/scrape")
            .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
            .set("Content-Type", "application/json")
@ -134,16 +177,27 @@ describe("E2E Tests for API Routes", () => {
        expect(responseWithoutRemoveTags.body.data).toHaveProperty("markdown");
        expect(responseWithoutRemoveTags.body.data).toHaveProperty("metadata");
        expect(responseWithoutRemoveTags.body.data).not.toHaveProperty("html");
-      expect(responseWithoutRemoveTags.body.data.content).toContain("Scrape This Site");
+        expect(responseWithoutRemoveTags.body.data.content).toContain(
-      expect(responseWithoutRemoveTags.body.data.content).toContain("Lessons and Videos"); // #footer
+          "Scrape This Site"
-      expect(responseWithoutRemoveTags.body.data.content).toContain("[Sandbox]("); // .nav
+        );
-      expect(responseWithoutRemoveTags.body.data.content).toContain("web scraping"); // strong
+        expect(responseWithoutRemoveTags.body.data.content).toContain(
          "Lessons and Videos"
        ); // #footer
        expect(responseWithoutRemoveTags.body.data.content).toContain(
          "[Sandbox]("
        ); // .nav
        expect(responseWithoutRemoveTags.body.data.content).toContain(
          "web scraping"
        ); // strong
        const response: FirecrawlScrapeResponse = await request(TEST_URL)
          .post("/v0/scrape")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
          .set("Content-Type", "application/json")
-        .send({ url: "https://www.scrapethissite.com/", pageOptions: { removeTags: ['.nav', '#footer', 'strong'] } });
+          .send({
            url: "https://www.scrapethissite.com/",
            pageOptions: { removeTags: [".nav", "#footer", "strong"] },
          });
        expect(response.statusCode).toBe(200);
        expect(response.body).toHaveProperty("data");
        expect(response.body.data).toHaveProperty("content");
@ -154,121 +208,157 @@ describe("E2E Tests for API Routes", () => {
        expect(response.body.data.content).not.toContain("Lessons and Videos"); // #footer
        expect(response.body.data.content).not.toContain("[Sandbox]("); // .nav
        expect(response.body.data.content).not.toContain("web scraping"); // strong
-    }, 30000); // 30 seconds timeout
+      },
      30000
    ); // 30 seconds timeout
-    it.concurrent('should return a successful response for a scrape with 400 page', async () => {
+    it.concurrent(
      "should return a successful response for a scrape with 400 page",
      async () => {
        const response: FirecrawlScrapeResponse = await request(TEST_URL)
-        .post('/v0/scrape')
+          .post("/v0/scrape")
-        .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
-        .set('Content-Type', 'application/json')
+          .set("Content-Type", "application/json")
-        .send({ url: 'https://httpstat.us/400' });
+          .send({ url: "https://httpstat.us/400" });
        await new Promise((r) => setTimeout(r, 5000));
        expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
+        expect(response.body).toHaveProperty("data");
-      expect(response.body.data).toHaveProperty('content');
+        expect(response.body.data).toHaveProperty("content");
-      expect(response.body.data).toHaveProperty('metadata');
+        expect(response.body.data).toHaveProperty("metadata");
        expect(response.body.data.metadata.pageStatusCode).toBe(400);
-      expect(response.body.data.metadata.pageError.toLowerCase()).toContain("bad request");
+        expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
-    }, 60000); // 60 seconds
+          "bad request"
        );
      },
      60000
    ); // 60 seconds
-    it.concurrent('should return a successful response for a scrape with 401 page', async () => {
+    it.concurrent(
      "should return a successful response for a scrape with 401 page",
      async () => {
        const response: FirecrawlScrapeResponse = await request(TEST_URL)
-        .post('/v0/scrape')
+          .post("/v0/scrape")
-        .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
-        .set('Content-Type', 'application/json')
+          .set("Content-Type", "application/json")
-        .send({ url: 'https://httpstat.us/401' });
+          .send({ url: "https://httpstat.us/401" });
        await new Promise((r) => setTimeout(r, 5000));
        expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
+        expect(response.body).toHaveProperty("data");
-      expect(response.body.data).toHaveProperty('content');
+        expect(response.body.data).toHaveProperty("content");
-      expect(response.body.data).toHaveProperty('metadata');
+        expect(response.body.data).toHaveProperty("metadata");
        expect(response.body.data.metadata.pageStatusCode).toBe(401);
-      expect(response.body.data.metadata.pageError.toLowerCase()).toContain("unauthorized");
+        expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
-    }, 60000); // 60 seconds
+          "unauthorized"
        );
      },
      60000
    ); // 60 seconds
-    it.concurrent("should return a successful response for a scrape with 403 page", async () => {
+    it.concurrent(
      "should return a successful response for a scrape with 403 page",
      async () => {
        const response: FirecrawlScrapeResponse = await request(TEST_URL)
-        .post('/v0/scrape')
+          .post("/v0/scrape")
-        .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
-        .set('Content-Type', 'application/json')
+          .set("Content-Type", "application/json")
-        .send({ url: 'https://httpstat.us/403' });
+          .send({ url: "https://httpstat.us/403" });
        await new Promise((r) => setTimeout(r, 5000));
        expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
+        expect(response.body).toHaveProperty("data");
-      expect(response.body.data).toHaveProperty('content');
+        expect(response.body.data).toHaveProperty("content");
-      expect(response.body.data).toHaveProperty('metadata');
+        expect(response.body.data).toHaveProperty("metadata");
        expect(response.body.data.metadata.pageStatusCode).toBe(403);
-      expect(response.body.data.metadata.pageError.toLowerCase()).toContain("forbidden");
+        expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
-    }, 60000); // 60 seconds
+          "forbidden"
        );
      },
      60000
    ); // 60 seconds
-    it.concurrent('should return a successful response for a scrape with 404 page', async () => {
+    it.concurrent(
      "should return a successful response for a scrape with 404 page",
      async () => {
        const response: FirecrawlScrapeResponse = await request(TEST_URL)
-        .post('/v0/scrape')
+          .post("/v0/scrape")
-        .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
-        .set('Content-Type', 'application/json')
+          .set("Content-Type", "application/json")
-        .send({ url: 'https://httpstat.us/404' });
+          .send({ url: "https://httpstat.us/404" });
        await new Promise((r) => setTimeout(r, 5000));
        expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
+        expect(response.body).toHaveProperty("data");
-      expect(response.body.data).toHaveProperty('content');
+        expect(response.body.data).toHaveProperty("content");
-      expect(response.body.data).toHaveProperty('metadata');
+        expect(response.body.data).toHaveProperty("metadata");
        expect(response.body.data.metadata.pageStatusCode).toBe(404);
-      expect(response.body.data.metadata.pageError.toLowerCase()).toContain("not found");
+      },
-    }, 60000); // 60 seconds
+      60000
    ); // 60 seconds
-    it.concurrent('should return a successful response for a scrape with 405 page', async () => {
+    it.concurrent(
      "should return a successful response for a scrape with 405 page",
      async () => {
        const response = await request(TEST_URL)
-        .post('/v0/scrape')
+          .post("/v0/scrape")
-        .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
-        .set('Content-Type', 'application/json')
+          .set("Content-Type", "application/json")
-        .send({ url: 'https://httpstat.us/405' });
+          .send({ url: "https://httpstat.us/405" });
        await new Promise((r) => setTimeout(r, 5000));
        expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
+        expect(response.body).toHaveProperty("data");
-      expect(response.body.data).toHaveProperty('content');
+        expect(response.body.data).toHaveProperty("content");
-      expect(response.body.data).toHaveProperty('metadata');
+        expect(response.body.data).toHaveProperty("metadata");
        expect(response.body.data.metadata.pageStatusCode).toBe(405);
-      expect(response.body.data.metadata.pageError.toLowerCase()).toContain("method not allowed");
+      },
-    }, 60000); // 60 seconds
+      60000
    ); // 60 seconds
-    it.concurrent('should return a successful response for a scrape with 500 page', async () => {
+    it.concurrent(
      "should return a successful response for a scrape with 500 page",
      async () => {
        const response: FirecrawlScrapeResponse = await request(TEST_URL)
-        .post('/v0/scrape')
+          .post("/v0/scrape")
-        .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
+          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
-        .set('Content-Type', 'application/json')
+          .set("Content-Type", "application/json")
-        .send({ url: 'https://httpstat.us/500' });
+          .send({ url: "https://httpstat.us/500" });
        await new Promise((r) => setTimeout(r, 5000));
        expect(response.statusCode).toBe(200);
-      expect(response.body).toHaveProperty('data');
+        expect(response.body).toHaveProperty("data");
-      expect(response.body.data).toHaveProperty('content');
+        expect(response.body.data).toHaveProperty("content");
-      expect(response.body.data).toHaveProperty('metadata');
+        expect(response.body.data).toHaveProperty("metadata");
        expect(response.body.data.metadata.pageStatusCode).toBe(500);
-      expect(response.body.data.metadata.pageError.toLowerCase()).toContain("internal server error");
+      },
-    }, 60000); // 60 seconds
+      60000
    ); // 60 seconds
  });
  describe("POST /v0/crawl", () => {
    it.concurrent("should require authorization", async () => {
-      const response: FirecrawlCrawlResponse = await request(TEST_URL).post("/v0/crawl");
+      const response: FirecrawlCrawlResponse = await request(TEST_URL).post(
        "/v0/crawl"
      );
      expect(response.statusCode).toBe(401);
    });
-    it.concurrent("should return an error response with an invalid API key", async () => {
+    it.concurrent(
      "should return an error response with an invalid API key",
      async () => {
        const response: FirecrawlCrawlResponse = await request(TEST_URL)
          .post("/v0/crawl")
          .set("Authorization", `Bearer invalid-api-key`)
          .set("Content-Type", "application/json")
          .send({ url: "https://firecrawl.dev" });
        expect(response.statusCode).toBe(401);
-    });
+      }
    );
-    it.concurrent("should return a successful response with a valid API key for crawl", async () => {
+    it.concurrent(
      "should return a successful response with a valid API key for crawl",
      async () => {
        const response: FirecrawlCrawlResponse = await request(TEST_URL)
          .post("/v0/crawl")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -279,9 +369,12 @@ describe("E2E Tests for API Routes", () => {
        expect(response.body.jobId).toMatch(
          /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
        );
-    });
+      }
    );
-    it.concurrent("should return a successful response with a valid API key and valid includes option", async () => {
+    it.concurrent(
      "should return a successful response with a valid API key and valid includes option",
      async () => {
        const crawlResponse: FirecrawlCrawlResponse = await request(TEST_URL)
          .post("/v0/crawl")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -311,7 +404,10 @@ describe("E2E Tests for API Routes", () => {
          }
        }
-        const completedResponse = response;
+        await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database
        const completedResponse = await request(TEST_URL)
          .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
        const urls = completedResponse.body.data.map(
          (item: any) => item.metadata?.sourceURL
@ -329,11 +425,19 @@ describe("E2E Tests for API Routes", () => {
        expect(completedResponse.body.data[0]).toHaveProperty("markdown");
        expect(completedResponse.body.data[0]).toHaveProperty("metadata");
        expect(completedResponse.body.data[0].content).toContain("Mendable");
-      expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(200);
+        expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
-      expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
+          200
-    }, 180000); // 180 seconds
+        );
        expect(
          completedResponse.body.data[0].metadata.pageError
        ).toBeUndefined();
      },
      180000
    ); // 180 seconds
-    it.concurrent("should return a successful response with a valid API key and valid excludes option", async () => {
+    it.concurrent(
      "should return a successful response with a valid API key and valid excludes option",
      async () => {
        const crawlResponse: FirecrawlCrawlResponse = await request(TEST_URL)
          .post("/v0/crawl")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -363,7 +467,12 @@ describe("E2E Tests for API Routes", () => {
          }
        }
-      const completedResponse: FirecrawlCrawlStatusResponse = response;
+        await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database
        const completedResponse: FirecrawlCrawlStatusResponse = await request(
          TEST_URL
        )
          .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
        const urls = completedResponse.body.data.map(
          (item: any) => item.metadata?.sourceURL
@ -372,9 +481,13 @@ describe("E2E Tests for API Routes", () => {
        urls.forEach((url: string) => {
          expect(url.startsWith("https://wwww.mendable.ai/blog/")).toBeFalsy();
        });
-    }, 90000); // 90 seconds
+      },
      90000
    ); // 90 seconds
-    it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
+    it.concurrent(
      "should return a successful response with max depth option for a valid crawl job",
      async () => {
        const crawlResponse: FirecrawlCrawlResponse = await request(TEST_URL)
          .post("/v0/crawl")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -403,7 +516,9 @@ describe("E2E Tests for API Routes", () => {
            await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
          }
        }
-      const completedResponse: FirecrawlCrawlStatusResponse = await request(TEST_URL)
+        const completedResponse: FirecrawlCrawlStatusResponse = await request(
          TEST_URL
        )
          .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
@ -414,8 +529,12 @@ describe("E2E Tests for API Routes", () => {
        expect(completedResponse.body.data[0]).toHaveProperty("content");
        expect(completedResponse.body.data[0]).toHaveProperty("markdown");
        expect(completedResponse.body.data[0]).toHaveProperty("metadata");
-      expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(200);
+        expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
-      expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
+          200
        );
        expect(
          completedResponse.body.data[0].metadata.pageError
        ).toBeUndefined();
        const urls = completedResponse.body.data.map(
          (item: any) => item.metadata?.sourceURL
        );
@ -423,29 +542,43 @@ describe("E2E Tests for API Routes", () => {
        // Check if all URLs have a maximum depth of 1
        urls.forEach((url: string) => {
-        const pathSplits = new URL(url).pathname.split('/');
+          const pathSplits = new URL(url).pathname.split("/");
-        const depth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0);
+          const depth =
            pathSplits.length -
            (pathSplits[0].length === 0 &&
            pathSplits[pathSplits.length - 1].length === 0
              ? 1
              : 0);
          expect(depth).toBeLessThanOrEqual(2);
        });
-    }, 180000);
+      },
      180000
    );
  });
  describe("POST /v0/crawlWebsitePreview", () => {
    it.concurrent("should require authorization", async () => {
-      const response: FirecrawlCrawlResponse = await request(TEST_URL).post("/v0/crawlWebsitePreview");
+      const response: FirecrawlCrawlResponse = await request(TEST_URL).post(
        "/v0/crawlWebsitePreview"
      );
      expect(response.statusCode).toBe(401);
    });
-    it.concurrent("should return an error response with an invalid API key", async () => {
+    it.concurrent(
      "should return an error response with an invalid API key",
      async () => {
        const response: FirecrawlCrawlResponse = await request(TEST_URL)
          .post("/v0/crawlWebsitePreview")
          .set("Authorization", `Bearer invalid-api-key`)
          .set("Content-Type", "application/json")
          .send({ url: "https://firecrawl.dev" });
        expect(response.statusCode).toBe(401);
-    });
+      }
    );
-    it.concurrent("should return a timeout error when scraping takes longer than the specified timeout", async () => {
+    it.concurrent(
      "should return a timeout error when scraping takes longer than the specified timeout",
      async () => {
        const response: FirecrawlCrawlResponse = await request(TEST_URL)
          .post("/v0/scrape")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -453,7 +586,9 @@ describe("E2E Tests for API Routes", () => {
          .send({ url: "https://firecrawl.dev", timeout: 1000 });
        expect(response.statusCode).toBe(408);
-    }, 3000); 
+      },
      3000
    );
  });
  describe("POST /v0/search", () => {
@ -462,16 +597,21 @@ describe("E2E Tests for API Routes", () => {
      expect(response.statusCode).toBe(401);
    });
-    it.concurrent("should return an error response with an invalid API key", async () => {
+    it.concurrent(
      "should return an error response with an invalid API key",
      async () => {
        const response = await request(TEST_URL)
          .post("/v0/search")
          .set("Authorization", `Bearer invalid-api-key`)
          .set("Content-Type", "application/json")
          .send({ query: "test" });
        expect(response.statusCode).toBe(401);
-    });
+      }
    );
-    it.concurrent("should return a successful response with a valid API key for search", async () => {
+    it.concurrent(
      "should return a successful response with a valid API key for search",
      async () => {
        const response = await request(TEST_URL)
          .post("/v0/search")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -481,7 +621,9 @@ describe("E2E Tests for API Routes", () => {
        expect(response.body).toHaveProperty("success");
        expect(response.body.success).toBe(true);
        expect(response.body).toHaveProperty("data");
-    }, 30000); // 30 seconds timeout
+      },
      60000
    ); // 60 seconds timeout
  });
  describe("GET /v0/crawl/status/:jobId", () => {
@ -490,21 +632,29 @@ describe("E2E Tests for API Routes", () => {
      expect(response.statusCode).toBe(401);
    });
-    it.concurrent("should return an error response with an invalid API key", async () => {
+    it.concurrent(
      "should return an error response with an invalid API key",
      async () => {
        const response = await request(TEST_URL)
          .get("/v0/crawl/status/123")
          .set("Authorization", `Bearer invalid-api-key`);
        expect(response.statusCode).toBe(401);
-    });
+      }
    );
-    it.concurrent("should return Job not found for invalid job ID", async () => {
+    it.concurrent(
      "should return Job not found for invalid job ID",
      async () => {
        const response = await request(TEST_URL)
          .get("/v0/crawl/status/invalidJobId")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
        expect(response.statusCode).toBe(404);
-    });
+      }
    );
-    it.concurrent("should return a successful crawl status response for a valid crawl job", async () => {
+    it.concurrent(
      "should return a successful crawl status response for a valid crawl job",
      async () => {
        const crawlResponse = await request(TEST_URL)
          .post("/v0/crawl")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -513,7 +663,6 @@ describe("E2E Tests for API Routes", () => {
        expect(crawlResponse.statusCode).toBe(200);
        let isCompleted = false;
      let completedResponse;
        while (!isCompleted) {
          const response = await request(TEST_URL)
@ -524,11 +673,16 @@ describe("E2E Tests for API Routes", () => {
          if (response.body.status === "completed") {
            isCompleted = true;
          completedResponse = response;
          } else {
            await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
          }
        }
        await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database
        const completedResponse = await request(TEST_URL)
          .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
        expect(completedResponse.body).toHaveProperty("status");
        expect(completedResponse.body.status).toBe("completed");
        expect(completedResponse.body).toHaveProperty("data");
@ -536,15 +690,24 @@ describe("E2E Tests for API Routes", () => {
        expect(completedResponse.body.data[0]).toHaveProperty("markdown");
        expect(completedResponse.body.data[0]).toHaveProperty("metadata");
        expect(completedResponse.body.data[0].content).toContain("Mendable");
-      expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(200);
+        expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
-      expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
+          200
        );
        expect(
          completedResponse.body.data[0].metadata.pageError
        ).toBeUndefined();
-      const childrenLinks = completedResponse.body.data.filter(doc => 
+        const childrenLinks = completedResponse.body.data.filter(
-        doc.metadata && doc.metadata.sourceURL && doc.metadata.sourceURL.includes("mendable.ai/blog")
+          (doc) =>
            doc.metadata &&
            doc.metadata.sourceURL &&
            doc.metadata.sourceURL.includes("mendable.ai/blog")
        );
        expect(childrenLinks.length).toBe(completedResponse.body.data.length);
-    }, 180000); // 120 seconds
+      },
      180000
    ); // 180 seconds
    // TODO: review the test below
    // it.concurrent('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension ', async () => {
@ -592,7 +755,9 @@ describe("E2E Tests for API Routes", () => {
    //     expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
    // }, 180000); // 120 seconds
-    it.concurrent("If someone cancels a crawl job, it should turn into failed status", async () => {
+    it.concurrent(
      "If someone cancels a crawl job, it should turn into failed status",
      async () => {
        const crawlResponse = await request(TEST_URL)
          .post("/v0/crawl")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -619,18 +784,41 @@ describe("E2E Tests for API Routes", () => {
        expect(completedResponse.body).toHaveProperty("status");
        expect(completedResponse.body.status).toBe("failed");
        expect(completedResponse.body).toHaveProperty("data");
-      expect(completedResponse.body.data).toBeNull();
+
        let isNullOrEmptyArray = false;
        if (
          completedResponse.body.data === null ||
          completedResponse.body.data.length === 0
        ) {
          isNullOrEmptyArray = true;
        }
        expect(isNullOrEmptyArray).toBe(true);
        expect(completedResponse.body.data).toEqual(expect.arrayContaining([]));
        expect(completedResponse.body).toHaveProperty("partial_data");
-      expect(completedResponse.body.partial_data[0]).toHaveProperty("content");
+        expect(completedResponse.body.partial_data[0]).toHaveProperty(
-      expect(completedResponse.body.partial_data[0]).toHaveProperty("markdown");
+          "content"
-      expect(completedResponse.body.partial_data[0]).toHaveProperty("metadata");
+        );
-      expect(completedResponse.body.partial_data[0].metadata.pageStatusCode).toBe(200);
+        expect(completedResponse.body.partial_data[0]).toHaveProperty(
-      expect(completedResponse.body.partial_data[0].metadata.pageError).toBeUndefined();
+          "markdown"
-    }, 60000); // 60 seconds
+        );
        expect(completedResponse.body.partial_data[0]).toHaveProperty(
          "metadata"
        );
        expect(
          completedResponse.body.partial_data[0].metadata.pageStatusCode
        ).toBe(200);
        expect(
          completedResponse.body.partial_data[0].metadata.pageError
        ).toBeUndefined();
      },
      60000
    ); // 60 seconds
  });
  describe("POST /v0/scrape with LLM Extraction", () => {
-    it.concurrent("should extract data using LLM extraction mode", async () => {
+    it.concurrent(
      "should extract data using LLM extraction mode",
      async () => {
        const response = await request(TEST_URL)
          .post("/v0/scrape")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
@ -677,63 +865,100 @@ describe("E2E Tests for API Routes", () => {
        expect(llmExtraction).toHaveProperty("is_open_source");
        expect(llmExtraction.is_open_source).toBe(false);
        expect(typeof llmExtraction.is_open_source).toBe("boolean");
-    }, 60000); // 60 secs
+      },
      60000
    ); // 60 secs
  });
-  describe("POST /v0/crawl with fast mode", () => {
+  describe("POST /v0/map", () => {
-    it.concurrent("should complete the crawl under 20 seconds", async () => {
+    it.concurrent(
-      const startTime = Date.now();
+      "should return a list of links for mendable.ai without subdomains included",
-
+      async () => {
-      const crawlResponse = await request(TEST_URL)
+        const response = await request(TEST_URL)
-        .post("/v0/crawl")
+          .post("/v1/map")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
          .set("Content-Type", "application/json")
          .send({
-          url: "https://flutterbricks.com",
+            url: "https://mendable.ai",
          crawlerOptions: {
            mode: "fast"
          }
          });
-      expect(crawlResponse.statusCode).toBe(200);
+        expect(response.statusCode).toBe(200);
        expect(response.body).toHaveProperty("success", true);
        expect(response.body).toHaveProperty("links");
        expect(response.body.links).not.toContain("https://docs.mendable.ai");
        expect(Array.isArray(response.body.links)).toBe(true);
        expect(response.body.links.length).toBeGreaterThan(0);
      },
      60000
    ); // 60 secs
-      const jobId = crawlResponse.body.jobId;
+    it.concurrent(
-      let statusResponse;
+      "should return a list of links for a given URL with subdomains included",
-      let isFinished = false;
+      async () => {
        const response = await request(TEST_URL)
          .post("/v1/map")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
          .set("Content-Type", "application/json")
          .send({
            url: "https://python.langchain.com",
            includeSubdomains: true,
          });
-      while (!isFinished) {
+        expect(response.statusCode).toBe(200);
-        statusResponse = await request(TEST_URL)
+        expect(response.body).toHaveProperty("success", true);
-          .get(`/v0/crawl/status/${jobId}`)
+        expect(response.body).toHaveProperty("links");
-          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+        expect(Array.isArray(response.body.links)).toBe(true);
        expect(response.body.links.length).toBeGreaterThan(0);
      },
      60000
    ); // 60 secs
-        expect(statusResponse.statusCode).toBe(200);
+    it.concurrent(
-        isFinished = statusResponse.body.status === "completed";
+      "should return a list of links for a given URL with subdomains and search",
      async () => {
        const response = await request(TEST_URL)
          .post("/v1/map")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
          .set("Content-Type", "application/json")
          .send({
            url: "https://python.langchain.com",
            includeSubdomains: true,
            search: "agents",
          });
-        if (!isFinished) {
+        expect(response.statusCode).toBe(200);
-          await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+        expect(response.body).toHaveProperty("success", true);
-        }
+        expect(response.body).toHaveProperty("links");
-      }
+        expect(response.body.links).toContain(
          "https://api.python.langchain.com/en/latest/_modules/langchain/agents/openai_functions_agent/base.html"
        );
        expect(Array.isArray(response.body.links)).toBe(true);
        expect(response.body.links.length).toBeGreaterThan(0);
        response.body.links.forEach((link) => {
          expect(link).toContain("python.langchain.com");
        });
      },
      60000
    ); // 60 secs
-      // const endTime = Date.now();
+    it.concurrent(
-      // const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
+      "should handle invalid URL input gracefully",
      async () => {
        const response = await request(TEST_URL)
          .post("/v1/map")
          .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
          .set("Content-Type", "application/json")
          .send({
            url: "invalid-url",
            includeSubdomains: true,
            search: "agents",
          });
-      // console.log(`Time elapsed: ${timeElapsed} seconds`);
+        expect(response.statusCode).toBe(400);
-
+        expect(response.body).toHaveProperty("success", false);
-      expect(statusResponse.body.status).toBe("completed");
+        expect(response.body).toHaveProperty("details");
-      expect(statusResponse.body).toHaveProperty("data");
+      },
-      expect(statusResponse.body.data[0]).toHaveProperty("content");
+      60000
-      expect(statusResponse.body.data[0]).toHaveProperty("markdown");
+    ); // 60 secs
      expect(statusResponse.body.data[0]).toHaveProperty("metadata");
      expect(statusResponse.body.data[0].metadata.pageStatusCode).toBe(200);
      expect(statusResponse.body.data[0].metadata.pageError).toBeUndefined();
      const results = statusResponse.body.data;
      // results.forEach((result, i) => {
      //   console.log(result.metadata.sourceURL);
      // });
      expect(results.length).toBeGreaterThanOrEqual(10);
      expect(results.length).toBeLessThanOrEqual(15);
    }, 20000);
  });
 });
--- a/apps/api/src/controllers/tests/crawl.test.ts
+++ b/apps/api/src/controllers/tests/crawl.test.ts
@ -1,6 +1,6 @@
-import { crawlController } from '../crawl'
+import { crawlController } from '../v0/crawl'
 import { Request, Response } from 'express';
-import { authenticateUser } from '../auth'; // Ensure this import is correct
+import { authenticateUser } from '../v0/auth'; // Ensure this import is correct
 import { createIdempotencyKey } from '../../services/idempotency/create';
 import { validateIdempotencyKey } from '../../services/idempotency/validate';
 import { v4 as uuidv4 } from 'uuid';
--- a/apps/api/src/controllers/crawl-status.ts
+++ b/apps/api/src/controllers/crawl-status.ts
@ -1,69 +0,0 @@
 import { Request, Response } from "express";
 import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../src/types";
 import { addWebScraperJob } from "../../src/services/queue-jobs";
 import { getWebScraperQueue } from "../../src/services/queue-service";
 import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
 import { Logger } from "../../src/lib/logger";
 /**
  * Express handler that reports the state of a web-scraper queue job.
  *
  * Authenticates the caller under `RateLimiterMode.CrawlStatus`, looks the job
  * up by `req.params.jobId`, and answers with its queue state, its progress
  * counters and its documents. A job whose `cancelled:<jobId>` key exists on
  * the queue client is reported as "failed" with `data: null`.
  *
  * Responds 404 when the job is unknown, and 500 with the error message when
  * anything in the handler throws.
  */
 export async function crawlStatusController(req: Request, res: Response) {
  try {
    const { success, error, status } = await authenticateUser(
      req,
      res,
      RateLimiterMode.CrawlStatus
    );
    if (!success) {
      return res.status(status).json({ error });
    }

    const { jobId } = req.params;

    const job = await getWebScraperQueue().getJob(jobId);
    if (!job) {
      return res.status(404).json({ error: "Job not found" });
    }

    // Cancellation is signalled by the presence of a "cancelled:<jobId>" key.
    const queueClient = await getWebScraperQueue().client;
    const wasCancelled = await queueClient.exists("cancelled:" + jobId);

    type ProgressSnapshot = {
      current: number;
      current_url: string;
      total: number;
      current_step: string;
      partialDocs: any[];
    };

    // Anything that is not an object is treated as "no progress recorded";
    // the destructuring defaults below then supply every field.
    const rawProgress = job.progress;
    const snapshot = (
      typeof rawProgress === "object" ? rawProgress : {}
    ) as ProgressSnapshot;
    const {
      current = 0,
      current_url = "",
      total = 0,
      current_step = "",
      partialDocs = [],
    } = snapshot;

    // Start from the job's return value; the database copy, when present,
    // replaces it.
    let docs = job.returnvalue;
    if (process.env.USE_DB_AUTHENTICATION === "true") {
      const storedJob = await supabaseGetJobById(jobId);
      if (storedJob) {
        docs = storedJob.docs;
      }
    }

    const queueState = await job.getState();
    const finishedCleanly = queueState == "completed" && !wasCancelled;

    res.json({
      status: wasCancelled ? "failed" : queueState,
      current,
      current_url,
      current_step,
      total,
      data: docs && !wasCancelled ? docs : null,
      // Partial documents are only returned while the job has not completed
      // normally.
      partial_data: finishedCleanly ? [] : partialDocs,
    });
  } catch (error) {
    Logger.error(error);
    return res.status(500).json({ error: error.message });
  }
 }
--- a/apps/api/src/controllers/crawl.ts
+++ b/apps/api/src/controllers/crawl.ts
@ -1,110 +0,0 @@
 import { Request, Response } from "express";
 import { WebScraperDataProvider } from "../../src/scraper/WebScraper";
 import { billTeam } from "../../src/services/billing/credit_billing";
 import { checkTeamCredits } from "../../src/services/billing/credit_billing";
 import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../src/types";
 import { addWebScraperJob } from "../../src/services/queue-jobs";
 import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
 import { logCrawl } from "../../src/services/logging/crawl_log";
 import { validateIdempotencyKey } from "../../src/services/idempotency/validate";
 import { createIdempotencyKey } from "../../src/services/idempotency/create";
 import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../src/lib/default-values";
 import { v4 as uuidv4 } from "uuid";
 import { Logger } from "../../src/lib/logger";
 /**
  * Express handler that starts a crawl.
  *
  * Steps, in order:
  *  1. Authenticate the caller under `RateLimiterMode.Crawl`.
  *  2. If an `x-idempotency-key` header is present, reject reused keys (409)
  *     and record the new one.
  *  3. Require at least one credit for the team (402 otherwise).
  *  4. Require `req.body.url` (400) and reject blocklisted URLs (403).
  *  5. For `mode === "single_urls"` with a single URL, scrape inline and
  *     return `{ success, documents }`; otherwise enqueue a web-scraper job,
  *     log the crawl and return `{ jobId }`.
  *
  * Any unexpected error is logged and answered with 500 and its message.
  */
 export async function crawlController(req: Request, res: Response) {
  try {
    const { success, team_id, error, status } = await authenticateUser(
      req,
      res,
      RateLimiterMode.Crawl
    );
    if (!success) {
      return res.status(status).json({ error });
    }
    if (req.headers["x-idempotency-key"]) {
      const isIdempotencyValid = await validateIdempotencyKey(req);
      if (!isIdempotencyValid) {
        return res.status(409).json({ error: "Idempotency key already used" });
      }
      try {
        // NOTE(review): awaited so that a rejection is handled by this
        // try/catch instead of escaping as an unhandled rejection; this is
        // harmless if the helper turns out to be synchronous.
        await createIdempotencyKey(req);
      } catch (error) {
        Logger.error(error);
        return res.status(500).json({ error: error.message });
      }
    }
    const { success: creditsCheckSuccess } = await checkTeamCredits(team_id, 1);
    if (!creditsCheckSuccess) {
      return res.status(402).json({ error: "Insufficient credits" });
    }
    const url = req.body.url;
    if (!url) {
      return res.status(400).json({ error: "Url is required" });
    }
    if (isUrlBlocked(url)) {
      return res
        .status(403)
        .json({
          error:
            "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
        });
    }
    const mode = req.body.mode ?? "crawl";
    // Request-supplied options override the defaults key by key.
    const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions };
    const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions };
    if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this?
      try {
        const a = new WebScraperDataProvider();
        await a.setOptions({
          jobId: uuidv4(),
          mode: "single_urls",
          urls: [url],
          crawlerOptions: { ...crawlerOptions, returnOnlyUrls: true },
          pageOptions: pageOptions,
        });
        // This path runs inline and never creates a queue job, so there is
        // nothing to report progress to. The previous callback referenced
        // `job`, which is declared further down and is still in its temporal
        // dead zone here, so any progress notification threw a ReferenceError.
        const docs = await a.getDocuments(false, () => {});
        return res.json({
          success: true,
          documents: docs,
        });
      } catch (error) {
        Logger.error(error);
        return res.status(500).json({ error: error.message });
      }
    }
    const job = await addWebScraperJob({
      url: url,
      mode: mode ?? "crawl", // fix for single urls not working
      crawlerOptions: crawlerOptions,
      team_id: team_id,
      pageOptions: pageOptions,
      origin: req.body.origin ?? defaultOrigin,
    });
    await logCrawl(job.id.toString(), team_id);
    res.json({ jobId: job.id });
  } catch (error) {
    Logger.error(error);
    return res.status(500).json({ error: error.message });
  }
 }
--- a/apps/api/src/controllers/crawlPreview.ts
+++ b/apps/api/src/controllers/crawlPreview.ts
@ -1,46 +0,0 @@
 import { Request, Response } from "express";
 import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../src/types";
 import { addWebScraperJob } from "../../src/services/queue-jobs";
 import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
 import { Logger } from "../../src/lib/logger";
 /**
  * Express handler that queues a small "preview" crawl for the website demo.
  *
  * Flow: authenticate in Preview rate-limit mode, validate `req.body.url`
  * (400 when missing, 403 when blocklisted), then enqueue a web-scraper job
  * whose crawler options are forced to `limit: 5` / `maxCrawledLinks: 5`.
  * Responds with `{ jobId }` on success and `{ error }` with status 500 when
  * anything throws.
  */
 export async function crawlPreviewController(req: Request, res: Response) {
  try {
    const auth = await authenticateUser(req, res, RateLimiterMode.Preview);
    if (!auth.success) {
      return res.status(auth.status).json({ error: auth.error });
    }
    const targetUrl = req.body.url;
    if (!targetUrl) {
      return res.status(400).json({ error: "Url is required" });
    }
    if (isUrlBlocked(targetUrl)) {
      return res.status(403).json({ error: "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." });
    }
    const requestedMode = req.body.mode ?? "crawl";
    const callerCrawlerOptions = req.body.crawlerOptions ?? {};
    const pageOptions = req.body.pageOptions ?? { onlyMainContent: false, includeHtml: false, removeTags: [] };
    // The preview caps are spread last so they always win over caller-supplied values.
    const previewCrawlerOptions = { ...callerCrawlerOptions, limit: 5, maxCrawledLinks: 5 };
    const queuedJob = await addWebScraperJob({
      url: targetUrl,
      mode: requestedMode,
      crawlerOptions: previewCrawlerOptions,
      team_id: "preview",
      pageOptions,
      origin: "website-preview",
    });
    res.json({ jobId: queuedJob.id });
  } catch (err) {
    Logger.error(err);
    return res.status(500).json({ error: err.message });
  }
 }
--- a/apps/api/src/controllers/status.ts
+++ b/apps/api/src/controllers/status.ts
@ -1,59 +0,0 @@
 import { Request, Response } from "express";
 import { getWebScraperQueue } from "../../src/services/queue-service";
 import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
 import { Logger } from "../../src/lib/logger";
 /**
  * Express handler that reports the state of a preview crawl job.
  *
  * Looks the job up by `req.params.jobId` (404 when absent), reads its
  * progress snapshot, and responds with status, counters, final data and
  * partial documents. When `USE_DB_AUTHENTICATION` is "true" the documents
  * returned by `supabaseGetJobById` replace the job's return value.
  * Any thrown error is logged and answered with status 500.
  */
 export async function crawlJobStatusPreviewController(req: Request, res: Response) {
  try {
    const queuedJob = await getWebScraperQueue().getJob(req.params.jobId);
    if (!queuedJob) {
      return res.status(404).json({ error: "Job not found" });
    }
    // A non-object progress value is replaced by an empty snapshot.
    const rawProgress = queuedJob.progress;
    const snapshot = typeof rawProgress !== 'object'
      ? { current: 0, current_url: '', total: 0, current_step: '', partialDocs: [] }
      : rawProgress;
    const {
      current = 0,
      current_url = '',
      total = 0,
      current_step = '',
      partialDocs = []
    } = snapshot as { current: number, current_url: string, total: number, current_step: string, partialDocs: any[] };
    let documents = queuedJob.returnvalue;
    if (process.env.USE_DB_AUTHENTICATION === "true") {
      const storedJob = await supabaseGetJobById(req.params.jobId);
      if (storedJob) {
        documents = storedJob.docs;
      }
    }
    // Every not-yet-finished queue state is reported to the client as "active".
    const rawState = await queuedJob.getState();
    const statesReportedAsActive = ['waiting', 'delayed', 'waiting-children', 'unknown', 'prioritized'];
    const reportedState = statesReportedAsActive.includes(rawState) ? 'active' : rawState;
    res.json({
      status: reportedState,
      // progress: job.progress(),
      current,
      current_url,
      current_step,
      total,
      data: documents ? documents : null,
      partial_data: reportedState === 'completed' ? [] : partialDocs,
    });
  } catch (err) {
    Logger.error(err);
    return res.status(500).json({ error: err.message });
  }
 }
--- a/apps/api/src/controllers/v0/admin/queue.ts
+++ b/apps/api/src/controllers/v0/admin/queue.ts
@ -1,9 +1,9 @@
 import { Request, Response } from "express";
 import { Job } from "bullmq";
-import { Logger } from "../../lib/logger";
+import { Logger } from "../../../lib/logger";
-import { getWebScraperQueue } from "../../services/queue-service";
+import { getScrapeQueue } from "../../../services/queue-service";
-import { checkAlerts } from "../../services/alerts";
+import { checkAlerts } from "../../../services/alerts";
 export async function cleanBefore24hCompleteJobsController(
  req: Request,
@ -11,13 +11,13 @@ export async function cleanBefore24hCompleteJobsController(
 ) {
  Logger.info("🐂 Cleaning jobs older than 24h");
  try {
-    const webScraperQueue = getWebScraperQueue();
+    const scrapeQueue = getScrapeQueue();
    const batchSize = 10;
    const numberOfBatches = 9; // Adjust based on your needs
    const completedJobsPromises: Promise<Job[]>[] = [];
    for (let i = 0; i < numberOfBatches; i++) {
      completedJobsPromises.push(
-        webScraperQueue.getJobs(
+        scrapeQueue.getJobs(
          ["completed"],
          i * batchSize,
          i * batchSize + batchSize,
@ -68,10 +68,10 @@ export async function checkQueuesController(req: Request, res: Response) {
  // Use this as a "health check" that way we dont destroy the server
 export async function queuesController(req: Request, res: Response) {
    try {
-      const webScraperQueue = getWebScraperQueue();
+      const scrapeQueue = getScrapeQueue();
      const [webScraperActive] = await Promise.all([
-        webScraperQueue.getActiveCount(),
+        scrapeQueue.getActiveCount(),
      ]);
      const noActiveJobs = webScraperActive === 0;
--- a/apps/api/src/controllers/v0/admin/redis-health.ts
+++ b/apps/api/src/controllers/v0/admin/redis-health.ts
@ -1,8 +1,7 @@
 import { Request, Response } from "express";
 import Redis from "ioredis";
-import { Logger } from "../../lib/logger";
+import { Logger } from "../../../lib/logger";
-import { sendSlackWebhook } from "../../services/alerts/slack";
+import { redisRateLimitClient } from "../../../services/rate-limiter";
 import { redisRateLimitClient } from "../../services/rate-limiter";
 export async function redisHealthController(req: Request, res: Response) {
  const retryOperation = async (operation, retries = 3) => {
@ -63,22 +62,22 @@ export async function redisHealthController(req: Request, res: Response) {
      Logger.info(
        `Redis instances health check: ${JSON.stringify(healthStatus)}`
      );
-      await sendSlackWebhook(
+      // await sendSlackWebhook(
-        `[REDIS DOWN] Redis instances health check: ${JSON.stringify(
+      //   `[REDIS DOWN] Redis instances health check: ${JSON.stringify(
-          healthStatus
+      //     healthStatus
-        )}`,
+      //   )}`,
-        true
+      //   true
-      );
+      // );
      return res
        .status(500)
        .json({ status: "unhealthy", details: healthStatus });
    }
  } catch (error) {
    Logger.error(`Redis health check failed: ${error}`);
-    await sendSlackWebhook(
+    // await sendSlackWebhook(
-      `[REDIS DOWN] Redis instances health check: ${error.message}`,
+    //   `[REDIS DOWN] Redis instances health check: ${error.message}`,
-      true
+    //   true
-    );
+    // );
    return res
      .status(500)
      .json({ status: "unhealthy", message: error.message });
--- a/apps/api/src/controllers/v0/auth.ts
+++ b/apps/api/src/controllers/v0/auth.ts
@ -1,26 +1,77 @@
-import { parseApi } from "../../src/lib/parseApi";
+import { parseApi } from "../../../src/lib/parseApi";
-import { getRateLimiter, } from "../../src/services/rate-limiter";
+import { getRateLimiter } from "../../../src/services/rate-limiter";
-import { AuthResponse, NotificationType, RateLimiterMode } from "../../src/types";
+import {
-import { supabase_service } from "../../src/services/supabase";
+  AuthResponse,
-import { withAuth } from "../../src/lib/withAuth";
+  NotificationType,
  RateLimiterMode,
 } from "../../../src/types";
 import { supabase_service } from "../../../src/services/supabase";
 import { withAuth } from "../../../src/lib/withAuth";
 import { RateLimiterRedis } from "rate-limiter-flexible";
-import { setTraceAttributes } from '@hyperdx/node-opentelemetry';
+import { setTraceAttributes } from "@hyperdx/node-opentelemetry";
-import { sendNotification } from "../services/notification/email_notification";
+import { sendNotification } from "../../services/notification/email_notification";
-import { Logger } from "../lib/logger";
+import { Logger } from "../../lib/logger";
 import { redlock } from "../../../src/services/redlock";
 import { getValue } from "../../../src/services/redis";
 import { setValue } from "../../../src/services/redis";
 import { validate } from "uuid";
-export async function authenticateUser(req, res, mode?: RateLimiterMode): Promise<AuthResponse> {
+function normalizedApiIsUuid(potentialUuid: string): boolean {
  // Check if the string is a valid UUID
  return validate(potentialUuid);
 }
 /**
  * Public authentication entry point for the v0 controllers.
  *
  * Wraps `supaAuthenticateUser` with `withAuth` and invokes the wrapped
  * function with the request, response and optional rate-limiter mode.
  *
  * @param req - Incoming request, forwarded unchanged.
  * @param res - Outgoing response, forwarded unchanged.
  * @param mode - Optional rate-limiter mode, forwarded unchanged.
  * @returns Whatever the wrapped authenticator resolves to.
  */
 export async function authenticateUser(
  req,
  res,
  mode?: RateLimiterMode
 ): Promise<AuthResponse> {
  const guardedAuthenticate = withAuth(supaAuthenticateUser);
  return guardedAuthenticate(req, res, mode);
 }
 function setTrace(team_id: string, api_key: string) {
  try {
    setTraceAttributes({
      team_id,
-      api_key
+      api_key,
    });
  } catch (error) {
    Logger.error(`Error setting trace attributes: ${error.message}`);
  }
 }
 /**
  * Resolves a normalized API key to its team id and price id through the
  * `get_key_and_price_id_2` Supabase RPC.
  *
  * @param normalizedApi - API key passed to the RPC as `api_key`.
  * @returns `{ success: true, teamId, priceId }` when a row is found;
  *   `{ success: false, status: 500, error }` when the RPC reports an error;
  *   `{ success: false, status: 401, error }` when no row matches the key.
  */
 async function getKeyAndPriceId(normalizedApi: string): Promise<{
  success: boolean;
  teamId?: string;
  priceId?: string;
  error?: string;
  status?: number;
 }> {
  const { data, error } = await supabase_service.rpc("get_key_and_price_id_2", {
    api_key: normalizedApi,
  });
  if (error) {
    Logger.error(`RPC ERROR (get_key_and_price_id_2): ${error.message}`);
    return {
      success: false,
      error:
        "The server seems overloaded. Please contact hello@firecrawl.com if you aren't sending too many requests at once.",
      status: 500,
    };
  }
  if (!data || data.length === 0) {
    // `error` is always falsy here (the branch above returned otherwise), so
    // it must not be dereferenced: doing so threw a TypeError and the caller
    // never received this 401.
    Logger.warn("Error fetching api key: no matching key found (data is empty)");
    // TODO: change this error code ?
    return {
      success: false,
      error: "Unauthorized: Invalid token",
      status: 401,
    };
  }
  const [keyRow] = data;
  return {
    success: true,
    teamId: keyRow.team_id,
    priceId: keyRow.price_id,
  };
 }
 export async function supaAuthenticateUser(
  req,
@ -51,20 +102,83 @@ export async function supaAuthenticateUser(
  const iptoken = incomingIP + token;
  let rateLimiter: RateLimiterRedis;
-  let subscriptionData: { team_id: string, plan: string } | null = null;
+  let subscriptionData: { team_id: string; plan: string } | null = null;
  let normalizedApi: string;
-  let team_id: string;
+  let cacheKey = "";
  let redLockKey = "";
  const lockTTL = 15000; // NOTE: presumably milliseconds, i.e. 15 seconds (not 10)
  let teamId: string | null = null;
  let priceId: string | null = null;
  if (token == "this_is_just_a_preview_token") {
    rateLimiter = getRateLimiter(RateLimiterMode.Preview, token);
-    team_id = "preview";
+    teamId = "preview";
  } else {
    normalizedApi = parseApi(token);
    if (!normalizedApiIsUuid(normalizedApi)) {
      return {
        success: false,
        error: "Unauthorized: Invalid token",
        status: 401,
      };
    }
-    const { data, error } = await supabase_service.rpc(
+    cacheKey = `api_key:${normalizedApi}`;
-      'get_key_and_price_id_2', { api_key: normalizedApi }
+
    try {
      const teamIdPriceId = await getValue(cacheKey);
      if (teamIdPriceId) {
        const { team_id, price_id } = JSON.parse(teamIdPriceId);
        teamId = team_id;
        priceId = price_id;
      } else {
        const {
          success,
          teamId: tId,
          priceId: pId,
          error,
          status,
        } = await getKeyAndPriceId(normalizedApi);
        if (!success) {
          return { success, error, status };
        }
        teamId = tId;
        priceId = pId;
        await setValue(
          cacheKey,
          JSON.stringify({ team_id: teamId, price_id: priceId }),
          10
        );
      }
    } catch (error) {
      Logger.error(`Error with auth function: ${error.message}`);
      // const {
      //   success,
      //   teamId: tId,
      //   priceId: pId,
      //   error: e,
      //   status,
      // } = await getKeyAndPriceId(normalizedApi);
      // if (!success) {
      //   return { success, error: e, status };
      // }
      // teamId = tId;
      // priceId = pId;
      // const {
      //   success,
      //   teamId: tId,
      //   priceId: pId,
      //   error: e,
      //   status,
      // } = await getKeyAndPriceId(normalizedApi);
      // if (!success) {
      //   return { success, error: e, status };
      // }
      // teamId = tId;
      // priceId = pId;
    }
    // get_key_and_price_id_2 rpc definition:
    // create or replace function get_key_and_price_id_2(api_key uuid)
    //   returns table(key uuid, team_id uuid, price_id text) as $$
@ -82,41 +196,34 @@ export async function supaAuthenticateUser(
    //   end;
    //   $$ language plpgsql;
-    if (error) {
+    const plan = getPlanByPriceId(priceId);
      Logger.warn(`Error fetching key and price_id: ${error.message}`);
    } else {
      // console.log('Key and Price ID:', data);
    }
    if (error || !data || data.length === 0) {
      Logger.warn(`Error fetching api key: ${error.message} or data is empty`);
      return {
        success: false,
        error: "Unauthorized: Invalid token",
        status: 401,
      };
    }
    const internal_team_id = data[0].team_id;
    team_id = internal_team_id;
    const plan = getPlanByPriceId(data[0].price_id);
    // HyperDX Logging
-    setTrace(team_id, normalizedApi);
+    setTrace(teamId, normalizedApi);
    subscriptionData = {
-      team_id: team_id,
+      team_id: teamId,
-      plan: plan
+      plan: plan,
-    }
+    };
    switch (mode) {
      case RateLimiterMode.Crawl:
-        rateLimiter = getRateLimiter(RateLimiterMode.Crawl, token, subscriptionData.plan);
+        rateLimiter = getRateLimiter(
          RateLimiterMode.Crawl,
          token,
          subscriptionData.plan
        );
        break;
      case RateLimiterMode.Scrape:
-        rateLimiter = getRateLimiter(RateLimiterMode.Scrape, token, subscriptionData.plan);
+        rateLimiter = getRateLimiter(
          RateLimiterMode.Scrape,
          token,
          subscriptionData.plan
        );
        break;
      case RateLimiterMode.Search:
-        rateLimiter = getRateLimiter(RateLimiterMode.Search, token, subscriptionData.plan);
+        rateLimiter = getRateLimiter(
          RateLimiterMode.Search,
          token,
          subscriptionData.plan
        );
        break;
      case RateLimiterMode.CrawlStatus:
        rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token);
@ -134,7 +241,8 @@ export async function supaAuthenticateUser(
    }
  }
-  const team_endpoint_token = token === "this_is_just_a_preview_token" ? iptoken : team_id;
+  const team_endpoint_token =
    token === "this_is_just_a_preview_token" ? iptoken : teamId;
  try {
    await rateLimiter.consume(team_endpoint_token);
@ -147,7 +255,17 @@ export async function supaAuthenticateUser(
    const startDate = new Date();
    const endDate = new Date();
    endDate.setDate(endDate.getDate() + 7);
    // await sendNotification(team_id, NotificationType.RATE_LIMIT_REACHED, startDate.toISOString(), endDate.toISOString());
    // Cache longer for 429s
    if (teamId && priceId && mode !== RateLimiterMode.Preview) {
      await setValue(
        cacheKey,
        JSON.stringify({ team_id: teamId, price_id: priceId }),
        60 // longer TTL than the 10 used on the normal path (presumably seconds)
      );
    }
    return {
      success: false,
      error: `Rate limit exceeded. Consumed points: ${rateLimiterRes.consumedPoints}, Remaining points: ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`,
@ -157,7 +275,9 @@ export async function supaAuthenticateUser(
  if (
    token === "this_is_just_a_preview_token" &&
-    (mode === RateLimiterMode.Scrape || mode === RateLimiterMode.Preview || mode === RateLimiterMode.Search)
+    (mode === RateLimiterMode.Scrape ||
      mode === RateLimiterMode.Preview ||
      mode === RateLimiterMode.Search)
  ) {
    return { success: true, team_id: "preview" };
    // check the origin of the request and make sure its from firecrawl.dev
@ -181,8 +301,6 @@ export async function supaAuthenticateUser(
      .select("*")
      .eq("key", normalizedApi);
    if (error || !data || data.length === 0) {
      Logger.warn(`Error fetching api key: ${error.message} or data is empty`);
      return {
@ -195,26 +313,32 @@ export async function supaAuthenticateUser(
    subscriptionData = data[0];
  }
-  return { success: true, team_id: subscriptionData.team_id, plan: subscriptionData.plan ?? ""};
+  return {
    success: true,
    team_id: subscriptionData.team_id,
    plan: subscriptionData.plan ?? "",
  };
 }
 function getPlanByPriceId(price_id: string) {
  switch (price_id) {
    case process.env.STRIPE_PRICE_ID_STARTER:
-      return 'starter';
+      return "starter";
    case process.env.STRIPE_PRICE_ID_STANDARD:
-      return 'standard';
+      return "standard";
    case process.env.STRIPE_PRICE_ID_SCALE:
-      return 'scale';
+      return "scale";
    case process.env.STRIPE_PRICE_ID_HOBBY:
    case process.env.STRIPE_PRICE_ID_HOBBY_YEARLY:
-      return 'hobby';
+      return "hobby";
    case process.env.STRIPE_PRICE_ID_STANDARD_NEW:
    case process.env.STRIPE_PRICE_ID_STANDARD_NEW_YEARLY:
-      return 'standardnew';
+      return "standardnew";
    case process.env.STRIPE_PRICE_ID_GROWTH:
    case process.env.STRIPE_PRICE_ID_GROWTH_YEARLY:
-      return 'growth';
+      return "growth";
    case process.env.STRIPE_PRICE_ID_GROWTH_DOUBLE_MONTHLY:
      return "growthdouble";
    default:
-      return 'free';
+      return "free";
  }
 }
--- a/apps/api/src/controllers/v0/crawl-cancel.ts
+++ b/apps/api/src/controllers/v0/crawl-cancel.ts
@ -1,10 +1,9 @@
 import { Request, Response } from "express";
 import { authenticateUser } from "./auth";
-import { RateLimiterMode } from "../../src/types";
+import { RateLimiterMode } from "../../../src/types";
-import { getWebScraperQueue } from "../../src/services/queue-service";
+import { supabase_service } from "../../../src/services/supabase";
-import { supabase_service } from "../../src/services/supabase";
+import { Logger } from "../../../src/lib/logger";
-import { billTeam } from "../../src/services/billing/credit_billing";
+import { getCrawl, saveCrawl } from "../../../src/lib/crawl-redis";
 import { Logger } from "../../src/lib/logger";
 export async function crawlCancelController(req: Request, res: Response) {
  try {
@ -18,8 +17,9 @@ export async function crawlCancelController(req: Request, res: Response) {
    if (!success) {
      return res.status(status).json({ error });
    }
-    const job = await getWebScraperQueue().getJob(req.params.jobId);
+
-    if (!job) {
+    const sc = await getCrawl(req.params.jobId);
    if (!sc) {
      return res.status(404).json({ error: "Job not found" });
    }
@ -39,27 +39,9 @@ export async function crawlCancelController(req: Request, res: Response) {
      }
    }
    const jobState = await job.getState();
    let progress = job.progress;
    if(typeof progress !== 'object') {
      progress = {
        partialDocs: []
      }
    }
    const { 
      partialDocs = [] 
    } = progress as { partialDocs: any[] };
    if (partialDocs && partialDocs.length > 0 && jobState === "active") {
      Logger.info("Billing team for partial docs...");
      // Note: the credits that we will bill them here might be lower than the actual
      // due to promises that are not yet resolved
      await billTeam(team_id, partialDocs.length);
    }
    try {
-      await (await getWebScraperQueue().client).set("cancelled:" + job.id, "true", "EX", 60 * 60);
+      sc.cancelled = true;
-      await job.discard();
+      await saveCrawl(req.params.jobId, sc);
    } catch (error) {
      Logger.error(error);
    }
--- a/apps/api/src/controllers/v0/crawl-status.ts
+++ b/apps/api/src/controllers/v0/crawl-status.ts
@ -0,0 +1,60 @@
 import { Request, Response } from "express";
 import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../../src/types";
 import { getScrapeQueue } from "../../../src/services/queue-service";
 import { Logger } from "../../../src/lib/logger";
 import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
 import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
 /**
  * Express handler that reports the aggregate status of a crawl.
  *
  * Authenticates in CrawlStatus mode, loads the stored crawl for
  * `req.params.jobId` (404 when absent, 403 when it belongs to another
  * team), then loads every scrape job registered for the crawl and responds
  * with `{ status, current, total, data, partial_data }`.
  *
  * Status rules: a cancelled crawl is "failed"; otherwise "completed" when
  * every job is completed, "failed" when any job failed, else "active".
  * Any thrown error is logged and answered with status 500.
  */
 export async function crawlStatusController(req: Request, res: Response) {
  try {
    const { success, team_id, error, status } = await authenticateUser(
      req,
      res,
      RateLimiterMode.CrawlStatus
    );
    if (!success) {
      return res.status(status).json({ error });
    }
    const sc = await getCrawl(req.params.jobId);
    if (!sc) {
      return res.status(404).json({ error: "Job not found" });
    }
    if (sc.team_id !== team_id) {
      return res.status(403).json({ error: "Forbidden" });
    }
    const jobIDs = await getCrawlJobs(req.params.jobId);
    const scrapeQueue = getScrapeQueue();
    const useDbDocs = process.env.USE_DB_AUTHENTICATION === "true";
    const lookedUpJobs = await Promise.all(jobIDs.map(async (jobId) => {
      const job = await scrapeQueue.getJob(jobId);
      if (!job) {
        // The id is registered for the crawl but the queue no longer has the
        // job; skip it instead of dereferencing it and failing the request.
        return null;
      }
      if (useDbDocs) {
        const supabaseData = await supabaseGetJobById(job.id);
        if (supabaseData) {
          job.returnvalue = supabaseData.docs;
        }
      }
      return job;
    }));
    const jobs = lookedUpJobs
      .filter((job): job is NonNullable<typeof job> => job != null)
      .sort((a, b) => a.timestamp - b.timestamp);
    const jobStatuses = await Promise.all(jobs.map((job) => job.getState()));
    let jobStatus: string;
    if (sc.cancelled) {
      jobStatus = "failed";
    } else if (jobStatuses.every((state) => state === "completed")) {
      jobStatus = "completed";
    } else if (jobStatuses.some((state) => state === "failed")) {
      jobStatus = "failed";
    } else {
      jobStatus = "active";
    }
    // Each job stores either a single document or an array whose first entry is the document.
    const data = jobs.map((job) => Array.isArray(job.returnvalue) ? job.returnvalue[0] : job.returnvalue);
    res.json({
      status: jobStatus,
      current: jobStatuses.filter((state) => state === "completed" || state === "failed").length,
      total: jobs.length,
      data: jobStatus === "completed" ? data : null,
      // Drop both null and undefined so unfinished jobs never surface as null entries.
      partial_data: jobStatus === "completed" ? [] : data.filter((doc) => doc != null),
    });
  } catch (error) {
    Logger.error(error);
    return res.status(500).json({ error: error.message });
  }
 }
--- a/apps/api/src/controllers/v0/crawl.ts
+++ b/apps/api/src/controllers/v0/crawl.ts
@ -0,0 +1,171 @@
 import { Request, Response } from "express";
 import { checkTeamCredits } from "../../../src/services/billing/credit_billing";
 import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../../src/types";
 import { addScrapeJob } from "../../../src/services/queue-jobs";
 import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
 import { logCrawl } from "../../../src/services/logging/crawl_log";
 import { validateIdempotencyKey } from "../../../src/services/idempotency/validate";
 import { createIdempotencyKey } from "../../../src/services/idempotency/create";
 import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values";
 import { v4 as uuidv4 } from "uuid";
 import { Logger } from "../../../src/lib/logger";
 import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
 import { getScrapeQueue } from "../../../src/services/queue-service";
 import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
 /**
  * Express handler that starts a crawl and responds with `{ jobId }`.
  *
  * Steps, in order:
  *  1. Authenticate in Crawl rate-limit mode.
  *  2. If an `x-idempotency-key` header is present, reject reused keys (409)
  *     and record the new key.
  *  3. Require that `checkTeamCredits(team_id, 1)` succeeds (402 otherwise).
  *  4. Validate/normalize `req.body.url` (400 for "Invalid URL", 403 when
  *     blocklisted).
  *  5. Store the crawl (including robots.txt when it can be fetched).
  *  6. Enqueue one scrape job per sitemap URL, or a single job for the
  *     origin URL when no sitemap is used.
  * Any other thrown error is logged and answered with status 500.
  */
 export async function crawlController(req: Request, res: Response) {
  try {
    const { success, team_id, error, status } = await authenticateUser(
      req,
      res,
      RateLimiterMode.Crawl
    );
    if (!success) {
      return res.status(status).json({ error });
    }
    if (req.headers["x-idempotency-key"]) {
      const isIdempotencyValid = await validateIdempotencyKey(req);
      if (!isIdempotencyValid) {
        return res.status(409).json({ error: "Idempotency key already used" });
      }
      try {
        // Awaited so that an asynchronous failure is caught by this try/catch
        // rather than escaping as an unhandled rejection.
        await createIdempotencyKey(req);
      } catch (error) {
        Logger.error(error);
        return res.status(500).json({ error: error.message });
      }
    }
    const { success: creditsCheckSuccess } = await checkTeamCredits(team_id, 1);
    if (!creditsCheckSuccess) {
      return res.status(402).json({ error: "Insufficient credits" });
    }
    let url = req.body.url;
    if (!url) {
      return res.status(400).json({ error: "Url is required" });
    }
    try {
      url = checkAndUpdateURL(url).url;
    } catch (e) {
      return res
        .status(e instanceof Error && e.message === "Invalid URL" ? 400 : 500)
        .json({ error: e.message ?? e });
    }
    if (isUrlBlocked(url)) {
      return res
        .status(403)
        .json({
          error:
            "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
        });
    }
    // Caller-supplied options override the defaults key by key.
    const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions };
    const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions };
    const origin = req.body.origin ?? defaultOrigin;
    const id = uuidv4();
    await logCrawl(id, team_id);
    const sc: StoredCrawl = {
      originUrl: url,
      crawlerOptions,
      pageOptions,
      team_id,
      createdAt: Date.now(),
    };
    const crawler = crawlToCrawler(id, sc);
    try {
      sc.robots = await crawler.getRobotsTxt();
    } catch (_) {
      // Best effort: the crawl proceeds without robots.txt when it cannot be fetched.
    }
    await saveCrawl(id, sc);
    const sitemap = sc.crawlerOptions?.ignoreSitemap ? null : await crawler.tryGetSitemap();
    if (sitemap !== null) {
      const jobs = sitemap.map((entry) => {
        const jobId = uuidv4();
        return {
          name: jobId,
          data: {
            url: entry.url,
            mode: "single_urls",
            crawlerOptions: crawlerOptions,
            team_id: team_id,
            pageOptions: pageOptions,
            origin,
            crawl_id: id,
            sitemapped: true,
          },
          opts: {
            jobId,
            priority: 20,
          }
        };
      });
      await lockURLs(id, jobs.map((job) => job.data.url));
      await addCrawlJobs(id, jobs.map((job) => job.opts.jobId));
      await getScrapeQueue().addBulk(jobs);
    } else {
      await lockURL(id, sc, url);
      const job = await addScrapeJob({
        url,
        mode: "single_urls",
        crawlerOptions: crawlerOptions,
        team_id: team_id,
        pageOptions: pageOptions,
        origin,
        crawl_id: id,
      }, {
        priority: 15, // prioritize request 0 of crawl jobs same as scrape jobs
      });
      await addCrawlJob(id, job.id);
    }
    res.json({ jobId: id });
  } catch (error) {
    Logger.error(error);
    return res.status(500).json({ error: error.message });
  }
 }
--- a/apps/api/src/controllers/v0/crawlPreview.ts
+++ b/apps/api/src/controllers/v0/crawlPreview.ts
@ -0,0 +1,135 @@
 import { Request, Response } from "express";
 import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../../src/types";
 import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
 import { v4 as uuidv4 } from "uuid";
 import { Logger } from "../../../src/lib/logger";
 import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
 import { addScrapeJob } from "../../../src/services/queue-jobs";
 import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
 /**
  * Express handler that starts a "preview" crawl and responds with `{ jobId }`.
  *
  * Authenticates in Preview rate-limit mode (the crawl is always stored under
  * the team id "preview"), validates/normalizes `req.body.url` (400 when
  * missing or "Invalid URL", 403 when blocklisted), stores the crawl together
  * with robots.txt when it can be fetched, and enqueues one scrape job per
  * sitemap URL, or a single job for the origin URL when no sitemap is used.
  * Any other thrown error is logged and answered with status 500.
  */
 export async function crawlPreviewController(req: Request, res: Response) {
  try {
    const { success, error, status } = await authenticateUser(
      req,
      res,
      RateLimiterMode.Preview
    );
    const team_id = "preview";
    if (!success) {
      return res.status(status).json({ error });
    }
    let url = req.body.url;
    if (!url) {
      return res.status(400).json({ error: "Url is required" });
    }
    try {
      url = checkAndUpdateURL(url).url;
    } catch (e) {
      return res
        .status(e instanceof Error && e.message === "Invalid URL" ? 400 : 500)
        .json({ error: e.message ?? e });
    }
    if (isUrlBlocked(url)) {
      return res
        .status(403)
        .json({
          error:
            "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
        });
    }
    const crawlerOptions = req.body.crawlerOptions ?? {};
    const pageOptions = req.body.pageOptions ?? { onlyMainContent: false, includeHtml: false, removeTags: [] };
    const id = uuidv4();
    const sc: StoredCrawl = {
      originUrl: url,
      crawlerOptions,
      pageOptions,
      team_id,
      createdAt: Date.now(),
    };
    // robots.txt must be fetched through the crawler instance: this is a
    // module-level function, so `this` is undefined here and a call on it
    // always threw into the empty catch, leaving `robots` unset.
    const crawler = crawlToCrawler(id, sc);
    try {
      sc.robots = await crawler.getRobotsTxt();
    } catch (_) {
      // Best effort: the crawl proceeds without robots.txt when it cannot be fetched.
    }
    await saveCrawl(id, sc);
    const sitemap = sc.crawlerOptions?.ignoreSitemap ? null : await crawler.tryGetSitemap();
    if (sitemap !== null) {
      for (const sitemapUrl of sitemap.map((entry) => entry.url)) {
        await lockURL(id, sc, sitemapUrl);
        const job = await addScrapeJob({
          url: sitemapUrl,
          mode: "single_urls",
          crawlerOptions: crawlerOptions,
          team_id: team_id,
          pageOptions: pageOptions,
          origin: "website-preview",
          crawl_id: id,
          sitemapped: true,
        });
        await addCrawlJob(id, job.id);
      }
    } else {
      await lockURL(id, sc, url);
      const job = await addScrapeJob({
        url,
        mode: "single_urls",
        crawlerOptions: crawlerOptions,
        team_id: team_id,
        pageOptions: pageOptions,
        origin: "website-preview",
        crawl_id: id,
      });
      await addCrawlJob(id, job.id);
    }
    res.json({ jobId: id });
  } catch (error) {
    Logger.error(error);
    return res.status(500).json({ error: error.message });
  }
 }
--- a/apps/api/src/controllers/v0/keyAuth.ts
+++ b/apps/api/src/controllers/v0/keyAuth.ts
@ -1,5 +1,5 @@
-import { AuthResponse, RateLimiterMode } from "../types";
+import { AuthResponse, RateLimiterMode } from "../../types";
 import { Request, Response } from "express";
 import { authenticateUser } from "./auth";
--- a/apps/api/src/controllers/v0/liveness.ts
+++ b/apps/api/src/controllers/v0/liveness.ts
--- a/apps/api/src/controllers/v0/readiness.ts
+++ b/apps/api/src/controllers/v0/readiness.ts
--- a/apps/api/src/controllers/v0/scrape.ts
+++ b/apps/api/src/controllers/v0/scrape.ts
@ -1,17 +1,17 @@
-import { ExtractorOptions, PageOptions } from './../lib/entities';
+import { ExtractorOptions, PageOptions } from './../../lib/entities';
 import { Request, Response } from "express";
-import { billTeam, checkTeamCredits } from "../services/billing/credit_billing";
+import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing";
 import { authenticateUser } from "./auth";
-import { RateLimiterMode } from "../types";
+import { RateLimiterMode } from "../../types";
-import { logJob } from "../services/logging/log_job";
+import { logJob } from "../../services/logging/log_job";
-import { Document } from "../lib/entities";
+import { Document } from "../../lib/entities";
-import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
+import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
-import { numTokensFromString } from '../lib/LLM-extraction/helpers';
+import { numTokensFromString } from '../../lib/LLM-extraction/helpers';
-import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values';
+import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../../lib/default-values';
-import { addScrapeJob } from '../services/queue-jobs';
+import { addScrapeJob } from '../../services/queue-jobs';
-import { scrapeQueueEvents } from '../services/queue-service';
+import { scrapeQueueEvents } from '../../services/queue-service';
 import { v4 as uuidv4 } from "uuid";
-import { Logger } from '../lib/logger';
+import { Logger } from '../../lib/logger';
 export async function scrapeHelper(
  jobId: string,
@ -45,7 +45,7 @@ export async function scrapeHelper(
    pageOptions,
    extractorOptions,
    origin: req.body.origin ?? defaultOrigin,
-  });
+  }, {}, jobId);
  let doc;
  try {
@ -62,6 +62,8 @@ export async function scrapeHelper(
    }
  }
  await job.remove();
  if (!doc) {
    console.error("!!! PANIC DOC IS", doc, job);
    return { success: true, error: "No page found", returnCode: 200, data: doc };
@ -121,13 +123,7 @@ export async function scrapeController(req: Request, res: Response) {
    };
    // Async check saves 500ms in average case
    // Don't async check in llm extraction mode as it could be expensive
    if (extractorOptions.mode.includes("llm-extraction")) {
    await checkCredits();
    } else {
      checkCredits();
    }
    const jobId = uuidv4();
--- a/apps/api/src/controllers/v0/search.ts
+++ b/apps/api/src/controllers/v0/search.ts
@ -1,14 +1,15 @@
 import { Request, Response } from "express";
-import { WebScraperDataProvider } from "../scraper/WebScraper";
+import { WebScraperDataProvider } from "../../scraper/WebScraper";
-import { billTeam, checkTeamCredits } from "../services/billing/credit_billing";
+import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing";
 import { authenticateUser } from "./auth";
-import { RateLimiterMode } from "../types";
+import { RateLimiterMode } from "../../types";
-import { logJob } from "../services/logging/log_job";
+import { logJob } from "../../services/logging/log_job";
-import { PageOptions, SearchOptions } from "../lib/entities";
+import { PageOptions, SearchOptions } from "../../lib/entities";
-import { search } from "../search";
+import { search } from "../../search";
-import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
+import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
 import { v4 as uuidv4 } from "uuid";
-import { Logger } from "../lib/logger";
+import { Logger } from "../../lib/logger";
 import { getScrapeQueue, scrapeQueueEvents } from "../../services/queue-service";
 export async function searchHelper(
  jobId: string,
@ -75,26 +76,28 @@ export async function searchHelper(
  // filter out social media links
-
+  const jobDatas = res.map(x => {
-  const a = new WebScraperDataProvider();
+    const url = x.url;
-  await a.setOptions({
+    const uuid = uuidv4();
-    jobId,
+    return {
      name: uuid,
      data: {
        url,
        mode: "single_urls",
-    urls: res.map((r) => r.url).slice(0, searchOptions.limit ?? 7),
+        crawlerOptions: crawlerOptions,
-    crawlerOptions: {
+        team_id: team_id,
-      ...crawlerOptions,
+        pageOptions: pageOptions,
      },
-    pageOptions: {
+      opts: {
-      ...pageOptions,
+        jobId: uuid,
-      onlyMainContent: pageOptions?.onlyMainContent ?? true,
+        priority: 10,
-      fetchPageContent: pageOptions?.fetchPageContent ?? true,
+      }
-      includeHtml: pageOptions?.includeHtml ?? false,
+    };
-      removeTags: pageOptions?.removeTags ?? [],
+  })
      fallback: false,
    },
  });
-  const docs = await a.getDocuments(false);
+  const jobs = await getScrapeQueue().addBulk(jobDatas);
  const docs = (await Promise.all(jobs.map(x => x.waitUntilFinished(scrapeQueueEvents, 60000)))).map(x => x[0]);
  if (docs.length === 0) {
    return { success: true, error: "No search results found", returnCode: 200 };
@ -109,19 +112,6 @@ export async function searchHelper(
    return { success: true, error: "No page found", returnCode: 200, data: docs };
  }
  const billingResult = await billTeam(
    team_id,
    filteredDocs.length
  );
  if (!billingResult.success) {
    return {
      success: false,
      error:
        "Failed to bill team. Insufficient credits or subscription not found.",
      returnCode: 402,
    };
  }
  return {
    success: true,
    data: filteredDocs,
@ -150,7 +140,7 @@ export async function searchController(req: Request, res: Response) {
    };
    const origin = req.body.origin ?? "api";
-    const searchOptions = req.body.searchOptions ?? { limit: 7 };
+    const searchOptions = req.body.searchOptions ?? { limit: 5 };
    const jobId = uuidv4();
--- a/apps/api/src/controllers/v0/status.ts
+++ b/apps/api/src/controllers/v0/status.ts
@ -0,0 +1,54 @@
 import { Request, Response } from "express";
 import { Logger } from "../../../src/lib/logger";
 import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
 import { getScrapeQueue } from "../../../src/services/queue-service";
 import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
 /**
  * Reports the progress of a crawl identified by `req.params.jobId`.
  *
  * Responds with:
  *  - 404 `{ error: "Job not found" }` when no stored crawl exists for the id;
  *  - 200 `{ status, current, total, data, partial_data }` otherwise, where
  *    `data` is only populated once every job has completed and
  *    `partial_data` carries whatever documents are available before that;
  *  - 500 `{ error }` when anything throws.
  *
  * @param req Express request; only `params.jobId` is read.
  * @param res Express response used to emit the JSON payload.
  */
 export async function crawlJobStatusPreviewController(req: Request, res: Response) {
  try {
    const sc = await getCrawl(req.params.jobId);
    if (!sc) {
      return res.status(404).json({ error: "Job not found" });
    }

    const jobIDs = await getCrawlJobs(req.params.jobId);

    const jobs = (await Promise.all(jobIDs.map(async x => {
      const job = await getScrapeQueue().getJob(x);
      // The queue may no longer hold this job; skip it instead of crashing
      // on `job.id` below and failing the whole status request.
      if (!job) {
        return job;
      }

      if (process.env.USE_DB_AUTHENTICATION === "true") {
        const supabaseData = await supabaseGetJobById(job.id);

        // Prefer the documents stored in the database when they exist.
        if (supabaseData) {
          job.returnvalue = supabaseData.docs;
        }
      }

      return job;
    })))
      .filter(x => x)
      .sort((a, b) => a.timestamp - b.timestamp);

    const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
    // Precedence: cancelled crawl > all completed > any failed > still running.
    const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active";

    // Each job scrapes a single URL, so an array return value is unwrapped to its first document.
    const data = jobs.map(x => Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue);

    res.json({
      status: jobStatus,
      current: jobStatuses.filter(x => x === "completed" || x === "failed").length,
      total: jobs.length,
      data: jobStatus === "completed" ? data : null,
      // `!= null` drops both null and undefined return values of unfinished jobs.
      partial_data: jobStatus === "completed" ? [] : data.filter(x => x != null),
    });
  } catch (error) {
    Logger.error(error);
    return res.status(500).json({ error: error.message });
  }
 }
--- a/apps/api/src/controllers/v1/auth.ts
+++ b/apps/api/src/controllers/v1/auth.ts
@ -26,13 +26,7 @@ export async function supaAuthenticateUser(
  req,
  res,
  mode?: RateLimiterMode
-): Promise<{
+): Promise<AuthResponse> {
  success: boolean;
  team_id?: string;
  error?: string;
  status?: number;
  plan?: string;
 }> {
  const authHeader = req.headers.authorization;
  if (!authHeader) {
    return { success: false, error: "Unauthorized", status: 401 };
@ -106,7 +100,7 @@ export async function supaAuthenticateUser(
    setTrace(team_id, normalizedApi);
    subscriptionData = {
      team_id: team_id,
-      plan: plan
+      plan: plan,
    }
    switch (mode) {
      case RateLimiterMode.Crawl:
@ -121,6 +115,9 @@ export async function supaAuthenticateUser(
      case RateLimiterMode.CrawlStatus:
        rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token);
        break;
      case RateLimiterMode.Map:
        rateLimiter = getRateLimiter(RateLimiterMode.Map, token);
        break;
      case RateLimiterMode.Preview:
        rateLimiter = getRateLimiter(RateLimiterMode.Preview, token);
@ -157,7 +154,7 @@ export async function supaAuthenticateUser(
  if (
    token === "this_is_just_a_preview_token" &&
-    (mode === RateLimiterMode.Scrape || mode === RateLimiterMode.Preview || mode === RateLimiterMode.Search)
+    (mode === RateLimiterMode.Scrape || mode === RateLimiterMode.Preview || mode === RateLimiterMode.Search || mode === RateLimiterMode.Map)
  ) {
    return { success: true, team_id: "preview" };
    // check the origin of the request and make sure its from firecrawl.dev
@ -195,7 +192,12 @@ export async function supaAuthenticateUser(
    subscriptionData = data[0];
  }
-  return { success: true, team_id: subscriptionData.team_id, plan: subscriptionData.plan ?? ""};
+  return {
    success: true,
    team_id: subscriptionData.team_id,
    plan: subscriptionData.plan ?? "",
    api_key: normalizedApi
  };
 }
 function getPlanByPriceId(price_id: string) {
  switch (price_id) {
--- a/apps/api/src/controllers/v1/crawl-status-ws.ts
+++ b/apps/api/src/controllers/v1/crawl-status-ws.ts
@ -0,0 +1,148 @@
 import { authMiddleware } from "../../routes/v1";
 import { RateLimiterMode } from "../../types";
 import { authenticateUser } from "../v0/auth";
 import { CrawlStatusParams, CrawlStatusResponse, Document, ErrorResponse, legacyDocumentConverter, RequestWithAuth } from "./types";
 import { WebSocket } from "ws";
 import { v4 as uuidv4 } from "uuid";
 import { Logger } from "../../lib/logger";
 import { getCrawl, getCrawlExpiry, getCrawlJobs, getDoneJobsOrdered, getDoneJobsOrderedLength, isCrawlFinished, isCrawlFinishedLocked } from "../../lib/crawl-redis";
 import { getScrapeQueue, scrapeQueueEvents } from "../../services/queue-service";
 import { getJob, getJobs } from "./crawl-status";
 // Messages exchanged over the crawl-status WebSocket. Every message is
 // JSON-serialized and discriminated by its `type` field.

 // Describes a failure; in this file it is only ever passed to close().
 type ErrorMessage = {
  type: "error",
  error: string,
 }
 // Snapshot of the crawl's state and already-finished documents, sent once
 // after the client connects.
 type CatchupMessage = {
  type: "catchup",
  data: CrawlStatusResponse,
 }
 // A single document produced by a job that completed while the socket was open.
 type DocumentMessage = {
  type: "document",
  data: Document,
 }
 // Passed to close() with code 1000 when there is nothing more to stream.
 type DoneMessage = { type: "done" }
 // Union of everything send() and close() accept.
 type Message = ErrorMessage | CatchupMessage | DoneMessage | DocumentMessage;
 /**
  * Writes `msg` to the socket as a JSON string.
  *
  * Nothing is written unless `ws.readyState` is 1; in that case the function
  * returns `undefined`. Otherwise it returns a promise that resolves with
  * `null` once the socket's send callback reports success and rejects with
  * the error the callback reports.
  */
 function send(ws: WebSocket, msg: Message) {
  if (ws.readyState !== 1) {
    return undefined;
  }

  return new Promise((resolve, reject) => {
    const onSent = (err) => {
      if (!err) {
        resolve(null);
        return;
      }
      reject(err);
    };
    ws.send(JSON.stringify(msg), onSent);
  });
 }
 /**
  * Closes the socket with `code`, using the JSON form of `msg` as the close
  * reason. Does nothing when `ws.readyState` is greater than 1.
  *
  * A WebSocket close reason may not exceed 123 bytes, and an over-long reason
  * makes the close call throw instead of closing. When the serialized message
  * is too large, the full payload is therefore sent as a regular frame first
  * (best effort, only while the socket's readyState is 1) and the close
  * reason is shortened so it still parses as JSON:
  *  - error messages keep `type` and as much of `error` as fits;
  *  - any other message is reduced to `{ type }`.
  *
  * @param ws   Socket to close.
  * @param code WebSocket close status code.
  * @param msg  Message describing why the socket is being closed.
  */
 function close(ws: WebSocket, code: number, msg: Message) {
  if (ws.readyState > 1) {
    return;
  }

  const maxReasonBytes = 123;
  let reason = JSON.stringify(msg);

  if (Buffer.byteLength(reason) > maxReasonBytes) {
    if (ws.readyState === 1) {
      // Best effort: the close below must still happen if this frame fails.
      ws.send(reason, () => {});
    }

    if (msg.type === "error" && typeof msg.error === "string") {
      // Start from a cheap upper bound, then trim until the JSON fits.
      // An empty `error` serializes to far fewer than 123 bytes, so this terminates.
      let text = msg.error.slice(0, maxReasonBytes);
      do {
        text = text.slice(0, -1);
        reason = JSON.stringify({ type: "error", error: text });
      } while (text.length > 0 && Buffer.byteLength(reason) > maxReasonBytes);
    } else {
      reason = JSON.stringify({ type: msg.type });
    }
  }

  ws.close(code, reason);
 }
 /**
  * Streams the status of the crawl `req.params.jobId` over `ws`.
  *
  * Steps:
  *  1. Load the stored crawl. The socket is closed with 1008 when it does not
  *     exist and with 3003 when it belongs to a different team than `req.auth`.
  *  2. Subscribe to "completed" events on the scrape queue so that documents
  *     finishing from now on are pushed as "document" messages.
  *  3. Send one "catchup" message containing the documents that were already done.
  *  4. Close with 1000 / "done" as soon as the crawl is no longer scraping.
  *
  * The queue listener is always detached again: when the crawl finishes, when
  * the client disconnects, and when the catch-up phase throws.
  *
  * @param ws  Client socket.
  * @param req Request carrying `params.jobId` and the authenticated `auth.team_id`.
  */
 async function crawlStatusWS(ws: WebSocket, req: RequestWithAuth<CrawlStatusParams, undefined, undefined>) {
  const sc = await getCrawl(req.params.jobId);
  if (!sc) {
    return close(ws, 1008, { type: "error", error: "Job not found" });
  }

  if (sc.team_id !== req.auth.team_id) {
    return close(ws, 3003, { type: "error", error: "Forbidden" });
  }

  // The client went away while the crawl was being looked up; subscribing now
  // would register a listener that nothing ever removes.
  if (ws.readyState > 1) {
    return;
  }

  let doneJobIDs = [];

  const completedListener = async e => {
    // This runs as an event-emitter callback, so nothing awaits it: every
    // failure has to be handled here or it becomes an unhandled rejection.
    try {
      const job = await getScrapeQueue().getJob(e.jobId);
      if (!job || job.data.crawl_id !== req.params.jobId) return;

      // Already delivered as part of the catch-up message.
      if (doneJobIDs.includes(job.id)) return;

      const j = await getJob(job.id);

      if (j && j.returnvalue) {
        await send(ws, {
          type: "document",
          data: legacyDocumentConverter(j.returnvalue),
        });

        if (await isCrawlFinishedLocked(req.params.jobId)) {
          await new Promise((resolve) => setTimeout(() => resolve(true), 5000)) // wait for last events to pour in
          detach();
          close(ws, 1000, { type: "done" })
        }
      } else {
        // FAILED
      }
    } catch (err) {
      Logger.error("Error while streaming crawl status (" + req.params.jobId + "): " + (err instanceof Error ? err.stack ?? err.message : String(err)));
    }
  };

  const detach = () => {
    scrapeQueueEvents.removeListener("completed", completedListener);
  };

  // Do not keep listening on the shared queue events after the client is gone.
  ws.once("close", detach);

  // TODO: handle failed jobs
  scrapeQueueEvents.addListener("completed", completedListener);

  try {
    doneJobIDs = await getDoneJobsOrdered(req.params.jobId);

    const jobIDs = await getCrawlJobs(req.params.jobId);
    const jobStatuses = await Promise.all(jobIDs.map(x => getScrapeQueue().getJobState(x)));
    // Precedence: cancelled crawl > all completed > any failed > still scraping.
    const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "scraping";
    const doneJobs = await getJobs(doneJobIDs);
    const data = doneJobs.map(x => x.returnvalue);

    await send(ws, {
      type: "catchup",
      data: {
        status,
        totalCount: jobIDs.length,
        creditsUsed: jobIDs.length,
        expiresAt: (await getCrawlExpiry(req.params.jobId)).toISOString(),
        data: data.map(x => legacyDocumentConverter(x)),
      }
    });

    if (status !== "scraping") {
      detach();
      return close(ws, 1000, { type: "done" });
    }
  } catch (err) {
    // Leave no listener behind; the caller reports the error to the client.
    detach();
    throw err;
  }
 }
 /**
  * WebSocket entry point for crawl status: authenticates the request, stores
  * the resulting team and plan on `req.auth`, and delegates to crawlStatusWS.
  *
  * - Failed authentication closes the socket with code 3000 and the
  *   authentication error.
  * - Any thrown error is logged together with a freshly generated exception
  *   ID, and the socket is closed with code 1011 and a message quoting that ID.
  *
  * @param ws  Client socket.
  * @param req Upgrade request; `params.jobId` identifies the crawl.
  */
 export async function crawlStatusWSController(ws: WebSocket, req: RequestWithAuth<CrawlStatusParams, undefined, undefined>) {
  try {
    const { success, team_id, error, plan } = await authenticateUser(
      req,
      null,
      RateLimiterMode.CrawlStatus,
    );

    if (!success) {
      return close(ws, 3000, {
        type: "error",
        error,
      });
    }

    req.auth = { team_id, plan };

    await crawlStatusWS(ws, req);
  } catch (err) {
    const id = uuidv4();

    let verbose;
    try {
      verbose = JSON.stringify(err);
    } catch (_) {
      // Serialization itself can throw (e.g. circular references). That must
      // not escape this handler, or the error is never logged and the socket
      // is never closed.
      verbose = "{}";
    }

    // Error instances have no enumerable own fields by default, so they
    // serialize to "{}"; log the useful fields explicitly.
    if (verbose === "{}" && err instanceof Error) {
      verbose = JSON.stringify({
        message: err.message,
        name: err.name,
        stack: err.stack,
      });
    }

    Logger.error("Error occurred in WebSocket! (" + req.path + ") -- ID " + id + " -- " + verbose);
    return close(ws, 1011, {
      type: "error",
      error: "An unexpected error occurred. Please contact hello@firecrawl.com for help. Your exception ID is " + id
    });
  }
 }
--- a/apps/api/src/controllers/v1/crawl-status.ts
+++ b/apps/api/src/controllers/v1/crawl-status.ts
@ -1,89 +1,115 @@
-import { Request, Response } from "express";
+import { Response } from "express";
-import { authenticateUser } from "./auth";
+import { CrawlStatusParams, CrawlStatusResponse, ErrorResponse, legacyDocumentConverter, RequestWithAuth } from "./types";
-import { RateLimiterMode } from "../../../src/types";
+import { getCrawl, getCrawlExpiry, getCrawlJobs, getDoneJobsOrdered, getDoneJobsOrderedLength } from "../../lib/crawl-redis";
-import { addWebScraperJob } from "../../../src/services/queue-jobs";
+import { getScrapeQueue } from "../../services/queue-service";
-import { getWebScraperQueue } from "../../../src/services/queue-service";
+import { supabaseGetJobById, supabaseGetJobsById } from "../../lib/supabase-jobs";
 import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
 import { Logger } from "../../../src/lib/logger";
 import { v4 as uuidv4 } from "uuid";
-export async function crawlStatusController(req: Request, res: Response) {
+export async function getJob(id: string) {
-  // TODO: validate req.params.jobId
+  const job = await getScrapeQueue().getJob(id);
  if (!job) return job;
-  try {    
+  if (process.env.USE_DB_AUTHENTICATION === "true") {
-    const { success, team_id, error, status } = await authenticateUser(
+    const supabaseData = await supabaseGetJobById(id);
-      req,
+
-      res,
+    if (supabaseData) {
-      RateLimiterMode.CrawlStatus
+      job.returnvalue = supabaseData.docs;
-    );
+    }
    if (!success) {
      return res.status(status).json({ error });
  }
-    // const job = await getWebScraperQueue().getJob(req.params.jobId);
+  job.returnvalue = Array.isArray(job.returnvalue) ? job.returnvalue[0] : job.returnvalue;
    // if (!job) {
    //   return res.status(404).json({ error: "Job not found" });
    // }
-    // const { current, current_url, total, current_step, partialDocs } = await job.progress();
+  return job;
-
+}
-    // let data = job.returnvalue;
+
-    // if (process.env.USE_DB_AUTHENTICATION === "true") {
+export async function getJobs(ids: string[]) {
-    //   const supabaseData = await supabaseGetJobById(req.params.jobId);
+  const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x);
-
+  
-    //   if (supabaseData) {
+  if (process.env.USE_DB_AUTHENTICATION === "true") {
-    //     data = supabaseData.docs;
+    const supabaseData = await supabaseGetJobsById(ids);
-    //   }
+
-    // }
+    supabaseData.forEach(x => {
-
+      const job = jobs.find(y => y.id === x.job_id);
-    // const jobStatus = await job.getState();
+      if (job) {
-
+        job.returnvalue = x.docs;
-    // mock:
+      }
-    const id = uuidv4();
+    })
-    const result = {
+  }
-      totalCount: 100,
+
-      creditsUsed: 2,
+  jobs.forEach(job => {
-      expiresAt: new Date(Date.now() + 24 * 60 * 60 * 1000).getTime(),
+    job.returnvalue = Array.isArray(job.returnvalue) ? job.returnvalue[0] : job.returnvalue;
-      status: "scraping", // scraping, completed, failed
+  });
-      next: `${req.protocol}://${req.get("host")}/v1/crawl/${id}`,
+
-      data: [{
+  return jobs;
-        markdown: "test",
+}
-        content: "test",
+
-        html: "test",
+export async function crawlStatusController(req: RequestWithAuth<CrawlStatusParams, undefined, CrawlStatusResponse>, res: Response<CrawlStatusResponse>) {
-        rawHtml: "test",
+  const sc = await getCrawl(req.params.jobId);
-        linksOnPage: ["test1", "test2"],
+  if (!sc) {
-        screenshot: "test",
+    return res.status(404).json({ success: false, error: "Job not found" });
-        metadata: {
+  }
-          title: "test",
+
-          description: "test",
+  if (sc.team_id !== req.auth.team_id) {
-          language: "test",
+    return res.status(403).json({ success: false, error: "Forbidden" });
-          sourceURL: "test",
+  }
-          statusCode: 200,
+
-          error: "test"
+  const start = typeof req.query.skip === "string" ? parseInt(req.query.skip, 10) : 0;
-        }
+  const end = typeof req.query.limit === "string" ? (start + parseInt(req.query.limit, 10) - 1) : undefined;
-      },
+
-      {
+  const jobIDs = await getCrawlJobs(req.params.jobId);
-        markdown: "test",
+  const jobStatuses = await Promise.all(jobIDs.map(x => getScrapeQueue().getJobState(x)));
-        content: "test",
+  const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "scraping";
-        html: "test",
+  const doneJobsLength = await getDoneJobsOrderedLength(req.params.jobId);
-        rawHtml: "test",
+  const doneJobsOrder = await getDoneJobsOrdered(req.params.jobId, start, end ?? -1);
-        linksOnPage: ["test1", "test2"],
+
-        screenshot: "test",
+  let doneJobs = [];
-        metadata: {
+
-          title: "test",
+  if (end === undefined) { // determine 10 megabyte limit
-          description: "test",
+    let bytes = 0;
-          language: "test",
+    const bytesLimit = 10485760; // 10 MiB in bytes
-          sourceURL: "test",
+    const factor = 100; // chunking for faster retrieval
-          statusCode: 200,
+
-          error: "test"
+    for (let i = 0; i < doneJobsOrder.length && bytes < bytesLimit; i += factor) {
-        }
+      // get current chunk and retrieve jobs
-      }]
+      const currentIDs = doneJobsOrder.slice(i, i+factor);
-    }
+      const jobs = await getJobs(currentIDs);
-
+
-    res.status(200).json(result);
+      // iterate through jobs and add them one them one to the byte counter
-  } catch (error) {
+      // both loops will break once we cross the byte counter
-    Logger.error(error);
+      for (let ii = 0; ii < jobs.length && bytes < bytesLimit; ii++) {
-    return res.status(500).json({ error: error.message });
+        const job = jobs[ii];
-  }
+        doneJobs.push(job);
        bytes += JSON.stringify(legacyDocumentConverter(job.returnvalue)).length;
      }
    }
    // if we ran over the bytes limit, remove the last document
    if (bytes > bytesLimit) {
      doneJobs.splice(doneJobs.length - 1, 1);
    }
  } else {
    doneJobs = await getJobs(doneJobsOrder);
  }
  const data = doneJobs.map(x => x.returnvalue);
  const nextURL = new URL(`${req.protocol}://${req.get("host")}/v1/crawl/${req.params.jobId}`);
  nextURL.searchParams.set("skip", (start + data.length).toString());
  if (typeof req.query.limit === "string") {
    nextURL.searchParams.set("limit", req.query.limit);
  }
  res.status(200).json({
    status,
    totalCount: jobIDs.length,
    creditsUsed: jobIDs.length,
    expiresAt: (await getCrawlExpiry(req.params.jobId)).toISOString(),
    next:
      status !== "scraping" && (start + data.length) === doneJobsLength // if there's not gonna be any documents after this
        ? undefined
        : nextURL.href,
    data: data.map(x => legacyDocumentConverter(x)),
  });
 }
--- a/apps/api/src/controllers/v1/crawl.ts
+++ b/apps/api/src/controllers/v1/crawl.ts
@ -1,139 +1,126 @@
-import { Request, Response } from "express";
+import { Response } from "express";
 import { WebScraperDataProvider } from "../../../src/scraper/WebScraper";
 import { billTeam } from "../../../src/services/billing/credit_billing";
 import { checkTeamCredits } from "../../../src/services/billing/credit_billing";
 import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../../src/types";
 import { addWebScraperJob } from "../../../src/services/queue-jobs";
 import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
 import { logCrawl } from "../../../src/services/logging/crawl_log";
 import { validateIdempotencyKey } from "../../../src/services/idempotency/validate";
 import { createIdempotencyKey } from "../../../src/services/idempotency/create";
 import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values";
 import { v4 as uuidv4 } from "uuid";
-import { Logger } from "../../../src/lib/logger";
+import {
-import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
+  CrawlRequest,
  crawlRequestSchema,
  CrawlResponse,
  legacyCrawlerOptions,
  legacyScrapeOptions,
  RequestWithAuth,
 } from "./types";
 import {
  addCrawlJob,
  addCrawlJobs,
  crawlToCrawler,
  lockURL,
  lockURLs,
  saveCrawl,
  StoredCrawl,
 } from "../../lib/crawl-redis";
 import { logCrawl } from "../../services/logging/crawl_log";
 import { getScrapeQueue } from "../../services/queue-service";
 import { addScrapeJob } from "../../services/queue-jobs";
 import { Logger } from "../../lib/logger";
-export async function crawlController(req: Request, res: Response) {
+export async function crawlController(
-  // expected req.body
+  req: RequestWithAuth<{}, CrawlResponse, CrawlRequest>,
-
+  res: Response<CrawlResponse>
-  // req.body = {
+) {
-  //   url: string
+  req.body = crawlRequestSchema.parse(req.body);
  //   crawlerOptions: {
  //     includePaths: string[]
  //     excludePaths: string[]
  //     maxDepth: number
  //     limit: number
  //     allowBackwardLinks: boolean >> TODO: CHANGE THIS NAME???
  //     allowExternalLinks: boolean
  //     ignoreSitemap: number
  //   }
  //   scrapeOptions: Exclude<Scrape, "url">
  // }
  try {
    const { success, team_id, error, status } = await authenticateUser(
      req,
      res,
      RateLimiterMode.Crawl
    );
    if (!success) {
      return res.status(status).json({ error });
    }
    if (req.headers["x-idempotency-key"]) {
      const isIdempotencyValid = await validateIdempotencyKey(req);
      if (!isIdempotencyValid) {
        return res.status(409).json({ error: "Idempotency key already used" });
      }
      try {
        createIdempotencyKey(req);
      } catch (error) {
        Logger.error(error);
        return res.status(500).json({ error: error.message });
      }
    }
    const { success: creditsCheckSuccess, message: creditsCheckMessage } =
      await checkTeamCredits(team_id, 1);
    if (!creditsCheckSuccess) {
      return res.status(402).json({ error: "Insufficient credits" });
    }
    let url = req.body.url;
    if (!url) {
      return res.status(400).json({ error: "Url is required" });
    }
    if (isUrlBlocked(url)) {
      return res
        .status(403)
        .json({
          error:
            "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
        });
    }
    try {
      url = checkAndUpdateURL(url);
    } catch (error) {
      return res.status(400).json({ error: 'Invalid Url' });
    }
    // TODO: add job to queue
  const id = uuidv4();
    return res.status(200).json({ jobId: id, url: `${req.protocol}://${req.get('host')}/v1/crawl/${id}` });
-    // const mode = req.body.mode ?? "crawl";
+  await logCrawl(id, req.auth.team_id);
-    // const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions };
+  const { remainingCredits } = req.account;
    // const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions };
-    // if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this?
+  // TODO: Get rid of crawlerOptions
-    //   try {
+  const crawlerOptions = legacyCrawlerOptions(req.body);
-    //     const a = new WebScraperDataProvider();
+  const pageOptions = legacyScrapeOptions(req.body.scrapeOptions);
    //     await a.setOptions({
    //       jobId: uuidv4(),
    //       mode: "single_urls",
    //       urls: [url],
    //       crawlerOptions: { ...crawlerOptions, returnOnlyUrls: true },
    //       pageOptions: pageOptions,
    //     });
-    //     const docs = await a.getDocuments(false, (progress) => {
+  crawlerOptions.limit = Math.min(remainingCredits, crawlerOptions.limit);
    //       job.progress({
    //         current: progress.current,
    //         total: progress.total,
    //         current_step: "SCRAPING",
    //         current_url: progress.currentDocumentUrl,
    //       });
    //     });
    //     return res.json({
    //       success: true,
    //       documents: docs,
    //     });
    //   } catch (error) {
    //     Logger.error(error);
    //     return res.status(500).json({ error: error.message });
    //   }
    // }
-    // const job = await addWebScraperJob({
+  const sc: StoredCrawl = {
-    //   url: url,
+    originUrl: req.body.url,
-    //   mode: mode ?? "crawl", // fix for single urls not working
+    crawlerOptions,
-    //   crawlerOptions: crawlerOptions,
+    pageOptions,
-    //   team_id: team_id,
+    team_id: req.auth.team_id,
-    //   pageOptions: pageOptions,
+    createdAt: Date.now(),
-    //   origin: req.body.origin ?? defaultOrigin,
+  };
    // });
-    // await logCrawl(job.id.toString(), team_id);
+  const crawler = crawlToCrawler(id, sc);
-    // res.json({ jobId: job.id });
+  try {
-  } catch (error) {
+    sc.robots = await crawler.getRobotsTxt();
-    Logger.error(error);
+  } catch (e) {
-    return res.status(500).json({ error: error.message });
+    Logger.debug(
      `[Crawl] Failed to get robots.txt (this is probably fine!): ${JSON.stringify(
        e
      )}`
    );
  }
  await saveCrawl(id, sc);
  const sitemap = sc.crawlerOptions.ignoreSitemap
    ? null
    : await crawler.tryGetSitemap();
  if (sitemap !== null) {
    const jobs = sitemap.map((x) => {
      const url = x.url;
      const uuid = uuidv4();
      return {
        name: uuid,
        data: {
          url,
          mode: "single_urls",
          team_id: req.auth.team_id,
          crawlerOptions,
          pageOptions,
          origin: "api",
          crawl_id: id,
          sitemapped: true,
        },
        opts: {
          jobId: uuid,
          priority: 20,
        },
      };
    });
    await lockURLs(
      id,
      jobs.map((x) => x.data.url)
    );
    await addCrawlJobs(
      id,
      jobs.map((x) => x.opts.jobId)
    );
    await getScrapeQueue().addBulk(jobs);
  } else {
    await lockURL(id, sc, req.body.url);
    const job = await addScrapeJob(
      {
        url: req.body.url,
        mode: "single_urls",
        crawlerOptions: crawlerOptions,
        team_id: req.auth.team_id,
        pageOptions: pageOptions,
        origin: "api",
        crawl_id: id,
        webhook: req.body.webhook,
      },
      {
        priority: 15,
      }
    );
    await addCrawlJob(id, job.id);
  }
  return res.status(200).json({
    success: true,
    id,
    url: `${req.protocol}://${req.get("host")}/v1/crawl/${id}`,
  });
 }
--- a/apps/api/src/controllers/v1/map.ts
+++ b/apps/api/src/controllers/v1/map.ts
@ -1,128 +1,94 @@
-import { Request, Response } from "express";
+import { Response } from "express";
 import { WebScraperDataProvider } from "../../../src/scraper/WebScraper";
 import { billTeam } from "../../../src/services/billing/credit_billing";
 import { checkTeamCredits } from "../../../src/services/billing/credit_billing";
 import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../../src/types";
 import { addWebScraperJob } from "../../../src/services/queue-jobs";
 import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
 import { logCrawl } from "../../../src/services/logging/crawl_log";
 import { validateIdempotencyKey } from "../../../src/services/idempotency/validate";
 import { createIdempotencyKey } from "../../../src/services/idempotency/create";
 import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values";
 import { v4 as uuidv4 } from "uuid";
-import { Logger } from "../../../src/lib/logger";
+import {
-import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
+  legacyCrawlerOptions,
  mapRequestSchema,
  RequestWithAuth,
 } from "./types";
 import { crawlToCrawler, StoredCrawl } from "../../lib/crawl-redis";
 import { MapResponse, MapRequest } from "./types";
 import { configDotenv } from "dotenv";
 import {
  checkAndUpdateURLForMap,
  isSameDomain,
  isSameSubdomain,
 } from "../../lib/validateUrl";
 import { fireEngineMap } from "../../search/fireEngine";
 import { billTeam } from "../../services/billing/credit_billing";
-export async function mapController(req: Request, res: Response) {
+configDotenv();
  // expected req.body
-  // req.body = {
+export async function mapController(
-  //   url: string
+  req: RequestWithAuth<{}, MapResponse, MapRequest>,
-  //   ignoreSitemap: true??
+  res: Response<MapResponse>
-  //   other crawler options?
+) {
-  // }
+  req.body = mapRequestSchema.parse(req.body);
  const id = uuidv4();
  let links: string[] = [req.body.url];
-  try {
+  const sc: StoredCrawl = {
-    const { success, team_id, error, status } = await authenticateUser(
+    originUrl: req.body.url,
-      req,
+    crawlerOptions: legacyCrawlerOptions(req.body),
-      res,
+    pageOptions: {},
-      RateLimiterMode.Crawl
+    team_id: req.auth.team_id,
-    );
+    createdAt: Date.now(),
-    if (!success) {
+  };
      return res.status(status).json({ error });
    }
-    // if (req.headers["x-idempotency-key"]) {
+  const crawler = crawlToCrawler(id, sc);
    //   const isIdempotencyValid = await validateIdempotencyKey(req);
    //   if (!isIdempotencyValid) {
    //     return res.status(409).json({ error: "Idempotency key already used" });
    //   }
    //   try {
    //     createIdempotencyKey(req);
    //   } catch (error) {
    //     Logger.error(error);
    //     return res.status(500).json({ error: error.message });
    //   }
    // }
-    // const { success: creditsCheckSuccess, message: creditsCheckMessage } =
+  const sitemap =
-    //   await checkTeamCredits(team_id, 1);
+    req.body.ignoreSitemap
-    // if (!creditsCheckSuccess) {
+      ? null
-    //   return res.status(402).json({ error: "Insufficient credits" });
+      : await crawler.tryGetSitemap();
    // }
-    let url = req.body.url;
+  if (sitemap !== null) {
-    if (!url) {
+    sitemap.map((x) => {
-      return res.status(400).json({ error: "Url is required" });
+      links.push(x.url);
    }
    if (isUrlBlocked(url)) {
      return res
        .status(403)
        .json({
          error:
            "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
    });
  }
-    try {
+  let urlWithoutWww = req.body.url.replace("www.", "");
-      url = checkAndUpdateURL(url);
+  
-    } catch (error) {
+  let mapUrl = req.body.search
-      return res.status(400).json({ error: 'Invalid Url' });
+    ? `"${req.body.search}" site:${urlWithoutWww}`
    : `site:${req.body.url}`;
  // www. seems to exclude subdomains in some cases
  const mapResults = await fireEngineMap(mapUrl, {
    numResults: 50,
  });
  if (mapResults.length > 0) {
    if (req.body.search) {
      // Ensure all map results are first, maintaining their order
      links = [mapResults[0].url, ...mapResults.slice(1).map(x => x.url), ...links];
    } else {
      mapResults.map((x) => {
        links.push(x.url);
      });
    }
  }
-    return res.status(200).json({ urls: [ "test1", "test2" ] });
+  links = links.map((x) => checkAndUpdateURLForMap(x).url.trim());
    // const mode = req.body.mode ?? "crawl";
    // const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions };
    // const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions };
-    // if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this?
+  // allows for subdomains to be included
-    //   try {
+  links = links.filter((x) => isSameDomain(x, req.body.url));
    //     const a = new WebScraperDataProvider();
    //     await a.setOptions({
    //       jobId: uuidv4(),
    //       mode: "single_urls",
    //       urls: [url],
    //       crawlerOptions: { ...crawlerOptions, returnOnlyUrls: true },
    //       pageOptions: pageOptions,
    //     });
-    //     const docs = await a.getDocuments(false, (progress) => {
+  // if includeSubdomains is false, filter out subdomains
-    //       job.progress({
+  if (!req.body.includeSubdomains) {
-    //         current: progress.current,
+    links = links.filter((x) => isSameSubdomain(x, req.body.url));
    //         total: progress.total,
    //         current_step: "SCRAPING",
    //         current_url: progress.currentDocumentUrl,
    //       });
    //     });
    //     return res.json({
    //       success: true,
    //       documents: docs,
    //     });
    //   } catch (error) {
    //     Logger.error(error);
    //     return res.status(500).json({ error: error.message });
    //   }
    // }
    // const job = await addWebScraperJob({
    //   url: url,
    //   mode: mode ?? "crawl", // fix for single urls not working
    //   crawlerOptions: crawlerOptions,
    //   team_id: team_id,
    //   pageOptions: pageOptions,
    //   origin: req.body.origin ?? defaultOrigin,
    // });
    // await logCrawl(job.id.toString(), team_id);
    // res.json({ jobId: job.id });
  } catch (error) {
    Logger.error(error);
    return res.status(500).json({ error: error.message });
  }
  // remove duplicates that could be due to http/https or www
  links = [...new Set(links)];
  await billTeam(req.auth.team_id, 1);
  return res.status(200).json({
    success: true,
    links,
  });
 }
--- a/apps/api/src/controllers/v1/scrape.ts
+++ b/apps/api/src/controllers/v1/scrape.ts
@ -1,253 +1,105 @@
 // import { ExtractorOptions, PageOptions } from './../../lib/entities';
 import { Request, Response } from "express";
 // import { WebScraperDataProvider } from "../../scraper/WebScraper";
 // import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing";
 import { authenticateUser } from "./auth";
 import { RateLimiterMode } from "../../types";
 // import { logJob } from "../../services/logging/log_job";
 // import { Document } from "../../lib/entities";
 import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
 // import { numTokensFromString } from '../../lib/LLM-extraction/helpers';
 // import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../../../src/lib/default-values';
 // import { v4 as uuidv4 } from "uuid";
 import { Logger } from '../../lib/logger';
-import { checkAndUpdateURL } from '../../lib/validateUrl';
+import { Document, legacyDocumentConverter, legacyScrapeOptions, RequestWithAuth, ScrapeRequest, scrapeRequestSchema, ScrapeResponse } from "./types";
 import { billTeam } from "../../services/billing/credit_billing";
 import { v4 as uuidv4 } from 'uuid';
 import { numTokensFromString } from "../../lib/LLM-extraction/helpers";
 import { addScrapeJob } from "../../services/queue-jobs";
 import { scrapeQueueEvents } from '../../services/queue-service';
 import { logJob } from "../../services/logging/log_job";
-export async function scrapeController(req: Request, res: Response) {
+export async function scrapeController(req: RequestWithAuth<{}, ScrapeResponse, ScrapeRequest>, res: Response<ScrapeResponse>) {
-  let url = req.body.url;
+  req.body = scrapeRequestSchema.parse(req.body);
  if (!url) {
    return { success: false, error: "Url is required", returnCode: 400 };
  }
  if (isUrlBlocked(url)) {
    return { success: false, error: "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", returnCode: 403 };
  }
  try {
    url = checkAndUpdateURL(url);
  } catch (error) {
    return { success: false, error: "Invalid URL", returnCode: 400 };
  }
  // TODO: check req.body
  // mockup req.body
  // req.body = {
  //   url: "test",
  //   headers: {
  //     "x-key": "test"
  //   },
  //   formats: ["markdown", "html", "rawHtml", "content", "linksOnPage", "screenshot", "fullPageScreenshot"],
  //   includeTags: ["test"],
  //   excludeTags: ["test"],
  //   onlyMainContent: false,
  //   timeout: 30000,
  //   waitFor: number
  // }
  try {
  let earlyReturn = false;
-    // make sure to authenticate user first, Bearer <token>
+
-    const { success, team_id, error, status, plan } = await authenticateUser(
+  const origin = req.body.origin;
-      req,
+  const timeout = req.body.timeout;
-      res,
+  const pageOptions = legacyScrapeOptions(req.body);
-      RateLimiterMode.Scrape
+  const jobId = uuidv4();
-    );
+
-    if (!success) {
+  const startTime = new Date().getTime();
-      return res.status(status).json({ error });
+  const job = await addScrapeJob({
    url: req.body.url,
    mode: "single_urls",
    crawlerOptions: {},
    team_id: req.auth.team_id,
    pageOptions,
    extractorOptions: {},
    origin: req.body.origin,
  }, {}, jobId);
  let doc: any | undefined;
  try {
    doc = (await job.waitUntilFinished(scrapeQueueEvents, timeout))[0]; // 60 seconds timeout
  } catch (e) {
    Logger.error(`Error in scrapeController: ${e}`);
    if (e instanceof Error && e.message.startsWith("Job wait")) {
      return res.status(408).json({
        success: false,
        error: "Request timed out",
      });
    } else {
      return res.status(500).json({
        success: false,
        error: "Internal server error",
      });
    }
  }
-    // check credits
+  await job.remove();
-    const result = {
+  if (!doc) {
    console.error("!!! PANIC DOC IS", doc, job);
    return res.status(200).json({
      success: true,
-      warning: "test",
+      warning: "No page found",
-      data: {
+      data: doc
-        markdown: "test",
+    });
        content: "test",
        html: "test",
        rawHtml: "test",
        linksOnPage: ["test1", "test2"],
        screenshot: "test",
        metadata: {
          title: "test",
          description: "test",
          language: "test",
          sourceURL: "test",
          statusCode: 200,
          error: "test"
        }
      }
  }
-    return res.status(200).json(result);
+  delete doc.index;
  delete doc.provider;
-    // const crawlerOptions = req.body.crawlerOptions ?? {};
+  const endTime = new Date().getTime();
-    // const pageOptions = { ...defaultPageOptions, ...req.body.pageOptions };
+  const timeTakenInSeconds = (endTime - startTime) / 1000;
-    // const extractorOptions = { ...defaultExtractorOptions, ...req.body.extractorOptions };
+  const numTokens = (doc && doc.markdown) ? numTokensFromString(doc.markdown, "gpt-3.5-turbo") : 0;
    // const origin = req.body.origin ?? defaultOrigin;
    // let timeout = req.body.timeout ?? defaultTimeout;
-    // if (extractorOptions.mode.includes("llm-extraction")) {
+  let creditsToBeBilled = 1; // Assuming 1 credit per document
-    //   pageOptions.onlyMainContent = true;
+  if (earlyReturn) {
-    //   timeout = req.body.timeout ?? 90000;
+    // Don't bill if we're early returning
-    // }
+    return;
    // const checkCredits = async () => {
    //   try {
    //     const { success: creditsCheckSuccess, message: creditsCheckMessage } = await checkTeamCredits(team_id, 1);
    //     if (!creditsCheckSuccess) {
    //       earlyReturn = true;
    //       return res.status(402).json({ error: "Insufficient credits" });
    //     }
    //   } catch (error) {
    //     Logger.error(error);
    //     earlyReturn = true;
    //     return res.status(500).json({ error: "Error checking team credits. Please contact hello@firecrawl.com for help." });
    //   }
    // };
    // await checkCredits();
    // const jobId = uuidv4();
    // const startTime = new Date().getTime();
    // const result = await scrapeHelper(
    //   jobId,
    //   req,
    //   team_id,
    //   crawlerOptions,
    //   pageOptions,
    //   extractorOptions,
    //   timeout,
    //   plan
    // );
    // const endTime = new Date().getTime();
    // const timeTakenInSeconds = (endTime - startTime) / 1000;
    // const numTokens = (result.data && result.data.markdown) ? numTokensFromString(result.data.markdown, "gpt-3.5-turbo") : 0;
    // if (result.success) {
    //   let creditsToBeBilled = 1; // Assuming 1 credit per document
    //   const creditsPerLLMExtract = 50;
    //   if (extractorOptions.mode.includes("llm-extraction")) {
    //     // creditsToBeBilled = creditsToBeBilled + (creditsPerLLMExtract * filteredDocs.length);
    //     creditsToBeBilled += creditsPerLLMExtract;
    //   }
    //   let startTimeBilling = new Date().getTime();
    //   if (earlyReturn) {
    //     // Don't bill if we're early returning
    //     return;
    //   }
    //   const billingResult = await billTeam(
    //     team_id,
    //     creditsToBeBilled
    //   );
    //   if (!billingResult.success) {
    //     return res.status(402).json({
    //       success: false,
    //       error: "Failed to bill team. Insufficient credits or subscription not found.",
    //     });
    //   }
    // }
    // logJob({
    //   job_id: jobId,
    //   success: result.success,
    //   message: result.error,
    //   num_docs: 1,
    //   docs: [result.data],
    //   time_taken: timeTakenInSeconds,
    //   team_id: team_id,
    //   mode: "scrape",
    //   url: req.body.url,
    //   crawlerOptions: crawlerOptions,
    //   pageOptions: pageOptions,
    //   origin: origin, 
    //   extractor_options: extractorOptions,
    //   num_tokens: numTokens,
    // });
    // return res.status(result.returnCode).json(result);
  } catch (error) {
    Logger.error(error);
    return res.status(500).json({ error: error.message });
  }
  const billingResult = await billTeam(
    req.auth.team_id,
    creditsToBeBilled
  );
  if (!billingResult.success) {
    return res.status(402).json({
      success: false,
      error: "Failed to bill team. Insufficient credits or subscription not found.",
    });
  }
  logJob({
    job_id: jobId,
    success: true,
    message: "Scrape completed",
    num_docs: 1,
    docs: [doc],
    time_taken: timeTakenInSeconds,
    team_id: req.auth.team_id,
    mode: "scrape",
    url: req.body.url,
    crawlerOptions: {},
    pageOptions: pageOptions,
    origin: origin, 
    extractor_options: { mode: "markdown" },
    num_tokens: numTokens,
  });
  return res.status(200).json({
    success: true,
    data: legacyDocumentConverter(doc),
  });
 }
 // export async function scrapeHelper(
 //   jobId: string,
 //   req: Request,
 //   team_id: string,
 //   crawlerOptions: any,
 //   pageOptions: PageOptions,
 //   extractorOptions: ExtractorOptions,
 //   timeout: number,
 //   plan?: string
 // ): Promise<{
 //   success: boolean;
 //   error?: string;
 //   data?: Document;
 //   returnCode: number;
 // }> {
  // const url = req.body.url;
  // if (!url) {
  //   return { success: false, error: "Url is required", returnCode: 400 };
  // }
  // if (isUrlBlocked(url)) {
  //   return { success: false, error: "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", returnCode: 403 };
  // }
  // const a = new WebScraperDataProvider();
  // await a.setOptions({
  //   jobId,
  //   mode: "single_urls",
  //   urls: [url],
  //   crawlerOptions: {
  //     ...crawlerOptions,
  //   },
  //   pageOptions: pageOptions,
  //   extractorOptions: extractorOptions,
  // });
  // const timeoutPromise = new Promise<{ success: boolean; error?: string; returnCode: number }>((_, reject) =>
  //   setTimeout(() => reject({ success: false, error: "Request timed out. Increase the timeout by passing `timeout` param to the request.", returnCode: 408 }), timeout)
  // );
  // const docsPromise = a.getDocuments(false);
  // let docs;
  // try {
  //   docs = await Promise.race([docsPromise, timeoutPromise]);
  // } catch (error) {
  //   return error;
  // }
  // // make sure doc.content is not empty
  // let filteredDocs = docs.filter(
  //   (doc: { content?: string }) => doc.content && doc.content.trim().length > 0
  // );
  // if (filteredDocs.length === 0) {
  //   return { success: true, error: "No page found", returnCode: 200, data: docs[0] };
  // }
  // // Remove rawHtml if pageOptions.rawHtml is false and extractorOptions.mode is llm-extraction-from-raw-html
  // if (!pageOptions.includeRawHtml && extractorOptions.mode == "llm-extraction-from-raw-html") {
  //   filteredDocs.forEach(doc => {
  //     delete doc.rawHtml;
  //   });
  // }
  // return {
  //   success: true,
  //   data: filteredDocs[0],
  //   returnCode: 200,
  // };
 // }
--- a/apps/api/src/controllers/v1/types.ts
+++ b/apps/api/src/controllers/v1/types.ts
@ -0,0 +1,306 @@
 import { Request } from "express";
 import { z } from "zod";
 import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
 import { PageOptions } from "../../lib/entities";
 /**
  * Output formats a v1 scrape request can ask for.
  * The members match the values accepted by the `formats` enum of `scrapeOptions`.
  */
 export type Format =
  | "markdown"
  | "html"
  | "rawHtml"
  | "links"
  | "screenshot"
  | "screenshot@fullPage";
 /**
  * Shared URL schema for request bodies.
  *
  * Pre-processing: a string that does not already start with `<scheme>://`
  * gets an `http` scheme prepended (`"://host"` becomes `"http://host"`,
  * `"host"` becomes `"http://host"`). Non-string inputs are passed through
  * untouched so the string validator can reject them.
  *
  * Validation: must be a URL, must start with `http://` or `https://`, and
  * must not be rejected by `isUrlBlocked`.
  */
 const url = z.preprocess(
  (input) => {
    // Leave non-strings and anything that already carries a scheme alone.
    if (typeof input !== "string" || /^([^.:]+:\/\/)/.test(input)) {
      return input;
    }
    return input.startsWith("://") ? "http" + input : "http://" + input;
  },
  z
    .string()
    .url()
    .regex(/^https?:\/\//, "URL uses unsupported protocol")
    .refine(
      (candidate) => !isUrlBlocked(candidate),
      "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
    )
 );
 // Error message handed to `.strict()` on every request schema below; it is what a
 // client sees when the body contains a key the schema does not declare.
 const strictMessage = "Unrecognized key in body -- please review the v1 API documentation for request body changes";
 /**
  * Per-page scrape options accepted by the v1 API.
  * Unknown keys are rejected via `.strict(strictMessage)`.
  */
 export const scrapeOptions = z.object({
  // Requested output formats; falls back to markdown only when omitted.
  formats: z
    .enum([
      "markdown",
      "html",
      "rawHtml",
      "links",
      "screenshot",
      "screenshot@fullPage",
    ])
    .array()
    .optional()
    .default(["markdown"]),
  // Optional string-to-string map of headers.
  headers: z.record(z.string(), z.string()).optional(),
  includeTags: z.string().array().optional(),
  excludeTags: z.string().array().optional(),
  onlyMainContent: z.boolean().default(true),
  // Positive safe integer; presumably milliseconds -- TODO confirm against the consumer.
  timeout: z.number().int().positive().finite().safe().default(30000), // default?
  // Non-negative safe integer, 0 by default.
  waitFor: z.number().int().nonnegative().finite().safe().default(0),
  parsePDF: z.boolean().default(true),
 }).strict(strictMessage);
 /** Parsed (output) shape of `scrapeOptions`, i.e. with defaults applied. */
 export type ScrapeOptions = z.infer<typeof scrapeOptions>;
 /**
  * Request-body schema for the v1 scrape endpoint: every `scrapeOptions` field
  * plus the target `url` (shared URL schema) and an `origin` tag that defaults to "api".
  */
 export const scrapeRequestSchema = scrapeOptions.extend({
  url,
  origin: z.string().optional().default("api"),
 }).strict(strictMessage);
 // Earlier hand-written shape, kept for reference; the live type is inferred from the schema below.
 // export type ScrapeRequest = {
 //   url: string;
 //   formats?: Format[];
 //   headers?: { [K: string]: string };
 //   includeTags?: string[];
 //   excludeTags?: string[];
 //   onlyMainContent?: boolean;
 //   timeout?: number;
 //   waitFor?: number;
 // }
 /** Parsed shape of a scrape request body (defaults applied). */
 export type ScrapeRequest = z.infer<typeof scrapeRequestSchema>;
 /**
  * Crawl-behaviour options shared by the crawl and map request schemas.
  * Every field has a default, and unknown keys are rejected via `.strict(strictMessage)`.
  */
 const crawlerOptions = z.object({
  includePaths: z.string().array().default([]),
  excludePaths: z.string().array().default([]),
  maxDepth: z.number().default(10), // default?
  limit: z.number().default(10000), // default?
  allowBackwardLinks: z.boolean().default(false), // >> TODO: CHANGE THIS NAME???
  allowExternalLinks: z.boolean().default(false),
  // Defaults to true here; `mapRequestSchema` overrides the default to false.
  ignoreSitemap: z.boolean().default(true),
 }).strict(strictMessage);
 // Earlier hand-written shape, kept for reference; the live type is inferred from the schema below.
 // export type CrawlerOptions = {
 //   includePaths?: string[];
 //   excludePaths?: string[];
 //   maxDepth?: number;
 //   limit?: number;
 //   allowBackwardLinks?: boolean; // >> TODO: CHANGE THIS NAME???
 //   allowExternalLinks?: boolean;
 //   ignoreSitemap?: boolean;
 // };
 /** Parsed shape of `crawlerOptions` (defaults applied). */
 export type CrawlerOptions = z.infer<typeof crawlerOptions>;
 /**
  * Request-body schema for the v1 crawl endpoint: the crawler options at the top
  * level, plus the start `url`, an `origin` tag, nested per-page `scrapeOptions`
  * and an optional `webhook` URL.
  */
 export const crawlRequestSchema = crawlerOptions.extend({
  url,
  origin: z.string().optional().default("api"),
  // Per-page options; `timeout` is removed from the nested object.
  scrapeOptions: scrapeOptions.omit({ timeout: true }).default({}),
  webhook: z.string().url().optional(),
  // Re-declares `limit` with the same default that `crawlerOptions` already gives it.
  limit: z.number().default(10000),
 }).strict(strictMessage);
 // Earlier hand-written shape, kept for reference; the live type is inferred from the schema below.
 // export type CrawlRequest = {
 //   url: string;
 //   crawlerOptions?: CrawlerOptions;
 //   scrapeOptions?: Exclude<ScrapeRequest, "url">;
 // };
 /** Parsed shape of a crawl request body (defaults applied). */
 export type CrawlRequest = z.infer<typeof crawlRequestSchema>;
 /**
  * Request-body schema for the v1 map endpoint: the crawler options at the top
  * level, plus the target `url`, an `origin` tag, a subdomain toggle and an
  * optional `search` phrase.
  */
 export const mapRequestSchema = crawlerOptions.extend({
  // Use the shared `url` schema rather than a bare `z.string().url()`, so map
  // requests get the same scheme defaulting, http(s)-only check and blocklist
  // refinement as the scrape and crawl request schemas.
  url,
  origin: z.string().optional().default("api"),
  includeSubdomains: z.boolean().default(true),
  search: z.string().optional(),
  // Overrides the `crawlerOptions` default (true): map consults the sitemap unless told not to.
  ignoreSitemap: z.boolean().default(false),
 }).strict(strictMessage);
 // Earlier hand-written shape, kept for reference; the live type is inferred from the schema below.
 // export type MapRequest = {
 //   url: string;
 //   crawlerOptions?: CrawlerOptions;
 // };
 /** Parsed shape of a map request body (defaults applied). */
 export type MapRequest = z.infer<typeof mapRequestSchema>;
 /**
  * A scraped page as returned by the v1 API. The content fields are all optional;
  * `metadata` is always present, but each of its fields is optional.
  */
 export type Document = {
  markdown?: string;
  html?: string;
  rawHtml?: string;
  links?: string[];
  screenshot?: string;
  metadata: {
    title?: string;
    description?: string;
    language?: string;
    keywords?: string;
    robots?: string;
    ogTitle?: string;
    ogDescription?: string;
    ogUrl?: string;
    ogImage?: string;
    ogAudio?: string;
    ogDeterminer?: string;
    ogLocale?: string;
    ogLocaleAlternate?: string[];
    ogSiteName?: string;
    ogVideo?: string;
    dcTermsCreated?: string;
    dcDateCreated?: string;
    dcDate?: string;
    dcTermsType?: string;
    dcType?: string;
    dcTermsAudience?: string;
    dcTermsSubject?: string;
    dcSubject?: string;
    dcDescription?: string;
    dcTermsKeywords?: string;
    modifiedTime?: string;
    publishedTime?: string;
    articleTag?: string;
    articleSection?: string;
    sourceURL?: string;
    // Filled by `legacyDocumentConverter` from the legacy `pageStatusCode` / `pageError` fields.
    statusCode?: number;
    error?: string;
  };
 };
 /** Failure branch shared by the v1 response unions; `success` is the literal `false`. */
 export type ErrorResponse = {
  success: false;
  error: string;
  details?: any;
 };
 /** Body of a v1 scrape response: either an `ErrorResponse` or the scraped document with an optional warning. */
 export type ScrapeResponse =
  | ErrorResponse
  | {
      success: true;
      warning?: string;
      data: Document;
    };
 /**
  * A status code, a `ScrapeResponse` body and an optional error string.
  * NOTE(review): the name suggests this is the response shape seen by request-style tests -- confirm against its users.
  */
 export interface ScrapeResponseRequestTest {
  statusCode: number;
  body: ScrapeResponse;
  error?: string;
 }
 /** Body of a v1 crawl-start response: either an `ErrorResponse` or the crawl `id` together with a `url` string. */
 export type CrawlResponse =
  | ErrorResponse
  | {
      success: true;
      id: string;
      url: string;
    };
 /** Body of a v1 map response: either an `ErrorResponse` or the list of discovered links. */
 export type MapResponse =
  | ErrorResponse
  | {
      success: true;
      links: string[];
    };
 /** Parameters object carrying a single `jobId` string; presumably the route params of the crawl-status endpoint -- confirm against the router. */
 export type CrawlStatusParams = {
  jobId: string;
 };
 /**
  * Body of a crawl-status response: either an `ErrorResponse` or a status object.
  * The success branch has no `success` flag of its own. It carries a `status`,
  * counters, an `expiresAt` string, an optional `next` string and the documents.
  */
 export type CrawlStatusResponse =
  | ErrorResponse
  | {
      status: "scraping" | "completed" | "failed" | "cancelled";
      totalCount: number;
      creditsUsed: number;
      expiresAt: string;
      next?: string;
      data: Document[];
    };
 /** Team id and plan attached to a request as `req.auth` (see `RequestWithAuth` / `RequestWithMaybeAuth`). */
 type AuthObject = {
  team_id: string;
  plan: string;
 };
 /** Account snapshot optionally attached to a request as `req.account`. */
 type Account = {
  remainingCredits: number;
 };
 /**
  * Express request whose `auth` may not have been populated yet.
  *
  * The type arguments are forwarded positionally to Express's `Request`, whose
  * second slot is the response body and whose third slot is the request body.
  * The parameters are named to match that order; callers that pass arguments
  * positionally are unaffected.
  */
 export interface RequestWithMaybeAuth<
  ReqParams = {},
  ResBody = undefined,
  ReqBody = undefined
 > extends Request<ReqParams, ResBody, ReqBody> {
  auth?: AuthObject;
  account?: Account;
 }
 /**
  * Express request on which `auth` is guaranteed to be present.
  *
  * The type arguments are forwarded positionally to Express's `Request`, whose
  * second slot is the response body and whose third slot is the request body
  * (controllers use it as `RequestWithAuth<{}, SomeResponse, SomeRequest>`).
  * The parameters are named to match that order; callers that pass arguments
  * positionally are unaffected.
  */
 export interface RequestWithAuth<
  ReqParams = {},
  ResBody = undefined,
  ReqBody = undefined,
 > extends Request<ReqParams, ResBody, ReqBody> {
  auth: AuthObject;
  account?: Account;
 }
 /**
  * Translates v1 crawler options into the legacy option names.
  *
  * @param x Parsed v1 crawler options.
  * @returns A new object with the legacy keys. `limit` is emitted both as
  *   `maxCrawledLinks` and as `limit`; `generateImgAltText` is always `false`.
  */
 export function legacyCrawlerOptions(x: CrawlerOptions) {
  return {
    includes: x.includePaths,
    excludes: x.excludePaths,
    maxCrawledLinks: x.limit,
    maxCrawledDepth: x.maxDepth,
    limit: x.limit,
    generateImgAltText: false,
    allowBackwardCrawling: x.allowBackwardLinks,
    allowExternalContentLinks: x.allowExternalLinks,
    // Carry the flag through: code that reads `crawlerOptions.ignoreSitemap`
    // from the converted object would otherwise always see `undefined`.
    ignoreSitemap: x.ignoreSitemap,
  };
 }
 /**
  * Translates v1 scrape options into the legacy `PageOptions` shape.
  * Each requested format becomes a boolean flag; the remaining fields are
  * copied across under their legacy names.
  *
  * @param x Parsed v1 scrape options.
  * @returns The equivalent legacy page options.
  */
 export function legacyScrapeOptions(x: ScrapeOptions): PageOptions {
  // Membership lookups against the requested formats.
  const requested = new Set<string>(x.formats);
  const { includeTags, excludeTags, onlyMainContent, waitFor, parsePDF } = x;
  return {
    includeMarkdown: requested.has("markdown"),
    includeHtml: requested.has("html"),
    includeRawHtml: requested.has("rawHtml"),
    onlyIncludeTags: includeTags,
    removeTags: excludeTags,
    onlyMainContent,
    waitFor,
    includeLinks: requested.has("links"),
    screenshot: requested.has("screenshot"),
    fullPageScreenshot: requested.has("screenshot@fullPage"),
    parsePDF,
  };
 }
 /**
  * Converts a legacy scraper document into the v1 `Document` shape.
  *
  * - A truthy `metadata.screenshot` / `metadata.fullPageScreenshot` is lifted
  *   out of the metadata and takes precedence over a top-level value of the
  *   same name; the result's `screenshot` is the regular screenshot if present,
  *   otherwise the full-page one.
  * - `linksOnPage` is exposed as `links`.
  * - `metadata.pageError` / `metadata.pageStatusCode` are exposed as
  *   `metadata.error` / `metadata.statusCode`, and the legacy keys are blanked.
  *
  * The input document is not modified; the conversion works on a shallow copy
  * of its metadata. A document without `metadata` yields an otherwise empty
  * metadata object instead of throwing.
  *
  * @param doc Legacy document (untyped).
  * @returns The v1 representation of `doc`.
  */
 export function legacyDocumentConverter(doc: any): Document {
  // Copy so the deletes below never touch the caller's object.
  const metadata = { ...(doc.metadata ?? {}) };

  let screenshot = doc.screenshot;
  if (metadata.screenshot) {
    screenshot = metadata.screenshot;
    delete metadata.screenshot;
  }

  let fullPageScreenshot = doc.fullPageScreenshot;
  if (metadata.fullPageScreenshot) {
    fullPageScreenshot = metadata.fullPageScreenshot;
    delete metadata.fullPageScreenshot;
  }

  return {
    markdown: doc.markdown,
    links: doc.linksOnPage,
    rawHtml: doc.rawHtml,
    html: doc.html,
    screenshot: screenshot ?? fullPageScreenshot,
    metadata: {
      ...metadata,
      pageError: undefined,
      pageStatusCode: undefined,
      error: metadata.pageError,
      statusCode: metadata.pageStatusCode,
    },
  };
 }
--- a/apps/api/src/index.ts
+++ b/apps/api/src/index.ts
@ -2,7 +2,7 @@ import express from "express";
 import bodyParser from "body-parser";
 import cors from "cors";
 import "dotenv/config";
-import { getScrapeQueue, getWebScraperQueue } from "./services/queue-service";
+import { getScrapeQueue } from "./services/queue-service";
 import { v0Router } from "./routes/v0";
 import { initSDK } from "@hyperdx/node-opentelemetry";
 import cluster from "cluster";
@ -14,6 +14,8 @@ import http from 'node:http';
 import https from 'node:https';
 import CacheableLookup  from 'cacheable-lookup';
 import { v1Router } from "./routes/v1";
 import expressWs from "express-ws";
 import { crawlStatusWSController } from "./controllers/v1/crawl-status-ws";
 const { createBullBoard } = require("@bull-board/api");
 const { BullAdapter } = require("@bull-board/api/bullAdapter");
@ -46,7 +48,8 @@ if (cluster.isMaster) {
    }
  });
 } else {
-  const app = express();
+  const ws = expressWs(express());
  const app = ws.app;
  global.isProduction = process.env.IS_PRODUCTION === "true";
@ -59,7 +62,7 @@ if (cluster.isMaster) {
  serverAdapter.setBasePath(`/admin/${process.env.BULL_AUTH_KEY}/queues`);
  const { addQueue, removeQueue, setQueues, replaceQueues } = createBullBoard({
-    queues: [new BullAdapter(getWebScraperQueue()), new BullAdapter(getScrapeQueue())],
+    queues: [new BullAdapter(getScrapeQueue())],
    serverAdapter: serverAdapter,
  });
@ -79,7 +82,7 @@ if (cluster.isMaster) {
  // register router
  app.use(v0Router);
-  app.use(v1Router);
+  app.use("/v1", v1Router);
  app.use(adminRouter);
  const DEFAULT_PORT = process.env.PORT ?? 3002;
@ -106,9 +109,9 @@ if (cluster.isMaster) {
  app.get(`/serverHealthCheck`, async (req, res) => {
    try {
-      const webScraperQueue = getWebScraperQueue();
+      const scrapeQueue = getScrapeQueue();
      const [waitingJobs] = await Promise.all([
-        webScraperQueue.getWaitingCount(),
+        scrapeQueue.getWaitingCount(),
      ]);
      const noWaitingJobs = waitingJobs === 0;
@ -128,9 +131,9 @@ if (cluster.isMaster) {
      const timeout = 60000; // 1 minute // The timeout value for the check in milliseconds
      const getWaitingJobsCount = async () => {
-        const webScraperQueue = getWebScraperQueue();
+        const scrapeQueue = getScrapeQueue();
        const [waitingJobsCount] = await Promise.all([
-          webScraperQueue.getWaitingCount(),
+          scrapeQueue.getWaitingCount(),
        ]);
        return waitingJobsCount;
@ -183,11 +186,12 @@ if (cluster.isMaster) {
  Logger.info(`Worker ${process.pid} started`);
 }
-// const wsq = getWebScraperQueue();
+// const sq = getScrapeQueue();
 // sq.on("waiting", j => ScrapeEvents.logJobEvent(j, "waiting"));
 // sq.on("active", j => ScrapeEvents.logJobEvent(j, "active"));
 // sq.on("completed", j => ScrapeEvents.logJobEvent(j, "completed"));
 // sq.on("paused", j => ScrapeEvents.logJobEvent(j, "paused"));
 // sq.on("resumed", j => ScrapeEvents.logJobEvent(j, "resumed"));
 // sq.on("removed", j => ScrapeEvents.logJobEvent(j, "removed"));
 // wsq.on("waiting", j => ScrapeEvents.logJobEvent(j, "waiting"));
 // wsq.on("active", j => ScrapeEvents.logJobEvent(j, "active"));
 // wsq.on("completed", j => ScrapeEvents.logJobEvent(j, "completed"));
 // wsq.on("paused", j => ScrapeEvents.logJobEvent(j, "paused"));
 // wsq.on("resumed", j => ScrapeEvents.logJobEvent(j, "resumed"));
 // wsq.on("removed", j => ScrapeEvents.logJobEvent(j, "removed"));
--- a/apps/api/src/lib/checkCredits.ts
+++ b/apps/api/src/lib/checkCredits.ts
@ -0,0 +1,32 @@
 import { checkTeamCredits } from "../services/billing/credit_billing";
 import { Logger } from "./logger";
 /** Result of `checkCredits`: an HTTP-style status code plus an error message, or `null` when there is none. */
 type checkCreditsResponse = {
  status: number;
  error: string | null;
 }
 /**
  * Asks `checkTeamCredits` whether the team can afford one credit and maps the
  * outcome to an HTTP-style status.
  *
  * @param team_id Team to check.
  * @returns `200` with a `null` error when the check succeeds, `402` when it
  *   reports failure, and `500` (after logging) when the check itself throws.
  */
 export const checkCredits = async (team_id: string): Promise<checkCreditsResponse> => {
  let hasCredits;
  try {
    const outcome = await checkTeamCredits(team_id, 1);
    hasCredits = outcome.success;
  } catch (err) {
    Logger.error(err);
    return {
      status: 500,
      error: "Error checking team credits. Please contact hello@firecrawl.com for help.",
    };
  }

  if (hasCredits) {
    return { status: 200, error: null };
  }
  return { status: 402, error: "Insufficient credits" };
 };
--- a/apps/api/src/lib/crawl-redis.ts
+++ b/apps/api/src/lib/crawl-redis.ts
@ -0,0 +1,123 @@
 import { WebCrawler } from "../scraper/WebScraper/crawler";
 import { redisConnection } from "../services/queue-service";
 /** Crawl metadata persisted as JSON under the `crawl:<id>` Redis key (see saveCrawl / getCrawl). */
 export type StoredCrawl = {
    // URL the crawl starts from; handed to WebCrawler as `initialUrl`.
    originUrl: string;
    // Crawler settings; crawlToCrawler reads includes, excludes, maxCrawledLinks, maxDepth, limit, etc.
    crawlerOptions: any;
    // NOTE(review): untyped here; presumably matches PageOptions from lib/entities — confirm.
    pageOptions: any;
    team_id: string;
    // Raw robots.txt contents; imported into the crawler by crawlToCrawler when present.
    robots?: string;
    // NOTE(review): looks like a cancellation flag; nothing in this file reads it — confirm with callers.
    cancelled?: boolean;
    // NOTE(review): presumably a creation timestamp; unit is not established in this file.
    createdAt: number;
 };
 /**
  * Stores a crawl's metadata as JSON under `crawl:<id>`.
  * A 24-hour TTL is applied only if the key has no expiry yet (EXPIRE ... NX).
  */
 export async function saveCrawl(id: string, crawl: StoredCrawl) {
    const crawlKey = `crawl:${id}`;
    const serialized = JSON.stringify(crawl);
    await redisConnection.set(crawlKey, serialized);
    await redisConnection.expire(crawlKey, 24 * 60 * 60, "NX");
 }
 /**
  * Loads the crawl metadata stored under `crawl:<id>`.
  *
  * @returns The parsed JSON value, or null when the key does not exist.
  */
 export async function getCrawl(id: string): Promise<StoredCrawl | null> {
    const raw = await redisConnection.get(`crawl:${id}`);
    return raw === null ? null : JSON.parse(raw);
 }
 /**
  * Computes when the `crawl:<id>` key expires, truncated to whole seconds.
  *
  * The remaining TTL (PTTL, in milliseconds) is added to the time sampled just
  * before the Redis call. The addition is done on the epoch value rather than via
  * `setMilliseconds`, because the latter works in local wall-clock time and would
  * shift the result by the DST offset when the TTL window spans a transition.
  *
  * NOTE(review): per the Redis docs PTTL returns -1 (no expiry) or -2 (missing key);
  * as before, those yield a date at (or just before) the current second.
  *
  * @param id - Crawl id.
  * @returns Expiry instant with the millisecond component zeroed.
  */
 export async function getCrawlExpiry(id: string): Promise<Date> {
    const sampledAt = Date.now();
    const ttl = await redisConnection.pttl("crawl:" + id);
    const expiresAt = new Date(sampledAt + ttl);
    // UTC setter: zeroes the milliseconds without a local-time round trip.
    expiresAt.setUTCMilliseconds(0);
    return expiresAt;
 }
 /**
  * Registers one job id in the crawl's `:jobs` set and gives the set a 24-hour
  * TTL if it has none yet.
  */
 export async function addCrawlJob(id: string, job_id: string) {
    const jobsKey = `crawl:${id}:jobs`;
    await redisConnection.sadd(jobsKey, job_id);
    await redisConnection.expire(jobsKey, 24 * 60 * 60, "NX");
 }
 /**
  * Registers several job ids in the crawl's `:jobs` set and gives the set a
  * 24-hour TTL if it has none yet.
  *
  * An empty `job_ids` array is a no-op: SADD requires at least one member, so
  * issuing it with none would be rejected by Redis.
  *
  * @param id - Crawl id.
  * @param job_ids - Job ids to add; may be empty.
  */
 export async function addCrawlJobs(id: string, job_ids: string[]) {
    if (job_ids.length === 0) {
        return;
    }
    const jobsKey = "crawl:" + id + ":jobs";
    await redisConnection.sadd(jobsKey, ...job_ids);
    await redisConnection.expire(jobsKey, 24 * 60 * 60, "NX");
 }
 /**
  * Marks a job as finished for the crawl: adds it to the `:jobs_done` set and
  * pushes it onto the `:jobs_done_ordered` list, then gives each key a 24-hour
  * TTL if it has none yet.
  */
 export async function addCrawlJobDone(id: string, job_id: string) {
    const doneSetKey = `crawl:${id}:jobs_done`;
    const doneListKey = `crawl:${id}:jobs_done_ordered`;
    const oneDaySeconds = 24 * 60 * 60;
    await redisConnection.sadd(doneSetKey, job_id);
    await redisConnection.lpush(doneListKey, job_id);
    await redisConnection.expire(doneSetKey, oneDaySeconds, "NX");
    await redisConnection.expire(doneListKey, oneDaySeconds, "NX");
 }
 /** Returns the length of the crawl's `:jobs_done_ordered` list. */
 export async function getDoneJobsOrderedLength(id: string): Promise<number> {
    const orderedKey = `crawl:${id}:jobs_done_ordered`;
    const length = await redisConnection.llen(orderedKey);
    return length;
 }
 /**
  * Reads a slice of the crawl's `:jobs_done_ordered` list via LRANGE.
  *
  * @param start - First list index to read (default 0).
  * @param end - Last list index to read, inclusive; -1 (the default) means the end of the list.
  */
 export async function getDoneJobsOrdered(id: string, start = 0, end = -1): Promise<string[]> {
    const orderedKey = `crawl:${id}:jobs_done_ordered`;
    const jobIds = await redisConnection.lrange(orderedKey, start, end);
    return jobIds;
 }
 /**
  * Reports whether the crawl's `:jobs_done` set has the same cardinality as its
  * `:jobs` set. The two counts are read one after the other, not atomically.
  */
 export async function isCrawlFinished(id: string) {
    const doneCount = await redisConnection.scard(`crawl:${id}:jobs_done`);
    const totalCount = await redisConnection.scard(`crawl:${id}:jobs`);
    return doneCount === totalCount;
 }
 /**
  * Returns the raw EXISTS result for the crawl's `:finish` marker key
  * (the key that finishCrawl creates).
  */
 export async function isCrawlFinishedLocked(id: string) {
    const finishKey = `crawl:${id}:finish`;
    const existsResult = await redisConnection.exists(finishKey);
    return existsResult;
 }
 /**
  * Attempts to claim the crawl's `:finish` marker once all jobs are done.
  *
  * @returns `undefined` when the crawl is not finished yet; otherwise `true` if
  *   this call created the marker (SETNX returned 1) and `false` if it already
  *   existed. A 24-hour TTL is set on the marker only by the call that created it.
  */
 export async function finishCrawl(id: string) {
    const finished = await isCrawlFinished(id);
    if (!finished) {
        return undefined;
    }
    const finishKey = `crawl:${id}:finish`;
    const claimed = (await redisConnection.setnx(finishKey, "yes")) === 1;
    if (claimed) {
        await redisConnection.expire(finishKey, 24 * 60 * 60);
    }
    return claimed;
 }
 /** Returns every job id in the crawl's `:jobs` set. */
 export async function getCrawlJobs(id: string): Promise<string[]> {
    const jobsKey = `crawl:${id}:jobs`;
    const members = await redisConnection.smembers(jobsKey);
    return members;
 }
 /**
  * Tries to claim a URL for the crawl by adding it to the `:visited` set.
  *
  * When `crawlerOptions.limit` is a number and the visited set has already
  * reached it, the URL is refused without being added. The limit check and the
  * add are separate Redis calls.
  *
  * @returns `true` if the URL was newly added, `false` if it was already present
  *   or the limit was reached.
  */
 export async function lockURL(id: string, sc: StoredCrawl, url: string): Promise<boolean> {
    const visitedKey = `crawl:${id}:visited`;
    const limit = sc.crawlerOptions?.limit;
    if (typeof limit === "number") {
        const visitedCount = await redisConnection.scard(visitedKey);
        if (visitedCount >= limit) {
            return false;
        }
    }
    const addedCount = await redisConnection.sadd(visitedKey, url);
    await redisConnection.expire(visitedKey, 24 * 60 * 60, "NX");
    return addedCount !== 0;
 }
 /**
  * Claims several URLs for the crawl at once by adding them to the `:visited` set.
  *
  * NOTE: does not check limit. Only use if limit is checked beforehand, e.g. with sitemap.
  *
  * An empty `urls` array is a no-op that returns `false`: SADD requires at least
  * one member, so issuing it with none would be rejected by Redis.
  *
  * @param id - Crawl id.
  * @param urls - URLs to mark as visited; may be empty.
  * @returns `true` if at least one URL was newly added, otherwise `false`.
  */
 export async function lockURLs(id: string, urls: string[]): Promise<boolean> {
    if (urls.length === 0) {
        return false;
    }
    const visitedKey = "crawl:" + id + ":visited";
    const res = (await redisConnection.sadd(visitedKey, ...urls)) !== 0
    await redisConnection.expire(visitedKey, 24 * 60 * 60, "NX");
    return res;
 }
 /**
  * Builds a WebCrawler from stored crawl metadata.
  *
  * Missing crawler options fall back to the defaults spelled out below. If the
  * stored crawl carries robots.txt contents they are imported into the crawler;
  * a failure during that import is deliberately ignored.
  */
 export function crawlToCrawler(id: string, sc: StoredCrawl): WebCrawler {
    const options = sc.crawlerOptions;
    const crawler = new WebCrawler({
        jobId: id,
        initialUrl: sc.originUrl,
        includes: options?.includes ?? [],
        excludes: options?.excludes ?? [],
        maxCrawledLinks: options?.maxCrawledLinks ?? 1000,
        maxCrawledDepth: options?.maxDepth ?? 10,
        limit: options?.limit ?? 10000,
        generateImgAltText: options?.generateImgAltText ?? false,
        allowBackwardCrawling: options?.allowBackwardCrawling ?? false,
        allowExternalContentLinks: options?.allowExternalContentLinks ?? false,
    });
    if (sc.robots === undefined) {
        return crawler;
    }
    try {
        crawler.importRobotsTxt(sc.robots);
    } catch (_) {
        // Best effort: the crawler is still returned without robots rules.
    }
    return crawler;
 }
--- a/apps/api/src/lib/default-values.ts
+++ b/apps/api/src/lib/default-values.ts
@ -7,6 +7,7 @@ export const defaultPageOptions = {
  includeHtml: false,
  waitFor: 0,
  screenshot: false,
  fullPageScreenshot: false,
  parsePDF: true
 };
--- a/apps/api/src/lib/entities.ts
+++ b/apps/api/src/lib/entities.ts
@ -11,6 +11,7 @@ export interface Progress {
 }
 export type PageOptions = {
  includeMarkdown?: boolean;
  onlyMainContent?: boolean;
  includeHtml?: boolean;
  includeRawHtml?: boolean;
@ -18,11 +19,13 @@ export type PageOptions = {
  fetchPageContent?: boolean;
  waitFor?: number;
  screenshot?: boolean;
  fullPageScreenshot?: boolean;
  headers?: Record<string, string>;
  replaceAllPathsWithAbsolutePaths?: boolean;
  parsePDF?: boolean;
  removeTags?: string | string[];
  onlyIncludeTags?: string | string[];
  includeLinks?: boolean;
 };
 export type ExtractorOptions = {
@ -42,8 +45,8 @@ export type SearchOptions = {
 export type CrawlerOptions = {
  returnOnlyUrls?: boolean;
-  includes?: string[];
+  includes?: string | string[];
-  excludes?: string[];
+  excludes?: string | string[];
  maxCrawledLinks?: number;
  maxDepth?: number;
  limit?: number;
@ -64,6 +67,7 @@ export type WebScraperOptions = {
  extractorOptions?: ExtractorOptions;
  concurrentRequests?: number;
  bullJobId?: string;
  priority?: number;
 };
 export interface DocumentUrl {
--- a/apps/api/src/lib/scrape-events.ts
+++ b/apps/api/src/lib/scrape-events.ts
@ -46,7 +46,7 @@ export class ScrapeEvents {
        }).select().single();
        return (result.data as any).id;
      } catch (error) {
-        Logger.error(`Error inserting scrape event: ${error}`);
+        // Logger.error(`Error inserting scrape event: ${error}`);
        return null;
      }
    }
--- a/apps/api/src/lib/supabase-jobs.ts
+++ b/apps/api/src/lib/supabase-jobs.ts
@ -17,3 +17,21 @@ export const supabaseGetJobById = async (jobId: string) => {
  return data;
 }
 /**
  * Fetches all `firecrawl_jobs` rows whose `job_id` is in `jobIds`.
  *
  * @returns The matching rows, or an empty array when the query reports an error
  *   or yields no data.
  */
 export const supabaseGetJobsById = async (jobIds: string[]) => {
  const response = await supabase_service
    .from('firecrawl_jobs')
    .select('*')
    .in('job_id', jobIds);
  if (response.error || !response.data) {
    return [];
  }
  return response.data;
 }
--- a/apps/api/src/lib/validateUrl.test.ts
+++ b/apps/api/src/lib/validateUrl.test.ts
@ -0,0 +1,88 @@
 import { isSameDomain } from "./validateUrl";
 import { isSameSubdomain } from "./validateUrl";
 // isSameDomain compares only the last two hostname labels (after dropping a
 // leading "www."), so any depth of subdomain counts as the same domain.
 describe("isSameDomain", () => {
  it("should return true for a subdomain", () => {
    const result = isSameDomain("http://sub.example.com", "http://example.com");
    expect(result).toBe(true);
  });
  it("should return true for the same domain", () => {
    const result = isSameDomain("http://example.com", "http://example.com");
    expect(result).toBe(true);
  });
  it("should return false for different domains", () => {
    const result = isSameDomain("http://example.com", "http://another.com");
    expect(result).toBe(false);
  });
  it("should return true for a subdomain with different protocols", () => {
    const result = isSameDomain("https://sub.example.com", "http://example.com");
    expect(result).toBe(true);
  });
  it("should return false for invalid URLs", () => {
    const result = isSameDomain("invalid-url", "http://example.com");
    expect(result).toBe(false);
    const result2 = isSameDomain("http://example.com", "invalid-url");
    expect(result2).toBe(false);
  });
  it("should return true for a subdomain with www prefix", () => {
    const result = isSameDomain("http://www.sub.example.com", "http://example.com");
    expect(result).toBe(true);
  });
  // Title corrected: this case has no "www." prefix, it checks multi-level subdomains.
  it("should return true for a deeply nested subdomain", () => {
    const result = isSameDomain("http://docs.s.s.example.com", "http://example.com");
    expect(result).toBe(true);
  });
 });
 describe("isSameSubdomain", () => {
  // Each entry: the test title, then the [url, baseUrl, expected] checks run inside it.
  const scenarios: Array<[string, Array<[string, string, boolean]>]> = [
    ["should return false for a subdomain", [
      ["http://example.com", "http://docs.example.com", false],
    ]],
    ["should return true for the same subdomain", [
      ["http://docs.example.com", "http://docs.example.com", true],
    ]],
    ["should return false for different subdomains", [
      ["http://docs.example.com", "http://blog.example.com", false],
    ]],
    ["should return false for different domains", [
      ["http://example.com", "http://another.com", false],
    ]],
    ["should return false for invalid URLs", [
      ["invalid-url", "http://example.com", false],
      ["http://example.com", "invalid-url", false],
    ]],
    ["should return true for the same subdomain with different protocols", [
      ["https://docs.example.com", "http://docs.example.com", true],
    ]],
    ["should return true for the same subdomain with www prefix", [
      ["http://www.docs.example.com", "http://docs.example.com", true],
    ]],
    ["should return false for a subdomain with www prefix and different subdomain", [
      ["http://www.docs.example.com", "http://blog.example.com", false],
    ]],
  ];
  for (const [title, checks] of scenarios) {
    it(title, () => {
      for (const [url, baseUrl, expected] of checks) {
        expect(isSameSubdomain(url, baseUrl)).toBe(expected);
      }
    });
  }
 });
--- a/apps/api/src/lib/validateUrl.ts
+++ b/apps/api/src/lib/validateUrl.ts
@ -1,9 +1,8 @@
 const protocolIncluded = (url: string) => {
  // if :// not in the start of the url assume http (maybe https?)
  // regex checks if :// appears before any .
-  return(/^([^.:]+:\/\/)/.test(url));
+  return /^([^.:]+:\/\/)/.test(url);
-}
+};
 const getURLobj = (s: string) => {
  // URL fails if we dont include the protocol ie google.com
@ -18,7 +17,6 @@ const getURLobj = (s: string) => {
 };
 export const checkAndUpdateURL = (url: string) => {
  if (!protocolIncluded(url)) {
    url = `http://${url}`;
  }
@ -30,9 +28,95 @@ export const checkAndUpdateURL = (url: string) => {
  const typedUrlObj = urlObj as URL;
-  if(typedUrlObj.protocol !== "http:" && typedUrlObj.protocol !== "https:") {
+  if (typedUrlObj.protocol !== "http:" && typedUrlObj.protocol !== "https:") {
    throw new Error("Invalid URL");
  }
  return { urlObj: typedUrlObj, url: url };
 };
 /**
  * Same-domain check.
  *
  * Compares the last two hostname labels of `url` and `baseUrl` after dropping a
  * leading "www." from each, so subdomains (with or without "www.") match their
  * parent domain. Because only two labels are kept, hosts under a multi-part
  * suffix such as "co.uk" all compare equal.
  *
  * @param url - URL to test.
  * @param baseUrl - URL to compare against.
  * @returns true when both URLs parse and share the same last two labels;
  *   false otherwise, including when either URL fails to parse.
  */
 export function isSameDomain(url: string, baseUrl: string) {
  const parsedUrl = getURLobj(url);
  const parsedBase = getURLobj(baseUrl);
  if (parsedUrl.error || parsedBase.error) {
    return false;
  }
  // Drops a leading "www." and keeps the final two dot-separated labels.
  const lastTwoLabels = (parsed: URL) => {
    const host = parsed.hostname;
    const bareHost = host.startsWith('www.') ? host.slice(4) : host;
    return bareHost.split('.').slice(-2).join('.');
  };
  return lastTwoLabels(parsedUrl.urlObj as URL) === lastTwoLabels(parsedBase.urlObj as URL);
 }
 /**
  * Same-subdomain check.
  *
  * After dropping a leading "www." from each hostname, the last two labels are
  * treated as the domain and everything before them as the subdomain; both parts
  * must match.
  *
  * @returns true when both URLs parse and agree on domain and subdomain;
  *   false otherwise, including when either URL fails to parse.
  */
 export function isSameSubdomain(url: string, baseUrl: string) {
  const parsedUrl = getURLobj(url);
  const parsedBase = getURLobj(baseUrl);
  if (parsedUrl.error || parsedBase.error) {
    return false;
  }
  // Splits a hostname (minus any leading "www.") into its domain and subdomain parts.
  const splitHost = (parsed: URL) => {
    const host = parsed.hostname;
    const labels = (host.startsWith('www.') ? host.slice(4) : host).split('.');
    return {
      domain: labels.slice(-2).join('.'),
      subdomain: labels.slice(0, -2).join('.'),
    };
  };
  const first = splitHost(parsedUrl.urlObj as URL);
  const second = splitHost(parsedBase.urlObj as URL);
  if (first.domain !== second.domain) {
    return false;
  }
  return first.subdomain === second.subdomain;
 }
 /**
  * Validates and normalizes a URL for the map endpoint.
  *
  * Steps: prepend "http://" when no protocol is present, drop a trailing slash,
  * parse and require an http(s) protocol, then strip the query string. The
  * trailing slash is trimmed again after the query is removed, so that
  * "http://a.com/?q=1" normalizes to "http://a.com" just like "http://a.com/".
  *
  * @param url - Raw URL, with or without protocol.
  * @returns `url`: the normalized string; `urlObj`: the URL parsed before the
  *   query string was stripped (so it still carries any query parameters).
  * @throws Error("Invalid URL") when the URL cannot be parsed or is not http(s).
  */
 export const checkAndUpdateURLForMap = (url: string) => {
  if (!protocolIncluded(url)) {
    url = `http://${url}`;
  }
  // remove last slash if present
  if (url.endsWith("/")) {
    url = url.slice(0, -1);
  }
  const { error, urlObj } = getURLobj(url);
  if (error) {
    throw new Error("Invalid URL");
  }
  const typedUrlObj = urlObj as URL;
  if (typedUrlObj.protocol !== "http:" && typedUrlObj.protocol !== "https:") {
    throw new Error("Invalid URL");
  }
  // remove any query params
  url = url.split("?")[0].trim();
  // Removing the query can expose a slash that the first trim could not see.
  if (url.endsWith("/")) {
    url = url.slice(0, -1);
  }
  return { urlObj: typedUrlObj, url: url };
 };
--- a/apps/api/src/main/runWebScraper.ts
+++ b/apps/api/src/main/runWebScraper.ts
@ -12,7 +12,7 @@ import { Document } from "../lib/entities";
 import { supabase_service } from "../services/supabase";
 import { Logger } from "../lib/logger";
 import { ScrapeEvents } from "../lib/scrape-events";
-import { getWebScraperQueue } from "../services/queue-service";
+import { getScrapeQueue } from "../services/queue-service";
 export async function startWebScraperPipeline({
  job,
@ -27,7 +27,12 @@ export async function startWebScraperPipeline({
    mode: job.data.mode,
    crawlerOptions: job.data.crawlerOptions,
    extractorOptions: job.data.extractorOptions,
-    pageOptions: job.data.pageOptions,
+    pageOptions: {
      ...job.data.pageOptions,
      ...(job.data.crawl_id ? ({
        includeRawHtml: true,
      }): {}),
    },
    inProgress: (progress) => {
      Logger.debug(`🐂 Job in progress ${job.id}`);
      if (progress.currentDocument) {
@ -35,7 +40,7 @@ export async function startWebScraperPipeline({
        if (partialDocs.length > 50) {
          partialDocs = partialDocs.slice(-50);
        }
-        job.updateProgress({ ...progress, partialDocs: partialDocs });
+        // job.updateProgress({ ...progress, partialDocs: partialDocs });
      }
    },
    onSuccess: (result, mode) => {
@ -49,6 +54,7 @@ export async function startWebScraperPipeline({
    },
    team_id: job.data.team_id,
    bull_job_id: job.id.toString(),
    priority: job.opts.priority,
  })) as { success: boolean; message: string; docs: Document[] };
 }
 export async function runWebScraper({
@ -62,6 +68,7 @@ export async function runWebScraper({
  onError,
  team_id,
  bull_job_id,
  priority,
 }: RunWebScraperParams): Promise<RunWebScraperResult> {
  try {
    const provider = new WebScraperDataProvider();
@ -74,6 +81,7 @@ export async function runWebScraper({
        crawlerOptions: crawlerOptions,
        pageOptions: pageOptions,
        bullJobId: bull_job_id,
        priority,
      });
    } else {
      await provider.setOptions({
@ -83,6 +91,7 @@ export async function runWebScraper({
        extractorOptions,
        crawlerOptions: crawlerOptions,
        pageOptions: pageOptions,
        priority,
      });
    }
    const docs = (await provider.getDocuments(false, (progress: Progress) => {
@ -104,11 +113,8 @@ export async function runWebScraper({
            return { url: doc.metadata.sourceURL };
          }
        })
-      : docs.filter((doc) => doc.content.trim().length > 0);
+      : docs;
    const isCancelled = await (await getWebScraperQueue().client).exists("cancelled:" + bull_job_id);
    if (!isCancelled) {
    const billingResult = await billTeam(team_id, filteredDocs.length);
    if (!billingResult.success) {
@ -119,7 +125,6 @@ export async function runWebScraper({
        docs: [],
      };
    }
    }
    // This is where the returnvalue from the job is set
    onSuccess(filteredDocs, mode);
@ -141,21 +146,21 @@ const saveJob = async (job: Job, result: any, token: string, mode: string) => {
        .eq("job_id", job.id);
      if (error) throw new Error(error.message);
-      try {
+      // try {
-        if (mode === "crawl") {
+      //   if (mode === "crawl") {
-          await job.moveToCompleted(null, token, false);
+      //     await job.moveToCompleted(null, token, false);
-        } else {
+      //   } else {
-          await job.moveToCompleted(result, token, false);
+      //     await job.moveToCompleted(result, token, false);
-        }
+      //   }
-      } catch (error) {
+      // } catch (error) {
-        // I think the job won't exist here anymore
+      //   // I think the job won't exist here anymore
-      }
+      // }
-    } else {
+    // } else {
-      try {
+    //   try {
-        await job.moveToCompleted(result, token, false);
+    //     await job.moveToCompleted(result, token, false);
-      } catch (error) {
+    //   } catch (error) {
-        // I think the job won't exist here anymore
+    //     // I think the job won't exist here anymore
-      }
+    //   }
    }
    ScrapeEvents.logJobEvent(job, "completed");
  } catch (error) {
--- a/apps/api/src/routes/admin.ts
+++ b/apps/api/src/routes/admin.ts
@ -1,10 +1,10 @@
 import express from "express";
-import { redisHealthController } from "../controllers/admin/redis-health";
+import { redisHealthController } from "../controllers/v0/admin/redis-health";
 import {
  checkQueuesController,
  cleanBefore24hCompleteJobsController,
  queuesController,
-} from "../controllers/admin/queue";
+} from "../controllers/v0/admin/queue";
 export const adminRouter = express.Router();
--- a/apps/api/src/routes/v0.ts
+++ b/apps/api/src/routes/v0.ts
@ -1,14 +1,14 @@
 import express from "express";
-import { crawlController } from "../../src/controllers/crawl";
+import { crawlController } from "../../src/controllers/v0/crawl";
-import { crawlStatusController } from "../../src/controllers/crawl-status";
+import { crawlStatusController } from "../../src/controllers/v0/crawl-status";
-import { scrapeController } from "../../src/controllers/scrape";
+import { scrapeController } from "../../src/controllers/v0/scrape";
-import { crawlPreviewController } from "../../src/controllers/crawlPreview";
+import { crawlPreviewController } from "../../src/controllers/v0/crawlPreview";
-import { crawlJobStatusPreviewController } from "../../src/controllers/status";
+import { crawlJobStatusPreviewController } from "../../src/controllers/v0/status";
-import { searchController } from "../../src/controllers/search";
+import { searchController } from "../../src/controllers/v0/search";
-import { crawlCancelController } from "../../src/controllers/crawl-cancel";
+import { crawlCancelController } from "../../src/controllers/v0/crawl-cancel";
-import { keyAuthController } from "../../src/controllers/keyAuth";
+import { keyAuthController } from "../../src/controllers/v0/keyAuth";
-import { livenessController } from "../controllers/liveness";
+import { livenessController } from "../controllers/v0/liveness";
-import { readinessController } from "../controllers/readiness";
+import { readinessController } from "../controllers/v0/readiness";
 export const v0Router = express.Router();
--- a/apps/api/src/routes/v1.ts
+++ b/apps/api/src/routes/v1.ts
@ -1,9 +1,21 @@
-import express from "express";
+import express, { NextFunction, Request, Response } from "express";
 import { crawlController } from "../../src/controllers/v1/crawl";
 // import { crawlStatusController } from "../../src/controllers/v1/crawl-status";
 import { scrapeController } from "../../src/controllers/v1/scrape";
 import { crawlStatusController } from "../../src/controllers/v1/crawl-status";
 import { mapController } from "../../src/controllers/v1/map";
 import { ErrorResponse, RequestWithAuth, RequestWithMaybeAuth } from "../controllers/v1/types";
 import { RateLimiterMode } from "../types";
 import { authenticateUser } from "../controllers/v1/auth";
 import { Logger } from "../lib/logger";
 import { createIdempotencyKey } from "../services/idempotency/create";
 import { validateIdempotencyKey } from "../services/idempotency/validate";
 import { ZodError } from "zod";
 import { checkTeamCredits } from "../services/billing/credit_billing";
 import { v4 as uuidv4 } from "uuid";
 import expressWs from "express-ws";
 import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws";
 import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
 // import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
 // import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
 // import { searchController } from "../../src/controllers/v1/search";
@ -12,23 +24,142 @@ import { mapController } from "../../src/controllers/v1/map";
 // import { livenessController } from "../controllers/v1/liveness";
 // import { readinessController } from "../controllers/v1/readiness";
 /**
  * Builds a middleware that rejects the request with 402 unless the team has at
  * least the required number of credits.
  *
  * When `minimum` is not given (or is 0) and the request has a body, the
  * requirement is taken from `req.body.limit`, defaulting to 1. The value is
  * resolved per request into a local: writing it back to `minimum` would leak the
  * first request's limit into every later request served by the same middleware.
  *
  * On success `req.account.remainingCredits` is populated and `next()` is called;
  * errors thrown by the credit lookup are forwarded to `next(err)`.
  *
  * @param minimum - Fixed credit requirement; omit to derive it from the request body.
  */
 function checkCreditsMiddleware(minimum?: number): (req: RequestWithAuth, res: Response, next: NextFunction) => void {
    return (req, res, next) => {
        (async () => {
            let required = minimum;
            if (!required && req.body) {
                required = (req.body as any)?.limit ?? 1;
            }
            const { success, remainingCredits } = await checkTeamCredits(req.auth.team_id, required);
            if (!success) {
                return res.status(402).json({ success: false, error: "Insufficient credits" });
            }
            req.account = { remainingCredits }
            next();
        })()
            .catch(err => next(err));
    };
 }
 /**
  * Builds a middleware that authenticates the request for the given rate-limiter mode.
  *
  * On failure it responds with the status and error reported by `authenticateUser`;
  * on success it stores `{ team_id, plan }` on `req.auth` and continues. Errors
  * thrown during authentication are forwarded to `next(err)`.
  */
 export function authMiddleware(rateLimiterMode: RateLimiterMode): (req: RequestWithMaybeAuth, res: Response, next: NextFunction) => void {
    return (req, res, next) => {
        const authenticate = async () => {
            const outcome = await authenticateUser(
                req,
                res,
                rateLimiterMode,
            );
            const { success, team_id, error, status, plan } = outcome;
            if (!success) {
                return res.status(status).json({ success: false, error });
            }
            req.auth = { team_id, plan };
            next();
        };
        authenticate().catch(err => next(err));
    }
 }
 /**
  * Enforces the optional `x-idempotency-key` header.
  *
  * Requests without the header pass straight through. With the header, the key is
  * validated; an invalid key gets a 409, otherwise the key is recorded and the
  * request continues. Errors are forwarded to `next(err)`.
  */
 function idempotencyMiddleware(req: Request, res: Response, next: NextFunction) {
    const enforce = async () => {
        const hasKey = Boolean(req.headers["x-idempotency-key"]);
        if (hasKey) {
            const isIdempotencyValid = await validateIdempotencyKey(req);
            if (!isIdempotencyValid) {
                return res.status(409).json({ success: false, error: "Idempotency key already used" });
            }
            // NOTE(review): result is not awaited; if createIdempotencyKey is async, a failure
            // here would not reach next(err) — confirm this is intentional.
            createIdempotencyKey(req);
        }
        next();
    };
    enforce().catch(err => next(err));
 }
 /**
  * Rejects the request with 403 when `req.body.url` is set and is on the blocklist;
  * otherwise passes control to the next handler.
  */
 function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
    const targetUrl = req.body.url;
    const blocked = Boolean(targetUrl) && isUrlBlocked(targetUrl);
    if (blocked) {
        return res.status(403).json({ success: false, error: "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." });
    }
    next();
 }
 /**
  * Adapts an async controller into an Express handler.
  *
  * The controller is invoked immediately with `req` and `res`; if the promise it
  * returns rejects, the rejection reason is passed to `next` so the router's
  * error handler sees it.
  */
 function wrap(controller: (req: Request, res: Response) => Promise<any>): (req: Request, res: Response, next: NextFunction) => any {
    return function wrapped(req, res, next) {
        const pending = controller(req, res);
        pending.catch((err) => {
            next(err);
        });
    };
 }
 // NOTE(review): presumably called only for its side effect of making `.ws()` available on
 // routers (v1Router.ws is used further down); the app instance created here is discarded —
 // confirm against the express-ws documentation.
 expressWs(express());
 // Router for the v1 API; route paths registered on it are relative (e.g. "/scrape").
 export const v1Router = express.Router();
-v1Router.post("/v1/scrape", scrapeController);
+v1Router.post(
-v1Router.post("/v1/crawl", crawlController);
+    "/scrape",
-v1Router.get("/v1/crawl/:jobId", crawlStatusController);
+    blocklistMiddleware,
-// v1Router.post("/v1/crawlWebsitePreview", crawlPreviewController);
+    authMiddleware(RateLimiterMode.Scrape),
-// v1Router.delete("/v1/crawl/cancel/:jobId", crawlCancelController);
+    checkCreditsMiddleware(1),
-// v1Router.get("/v1/checkJobStatus/:jobId", crawlJobStatusPreviewController);
+    wrap(scrapeController)
 );
 v1Router.post(
    "/crawl",
    blocklistMiddleware,
    authMiddleware(RateLimiterMode.Crawl),
    idempotencyMiddleware,
    checkCreditsMiddleware(),
    wrap(crawlController)
 );
 v1Router.post(
    "/map",
    blocklistMiddleware,
    authMiddleware(RateLimiterMode.Map),
    checkCreditsMiddleware(1),
    wrap(mapController)
 );
 v1Router.get(
    "/crawl/:jobId",
    authMiddleware(RateLimiterMode.CrawlStatus),
    wrap(crawlStatusController)
 );
 v1Router.ws(
    "/crawl/:jobId",
    crawlStatusWSController
 );
 // v1Router.post("/crawlWebsitePreview", crawlPreviewController);
 // v1Router.delete("/crawl/:jobId", crawlCancelController);
 // v1Router.get("/checkJobStatus/:jobId", crawlJobStatusPreviewController);
 // // Auth route for key based authentication
-// v1Router.get("/v1/keyAuth", keyAuthController);
+// v1Router.get("/keyAuth", keyAuthController);
 // // Search routes
-// v0Router.post("/v1/search", searchController);
+// v0Router.post("/search", searchController);
 // Health/Probe routes
-// v1Router.get("/v1/health/liveness", livenessController);
+// v1Router.get("/health/liveness", livenessController);
-// v1Router.get("/v1/health/readiness", readinessController);
+// v1Router.get("/health/readiness", readinessController);
-v1Router.post("/v1/map", mapController);
+v1Router.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: Response<ErrorResponse>, next: NextFunction) => {
    if (err instanceof ZodError) {
        res.status(400).json({ success: false, error: "Bad Request", details: err.errors });
    } else {
        const id = uuidv4();
        let verbose = JSON.stringify(err);
        if (verbose === "{}") {
            if (err instanceof Error) {
                verbose = JSON.stringify({
                    message: err.message,
                    name: err.name,
                    stack: err.stack,
                });
            }
        }
        Logger.error("Error occurred in request! (" + req.path + ") -- ID " + id  + " -- " + verbose);
        res.status(500).json({ success: false, error: "An unexpected error occurred. Please contact hello@firecrawl.com for help. Your exception ID is " + id + "" });
    }
 });
--- a/apps/api/src/run-req.ts
+++ b/apps/api/src/run-req.ts
@ -0,0 +1,175 @@
 import axios from "axios";
 import { promises as fs } from "fs";
 import { v4 as uuidV4 } from "uuid";
 /** One crawl target loaded from starturls.json, annotated in place as the script runs. */
 interface Result {
  // URL submitted as the crawl's `url`.
  start_url: string;
  // Job id returned by the crawl endpoint; set by processResults after a successful sendCrawl.
  job_id?: string;
  // UUID generated by sendCrawl; recorded only after the request succeeds.
  idempotency_key?: string;
  // Crawl output (`response.data.data`) stored by getContent, or null when polling timed out.
  result_data_jsonb?: any;
 }
 /**
  * Submits a crawl request for `result.start_url` to the staging v0 crawl endpoint.
  *
  * On success the generated idempotency key is recorded on `result` and the job id
  * from the response is returned. On failure the error is logged and `undefined`
  * is returned.
  *
  * NOTE(review): the idempotency key is generated and stored but never sent with
  * the request — confirm whether an `x-idempotency-key` header was intended.
  */
 async function sendCrawl(result: Result): Promise<string | undefined> {
  const idempotencyKey = uuidV4();
  const payload = {
    url: result.start_url,
    crawlerOptions: {
      limit: 75,
    },
    pageOptions: {
      includeHtml: true,
      replaceAllPathsWithAbsolutePaths: true,
      waitFor: 1000,
    },
  };
  const requestConfig = {
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer `,
    },
  };
  try {
    const { data } = await axios.post(
      "https://staging-firecrawl-scraper-js.fly.dev/v0/crawl",
      payload,
      requestConfig
    );
    result.idempotency_key = idempotencyKey;
    return data.jobId;
  } catch (error) {
    console.error("Error sending crawl:", error);
    return undefined;
  }
 }
 /**
  * Polls the staging crawl-status endpoint until the job reports "completed".
  *
  * Makes up to 120 attempts, sleeping a random 5–20 seconds after each one.
  * Request errors are logged and count as an attempt.
  *
  * @returns true once the job completed (its data is stored on
  *   `result.result_data_jsonb`); false after the attempts run out, in which case
  *   `result.result_data_jsonb` is set to null.
  */
 async function getContent(result: Result): Promise<boolean> {
  const maxAttempts = 120;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      const { data: body } = await axios.get(
        `https://staging-firecrawl-scraper-js.fly.dev/v0/crawl/status/${result.job_id}`,
        {
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer `,
          },
        }
      );
      if (body.status === "completed") {
        // Job actually completed
        result.result_data_jsonb = body.data;
        return true;
      }
    } catch (error) {
      console.error("Error getting content:", error);
    }
    // Back off for a random 5–20 s before the next poll.
    const delayMs = Math.floor(Math.random() * 15000) + 5000;
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
  // Set result as null if timed out
  result.result_data_jsonb = null;
  return false;
 }
 /**
  * Submits a crawl for every entry in `results`, in batches of 100 started one
  * minute apart.
  *
  * Each batch is started without being awaited, so a slow batch does not delay
  * the next one; the function itself resolves one minute after the last batch
  * was started, not when all requests have settled. After every successful
  * submission the full `start_url` / `job_id` list is rewritten to the output file.
  *
  * NOTE(review): submissions within a batch run concurrently and each rewrites the
  * same file, so writes can overlap — confirm this is acceptable for the script.
  *
  * @param results - Crawl targets; `job_id` is filled in on each as it is accepted.
  */
 async function processResults(results: Result[]): Promise<void> {
  const batchSize = 100;
  const batchIntervalMs = 60 * 1000;
  const outputFile = "results_with_job_id_4000_6000.json";
  let starterCount = 0;

  // Submits one crawl and, if accepted, snapshots all job ids collected so far.
  const processSingleResult = async (result: Result) => {
    const jobId = await sendCrawl(result);
    if (!jobId) {
      return;
    }
    console.log(`Job requested count: ${starterCount}`);
    starterCount++;
    result.job_id = jobId;
    try {
      // Save job id along with the start_url
      const resultWithJobId = results.map((r) => ({
        start_url: r.start_url,
        job_id: r.job_id,
      }));
      await fs.writeFile(outputFile, JSON.stringify(resultWithJobId, null, 4));
    } catch (error) {
      console.error(`Error writing to ${outputFile}:`, error);
    }
  };

  for (let i = 0; i < results.length; i += batchSize) {
    const batch = results.slice(i, i + batchSize);
    // Intentionally not awaited: batches are paced by the timer below, not by completion.
    Promise.all(batch.map((result) => processSingleResult(result)))
      .then(() => {
        // Clamp so the final, possibly short, batch does not over-report.
        console.log(`Processed ${Math.min(i + batchSize, results.length)} results.`);
      })
      .catch((error) => {
        console.error(`Error processing batch starting at index ${i}:`, error);
      });
    await new Promise((resolve) => setTimeout(resolve, batchIntervalMs)); // Wait for 1 minute
  }
 }
 /**
  * Loads the list of crawl targets from `starturls.json` in the working directory.
  *
  * @returns The parsed file contents, or an empty array when the file cannot be
  *   read or parsed (the error is logged).
  */
 async function getStartUrls(): Promise<Result[]> {
  try {
    const contents = await fs.readFile("starturls.json", "utf-8");
    const parsed = JSON.parse(contents);
    return parsed;
  } catch (error) {
    console.error("Error reading starturls.json:", error);
    return [];
  }
 }
 /**
  * Script entry point: loads the start URLs, takes the entries at indexes
  * 3999–5999, and submits crawls for them, logging the overall outcome.
  */
 async function main() {
  const allResults = await getStartUrls();
  const results: Result[] = allResults.slice(3999, 6000);
  try {
    await processResults(results);
    console.log("All results processed.");
  } catch (error) {
    console.error("Error processing results:", error);
  }
 }
 main();
--- a/apps/api/src/scraper/WebScraper/tests/single_url.test.ts
+++ b/apps/api/src/scraper/WebScraper/tests/single_url.test.ts
@ -23,8 +23,8 @@ describe('scrapSingleUrl', () => {
  }, 10000);
 });
-it('should return a list of links on the mendable.ai page', async () => {
+it('should return a list of links on the firecrawl.ai page', async () => {
-  const url = 'https://mendable.ai';
+  const url = 'https://flutterbricks.com';
  const pageOptions: PageOptions = { includeHtml: true };
  const result = await scrapSingleUrl("TEST", url, pageOptions);
@ -33,5 +33,5 @@ it('should return a list of links on the mendable.ai page', async () => {
  expect(result.linksOnPage).toBeDefined();
  expect(Array.isArray(result.linksOnPage)).toBe(true);
  expect(result.linksOnPage.length).toBeGreaterThan(0);
-  expect(result.linksOnPage).toContain('https://mendable.ai/blog')
+  expect(result.linksOnPage).toContain('https://flutterbricks.com/features')
 }, 10000);
--- a/apps/api/src/scraper/WebScraper/crawler.ts
+++ b/apps/api/src/scraper/WebScraper/crawler.ts
@ -1,4 +1,4 @@
-import axios from "axios";
+import axios, { AxiosError } from "axios";
 import cheerio, { load } from "cheerio";
 import { URL } from "url";
 import { getLinksFromSitemap } from "./sitemap";
@ -22,7 +22,7 @@ export class WebCrawler {
  private crawledUrls: Map<string, string> = new Map();
  private limit: number;
  private robotsTxtUrl: string;
-  private robots: any;
+  public robots: any;
  private generateImgAltText: boolean;
  private allowBackwardCrawling: boolean;
  private allowExternalContentLinks: boolean;
@ -66,7 +66,7 @@ export class WebCrawler {
    this.allowExternalContentLinks = allowExternalContentLinks ?? false;
  }
-  private filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] {
+  public filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] {
    return sitemapLinks
      .filter((link) => {
        const url = new URL(link.trim(), this.baseUrl);
@ -130,6 +130,25 @@ export class WebCrawler {
      .slice(0, limit);
  }
  /**
   * Downloads the document at `this.robotsTxtUrl` using the shared axios
   * timeout.
   *
   * @returns The response body as delivered by axios.
   * @throws Propagates any rejection from the axios request.
   */
  public async getRobotsTxt(): Promise<string> {
    const { data } = await axios.get(this.robotsTxtUrl, { timeout: axiosTimeout });
    return data;
  }
  /**
   * Parses robots.txt content and stores the resulting parser on
   * `this.robots`, replacing whatever was there before.
   *
   * @param txt Raw robots.txt text, parsed relative to `this.robotsTxtUrl`.
   */
  public importRobotsTxt(txt: string) {
    const parsedRobots = robotsParser(this.robotsTxtUrl, txt);
    this.robots = parsedRobots;
  }
  /**
   * Looks up sitemap links for `this.initialUrl` and, when any exist, returns
   * them filtered through `filterLinks` (using `this.limit` and
   * `this.maxCrawledDepth`).
   *
   * @returns One `{ url, html: "" }` entry per surviving link, or `null` when
   *          the sitemap lookup produced no links.
   */
  public async tryGetSitemap(): Promise<{ url: string; html: string; }[] | null> {
    Logger.debug(`Fetching sitemap links from ${this.initialUrl}`);
    const discovered = await this.tryFetchSitemapLinks(this.initialUrl);
    const hasLinks = discovered.length > 0;
    return hasLinks
      ? this.filterLinks(discovered, this.limit, this.maxCrawledDepth).map((url) => ({ url, html: "" }))
      : null;
  }
  public async start(
    inProgress?: (progress: Progress) => void,
    pageOptions?: PageOptions,
@ -142,19 +161,17 @@ export class WebCrawler {
    Logger.debug(`Crawler starting with ${this.initialUrl}`);
    // Fetch and parse robots.txt
    try {
-      const response = await axios.get(this.robotsTxtUrl, { timeout: axiosTimeout });
+      const txt = await this.getRobotsTxt();
-      this.robots = robotsParser(this.robotsTxtUrl, response.data);
+      this.importRobotsTxt(txt);
      Logger.debug(`Crawler robots.txt fetched with ${this.robotsTxtUrl}`);
    } catch (error) {
      Logger.debug(`Failed to fetch robots.txt from ${this.robotsTxtUrl}`);
    }
    if (!crawlerOptions?.ignoreSitemap){
-      Logger.debug(`Fetching sitemap links from ${this.initialUrl}`);
+      const sm = await this.tryGetSitemap();
-      const sitemapLinks = await this.tryFetchSitemapLinks(this.initialUrl);
+      if (sm !== null) {
-      if (sitemapLinks.length > 0) {
+        return sm;
        let filteredLinks = this.filterLinks(sitemapLinks, limit, maxDepth);
        return filteredLinks.map(link => ({ url: link, html: "" }));
      }
    }
@ -241,6 +258,54 @@ export class WebCrawler {
    return Array.from(this.crawledUrls.entries()).map(([url, html]) => ({ url, html }));
  }
  /**
   * Decides whether a link found on a page should be followed.
   *
   * The `href` is made absolute (anything not starting with "http" is
   * resolved against `this.baseUrl`) and then checked:
   * - internal links must pass `noSections`, must not match the excludes
   *   (tested against the URL path) and must be allowed by robots rules;
   * - external links are only accepted when the page they were found on is
   *   internal, `allowExternalContentLinks` is enabled, and the link is not
   *   social media / email, not excluded, and not an external main page.
   *
   * @param href The raw `href` attribute value.
   * @param url  The URL of the page the link was found on.
   * @returns The absolute URL when accepted, otherwise `null`. A `href` that
   *          cannot be parsed as a URL is rejected with `null` rather than
   *          throwing, so one malformed link cannot abort extraction of the
   *          remaining links on the page.
   */
  public filterURL(href: string, url: string): string | null {
    let fullUrl = href;
    let path: string;
    try {
      if (!href.startsWith("http")) {
        fullUrl = new URL(href, this.baseUrl).toString();
      }
      path = new URL(fullUrl).pathname;
    } catch (error) {
      // `new URL` throws a TypeError on malformed input; treat as "not crawlable".
      return null;
    }

    if (this.isInternalLink(fullUrl)) { // INTERNAL LINKS
      if (
        this.noSections(fullUrl) &&
        !this.matchesExcludes(path) &&
        this.isRobotsAllowed(fullUrl)
      ) {
        return fullUrl;
      }
    } else { // EXTERNAL LINKS
      if (
        this.isInternalLink(url) &&
        this.allowExternalContentLinks &&
        !this.isSocialMediaOrEmail(fullUrl) &&
        !this.matchesExcludes(fullUrl, true) &&
        !this.isExternalMainPage(fullUrl)
      ) {
        return fullUrl;
      }
    }
    return null;
  }
  /**
   * Collects every followable link from an HTML document.
   *
   * Each `<a>` element's `href` is passed through `filterURL`; anchors
   * without an `href` and links rejected by the filter are dropped.
   *
   * @param html The page markup to scan.
   * @param url  The URL of the page, forwarded to `filterURL`.
   * @returns Accepted links in document order (duplicates are kept).
   */
  public extractLinksFromHTML(html: string, url: string) {
    const $ = load(html);
    const accepted: string[] = [];
    for (const anchor of $("a").toArray()) {
      const href = $(anchor).attr("href");
      if (!href) {
        continue;
      }
      const candidate = this.filterURL(href, url);
      if (candidate !== null) {
        accepted.push(candidate);
      }
    }
    return accepted;
  }
  async crawl(url: string, pageOptions: PageOptions): Promise<{url: string, html: string, pageStatusCode?: number, pageError?: string}[]> {
    if (this.visited.has(url) || !this.robots.isAllowed(url, "FireCrawlAgent")) {
      return [];
@ -284,37 +349,7 @@ export class WebCrawler {
        links.push({ url, html: content, pageStatusCode, pageError });
      }
-      $("a").each((_, element) => {
+      links.push(...this.extractLinksFromHTML(content, url).map(url => ({ url, html: content, pageStatusCode, pageError })));
        const href = $(element).attr("href");
        if (href) {
          let fullUrl = href;
          if (!href.startsWith("http")) {
            fullUrl = new URL(href, this.baseUrl).toString();
          }
          const urlObj = new URL(fullUrl);
          const path = urlObj.pathname;
          if (this.isInternalLink(fullUrl)) { // INTERNAL LINKS
            if (this.isInternalLink(fullUrl) &&
              this.noSections(fullUrl) &&
              !this.matchesExcludes(path) &&
              this.isRobotsAllowed(fullUrl)
            ) {
              links.push({ url: fullUrl, html: content, pageStatusCode, pageError });
            }
          } else { // EXTERNAL LINKS
            if (
              this.isInternalLink(url) &&
              this.allowExternalContentLinks &&
              !this.isSocialMediaOrEmail(fullUrl) &&
              !this.matchesExcludes(fullUrl, true) &&
              !this.isExternalMainPage(fullUrl)
            ) {
              links.push({ url: fullUrl, html: content, pageStatusCode, pageError });
            }
          }
        }
      });
      if (this.visited.size === 1) {
        return links;
@ -420,9 +455,10 @@ export class WebCrawler {
      ".woff",
      ".ttf",
      ".woff2",
-      ".webp"
+      ".webp",
      ".inc"
    ];
-    return fileExtensions.some((ext) => url.endsWith(ext));
+    return fileExtensions.some((ext) => url.toLowerCase().endsWith(ext));
  }
  private isSocialMediaOrEmail(url: string): boolean {
@ -464,24 +500,32 @@ export class WebCrawler {
      }
    } catch (error) { 
      Logger.debug(`Failed to fetch sitemap with axios from ${sitemapUrl}: ${error}`);
      if (error instanceof AxiosError && error.response?.status === 404) {
        // ignore 404
      } else {
        const response = await getLinksFromSitemap({ sitemapUrl, mode: 'fire-engine' });
        if (response) {
          sitemapLinks = response;
        }
      }
    }
    if (sitemapLinks.length === 0) {
      const baseUrlSitemap = `${this.baseUrl}/sitemap.xml`;
      try {
        const response = await axios.get(baseUrlSitemap, { timeout: axiosTimeout });
        if (response.status === 200) {
-          sitemapLinks = await getLinksFromSitemap({ sitemapUrl: baseUrlSitemap });
+          sitemapLinks = await getLinksFromSitemap({ sitemapUrl: baseUrlSitemap, mode: 'fire-engine' });
        }
      } catch (error) {
        Logger.debug(`Failed to fetch sitemap from ${baseUrlSitemap}: ${error}`);
        if (error instanceof AxiosError && error.response?.status === 404) {
          // ignore 404
        } else {
          sitemapLinks = await getLinksFromSitemap({ sitemapUrl: baseUrlSitemap, mode: 'fire-engine' });
        }
      }
    }
    const normalizedUrl = normalizeUrl(url);
    const normalizedSitemapLinks = sitemapLinks.map(link => normalizeUrl(link));
--- a/apps/api/src/scraper/WebScraper/index.ts
+++ b/apps/api/src/scraper/WebScraper/index.ts
@ -16,18 +16,19 @@ import {
  replacePathsWithAbsolutePaths,
 } from "./utils/replacePaths";
 import { generateCompletions } from "../../lib/LLM-extraction";
-import { getWebScraperQueue } from "../../../src/services/queue-service";
+import { getScrapeQueue } from "../../../src/services/queue-service";
 import { fetchAndProcessDocx } from "./utils/docxProcessor";
 import { getAdjustedMaxDepth, getURLDepth } from "./utils/maxDepthUtils";
 import { Logger } from "../../lib/logger";
 import { ScrapeEvents } from "../../lib/scrape-events";
 export class WebScraperDataProvider {
  private jobId: string;
  private bullJobId: string;
  private urls: string[] = [""];
  private mode: "single_urls" | "sitemap" | "crawl" = "single_urls";
-  private includes: string[];
+  private includes: string | string[];
-  private excludes: string[];
+  private excludes: string | string[];
  private maxCrawledLinks: number;
  private maxCrawledDepth: number = 10;
  private returnOnlyUrls: boolean;
@ -43,6 +44,7 @@ export class WebScraperDataProvider {
  private crawlerMode: string = "default";
  private allowBackwardCrawling: boolean = false;
  private allowExternalContentLinks: boolean = false;
  private priority?: number;
  authorize(): void {
    throw new Error("Method not implemented.");
@ -71,7 +73,8 @@ export class WebScraperDataProvider {
            url,
            this.pageOptions,
            this.extractorOptions,
-            existingHTML
+            existingHTML,
            this.priority,
          );
          processedUrls++;
          if (inProgress) {
@ -87,21 +90,6 @@ export class WebScraperDataProvider {
          results[i + index] = result;
        })
      );
      try {
        if (this.mode === "crawl" && this.bullJobId) {
          const job = await getWebScraperQueue().getJob(this.bullJobId);
          const jobStatus = await job.getState();
          if (jobStatus === "failed") {
            Logger.info(
              "Job has failed or has been cancelled by the user. Stopping the job..."
            );
            return [] as Document[];
          }
        }
      } catch (error) {
        Logger.error(error.message);
        return [] as Document[];
      }
    }
    return results.filter((result) => result !== null) as Document[];
  }
@ -167,11 +155,29 @@ export class WebScraperDataProvider {
  private async handleCrawlMode(
    inProgress?: (progress: Progress) => void
  ): Promise<Document[]> {
    let includes: string[];
    if (Array.isArray(this.includes)) {
      if (this.includes[0] != "") {
        includes = this.includes;
      }
    } else {
      includes = this.includes.split(',');
    }
    let excludes: string[];
    if (Array.isArray(this.excludes)) {
      if (this.excludes[0] != "") {
        excludes = this.excludes;
      }
    } else {
      excludes = this.excludes.split(',');
    }
    const crawler = new WebCrawler({
      jobId: this.jobId,
      initialUrl: this.urls[0],
-      includes: this.includes,
+      includes,
-      excludes: this.excludes,
+      excludes,
      maxCrawledLinks: this.maxCrawledLinks,
      maxCrawledDepth: getAdjustedMaxDepth(this.urls[0], this.maxCrawledDepth),
      limit: this.limit,
@ -287,7 +293,10 @@ export class WebScraperDataProvider {
      documents = await this.getSitemapData(this.urls[0], documents);
    }
    if (this.pageOptions.includeMarkdown) {
      documents = this.applyPathReplacements(documents);
    }
    // documents = await this.applyImgAltText(documents);
    if (
      (this.extractorOptions.mode === "llm-extraction" ||
@ -316,12 +325,31 @@ export class WebScraperDataProvider {
  private async fetchPdfDocuments(pdfLinks: string[]): Promise<Document[]> {
    return Promise.all(
      pdfLinks.map(async (pdfLink) => {
        const timer = Date.now();
        const logInsertPromise = ScrapeEvents.insert(this.jobId, {
          type: "scrape",
          url: pdfLink,
          worker: process.env.FLY_MACHINE_ID,
          method: "pdf-scrape",
          result: null,
        });
        const { content, pageStatusCode, pageError } = await fetchAndProcessPdf(
          pdfLink,
          this.pageOptions.parsePDF
        );
        const insertedLogId = await logInsertPromise;
        ScrapeEvents.updateScrapeResult(insertedLogId, {
          response_size: content.length,
          success: !(pageStatusCode && pageStatusCode >= 400) && !!content && (content.trim().length >= 100),
          error: pageError,
          response_code: pageStatusCode,
          time_taken: Date.now() - timer,
        });
        return {
          content: content,
          markdown: content,
          metadata: { sourceURL: pdfLink, pageStatusCode, pageError },
          provider: "web-scraper",
        };
@ -330,12 +358,32 @@ export class WebScraperDataProvider {
  }
  private async fetchDocxDocuments(docxLinks: string[]): Promise<Document[]> {
    return Promise.all(
-      docxLinks.map(async (p) => {
+      docxLinks.map(async (docxLink) => {
-        const { content, pageStatusCode, pageError } =
+        const timer = Date.now();
-          await fetchAndProcessDocx(p);
+        const logInsertPromise = ScrapeEvents.insert(this.jobId, {
          type: "scrape",
          url: docxLink,
          worker: process.env.FLY_MACHINE_ID,
          method: "docx-scrape",
          result: null,
        });
        const { content, pageStatusCode, pageError } = await fetchAndProcessDocx(
          docxLink
        );
        const insertedLogId = await logInsertPromise;
        ScrapeEvents.updateScrapeResult(insertedLogId, {
          response_size: content.length,
          success: !(pageStatusCode && pageStatusCode >= 400) && !!content && (content.trim().length >= 100),
          error: pageError,
          response_code: pageStatusCode,
          time_taken: Date.now() - timer,
        });
        return {
          content,
-          metadata: { sourceURL: p, pageStatusCode, pageError },
+          metadata: { sourceURL: docxLink, pageStatusCode, pageError },
          provider: "web-scraper",
        };
      })
@ -406,6 +454,10 @@ export class WebScraperDataProvider {
      const url = new URL(document.metadata.sourceURL);
      const path = url.pathname;
      if (!Array.isArray(this.excludes)) {
        this.excludes = this.excludes.split(',');
      }
      if (this.excludes.length > 0 && this.excludes[0] !== "") {
        // Check if the link should be excluded
        if (
@ -417,6 +469,10 @@ export class WebScraperDataProvider {
        }
      }
      if (!Array.isArray(this.includes)) {
        this.includes = this.includes.split(',');
      }
      if (this.includes.length > 0 && this.includes[0] !== "") {
        // Check if the link matches the include patterns, if any are specified
        if (this.includes.length > 0) {
@ -528,14 +584,22 @@ export class WebScraperDataProvider {
      options.crawlerOptions?.replaceAllPathsWithAbsolutePaths ??
      options.pageOptions?.replaceAllPathsWithAbsolutePaths ??
      false;
-    //! @nicolas, for some reason this was being injected and breaking everything. Don't have time to find source of the issue so adding this check
+
-    this.excludes = this.excludes.filter((item) => item !== "");
+    if (typeof options.crawlerOptions?.excludes === 'string') {
      this.excludes = options.crawlerOptions?.excludes.split(',').filter((item) => item.trim() !== "");
    }
    if (typeof options.crawlerOptions?.includes === 'string') {
      this.includes = options.crawlerOptions?.includes.split(',').filter((item) => item.trim() !== "");
    }
    this.crawlerMode = options.crawlerOptions?.mode ?? "default";
    this.ignoreSitemap = options.crawlerOptions?.ignoreSitemap ?? false;
    this.allowBackwardCrawling =
      options.crawlerOptions?.allowBackwardCrawling ?? false;
    this.allowExternalContentLinks =
      options.crawlerOptions?.allowExternalContentLinks ?? false;
    this.priority = options.priority;
    // make sure all urls start with https://
    this.urls = this.urls.map((url) => {
--- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
+++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
@ -11,6 +11,7 @@ import { Logger } from "../../../lib/logger";
 * @param url The URL to scrape
 * @param waitFor The time to wait for the page to load
 * @param screenshot Whether to take a screenshot
 * @param fullPageScreenshot Whether to take a full page screenshot
 * @param pageOptions The options for the page
 * @param headers The headers to send with the request
 * @param options The options for the request
@ -20,18 +21,22 @@ export async function scrapWithFireEngine({
  url,
  waitFor = 0,
  screenshot = false,
  fullPageScreenshot = false,
  pageOptions = { parsePDF: true },
  fireEngineOptions = {},
  headers,
  options,
  priority,
 }: {
  url: string;
  waitFor?: number;
  screenshot?: boolean;
  fullPageScreenshot?: boolean;
  pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean };
  fireEngineOptions?: FireEngineOptions;
  headers?: Record<string, string>;
  options?: any;
  priority?: number;
 }): Promise<FireEngineResponse> {
  const logParams = {
    url,
@ -47,8 +52,9 @@ export async function scrapWithFireEngine({
  try {
    const reqParams = await generateRequestParams(url);
    const waitParam = reqParams["params"]?.wait ?? waitFor;
-    const engineParam = reqParams["params"]?.engine ?? fireEngineOptions?.engine  ?? "playwright";
+    const engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine  ?? "playwright";
    const screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
    const fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
    const fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
@ -61,17 +67,20 @@ export async function scrapWithFireEngine({
    let engine = engineParam; // do we want fireEngineOptions as first choice?
    Logger.info(
-      `⛏️ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, method: ${fireEngineOptionsParam?.method ?? "null"} }`
+      `⛏️ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, fullPageScreenshot: ${fullPageScreenshot}, method: ${fireEngineOptionsParam?.method ?? "null"} }`
    );
    const response = await axios.post(
      process.env.FIRE_ENGINE_BETA_URL + endpoint,
      {
        url: url,
        wait: waitParam,
        screenshot: screenshotParam,
        fullPageScreenshot: fullPageScreenshotParam,
        headers: headers,
        pageOptions: pageOptions,
        priority,
        ...fireEngineOptionsParam,
      },
      {
--- a/apps/api/src/scraper/WebScraper/single_url.ts
+++ b/apps/api/src/scraper/WebScraper/single_url.ts
@ -123,17 +123,21 @@ export async function scrapSingleUrl(
  jobId: string,
  urlToScrap: string,
  pageOptions: PageOptions = {
    includeMarkdown: true,
    onlyMainContent: true,
    includeHtml: false,
    includeRawHtml: false,
    waitFor: 0,
    screenshot: false,
    fullPageScreenshot: false,
    headers: undefined,
    includeLinks: true
  },
  extractorOptions: ExtractorOptions = {
    mode: "llm-extraction-from-markdown",
  },
-  existingHtml: string = ""
+  existingHtml: string = "",
  priority?: number,
 ): Promise<Document> {
  urlToScrap = urlToScrap.trim();
@ -171,11 +175,13 @@ export async function scrapSingleUrl(
            url,
            waitFor: pageOptions.waitFor,
            screenshot: pageOptions.screenshot,
            fullPageScreenshot: pageOptions.fullPageScreenshot,
            pageOptions: pageOptions,
            headers: pageOptions.headers,
            fireEngineOptions: {
              engine: engine,
-            }
+            },
            priority,
          });
          scraperResponse.text = response.html;
          scraperResponse.screenshot = response.screenshot;
@ -306,7 +312,7 @@ export async function scrapSingleUrl(
    const scrapersInOrder = getScrapingFallbackOrder(
      defaultScraper,
      pageOptions && pageOptions.waitFor && pageOptions.waitFor > 0,
-      pageOptions && pageOptions.screenshot && pageOptions.screenshot === true,
+      pageOptions && (pageOptions.screenshot || pageOptions.fullPageScreenshot) && (pageOptions.screenshot === true || pageOptions.fullPageScreenshot === true),
      pageOptions && pageOptions.headers && pageOptions.headers !== undefined
    );
@ -334,8 +340,8 @@ export async function scrapSingleUrl(
        pageError = undefined;
      }
-      if (text && text.trim().length >= 100) {
+      if ((text && text.trim().length >= 100) || (typeof screenshot === "string" && screenshot.length > 0)) {
-        Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100, breaking`);
+        Logger.debug(`⛏️ ${scraper}: Successfully scraped ${urlToScrap} with text length >= 100 or screenshot, breaking`);
        break;
      }
      if (pageStatusCode && (pageStatusCode == 404 || pageStatusCode == 500)) {
@ -357,20 +363,22 @@ export async function scrapSingleUrl(
    let linksOnPage: string[] | undefined;
    if (pageOptions.includeLinks) {
      linksOnPage = extractLinks(rawHtml, urlToScrap);
    }
    let document: Document;
    if (screenshot && screenshot.length > 0) {
      document = {
        content: text,
-        markdown: text,
+        markdown: pageOptions.includeMarkdown ? text : undefined,
        html: pageOptions.includeHtml ? html : undefined,
        rawHtml:
          pageOptions.includeRawHtml ||
            extractorOptions.mode === "llm-extraction-from-raw-html"
            ? rawHtml
            : undefined,
-        linksOnPage,
+        linksOnPage: pageOptions.includeLinks ? linksOnPage : undefined,
        metadata: {
          ...metadata,
          screenshot: screenshot,
@ -382,7 +390,7 @@ export async function scrapSingleUrl(
    } else {
      document = {
        content: text,
-        markdown: text,
+        markdown: pageOptions.includeMarkdown ? text : undefined,
        html: pageOptions.includeHtml ? html : undefined,
        rawHtml:
          pageOptions.includeRawHtml ||
@ -395,7 +403,7 @@ export async function scrapSingleUrl(
          pageStatusCode: pageStatusCode,
          pageError: pageError,
        },
-        linksOnPage,
+        linksOnPage: pageOptions.includeLinks ? linksOnPage : undefined,
      };
    }
@ -409,9 +417,9 @@ export async function scrapSingleUrl(
    });
    return {
      content: "",
-      markdown: "",
+      markdown: pageOptions.includeMarkdown ? "" : undefined,
      html: "",
-      linksOnPage: [],
+      linksOnPage: pageOptions.includeLinks ? [] : undefined,
      metadata: {
        sourceURL: urlToScrap,
        pageStatusCode: pageStatusCode,
--- a/apps/api/src/scraper/WebScraper/sitemap.ts
+++ b/apps/api/src/scraper/WebScraper/sitemap.ts
@ -19,7 +19,7 @@ export async function getLinksFromSitemap(
  try {
    let content: string;
    try {
-      if (mode === 'axios') {
+      if (mode === 'axios' || process.env.FIRE_ENGINE_BETA_URL === '') {
        const response = await axios.get(sitemapUrl, { timeout: axiosTimeout });
        content = response.data;
      } else if (mode === 'fire-engine') {
--- a/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts
+++ b/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts
@ -1,24 +1,11 @@
 export const urlSpecificParams = {
  "platform.openai.com": {
-    params: {
+    defaultScraper: "fire-engine",
-      wait_browser: "networkidle2",
+    params:{
-      block_resources: false,
+      wait: 3000,
      fireEngineOptions:{
        engine: "chrome-cdp"
      },
    headers: {
      "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      "sec-fetch-site": "same-origin",
      "sec-fetch-mode": "cors",
      "sec-fetch-dest": "empty",
      referer: "https://www.google.com/",
      "accept-language": "en-US,en;q=0.9",
      "accept-encoding": "gzip, deflate, br",
      accept:
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    },
    cookies: {
      __cf_bm:
        "mC1On8P2GWT3A5UeSYH6z_MP94xcTAdZ5jfNi9IT2U0-1714327136-1.0.1.1-ILAP5pSX_Oo9PPo2iHEYCYX.p9a0yRBNLr58GHyrzYNDJ537xYpG50MXxUYVdfrD.h3FV5O7oMlRKGA0scbxaQ",
    },
  },
  "support.greenpay.me":{
@ -232,4 +219,28 @@ export const urlSpecificParams = {
      }
    },
  },
  "amazon.com":{
    defaultScraper: "fire-engine",
    params:{
      fireEngineOptions:{
        engine: "chrome-cdp",
      },
    },
  },
  "digikey.com":{
    defaultScraper: "fire-engine",
    params:{
      fireEngineOptions:{
        engine: "tlsclient",
      },
    },
  },
  "zoopla.co.uk":{
    defaultScraper: "fire-engine",
    params:{
      fireEngineOptions:{
        engine: "chrome-cdp",
      },
    },
  }
 };
--- a/apps/api/src/scraper/WebScraper/utils/docxProcessor.ts
+++ b/apps/api/src/scraper/WebScraper/utils/docxProcessor.ts
@ -4,15 +4,36 @@ import { createWriteStream } from "node:fs";
 import path from "path";
 import os from "os";
 import mammoth from "mammoth";
 import { Logger } from "../../../lib/logger";
 export async function fetchAndProcessDocx(url: string): Promise<{ content: string; pageStatusCode: number; pageError: string }> {
-  const { tempFilePath, pageStatusCode, pageError } = await downloadDocx(url);
+  let tempFilePath = '';
-  const content = await processDocxToText(tempFilePath);
+  let pageStatusCode = 200;
  let pageError = '';
  let content = '';
  try {
    const downloadResult = await downloadDocx(url);
    tempFilePath = downloadResult.tempFilePath;
    pageStatusCode = downloadResult.pageStatusCode;
    pageError = downloadResult.pageError;
    content = await processDocxToText(tempFilePath);
  } catch (error) {
    Logger.error(`Failed to fetch and process DOCX: ${error.message}`);
    pageStatusCode = 500;
    pageError = error.message;
    content = '';
  } finally {
    if (tempFilePath) {
      fs.unlinkSync(tempFilePath); // Clean up the temporary file
    }
  }
  return { content, pageStatusCode, pageError };
 }
 async function downloadDocx(url: string): Promise<{ tempFilePath: string; pageStatusCode: number; pageError: string }> {
  try {
    const response = await axios({
      url,
      method: "GET",
@ -26,16 +47,33 @@ async function downloadDocx(url: string): Promise<{ tempFilePath: string; pageSt
    return new Promise((resolve, reject) => {
      writer.on("finish", () => resolve({ tempFilePath, pageStatusCode: response.status, pageError: response.statusText != "OK" ? response.statusText : undefined }));
-    writer.on("error", reject);
+      writer.on("error", () => {
        Logger.error('Failed to write DOCX file to disk');
        reject(new Error('Failed to write DOCX file to disk'));
      });
    });
  } catch (error) {
    Logger.error(`Failed to download DOCX: ${error.message}`);
    return { tempFilePath: "", pageStatusCode: 500, pageError: error.message };
  }
 }
 /**
  * Converts the DOCX file at `filePath` to plain text via
  * `extractTextFromDocx`.
  *
  * @param filePath Path of the DOCX file on disk.
  * @returns The extracted text, or `""` if extraction throws (the failure is
  *          logged, not rethrown).
  */
 export async function processDocxToText(filePath: string): Promise<string> {
  let text: string;
  try {
    text = await extractTextFromDocx(filePath);
  } catch (error) {
    Logger.error(`Failed to process DOCX to text: ${error.message}`);
    text = "";
  }
  return text;
 }
 /**
  * Runs mammoth's raw-text extraction on the DOCX file at `filePath`.
  *
  * @param filePath Path of the DOCX file on disk.
  * @returns The `value` field of mammoth's result, or `""` when extraction
  *          fails (the failure is logged, not rethrown).
  */
 async function extractTextFromDocx(filePath: string): Promise<string> {
  try {
    const { value } = await mammoth.extractRawText({ path: filePath });
    return value;
  } catch (error) {
    Logger.error(`Failed to extract text from DOCX: ${error.message}`);
    return "";
  }
 }
--- a/apps/api/src/scraper/WebScraper/utils/metadata.ts
+++ b/apps/api/src/scraper/WebScraper/utils/metadata.ts
@ -75,9 +75,7 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata {
    description = soup('meta[name="description"]').attr("content") || null;
    // Assuming the language is part of the URL as per the regex pattern
-    const pattern = /([a-zA-Z]+-[A-Z]{2})/;
+    language = soup('html').attr('lang') || null;
    const match = pattern.exec(url);
    language = match ? match[1] : null;
    keywords = soup('meta[name="keywords"]').attr("content") || null;
    robots = soup('meta[name="robots"]').attr("content") || null;
--- a/apps/api/src/scraper/WebScraper/utils/pdfProcessor.ts
+++ b/apps/api/src/scraper/WebScraper/utils/pdfProcessor.ts
@ -76,7 +76,6 @@ export async function processPdfToText(filePath: string, parsePDF: boolean): Pro
      let attempt = 0;
      const maxAttempts = 10; // Maximum number of attempts
      let resultAvailable = false;
      while (attempt < maxAttempts && !resultAvailable) {
        try {
          resultResponse = await axios.get(resultUrl, { headers, timeout: (axiosTimeout * 2) });
@ -90,13 +89,22 @@ export async function processPdfToText(filePath: string, parsePDF: boolean): Pro
        } catch (error) {
          Logger.debug("Error fetching result w/ LlamaIndex");
          attempt++;
          if (attempt >= maxAttempts) {
            Logger.error("Max attempts reached, unable to fetch result.");
            break; // Exit the loop if max attempts are reached
          }
          await new Promise((resolve) => setTimeout(resolve, 500)); // Wait for 0.5 seconds before retrying
          // You may want to handle specific errors differently
        }
      }
      if (!resultAvailable) {
        try {
          content = await processPdf(filePath);
        } catch (error) {
          Logger.error(`Failed to process PDF: ${error}`);
          content = "";
        }
      }
      content = resultResponse.data[resultType];
    } catch (error) {
@ -104,15 +112,29 @@ export async function processPdfToText(filePath: string, parsePDF: boolean): Pro
      content = await processPdf(filePath);
    }
  } else if (parsePDF) {
    try {
      content = await processPdf(filePath);
    } catch (error) {
      Logger.error(`Failed to process PDF: ${error}`);
      content = "";
    }
  } else {
    try {
      content = fs.readFileSync(filePath, "utf-8");
    } catch (error) {
      Logger.error(`Failed to read PDF file: ${error}`);
      content = "";
    }
  }
  return content;
 }
 /**
  * Reads a PDF from disk (synchronously) and returns the `text` field of the
  * result produced by `pdf`.
  *
  * Errors from reading or parsing are not handled here; they reject the
  * returned promise unchanged.
  *
  * @param file Path of the PDF file on disk.
  */
 async function processPdf(file: string) {
  const buffer = fs.readFileSync(file);
  const parsed = await pdf(buffer);
  return parsed.text;
 }
--- a/apps/api/src/scraper/WebScraper/utils/utils.ts
+++ b/apps/api/src/scraper/WebScraper/utils/utils.ts
@ -41,10 +41,10 @@ export function extractLinks(html: string, baseUrl: string): string[] {
        links.push(href);
      } else if (href.startsWith('/')) {
        // Relative URL starting with '/', append to origin
-        links.push(`${origin}${href}`);
+        links.push(new URL(href, baseUrl).href);
      } else if (!href.startsWith('#') && !href.startsWith('mailto:')) {
        // Relative URL not starting with '/', append to base URL
-        links.push(`${baseUrl}/${href}`);
+        links.push(new URL(href, baseUrl).href);
      } else if (href.startsWith('mailto:')) {
        // mailto: links, add as is
        links.push(href);
--- a/apps/api/src/search/fireEngine.ts
+++ b/apps/api/src/search/fireEngine.ts
@ -0,0 +1,44 @@
 import axios from "axios";
 import dotenv from "dotenv";
 import { SearchResult } from "../../src/lib/entities";
 dotenv.config();
 /**
  * Sends a search query to the Fire Engine beta `/search` endpoint.
  *
  * @param q - Search query, sent as `query` in the request body.
  * @param options - Search parameters. `lang`, `country`, `location`, `tbs`
  *   and `numResults` are forwarded unchanged; `page` defaults to 1 when
  *   omitted. `filter` is accepted but is not part of the request body.
  * @returns The response body from the endpoint, or an empty array when
  *   `FIRE_ENGINE_BETA_URL` is not set or the response has no body.
  *
  * Errors raised by the `axios` call are not caught here and propagate to the
  * caller.
  */
 export async function fireEngineMap(q: string, options: {
    tbs?: string;
    filter?: string;
    lang?: string;
    country?: string;
    location?: string;
    numResults: number;
    page?: number;
 }): Promise<SearchResult[]> {
  // Nothing to query without a configured endpoint; bail out before doing any work.
  if (!process.env.FIRE_ENGINE_BETA_URL) {
    return [];
  }
  const data = JSON.stringify({
    query: q,
    lang: options.lang,
    country: options.country,
    location: options.location,
    tbs: options.tbs,
    numResults: options.numResults,
    page: options.page ?? 1,
  });
  const config = {
    method: "POST",
    url: `${process.env.FIRE_ENGINE_BETA_URL}/search`,
    headers: {
      "Content-Type": "application/json",
    },
    data: data,
  };
  const response = await axios(config);
  // Guard on the body itself so a body-less response yields [] instead of undefined.
  if (response && response.data) {
    return response.data;
  }
  return [];
 }
--- a/apps/api/src/search/googlesearch.ts
+++ b/apps/api/src/search/googlesearch.ts
@ -52,7 +52,7 @@ async function _req(term: string, results: number, lang: string, country: string
-export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", country = "us", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<SearchResult[]> {
+export async function googleSearch(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", country = "us", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<SearchResult[]> {
    let proxies = null;
    if (proxy) {
        if (proxy.startsWith("https")) {
--- a/apps/api/src/search/index.ts
+++ b/apps/api/src/search/index.ts
@ -1,11 +1,9 @@
 import { Logger } from "../../src/lib/logger";
 import { SearchResult } from "../../src/lib/entities";
-import { google_search } from "./googlesearch";
+import { googleSearch } from "./googlesearch";
 import { fireEngineMap } from "./fireEngine";
 import { serper_search } from "./serper";
 export async function search({
  query,
  advanced = false,
@ -30,12 +28,20 @@ export async function search({
  proxy?: string;
  sleep_interval?: number;
  timeout?: number;
-}) : Promise<SearchResult[]> {
+}): Promise<SearchResult[]> {
  try {
-    if (process.env.SERPER_API_KEY ) {
+    
-      return await serper_search(query, {num_results, tbs, filter, lang, country, location});
+    if (process.env.SERPER_API_KEY) {
      return await serper_search(query, {
        num_results,
        tbs,
        filter,
        lang,
        country,
        location,
      });
    }
-    return await google_search(
+    return await googleSearch(
      query,
      advanced,
      num_results,
@ -49,7 +55,6 @@ export async function search({
    );
  } catch (error) {
    Logger.error(`Error in search function: ${error}`);
-    return []
+    return [];
  }
  // if process.env.SERPER_API_KEY is set, use serper
 }
--- a/apps/api/src/services/alerts/index.ts
+++ b/apps/api/src/services/alerts/index.ts
@ -1,5 +1,5 @@
 import { Logger } from "../../../src/lib/logger";
-import { getWebScraperQueue } from "../queue-service";
+import { getScrapeQueue } from "../queue-service";
 import { sendSlackWebhook } from "./slack";
 export async function checkAlerts() {
@ -13,8 +13,8 @@ export async function checkAlerts() {
      Logger.info("Initializing alerts");
      const checkActiveJobs = async () => {
        try {
-          const webScraperQueue = getWebScraperQueue();
+          const scrapeQueue = getScrapeQueue();
-          const activeJobs = await webScraperQueue.getActiveCount();
+          const activeJobs = await scrapeQueue.getActiveCount();
          if (activeJobs > Number(process.env.ALERT_NUM_ACTIVE_JOBS)) {
            Logger.warn(
              `Alert: Number of active jobs is over ${process.env.ALERT_NUM_ACTIVE_JOBS}. Current active jobs: ${activeJobs}.`
@ -34,8 +34,8 @@ export async function checkAlerts() {
      };
      const checkWaitingQueue = async () => {
-        const webScraperQueue = getWebScraperQueue();
+        const scrapeQueue = getScrapeQueue();
-        const waitingJobs = await webScraperQueue.getWaitingCount();
+        const waitingJobs = await scrapeQueue.getWaitingCount();
        if (waitingJobs > Number(process.env.ALERT_NUM_WAITING_JOBS)) {
          Logger.warn(
@ -49,7 +49,7 @@ export async function checkAlerts() {
      };
      const checkAll = async () => {
-        await checkActiveJobs();
+        // await checkActiveJobs();
        await checkWaitingQueue();
      };
--- a/apps/api/src/services/billing/credit_billing.ts
+++ b/apps/api/src/services/billing/credit_billing.ts
@ -3,9 +3,13 @@ import { withAuth } from "../../lib/withAuth";
 import { sendNotification } from "../notification/email_notification";
 import { supabase_service } from "../supabase";
 import { Logger } from "../../lib/logger";
 import { getValue, setValue } from "../redis";
 import { redlock } from "../redlock";
 const FREE_CREDITS = 500;
 /**
  * Bills a team by delegating to `supaBillTeam` through the `withAuth` wrapper.
  *
  * @param team_id - Identifier of the team to bill.
  * @param credits - Number of credits to bill.
  * @returns Whatever the wrapped `supaBillTeam` call returns.
  */
 export async function billTeam(team_id: string, credits: number) {
  const guardedBillTeam = withAuth(supaBillTeam);
  return guardedBillTeam(team_id, credits);
 }
@ -164,10 +168,11 @@ export async function supaBillTeam(team_id: string, credits: number) {
 /**
  * Checks a team's credits by delegating to `supaCheckTeamCredits` through the
  * `withAuth` wrapper.
  *
  * @param team_id - Identifier of the team to check.
  * @param credits - Number of credits the pending operation needs.
  * @returns Whatever the wrapped `supaCheckTeamCredits` call returns.
  */
 export async function checkTeamCredits(team_id: string, credits: number) {
  const guardedCheckTeamCredits = withAuth(supaCheckTeamCredits);
  return guardedCheckTeamCredits(team_id, credits);
 }
 // if team has enough credits for the operation, return true, else return false
 export async function supaCheckTeamCredits(team_id: string, credits: number) {
  if (team_id === "preview") {
-    return { success: true, message: "Preview team, no credits used" };
+    return { success: true, message: "Preview team, no credits used", remainingCredits: Infinity };
  }
  // Retrieve the team's active subscription and check for available coupons concurrently
@ -198,7 +203,7 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) {
  if (subscriptionError || !subscription) {
    // If there is no active subscription but there are available coupons
    if (couponCredits >= credits) {
-      return { success: true, message: "Sufficient credits available" };
+      return { success: true, message: "Sufficient credits available", remainingCredits: couponCredits };
    }
    const { data: creditUsages, error: creditUsageError } =
@ -248,13 +253,26 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) {
      return {
        success: false,
        message: "Insufficient credits, please upgrade!",
        remainingCredits: FREE_CREDITS - totalCreditsUsed
      };
    }
-    return { success: true, message: "Sufficient credits available" };
+    return { success: true, message: "Sufficient credits available", remainingCredits: FREE_CREDITS - totalCreditsUsed };
  }
  let totalCreditsUsed = 0;
  const cacheKey = `credit_usage_${subscription.id}_${subscription.current_period_start}_${subscription.current_period_end}_lc`;
  const redLockKey = `lock_${cacheKey}`;
  const lockTTL = 10000; // 10 seconds
  try {
    const lock = await redlock.acquire([redLockKey], lockTTL);
    try {
      const cachedCreditUsage = await getValue(cacheKey);
      if (cachedCreditUsage) {
        totalCreditsUsed = parseInt(cachedCreditUsage);
      } else {
        const { data: creditUsages, error: creditUsageError } =
          await supabase_service.rpc("get_credit_usage_2", {
            sub_id: subscription.id,
@ -268,9 +286,15 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) {
        if (creditUsages && creditUsages.length > 0) {
          totalCreditsUsed = creditUsages[0].total_credits_used;
          await setValue(cacheKey, totalCreditsUsed.toString(), 1800); // Cache for 30 minutes
          // Logger.info(`Cache set for credit usage: ${totalCreditsUsed}`);
        }
      }
    } finally {
      await lock.release();
    }
  } catch (error) {
-    Logger.error(`Error calculating credit usage: ${error}`);
+    Logger.error(`Error acquiring lock or calculating credit usage: ${error}`);
  }
  // Adjust total credits used by subtracting coupon value
@ -299,7 +323,7 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) {
      subscription.current_period_start,
      subscription.current_period_end
    );
-    return { success: false, message: "Insufficient credits, please upgrade!" };
+    return { success: false, message: "Insufficient credits, please upgrade!", remainingCredits: creditLimit - adjustedCreditsUsed };
  } else if (creditUsagePercentage >= 0.8) {
    // Send email notification for approaching credit limit
    await sendNotification(
@ -310,7 +334,7 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) {
    );
  }
-  return { success: true, message: "Sufficient credits available" };
+  return { success: true, message: "Sufficient credits available", remainingCredits: creditLimit - adjustedCreditsUsed };
 }
 // Count the total credits used by a team within the current billing period and return the remaining credits.
--- a/apps/api/src/services/logging/log_job.ts
+++ b/apps/api/src/services/logging/log_job.ts
@ -40,10 +40,11 @@ export async function logJob(job: FirecrawlJob) {
          extractor_options: job.extractor_options,
          num_tokens: job.num_tokens,
          retry: !!job.retry,
          crawl_id: job.crawl_id,
        },
      ]);
-    if (process.env.POSTHOG_API_KEY) {
+    if (process.env.POSTHOG_API_KEY && !job.crawl_id) {
      let phLog = {
        distinctId: "from-api", //* To identify this on the group level, setting distinctid to a static string per posthog docs: https://posthog.com/docs/product-analytics/group-analytics#advanced-server-side-only-capturing-group-events-without-a-user
        ...(job.team_id !== "preview" && {
--- a/apps/api/src/services/logging/scrape_log.ts
+++ b/apps/api/src/services/logging/scrape_log.ts
@ -44,9 +44,9 @@ export async function logScrape(
    ]);
    if (error) {
-      Logger.error(`Error logging proxy:\n${error}`);
+      Logger.error(`Error logging proxy:\n${JSON.stringify(error)}`);
    }
  } catch (error) {
-    Logger.error(`Error logging proxy:\n${error}`);
+    Logger.error(`Error logging proxy:\n${JSON.stringify(error)}`);
  }
 }
--- a/apps/api/src/services/queue-jobs.ts
+++ b/apps/api/src/services/queue-jobs.ts
@ -1,28 +1,15 @@
 import { Job, Queue } from "bullmq";
-import {
+import { getScrapeQueue } from "./queue-service";
  getScrapeQueue,
  getWebScraperQueue,
 } from "./queue-service";
 import { v4 as uuidv4 } from "uuid";
 import { WebScraperOptions } from "../types";
 /**
  * Adds a job to the web scraper queue.
  *
  * @param webScraperOptions - Payload stored as the job's data.
  * @param options - Extra job options; spread first, so the `jobId` below always wins.
  * @param jobId - Used as both the job name and its id; defaults to a fresh UUID v4.
  * @returns The job returned by the queue's `add` call.
  */
 export async function addWebScraperJob(
  webScraperOptions: WebScraperOptions,
  options: any = {},
  jobId: string = uuidv4(),
 ): Promise<Job> {
  const queue = getWebScraperQueue();
  const jobOptions = { ...options, jobId };
  return await queue.add(jobId, webScraperOptions, jobOptions);
 }
 export async function addScrapeJob(
  webScraperOptions: WebScraperOptions,
  options: any = {},
  jobId: string = uuidv4(),
 ): Promise<Job> {
  return await getScrapeQueue().add(jobId, webScraperOptions, {
    priority: webScraperOptions.crawl_id ? 20 : 10,
    ...options,
    jobId,
  });
--- a/apps/api/src/services/queue-service.ts
+++ b/apps/api/src/services/queue-service.ts
@ -2,38 +2,13 @@ import { Queue } from "bullmq";
 import { Logger } from "../lib/logger";
 import IORedis from "ioredis";
 let webScraperQueue: Queue;
 let scrapeQueue: Queue;
 export const redisConnection = new IORedis(process.env.REDIS_URL, {
  maxRetriesPerRequest: null,
 });
 export const webScraperQueueName = "{crawlQueue}";
 export const scrapeQueueName = "{scrapeQueue}";
 /**
  * Returns the module-level web scraper queue, creating it on first use.
  *
  * The queue is built with only the shared Redis connection; no custom
  * settings or default job options are passed.
  *
  * @returns The `Queue` instance bound to `webScraperQueueName`.
  */
 export function getWebScraperQueue() {
  // Reuse the instance created by an earlier call.
  if (webScraperQueue) {
    return webScraperQueue;
  }
  webScraperQueue = new Queue(webScraperQueueName, {
    connection: redisConnection,
  });
  Logger.info("Web scraper queue created");
  return webScraperQueue;
 }
 export function getScrapeQueue() {
  if (!scrapeQueue) {
@ -63,4 +38,3 @@ export function getScrapeQueue() {
 import { QueueEvents } from 'bullmq';
 export const scrapeQueueEvents = new QueueEvents(scrapeQueueName, { connection: redisConnection });
 export const webScraperQueueEvents = new QueueEvents(webScraperQueueName, { connection: redisConnection });
--- a/apps/api/src/services/queue-worker.ts
+++ b/apps/api/src/services/queue-worker.ts
@ -1,23 +1,24 @@
 import "dotenv/config";
 import { CustomError } from "../lib/custom-error";
 import {
  getWebScraperQueue,
  getScrapeQueue,
  redisConnection,
  webScraperQueueName,
  scrapeQueueName,
 } from "./queue-service";
 import "dotenv/config";
 import { logtail } from "./logtail";
 import { startWebScraperPipeline } from "../main/runWebScraper";
 import { callWebhook } from "./webhook";
 import { logJob } from "./logging/log_job";
 import { initSDK } from "@hyperdx/node-opentelemetry";
-import { Job, QueueEvents, tryCatch } from "bullmq";
+import { Job } from "bullmq";
 import { Logger } from "../lib/logger";
 import { ScrapeEvents } from "../lib/scrape-events";
 import { Worker } from "bullmq";
 import systemMonitor from "./system-monitor";
 import { v4 as uuidv4 } from "uuid";
 import { addCrawlJob, addCrawlJobDone, crawlToCrawler, finishCrawl, getCrawl, getCrawlJobs, lockURL } from "../lib/crawl-redis";
 import { StoredCrawl } from "../lib/crawl-redis";
 import { addScrapeJob } from "./queue-jobs";
 import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
 if (process.env.ENV === "production") {
  initSDK({
@ -33,30 +34,29 @@ const workerStalledCheckInterval =
 const jobLockExtendInterval =
  Number(process.env.JOB_LOCK_EXTEND_INTERVAL) || 15000;
 const jobLockExtensionTime =
-  Number(process.env.JOB_LOCK_EXTENSION_TIME) || 15000;
+  Number(process.env.JOB_LOCK_EXTENSION_TIME) || 60000;
 const cantAcceptConnectionInterval =
  Number(process.env.CANT_ACCEPT_CONNECTION_INTERVAL) || 2000;
 const connectionMonitorInterval =
  Number(process.env.CONNECTION_MONITOR_INTERVAL) || 10;
 const gotJobInterval = Number(process.env.CONNECTION_MONITOR_INTERVAL) || 20;
 const wsq = getWebScraperQueue();
 const sq = getScrapeQueue();
 const processJobInternal = async (token: string, job: Job) => {
  const extendLockInterval = setInterval(async () => {
    Logger.info(`🐂 Worker extending lock on job ${job.id}`);
    await job.extendLock(token, jobLockExtensionTime);
  }, jobLockExtendInterval);
  try {
    const result = await processJob(job, token);
    const jobState = await job.getState();
    if(jobState !== "completed" && jobState !== "failed"){
    try{
-        await job.moveToCompleted(result.docs, token, false); //3rd arg fetchNext
+      if (job.data.crawl_id && process.env.USE_DB_AUTHENTICATION === "true") {
-      }catch(e){
+        await job.moveToCompleted(null, token, false);
-        // console.log("Job already completed, error:", e);
+      } else {
        await job.moveToCompleted(result.docs, token, false);
      }
    }catch(e){
    }
  } catch (error) {
    console.log("Job failed, error:", error);
@ -110,11 +110,10 @@ const workerFun = async (queueName: string, processJobInternal: (token: string,
  }
 };
 workerFun(webScraperQueueName, processJobInternal);
 workerFun(scrapeQueueName, processJobInternal);
 async function processJob(job: Job, token: string) {
-  Logger.debug(`🐂 Worker taking job ${job.id}`);
+  Logger.info(`🐂 Worker taking job ${job.id}`);
  try {
    job.updateProgress({
@ -131,18 +130,16 @@ async function processJob(job: Job, token: string) {
    const end = Date.now();
    const timeTakenInSeconds = (end - start) / 1000;
-    const isCancelled = await (await getWebScraperQueue().client).exists("cancelled:" + job.id);
+    const rawHtml = docs[0].rawHtml;
-    if (isCancelled) {
+    if (job.data.crawl_id && (!job.data.pageOptions || !job.data.pageOptions.includeRawHtml)) {
-      await job.discard();
+      delete docs[0].rawHtml;
      await job.moveToFailed(Error("Job cancelled by user"), job.token);
      await job.discard();
    }
    const data = {
      success,
      result: {
-        links: isCancelled ? [] : docs.map((doc) => {
+        links: docs.map((doc) => {
          return {
            content: doc,
            source: doc?.metadata?.sourceURL ?? doc?.url ?? "",
@ -150,20 +147,21 @@ async function processJob(job: Job, token: string) {
        }),
      },
      project_id: job.data.project_id,
-      error: isCancelled ? "Job cancelled by user" : message /* etc... */,
+      error: message /* etc... */,
-      docs: isCancelled ? [] : docs,
+      docs,
    };
-    if (job.data.mode === "crawl" && !isCancelled) {
+    if (job.data.mode === "crawl") {
-      await callWebhook(job.data.team_id, job.id as string, data);
+      await callWebhook(job.data.team_id, job.id as string, data, job.data.webhook);
    }
    if (job.data.crawl_id) {
      await logJob({
        job_id: job.id as string,
-      success: success && !isCancelled,
+        success: success,
-      message: isCancelled ? "Job cancelled by user" : message,
+        message: message,
-      num_docs: isCancelled ? 0 : docs.length,
+        num_docs: docs.length,
-      docs: isCancelled ? [] : docs,
+        docs: docs,
        time_taken: timeTakenInSeconds,
        team_id: job.data.team_id,
        mode: job.data.mode,
@ -171,15 +169,110 @@ async function processJob(job: Job, token: string) {
        crawlerOptions: job.data.crawlerOptions,
        pageOptions: job.data.pageOptions,
        origin: job.data.origin,
        crawl_id: job.data.crawl_id,
      });
-    Logger.debug(`🐂 Job done ${job.id}`);
+
      await addCrawlJobDone(job.data.crawl_id, job.id);
      const sc = await getCrawl(job.data.crawl_id) as StoredCrawl;
      if (!job.data.sitemapped) {
        if (!sc.cancelled) {
          const crawler = crawlToCrawler(job.data.crawl_id, sc);
          const links = crawler.filterLinks(
            crawler.extractLinksFromHTML(rawHtml ?? "", sc.originUrl),
            Infinity,
            sc.crawlerOptions?.maxDepth ?? 10
          )
          for (const link of links) {
            if (await lockURL(job.data.crawl_id, sc, link)) {
              const newJob = await addScrapeJob({
                url: link,
                mode: "single_urls",
                crawlerOptions: sc.crawlerOptions,
                team_id: sc.team_id,
                pageOptions: sc.pageOptions,
                origin: job.data.origin,
                crawl_id: job.data.crawl_id,
              });
              await addCrawlJob(job.data.crawl_id, newJob.id);
            }
          }
        }
      }
      if (await finishCrawl(job.data.crawl_id)) {
        const jobIDs = await getCrawlJobs(job.data.crawl_id);
        const jobs = (await Promise.all(jobIDs.map(async x => {
          if (x === job.id) {
            return {
              async getState() {
                return "completed"
              },
              timestamp: Date.now(),
              returnvalue: docs,
            }
          }
          const j = await getScrapeQueue().getJob(x);
          if (process.env.USE_DB_AUTHENTICATION === "true") {
            const supabaseData = await supabaseGetJobById(j.id);
            if (supabaseData) {
              j.returnvalue = supabaseData.docs;
            }
          }
          return j;
        }))).sort((a, b) => a.timestamp - b.timestamp);
        const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
        const jobStatus = sc.cancelled || jobStatuses.some(x => x === "failed") ? "failed" : "completed";
        const fullDocs = jobs.map(x => Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue);
        await logJob({
          job_id: job.data.crawl_id,
          success: jobStatus === "completed",
          message: sc.cancelled ? "Cancelled" : message,
          num_docs: fullDocs.length,
          docs: [],
          time_taken: (Date.now() - sc.createdAt) / 1000,
          team_id: job.data.team_id,
          mode: "crawl",
          url: sc.originUrl,
          crawlerOptions: sc.crawlerOptions,
          pageOptions: sc.pageOptions,
          origin: job.data.origin,
        });
        const data = {
          success: jobStatus !== "failed",
          result: {
            links: fullDocs.map((doc) => {
              return {
                content: doc,
                source: doc?.metadata?.sourceURL ?? doc?.url ?? "",
              };
            }),
          },
          project_id: job.data.project_id,
          error: message /* etc... */,
          docs: fullDocs,
        };
        await callWebhook(job.data.team_id, job.data.crawl_id, data);
      }
    }
    Logger.info(`🐂 Job done ${job.id}`);
    return data;
  } catch (error) {
    Logger.error(`🐂 Job errored ${job.id} - ${error}`);
    if (await getWebScraperQueue().isPaused()) {
      Logger.debug("🐂Queue is paused, ignoring");
      return;
    }
    if (error instanceof CustomError) {
      // Here we handle the error, then save the failed job
@ -192,6 +285,9 @@ async function processJob(job: Job, token: string) {
      });
    }
    Logger.error(error);
    if (error.stack) {
      Logger.error(error.stack);
    }
    logtail.error("Overall error ingesting", {
      job_id: job.id,
@ -205,9 +301,12 @@ async function processJob(job: Job, token: string) {
      error:
        "Something went wrong... Contact help@mendable.ai or try again." /* etc... */,
    };
-    if (job.data.mode === "crawl") {
+    
-      await callWebhook(job.data.team_id, job.id as string, data);
+    if (job.data.mode === "crawl" || job.data.crawl_id) {
      await callWebhook(job.data.team_id, job.data.crawl_id ?? job.id as string, data);
    }
    if (job.data.crawl_id) {
      await logJob({
        job_id: job.id as string,
        success: false,
@ -219,12 +318,34 @@ async function processJob(job: Job, token: string) {
        docs: [],
        time_taken: 0,
        team_id: job.data.team_id,
-      mode: "crawl",
+        mode: job.data.mode,
        url: job.data.url,
        crawlerOptions: job.data.crawlerOptions,
        pageOptions: job.data.pageOptions,
        origin: job.data.origin,
        crawl_id: job.data.crawl_id,
      });
      const sc = await getCrawl(job.data.crawl_id);
      await logJob({
        job_id: job.data.crawl_id,
        success: false,
        message:
          typeof error === "string"
            ? error
            : error.message ?? "Something went wrong... Contact help@mendable.ai",
        num_docs: 0,
        docs: [],
        time_taken: 0,
        team_id: job.data.team_id,
        mode: "crawl",
        url: sc ? sc.originUrl : job.data.url,
        crawlerOptions: sc ? sc.crawlerOptions : job.data.crawlerOptions,
        pageOptions: sc ? sc.pageOptions : job.data.pageOptions,
        origin: job.data.origin,
      });
    }
    // done(null, data);
    return data;
  }
--- a/apps/api/src/services/rate-limiter.ts
+++ b/apps/api/src/services/rate-limiter.ts
@ -14,18 +14,20 @@ const RATE_LIMITS = {
    standardNew: 10,
    standardnew: 10,
    growth: 50,
    growthdouble: 50,
  },
  scrape: {
    default: 20,
    free: 5,
    starter: 20,
-    standard: 50,
+    standard: 100,
    standardOld: 40,
    scale: 500,
    hobby: 10,
-    standardNew: 50,
+    standardNew: 100,
-    standardnew: 50,
+    standardnew: 100,
-    growth: 500,
+    growth: 1000,
    growthdouble: 1000,
  },
  search: {
    default: 20,
@ -38,6 +40,20 @@ const RATE_LIMITS = {
    standardNew: 50,
    standardnew: 50,
    growth: 500,
    growthdouble: 500,
  },
  map:{
    default: 20,
    free: 5,
    starter: 20,
    standard: 40,
    standardOld: 40,
    scale: 500,
    hobby: 10,
    standardNew: 50,
    standardnew: 50,
    growth: 500,
    growthdouble: 500,
  },
  preview: {
    free: 5,
--- a/apps/api/src/services/redlock.ts
+++ b/apps/api/src/services/redlock.ts
@ -0,0 +1,29 @@
 import Redlock from "redlock";
 import Client from "ioredis";
 // One client per independent Redis node or cluster; here a single client
 // pointed at REDIS_RATE_LIMIT_URL.
 const redlockClients = [new Client(process.env.REDIS_RATE_LIMIT_URL)];
 // Lock acquisition tuning. For background on clock drift see
 // http://redis.io/topics/distlock; for retry jitter see
 // https://www.awsarchitectureblog.com/2015/03/backoff.html
 const redlockSettings = {
  // Multiplied by the lock TTL to determine the expected clock drift time.
  driftFactor: 0.01,
  // Maximum number of attempts to lock a resource before erroring.
  retryCount: 5,
  // Time in ms between attempts.
  retryDelay: 100,
  // Maximum time in ms randomly added to retries, to improve performance
  // under high contention.
  retryJitter: 200,
  // Minimum remaining time in ms on a lock before an extension is
  // automatically attempted with the `using` API.
  automaticExtensionThreshold: 500,
 };
 /** Shared Redlock instance used to take distributed locks. */
 export const redlock = new Redlock(redlockClients, redlockSettings);
--- a/apps/api/src/services/supabase.ts
+++ b/apps/api/src/services/supabase.ts
@ -36,17 +36,9 @@ export const supabase_service: SupabaseClient = new Proxy(
  new SupabaseService(),
  {
    get: function (target, prop, receiver) {
      if (process.env.USE_DB_AUTHENTICATION === "false") {
        Logger.debug(
          "Attempted to access Supabase client when it's not configured."
        );
      }
      const client = target.getClient();
      // If the Supabase client is not initialized, intercept property access to provide meaningful error feedback.
      if (client === null) {
        Logger.error(
          "Attempted to access Supabase client when it's not configured."
        );
        return () => {
          throw new Error("Supabase client is not configured.");
        };
--- a/apps/api/src/services/webhook.ts
+++ b/apps/api/src/services/webhook.ts
@ -1,15 +1,15 @@
 import { Logger } from "../../src/lib/logger";
 import { supabase_service } from "./supabase";
-export const callWebhook = async (teamId: string, jobId: string,data: any) => {
+export const callWebhook = async (teamId: string, jobId: string, data: any, specified?: string) => {
  try {
    const selfHostedUrl = process.env.SELF_HOSTED_WEBHOOK_URL?.replace("{{JOB_ID}}", jobId);
    const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
-    let webhookUrl = selfHostedUrl;
+    let webhookUrl = specified ?? selfHostedUrl;
-    // Only fetch the webhook URL from the database if the self-hosted webhook URL is not set
+    // Only fetch the webhook URL from the database if the self-hosted webhook URL and specified webhook are not set
    // and the USE_DB_AUTHENTICATION environment variable is set to true
-    if (!selfHostedUrl && useDbAuthentication) {
+    if (!webhookUrl && useDbAuthentication) {
      const { data: webhooksData, error } = await supabase_service
        .from("webhooks")
        .select("url")
--- a/apps/api/src/types.ts
+++ b/apps/api/src/types.ts
@ -28,6 +28,9 @@ export interface WebScraperOptions {
  extractorOptions?: any;
  team_id: string;
  origin?: string;
  crawl_id?: string;
  sitemapped?: boolean;
  webhook?: string;
 }
 export interface RunWebScraperParams {
@ -41,6 +44,7 @@ export interface RunWebScraperParams {
  onError: (error: Error) => void;
  team_id: string;
  bull_job_id: string;
  priority?: number;
 }
 export interface RunWebScraperResult {
@ -65,6 +69,7 @@ export interface FirecrawlJob {
  extractor_options?: ExtractorOptions,
  num_tokens?: number,
  retry?: boolean,
  crawl_id?: string;
 }
 export interface FirecrawlScrapeResponse {
@ -101,6 +106,7 @@ export enum RateLimiterMode {
  Scrape = "scrape",
  Preview = "preview",
  Search = "search",
  Map = "map",
 }
@ -110,6 +116,7 @@ export interface AuthResponse {
  error?: string;
  status?: number;
  plan?: string;
  api_key?: string;
 }
--- a/apps/api/tsconfig.json
+++ b/apps/api/tsconfig.json
@ -8,10 +8,6 @@
    "sourceMap": true,
    "outDir": "./dist/src",
    "moduleResolution": "node",
    "baseUrl": ".",
    "paths": {
      "*": ["node_modules/*", "src/types/*"],
    }
  },
  "include": ["src/","src/**/*", "services/db/supabase.ts", "utils/utils.ts", "services/db/supabaseEmbeddings.ts", "utils/EventEmmitter.ts", "src/services/queue-service.ts"]
 }
--- a/apps/go-sdk/examples/.gitignore
+++ b/apps/go-sdk/examples/.gitignore
@ -0,0 +1,25 @@
 # If you prefer the allow list template instead of the deny list, see community template:
 # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
 #
 # Binaries for programs and plugins
 *.exe
 *.exe~
 *.dll
 *.so
 *.dylib
 # Test binary, built with `go test -c`
 *.test
 # Output of the go coverage tool, specifically when used with LiteIDE
 *.out
 # Dependency directories (remove the comment below to include it)
 # vendor/
 # Go workspace file
 go.work
 go.work.sum
 # env file
 .env
--- a/apps/go-sdk/examples/LICENSE
+++ b/apps/go-sdk/examples/LICENSE
@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2024 Mendable
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/apps/go-sdk/examples/example.go
+++ b/apps/go-sdk/examples/example.go
@ -0,0 +1,87 @@
 package main
 import (
 	"encoding/json"
 	"fmt"
 	"log"
 	"github.com/google/uuid"
 	"github.com/mendableai/firecrawl-go"
 )
 // main walks through the Firecrawl Go SDK: it scrapes a page, crawls a site
 // and runs an LLM extraction with a JSON schema, printing each result.
 // Any failure aborts the program via log.Fatalf.
 func main() {
 	client, err := firecrawl.NewFirecrawlApp("fc-YOUR_API_KEY", "https://api.firecrawl.dev")
 	if err != nil {
 		log.Fatalf("Failed to create FirecrawlApp: %v", err)
 	}
 	// Scrape a single page and print its Markdown.
 	page, err := client.ScrapeURL("firecrawl.dev", nil)
 	if err != nil {
 		log.Fatalf("Failed to scrape URL: %v", err)
 	}
 	fmt.Println(page.Markdown)
 	// Crawl a website, skipping anything under blog/.
 	crawlOptions := map[string]any{
 		"crawlerOptions": map[string]any{
 			"excludes": []string{"blog/*"},
 		},
 	}
 	requestKey := uuid.New().String() // optional idempotency key
 	// NOTE(review): `true, 2` presumably mean "wait for completion" and a poll
 	// interval; confirm against the SDK's CrawlURL signature.
 	crawled, err := client.CrawlURL("mendable.ai", crawlOptions, true, 2, requestKey)
 	if err != nil {
 		log.Fatalf("Failed to crawl URL: %v", err)
 	}
 	crawledJSON, err := json.MarshalIndent(crawled, "", "  ")
 	if err != nil {
 		log.Fatalf("Failed to marshal crawl result: %v", err)
 	}
 	fmt.Println(string(crawledJSON))
 	// LLM extraction: describe the wanted output as a JSON schema, built
 	// inside-out from one story up to the top-level object.
 	storyFields := map[string]any{
 		"title":       map[string]string{"type": "string"},
 		"points":      map[string]string{"type": "number"},
 		"by":          map[string]string{"type": "string"},
 		"commentsURL": map[string]string{"type": "string"},
 	}
 	storySchema := map[string]any{
 		"type":       "object",
 		"properties": storyFields,
 		"required":   []string{"title", "points", "by", "commentsURL"},
 	}
 	topStories := map[string]any{
 		"type":        "array",
 		"items":       storySchema,
 		"minItems":    5,
 		"maxItems":    5,
 		"description": "Top 5 stories on Hacker News",
 	}
 	schema := map[string]any{
 		"type":       "object",
 		"properties": map[string]any{"top": topStories},
 		"required":   []string{"top"},
 	}
 	extractionParams := map[string]any{
 		"extractorOptions": firecrawl.ExtractorOptions{
 			ExtractionSchema: schema,
 			Mode:             "llm-extraction",
 		},
 		"pageOptions": map[string]any{
 			"onlyMainContent": true,
 		},
 	}
 	extracted, err := client.ScrapeURL("https://news.ycombinator.com", extractionParams)
 	if err != nil {
 		log.Fatalf("Failed to perform LLM extraction: %v", err)
 	}
 	// Pretty print the LLM extraction result.
 	extractedJSON, err := json.MarshalIndent(extracted.LLMExtraction, "", "  ")
 	if err != nil {
 		log.Fatalf("Failed to marshal LLM extraction result: %v", err)
 	}
 	fmt.Println(string(extractedJSON))
 }
--- a/apps/go-sdk/examples/go.mod
+++ b/apps/go-sdk/examples/go.mod
@ -0,0 +1,9 @@
 module github.com/mendableai/firecrawl-go-examples
 go 1.22.5
 replace github.com/mendableai/firecrawl => ../
 require github.com/google/uuid v1.6.0
 require github.com/mendableai/firecrawl-go v0.0.0-20240813205613-366e8d8dcf46 // indirect
--- a/apps/go-sdk/examples/go.sum
+++ b/apps/go-sdk/examples/go.sum
@ -0,0 +1,14 @@
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
 github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
 github.com/mendableai/firecrawl-go v0.0.0-20240813205613-366e8d8dcf46 h1:461um7fbSQYj2E3ETl8GINuRg5MTY3BdjMnogwUIhBs=
 github.com/mendableai/firecrawl-go v0.0.0-20240813205613-366e8d8dcf46/go.mod h1:mTGbJ37fy43aaqonp/tdpzCH516jHFw/XVvfFi4QXHo=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
 github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/apps/go-sdk/firecrawl/.env.example
+++ b/apps/go-sdk/firecrawl/.env.example
@ -0,0 +1,2 @@
 API_URL=http://localhost:3002
 TEST_API_KEY=fc-YOUR-API-KEY
--- a/apps/go-sdk/firecrawl/.gitignore
+++ b/apps/go-sdk/firecrawl/.gitignore
@ -0,0 +1,2 @@
 .env
 vendor
--- a/apps/go-sdk/firecrawl/LICENSE
+++ b/apps/go-sdk/firecrawl/LICENSE
@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2024 Sideguide Technologies Inc.
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/apps/go-sdk/firecrawl/README.md
+++ b/apps/go-sdk/firecrawl/README.md
@ -0,0 +1,189 @@
 # Firecrawl Go SDK
 The Firecrawl Go SDK is a library that allows you to easily scrape and crawl websites, and output the data in a format ready for use with language models (LLMs). It provides a simple and intuitive interface for interacting with the Firecrawl API.
 ## Installation
 To install the Firecrawl Go SDK, run:
 ```bash
 go get github.com/mendableai/firecrawl
 ```
 ## Usage
 1. Get an API key from [firecrawl.dev](https://firecrawl.dev)
 2. Set the API key as an environment variable named `FIRECRAWL_API_KEY` or pass it as the first argument to `NewFirecrawlApp`.
 Here's an example of how to use the SDK with error handling:
 ```go
 import (
 	"fmt"
 	"log"
 	"github.com/mendableai/firecrawl/firecrawl"
 )
 func main() {
 	// Initialize the FirecrawlApp with your API key
 	app, err := firecrawl.NewFirecrawlApp("YOUR_API_KEY", "") // an empty API URL falls back to FIRECRAWL_API_URL or https://api.firecrawl.dev
 	if err != nil {
 		log.Fatalf("Failed to initialize FirecrawlApp: %v", err)
 	}
 	// Scrape a single URL
 	url := "https://mendable.ai"
 	scrapedData, err := app.ScrapeURL(url, nil)
 	if err != nil {
 		log.Fatalf("Error occurred while scraping: %v", err)
 	}
 	fmt.Println(scrapedData)
 	// Crawl a website
 	crawlUrl := "https://mendable.ai"
 	params := map[string]any{
 		"pageOptions": map[string]any{
 			"onlyMainContent": true,
 		},
 	}
 	crawlResult, err := app.CrawlURL(crawlUrl, params, true, 2, "") // wait until done, poll every 2 seconds, no idempotency key
 	if err != nil {
 		log.Fatalf("Error occurred while crawling: %v", err)
 	}
 	fmt.Println(crawlResult)
 }
 ```
 ### Scraping a URL
 To scrape a single URL with error handling, use the `ScrapeURL` method. It takes the URL and an optional parameter map (pass `nil` for the defaults) and returns the scraped data as a `*FirecrawlDocument`.
 ```go
 url := "https://mendable.ai"
 scrapedData, err := app.ScrapeURL(url, nil)
 if err != nil {
 	log.Fatalf("Failed to scrape URL: %v", err)
 }
 fmt.Println(scrapedData)
 ```
 ### Extracting structured data from a URL
 With LLM extraction, you can easily extract structured data from any URL. Here is how to use it:
 ```go
 jsonSchema := map[string]any{
 	"type": "object",
 	"properties": map[string]any{
 		"top": map[string]any{
 			"type": "array",
 			"items": map[string]any{
 				"type": "object",
 				"properties": map[string]any{
 					"title":       map[string]string{"type": "string"},
 					"points":      map[string]string{"type": "number"},
 					"by":          map[string]string{"type": "string"},
 					"commentsURL": map[string]string{"type": "string"},
 				},
 				"required": []string{"title", "points", "by", "commentsURL"},
 			},
 			"minItems":    5,
 			"maxItems":    5,
 			"description": "Top 5 stories on Hacker News",
 		},
 	},
 	"required": []string{"top"},
 }
 llmExtractionParams := map[string]any{
 	"extractorOptions": firecrawl.ExtractorOptions{
 		ExtractionSchema: jsonSchema,
 	},
 }
 scrapeResult, err := app.ScrapeURL("https://news.ycombinator.com", llmExtractionParams)
 if err != nil {
 	log.Fatalf("Failed to perform LLM extraction: %v", err)
 }
 fmt.Println(scrapeResult)
 ```
 ### Search for a query
 To search the web, get the most relevant results, scrape each page and return the markdown, use the `Search` method. The method takes the query and an optional parameter map (pass `nil` for the defaults) and returns the search results.
 ```go
 query := "what is mendable?"
 searchResult, err := app.Search(query, nil)
 if err != nil {
 	log.Fatalf("Failed to search: %v", err)
 }
 fmt.Println(searchResult)
 ```
 ### Crawling a Website
 To crawl a website, use the `CrawlURL` method. It takes the starting URL, an optional parameter map, a flag that controls whether to wait for the crawl to finish, the poll interval in seconds, and an optional idempotency key. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
 ```go
 crawlParams := map[string]any{
 	"crawlerOptions": map[string]any{
 		"excludes": []string{"blog/*"},
 		"includes": []string{}, // leave empty for all pages
 		"limit": 1000,
 	},
 	"pageOptions": map[string]any{
 		"onlyMainContent": true,
 	},
 }
 crawlResult, err := app.CrawlURL("mendable.ai", crawlParams, true, 2, idempotencyKey)
 if err != nil {
 	log.Fatalf("Failed to crawl URL: %v", err)
 }
 fmt.Println(crawlResult)
 ```
 ### Checking Crawl Status
 To check the status of a crawl job, use the `CheckCrawlStatus` method. It takes the job ID as a parameter and returns the current status of the crawl job.
 ```go
 status, err := app.CheckCrawlStatus(jobId)
 if err != nil {
 	log.Fatalf("Failed to check crawl status: %v", err)
 }
 fmt.Println(status)
 ```
 ### Canceling a Crawl Job
 To cancel a crawl job, use the `CancelCrawlJob` method. It takes the job ID as a parameter and returns the cancellation status of the crawl job.
 ```go
 canceled, err := app.CancelCrawlJob(jobId)
 if err != nil {
 	log.Fatalf("Failed to cancel crawl job: %v", err)
 }
 fmt.Println(canceled)
 ```
 ## Error Handling
 The SDK handles errors returned by the Firecrawl API and reports them as Go `error` values. If a request fails, the method returns a non-nil error with a descriptive message that names the failed action and, where available, the HTTP status code and the error details sent by the API.
 ## Contributing
 Contributions to the Firecrawl Go SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository.
 ## License
 The Firecrawl Go SDK is licensed under the MIT License. This means you are free to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the SDK, subject to the following conditions:
 - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 Please note that while this SDK is MIT licensed, it is part of a larger project which may be under different licensing terms. Always refer to the license information in the root directory of the main project for overall licensing details.
--- a/apps/go-sdk/firecrawl/firecrawl.go
+++ b/apps/go-sdk/firecrawl/firecrawl.go
@ -0,0 +1,584 @@
 // Package firecrawl provides a client for interacting with the Firecrawl API.
 package firecrawl
 import (
 	"bytes"
 	"encoding/json"
 	"fmt"
 	"io"
 	"math"
 	"net/http"
 	"os"
 	"time"
 )
 // FirecrawlDocumentMetadata represents metadata for a Firecrawl document.
 //
 // Every field is optional on the wire: all JSON tags carry `omitempty`, so
 // zero values are left out when the struct is marshaled. All fields are plain
 // strings except OGLocaleAlternate (a string slice) and PageStatusCode (an int).
 type FirecrawlDocumentMetadata struct {
 	Title             string   `json:"title,omitempty"`
 	Description       string   `json:"description,omitempty"`
 	Language          string   `json:"language,omitempty"`
 	Keywords          string   `json:"keywords,omitempty"`
 	Robots            string   `json:"robots,omitempty"`
 	// og* keys: presumably Open Graph page metadata — confirm against the API docs.
 	OGTitle           string   `json:"ogTitle,omitempty"`
 	OGDescription     string   `json:"ogDescription,omitempty"`
 	OGURL             string   `json:"ogUrl,omitempty"`
 	OGImage           string   `json:"ogImage,omitempty"`
 	OGAudio           string   `json:"ogAudio,omitempty"`
 	OGDeterminer      string   `json:"ogDeterminer,omitempty"`
 	OGLocale          string   `json:"ogLocale,omitempty"`
 	OGLocaleAlternate []string `json:"ogLocaleAlternate,omitempty"`
 	OGSiteName        string   `json:"ogSiteName,omitempty"`
 	OGVideo           string   `json:"ogVideo,omitempty"`
 	// dc* / dcterms* keys: presumably Dublin Core metadata — confirm against the API docs.
 	DCTermsCreated    string   `json:"dctermsCreated,omitempty"`
 	DCDateCreated     string   `json:"dcDateCreated,omitempty"`
 	DCDate            string   `json:"dcDate,omitempty"`
 	DCTermsType       string   `json:"dctermsType,omitempty"`
 	DCType            string   `json:"dcType,omitempty"`
 	DCTermsAudience   string   `json:"dctermsAudience,omitempty"`
 	DCTermsSubject    string   `json:"dctermsSubject,omitempty"`
 	DCSubject         string   `json:"dcSubject,omitempty"`
 	DCDescription     string   `json:"dcDescription,omitempty"`
 	DCTermsKeywords   string   `json:"dctermsKeywords,omitempty"`
 	ModifiedTime      string   `json:"modifiedTime,omitempty"`
 	PublishedTime     string   `json:"publishedTime,omitempty"`
 	ArticleTag        string   `json:"articleTag,omitempty"`
 	ArticleSection    string   `json:"articleSection,omitempty"`
 	SourceURL         string   `json:"sourceURL,omitempty"`
 	// NOTE(review): PageStatusCode / PageError look like the HTTP status and
 	// error observed for the scraped page itself — confirm against the API.
 	PageStatusCode    int      `json:"pageStatusCode,omitempty"`
 	PageError         string   `json:"pageError,omitempty"`
 }
 // FirecrawlDocument represents a document in Firecrawl.
 //
 // It is the element type decoded from the `data` field of scrape, search and
 // crawl-status responses. Content is always emitted when marshaling (its tag
 // has no `omitempty`); every other field is omitted when it holds its zero
 // value. CreatedAt, UpdatedAt and Metadata are pointers, so they are nil when
 // the corresponding JSON key is absent.
 type FirecrawlDocument struct {
 	ID            string                     `json:"id,omitempty"`
 	URL           string                     `json:"url,omitempty"`
 	Content       string                     `json:"content"`
 	Markdown      string                     `json:"markdown,omitempty"`
 	HTML          string                     `json:"html,omitempty"`
 	// LLMExtraction is decoded from the snake_case key "llm_extraction".
 	LLMExtraction map[string]any             `json:"llm_extraction,omitempty"`
 	CreatedAt     *time.Time                 `json:"createdAt,omitempty"`
 	UpdatedAt     *time.Time                 `json:"updatedAt,omitempty"`
 	Type          string                     `json:"type,omitempty"`
 	Metadata      *FirecrawlDocumentMetadata `json:"metadata,omitempty"`
 	ChildrenLinks []string                   `json:"childrenLinks,omitempty"`
 	Provider      string                     `json:"provider,omitempty"`
 	Warning       string                     `json:"warning,omitempty"`
 	Index         int                        `json:"index,omitempty"`
 }
 // ExtractorOptions represents options for extraction.
 //
 // It is passed to ScrapeURL under the "extractorOptions" parameter key.
 // ScrapeURL sets Mode to "llm-extraction" when it is left empty.
 // ExtractionSchema is typed `any`; callers in this package pass a
 // JSON-schema-shaped map. All fields are omitted from the request when empty.
 type ExtractorOptions struct {
 	Mode             string `json:"mode,omitempty"`
 	ExtractionPrompt string `json:"extractionPrompt,omitempty"`
 	ExtractionSchema any    `json:"extractionSchema,omitempty"`
 }
 // ScrapeResponse represents the response for scraping operations.
 //
 // ScrapeURL decodes the response body into this type and returns Data only
 // when Success is true.
 type ScrapeResponse struct {
 	Success bool               `json:"success"`
 	Data    *FirecrawlDocument `json:"data,omitempty"`
 }
 // SearchResponse represents the response for searching operations.
 //
 // Search decodes the response body into this type and returns Data only when
 // Success is true.
 type SearchResponse struct {
 	Success bool                 `json:"success"`
 	Data    []*FirecrawlDocument `json:"data,omitempty"`
 }
 // CrawlResponse represents the response for crawling operations.
 //
 // CrawlURL decodes the reply to the crawl-start request into this type and
 // uses JobID to identify the job in later status requests.
 type CrawlResponse struct {
 	Success bool                 `json:"success"`
 	JobID   string               `json:"jobId,omitempty"`
 	Data    []*FirecrawlDocument `json:"data,omitempty"`
 }
 // JobStatusResponse represents the response for checking crawl job status.
 //
 // Status drives the polling loop in monitorJobStatus: "completed" ends the
 // loop, while "active", "paused", "pending", "queued" and "waiting" mean the
 // job is still in progress; any other value is treated as a failure.
 // Note that CurrentURL, CurrentStep and PartialData use snake_case JSON keys,
 // unlike the camelCase keys used elsewhere.
 type JobStatusResponse struct {
 	Success     bool                 `json:"success"`
 	Status      string               `json:"status"`
 	Current     int                  `json:"current,omitempty"`
 	CurrentURL  string               `json:"current_url,omitempty"`
 	CurrentStep string               `json:"current_step,omitempty"`
 	Total       int                  `json:"total,omitempty"`
 	JobID       string               `json:"jobId,omitempty"`
 	Data        []*FirecrawlDocument `json:"data,omitempty"`
 	PartialData []*FirecrawlDocument `json:"partial_data,omitempty"`
 }
 // CancelCrawlJobResponse represents the response for canceling a crawl job.
 //
 // CancelCrawlJob returns the Status field to its caller.
 type CancelCrawlJobResponse struct {
 	Success bool   `json:"success"`
 	Status  string `json:"status"`
 }
 // requestOptions represents options for making requests.
 //
 // retries is used by makeRequest as the upper bound of its attempt loop, so
 // it counts total attempts (newRequestOptions defaults it to 1).
 // backoff is the base delay in milliseconds; makeRequest multiplies it by
 // 2^attempt between attempts.
 type requestOptions struct {
 	retries int
 	backoff int
 }
 // requestOption is a functional option type for requestOptions.
 // Each option mutates the requestOptions value it is given; see withRetries
 // and withBackoff for the options defined in this package.
 type requestOption func(*requestOptions)
 // newRequestOptions builds the effective request settings.
 //
 // It starts from the defaults (a single attempt, zero backoff) and then applies
 // each supplied option in the order given, so later options win.
 //
 // Parameters:
 //   - opts: Optional request options to apply on top of the defaults.
 //
 // Returns:
 //   - *requestOptions: The resulting settings.
 func newRequestOptions(opts ...requestOption) *requestOptions {
 	cfg := new(requestOptions)
 	cfg.retries = 1
 	for i := range opts {
 		opts[i](cfg)
 	}
 	return cfg
 }
 // withRetries returns an option that overrides the retry count of a request.
 //
 // Parameters:
 //   - retries: The value to store in requestOptions.retries.
 //
 // Returns:
 //   - requestOption: A functional option that stores the given retry count.
 func withRetries(retries int) requestOption {
 	apply := func(target *requestOptions) {
 		target.retries = retries
 	}
 	return apply
 }
 // withBackoff returns an option that overrides the backoff interval of a request.
 //
 // Parameters:
 //   - backoff: The base backoff interval, in milliseconds, used between retries.
 //
 // Returns:
 //   - requestOption: A functional option that stores the given backoff interval.
 func withBackoff(backoff int) requestOption {
 	apply := func(target *requestOptions) {
 		target.backoff = backoff
 	}
 	return apply
 }
 // FirecrawlApp represents a client for the Firecrawl API.
 //
 // APIKey is sent as a Bearer token in the Authorization header of every
 // request, APIURL is the base URL that endpoint paths such as "/v0/scrape"
 // are appended to, and Client is the HTTP client used to send requests.
 // Use NewFirecrawlApp to construct a value with all three fields populated.
 type FirecrawlApp struct {
 	APIKey string
 	APIURL string
 	Client *http.Client
 }
 // NewFirecrawlApp builds a Firecrawl client from explicit arguments, falling
 // back to environment variables for anything left empty.
 //
 // Parameters:
 //   - apiKey: API key used to authenticate. When empty, the FIRECRAWL_API_KEY
 //     environment variable is used instead.
 //   - apiURL: Base URL of the Firecrawl API. When empty, the FIRECRAWL_API_URL
 //     environment variable is used, and if that is empty too the default
 //     "https://api.firecrawl.dev" applies.
 //
 // Returns:
 //   - *FirecrawlApp: A client whose HTTP client has a 60 second timeout.
 //   - error: Non-nil when no API key could be determined.
 func NewFirecrawlApp(apiKey, apiURL string) (*FirecrawlApp, error) {
 	// Resolve the key: explicit argument first, environment second.
 	if apiKey == "" {
 		apiKey = os.Getenv("FIRECRAWL_API_KEY")
 	}
 	if apiKey == "" {
 		return nil, fmt.Errorf("no API key provided")
 	}
 	// Resolve the base URL: explicit argument, environment, then the default.
 	if apiURL == "" {
 		if fromEnv := os.Getenv("FIRECRAWL_API_URL"); fromEnv != "" {
 			apiURL = fromEnv
 		} else {
 			apiURL = "https://api.firecrawl.dev"
 		}
 	}
 	app := &FirecrawlApp{
 		APIKey: apiKey,
 		APIURL: apiURL,
 		Client: &http.Client{Timeout: 60 * time.Second},
 	}
 	return app, nil
 }
 // ScrapeURL scrapes the content of the specified URL using the Firecrawl API.
 //
 // Every entry of params is forwarded in the request body. When the
 // "extractorOptions" entry is an ExtractorOptions value it is normalized first:
 // an empty Mode defaults to "llm-extraction". An "extractorOptions" entry of
 // any other type (for example a plain map) is forwarded unchanged instead of
 // being silently dropped.
 //
 // Parameters:
 //   - url: The URL to be scraped.
 //   - params: Optional parameters for the scrape request, including extractor options for LLM extraction. May be nil.
 //
 // Returns:
 //   - *FirecrawlDocument: The scraped document data.
 //   - error: An error if the request fails, the response cannot be decoded, or the API reports success=false.
 func (app *FirecrawlApp) ScrapeURL(url string, params map[string]any) (*FirecrawlDocument, error) {
 	headers := app.prepareHeaders("")
 	scrapeBody := map[string]any{"url": url}
 	// Copy all caller-supplied parameters first. Ranging over a nil map is a
 	// no-op, so no explicit nil check is required.
 	for key, value := range params {
 		scrapeBody[key] = value
 	}
 	// A typed ExtractorOptions value is normalized and replaces the raw copy.
 	if extractorOptions, ok := params["extractorOptions"].(ExtractorOptions); ok {
 		if schema, ok := extractorOptions.ExtractionSchema.(interface{ schema() any }); ok {
 			extractorOptions.ExtractionSchema = schema.schema()
 		}
 		if extractorOptions.Mode == "" {
 			extractorOptions.Mode = "llm-extraction"
 		}
 		scrapeBody["extractorOptions"] = extractorOptions
 	}
 	resp, err := app.makeRequest(
 		http.MethodPost,
 		fmt.Sprintf("%s/v0/scrape", app.APIURL),
 		scrapeBody,
 		headers,
 		"scrape URL",
 	)
 	if err != nil {
 		return nil, err
 	}
 	var scrapeResponse ScrapeResponse
 	err = json.Unmarshal(resp, &scrapeResponse)
 	if err != nil {
 		return nil, err
 	}
 	if scrapeResponse.Success {
 		return scrapeResponse.Data, nil
 	}
 	return nil, fmt.Errorf("failed to scrape URL")
 }
 // Search sends a search query to the Firecrawl API and returns the matching documents.
 //
 // The request body contains the query plus every entry of params; a "query"
 // entry in params overrides the query argument.
 //
 // Parameters:
 //   - query: The search query string.
 //   - params: Optional parameters for the search request. May be nil.
 //
 // Returns:
 //   - []*FirecrawlDocument: The documents returned by the API.
 //   - error: An error if the request fails, the response cannot be decoded, or the API reports success=false.
 func (app *FirecrawlApp) Search(query string, params map[string]any) ([]*FirecrawlDocument, error) {
 	payload := map[string]any{"query": query}
 	for name, value := range params {
 		payload[name] = value
 	}
 	endpoint := fmt.Sprintf("%s/v0/search", app.APIURL)
 	raw, err := app.makeRequest(http.MethodPost, endpoint, payload, app.prepareHeaders(""), "search")
 	if err != nil {
 		return nil, err
 	}
 	var parsed SearchResponse
 	if decodeErr := json.Unmarshal(raw, &parsed); decodeErr != nil {
 		return nil, decodeErr
 	}
 	if !parsed.Success {
 		return nil, fmt.Errorf("failed to search")
 	}
 	return parsed.Data, nil
 }
 // CrawlURL starts a crawl job for the specified URL using the Firecrawl API.
 //
 // The start request is attempted up to 3 times with a 500 ms base backoff.
 // The job ID in the response is validated before it is used, so a response
 // without a job ID fails immediately instead of polling a status URL that has
 // no ID in it.
 //
 // Parameters:
 //   - url: The URL to crawl.
 //   - params: Optional parameters for the crawl request. May be nil.
 //   - waitUntilDone: If true, the method will wait until the crawl job is completed before returning.
 //   - pollInterval: The interval (in seconds) at which to poll the job status if waitUntilDone is true.
 //   - idempotencyKey: An optional idempotency key to ensure the request is idempotent. Pass "" to omit it.
 //
 // Returns:
 //   - any: The job ID (string) if waitUntilDone is false, or the crawl result ([]*FirecrawlDocument) if waitUntilDone is true.
 //   - error: An error if the crawl request fails or no job ID is returned.
 func (app *FirecrawlApp) CrawlURL(url string, params map[string]any, waitUntilDone bool, pollInterval int, idempotencyKey string) (any, error) {
 	headers := app.prepareHeaders(idempotencyKey)
 	crawlBody := map[string]any{"url": url}
 	for k, v := range params {
 		crawlBody[k] = v
 	}
 	resp, err := app.makeRequest(
 		http.MethodPost,
 		fmt.Sprintf("%s/v0/crawl", app.APIURL),
 		crawlBody,
 		headers,
 		"start crawl job",
 		withRetries(3),
 		withBackoff(500),
 	)
 	if err != nil {
 		return nil, err
 	}
 	var crawlResponse CrawlResponse
 	err = json.Unmarshal(resp, &crawlResponse)
 	if err != nil {
 		return nil, err
 	}
 	// Both the waiting and the non-waiting path need a usable job ID.
 	if crawlResponse.JobID == "" {
 		return nil, fmt.Errorf("failed to get job ID")
 	}
 	if waitUntilDone {
 		return app.monitorJobStatus(crawlResponse.JobID, headers, pollInterval)
 	}
 	return crawlResponse.JobID, nil
 }
 // CheckCrawlStatus fetches the current status of a crawl job from the Firecrawl API.
 //
 // The status request is attempted up to 3 times with a 500 ms base backoff.
 //
 // Parameters:
 //   - jobID: The ID of the crawl job to check.
 //
 // Returns:
 //   - *JobStatusResponse: The decoded status of the crawl job.
 //   - error: An error if the request fails or the response cannot be decoded.
 func (app *FirecrawlApp) CheckCrawlStatus(jobID string) (*JobStatusResponse, error) {
 	endpoint := fmt.Sprintf("%s/v0/crawl/status/%s", app.APIURL, jobID)
 	raw, err := app.makeRequest(
 		http.MethodGet,
 		endpoint,
 		nil,
 		app.prepareHeaders(""),
 		"check crawl status",
 		withRetries(3),
 		withBackoff(500),
 	)
 	if err != nil {
 		return nil, err
 	}
 	status := new(JobStatusResponse)
 	if decodeErr := json.Unmarshal(raw, status); decodeErr != nil {
 		return nil, decodeErr
 	}
 	return status, nil
 }
 // CancelCrawlJob asks the Firecrawl API to cancel a crawl job.
 //
 // Parameters:
 //   - jobID: The ID of the crawl job to cancel.
 //
 // Returns:
 //   - string: The "status" value reported by the API for the cancellation.
 //   - error: An error if the request fails or the response cannot be decoded.
 func (app *FirecrawlApp) CancelCrawlJob(jobID string) (string, error) {
 	endpoint := fmt.Sprintf("%s/v0/crawl/cancel/%s", app.APIURL, jobID)
 	raw, err := app.makeRequest(http.MethodDelete, endpoint, nil, app.prepareHeaders(""), "cancel crawl job")
 	if err != nil {
 		return "", err
 	}
 	var parsed CancelCrawlJobResponse
 	if decodeErr := json.Unmarshal(raw, &parsed); decodeErr != nil {
 		return "", decodeErr
 	}
 	return parsed.Status, nil
 }
 // prepareHeaders builds the header set sent with every API request.
 //
 // The result always contains a JSON Content-Type and a Bearer Authorization
 // header built from app.APIKey.
 //
 // Parameters:
 //   - idempotencyKey: Value for the "x-idempotency-key" header. The header is
 //     left out when this is the empty string.
 //
 // Returns:
 //   - map[string]string: A new map containing the headers for the HTTP request.
 func (app *FirecrawlApp) prepareHeaders(idempotencyKey string) map[string]string {
 	result := make(map[string]string, 3)
 	result["Content-Type"] = "application/json"
 	result["Authorization"] = "Bearer " + app.APIKey
 	if idempotencyKey == "" {
 		return result
 	}
 	result["x-idempotency-key"] = idempotencyKey
 	return result
 }
 // makeRequest makes a request to the specified URL with the provided method, data, headers, and options.
 //
 // The request is attempted up to options.retries times (at least once). Only a
 // 502 Bad Gateway response triggers another attempt; the wait between attempts
 // is backoff * 2^attempt milliseconds. A new *http.Request is built for every
 // attempt because the body of a request that has already been sent is drained
 // and cannot be replayed, and the body of each discarded response is closed
 // before retrying so connections are not held open until the function returns.
 //
 // Parameters:
 //   - method: The HTTP method to use for the request (e.g., "GET", "POST", "DELETE").
 //   - url: The URL to send the request to.
 //   - data: The data to be sent as a JSON request body; nil sends no body.
 //   - headers: The headers to be included in the request.
 //   - action: A string describing the action being performed, used in error messages.
 //   - opts: Optional request options (see withRetries and withBackoff).
 //
 // Returns:
 //   - []byte: The response body when the final response has status 200.
 //   - error: An error if encoding, sending or reading fails, or the error built
 //     by handleError when the final status code is not 200.
 func (app *FirecrawlApp) makeRequest(method, url string, data map[string]any, headers map[string]string, action string, opts ...requestOption) ([]byte, error) {
 	var body []byte
 	if data != nil {
 		encoded, err := json.Marshal(data)
 		if err != nil {
 			return nil, err
 		}
 		body = encoded
 	}
 	options := newRequestOptions(opts...)
 	// Always perform at least one attempt so a non-positive retries value
 	// cannot leave the response unset.
 	attempts := options.retries
 	if attempts < 1 {
 		attempts = 1
 	}
 	var resp *http.Response
 	for i := 0; i < attempts; i++ {
 		req, err := http.NewRequest(method, url, bytes.NewReader(body))
 		if err != nil {
 			return nil, err
 		}
 		for key, value := range headers {
 			req.Header.Set(key, value)
 		}
 		resp, err = app.Client.Do(req)
 		if err != nil {
 			return nil, err
 		}
 		// Stop on anything other than 502, or when no attempts remain; the
 		// last response is kept so its status and body can be reported.
 		if resp.StatusCode != http.StatusBadGateway || i == attempts-1 {
 			break
 		}
 		// Drain and close the discarded response so the connection can be reused.
 		_, _ = io.Copy(io.Discard, resp.Body)
 		resp.Body.Close()
 		time.Sleep(time.Duration(math.Pow(2, float64(i))) * time.Duration(options.backoff) * time.Millisecond)
 	}
 	defer resp.Body.Close()
 	respBody, err := io.ReadAll(resp.Body)
 	if err != nil {
 		return nil, err
 	}
 	statusCode := resp.StatusCode
 	if statusCode != 200 {
 		return nil, app.handleError(statusCode, respBody, action)
 	}
 	return respBody, nil
 }
 // monitorJobStatus monitors the status of a crawl job using the Firecrawl API.
 //
 // It polls the status endpoint until the job reports "completed" with data, or
 // until a terminal condition is reached. The poll interval is clamped to a
 // minimum of 2 seconds. A job that reports "completed" without data is polled
 // again after the same interval, at most 3 more times, rather than being
 // re-requested immediately in a tight loop.
 //
 // Parameters:
 //   - jobID: The ID of the crawl job to monitor.
 //   - headers: The headers to be included in the request.
 //   - pollInterval: The interval (in seconds) at which to poll the job status; values below 2 are raised to 2.
 //
 // Returns:
 //   - []*FirecrawlDocument: The crawl result if the job is completed.
 //   - error: An error if a status request fails, the response cannot be
 //     decoded, the status is empty or unknown, or the job completes without data.
 func (app *FirecrawlApp) monitorJobStatus(jobID string, headers map[string]string, pollInterval int) ([]*FirecrawlDocument, error) {
 	// The clamp does not depend on the loop, so compute the wait once.
 	wait := time.Duration(max(pollInterval, 2)) * time.Second
 	attempts := 0
 	for {
 		resp, err := app.makeRequest(
 			http.MethodGet,
 			fmt.Sprintf("%s/v0/crawl/status/%s", app.APIURL, jobID),
 			nil,
 			headers,
 			"check crawl status",
 			withRetries(3),
 			withBackoff(500),
 		)
 		if err != nil {
 			return nil, err
 		}
 		var statusData JobStatusResponse
 		err = json.Unmarshal(resp, &statusData)
 		if err != nil {
 			return nil, err
 		}
 		switch status := statusData.Status; status {
 		case "":
 			return nil, fmt.Errorf("invalid status in response")
 		case "completed":
 			if statusData.Data != nil {
 				return statusData.Data, nil
 			}
 			attempts++
 			if attempts > 3 {
 				return nil, fmt.Errorf("crawl job completed but no data was returned")
 			}
 			// Give the API time to attach the data before asking again.
 			time.Sleep(wait)
 		case "active", "paused", "pending", "queued", "waiting":
 			time.Sleep(wait)
 		default:
 			return nil, fmt.Errorf("crawl job failed or was stopped. Status: %s", status)
 		}
 	}
 }
 // handleError handles errors returned by the Firecrawl API.
 //
 // It decodes the JSON error body, takes its "error" string (or a placeholder
 // when absent) and combines it with the status code and action into a
 // descriptive message. The message is passed to fmt.Errorf as an argument, not
 // as the format string, so a '%' in the API-provided text is reported verbatim.
 //
 // Parameters:
 //   - statusCode: The HTTP status code of the failed response.
 //   - body: The response body from the HTTP response; expected to be a JSON object.
 //   - action: A string describing the action being performed.
 //
 // Returns:
 //   - error: An error describing the failure reason, or a parse error when the body is not valid JSON.
 func (app *FirecrawlApp) handleError(statusCode int, body []byte, action string) error {
 	var errorData map[string]any
 	err := json.Unmarshal(body, &errorData)
 	if err != nil {
 		return fmt.Errorf("failed to parse error response: %v", err)
 	}
 	// A missing or non-string "error" value leaves errorMessage empty.
 	errorMessage, _ := errorData["error"].(string)
 	if errorMessage == "" {
 		errorMessage = "No additional error details provided."
 	}
 	var message string
 	switch statusCode {
 	case 402:
 		message = fmt.Sprintf("Payment Required: Failed to %s. %s", action, errorMessage)
 	case 408:
 		message = fmt.Sprintf("Request Timeout: Failed to %s as the request timed out. %s", action, errorMessage)
 	case 409:
 		message = fmt.Sprintf("Conflict: Failed to %s due to a conflict. %s", action, errorMessage)
 	case 500:
 		message = fmt.Sprintf("Internal Server Error: Failed to %s. %s", action, errorMessage)
 	default:
 		message = fmt.Sprintf("Unexpected error during %s: Status code %d. %s", action, statusCode, errorMessage)
 	}
 	return fmt.Errorf("%s", message)
 }
--- a/apps/go-sdk/firecrawl/firecrawl_test.go
+++ b/apps/go-sdk/firecrawl/firecrawl_test.go
@ -0,0 +1,292 @@
 package firecrawl
 import (
 	"log"
 	"os"
 	"testing"
 	"time"
 	"github.com/google/uuid"
 	"github.com/joho/godotenv"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 // API_URL is the base URL of the Firecrawl API under test, read from the
 // API_URL environment variable.
 var API_URL string
 // TEST_API_KEY is the API key used by the authenticated tests, read from the
 // TEST_API_KEY environment variable.
 var TEST_API_KEY string
 // init populates API_URL and TEST_API_KEY, optionally loading them from
 // ../.env first.
 func init() {
 	// A missing .env file is not fatal: the variables may already be present
 	// in the process environment, and aborting here would kill every test in
 	// the package before it could run.
 	if err := godotenv.Load("../.env"); err != nil {
 		log.Printf("Could not load .env file (%v); using process environment", err)
 	}
 	API_URL = os.Getenv("API_URL")
 	TEST_API_KEY = os.Getenv("TEST_API_KEY")
 }
 // TestNoAPIKey checks that NewFirecrawlApp rejects an empty API key.
 func TestNoAPIKey(t *testing.T) {
 	_, err := NewFirecrawlApp("", API_URL)
 	// require stops the test here; err.Error() below would panic on nil.
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "no API key provided")
 }
 // TestScrapeURLInvalidAPIKey checks that ScrapeURL with an invalid key
 // returns the 401 error message.
 func TestScrapeURLInvalidAPIKey(t *testing.T) {
 	app, err := NewFirecrawlApp("invalid_api_key", API_URL)
 	require.NoError(t, err)
 	_, err = app.ScrapeURL("https://firecrawl.dev", nil)
 	// require stops the test here; err.Error() below would panic on nil.
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "Unexpected error during scrape URL: Status code 401. Unauthorized: Invalid token")
 }
 // TestBlocklistedURL checks that scraping a social media URL is refused with
 // the 403 policy message.
 func TestBlocklistedURL(t *testing.T) {
 	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
 	require.NoError(t, err)
 	_, err = app.ScrapeURL("https://facebook.com/fake-test", nil)
 	// require stops the test here; err.Error() below would panic on nil.
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions.")
 }
 // TestSuccessfulResponseWithValidPreviewToken checks that a scrape made with
 // the preview token succeeds and returns the expected page content.
 func TestSuccessfulResponseWithValidPreviewToken(t *testing.T) {
 	app, err := NewFirecrawlApp("this_is_just_a_preview_token", API_URL)
 	require.NoError(t, err)
 	response, err := app.ScrapeURL("https://roastmywebsite.ai", nil)
 	require.NoError(t, err)
 	// Fail fast: the field access below would panic on a nil response.
 	require.NotNil(t, response)
 	assert.Contains(t, response.Content, "_Roast_")
 }
 // TestScrapeURLE2E scrapes a page with default options and checks that
 // content, markdown and metadata are returned while HTML is left empty.
 func TestScrapeURLE2E(t *testing.T) {
 	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
 	require.NoError(t, err)
 	response, err := app.ScrapeURL("https://roastmywebsite.ai", nil)
 	require.NoError(t, err)
 	// Fail fast: the field accesses below would panic on a nil response.
 	require.NotNil(t, response)
 	assert.Contains(t, response.Content, "_Roast_")
 	// NotEmpty/Empty avoid the reversed expected/actual arguments of the
 	// Equal-style assertions and give clearer failure output.
 	assert.NotEmpty(t, response.Markdown)
 	assert.NotNil(t, response.Metadata)
 	assert.Empty(t, response.HTML)
 }
 // TestSuccessfulResponseWithValidAPIKeyAndIncludeHTML scrapes a page with
 // pageOptions.includeHtml set and checks that HTML is returned alongside
 // content, markdown and metadata.
 func TestSuccessfulResponseWithValidAPIKeyAndIncludeHTML(t *testing.T) {
 	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
 	require.NoError(t, err)
 	params := map[string]any{
 		"pageOptions": map[string]any{
 			"includeHtml": true,
 		},
 	}
 	response, err := app.ScrapeURL("https://roastmywebsite.ai", params)
 	require.NoError(t, err)
 	// Fail fast: the field accesses below would panic on a nil response.
 	require.NotNil(t, response)
 	assert.Contains(t, response.Content, "_Roast_")
 	assert.Contains(t, response.Markdown, "_Roast_")
 	assert.Contains(t, response.HTML, "<h1")
 	assert.NotNil(t, response.Metadata)
 }
 // TestSuccessfulResponseForValidScrapeWithPDFFile scrapes a URL ending in
 // .pdf and checks that the extracted text and metadata are returned.
 func TestSuccessfulResponseForValidScrapeWithPDFFile(t *testing.T) {
 	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
 	require.NoError(t, err)
 	response, err := app.ScrapeURL("https://arxiv.org/pdf/astro-ph/9301001.pdf", nil)
 	require.NoError(t, err)
 	// Fail fast: the field accesses below would panic on a nil response.
 	require.NotNil(t, response)
 	assert.Contains(t, response.Content, "We present spectrophotometric observations of the Broad Line Radio Galaxy")
 	assert.NotNil(t, response.Metadata)
 }
 // TestSuccessfulResponseForValidScrapeWithPDFFileWithoutExplicitExtension
 // scrapes a PDF served from a URL without a .pdf suffix and checks that the
 // extracted text and metadata are returned.
 func TestSuccessfulResponseForValidScrapeWithPDFFileWithoutExplicitExtension(t *testing.T) {
 	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
 	require.NoError(t, err)
 	response, err := app.ScrapeURL("https://arxiv.org/pdf/astro-ph/9301001", nil)
 	// NOTE(review): this pause runs after ScrapeURL has already returned;
 	// presumably it spaces out requests to the API - confirm it is needed.
 	time.Sleep(6 * time.Second) // wait for 6 seconds
 	require.NoError(t, err)
 	// Fail fast: the field accesses below would panic on a nil response.
 	require.NotNil(t, response)
 	assert.Contains(t, response.Content, "We present spectrophotometric observations of the Broad Line Radio Galaxy")
 	assert.NotNil(t, response.Metadata)
 }
 // TestCrawlURLInvalidAPIKey checks that starting a crawl with an invalid key
 // returns the 401 error message.
 func TestCrawlURLInvalidAPIKey(t *testing.T) {
 	app, err := NewFirecrawlApp("invalid_api_key", API_URL)
 	require.NoError(t, err)
 	_, err = app.CrawlURL("https://firecrawl.dev", nil, false, 2, "")
 	// require stops the test here; err.Error() below would panic on nil.
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "Unexpected error during start crawl job: Status code 401. Unauthorized: Invalid token")
 }
 // TestShouldReturnErrorForBlocklistedURL checks that crawling a social media
 // URL is refused with the 403 policy message.
 func TestShouldReturnErrorForBlocklistedURL(t *testing.T) {
 	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
 	require.NoError(t, err)
 	_, err = app.CrawlURL("https://twitter.com/fake-test", nil, false, 2, "")
 	// require stops the test here; err.Error() below would panic on nil.
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions.")
 }
 // TestCrawlURLWaitForCompletionE2E starts a crawl that waits for completion
 // and checks that a non-empty list of documents with the expected content is
 // returned.
 func TestCrawlURLWaitForCompletionE2E(t *testing.T) {
 	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
 	require.NoError(t, err)
 	params := map[string]any{
 		"crawlerOptions": map[string]any{
 			"excludes": []string{"blog/*"},
 		},
 	}
 	response, err := app.CrawlURL("https://roastmywebsite.ai", params, true, 2, "")
 	require.NoError(t, err)
 	require.NotNil(t, response)
 	data, ok := response.([]*FirecrawlDocument)
 	require.True(t, ok)
 	// Fail fast: indexing data[0] below would panic on an empty slice.
 	require.NotEmpty(t, data)
 	assert.Contains(t, data[0].Content, "_Roast_")
 }
 // TestCrawlURLWithIdempotencyKeyE2E runs a crawl with a fresh idempotency key
 // and then checks that reusing the same key is rejected with the conflict
 // message.
 func TestCrawlURLWithIdempotencyKeyE2E(t *testing.T) {
 	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
 	require.NoError(t, err)
 	uniqueIdempotencyKey := uuid.New().String()
 	params := map[string]any{
 		"crawlerOptions": map[string]any{
 			"excludes": []string{"blog/*"},
 		},
 	}
 	response, err := app.CrawlURL("https://roastmywebsite.ai", params, true, 2, uniqueIdempotencyKey)
 	require.NoError(t, err)
 	require.NotNil(t, response)
 	data, ok := response.([]*FirecrawlDocument)
 	require.True(t, ok)
 	// Fail fast: indexing data[0] below would panic on an empty slice.
 	require.NotEmpty(t, data)
 	assert.Contains(t, data[0].Content, "_Roast_")
 	// Second request with the same key must be refused.
 	_, err = app.CrawlURL("https://firecrawl.dev", params, true, 2, uniqueIdempotencyKey)
 	// require stops the test here; err.Error() below would panic on nil.
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "Conflict: Failed to start crawl job due to a conflict. Idempotency key already used")
 }
 // TestCheckCrawlStatusE2E starts a crawl without waiting, pauses, and then
 // checks that CheckCrawlStatus reports the job as completed with data.
 func TestCheckCrawlStatusE2E(t *testing.T) {
 	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
 	require.NoError(t, err)
 	params := map[string]any{
 		"crawlerOptions": map[string]any{
 			"excludes": []string{"blog/*"},
 		},
 	}
 	response, err := app.CrawlURL("https://firecrawl.dev", params, false, 2, "")
 	require.NoError(t, err)
 	require.NotNil(t, response)
 	// Fail fast on a bad job ID instead of sleeping and then querying the
 	// status endpoint with an empty identifier.
 	jobID, ok := response.(string)
 	require.True(t, ok)
 	require.NotEmpty(t, jobID)
 	time.Sleep(30 * time.Second) // wait for 30 seconds
 	statusResponse, err := app.CheckCrawlStatus(jobID)
 	require.NoError(t, err)
 	// Fail fast: the field accesses below would panic on a nil response.
 	require.NotNil(t, statusResponse)
 	assert.Equal(t, "completed", statusResponse.Status)
 	assert.Greater(t, len(statusResponse.Data), 0)
 }
 // TestSearchE2E runs a search and checks that more than two results come
 // back and that the first one has content.
 func TestSearchE2E(t *testing.T) {
 	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
 	require.NoError(t, err)
 	response, err := app.Search("test query", nil)
 	require.NoError(t, err)
 	require.NotNil(t, response)
 	// Fail fast: indexing response[0] below would panic on an empty slice.
 	require.Greater(t, len(response), 2)
 	assert.NotEmpty(t, response[0].Content)
 }
 // TestSearchInvalidAPIKey checks that Search with an invalid key returns the
 // 401 error message.
 func TestSearchInvalidAPIKey(t *testing.T) {
 	app, err := NewFirecrawlApp("invalid_api_key", API_URL)
 	require.NoError(t, err)
 	_, err = app.Search("test query", nil)
 	// require stops the test here; err.Error() below would panic on nil.
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "Unexpected error during search: Status code 401. Unauthorized: Invalid token")
 }
 // TestLLMExtraction scrapes a page in "llm-extraction" mode with a JSON
 // schema and checks that the extraction contains the requested keys with
 // boolean values where the schema asks for booleans.
 func TestLLMExtraction(t *testing.T) {
 	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
 	require.NoError(t, err)
 	params := map[string]any{
 		"extractorOptions": ExtractorOptions{
 			Mode:             "llm-extraction",
 			ExtractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
 			ExtractionSchema: map[string]any{
 				"type": "object",
 				"properties": map[string]any{
 					"company_mission": map[string]string{"type": "string"},
 					"supports_sso":    map[string]string{"type": "boolean"},
 					"is_open_source":  map[string]string{"type": "boolean"},
 				},
 				"required": []string{"company_mission", "supports_sso", "is_open_source"},
 			},
 		},
 	}
 	response, err := app.ScrapeURL("https://mendable.ai", params)
 	require.NoError(t, err)
 	// Fail fast: the field accesses below would panic on a nil response.
 	require.NotNil(t, response)
 	assert.Contains(t, response.LLMExtraction, "company_mission")
 	assert.IsType(t, true, response.LLMExtraction["supports_sso"])
 	assert.IsType(t, true, response.LLMExtraction["is_open_source"])
 }
 // TestCancelCrawlJobInvalidAPIKey checks that CancelCrawlJob with an invalid
 // key returns the 401 error message.
 func TestCancelCrawlJobInvalidAPIKey(t *testing.T) {
 	app, err := NewFirecrawlApp("invalid_api_key", API_URL)
 	require.NoError(t, err)
 	_, err = app.CancelCrawlJob("test query")
 	// require stops the test here; err.Error() below would panic on nil.
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "Unexpected error during cancel crawl job: Status code 401. Unauthorized: Invalid token")
 }
 // TestCancelNonExistingCrawlJob checks that cancelling a randomly generated
 // job ID is reported as "Job not found".
 func TestCancelNonExistingCrawlJob(t *testing.T) {
 	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
 	require.NoError(t, err)
 	jobID := uuid.New().String()
 	_, err = app.CancelCrawlJob(jobID)
 	// require stops the test here; err.Error() below would panic on nil.
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "Job not found")
 }
 // TestCancelCrawlJobE2E starts a crawl without waiting and checks that
 // cancelling it returns the "cancelled" status.
 func TestCancelCrawlJobE2E(t *testing.T) {
 	app, err := NewFirecrawlApp(TEST_API_KEY, API_URL)
 	require.NoError(t, err)
 	response, err := app.CrawlURL("https://firecrawl.dev", nil, false, 2, "")
 	require.NoError(t, err)
 	require.NotNil(t, response)
 	// Fail fast on a bad job ID rather than issuing a cancel request with an
 	// empty identifier.
 	jobID, ok := response.(string)
 	require.True(t, ok)
 	require.NotEmpty(t, jobID)
 	status, err := app.CancelCrawlJob(jobID)
 	require.NoError(t, err)
 	assert.Equal(t, "cancelled", status)
 }
--- a/apps/go-sdk/firecrawl/go.mod
+++ b/apps/go-sdk/firecrawl/go.mod
@ -0,0 +1,15 @@
 module github.com/mendableai/firecrawl-go
 go 1.22.5
 require (
 	github.com/google/uuid v1.6.0
 	github.com/joho/godotenv v1.5.1
 	github.com/stretchr/testify v1.9.0
 )
 require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 )
--- a/apps/go-sdk/firecrawl/go.sum
+++ b/apps/go-sdk/firecrawl/go.sum
@ -0,0 +1,14 @@
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
 github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
 github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/apps/js-sdk/example.js
+++ b/apps/js-sdk/example.js
@ -1,16 +1,16 @@
-import { v4 as uuidv4 } from 'uuid';
+import FirecrawlApp from './firecrawl/src/index'; //'@mendable/firecrawl-js';
 import FirecrawlApp from '@mendable/firecrawl-js';
 import { z } from "zod";
 const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
 // Scrape a website:
 const scrapeResult = await app.scrapeUrl('firecrawl.dev');
-console.log(scrapeResult.data.content)
+
 if (scrapeResult.data) {
  console.log(scrapeResult.data.markdown)
 }
 // Crawl a website:
-const idempotencyKey = uuidv4(); // optional
+const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
 const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false, 2, idempotencyKey);
 console.log(crawlResult)
 const jobId = await crawlResult['jobId'];
@ -19,67 +19,15 @@ console.log(jobId);
 let job;
 while (true) {
  job = await app.checkCrawlStatus(jobId);
-  if (job.status == 'completed') {
+  if (job.status === 'completed') {
    break;
  }
  await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
 }
-console.log(job.data[0].content);
+if (job.data) {
-
+  console.log(job.data[0].markdown);
 // Search for a query:
 const query = 'what is mendable?'
 const searchResult = await app.search(query)
 console.log(searchResult)
 // LLM Extraction:
 //  Define schema to extract contents into using zod schema
 const zodSchema = z.object({
  top: z
    .array(
      z.object({
        title: z.string(),
        points: z.number(),
        by: z.string(),
        commentsURL: z.string(),
      })
    )
    .length(5)
    .describe("Top 5 stories on Hacker News"),
 });
 let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
  extractorOptions: { extractionSchema: zodSchema },
 });
 console.log(llmExtractionResult.data.llm_extraction);
 // Define schema to extract contents into using json schema
 const jsonSchema = {
  "type": "object",
  "properties": {
    "top": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "title": {"type": "string"},
          "points": {"type": "number"},
          "by": {"type": "string"},
          "commentsURL": {"type": "string"}
        },
        "required": ["title", "points", "by", "commentsURL"]
      },
      "minItems": 5,
      "maxItems": 5,
      "description": "Top 5 stories on Hacker News"
    }
  },
  "required": ["top"]
 }
-llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
+const mapResult = await app.map('https://firecrawl.dev');
-  extractorOptions: { extractionSchema: jsonSchema },
+console.log(mapResult)
 });
 console.log(llmExtractionResult.data.llm_extraction);
--- a/apps/js-sdk/example.ts
+++ b/apps/js-sdk/example.ts
@ -1,5 +1,5 @@
-import FirecrawlApp, { JobStatusResponse } from './firecrawl/src/index' //'@mendable/firecrawl-js';
+import FirecrawlApp from './firecrawl/src/index' //'@mendable/firecrawl-js';
-import { z } from "zod";
+import { CrawlStatusResponse } from './firecrawl/src/index';
 const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
@ -7,7 +7,7 @@ const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
 const scrapeResult = await app.scrapeUrl('firecrawl.dev');
 if (scrapeResult.data) {
-  console.log(scrapeResult.data.content)
+  console.log(scrapeResult.data.markdown)
 }
 // Crawl a website:
@ -17,9 +17,9 @@ console.log(crawlResult)
 const jobId: string = await crawlResult['jobId'];
 console.log(jobId);
-let job: JobStatusResponse;
+let job: CrawlStatusResponse;
 while (true) {
-  job = await app.checkCrawlStatus(jobId);
+  job = await app.checkCrawlStatus(jobId) as CrawlStatusResponse;
  if (job.status === 'completed') {
    break;
  }
@ -27,66 +27,8 @@ while (true) {
 }
 if (job.data) {
-  console.log(job.data[0].content);
+  console.log(job.data[0].markdown);
 }
 // Search for a query:
 const query = 'what is mendable?'
 const searchResult = await app.search(query)
 // LLM Extraction:
 //  Define schema to extract contents into using zod schema
 const zodSchema = z.object({
  top: z
    .array(
      z.object({
        title: z.string(),
        points: z.number(),
        by: z.string(),
        commentsURL: z.string(),
      })
    )
    .length(5)
    .describe("Top 5 stories on Hacker News"),
 });
 let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
  extractorOptions: { extractionSchema: zodSchema },
 });
 if (llmExtractionResult.data) {
  console.log(llmExtractionResult.data.llm_extraction);
 }
 // Define schema to extract contents into using json schema
 const jsonSchema = {
  "type": "object",
  "properties": {
    "top": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "title": {"type": "string"},
          "points": {"type": "number"},
          "by": {"type": "string"},
          "commentsURL": {"type": "string"}
        },
        "required": ["title", "points", "by", "commentsURL"]
      },
      "minItems": 5,
      "maxItems": 5,
      "description": "Top 5 stories on Hacker News"
    }
  },
  "required": ["top"]
 }
 llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
  extractorOptions: { extractionSchema: jsonSchema },
 });
 if (llmExtractionResult.data) {
  console.log(llmExtractionResult.data.llm_extraction);
 }
 const mapResult = await app.map('https://firecrawl.dev');
 console.log(mapResult)
--- a/apps/js-sdk/exampleV0.js
+++ b/apps/js-sdk/exampleV0.js
@ -0,0 +1,85 @@
 import { v4 as uuidv4 } from 'uuid';
 import FirecrawlApp from '@mendable/firecrawl-js';
 import { z } from "zod";
 // Example usage of the Firecrawl JS SDK: scrape, crawl, search and LLM extraction.
 const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
 // Scrape a website:
 const scrapeResult = await app.scrapeUrl('firecrawl.dev');
 // Guard against a response that carries no data.
 if (scrapeResult.data) {
   console.log(scrapeResult.data.content)
 }
 // Crawl a website:
 const idempotencyKey = uuidv4(); // optional
 const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false, 2, idempotencyKey);
 console.log(crawlResult)
 const jobId = await crawlResult['jobId'];
 console.log(jobId);
 // Statuses that mean the crawl is still running; any other status except
 // 'completed' is treated as terminal so the loop cannot spin forever.
 const inProgressStatuses = ['active', 'paused', 'pending', 'queued', 'waiting'];
 let job;
 while (true) {
   job = await app.checkCrawlStatus(jobId);
   if (job.status === 'completed') {
     break;
   }
   if (!inProgressStatuses.includes(job.status)) {
     throw new Error(`Crawl job failed or was stopped. Status: ${job.status}`);
   }
   await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
 }
 if (job.data && job.data.length > 0) {
   console.log(job.data[0].content);
 }
 // Search for a query:
 const query = 'what is mendable?'
 const searchResult = await app.search(query)
 console.log(searchResult)
 // LLM Extraction:
 //  Define schema to extract contents into using zod schema
 const zodSchema = z.object({
   top: z
     .array(
       z.object({
         title: z.string(),
         points: z.number(),
         by: z.string(),
         commentsURL: z.string(),
       })
     )
     .length(5)
     .describe("Top 5 stories on Hacker News"),
 });
 let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
   extractorOptions: { extractionSchema: zodSchema },
 });
 if (llmExtractionResult.data) {
   console.log(llmExtractionResult.data.llm_extraction);
 }
 // Define schema to extract contents into using json schema
 const jsonSchema = {
   "type": "object",
   "properties": {
     "top": {
       "type": "array",
       "items": {
         "type": "object",
         "properties": {
           "title": {"type": "string"},
           "points": {"type": "number"},
           "by": {"type": "string"},
           "commentsURL": {"type": "string"}
         },
         "required": ["title", "points", "by", "commentsURL"]
       },
       "minItems": 5,
       "maxItems": 5,
       "description": "Top 5 stories on Hacker News"
     }
   },
   "required": ["top"]
 }
 llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
   extractorOptions: { extractionSchema: jsonSchema },
 });
 if (llmExtractionResult.data) {
   console.log(llmExtractionResult.data.llm_extraction);
 }
--- a/apps/js-sdk/exampleV0.ts
+++ b/apps/js-sdk/exampleV0.ts
@ -0,0 +1,95 @@
 import FirecrawlApp, { ScrapeResponseV0, CrawlStatusResponseV0, SearchResponseV0 } from './firecrawl/src/index' //'@mendable/firecrawl-js';
 import { z } from "zod";
 // Example usage of the Firecrawl JS SDK against API version "v0":
 // scrape, crawl, search and LLM extraction.
 const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY", version: "v0"});
 // Scrape a website:
 const scrapeResult = await app.scrapeUrl('firecrawl.dev') as ScrapeResponseV0;
 if (scrapeResult.data) {
   console.log(scrapeResult.data.content)
 }
 // Crawl a website:
 const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
 console.log(crawlResult)
 const jobId: string = await crawlResult['jobId'];
 console.log(jobId);
 // Statuses that mean the crawl is still running; any other status except
 // 'completed' is treated as terminal so the loop cannot spin forever.
 const inProgressStatuses: string[] = ['active', 'paused', 'pending', 'queued', 'waiting'];
 let job: CrawlStatusResponseV0;
 while (true) {
   job = await app.checkCrawlStatus(jobId) as CrawlStatusResponseV0;
   if (job.status === 'completed') {
     break;
   }
   if (!inProgressStatuses.includes(job.status)) {
     throw new Error(`Crawl job failed or was stopped. Status: ${job.status}`);
   }
   await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
 }
 if (job.data && job.data.length > 0) {
   console.log(job.data[0].content);
 }
 // Search for a query:
 const query = 'what is mendable?'
 const searchResult = await app.search(query) as SearchResponseV0;
 if (searchResult.data && searchResult.data.length > 0) {
   console.log(searchResult.data[0].content)
 }
 // LLM Extraction:
 //  Define schema to extract contents into using zod schema
 const zodSchema = z.object({
   top: z
     .array(
       z.object({
         title: z.string(),
         points: z.number(),
         by: z.string(),
         commentsURL: z.string(),
       })
     )
     .length(5)
     .describe("Top 5 stories on Hacker News"),
 });
 let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
   extractorOptions: { extractionSchema: zodSchema },
 }) as ScrapeResponseV0;
 // A scrape response carries a single document in `data`, not an array.
 if (llmExtractionResult.data) {
   console.log(llmExtractionResult.data.llm_extraction);
 }
 // Define schema to extract contents into using json schema
 const jsonSchema = {
   "type": "object",
   "properties": {
     "top": {
       "type": "array",
       "items": {
         "type": "object",
         "properties": {
           "title": {"type": "string"},
           "points": {"type": "number"},
           "by": {"type": "string"},
           "commentsURL": {"type": "string"}
         },
         "required": ["title", "points", "by", "commentsURL"]
       },
       "minItems": 5,
       "maxItems": 5,
       "description": "Top 5 stories on Hacker News"
     }
   },
   "required": ["top"]
 }
 llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
   extractorOptions: { extractionSchema: jsonSchema },
 }) as ScrapeResponseV0;
 if (llmExtractionResult.data) {
   console.log(llmExtractionResult.data.llm_extraction);
 }
--- a/apps/js-sdk/firecrawl/.gitignore
+++ b/apps/js-sdk/firecrawl/.gitignore
@ -128,3 +128,5 @@ dist
 .yarn/build-state.yml
 .yarn/install-state.gz
 .pnp.*
 build
--- a/Show More
+++ b/Show More
		`@ -0,0 +1,2 @@`
							`API_URL=http://localhost:3002`
							`TEST_API_KEY=fc-YOUR-API-KEY`