Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl (synced 2025-08-12 06:28:59 +08:00)

add zod, create middleware, update openapi declaration, add crawl logic

parent 4165de1773
commit 8b7569f8f3

apps/api/openapi-v0.json (new file, 924 lines)

@@ -0,0 +1,924 @@
{
  "openapi": "3.0.0",
  "info": {
    "title": "Firecrawl API",
    "version": "0.0.0",
    "description": "API for interacting with Firecrawl services to perform web scraping and crawling tasks.",
    "contact": {
      "name": "Firecrawl Support",
      "url": "https://firecrawl.dev/support",
      "email": "support@firecrawl.dev"
    }
  },
  "servers": [
    {
      "url": "https://api.firecrawl.dev/v0"
    }
  ],
  "paths": {
    "/scrape": {
      "post": {
        "summary": "Scrape a single URL and optionally extract information using an LLM",
        "operationId": "scrapeAndExtractFromUrl",
        "tags": ["Scraping"],
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri",
                    "description": "The URL to scrape"
                  },
                  "pageOptions": {
                    "type": "object",
                    "properties": {
                      "headers": {
                        "type": "object",
                        "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
                      },
                      "includeHtml": {
                        "type": "boolean",
                        "description": "Include the HTML version of the content on page. Will output a html key in the response.",
                        "default": false
                      },
                      "includeRawHtml": {
                        "type": "boolean",
                        "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
                        "default": false
                      },
                      "onlyIncludeTags": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
                      },
                      "onlyMainContent": {
                        "type": "boolean",
                        "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
                        "default": false
                      },
                      "removeTags": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
                      },
                      "replaceAllPathsWithAbsolutePaths": {
                        "type": "boolean",
                        "description": "Replace all relative paths with absolute paths for images and links",
                        "default": false
                      },
                      "screenshot": {
                        "type": "boolean",
                        "description": "Include a screenshot of the top of the page that you are scraping.",
                        "default": false
                      },
                      "fullPageScreenshot": {
                        "type": "boolean",
                        "description": "Include a full page screenshot of the page that you are scraping.",
                        "default": false
                      },
                      "waitFor": {
                        "type": "integer",
                        "description": "Wait x amount of milliseconds for the page to load to fetch content",
                        "default": 0
                      }
                    }
                  },
                  "extractorOptions": {
                    "type": "object",
                    "description": "Options for extraction of structured information from the page content. Note: LLM-based extraction is not performed by default and only occurs when explicitly configured. The 'markdown' mode simply returns the scraped markdown and is the default mode for scraping.",
                    "default": {},
                    "properties": {
                      "mode": {
                        "type": "string",
                        "enum": ["markdown", "llm-extraction", "llm-extraction-from-raw-html", "llm-extraction-from-markdown"],
                        "description": "The extraction mode to use. 'markdown': Returns the scraped markdown content, does not perform LLM extraction. 'llm-extraction': Extracts information from the cleaned and parsed content using LLM. 'llm-extraction-from-raw-html': Extracts information directly from the raw HTML using LLM. 'llm-extraction-from-markdown': Extracts information from the markdown content using LLM."
                      },
                      "extractionPrompt": {
                        "type": "string",
                        "description": "A prompt describing what information to extract from the page, applicable for LLM extraction modes."
                      },
                      "extractionSchema": {
                        "type": "object",
                        "additionalProperties": true,
                        "description": "The schema for the data to be extracted, required only for LLM extraction modes.",
                        "required": [
                          "company_mission",
                          "supports_sso",
                          "is_open_source"
                        ]
                      }
                    }
                  },
                  "timeout": {
                    "type": "integer",
                    "description": "Timeout in milliseconds for the request",
                    "default": 30000
                  }
                },
                "required": ["url"]
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ScrapeResponse"
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/crawl": {
      "post": {
        "summary": "Crawl multiple URLs based on options",
        "operationId": "crawlUrls",
        "tags": ["Crawling"],
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri",
                    "description": "The base URL to start crawling from"
                  },
                  "crawlerOptions": {
                    "type": "object",
                    "properties": {
                      "includes": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "URL patterns to include"
                      },
                      "excludes": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "URL patterns to exclude"
                      },
                      "generateImgAltText": {
                        "type": "boolean",
                        "description": "Generate alt text for images using LLMs (must have a paid plan)",
                        "default": false
                      },
                      "returnOnlyUrls": {
                        "type": "boolean",
                        "description": "If true, returns only the URLs as a list on the crawl status. Attention: the return response will be a list of URLs inside the data, not a list of documents.",
                        "default": false
                      },
                      "maxDepth": {
                        "type": "integer",
                        "description": "Maximum depth to crawl relative to the entered URL. A maxDepth of 0 scrapes only the entered URL. A maxDepth of 1 scrapes the entered URL and all pages one level deep. A maxDepth of 2 scrapes the entered URL and all pages up to two levels deep. Higher values follow the same pattern."
                      },
                      "mode": {
                        "type": "string",
                        "enum": ["default", "fast"],
                        "description": "The crawling mode to use. Fast mode crawls 4x faster websites without sitemap, but may not be as accurate and shouldn't be used in heavy js-rendered websites.",
                        "default": "default"
                      },
                      "ignoreSitemap": {
                        "type": "boolean",
                        "description": "Ignore the website sitemap when crawling",
                        "default": false
                      },
                      "limit": {
                        "type": "integer",
                        "description": "Maximum number of pages to crawl",
                        "default": 10000
                      },
                      "allowBackwardCrawling": {
                        "type": "boolean",
                        "description": "Enables the crawler to navigate from a specific URL to previously linked pages. For instance, from 'example.com/product/123' back to 'example.com/product'",
                        "default": false
                      },
                      "allowExternalContentLinks": {
                        "type": "boolean",
                        "description": "Allows the crawler to follow links to external websites.",
                        "default": false
                      }
                    }
                  },
                  "pageOptions": {
                    "type": "object",
                    "properties": {
                      "headers": {
                        "type": "object",
                        "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
                      },
                      "includeHtml": {
                        "type": "boolean",
                        "description": "Include the HTML version of the content on page. Will output a html key in the response.",
                        "default": false
                      },
                      "includeRawHtml": {
                        "type": "boolean",
                        "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
                        "default": false
                      },
                      "onlyIncludeTags": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
                      },
                      "onlyMainContent": {
                        "type": "boolean",
                        "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
                        "default": false
                      },
                      "removeTags": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
                      },
                      "replaceAllPathsWithAbsolutePaths": {
                        "type": "boolean",
                        "description": "Replace all relative paths with absolute paths for images and links",
                        "default": false
                      },
                      "screenshot": {
                        "type": "boolean",
                        "description": "Include a screenshot of the top of the page that you are scraping.",
                        "default": false
                      },
                      "fullPageScreenshot": {
                        "type": "boolean",
                        "description": "Include a full page screenshot of the page that you are scraping.",
                        "default": false
                      },
                      "waitFor": {
                        "type": "integer",
                        "description": "Wait x amount of milliseconds for the page to load to fetch content",
                        "default": 0
                      }
                    }
                  }
                },
                "required": ["url"]
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CrawlResponse"
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/search": {
      "post": {
        "summary": "Search for a keyword in Google, returns top page results with markdown content for each page",
        "operationId": "searchGoogle",
        "tags": ["Search"],
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "query": {
                    "type": "string",
                    "format": "uri",
                    "description": "The query to search for"
                  },
                  "pageOptions": {
                    "type": "object",
                    "properties": {
                      "onlyMainContent": {
                        "type": "boolean",
                        "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
                        "default": false
                      },
                      "fetchPageContent": {
                        "type": "boolean",
                        "description": "Fetch the content of each page. If false, defaults to a basic fast serp API.",
                        "default": true
                      },
                      "includeHtml": {
                        "type": "boolean",
                        "description": "Include the HTML version of the content on page. Will output a html key in the response.",
                        "default": false
                      },
                      "includeRawHtml": {
                        "type": "boolean",
                        "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
                        "default": false
                      }
                    }
                  },
                  "searchOptions": {
                    "type": "object",
                    "properties": {
                      "limit": {
                        "type": "integer",
                        "description": "Maximum number of results. Max is 20 during beta."
                      }
                    }
                  }
                },
                "required": ["query"]
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/SearchResponse"
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/crawl/status/{jobId}": {
      "get": {
        "tags": ["Crawl"],
        "summary": "Get the status of a crawl job",
        "operationId": "getCrawlStatus",
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "parameters": [
          {
            "name": "jobId",
            "in": "path",
            "description": "ID of the crawl job",
            "required": true,
            "schema": {
              "type": "string"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "status": {
                      "type": "string",
                      "description": "Status of the job (completed, active, failed, paused)"
                    },
                    "current": {
                      "type": "integer",
                      "description": "Current page number"
                    },
                    "total": {
                      "type": "integer",
                      "description": "Total number of pages"
                    },
                    "data": {
                      "type": "array",
                      "items": {
                        "$ref": "#/components/schemas/CrawlStatusResponseObj"
                      },
                      "description": "Data returned from the job (null when it is in progress)"
                    },
                    "partial_data": {
                      "type": "array",
                      "items": {
                        "$ref": "#/components/schemas/CrawlStatusResponseObj"
                      },
                      "description": "Partial documents returned as it is being crawled (streaming). **This feature is currently in alpha - expect breaking changes** When a page is ready, it will append to the partial_data array, so there is no need to wait for the entire website to be crawled. When the crawl is done, partial_data will become empty and the result will be available in `data`. There is a max of 50 items in the array response. The oldest item (top of the array) will be removed when the new item is added to the array."
                    }
                  }
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/crawl/cancel/{jobId}": {
      "delete": {
        "tags": ["Crawl"],
        "summary": "Cancel a crawl job",
        "operationId": "cancelCrawlJob",
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "parameters": [
          {
            "name": "jobId",
            "in": "path",
            "description": "ID of the crawl job",
            "required": true,
            "schema": {
              "type": "string"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "status": {
                      "type": "string",
                      "description": "Returns cancelled."
                    }
                  }
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "securitySchemes": {
      "bearerAuth": {
        "type": "http",
        "scheme": "bearer"
      }
    },
    "schemas": {
      "ScrapeResponse": {
        "type": "object",
        "properties": {
          "success": {
            "type": "boolean"
          },
          "data": {
            "type": "object",
            "properties": {
              "markdown": {
                "type": "string"
              },
              "content": {
                "type": "string"
              },
              "html": {
                "type": "string",
                "nullable": true,
                "description": "HTML version of the content on page if `includeHtml` is true"
              },
              "rawHtml": {
                "type": "string",
                "nullable": true,
                "description": "Raw HTML content of the page if `includeRawHtml` is true"
              },
              "metadata": {
                "type": "object",
                "properties": {
                  "title": {
                    "type": "string"
                  },
                  "description": {
                    "type": "string"
                  },
                  "language": {
                    "type": "string",
                    "nullable": true
                  },
                  "sourceURL": {
                    "type": "string",
                    "format": "uri"
                  },
                  "<any other metadata> ": {
                    "type": "string"
                  },
                  "pageStatusCode": {
                    "type": "integer",
                    "description": "The status code of the page"
                  },
                  "pageError": {
                    "type": "string",
                    "nullable": true,
                    "description": "The error message of the page"
                  }

                }
              },
              "llm_extraction": {
                "type": "object",
                "description": "Displayed when using LLM Extraction. Extracted data from the page following the schema defined.",
                "nullable": true
              },
              "warning": {
                "type": "string",
                "nullable": true,
                "description": "Can be displayed when using LLM Extraction. Warning message will let you know any issues with the extraction."
              }
            }
          }
        }
      },
      "CrawlStatusResponseObj": {
        "type": "object",
        "properties": {
          "markdown": {
            "type": "string"
          },
          "content": {
            "type": "string"
          },
          "html": {
            "type": "string",
            "nullable": true,
            "description": "HTML version of the content on page if `includeHtml` is true"
          },
          "rawHtml": {
            "type": "string",
            "nullable": true,
            "description": "Raw HTML content of the page if `includeRawHtml` is true"
          },
          "index": {
            "type": "integer",
            "description": "The number of the page that was crawled. This is useful for `partial_data` so you know which page the data is from."
          },
          "metadata": {
            "type": "object",
            "properties": {
              "title": {
                "type": "string"
              },
              "description": {
                "type": "string"
              },
              "language": {
                "type": "string",
                "nullable": true
              },
              "sourceURL": {
                "type": "string",
                "format": "uri"
              },
              "<any other metadata> ": {
                "type": "string"
              },
              "pageStatusCode": {
                "type": "integer",
                "description": "The status code of the page"
              },
              "pageError": {
                "type": "string",
                "nullable": true,
                "description": "The error message of the page"
              }
            }
          }
        }
      },
      "SearchResponse": {
        "type": "object",
        "properties": {
          "success": {
            "type": "boolean"
          },
          "data": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "url": {
                  "type": "string"
                },
                "markdown": {
                  "type": "string"
                },
                "content": {
                  "type": "string"
                },
                "metadata": {
                  "type": "object",
                  "properties": {
                    "title": {
                      "type": "string"
                    },
                    "description": {
                      "type": "string"
                    },
                    "language": {
                      "type": "string",
                      "nullable": true
                    },
                    "sourceURL": {
                      "type": "string",
                      "format": "uri"
                    }
                  }
                }
              }
            }
          }
        }
      },
      "CrawlResponse": {
        "type": "object",
        "properties": {
          "jobId": {
            "type": "string"
          }
        }
      }
    }
  },
  "security": [
    {
      "bearerAuth": []
    }
  ]
}
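For orientation, a request against this v0 spec might look like the following sketch (not part of the commit; the API key and target URL are placeholders):

// Sketch: POST /v0/scrape as described by the spec above.
const response = await fetch("https://api.firecrawl.dev/v0/scrape", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    "Authorization": "Bearer YOUR_API_KEY", // placeholder
  },
  body: JSON.stringify({
    url: "https://example.com",
    pageOptions: { onlyMainContent: true },
  }),
});
const { success, data } = await response.json(); // ScrapeResponse
console.log(data.markdown);

The existing OpenAPI spec is then updated for v1 as follows: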
@@ -18,8 +18,8 @@
   "paths": {
     "/scrape": {
       "post": {
-        "summary": "Scrape a single URL and optionally extract information using an LLM",
-        "operationId": "scrapeAndExtractFromUrl",
+        "summary": "Scrape a single URL",
+        "operationId": "scrape",
         "tags": ["Scraping"],
         "security": [
           {
@@ -38,94 +38,47 @@
                   "format": "uri",
                   "description": "The URL to scrape"
                 },
-                "pageOptions": {
-                  "type": "object",
-                  "properties": {
+                "formats": {
+                  "type": "array",
+                  "items": {
+                    "type": "string",
+                    "enum": ["markdown", "html", "rawHtml", "links", "screenshot", "screenshot@fullPage"]
+                  },
+                  "description": "Specific formats to return.\n\n - markdown: The page in Markdown format.\n - html: The page's HTML, trimmed to include only meaningful content.\n - rawHtml: The page's original HTML.\n - links: The links on the page.\n - screenshot: A screenshot of the top of the page.\n - screenshot@fullPage: A screenshot of the full page. (overridden by screenshot if present)",
+                  "default": ["markdown"]
+                },
                 "headers": {
                   "type": "object",
                   "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
                 },
-                "includeHtml": {
-                  "type": "boolean",
-                  "description": "Include the HTML version of the content on page. Will output a html key in the response.",
-                  "default": false
-                },
-                "includeRawHtml": {
-                  "type": "boolean",
-                  "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
-                  "default": false
-                },
-                "onlyIncludeTags": {
+                "includeTags": {
                   "type": "array",
                   "items": {
                     "type": "string"
                   },
                   "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
                 },
-                "onlyMainContent": {
-                  "type": "boolean",
-                  "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
-                  "default": false
-                },
-                "removeTags": {
+                "excludeTags": {
                   "type": "array",
                   "items": {
                     "type": "string"
                   },
                   "description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
                 },
-                "replaceAllPathsWithAbsolutePaths": {
+                "onlyMainContent": {
                   "type": "boolean",
-                  "description": "Replace all relative paths with absolute paths for images and links",
-                  "default": false
-                },
-                "screenshot": {
-                  "type": "boolean",
-                  "description": "Include a screenshot of the top of the page that you are scraping.",
-                  "default": false
-                },
-                "fullPageScreenshot": {
-                  "type": "boolean",
-                  "description": "Include a full page screenshot of the page that you are scraping.",
-                  "default": false
-                },
-                "waitFor": {
-                  "type": "integer",
-                  "description": "Wait x amount of milliseconds for the page to load to fetch content",
-                  "default": 0
-                }
-              }
-            },
-            "extractorOptions": {
-              "type": "object",
-              "description": "Options for extraction of structured information from the page content. Note: LLM-based extraction is not performed by default and only occurs when explicitly configured. The 'markdown' mode simply returns the scraped markdown and is the default mode for scraping.",
-              "default": {},
-              "properties": {
-                "mode": {
-                  "type": "string",
-                  "enum": ["markdown", "llm-extraction", "llm-extraction-from-raw-html", "llm-extraction-from-markdown"],
-                  "description": "The extraction mode to use. 'markdown': Returns the scraped markdown content, does not perform LLM extraction. 'llm-extraction': Extracts information from the cleaned and parsed content using LLM. 'llm-extraction-from-raw-html': Extracts information directly from the raw HTML using LLM. 'llm-extraction-from-markdown': Extracts information from the markdown content using LLM."
-                },
-                "extractionPrompt": {
-                  "type": "string",
-                  "description": "A prompt describing what information to extract from the page, applicable for LLM extraction modes."
-                },
-                "extractionSchema": {
-                  "type": "object",
-                  "additionalProperties": true,
-                  "description": "The schema for the data to be extracted, required only for LLM extraction modes.",
-                  "required": [
-                    "company_mission",
-                    "supports_sso",
-                    "is_open_source"
-                  ]
-                }
-              }
-            },
+                  "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
+                  "default": true
+                },
                 "timeout": {
                   "type": "integer",
                   "description": "Timeout in milliseconds for the request",
                   "default": 30000
+                },
+                "waitFor": {
+                  "type": "integer",
+                  "description": "Wait x amount of milliseconds for the page to load to fetch content",
+                  "default": 0
                 }
               },
               "required": ["url"]
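The net effect of the hunk above: the v1 /scrape body drops the pageOptions wrapper and the extractorOptions block in favor of a flat body with a formats array; includeTags/excludeTags replace onlyIncludeTags/removeTags, and onlyMainContent now defaults to true. A request body under the new schema might look like this sketch (values illustrative):

const scrapeBody = {
  url: "https://example.com",
  formats: ["markdown", "html"],             // default is ["markdown"]
  excludeTags: ["script", ".ad", "#footer"],
  onlyMainContent: true,                     // default flipped from false to true
  timeout: 30000,
};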
@@ -741,24 +694,42 @@
           "success": {
             "type": "boolean"
           },
+          "warning": {
+            "type": "string",
+            "nullable": true,
+            "description": "Warning message to let you know of any issues."
+          },
           "data": {
             "type": "object",
             "properties": {
               "markdown": {
-                "type": "string"
-              },
-              "content": {
-                "type": "string"
+                "type": "string",
+                "nullable": true,
+                "description": "Markdown content of the page if the `markdown` format was specified (default)"
               },
               "html": {
                 "type": "string",
                 "nullable": true,
-                "description": "HTML version of the content on page if `includeHtml` is true"
+                "description": "HTML version of the content on page if the `html` format was specified"
               },
               "rawHtml": {
                 "type": "string",
                 "nullable": true,
-                "description": "Raw HTML content of the page if `includeRawHtml` is true"
+                "description": "Raw HTML content of the page if the `rawHtml` format was specified"
+              },
+              "links": {
+                "type": "array",
+                "items": {
+                  "type": "string",
+                  "format": "uri"
+                },
+                "nullable": true,
+                "description": "Links on the page if the `links` format was specified"
+              },
+              "screenshot": {
+                "type": "string",
+                "nullable": true,
+                "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullSize` format was specified"
               },
               "metadata": {
                 "type": "object",
@@ -780,27 +751,16 @@
                   "<any other metadata> ": {
                     "type": "string"
                   },
-                  "pageStatusCode": {
+                  "statusCode": {
                     "type": "integer",
                     "description": "The status code of the page"
                   },
-                  "pageError": {
+                  "error": {
                     "type": "string",
                     "nullable": true,
                     "description": "The error message of the page"
                   }

                 }
-              },
-              "llm_extraction": {
-                "type": "object",
-                "description": "Displayed when using LLM Extraction. Extracted data from the page following the schema defined.",
-                "nullable": true
-              },
-              "warning": {
-                "type": "string",
-                "nullable": true,
-                "description": "Can be displayed when using LLM Extraction. Warning message will let you know any issues with the extraction."
               }
             }
           }
@@ -810,24 +770,33 @@
         "type": "object",
         "properties": {
           "markdown": {
-            "type": "string"
-          },
-          "content": {
-            "type": "string"
+            "type": "string",
+            "nullable": true,
+            "description": "Markdown content of the page if the `markdown` format was specified (default)"
           },
           "html": {
             "type": "string",
             "nullable": true,
-            "description": "HTML version of the content on page if `includeHtml` is true"
+            "description": "HTML version of the content on page if the `html` format was specified"
           },
           "rawHtml": {
             "type": "string",
             "nullable": true,
-            "description": "Raw HTML content of the page if `includeRawHtml` is true"
+            "description": "Raw HTML content of the page if the `rawHtml` format was specified"
           },
-          "index": {
-            "type": "integer",
-            "description": "The number of the page that was crawled. This is useful for `partial_data` so you know which page the data is from."
+          "links": {
+            "type": "array",
+            "items": {
+              "type": "string",
+              "format": "uri"
+            },
+            "nullable": true,
+            "description": "Links on the page if the `links` format was specified"
+          },
+          "screenshot": {
+            "type": "string",
+            "nullable": true,
+            "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullSize` format was specified"
           },
           "metadata": {
             "type": "object",
@@ -849,11 +818,11 @@
               "<any other metadata> ": {
                 "type": "string"
               },
-              "pageStatusCode": {
+              "statusCode": {
                 "type": "integer",
                 "description": "The status code of the page"
               },
-              "pageError": {
+              "error": {
                 "type": "string",
                 "nullable": true,
                 "description": "The error message of the page"
@@ -871,16 +840,34 @@
         "data": {
           "type": "array",
           "items": {
-            "type": "object",
-            "properties": {
-              "url": {
-                "type": "string"
-              },
             "markdown": {
-              "type": "string"
+              "type": "string",
+              "nullable": true,
+              "description": "Markdown content of the page if the `markdown` format was specified (default)"
             },
-            "content": {
-              "type": "string"
+            "html": {
+              "type": "string",
+              "nullable": true,
+              "description": "HTML version of the content on page if the `html` format was specified"
+            },
+            "rawHtml": {
+              "type": "string",
+              "nullable": true,
+              "description": "Raw HTML content of the page if the `rawHtml` format was specified"
+            },
+            "links": {
+              "type": "array",
+              "items": {
+                "type": "string",
+                "format": "uri"
+              },
+              "nullable": true,
+              "description": "Links on the page if the `links` format was specified"
+            },
+            "screenshot": {
+              "type": "string",
+              "nullable": true,
+              "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullSize` format was specified"
             },
             "metadata": {
               "type": "object",
@@ -898,7 +885,18 @@
                 "sourceURL": {
                   "type": "string",
                   "format": "uri"
-                }
+                },
+                "<any other metadata> ": {
+                  "type": "string"
+                },
+                "statusCode": {
+                  "type": "integer",
+                  "description": "The status code of the page"
+                },
+                "error": {
+                  "type": "string",
+                  "nullable": true,
+                  "description": "The error message of the page"
+                }
               }
             }
           }
@@ -909,8 +907,15 @@
       "CrawlResponse": {
         "type": "object",
         "properties": {
-          "jobId": {
+          "success": {
+            "type": "boolean"
+          },
+          "id": {
             "type": "string"
+          },
+          "url": {
+            "type": "string",
+            "format": "uri"
           }
         }
       }
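With this change a successful POST /crawl no longer returns a bare jobId; it mirrors what crawlController returns later in this commit. An illustrative response (values are placeholders; the host reflects wherever the API is served):

const crawlResponse = {
  success: true,
  id: "00000000-0000-0000-0000-000000000000", // uuidv4 job id
  url: "https://api.firecrawl.dev/v1/crawl/00000000-0000-0000-0000-000000000000",
};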
@@ -106,7 +106,7 @@
     "uuid": "^10.0.0",
     "wordpos": "^2.1.0",
     "xml2js": "^0.6.2",
-    "zod": "^3.23.4",
+    "zod": "^3.23.8",
     "zod-to-json-schema": "^3.23.1"
   },
   "nodemonConfig": {

apps/api/pnpm-lock.yaml (generated, 2 lines changed)

@@ -189,7 +189,7 @@ importers:
         specifier: ^0.6.2
         version: 0.6.2
       zod:
-        specifier: ^3.23.4
+        specifier: ^3.23.8
         version: 3.23.8
       zod-to-json-schema:
         specifier: ^3.23.1
@ -1,22 +1,8 @@
|
|||||||
import { Request, Response } from "express";
|
import { Response } from "express";
|
||||||
import { authenticateUser } from "./auth";
|
|
||||||
import { RateLimiterMode } from "../../../src/types";
|
|
||||||
import { Logger } from "../../../src/lib/logger";
|
|
||||||
import { v4 as uuidv4 } from "uuid";
|
import { v4 as uuidv4 } from "uuid";
|
||||||
|
import { RequestWithAuth } from "./types";
|
||||||
|
|
||||||
export async function crawlStatusController(req: Request, res: Response) {
|
export async function crawlStatusController(req: RequestWithAuth, res: Response) {
|
||||||
// TODO: validate req.params.jobId
|
|
||||||
|
|
||||||
try {
|
|
||||||
const { success, team_id, error, status } = await authenticateUser(
|
|
||||||
req,
|
|
||||||
res,
|
|
||||||
RateLimiterMode.CrawlStatus
|
|
||||||
);
|
|
||||||
if (!success) {
|
|
||||||
return res.status(status).json({ error });
|
|
||||||
}
|
|
||||||
|
|
||||||
// const job = await getWebScraperQueue().getJob(req.params.jobId);
|
// const job = await getWebScraperQueue().getJob(req.params.jobId);
|
||||||
// if (!job) {
|
// if (!job) {
|
||||||
// return res.status(404).json({ error: "Job not found" });
|
// return res.status(404).json({ error: "Job not found" });
|
||||||
@ -78,9 +64,5 @@ export async function crawlStatusController(req: Request, res: Response) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
res.status(200).json(result);
|
res.status(200).json(result);
|
||||||
} catch (error) {
|
|
||||||
Logger.error(error);
|
|
||||||
return res.status(500).json({ error: error.message });
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,140 +1,87 @@
-import { Request, Response } from "express";
-import { checkTeamCredits } from "../../../src/services/billing/credit_billing";
-import { authenticateUser } from "./auth";
-import { RateLimiterMode } from "../../../src/types";
-import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
-import { validateIdempotencyKey } from "../../../src/services/idempotency/validate";
-import { createIdempotencyKey } from "../../../src/services/idempotency/create";
+import { Response } from "express";
 import { v4 as uuidv4 } from "uuid";
-import { Logger } from "../../../src/lib/logger";
-import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
-import { CrawlRequest, CrawlResponse } from "./types";
+import { CrawlRequest, crawlRequestSchema, CrawlResponse, legacyCrawlerOptions, legacyScrapeOptions, RequestWithAuth } from "./types";
+import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../lib/crawl-redis";
+import { logCrawl } from "../../services/logging/crawl_log";
+import { getScrapeQueue } from "../../services/queue-service";
+import { addScrapeJob } from "../../services/queue-jobs";
+import { Logger } from "../../lib/logger";
 
-export async function crawlController(req: Request<{}, {}, CrawlRequest>, res: Response<CrawlResponse>) {
-  // expected req.body
+export async function crawlController(req: RequestWithAuth<CrawlResponse, CrawlRequest>, res: Response<CrawlResponse>) {
+  req.body = crawlRequestSchema.parse(req.body);
 
-  // req.body = {
-  //   url: string
-  //   crawlerOptions: {
-  //     includePaths: string[]
-  //     excludePaths: string[]
-  //     maxDepth: number
-  //     limit: number
-  //     allowBackwardLinks: boolean >> TODO: CHANGE THIS NAME???
-  //     allowExternalLinks: boolean
-  //     ignoreSitemap: number
-  //   }
-  //   scrapeOptions: Exclude<Scrape, "url">
-  // }
-
-  try {
-    const { success, team_id, error, status } = await authenticateUser(
-      req,
-      res,
-      RateLimiterMode.Crawl
-    );
-    if (!success) {
-      return res.status(status).json({ success: false, error });
-    }
-
-    if (req.headers["x-idempotency-key"]) {
-      const isIdempotencyValid = await validateIdempotencyKey(req);
-      if (!isIdempotencyValid) {
-        return res.status(409).json({ success: false, error: "Idempotency key already used" });
-      }
-      try {
-        createIdempotencyKey(req);
-      } catch (error) {
-        Logger.error(error);
-        return res.status(500).json({ success: false, error: error.message });
-      }
-    }
-
-    const { success: creditsCheckSuccess, message: creditsCheckMessage } =
-      await checkTeamCredits(team_id, 1);
-    if (!creditsCheckSuccess) {
-      return res.status(402).json({ success: false, error: "Insufficient credits" });
-    }
-
-    let url = req.body.url;
-    if (!url) {
-      return res.status(400).json({ success: false, error: "Url is required" });
-    }
-
-    if (isUrlBlocked(url)) {
-      return res
-        .status(403)
-        .json({
-          success: false,
-          error:
-            "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
-        });
-    }
-
-    try {
-      url = checkAndUpdateURL(url).url;
-    } catch (error) {
-      return res.status(400).json({ success: false, error: 'Invalid Url' });
-    }
-
-    // TODO: add job to queue
-
   const id = uuidv4();
 
+  await logCrawl(id, req.auth.team_id);
+
+  const crawlerOptions = legacyCrawlerOptions(req.body.crawlerOptions),
+    pageOptions = legacyScrapeOptions(req.body.scrapeOptions);
+
+  const sc: StoredCrawl = {
+    originUrl: req.body.url,
+    crawlerOptions,
+    pageOptions,
+    team_id: req.auth.team_id,
+    createdAt: Date.now(),
+  };
+
+  const crawler = crawlToCrawler(id, sc);
+
+  try {
+    sc.robots = await crawler.getRobotsTxt();
+  } catch (e) {
+    Logger.debug(`[Crawl] Failed to get robots.txt (this is probably fine!): ${JSON.stringify(e)}`);
+  }
+
+  await saveCrawl(id, sc);
+
+  const sitemap = sc.crawlerOptions.ignoreSitemap ? null : await crawler.tryGetSitemap();
+
+  if (sitemap !== null) {
+    const jobs = sitemap.map(x => {
+      const url = x.url;
+      const uuid = uuidv4();
+      return {
+        name: uuid,
+        data: {
+          url,
+          mode: "single_urls",
+          team_id: req.auth.team_id,
+          crawlerOptions,
+          pageOptions,
+          origin: "api",
+          crawl_id: id,
+          sitemapped: true,
+        },
+        opts: {
+          jobId: uuid,
+          priority: 20,
+        }
+      };
+    })
+
+    await lockURLs(id, jobs.map(x => x.data.url));
+    await addCrawlJobs(id, jobs.map(x => x.opts.jobId));
+    await getScrapeQueue().addBulk(jobs);
+  } else {
+    await lockURL(id, sc, req.body.url);
+    const job = await addScrapeJob({
+      url: req.body.url,
+      mode: "single_urls",
+      crawlerOptions: crawlerOptions,
+      team_id: req.auth.team_id,
+      pageOptions: pageOptions,
+      origin: "api",
+      crawl_id: id,
+    }, {
+      priority: 15,
+    });
+    await addCrawlJob(id, job.id);
+  }
+
   return res.status(200).json({
     success: true,
     id,
     url: `${req.protocol}://${req.get('host')}/v1/crawl/${id}`,
   });
-
-    // const mode = req.body.mode ?? "crawl";
-
-    // const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions };
-    // const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions };
-
-    // if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this?
-    //   try {
-    //     const a = new WebScraperDataProvider();
-    //     await a.setOptions({
-    //       jobId: uuidv4(),
-    //       mode: "single_urls",
-    //       urls: [url],
-    //       crawlerOptions: { ...crawlerOptions, returnOnlyUrls: true },
-    //       pageOptions: pageOptions,
-    //     });
-
-    //     const docs = await a.getDocuments(false, (progress) => {
-    //       job.progress({
-    //         current: progress.current,
-    //         total: progress.total,
-    //         current_step: "SCRAPING",
-    //         current_url: progress.currentDocumentUrl,
-    //       });
-    //     });
-    //     return res.json({
-    //       success: true,
-    //       documents: docs,
-    //     });
-    //   } catch (error) {
-    //     Logger.error(error);
-    //     return res.status(500).json({ error: error.message });
-    //   }
-    // }
-
-    // const job = await addWebScraperJob({
-    //   url: url,
-    //   mode: mode ?? "crawl", // fix for single urls not working
-    //   crawlerOptions: crawlerOptions,
-    //   team_id: team_id,
-    //   pageOptions: pageOptions,
-    //   origin: req.body.origin ?? defaultOrigin,
-    // });
-
-    // await logCrawl(job.id.toString(), team_id);
-
-    // res.json({ jobId: job.id });
-  } catch (error) {
-    Logger.error(error);
-    return res.status(500).json({ success: false, error: error.message });
-  }
 }
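crawlRequestSchema itself lives in ./types and is not shown in this diff. Based on the commented-out request shape removed above, a plausible sketch is the following (field optionality is an assumption; zod is the validator this commit adds):

import { z } from "zod";

// Hypothetical reconstruction of crawlRequestSchema from the removed comment.
const crawlerOptionsSchema = z.object({
  includePaths: z.string().array().optional(),
  excludePaths: z.string().array().optional(),
  maxDepth: z.number().optional(),
  limit: z.number().optional(),
  allowBackwardLinks: z.boolean().optional(), // ">> TODO: CHANGE THIS NAME???" in the removed comment
  allowExternalLinks: z.boolean().optional(),
  ignoreSitemap: z.boolean().optional(),
});

export const crawlRequestSchema = z.object({
  url: z.string().url(),
  crawlerOptions: crawlerOptionsSchema.optional(),
  scrapeOptions: z.object({}).passthrough().optional(), // "Exclude<Scrape, 'url'>" per the comment
});

export type CrawlRequest = z.infer<typeof crawlRequestSchema>;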
@@ -1,12 +1,12 @@
 import { Request, Response } from "express";
-import { authenticateUser } from "./auth";
-import { RateLimiterMode } from "../../../src/types";
-import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
 import { Logger } from "../../../src/lib/logger";
 import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
-import { MapRequest, MapResponse } from "./types";
+import { MapRequest, mapRequestSchema, MapResponse, RequestWithAuth } from "./types";
+import { checkTeamCredits } from "../../services/billing/credit_billing";

-export async function mapController(req: Request<{}, MapResponse, MapRequest>, res: Response<MapResponse>) {
+export async function mapController(req: RequestWithAuth<MapResponse, MapRequest>, res: Response<MapResponse>) {
+  req.body = mapRequestSchema.parse(req.body);
+  console.log(req.body);
   // expected req.body

   // req.body = {
@@ -14,55 +14,6 @@ export async function mapController(req: Request<{}, MapResponse, MapRequest>, r
   //   crawlerOptions:
   // }

-  try {
-    const { success, team_id, error, status } = await authenticateUser(
-      req,
-      res,
-      RateLimiterMode.Crawl
-    );
-    if (!success) {
-      return res.status(status).json({ success: false, error });
-    }
-
-    // if (req.headers["x-idempotency-key"]) {
-    //   const isIdempotencyValid = await validateIdempotencyKey(req);
-    //   if (!isIdempotencyValid) {
-    //     return res.status(409).json({ error: "Idempotency key already used" });
-    //   }
-    //   try {
-    //     createIdempotencyKey(req);
-    //   } catch (error) {
-    //     Logger.error(error);
-    //     return res.status(500).json({ error: error.message });
-    //   }
-    // }
-
-    // const { success: creditsCheckSuccess, message: creditsCheckMessage } =
-    //   await checkTeamCredits(team_id, 1);
-    // if (!creditsCheckSuccess) {
-    //   return res.status(402).json({ error: "Insufficient credits" });
-    // }
-
-    let url = req.body.url;
-    if (!url) {
-      return res.status(400).json({ success: false, error: "Url is required" });
-    }
-
-    if (isUrlBlocked(url)) {
-      return res
-        .status(403)
-        .json({
-          success: false,
-          error:
-            "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
-        });
-    }
-
-    try {
-      url = checkAndUpdateURL(url).url;
-    } catch (error) {
-      return res.status(400).json({ success: false, error: 'Invalid Url' });
-    }
-
   return res.status(200).json({ success: true, links: [ "test1", "test2" ] });

@@ -112,8 +63,4 @@ export async function mapController(req: Request<{}, MapResponse, MapRequest>, r
   // await logCrawl(job.id.toString(), team_id);

   // res.json({ jobId: job.id });
-  } catch (error) {
-    Logger.error(error);
-    return res.status(500).json({ success: false, error: error.message });
-  }
 }
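With this change the map controller no longer hand-rolls URL checks: mapRequestSchema.parse (defined in the types module later in this commit) normalizes and validates the body up front, and any ZodError it throws is converted into a 400 by the router-level error handler added below. A minimal sketch of the parse behavior, assuming the schema as declared in types.ts; the sample URL and values in comments are illustrative only:

import { mapRequestSchema } from "./types";

// A bare domain gains an http:// scheme via the `url` preprocessor,
// and crawler options fall back to their declared zod defaults.
const parsed = mapRequestSchema.parse({ url: "firecrawl.dev" });
// parsed.url   === "http://firecrawl.dev"
// parsed.limit === 10000 (schema default)

// A non-HTTP scheme fails validation; the thrown ZodError is turned
// into a 400 "Bad Request" by the shared error middleware.
mapRequestSchema.parse({ url: "ftp://example.com" }); // throws ZodError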
@@ -1,26 +1,11 @@
 import { Request, Response } from "express";
-import { authenticateUser } from "./auth";
-import { RateLimiterMode } from "../../types";
-import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
 import { Logger } from '../../lib/logger';
 import { checkAndUpdateURL } from '../../lib/validateUrl';
-import { ScrapeRequest, ScrapeResponse } from "./types";
+import { RequestWithAuth, ScrapeRequest, scrapeRequestSchema, ScrapeResponse } from "./types";

-export async function scrapeController(req: Request<{}, ScrapeResponse, ScrapeRequest>, res: Response<ScrapeResponse>) {
-  let url = req.body.url;
-  if (!url) {
-    return { success: false, error: "Url is required", returnCode: 400 };
-  }
-
-  if (isUrlBlocked(url)) {
-    return { success: false, error: "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", returnCode: 403 };
-  }
-
-  try {
-    url = checkAndUpdateURL(url).url;
-  } catch (error) {
-    return { success: false, error: "Invalid URL", returnCode: 400 };
-  }
-
+export async function scrapeController(req: RequestWithAuth<ScrapeResponse, ScrapeRequest>, res: Response<ScrapeResponse>) {
+  req.body = scrapeRequestSchema.parse(req.body);
+  console.log(req.body);
   // TODO: check req.body
   // mockup req.body
@@ -37,17 +22,8 @@ export async function scrapeController(req: Request<{}, ScrapeResponse, ScrapeRe
   //   waitFor: number
   // }

-  try {
   let earlyReturn = false;
   // make sure to authenticate user first, Bearer <token>
-  const { success, team_id, error, status, plan } = await authenticateUser(
-    req,
-    res,
-    RateLimiterMode.Scrape
-  );
-  if (!success) {
-    return res.status(status).json({ success: false, error });
-  }

   // check credits

@@ -164,10 +140,6 @@ export async function scrapeController(req: Request<{}, ScrapeResponse, ScrapeRe


   // return res.status(result.returnCode).json(result);
-  } catch (error) {
-    Logger.error(error);
-    return res.status(500).json({ success: false, error: error.message });
-  }
 }

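The scrape controller follows the same pattern: scrapeRequestSchema.parse replaces the manual checks for a missing URL, the blocklist, and the protocol fixup, so malformed bodies never reach the handler. A sketch of what the parsed body looks like, assuming the zod defaults declared in types.ts below:

import { scrapeRequestSchema } from "./types";

const body = scrapeRequestSchema.parse({ url: "https://firecrawl.dev" });
// body.formats         === ["markdown"] (default)
// body.onlyMainContent === true         (default)
// body.timeout         === 30000        (default)
// body.waitFor         === 0            (default)

// A missing url no longer needs a manual check in the controller:
scrapeRequestSchema.parse({}); // throws ZodError ("Required" on url)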
@@ -1,36 +1,96 @@
+import { Request } from "express";
+import { z } from "zod";
+import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
+import { PageOptions } from "../../lib/entities";

 export type Format = "markdown" | "html" | "rawHtml" | "links" | "screenshot" | "screenshot@fullPage";

-export type ScrapeRequest = {
-  url: string;
-  formats?: Format[];
-  headers?: { [K: string]: string };
-  includeTags?: string[];
-  excludeTags?: string[];
-  onlyMainContent?: boolean;
-  timeout?: number;
-  waitFor?: number;
-}
+const url = z.preprocess(x => {
+  if (typeof x === "string" && !/^([^.:]+:\/\/)/.test(x)) {
+    if (x.startsWith("://")) {
+      return "http" + x;
+    } else {
+      return "http://" + x;
+    }
+  } else {
+    return x;
+  }
+}, z.string().url().regex(/^https?:\/\//, "URL uses unsupported protocol").refine(x => !isUrlBlocked(x), "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."));

-export type CrawlerOptions = {
-  includePaths?: string[];
-  excludePaths?: string[];
-  maxDepth?: number;
-  limit?: number;
-  allowBackwardLinks?: boolean; // >> TODO: CHANGE THIS NAME???
-  allowExternalLinks?: boolean;
-  ignoreSitemap?: boolean;
-};
+export const scrapeOptions = z.object({
+  formats: z.enum(["markdown", "html", "rawHtml", "links", "screenshot", "screenshot@fullPage"])
+    .array()
+    .optional()
+    .default(["markdown"]),
+  headers: z.record(z.string(), z.string()).optional(),
+  includeTags: z.string().array().optional(),
+  excludeTags: z.string().array().optional(),
+  onlyMainContent: z.boolean().default(true),
+  timeout: z.number().int().positive().finite().safe().default(30000), // default?
+  waitFor: z.number().int().nonnegative().finite().safe().default(0),
+});

-export type CrawlRequest = {
-  url: string;
-  crawlerOptions?: CrawlerOptions;
-  scrapeOptions?: Exclude<ScrapeRequest, "url">;
-};
+export type ScrapeOptions = z.infer<typeof scrapeOptions>;

-export type MapRequest = {
-  url: string;
-  crawlerOptions?: CrawlerOptions;
-};
+export const scrapeRequestSchema = scrapeOptions.extend({ url });
+
+// export type ScrapeRequest = {
+//   url: string;
+//   formats?: Format[];
+//   headers?: { [K: string]: string };
+//   includeTags?: string[];
+//   excludeTags?: string[];
+//   onlyMainContent?: boolean;
+//   timeout?: number;
+//   waitFor?: number;
+// }
+
+export type ScrapeRequest = z.infer<typeof scrapeRequestSchema>;
+
+const crawlerOptions = z.object({
+  includePaths: z.string().array().default([]),
+  excludePaths: z.string().array().default([]),
+  maxDepth: z.number().default(10), // default?
+  limit: z.number().default(10000), // default?
+  allowBackwardLinks: z.boolean().default(false), // >> TODO: CHANGE THIS NAME???
+  allowExternalLinks: z.boolean().default(false),
+  ignoreSitemap: z.boolean().default(true),
+});
+
+// export type CrawlerOptions = {
+//   includePaths?: string[];
+//   excludePaths?: string[];
+//   maxDepth?: number;
+//   limit?: number;
+//   allowBackwardLinks?: boolean; // >> TODO: CHANGE THIS NAME???
+//   allowExternalLinks?: boolean;
+//   ignoreSitemap?: boolean;
+// };
+
+export type CrawlerOptions = z.infer<typeof crawlerOptions>;
+
+export const crawlRequestSchema = z.object({
+  url,
+  crawlerOptions: crawlerOptions.default({}),
+  scrapeOptions: scrapeOptions.omit({ timeout: true }).default({}),
+});
+
+// export type CrawlRequest = {
+//   url: string;
+//   crawlerOptions?: CrawlerOptions;
+//   scrapeOptions?: Exclude<ScrapeRequest, "url">;
+// };
+
+export type CrawlRequest = z.infer<typeof crawlRequestSchema>;
+
+export const mapRequestSchema = crawlerOptions.extend({ url });
+
+// export type MapRequest = {
+//   url: string;
+//   crawlerOptions?: CrawlerOptions;
+// };
+
+export type MapRequest = z.infer<typeof mapRequestSchema>;

 export type Document = {
   markdown?: string,
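The `url` preprocessor above deserves a note: it prepends a scheme before z.string().url() runs, so bare domains validate, while non-HTTP protocols are rejected by the trailing regex and blocked hosts by the refine step. Illustrative inputs and outputs (a sketch, not part of the commit):

// "example.com"         -> "http://example.com"  (scheme prepended)
// "://example.com"      -> "http://example.com"  ("http" prefixed)
// "https://example.com" -> unchanged
// "ftp://example.com"   -> passes preprocessing, then fails the
//                          /^https?:\/\// "URL uses unsupported protocol" check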
@@ -77,6 +137,7 @@ export type Document = {
 export type ErrorResponse = {
   success: false;
   error: string;
+  details?: any;
 };

 export type ScrapeResponse = ErrorResponse | {
@@ -95,3 +156,40 @@ export type MapResponse = ErrorResponse | {
   success: true;
   links: string[];
 }
+
+type AuthObject = {
+  team_id: string;
+  plan: string;
+}
+
+export interface RequestWithMaybeAuth<ReqBody = undefined, ResBody = undefined> extends Request<{}, ReqBody, ResBody> {
+  auth?: AuthObject;
+}
+
+export interface RequestWithAuth<ReqBody = undefined, ResBody = undefined> extends Request<{}, ReqBody, ResBody> {
+  auth: AuthObject;
+}
+
+export function legacyCrawlerOptions(x: CrawlerOptions) {
+  return {
+    includes: x.includePaths,
+    excludes: x.excludePaths,
+    maxCrawledLinks: x.limit,
+    maxCrawledDepth: x.maxDepth,
+    limit: x.limit,
+    generateImgAltText: false,
+    allowBackwardCrawling: x.allowBackwardLinks,
+    allowExternalContentLinks: x.allowExternalLinks,
+  };
+}
+
+export function legacyScrapeOptions(x: ScrapeOptions): PageOptions {
+  return {
+    includeHtml: x.formats.includes("html"),
+    includeRawHtml: x.formats.includes("rawHtml"),
+    onlyIncludeTags: x.includeTags,
+    removeTags: x.excludeTags,
+    onlyMainContent: x.onlyMainContent,
+    waitFor: x.waitFor,
+  };
+}
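The two legacy* helpers bridge the parsed v1 options onto the existing v0-era engine shapes (PageOptions and the crawler options object). Note that legacyCrawlerOptions feeds x.limit into both maxCrawledLinks and limit. A hedged usage sketch; the controllers' exact call sites are not part of this hunk:

import { crawlRequestSchema, scrapeRequestSchema, legacyCrawlerOptions, legacyScrapeOptions } from "./types";

// Parse a crawl body, then translate the validated options for the engine.
const crawl = crawlRequestSchema.parse({ url: "firecrawl.dev" });
const engineCrawlerOptions = legacyCrawlerOptions(crawl.crawlerOptions);
// engineCrawlerOptions.maxCrawledLinks === 10000 (from x.limit)
// engineCrawlerOptions.maxCrawledDepth === 10    (from x.maxDepth)

// Same idea for page options on a scrape body.
const scrape = scrapeRequestSchema.parse({ url: "firecrawl.dev" });
const pageOptions = legacyScrapeOptions(scrape);
// pageOptions.includeHtml === false ("html" is not in the default ["markdown"])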
@@ -1,9 +1,18 @@
-import express from "express";
+import express, { NextFunction, Request, Response } from "express";
 import { crawlController } from "../../src/controllers/v1/crawl";
 // import { crawlStatusController } from "../../src/controllers/v1/crawl-status";
 import { scrapeController } from "../../src/controllers/v1/scrape";
 import { crawlStatusController } from "../../src/controllers/v1/crawl-status";
 import { mapController } from "../../src/controllers/v1/map";
+import { ErrorResponse, RequestWithAuth, RequestWithMaybeAuth } from "../controllers/v1/types";
+import { RateLimiterMode } from "../types";
+import { authenticateUser } from "../controllers/v1/auth";
+import { Logger } from "../lib/logger";
+import { createIdempotencyKey } from "../services/idempotency/create";
+import { validateIdempotencyKey } from "../services/idempotency/validate";
+import { ZodError } from "zod";
+import { checkTeamCredits } from "../services/billing/credit_billing";
+import { v4 as uuidv4 } from "uuid";
 // import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
 // import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
 // import { searchController } from "../../src/controllers/v1/search";
@@ -12,13 +21,96 @@ import { mapController } from "../../src/controllers/v1/map";
 // import { livenessController } from "../controllers/v1/liveness";
 // import { readinessController } from "../controllers/v1/readiness";

+function checkCreditsMiddleware(minimum: number): (req: RequestWithAuth, res: Response, next: NextFunction) => void {
+  return (req, res, next) => {
+    (async () => {
+      if (!(await checkTeamCredits(req.auth.team_id, minimum)).success) {
+        return res.status(402).json({ success: false, error: "Insufficient credits" });
+      }
+      next();
+    })()
+      .catch(err => next(err));
+  };
+}
+
+function authMiddleware(rateLimiterMode: RateLimiterMode): (req: RequestWithMaybeAuth, res: Response, next: NextFunction) => void {
+  return (req, res, next) => {
+    (async () => {
+      const { success, team_id, error, status, plan } = await authenticateUser(
+        req,
+        res,
+        rateLimiterMode,
+      );
+
+      if (!success) {
+        return res.status(status).json({ success: false, error });
+      }
+
+      req.auth = { team_id, plan };
+      next();
+    })()
+      .catch(err => next(err));
+  }
+}
+
+function idempotencyMiddleware(req: Request, res: Response, next: NextFunction) {
+  (async () => {
+    if (req.headers["x-idempotency-key"]) {
+      const isIdempotencyValid = await validateIdempotencyKey(req);
+      if (!isIdempotencyValid) {
+        return res.status(409).json({ success: false, error: "Idempotency key already used" });
+      }
+      // try {
+      createIdempotencyKey(req);
+      // } catch (error) {
+      //   Logger.error(error);
+      //   return res.status(500).json({ success: false, error: error.message });
+      // }
+    }
+    next();
+  })()
+    .catch(err => next(err));
+}
+
+function wrap(controller: (req: Request, res: Response) => Promise<any>): (req: Request, res: Response, next: NextFunction) => any {
+  return (req, res, next) => {
+    controller(req, res)
+      .catch(err => next(err))
+  }
+}
+
 export const v1Router = express.Router();

-v1Router.post("/v1/scrape", scrapeController);
-v1Router.post("/v1/crawl", crawlController);
-v1Router.get("/v1/crawl/:jobId", crawlStatusController);
+v1Router.post(
+  "/v1/scrape",
+  authMiddleware(RateLimiterMode.Scrape),
+  checkCreditsMiddleware(1),
+  wrap(scrapeController)
+);
+
+v1Router.post(
+  "/v1/crawl",
+  authMiddleware(RateLimiterMode.Crawl),
+  idempotencyMiddleware,
+  checkCreditsMiddleware(1),
+  wrap(crawlController)
+);
+
+v1Router.post(
+  "/v1/map",
+  authMiddleware(RateLimiterMode.Crawl),
+  checkCreditsMiddleware(1),
+  wrap(mapController)
+);
+
+v1Router.get(
+  "/v1/crawl/:jobId",
+  authMiddleware(RateLimiterMode.CrawlStatus),
+  wrap(crawlStatusController)
+);
+
 // v1Router.post("/v1/crawlWebsitePreview", crawlPreviewController);
-// v1Router.delete("/v1/crawl/cancel/:jobId", crawlCancelController);
+// v1Router.delete("/v1/crawl/:jobId", crawlCancelController);
 // v1Router.get("/v1/checkJobStatus/:jobId", crawlJobStatusPreviewController);

 // // Auth route for key based authentication
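The route registrations make the middleware order explicit: authenticate first (attaching req.auth), then idempotency where relevant, then the credit check, and finally the controller wrapped by wrap, which forwards rejected promises to Express's error chain (Express 4 does not catch async rejections on its own). A sketch of the pattern a future endpoint could follow; the "/v1/search" path, RateLimiterMode.Search value, and searchController are hypothetical names, not part of this commit:

// Hypothetical: registering a new endpoint with the same middleware chain.
v1Router.post(
  "/v1/search",                            // hypothetical endpoint
  authMiddleware(RateLimiterMode.Search),  // authenticates, rate limits, sets req.auth
  checkCreditsMiddleware(1),               // 402s if the team has no credits
  wrap(searchController)                   // async rejections flow to the error handler
);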
@@ -31,4 +123,12 @@ v1Router.get("/v1/crawl/:jobId", crawlStatusController);
 // v1Router.get("/v1/health/liveness", livenessController);
 // v1Router.get("/v1/health/readiness", readinessController);

-v1Router.post("/v1/map", mapController);
+v1Router.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: Response<ErrorResponse>, next: NextFunction) => {
+  if (err instanceof ZodError) {
+    res.status(400).json({ success: false, error: "Bad Request", details: err.errors });
+  } else {
+    const id = uuidv4();
+    Logger.error("Error occurred in request! (" + req.path + ") -- ID " + id + " -- " + JSON.stringify(err));
+    res.status(500).json({ success: false, error: "An unexpected error occurred. Please contact hello@firecrawl.com for help. Your exception ID is " + id + "" });
+  }
+});
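The closing error handler gives the v1 surface a single failure shape: ZodErrors become 400s carrying the zod issue list in details (the field added to ErrorResponse above), while everything else becomes a 500 tagged with a fresh exception ID for log correlation. Illustrative response shapes (contents are a sketch):

// Validation failure (ZodError) -> HTTP 400:
//   { "success": false, "error": "Bad Request", "details": [ ...zod issues... ] }
//
// Any other thrown error -> HTTP 500:
//   { "success": false, "error": "An unexpected error occurred. Please contact
//     hello@firecrawl.com for help. Your exception ID is <uuid>" }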