From 8b7569f8f3bbc01c2e6e80129fcf902ddd4d1aeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 15 Aug 2024 23:30:33 +0200 Subject: [PATCH] add zod, create middleware, update openapi declaration, add crawl logic --- apps/api/openapi-v0.json | 924 ++++++++++++++++++++ apps/api/openapi.json | 287 +++--- apps/api/package.json | 2 +- apps/api/pnpm-lock.yaml | 2 +- apps/api/src/controllers/v1/crawl-status.ts | 134 ++- apps/api/src/controllers/v1/crawl.ts | 211 ++--- apps/api/src/controllers/v1/map.ts | 145 +-- apps/api/src/controllers/v1/scrape.ts | 262 +++--- apps/api/src/controllers/v1/types.ts | 154 +++- apps/api/src/routes/v1.ts | 112 ++- 10 files changed, 1604 insertions(+), 629 deletions(-) create mode 100644 apps/api/openapi-v0.json diff --git a/apps/api/openapi-v0.json b/apps/api/openapi-v0.json new file mode 100644 index 00000000..40272385 --- /dev/null +++ b/apps/api/openapi-v0.json @@ -0,0 +1,924 @@ +{ + "openapi": "3.0.0", + "info": { + "title": "Firecrawl API", + "version": "0.0.0", + "description": "API for interacting with Firecrawl services to perform web scraping and crawling tasks.", + "contact": { + "name": "Firecrawl Support", + "url": "https://firecrawl.dev/support", + "email": "support@firecrawl.dev" + } + }, + "servers": [ + { + "url": "https://api.firecrawl.dev/v0" + } + ], + "paths": { + "/scrape": { + "post": { + "summary": "Scrape a single URL and optionally extract information using an LLM", + "operationId": "scrapeAndExtractFromUrl", + "tags": ["Scraping"], + "security": [ + { + "bearerAuth": [] + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri", + "description": "The URL to scrape" + }, + "pageOptions": { + "type": "object", + "properties": { + "headers": { + "type": "object", + "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc." + }, + "includeHtml": { + "type": "boolean", + "description": "Include the HTML version of the content on page. Will output a html key in the response.", + "default": false + }, + "includeRawHtml": { + "type": "boolean", + "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.", + "default": false + }, + "onlyIncludeTags": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'" + }, + "onlyMainContent": { + "type": "boolean", + "description": "Only return the main content of the page excluding headers, navs, footers, etc.", + "default": false + }, + "removeTags": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Tags, classes and ids to remove from the page. Use comma separated values. 
Example: 'script, .ad, #footer'" + }, + "replaceAllPathsWithAbsolutePaths": { + "type": "boolean", + "description": "Replace all relative paths with absolute paths for images and links", + "default": false + }, + "screenshot": { + "type": "boolean", + "description": "Include a screenshot of the top of the page that you are scraping.", + "default": false + }, + "fullPageScreenshot": { + "type": "boolean", + "description": "Include a full page screenshot of the page that you are scraping.", + "default": false + }, + "waitFor": { + "type": "integer", + "description": "Wait x amount of milliseconds for the page to load to fetch content", + "default": 0 + } + } + }, + "extractorOptions": { + "type": "object", + "description": "Options for extraction of structured information from the page content. Note: LLM-based extraction is not performed by default and only occurs when explicitly configured. The 'markdown' mode simply returns the scraped markdown and is the default mode for scraping.", + "default": {}, + "properties": { + "mode": { + "type": "string", + "enum": ["markdown", "llm-extraction", "llm-extraction-from-raw-html", "llm-extraction-from-markdown"], + "description": "The extraction mode to use. 'markdown': Returns the scraped markdown content, does not perform LLM extraction. 'llm-extraction': Extracts information from the cleaned and parsed content using LLM. 'llm-extraction-from-raw-html': Extracts information directly from the raw HTML using LLM. 'llm-extraction-from-markdown': Extracts information from the markdown content using LLM." + }, + "extractionPrompt": { + "type": "string", + "description": "A prompt describing what information to extract from the page, applicable for LLM extraction modes." + }, + "extractionSchema": { + "type": "object", + "additionalProperties": true, + "description": "The schema for the data to be extracted, required only for LLM extraction modes.", + "required": [ + "company_mission", + "supports_sso", + "is_open_source" + ] + } + } + }, + "timeout": { + "type": "integer", + "description": "Timeout in milliseconds for the request", + "default": 30000 + } + }, + "required": ["url"] + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ScrapeResponse" + } + } + } + }, + "402": { + "description": "Payment required", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Payment required to access this resource." + } + } + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Request rate limit exceeded. Please wait and try again later." + } + } + } + } + } + }, + "500": { + "description": "Server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "An unexpected error occurred on the server." 
+ } + } + } + } + } + } + } + } + }, + "/crawl": { + "post": { + "summary": "Crawl multiple URLs based on options", + "operationId": "crawlUrls", + "tags": ["Crawling"], + "security": [ + { + "bearerAuth": [] + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri", + "description": "The base URL to start crawling from" + }, + "crawlerOptions": { + "type": "object", + "properties": { + "includes": { + "type": "array", + "items": { + "type": "string" + }, + "description": "URL patterns to include" + }, + "excludes": { + "type": "array", + "items": { + "type": "string" + }, + "description": "URL patterns to exclude" + }, + "generateImgAltText": { + "type": "boolean", + "description": "Generate alt text for images using LLMs (must have a paid plan)", + "default": false + }, + "returnOnlyUrls": { + "type": "boolean", + "description": "If true, returns only the URLs as a list on the crawl status. Attention: the return response will be a list of URLs inside the data, not a list of documents.", + "default": false + }, + "maxDepth": { + "type": "integer", + "description": "Maximum depth to crawl relative to the entered URL. A maxDepth of 0 scrapes only the entered URL. A maxDepth of 1 scrapes the entered URL and all pages one level deep. A maxDepth of 2 scrapes the entered URL and all pages up to two levels deep. Higher values follow the same pattern." + }, + "mode": { + "type": "string", + "enum": ["default", "fast"], + "description": "The crawling mode to use. Fast mode crawls 4x faster websites without sitemap, but may not be as accurate and shouldn't be used in heavy js-rendered websites.", + "default": "default" + }, + "ignoreSitemap": { + "type": "boolean", + "description": "Ignore the website sitemap when crawling", + "default": false + }, + "limit": { + "type": "integer", + "description": "Maximum number of pages to crawl", + "default": 10000 + }, + "allowBackwardCrawling": { + "type": "boolean", + "description": "Enables the crawler to navigate from a specific URL to previously linked pages. For instance, from 'example.com/product/123' back to 'example.com/product'", + "default": false + }, + "allowExternalContentLinks": { + "type": "boolean", + "description": "Allows the crawler to follow links to external websites.", + "default": false + } + } + }, + "pageOptions": { + "type": "object", + "properties": { + "headers": { + "type": "object", + "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc." + }, + "includeHtml": { + "type": "boolean", + "description": "Include the HTML version of the content on page. Will output a html key in the response.", + "default": false + }, + "includeRawHtml": { + "type": "boolean", + "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.", + "default": false + }, + "onlyIncludeTags": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'" + }, + "onlyMainContent": { + "type": "boolean", + "description": "Only return the main content of the page excluding headers, navs, footers, etc.", + "default": false + }, + "removeTags": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Tags, classes and ids to remove from the page. Use comma separated values. 
Example: 'script, .ad, #footer'" + }, + "replaceAllPathsWithAbsolutePaths": { + "type": "boolean", + "description": "Replace all relative paths with absolute paths for images and links", + "default": false + }, + "screenshot": { + "type": "boolean", + "description": "Include a screenshot of the top of the page that you are scraping.", + "default": false + }, + "fullPageScreenshot": { + "type": "boolean", + "description": "Include a full page screenshot of the page that you are scraping.", + "default": false + }, + "waitFor": { + "type": "integer", + "description": "Wait x amount of milliseconds for the page to load to fetch content", + "default": 0 + } + } + } + }, + "required": ["url"] + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CrawlResponse" + } + } + } + }, + "402": { + "description": "Payment required", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Payment required to access this resource." + } + } + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Request rate limit exceeded. Please wait and try again later." + } + } + } + } + } + }, + "500": { + "description": "Server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "An unexpected error occurred on the server." + } + } + } + } + } + } + } + } + }, + "/search": { + "post": { + "summary": "Search for a keyword in Google, returns top page results with markdown content for each page", + "operationId": "searchGoogle", + "tags": ["Search"], + "security": [ + { + "bearerAuth": [] + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "query": { + "type": "string", + "format": "uri", + "description": "The query to search for" + }, + "pageOptions": { + "type": "object", + "properties": { + "onlyMainContent": { + "type": "boolean", + "description": "Only return the main content of the page excluding headers, navs, footers, etc.", + "default": false + }, + "fetchPageContent": { + "type": "boolean", + "description": "Fetch the content of each page. If false, defaults to a basic fast serp API.", + "default": true + }, + "includeHtml": { + "type": "boolean", + "description": "Include the HTML version of the content on page. Will output a html key in the response.", + "default": false + }, + "includeRawHtml": { + "type": "boolean", + "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.", + "default": false + } + } + }, + "searchOptions": { + "type": "object", + "properties": { + "limit": { + "type": "integer", + "description": "Maximum number of results. Max is 20 during beta." + } + } + } + }, + "required": ["query"] + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SearchResponse" + } + } + } + }, + "402": { + "description": "Payment required", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Payment required to access this resource." 
+ } + } + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Request rate limit exceeded. Please wait and try again later." + } + } + } + } + } + }, + "500": { + "description": "Server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "An unexpected error occurred on the server." + } + } + } + } + } + } + } + } + }, + "/crawl/status/{jobId}": { + "get": { + "tags": ["Crawl"], + "summary": "Get the status of a crawl job", + "operationId": "getCrawlStatus", + "security": [ + { + "bearerAuth": [] + } + ], + "parameters": [ + { + "name": "jobId", + "in": "path", + "description": "ID of the crawl job", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "status": { + "type": "string", + "description": "Status of the job (completed, active, failed, paused)" + }, + "current": { + "type": "integer", + "description": "Current page number" + }, + "total": { + "type": "integer", + "description": "Total number of pages" + }, + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/CrawlStatusResponseObj" + }, + "description": "Data returned from the job (null when it is in progress)" + }, + "partial_data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/CrawlStatusResponseObj" + }, + "description": "Partial documents returned as it is being crawled (streaming). **This feature is currently in alpha - expect breaking changes** When a page is ready, it will append to the partial_data array, so there is no need to wait for the entire website to be crawled. When the crawl is done, partial_data will become empty and the result will be available in `data`. There is a max of 50 items in the array response. The oldest item (top of the array) will be removed when the new item is added to the array." + } + } + } + } + } + }, + "402": { + "description": "Payment required", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Payment required to access this resource." + } + } + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Request rate limit exceeded. Please wait and try again later." + } + } + } + } + } + }, + "500": { + "description": "Server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "An unexpected error occurred on the server." 
+ } + } + } + } + } + } + } + } + }, + "/crawl/cancel/{jobId}": { + "delete": { + "tags": ["Crawl"], + "summary": "Cancel a crawl job", + "operationId": "cancelCrawlJob", + "security": [ + { + "bearerAuth": [] + } + ], + "parameters": [ + { + "name": "jobId", + "in": "path", + "description": "ID of the crawl job", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "status": { + "type": "string", + "description": "Returns cancelled." + } + } + } + } + } + }, + "402": { + "description": "Payment required", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Payment required to access this resource." + } + } + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Request rate limit exceeded. Please wait and try again later." + } + } + } + } + } + }, + "500": { + "description": "Server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "An unexpected error occurred on the server." + } + } + } + } + } + } + } + } + } + }, + "components": { + "securitySchemes": { + "bearerAuth": { + "type": "http", + "scheme": "bearer" + } + }, + "schemas": { + "ScrapeResponse": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "data": { + "type": "object", + "properties": { + "markdown": { + "type": "string" + }, + "content": { + "type": "string" + }, + "html": { + "type": "string", + "nullable": true, + "description": "HTML version of the content on page if `includeHtml` is true" + }, + "rawHtml": { + "type": "string", + "nullable": true, + "description": "Raw HTML content of the page if `includeRawHtml` is true" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "language": { + "type": "string", + "nullable": true + }, + "sourceURL": { + "type": "string", + "format": "uri" + }, + " ": { + "type": "string" + }, + "pageStatusCode": { + "type": "integer", + "description": "The status code of the page" + }, + "pageError": { + "type": "string", + "nullable": true, + "description": "The error message of the page" + } + + } + }, + "llm_extraction": { + "type": "object", + "description": "Displayed when using LLM Extraction. Extracted data from the page following the schema defined.", + "nullable": true + }, + "warning": { + "type": "string", + "nullable": true, + "description": "Can be displayed when using LLM Extraction. Warning message will let you know any issues with the extraction." + } + } + } + } + }, + "CrawlStatusResponseObj": { + "type": "object", + "properties": { + "markdown": { + "type": "string" + }, + "content": { + "type": "string" + }, + "html": { + "type": "string", + "nullable": true, + "description": "HTML version of the content on page if `includeHtml` is true" + }, + "rawHtml": { + "type": "string", + "nullable": true, + "description": "Raw HTML content of the page if `includeRawHtml` is true" + }, + "index": { + "type": "integer", + "description": "The number of the page that was crawled. This is useful for `partial_data` so you know which page the data is from." 
+ }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "language": { + "type": "string", + "nullable": true + }, + "sourceURL": { + "type": "string", + "format": "uri" + }, + " ": { + "type": "string" + }, + "pageStatusCode": { + "type": "integer", + "description": "The status code of the page" + }, + "pageError": { + "type": "string", + "nullable": true, + "description": "The error message of the page" + } + } + } + } + }, + "SearchResponse": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "markdown": { + "type": "string" + }, + "content": { + "type": "string" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "language": { + "type": "string", + "nullable": true + }, + "sourceURL": { + "type": "string", + "format": "uri" + } + } + } + } + } + } + } + }, + "CrawlResponse": { + "type": "object", + "properties": { + "jobId": { + "type": "string" + } + } + } + } + }, + "security": [ + { + "bearerAuth": [] + } + ] +} \ No newline at end of file diff --git a/apps/api/openapi.json b/apps/api/openapi.json index fb0c4305..5bd3e3d8 100644 --- a/apps/api/openapi.json +++ b/apps/api/openapi.json @@ -18,8 +18,8 @@ "paths": { "/scrape": { "post": { - "summary": "Scrape a single URL and optionally extract information using an LLM", - "operationId": "scrapeAndExtractFromUrl", + "summary": "Scrape a single URL", + "operationId": "scrape", "tags": ["Scraping"], "security": [ { @@ -38,94 +38,47 @@ "format": "uri", "description": "The URL to scrape" }, - "pageOptions": { - "type": "object", - "properties": { - "headers": { - "type": "object", - "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc." - }, - "includeHtml": { - "type": "boolean", - "description": "Include the HTML version of the content on page. Will output a html key in the response.", - "default": false - }, - "includeRawHtml": { - "type": "boolean", - "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.", - "default": false - }, - "onlyIncludeTags": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'" - }, - "onlyMainContent": { - "type": "boolean", - "description": "Only return the main content of the page excluding headers, navs, footers, etc.", - "default": false - }, - "removeTags": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Tags, classes and ids to remove from the page. Use comma separated values. 
Example: 'script, .ad, #footer'" - }, - "replaceAllPathsWithAbsolutePaths": { - "type": "boolean", - "description": "Replace all relative paths with absolute paths for images and links", - "default": false - }, - "screenshot": { - "type": "boolean", - "description": "Include a screenshot of the top of the page that you are scraping.", - "default": false - }, - "fullPageScreenshot": { - "type": "boolean", - "description": "Include a full page screenshot of the page that you are scraping.", - "default": false - }, - "waitFor": { - "type": "integer", - "description": "Wait x amount of milliseconds for the page to load to fetch content", - "default": 0 - } - } + "formats": { + "type": "array", + "items": { + "type": "string", + "enum": ["markdown", "html", "rawHtml", "links", "screenshot", "screenshot@fullPage"] + }, + "description": "Specific formats to return.\n\n - markdown: The page in Markdown format.\n - html: The page's HTML, trimmed to include only meaningful content.\n - rawHtml: The page's original HTML.\n - links: The links on the page.\n - screenshot: A screenshot of the top of the page.\n - screenshot@fullPage: A screenshot of the full page. (overridden by screenshot if present)", + "default": ["markdown"] }, - "extractorOptions": { + "headers": { "type": "object", - "description": "Options for extraction of structured information from the page content. Note: LLM-based extraction is not performed by default and only occurs when explicitly configured. The 'markdown' mode simply returns the scraped markdown and is the default mode for scraping.", - "default": {}, - "properties": { - "mode": { - "type": "string", - "enum": ["markdown", "llm-extraction", "llm-extraction-from-raw-html", "llm-extraction-from-markdown"], - "description": "The extraction mode to use. 'markdown': Returns the scraped markdown content, does not perform LLM extraction. 'llm-extraction': Extracts information from the cleaned and parsed content using LLM. 'llm-extraction-from-raw-html': Extracts information directly from the raw HTML using LLM. 'llm-extraction-from-markdown': Extracts information from the markdown content using LLM." - }, - "extractionPrompt": { - "type": "string", - "description": "A prompt describing what information to extract from the page, applicable for LLM extraction modes." - }, - "extractionSchema": { - "type": "object", - "additionalProperties": true, - "description": "The schema for the data to be extracted, required only for LLM extraction modes.", - "required": [ - "company_mission", - "supports_sso", - "is_open_source" - ] - } - } + "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc." + }, + "includeTags": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'" + }, + "excludeTags": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Tags, classes and ids to remove from the page. Use comma separated values. 
Example: 'script, .ad, #footer'" + }, + "onlyMainContent": { + "type": "boolean", + "description": "Only return the main content of the page excluding headers, navs, footers, etc.", + "default": true }, "timeout": { "type": "integer", "description": "Timeout in milliseconds for the request", "default": 30000 + }, + "waitFor": { + "type": "integer", + "description": "Wait x amount of milliseconds for the page to load to fetch content", + "default": 0 } }, "required": ["url"] @@ -741,24 +694,42 @@ "success": { "type": "boolean" }, + "warning": { + "type": "string", + "nullable": true, + "description": "Warning message to let you know of any issues." + }, "data": { "type": "object", "properties": { "markdown": { - "type": "string" - }, - "content": { - "type": "string" + "type": "string", + "nullable": true, + "description": "Markdown content of the page if the `markdown` format was specified (default)" }, "html": { "type": "string", "nullable": true, - "description": "HTML version of the content on page if `includeHtml` is true" + "description": "HTML version of the content on page if the `html` format was specified" }, "rawHtml": { "type": "string", "nullable": true, - "description": "Raw HTML content of the page if `includeRawHtml` is true" + "description": "Raw HTML content of the page if the `rawHtml` format was specified" + }, + "links": { + "type": "array", + "items": { + "type": "string", + "format": "uri" + }, + "nullable": true, + "description": "Links on the page if the `links` format was specified" + }, + "screenshot": { + "type": "string", + "nullable": true, + "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullSize` format was specified" }, "metadata": { "type": "object", @@ -780,27 +751,16 @@ " ": { "type": "string" }, - "pageStatusCode": { + "statusCode": { "type": "integer", "description": "The status code of the page" }, - "pageError": { + "error": { "type": "string", "nullable": true, "description": "The error message of the page" } - } - }, - "llm_extraction": { - "type": "object", - "description": "Displayed when using LLM Extraction. Extracted data from the page following the schema defined.", - "nullable": true - }, - "warning": { - "type": "string", - "nullable": true, - "description": "Can be displayed when using LLM Extraction. Warning message will let you know any issues with the extraction." } } } @@ -810,24 +770,33 @@ "type": "object", "properties": { "markdown": { - "type": "string" - }, - "content": { - "type": "string" + "type": "string", + "nullable": true, + "description": "Markdown content of the page if the `markdown` format was specified (default)" }, "html": { "type": "string", "nullable": true, - "description": "HTML version of the content on page if `includeHtml` is true" + "description": "HTML version of the content on page if the `html` format was specified" }, "rawHtml": { "type": "string", "nullable": true, - "description": "Raw HTML content of the page if `includeRawHtml` is true" + "description": "Raw HTML content of the page if the `rawHtml` format was specified" }, - "index": { - "type": "integer", - "description": "The number of the page that was crawled. This is useful for `partial_data` so you know which page the data is from." 
+ "links": { + "type": "array", + "items": { + "type": "string", + "format": "uri" + }, + "nullable": true, + "description": "Links on the page if the `links` format was specified" + }, + "screenshot": { + "type": "string", + "nullable": true, + "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullSize` format was specified" }, "metadata": { "type": "object", @@ -849,11 +818,11 @@ " ": { "type": "string" }, - "pageStatusCode": { + "statusCode": { "type": "integer", "description": "The status code of the page" }, - "pageError": { + "error": { "type": "string", "nullable": true, "description": "The error message of the page" @@ -871,34 +840,63 @@ "data": { "type": "array", "items": { - "type": "object", - "properties": { - "url": { - "type": "string" + "markdown": { + "type": "string", + "nullable": true, + "description": "Markdown content of the page if the `markdown` format was specified (default)" + }, + "html": { + "type": "string", + "nullable": true, + "description": "HTML version of the content on page if the `html` format was specified" + }, + "rawHtml": { + "type": "string", + "nullable": true, + "description": "Raw HTML content of the page if the `rawHtml` format was specified" + }, + "links": { + "type": "array", + "items": { + "type": "string", + "format": "uri" }, - "markdown": { - "type": "string" - }, - "content": { - "type": "string" - }, - "metadata": { - "type": "object", - "properties": { - "title": { - "type": "string" - }, - "description": { - "type": "string" - }, - "language": { - "type": "string", - "nullable": true - }, - "sourceURL": { - "type": "string", - "format": "uri" - } + "nullable": true, + "description": "Links on the page if the `links` format was specified" + }, + "screenshot": { + "type": "string", + "nullable": true, + "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullSize` format was specified" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "language": { + "type": "string", + "nullable": true + }, + "sourceURL": { + "type": "string", + "format": "uri" + }, + " ": { + "type": "string" + }, + "statusCode": { + "type": "integer", + "description": "The status code of the page" + }, + "error": { + "type": "string", + "nullable": true, + "description": "The error message of the page" } } } @@ -909,8 +907,15 @@ "CrawlResponse": { "type": "object", "properties": { - "jobId": { + "success": { + "type": "boolean" + }, + "id": { "type": "string" + }, + "url": { + "type": "string", + "format": "uri" } } } diff --git a/apps/api/package.json b/apps/api/package.json index bd35dc65..732472e2 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -106,7 +106,7 @@ "uuid": "^10.0.0", "wordpos": "^2.1.0", "xml2js": "^0.6.2", - "zod": "^3.23.4", + "zod": "^3.23.8", "zod-to-json-schema": "^3.23.1" }, "nodemonConfig": { diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index 4b590380..7b2e07fa 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -189,7 +189,7 @@ importers: specifier: ^0.6.2 version: 0.6.2 zod: - specifier: ^3.23.4 + specifier: ^3.23.8 version: 3.23.8 zod-to-json-schema: specifier: ^3.23.1 diff --git a/apps/api/src/controllers/v1/crawl-status.ts b/apps/api/src/controllers/v1/crawl-status.ts index e148f8db..4f65bdb1 100644 --- a/apps/api/src/controllers/v1/crawl-status.ts +++ b/apps/api/src/controllers/v1/crawl-status.ts @@ -1,86 +1,68 @@ -import { 
Request, Response } from "express"; -import { authenticateUser } from "./auth"; -import { RateLimiterMode } from "../../../src/types"; -import { Logger } from "../../../src/lib/logger"; +import { Response } from "express"; import { v4 as uuidv4 } from "uuid"; +import { RequestWithAuth } from "./types"; -export async function crawlStatusController(req: Request, res: Response) { - // TODO: validate req.params.jobId +export async function crawlStatusController(req: RequestWithAuth, res: Response) { + // const job = await getWebScraperQueue().getJob(req.params.jobId); + // if (!job) { + // return res.status(404).json({ error: "Job not found" }); + // } - try { - const { success, team_id, error, status } = await authenticateUser( - req, - res, - RateLimiterMode.CrawlStatus - ); - if (!success) { - return res.status(status).json({ error }); - } + // const { current, current_url, total, current_step, partialDocs } = await job.progress(); - // const job = await getWebScraperQueue().getJob(req.params.jobId); - // if (!job) { - // return res.status(404).json({ error: "Job not found" }); - // } + // let data = job.returnvalue; + // if (process.env.USE_DB_AUTHENTICATION === "true") { + // const supabaseData = await supabaseGetJobById(req.params.jobId); - // const { current, current_url, total, current_step, partialDocs } = await job.progress(); + // if (supabaseData) { + // data = supabaseData.docs; + // } + // } - // let data = job.returnvalue; - // if (process.env.USE_DB_AUTHENTICATION === "true") { - // const supabaseData = await supabaseGetJobById(req.params.jobId); + // const jobStatus = await job.getState(); - // if (supabaseData) { - // data = supabaseData.docs; - // } - // } - - // const jobStatus = await job.getState(); - - // mock: - const id = uuidv4(); - const result = { - totalCount: 100, - creditsUsed: 2, - expiresAt: new Date(Date.now() + 24 * 60 * 60 * 1000).getTime(), - status: "scraping", // scraping, completed, failed - next: `${req.protocol}://${req.get("host")}/v1/crawl/${id}`, - data: [{ - markdown: "test", - content: "test", - html: "test", - rawHtml: "test", - linksOnPage: ["test1", "test2"], - screenshot: "test", - metadata: { - title: "test", - description: "test", - language: "test", - sourceURL: "test", - statusCode: 200, - error: "test" - } - }, - { - markdown: "test", - content: "test", - html: "test", - rawHtml: "test", - linksOnPage: ["test1", "test2"], - screenshot: "test", - metadata: { - title: "test", - description: "test", - language: "test", - sourceURL: "test", - statusCode: 200, - error: "test" - } - }] - } - - res.status(200).json(result); - } catch (error) { - Logger.error(error); - return res.status(500).json({ error: error.message }); + // mock: + const id = uuidv4(); + const result = { + totalCount: 100, + creditsUsed: 2, + expiresAt: new Date(Date.now() + 24 * 60 * 60 * 1000).getTime(), + status: "scraping", // scraping, completed, failed + next: `${req.protocol}://${req.get("host")}/v1/crawl/${id}`, + data: [{ + markdown: "test", + content: "test", + html: "test", + rawHtml: "test", + linksOnPage: ["test1", "test2"], + screenshot: "test", + metadata: { + title: "test", + description: "test", + language: "test", + sourceURL: "test", + statusCode: 200, + error: "test" + } + }, + { + markdown: "test", + content: "test", + html: "test", + rawHtml: "test", + linksOnPage: ["test1", "test2"], + screenshot: "test", + metadata: { + title: "test", + description: "test", + language: "test", + sourceURL: "test", + statusCode: 200, + error: "test" + } + }] } + + 
res.status(200).json(result); } diff --git a/apps/api/src/controllers/v1/crawl.ts b/apps/api/src/controllers/v1/crawl.ts index b4ce293e..52fe22e6 100644 --- a/apps/api/src/controllers/v1/crawl.ts +++ b/apps/api/src/controllers/v1/crawl.ts @@ -1,140 +1,87 @@ -import { Request, Response } from "express"; -import { checkTeamCredits } from "../../../src/services/billing/credit_billing"; -import { authenticateUser } from "./auth"; -import { RateLimiterMode } from "../../../src/types"; -import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist"; -import { validateIdempotencyKey } from "../../../src/services/idempotency/validate"; -import { createIdempotencyKey } from "../../../src/services/idempotency/create"; +import { Response } from "express"; import { v4 as uuidv4 } from "uuid"; -import { Logger } from "../../../src/lib/logger"; -import { checkAndUpdateURL } from "../../../src/lib/validateUrl"; -import { CrawlRequest, CrawlResponse } from "./types"; +import { CrawlRequest, crawlRequestSchema, CrawlResponse, legacyCrawlerOptions, legacyScrapeOptions, RequestWithAuth } from "./types"; +import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../lib/crawl-redis"; +import { logCrawl } from "../../services/logging/crawl_log"; +import { getScrapeQueue } from "../../services/queue-service"; +import { addScrapeJob } from "../../services/queue-jobs"; +import { Logger } from "../../lib/logger"; -export async function crawlController(req: Request<{}, {}, CrawlRequest>, res: Response) { - // expected req.body +export async function crawlController(req: RequestWithAuth, res: Response) { + req.body = crawlRequestSchema.parse(req.body); + + const id = uuidv4(); - // req.body = { - // url: string - // crawlerOptions: { - // includePaths: string[] - // excludePaths: string[] - // maxDepth: number - // limit: number - // allowBackwardLinks: boolean >> TODO: CHANGE THIS NAME??? 
- // allowExternalLinks: boolean - // ignoreSitemap: number - // } - // scrapeOptions: Exclude - // } + await logCrawl(id, req.auth.team_id); + const crawlerOptions = legacyCrawlerOptions(req.body.crawlerOptions), + pageOptions = legacyScrapeOptions(req.body.scrapeOptions); + + const sc: StoredCrawl = { + originUrl: req.body.url, + crawlerOptions, + pageOptions, + team_id: req.auth.team_id, + createdAt: Date.now(), + }; + + const crawler = crawlToCrawler(id, sc); try { - const { success, team_id, error, status } = await authenticateUser( - req, - res, - RateLimiterMode.Crawl - ); - if (!success) { - return res.status(status).json({ success: false, error }); - } - - if (req.headers["x-idempotency-key"]) { - const isIdempotencyValid = await validateIdempotencyKey(req); - if (!isIdempotencyValid) { - return res.status(409).json({ success: false, error: "Idempotency key already used" }); - } - try { - createIdempotencyKey(req); - } catch (error) { - Logger.error(error); - return res.status(500).json({ success: false, error: error.message }); - } - } - - const { success: creditsCheckSuccess, message: creditsCheckMessage } = - await checkTeamCredits(team_id, 1); - if (!creditsCheckSuccess) { - return res.status(402).json({ success: false, error: "Insufficient credits" }); - } - - let url = req.body.url; - if (!url) { - return res.status(400).json({ success: false, error: "Url is required" }); - } - - if (isUrlBlocked(url)) { - return res - .status(403) - .json({ - success: false, - error: - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", - }); - } - - try { - url = checkAndUpdateURL(url).url; - } catch (error) { - return res.status(400).json({ success: false, error: 'Invalid Url' }); - } - - // TODO: add job to queue - - const id = uuidv4(); - return res.status(200).json({ - success: true, - id, - url: `${req.protocol}://${req.get('host')}/v1/crawl/${id}`, - }); - - // const mode = req.body.mode ?? "crawl"; - - // const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions }; - // const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions }; - - // if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this? - // try { - // const a = new WebScraperDataProvider(); - // await a.setOptions({ - // jobId: uuidv4(), - // mode: "single_urls", - // urls: [url], - // crawlerOptions: { ...crawlerOptions, returnOnlyUrls: true }, - // pageOptions: pageOptions, - // }); - - // const docs = await a.getDocuments(false, (progress) => { - // job.progress({ - // current: progress.current, - // total: progress.total, - // current_step: "SCRAPING", - // current_url: progress.currentDocumentUrl, - // }); - // }); - // return res.json({ - // success: true, - // documents: docs, - // }); - // } catch (error) { - // Logger.error(error); - // return res.status(500).json({ error: error.message }); - // } - // } - - // const job = await addWebScraperJob({ - // url: url, - // mode: mode ?? "crawl", // fix for single urls not working - // crawlerOptions: crawlerOptions, - // team_id: team_id, - // pageOptions: pageOptions, - // origin: req.body.origin ?? 
defaultOrigin, - // }); - - // await logCrawl(job.id.toString(), team_id); - - // res.json({ jobId: job.id }); - } catch (error) { - Logger.error(error); - return res.status(500).json({ success: false, error: error.message }); + sc.robots = await crawler.getRobotsTxt(); + } catch (e) { + Logger.debug(`[Crawl] Failed to get robots.txt (this is probably fine!): ${JSON.stringify(e)}`); } + + await saveCrawl(id, sc); + + const sitemap = sc.crawlerOptions.ignoreSitemap ? null : await crawler.tryGetSitemap(); + + if (sitemap !== null) { + const jobs = sitemap.map(x => { + const url = x.url; + const uuid = uuidv4(); + return { + name: uuid, + data: { + url, + mode: "single_urls", + team_id: req.auth.team_id, + crawlerOptions, + pageOptions, + origin: "api", + crawl_id: id, + sitemapped: true, + }, + opts: { + jobId: uuid, + priority: 20, + } + }; + }) + + await lockURLs(id, jobs.map(x => x.data.url)); + await addCrawlJobs(id, jobs.map(x => x.opts.jobId)); + await getScrapeQueue().addBulk(jobs); + } else { + await lockURL(id, sc, req.body.url); + const job = await addScrapeJob({ + url: req.body.url, + mode: "single_urls", + crawlerOptions: crawlerOptions, + team_id: req.auth.team_id, + pageOptions: pageOptions, + origin: "api", + crawl_id: id, + }, { + priority: 15, + }); + await addCrawlJob(id, job.id); + } + + return res.status(200).json({ + success: true, + id, + url: `${req.protocol}://${req.get('host')}/v1/crawl/${id}`, + }); } diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts index f4546abe..49bc86ed 100644 --- a/apps/api/src/controllers/v1/map.ts +++ b/apps/api/src/controllers/v1/map.ts @@ -1,12 +1,12 @@ import { Request, Response } from "express"; -import { authenticateUser } from "./auth"; -import { RateLimiterMode } from "../../../src/types"; -import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist"; import { Logger } from "../../../src/lib/logger"; import { checkAndUpdateURL } from "../../../src/lib/validateUrl"; -import { MapRequest, MapResponse } from "./types"; +import { MapRequest, mapRequestSchema, MapResponse, RequestWithAuth } from "./types"; +import { checkTeamCredits } from "../../services/billing/credit_billing"; -export async function mapController(req: Request<{}, MapResponse, MapRequest>, res: Response) { +export async function mapController(req: RequestWithAuth, res: Response) { + req.body = mapRequestSchema.parse(req.body); + console.log(req.body); // expected req.body // req.body = { @@ -14,106 +14,53 @@ export async function mapController(req: Request<{}, MapResponse, MapRequest>, r // crawlerOptions: // } - try { - const { success, team_id, error, status } = await authenticateUser( - req, - res, - RateLimiterMode.Crawl - ); - if (!success) { - return res.status(status).json({ success: false, error }); - } - // if (req.headers["x-idempotency-key"]) { - // const isIdempotencyValid = await validateIdempotencyKey(req); - // if (!isIdempotencyValid) { - // return res.status(409).json({ error: "Idempotency key already used" }); - // } - // try { - // createIdempotencyKey(req); - // } catch (error) { - // Logger.error(error); - // return res.status(500).json({ error: error.message }); - // } - // } + return res.status(200).json({ success: true, links: [ "test1", "test2" ] }); - // const { success: creditsCheckSuccess, message: creditsCheckMessage } = - // await checkTeamCredits(team_id, 1); - // if (!creditsCheckSuccess) { - // return res.status(402).json({ error: "Insufficient credits" }); - // } + // const mode = 
req.body.mode ?? "crawl"; - let url = req.body.url; - if (!url) { - return res.status(400).json({ success: false, error: "Url is required" }); - } + // const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions }; + // const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions }; - if (isUrlBlocked(url)) { - return res - .status(403) - .json({ - success: false, - error: - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", - }); - } + // if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this? + // try { + // const a = new WebScraperDataProvider(); + // await a.setOptions({ + // jobId: uuidv4(), + // mode: "single_urls", + // urls: [url], + // crawlerOptions: { ...crawlerOptions, returnOnlyUrls: true }, + // pageOptions: pageOptions, + // }); - try { - url = checkAndUpdateURL(url).url; - } catch (error) { - return res.status(400).json({ success: false, error: 'Invalid Url' }); - } + // const docs = await a.getDocuments(false, (progress) => { + // job.progress({ + // current: progress.current, + // total: progress.total, + // current_step: "SCRAPING", + // current_url: progress.currentDocumentUrl, + // }); + // }); + // return res.json({ + // success: true, + // documents: docs, + // }); + // } catch (error) { + // Logger.error(error); + // return res.status(500).json({ error: error.message }); + // } + // } - return res.status(200).json({ success: true, links: [ "test1", "test2" ] }); + // const job = await addWebScraperJob({ + // url: url, + // mode: mode ?? "crawl", // fix for single urls not working + // crawlerOptions: crawlerOptions, + // team_id: team_id, + // pageOptions: pageOptions, + // origin: req.body.origin ?? defaultOrigin, + // }); - // const mode = req.body.mode ?? "crawl"; + // await logCrawl(job.id.toString(), team_id); - // const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions }; - // const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions }; - - // if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this? - // try { - // const a = new WebScraperDataProvider(); - // await a.setOptions({ - // jobId: uuidv4(), - // mode: "single_urls", - // urls: [url], - // crawlerOptions: { ...crawlerOptions, returnOnlyUrls: true }, - // pageOptions: pageOptions, - // }); - - // const docs = await a.getDocuments(false, (progress) => { - // job.progress({ - // current: progress.current, - // total: progress.total, - // current_step: "SCRAPING", - // current_url: progress.currentDocumentUrl, - // }); - // }); - // return res.json({ - // success: true, - // documents: docs, - // }); - // } catch (error) { - // Logger.error(error); - // return res.status(500).json({ error: error.message }); - // } - // } - - // const job = await addWebScraperJob({ - // url: url, - // mode: mode ?? "crawl", // fix for single urls not working - // crawlerOptions: crawlerOptions, - // team_id: team_id, - // pageOptions: pageOptions, - // origin: req.body.origin ?? 
defaultOrigin, - // }); - - // await logCrawl(job.id.toString(), team_id); - - // res.json({ jobId: job.id }); - } catch (error) { - Logger.error(error); - return res.status(500).json({ success: false, error: error.message }); - } + // res.json({ jobId: job.id }); } diff --git a/apps/api/src/controllers/v1/scrape.ts b/apps/api/src/controllers/v1/scrape.ts index e3cfcbdc..a61a7f6a 100644 --- a/apps/api/src/controllers/v1/scrape.ts +++ b/apps/api/src/controllers/v1/scrape.ts @@ -1,26 +1,11 @@ import { Request, Response } from "express"; -import { authenticateUser } from "./auth"; -import { RateLimiterMode } from "../../types"; -import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function import { Logger } from '../../lib/logger'; import { checkAndUpdateURL } from '../../lib/validateUrl'; -import { ScrapeRequest, ScrapeResponse } from "./types"; +import { RequestWithAuth, ScrapeRequest, scrapeRequestSchema, ScrapeResponse } from "./types"; -export async function scrapeController(req: Request<{}, ScrapeResponse, ScrapeRequest>, res: Response) { - let url = req.body.url; - if (!url) { - return { success: false, error: "Url is required", returnCode: 400 }; - } - - if (isUrlBlocked(url)) { - return { success: false, error: "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", returnCode: 403 }; - } - - try { - url = checkAndUpdateURL(url).url; - } catch (error) { - return { success: false, error: "Invalid URL", returnCode: 400 }; - } +export async function scrapeController(req: RequestWithAuth, res: Response) { + req.body = scrapeRequestSchema.parse(req.body); + console.log(req.body); // TODO: check req.body // mockup req.body @@ -37,137 +22,124 @@ export async function scrapeController(req: Request<{}, ScrapeResponse, ScrapeRe // waitFor: number // } - try { - let earlyReturn = false; - // make sure to authenticate user first, Bearer - const { success, team_id, error, status, plan } = await authenticateUser( - req, - res, - RateLimiterMode.Scrape - ); - if (!success) { - return res.status(status).json({ success: false, error }); - } + let earlyReturn = false; + // make sure to authenticate user first, Bearer - // check credits + // check credits - const result: ScrapeResponse = { - success: true, - warning: "test", - data: { - markdown: "test", - html: "test", - rawHtml: "test", - links: ["test1", "test2"], - screenshot: "test", - metadata: { - title: "test", - description: "test", - language: "test", - sourceURL: "test", - statusCode: 200, - error: "test" - } + const result: ScrapeResponse = { + success: true, + warning: "test", + data: { + markdown: "test", + html: "test", + rawHtml: "test", + links: ["test1", "test2"], + screenshot: "test", + metadata: { + title: "test", + description: "test", + language: "test", + sourceURL: "test", + statusCode: 200, + error: "test" } } - - return res.status(200).json(result); - - // const crawlerOptions = req.body.crawlerOptions ?? {}; - // const pageOptions = { ...defaultPageOptions, ...req.body.pageOptions }; - // const extractorOptions = { ...defaultExtractorOptions, ...req.body.extractorOptions }; - // const origin = req.body.origin ?? defaultOrigin; - // let timeout = req.body.timeout ?? defaultTimeout; - - // if (extractorOptions.mode.includes("llm-extraction")) { - // pageOptions.onlyMainContent = true; - // timeout = req.body.timeout ?? 
90000; - // } - - // const checkCredits = async () => { - // try { - // const { success: creditsCheckSuccess, message: creditsCheckMessage } = await checkTeamCredits(team_id, 1); - // if (!creditsCheckSuccess) { - // earlyReturn = true; - // return res.status(402).json({ error: "Insufficient credits" }); - // } - // } catch (error) { - // Logger.error(error); - // earlyReturn = true; - // return res.status(500).json({ error: "Error checking team credits. Please contact hello@firecrawl.com for help." }); - // } - // }; - - - // await checkCredits(); - - // const jobId = uuidv4(); - - // const startTime = new Date().getTime(); - // const result = await scrapeHelper( - // jobId, - // req, - // team_id, - // crawlerOptions, - // pageOptions, - // extractorOptions, - // timeout, - // plan - // ); - // const endTime = new Date().getTime(); - // const timeTakenInSeconds = (endTime - startTime) / 1000; - // const numTokens = (result.data && result.data.markdown) ? numTokensFromString(result.data.markdown, "gpt-3.5-turbo") : 0; - - // if (result.success) { - // let creditsToBeBilled = 1; // Assuming 1 credit per document - // const creditsPerLLMExtract = 50; - - // if (extractorOptions.mode.includes("llm-extraction")) { - // // creditsToBeBilled = creditsToBeBilled + (creditsPerLLMExtract * filteredDocs.length); - // creditsToBeBilled += creditsPerLLMExtract; - // } - - // let startTimeBilling = new Date().getTime(); - - // if (earlyReturn) { - // // Don't bill if we're early returning - // return; - // } - // const billingResult = await billTeam( - // team_id, - // creditsToBeBilled - // ); - // if (!billingResult.success) { - // return res.status(402).json({ - // success: false, - // error: "Failed to bill team. Insufficient credits or subscription not found.", - // }); - // } - // } - - // logJob({ - // job_id: jobId, - // success: result.success, - // message: result.error, - // num_docs: 1, - // docs: [result.data], - // time_taken: timeTakenInSeconds, - // team_id: team_id, - // mode: "scrape", - // url: req.body.url, - // crawlerOptions: crawlerOptions, - // pageOptions: pageOptions, - // origin: origin, - // extractor_options: extractorOptions, - // num_tokens: numTokens, - // }); - - - // return res.status(result.returnCode).json(result); - } catch (error) { - Logger.error(error); - return res.status(500).json({ success: false, error: error.message }); } + + return res.status(200).json(result); + + // const crawlerOptions = req.body.crawlerOptions ?? {}; + // const pageOptions = { ...defaultPageOptions, ...req.body.pageOptions }; + // const extractorOptions = { ...defaultExtractorOptions, ...req.body.extractorOptions }; + // const origin = req.body.origin ?? defaultOrigin; + // let timeout = req.body.timeout ?? defaultTimeout; + + // if (extractorOptions.mode.includes("llm-extraction")) { + // pageOptions.onlyMainContent = true; + // timeout = req.body.timeout ?? 90000; + // } + + // const checkCredits = async () => { + // try { + // const { success: creditsCheckSuccess, message: creditsCheckMessage } = await checkTeamCredits(team_id, 1); + // if (!creditsCheckSuccess) { + // earlyReturn = true; + // return res.status(402).json({ error: "Insufficient credits" }); + // } + // } catch (error) { + // Logger.error(error); + // earlyReturn = true; + // return res.status(500).json({ error: "Error checking team credits. Please contact hello@firecrawl.com for help." 
}); + // } + // }; + + + // await checkCredits(); + + // const jobId = uuidv4(); + + // const startTime = new Date().getTime(); + // const result = await scrapeHelper( + // jobId, + // req, + // team_id, + // crawlerOptions, + // pageOptions, + // extractorOptions, + // timeout, + // plan + // ); + // const endTime = new Date().getTime(); + // const timeTakenInSeconds = (endTime - startTime) / 1000; + // const numTokens = (result.data && result.data.markdown) ? numTokensFromString(result.data.markdown, "gpt-3.5-turbo") : 0; + + // if (result.success) { + // let creditsToBeBilled = 1; // Assuming 1 credit per document + // const creditsPerLLMExtract = 50; + + // if (extractorOptions.mode.includes("llm-extraction")) { + // // creditsToBeBilled = creditsToBeBilled + (creditsPerLLMExtract * filteredDocs.length); + // creditsToBeBilled += creditsPerLLMExtract; + // } + + // let startTimeBilling = new Date().getTime(); + + // if (earlyReturn) { + // // Don't bill if we're early returning + // return; + // } + // const billingResult = await billTeam( + // team_id, + // creditsToBeBilled + // ); + // if (!billingResult.success) { + // return res.status(402).json({ + // success: false, + // error: "Failed to bill team. Insufficient credits or subscription not found.", + // }); + // } + // } + + // logJob({ + // job_id: jobId, + // success: result.success, + // message: result.error, + // num_docs: 1, + // docs: [result.data], + // time_taken: timeTakenInSeconds, + // team_id: team_id, + // mode: "scrape", + // url: req.body.url, + // crawlerOptions: crawlerOptions, + // pageOptions: pageOptions, + // origin: origin, + // extractor_options: extractorOptions, + // num_tokens: numTokens, + // }); + + + // return res.status(result.returnCode).json(result); } diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 07d56e17..bd8e44f6 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -1,36 +1,96 @@ +import { Request } from "express"; +import { z } from "zod"; +import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; +import { PageOptions } from "../../lib/entities"; + export type Format = "markdown" | "html" | "rawHtml" | "links" | "screenshot" | "screenshot@fullPage"; -export type ScrapeRequest = { - url: string; - formats?: Format[]; - headers?: { [K: string]: string }; - includeTags?: string[]; - excludeTags?: string[]; - onlyMainContent?: boolean; - timeout?: number; - waitFor?: number; -} +const url = z.preprocess(x => { + if (typeof x === "string" && !/^([^.:]+:\/\/)/.test(x)) { + if (x.startsWith("://")) { + return "http" + x; + } else { + return "http://" + x; + } + } else { + return x; + } +}, z.string().url().regex(/^https?:\/\//, "URL uses unsupported protocol").refine(x => !isUrlBlocked(x), "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.")); -export type CrawlerOptions = { - includePaths?: string[]; - excludePaths?: string[]; - maxDepth?: number; - limit?: number; - allowBackwardLinks?: boolean; // >> TODO: CHANGE THIS NAME??? 
-  allowExternalLinks?: boolean;
-  ignoreSitemap?: boolean;
-};
+export const scrapeOptions = z.object({
+  formats: z.enum(["markdown", "html", "rawHtml", "links", "screenshot", "screenshot@fullPage"])
+    .array()
+    .optional()
+    .default(["markdown"]),
+  headers: z.record(z.string(), z.string()).optional(),
+  includeTags: z.string().array().optional(),
+  excludeTags: z.string().array().optional(),
+  onlyMainContent: z.boolean().default(true),
+  timeout: z.number().int().positive().finite().safe().default(30000), // default?
+  waitFor: z.number().int().nonnegative().finite().safe().default(0),
+});
 
-export type CrawlRequest = {
-  url: string;
-  crawlerOptions?: CrawlerOptions;
-  scrapeOptions?: Exclude<ScrapeRequest, "url">;
-};
+export type ScrapeOptions = z.infer<typeof scrapeOptions>;
 
-export type MapRequest = {
-  url: string;
-  crawlerOptions?: CrawlerOptions;
-};
+export const scrapeRequestSchema = scrapeOptions.extend({ url });
+
+// export type ScrapeRequest = {
+//   url: string;
+//   formats?: Format[];
+//   headers?: { [K: string]: string };
+//   includeTags?: string[];
+//   excludeTags?: string[];
+//   onlyMainContent?: boolean;
+//   timeout?: number;
+//   waitFor?: number;
+// }
+
+export type ScrapeRequest = z.infer<typeof scrapeRequestSchema>;
+
+const crawlerOptions = z.object({
+  includePaths: z.string().array().default([]),
+  excludePaths: z.string().array().default([]),
+  maxDepth: z.number().default(10), // default?
+  limit: z.number().default(10000), // default?
+  allowBackwardLinks: z.boolean().default(false), // >> TODO: CHANGE THIS NAME???
+  allowExternalLinks: z.boolean().default(false),
+  ignoreSitemap: z.boolean().default(true),
+});
+
+// export type CrawlerOptions = {
+//   includePaths?: string[];
+//   excludePaths?: string[];
+//   maxDepth?: number;
+//   limit?: number;
+//   allowBackwardLinks?: boolean; // >> TODO: CHANGE THIS NAME???
+//   allowExternalLinks?: boolean;
+//   ignoreSitemap?: boolean;
+// };
+
+export type CrawlerOptions = z.infer<typeof crawlerOptions>;
+
+export const crawlRequestSchema = z.object({
+  url,
+  crawlerOptions: crawlerOptions.default({}),
+  scrapeOptions: scrapeOptions.omit({ timeout: true }).default({}),
+});
+
+// export type CrawlRequest = {
+//   url: string;
+//   crawlerOptions?: CrawlerOptions;
+//   scrapeOptions?: Exclude<ScrapeRequest, "url">;
+// };
+
+export type CrawlRequest = z.infer<typeof crawlRequestSchema>;
+
+export const mapRequestSchema = crawlerOptions.extend({ url });
+
+// export type MapRequest = {
+//   url: string;
+//   crawlerOptions?: CrawlerOptions;
+// };
+
+export type MapRequest = z.infer<typeof mapRequestSchema>;
 
 export type Document = {
   markdown?: string,
@@ -77,6 +137,7 @@ export type Document = {
 export type ErrorResponse = {
   success: false;
   error: string;
+  details?: any;
 };
 
 export type ScrapeResponse = ErrorResponse | {
@@ -95,3 +156,40 @@ export type MapResponse = ErrorResponse | {
   success: true;
   links: string[];
 }
+
+type AuthObject = {
+  team_id: string;
+  plan: string;
+}
+
+export interface RequestWithMaybeAuth<ReqBody = undefined, ResBody = undefined> extends Request<{}, ReqBody, ResBody> {
+  auth?: AuthObject;
+}
+
+export interface RequestWithAuth<ReqBody = undefined, ResBody = undefined> extends Request<{}, ReqBody, ResBody> {
+  auth: AuthObject;
+}
+
+export function legacyCrawlerOptions(x: CrawlerOptions) {
+  return {
+    includes: x.includePaths,
+    excludes: x.excludePaths,
+    maxCrawledLinks: x.limit,
+    maxCrawledDepth: x.maxDepth,
+    limit: x.limit,
+    generateImgAltText: false,
+    allowBackwardCrawling: x.allowBackwardLinks,
+    allowExternalContentLinks: x.allowExternalLinks,
+  };
+}
+
+export function legacyScrapeOptions(x: ScrapeOptions): PageOptions {
+  return {
+    includeHtml: x.formats.includes("html"),
+    includeRawHtml: x.formats.includes("rawHtml"),
+    onlyIncludeTags: x.includeTags,
+    removeTags: x.excludeTags,
+    onlyMainContent: x.onlyMainContent,
+    waitFor: x.waitFor,
+  };
+}
diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts
index 5099fee9..c3ea8633 100644
--- a/apps/api/src/routes/v1.ts
+++ b/apps/api/src/routes/v1.ts
@@ -1,9 +1,18 @@
-import express from "express";
+import express, { NextFunction, Request, Response } from "express";
 import { crawlController } from "../../src/controllers/v1/crawl";
 // import { crawlStatusController } from "../../src/controllers/v1/crawl-status";
 import { scrapeController } from "../../src/controllers/v1/scrape";
 import { crawlStatusController } from "../../src/controllers/v1/crawl-status";
 import { mapController } from "../../src/controllers/v1/map";
+import { ErrorResponse, RequestWithAuth, RequestWithMaybeAuth } from "../controllers/v1/types";
+import { RateLimiterMode } from "../types";
+import { authenticateUser } from "../controllers/v1/auth";
+import { Logger } from "../lib/logger";
+import { createIdempotencyKey } from "../services/idempotency/create";
+import { validateIdempotencyKey } from "../services/idempotency/validate";
+import { ZodError } from "zod";
+import { checkTeamCredits } from "../services/billing/credit_billing";
+import { v4 as uuidv4 } from "uuid";
 // import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
 // import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
 // import { searchController } from "../../src/controllers/v1/search";
@@ -12,13 +21,96 @@ import { mapController } from "../../src/controllers/v1/map";
 // import { livenessController } from "../controllers/v1/liveness";
 // import { readinessController } from "../controllers/v1/readiness";
 
+function checkCreditsMiddleware(minimum: number): (req: RequestWithAuth, res: Response, next: NextFunction) => void {
+  return (req, res, next) => {
+    (async () => {
+      if (!(await checkTeamCredits(req.auth.team_id, minimum)).success) {
+        return res.status(402).json({ success: false, error: "Insufficient credits" });
+      }
+      next();
+    })()
+      .catch(err => next(err));
+  };
+}
+
+function authMiddleware(rateLimiterMode: RateLimiterMode): (req: RequestWithMaybeAuth, res: Response, next: NextFunction) => void {
+  return (req, res, next) => {
+    (async () => {
+      const { success, team_id, error, status, plan } = await authenticateUser(
+        req,
+        res,
+        rateLimiterMode,
+      );
+
+      if (!success) {
+        return res.status(status).json({ success: false, error });
+      }
+
+      req.auth = { team_id, plan };
+      next();
+    })()
+      .catch(err => next(err));
+  }
+}
+
+function idempotencyMiddleware(req: Request, res: Response, next: NextFunction) {
+  (async () => {
+    if (req.headers["x-idempotency-key"]) {
+      const isIdempotencyValid = await validateIdempotencyKey(req);
+      if (!isIdempotencyValid) {
+        return res.status(409).json({ success: false, error: "Idempotency key already used" });
+      }
+      // try {
+      createIdempotencyKey(req);
+      // } catch (error) {
+      //   Logger.error(error);
+      //   return res.status(500).json({ success: false, error: error.message });
+      // }
+    }
+    next();
+  })()
+    .catch(err => next(err));
+}
+
+function wrap(controller: (req: Request, res: Response) => Promise<any>): (req: Request, res: Response, next: NextFunction) => any {
+  return (req, res, next) => {
+    controller(req, res)
+      .catch(err => next(err))
+  }
+}
+
 export const v1Router = express.Router();
 
-v1Router.post("/v1/scrape", scrapeController);
-v1Router.post("/v1/crawl", crawlController);
-v1Router.get("/v1/crawl/:jobId", crawlStatusController);
+v1Router.post(
+  "/v1/scrape",
+  authMiddleware(RateLimiterMode.Scrape),
+  checkCreditsMiddleware(1),
+  wrap(scrapeController)
+);
+
+v1Router.post(
+  "/v1/crawl",
+  authMiddleware(RateLimiterMode.Crawl),
+  idempotencyMiddleware,
+  checkCreditsMiddleware(1),
+  wrap(crawlController)
+);
+
+v1Router.post(
+  "/v1/map",
+  authMiddleware(RateLimiterMode.Crawl),
+  checkCreditsMiddleware(1),
+  wrap(mapController)
+);
+
+v1Router.get(
+  "/v1/crawl/:jobId",
+  authMiddleware(RateLimiterMode.CrawlStatus),
+  wrap(crawlStatusController)
+);
+
 // v1Router.post("/v1/crawlWebsitePreview", crawlPreviewController);
-// v1Router.delete("/v1/crawl/cancel/:jobId", crawlCancelController);
+// v1Router.delete("/v1/crawl/:jobId", crawlCancelController);
 // v1Router.get("/v1/checkJobStatus/:jobId", crawlJobStatusPreviewController);
 
 // // Auth route for key based authentication
@@ -31,4 +123,12 @@ v1Router.get("/v1/crawl/:jobId", crawlStatusController);
 // v1Router.get("/v1/health/liveness", livenessController);
 // v1Router.get("/v1/health/readiness", readinessController);
 
-v1Router.post("/v1/map", mapController);
\ No newline at end of file
+v1Router.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: Response, next: NextFunction) => {
+  if (err instanceof ZodError) {
+    res.status(400).json({ success: false, error: "Bad Request", details: err.errors });
+  } else {
+    const id = uuidv4();
+    Logger.error("Error occurred in request! (" + req.path + ") -- ID " + id + " -- " + JSON.stringify(err));
+    res.status(500).json({ success: false, error: "An unexpected error occurred. Please contact hello@firecrawl.com for help. Your exception ID is " + id + "" });
+  }
+});
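
To make the new v1 request validation concrete, here is a minimal sketch of how the schemas in apps/api/src/controllers/v1/types.ts are meant to be consumed from a controller. The sample body, the variable names, and the relative import path are illustrative assumptions rather than part of the patch:

    // Parse an incoming scrape body with the schema introduced above.
    // scrapeRequestSchema fills in the declared defaults (formats: ["markdown"],
    // onlyMainContent: true, timeout: 30000, waitFor: 0), and the shared url
    // preprocessor prepends "http://" when no scheme is given, so a bare host
    // still passes the .url() and protocol checks before the blocklist refine runs.
    import { scrapeRequestSchema, legacyScrapeOptions } from "./types";

    const body = scrapeRequestSchema.parse({
      url: "firecrawl.dev",            // hypothetical input; normalized to "http://firecrawl.dev"
      formats: ["markdown", "html"],
    });

    // legacyScrapeOptions bridges the parsed v1 options onto the existing PageOptions
    // shape, e.g. an "html" entry in formats becomes includeHtml: true.
    const pageOptions = legacyScrapeOptions(body);

A body that fails validation makes .parse() throw a ZodError, which the router-level handler in routes/v1.ts converts into a 400 response with the issues attached as details.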
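
On the routing side, authentication, credit checks, idempotency, and async error handling are now plain Express middleware, so the composition order in routes/v1.ts tells the whole story: authMiddleware resolves team_id and plan onto req.auth, checkCreditsMiddleware rejects with 402 when the team is out of credits, wrap() forwards controller rejections to next(), and the final v1Router.use handler maps ZodError to 400 and anything else to 500 with a logged exception ID. A rough sketch of mounting the router in an app follows; the port and the body-parsing setup outside the router are assumptions:

    import express from "express";
    import { v1Router } from "./routes/v1";

    const app = express();
    app.use(express.json()); // controllers read a parsed JSON body
    app.use(v1Router);       // routes already carry the full "/v1/..." prefix

    app.listen(3002);        // placeholder port for a local run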