diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml
index 32a92745..ff485f28 100644
--- a/apps/api/pnpm-lock.yaml
+++ b/apps/api/pnpm-lock.yaml
@@ -6801,7 +6801,7 @@ packages:
       handlebars: 4.7.8
       openai: 3.3.0
       sbd: 1.0.19
-      typescript: 5.4.5
+      typescript: 5.5.3
       uuid: 9.0.1
       zod: 3.23.8
     transitivePeerDependencies:
@@ -7767,6 +7767,12 @@ packages:
     engines: {node: '>=14.17'}
     hasBin: true
 
+  /typescript@5.5.3:
+    resolution: {integrity: sha512-/hreyEujaB0w76zKo6717l3L0o/qEUtRgdvUBvlkhoWeOVMjMuHNHk0BRBzikzuGDqNmPQbg5ifMEqsHLiIUcQ==}
+    engines: {node: '>=14.17'}
+    hasBin: true
+    dev: false
+
   /typesense@1.8.2(@babel/runtime@7.24.6):
     resolution: {integrity: sha512-aBpePjA99Qvo+OP2pJwMpvga4Jrm1Y2oV5NsrWXBxlqUDNEUCPZBIksPv2Hq0jxQxHhLLyJVbjXjByXsvpCDVA==}
     engines: {node: '>=18'}
diff --git a/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts
index c8281edd..af7fe4a3 100644
--- a/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts
@@ -131,6 +131,28 @@ describe("E2E Tests for API Routes", () => {
       expect(response.body.data.metadata.pageStatusCode).toBe(200);
       expect(response.body.data.metadata.pageError).toBeUndefined();
     }, 30000); // 30 seconds timeout
+
+    it.concurrent("should return a successful response with a valid API key and includeRawHtml set to true", async () => {
+      const response = await request(TEST_URL)
+        .post("/v0/scrape")
+        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+        .set("Content-Type", "application/json")
+        .send({
+          url: "https://roastmywebsite.ai",
+          pageOptions: { includeRawHtml: true },
+        });
+      expect(response.statusCode).toBe(200);
+      expect(response.body).toHaveProperty("data");
+      expect(response.body.data).toHaveProperty("content");
+      expect(response.body.data).toHaveProperty("markdown");
+      expect(response.body.data).toHaveProperty("rawHtml");
+      expect(response.body.data).toHaveProperty("metadata");
+      expect(response.body.data.content).toContain("_Roast_");
+      expect(response.body.data.markdown).toContain("_Roast_");
+      expect(response.body.data.rawHtml).toContain("<h1");
+      expect(response.body.data.metadata.pageStatusCode).toBe(200);
+      expect(response.body.data.metadata.pageError).toBeUndefined();
+    }, 30000); // 30 seconds timeout
 
     it.concurrent("should return a successful response with a valid API key", async () => {
       const response = await request(TEST_URL)
@@ -1177,6 +1199,47 @@ describe("E2E Tests for API Routes", () => {
       expect(llmExtraction.is_open_source).toBe(false);
       expect(typeof llmExtraction.is_open_source).toBe("boolean");
     }, 60000); // 60 secs
+
+    it.concurrent("should extract data using LLM extraction mode with RawHtml", async () => {
+      const response = await request(TEST_URL)
+        .post("/v0/scrape")
+        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+        .set("Content-Type", "application/json")
+        .send({
+          url: "https://mendable.ai",
+
+          extractorOptions: {
+            mode: "llm-extraction-from-raw-html",
+            extractionPrompt:
+              "Based on the information on the page, what are the primary and secondary CTA buttons?",
+            extractionSchema: {
+              type: "object",
+              properties: {
+                primary_cta: {
+                  type: "string",
+                },
+                secondary_cta: {
+                  type: "string",
+                },
+              },
+              required: ["primary_cta", "secondary_cta"],
+            },
+          },
+        });
+
+      // Ensure that the job was successfully created before proceeding with LLM extraction
+      expect(response.statusCode).toBe(200);
+
+      // Assuming the LLM extraction object is available in the response body under `data.llm_extraction`
+      let llmExtraction = response.body.data.llm_extraction;
+
+      // Check if the llm_extraction object has the required properties with correct types and values
+      expect(llmExtraction).toHaveProperty("primary_cta");
+      expect(typeof llmExtraction.primary_cta).toBe("string");
+      expect(llmExtraction).toHaveProperty("secondary_cta");
+      expect(typeof llmExtraction.secondary_cta).toBe("string");
+
+    }, 60000); // 60 secs
   });
 
   // describe("POST /v0/scrape for Top 100 Companies", () => {
diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts
index d394efe8..f5e2c322 100644
--- a/apps/api/src/controllers/scrape.ts
+++ b/apps/api/src/controllers/scrape.ts
@@ -58,19 +58,27 @@ export async function scrapeHelper(
   }
 
   // make sure doc.content is not empty
-  const filteredDocs = docs.filter(
+  let filteredDocs = docs.filter(
     (doc: { content?: string }) => doc.content && doc.content.trim().length > 0
   );
 
   if (filteredDocs.length === 0) {
     return { success: true, error: "No page found", returnCode: 200, data: docs[0] };
   }
+
+  // Remove rawHtml if pageOptions.includeRawHtml is false and extractorOptions.mode is llm-extraction-from-raw-html
+  if (!pageOptions.includeRawHtml && extractorOptions.mode == "llm-extraction-from-raw-html") {
+    filteredDocs.forEach(doc => {
+      delete doc.rawHtml;
+    });
+  }
+
   let creditsToBeBilled = filteredDocs.length;
   const creditsPerLLMExtract = 50;
 
-  if (extractorOptions.mode === "llm-extraction") {
+  if (extractorOptions.mode === "llm-extraction" || extractorOptions.mode === "llm-extraction-from-raw-html" || extractorOptions.mode === "llm-extraction-from-markdown") {
     creditsToBeBilled = creditsToBeBilled + (creditsPerLLMExtract * filteredDocs.length);
   }
diff --git a/apps/api/src/lib/LLM-extraction/index.ts b/apps/api/src/lib/LLM-extraction/index.ts
index 6614dbdf..2156fb3c 100644
--- a/apps/api/src/lib/LLM-extraction/index.ts
+++ b/apps/api/src/lib/LLM-extraction/index.ts
@@ -8,7 +8,8 @@ import { Document, ExtractorOptions } from "../entities";
 // Generate completion using OpenAI
 export async function generateCompletions(
   documents: Document[],
-  extractionOptions: ExtractorOptions
+  extractionOptions: ExtractorOptions,
+  mode: "markdown" | "raw-html"
 ): Promise<Document[]> {
   // const schema = zodToJsonSchema(options.schema)
 
@@ -28,6 +29,7 @@ export async function generateCompletions(
         document: document,
         schema: schema,
         prompt: prompt,
+        mode: mode,
       });
       // Validate the JSON output against the schema using AJV
       const validate = ajv.compile(schema);
diff --git a/apps/api/src/lib/LLM-extraction/models.ts b/apps/api/src/lib/LLM-extraction/models.ts
index 1434e35e..8de8ee4b 100644
--- a/apps/api/src/lib/LLM-extraction/models.ts
+++ b/apps/api/src/lib/LLM-extraction/models.ts
@@ -13,26 +13,37 @@ const defaultPrompt =
   "You are a professional web scraper. Extract the contents of the webpage";
 
 function prepareOpenAIDoc(
-  document: Document
+  document: Document,
+  mode: "markdown" | "raw-html"
 ): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] {
+
   let markdown = document.markdown;
-// Check if the markdown content exists in the document
-  if (!markdown) {
+  let extractionTarget = document.markdown;
+
+  if (mode === "raw-html") {
+    extractionTarget = document.rawHtml;
+  }
+
+  // Check if the content to extract from exists in the document
+  if (!extractionTarget) {
     throw new Error(
-      "Markdown content is missing in the document. This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai"
+      `${mode} content is missing in the document. This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai`
     );
   }
+
+
+
+
   // count number of tokens
-  const numTokens = numTokensFromString(document.markdown, "gpt-4");
+  const numTokens = numTokensFromString(extractionTarget, "gpt-4");
 
   if (numTokens > maxTokens) {
     // trim the document to the maximum number of tokens, tokens != characters
-    markdown = markdown.slice(0, (maxTokens * modifier));
+    extractionTarget = extractionTarget.slice(0, (maxTokens * modifier));
   }
 
-  return [[{ type: "text", text: markdown }], numTokens];
+  return [[{ type: "text", text: extractionTarget }], numTokens];
 }
 
 export async function generateOpenAICompletions({
@@ -42,6 +53,7 @@ export async function generateOpenAICompletions({
   schema, //TODO - add zod dynamic type checking
   prompt = defaultPrompt,
   temperature,
+  mode
 }: {
   client: OpenAI;
   model?: string;
@@ -49,9 +61,10 @@
   schema: any; // This should be replaced with a proper Zod schema type when available
   prompt?: string;
   temperature?: number;
+  mode: "markdown" | "raw-html";
 }): Promise<Document> {
   const openai = client as OpenAI;
-  const [content, numTokens] = prepareOpenAIDoc(document);
+  const [content, numTokens] = prepareOpenAIDoc(document, mode);
 
   const completion = await openai.chat.completions.create({
     model,
diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts
index 2f43b9a4..d2b3b002 100644
--- a/apps/api/src/lib/entities.ts
+++ b/apps/api/src/lib/entities.ts
@@ -13,6 +13,7 @@ export interface Progress {
 export type PageOptions = {
   onlyMainContent?: boolean;
   includeHtml?: boolean;
+  includeRawHtml?: boolean;
   fallback?: boolean;
   fetchPageContent?: boolean;
   waitFor?: number;
@@ -25,7 +26,7 @@
 };
 
 export type ExtractorOptions = {
-  mode: "markdown" | "llm-extraction";
+  mode: "markdown" | "llm-extraction" | "llm-extraction-from-markdown" | "llm-extraction-from-raw-html";
   extractionPrompt?: string;
   extractionSchema?: Record<string, any>;
 }
@@ -73,6 +74,7 @@ export class Document {
   content: string;
   markdown?: string;
   html?: string;
+  rawHtml?: string;
   llm_extraction?: Record<string, any>;
   createdAt?: Date;
   updatedAt?: Date;
diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts
index 9e318505..3badfa19 100644
--- a/apps/api/src/scraper/WebScraper/index.ts
+++ b/apps/api/src/scraper/WebScraper/index.ts
@@ -66,6 +66,7 @@
       const result = await scrapSingleUrl(
         url,
         this.pageOptions,
+        this.extractorOptions,
         existingHTML
       );
       processedUrls++;
@@ -269,10 +270,16 @@
     // documents = await this.applyImgAltText(documents);
 
     if (
-      this.extractorOptions.mode === "llm-extraction" &&
+      (this.extractorOptions.mode === "llm-extraction" || this.extractorOptions.mode === "llm-extraction-from-markdown") &&
       this.mode === "single_urls"
     ) {
-      documents = await generateCompletions(documents, this.extractorOptions);
+      documents = await generateCompletions(documents, this.extractorOptions, "markdown");
+    }
+    if (
+      (this.extractorOptions.mode === "llm-extraction-from-raw-html") &&
+      this.mode === "single_urls"
+    ) {
+      documents = await generateCompletions(documents, this.extractorOptions, "raw-html");
     }
     return documents.concat(pdfDocuments).concat(docxDocuments);
   }
diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts
index 2d66315c..c4496ce0 100644
--- a/apps/api/src/scraper/WebScraper/single_url.ts
+++ b/apps/api/src/scraper/WebScraper/single_url.ts
@@ -2,7 +2,7 @@
 import * as cheerio from "cheerio";
 import { ScrapingBeeClient } from "scrapingbee";
 import { extractMetadata } from "./utils/metadata";
 import dotenv from "dotenv";
-import { Document, PageOptions, FireEngineResponse } from "../../lib/entities";
+import { Document, PageOptions, FireEngineResponse, ExtractorOptions } from "../../lib/entities";
 import { parseMarkdown } from "../../lib/html-to-markdown";
 import { urlSpecificParams } from "./utils/custom/website_params";
 import { fetchAndProcessPdf } from "./utils/pdfProcessor";
@@ -348,10 +348,14 @@
   pageOptions: PageOptions = {
     onlyMainContent: true,
     includeHtml: false,
+    includeRawHtml: false,
     waitFor: 0,
     screenshot: false,
     headers: undefined,
   },
+  extractorOptions: ExtractorOptions = {
+    mode: "llm-extraction-from-markdown"
+  },
   existingHtml: string = ""
 ): Promise<Document> {
   urlToScrap = urlToScrap.trim();
@@ -517,8 +521,10 @@
       if (attempt.pageStatusCode) {
         pageStatusCode = attempt.pageStatusCode;
       }
-      if (attempt.pageError) {
+      if (attempt.pageError && attempt.pageStatusCode != 200) {
         pageError = attempt.pageError;
+      } else {
+        pageError = undefined;
       }
 
       if (text && text.trim().length >= 100) break;
@@ -542,6 +548,7 @@
       content: text,
       markdown: text,
       html: pageOptions.includeHtml ? html : undefined,
+      rawHtml: pageOptions.includeRawHtml || extractorOptions.mode === "llm-extraction-from-raw-html" ? rawHtml : undefined,
       metadata: {
         ...metadata,
         screenshot: screenshot,
@@ -555,6 +562,7 @@
       content: text,
       markdown: text,
       html: pageOptions.includeHtml ? html : undefined,
+      rawHtml: pageOptions.includeRawHtml || extractorOptions.mode === "llm-extraction-from-raw-html" ? rawHtml : undefined,
       metadata: {
         ...metadata,
         sourceURL: urlToScrap,
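
For reviewers, a minimal sketch of how a client could exercise the new mode end to end. The `/v0/scrape` endpoint, the `pageOptions.includeRawHtml` flag, the `extractorOptions` payload shape, and the `data.llm_extraction` response field all come from the diff above; the `FIRECRAWL_API_URL`/`FIRECRAWL_API_KEY` environment variables and the function name are illustrative assumptions, not part of this PR.

```ts
// Hypothetical client call exercising llm-extraction-from-raw-html (Node 18+, global fetch).
async function extractCtasFromRawHtml(): Promise<void> {
  const response = await fetch(`${process.env.FIRECRAWL_API_URL}/v0/scrape`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${process.env.FIRECRAWL_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      url: "https://mendable.ai",
      // Without includeRawHtml, scrapeHelper deletes rawHtml from the
      // response after the raw-html extraction pass runs (see scrape.ts).
      pageOptions: { includeRawHtml: true },
      extractorOptions: {
        mode: "llm-extraction-from-raw-html",
        extractionPrompt:
          "Based on the information on the page, what are the primary and secondary CTA buttons?",
        extractionSchema: {
          type: "object",
          properties: {
            primary_cta: { type: "string" },
            secondary_cta: { type: "string" },
          },
          required: ["primary_cta", "secondary_cta"],
        },
      },
    }),
  });

  const { data } = await response.json();
  // Structured output, validated against the schema by generateCompletions.
  console.log(data.llm_extraction); // e.g. { primary_cta: "...", secondary_cta: "..." }
}
```

Note that, per the billing change in `scrape.ts`, all three llm-extraction modes are billed at `creditsPerLLMExtract` (50) credits per returned document on top of the base scrape.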