From 87b54488d3c76eff954bb11442259f9db776117f Mon Sep 17 00:00:00 2001 From: Eric Ciarla Date: Fri, 28 Jun 2024 17:07:47 -0400 Subject: [PATCH] update to includeRawHtml --- apps/api/src/controllers/scrape.ts | 2 +- apps/api/src/lib/entities.ts | 2 +- apps/api/src/scraper/WebScraper/single_url.ts | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts index cdee8663..56ea1d5d 100644 --- a/apps/api/src/controllers/scrape.ts +++ b/apps/api/src/controllers/scrape.ts @@ -66,7 +66,7 @@ export async function scrapeHelper( } // Remove rawHtml if pageOptions.rawHtml is false - if (!pageOptions.rawHtml) { + if (!pageOptions.includeRawHtml) { filteredDocs.forEach(doc => { delete doc.rawHtml; }); diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index 8b2bd767..d2b3b002 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -13,7 +13,7 @@ export interface Progress { export type PageOptions = { onlyMainContent?: boolean; includeHtml?: boolean; - rawHtml?: boolean; + includeRawHtml?: boolean; fallback?: boolean; fetchPageContent?: boolean; waitFor?: number; diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index 48a8a57c..a4e4217a 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -303,7 +303,7 @@ export async function scrapSingleUrl( pageOptions: PageOptions = { onlyMainContent: true, includeHtml: false, - rawHtml: false, + includeRawHtml: false, waitFor: 0, screenshot: false, headers: undefined @@ -469,7 +469,7 @@ export async function scrapSingleUrl( content: text, markdown: text, html: pageOptions.includeHtml ? html : undefined, - rawHtml: pageOptions.rawHtml || extractorOptions.mode === "llm-extraction-from-raw-html" ? rawHtml : undefined, + rawHtml: pageOptions.includeRawHtml || extractorOptions.mode === "llm-extraction-from-raw-html" ? rawHtml : undefined, metadata: { ...metadata, screenshot: screenshot, @@ -483,7 +483,7 @@ export async function scrapSingleUrl( content: text, markdown: text, html: pageOptions.includeHtml ? html : undefined, - rawHtml: pageOptions.rawHtml || extractorOptions.mode === "llm-extraction-from-raw-html" ? rawHtml : undefined, + rawHtml: pageOptions.includeRawHtml || extractorOptions.mode === "llm-extraction-from-raw-html" ? rawHtml : undefined, metadata: { ...metadata, sourceURL: urlToScrap,