Rename the rawHtml page option to includeRawHtml

This commit is contained in:
Eric Ciarla 2024-06-28 17:07:47 -04:00
parent 70fcf2ce03
commit 87b54488d3
3 changed files with 5 additions and 5 deletions

View File

@ -66,7 +66,7 @@ export async function scrapeHelper(
}
// Remove rawHtml if pageOptions.includeRawHtml is false
if (!pageOptions.rawHtml) {
if (!pageOptions.includeRawHtml) {
filteredDocs.forEach(doc => {
delete doc.rawHtml;
});

View File

@ -13,7 +13,7 @@ export interface Progress {
export type PageOptions = {
onlyMainContent?: boolean;
includeHtml?: boolean;
rawHtml?: boolean;
includeRawHtml?: boolean;
fallback?: boolean;
fetchPageContent?: boolean;
waitFor?: number;

View File

@ -303,7 +303,7 @@ export async function scrapSingleUrl(
pageOptions: PageOptions = {
onlyMainContent: true,
includeHtml: false,
rawHtml: false,
includeRawHtml: false,
waitFor: 0,
screenshot: false,
headers: undefined
@ -469,7 +469,7 @@ export async function scrapSingleUrl(
content: text,
markdown: text,
html: pageOptions.includeHtml ? html : undefined,
rawHtml: pageOptions.rawHtml || extractorOptions.mode === "llm-extraction-from-raw-html" ? rawHtml : undefined,
rawHtml: pageOptions.includeRawHtml || extractorOptions.mode === "llm-extraction-from-raw-html" ? rawHtml : undefined,
metadata: {
...metadata,
screenshot: screenshot,
@ -483,7 +483,7 @@ export async function scrapSingleUrl(
content: text,
markdown: text,
html: pageOptions.includeHtml ? html : undefined,
rawHtml: pageOptions.rawHtml || extractorOptions.mode === "llm-extraction-from-raw-html" ? rawHtml : undefined,
rawHtml: pageOptions.includeRawHtml || extractorOptions.mode === "llm-extraction-from-raw-html" ? rawHtml : undefined,
metadata: {
...metadata,
sourceURL: urlToScrap,