update to includeRawHtml

This commit is contained in:
Eric Ciarla 2024-06-28 17:07:47 -04:00
parent 70fcf2ce03
commit 87b54488d3
3 changed files with 5 additions and 5 deletions

View File

@ -66,7 +66,7 @@ export async function scrapeHelper(
} }
// Remove rawHtml if pageOptions.rawHtml is false // Remove rawHtml if pageOptions.includeRawHtml is false
if (!pageOptions.rawHtml) { if (!pageOptions.includeRawHtml) {
filteredDocs.forEach(doc => { filteredDocs.forEach(doc => {
delete doc.rawHtml; delete doc.rawHtml;
}); });

View File

@ -13,7 +13,7 @@ export interface Progress {
export type PageOptions = { export type PageOptions = {
onlyMainContent?: boolean; onlyMainContent?: boolean;
includeHtml?: boolean; includeHtml?: boolean;
rawHtml?: boolean; includeRawHtml?: boolean;
fallback?: boolean; fallback?: boolean;
fetchPageContent?: boolean; fetchPageContent?: boolean;
waitFor?: number; waitFor?: number;

View File

@ -303,7 +303,7 @@ export async function scrapSingleUrl(
pageOptions: PageOptions = { pageOptions: PageOptions = {
onlyMainContent: true, onlyMainContent: true,
includeHtml: false, includeHtml: false,
rawHtml: false, includeRawHtml: false,
waitFor: 0, waitFor: 0,
screenshot: false, screenshot: false,
headers: undefined headers: undefined
@ -469,7 +469,7 @@ export async function scrapSingleUrl(
content: text, content: text,
markdown: text, markdown: text,
html: pageOptions.includeHtml ? html : undefined, html: pageOptions.includeHtml ? html : undefined,
rawHtml: pageOptions.rawHtml || extractorOptions.mode === "llm-extraction-from-raw-html" ? rawHtml : undefined, rawHtml: pageOptions.includeRawHtml || extractorOptions.mode === "llm-extraction-from-raw-html" ? rawHtml : undefined,
metadata: { metadata: {
...metadata, ...metadata,
screenshot: screenshot, screenshot: screenshot,
@ -483,7 +483,7 @@ export async function scrapSingleUrl(
content: text, content: text,
markdown: text, markdown: text,
html: pageOptions.includeHtml ? html : undefined, html: pageOptions.includeHtml ? html : undefined,
rawHtml: pageOptions.rawHtml || extractorOptions.mode === "llm-extraction-from-raw-html" ? rawHtml : undefined, rawHtml: pageOptions.includeRawHtml || extractorOptions.mode === "llm-extraction-from-raw-html" ? rawHtml : undefined,
metadata: { metadata: {
...metadata, ...metadata,
sourceURL: urlToScrap, sourceURL: urlToScrap,