mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 22:59:04 +08:00
includehtml fix
This commit is contained in:
parent
5818236659
commit
fe2e8c0b7a
@@ -44,7 +44,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
|
||||
}
|
||||
|
||||
const crawlerOptions = req.body.crawlerOptions ?? {};
|
||||
- const pageOptions = req.body.pageOptions ?? { onlyMainContent: false, includeHtml: false, removeTags: [] };
|
||||
+ const pageOptions = req.body.pageOptions ?? { onlyMainContent: false, includeHtml: true, removeTags: [] };
|
||||
|
||||
// if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this?
|
||||
// try {
|
||||
|
@@ -132,7 +132,7 @@ export async function searchController(req: Request, res: Response) {
|
||||
}
|
||||
const crawlerOptions = req.body.crawlerOptions ?? {};
|
||||
const pageOptions = req.body.pageOptions ?? {
|
||||
- includeHtml: false,
|
||||
+ includeHtml: true,
|
||||
onlyMainContent: true,
|
||||
fetchPageContent: true,
|
||||
removeTags: [],
|
||||
|
@@ -4,7 +4,7 @@ export const defaultTimeout = 45000; // 45 seconds
|
||||
|
||||
export const defaultPageOptions = {
|
||||
onlyMainContent: false,
|
||||
- includeHtml: false,
|
||||
+ includeHtml: true,
|
||||
waitFor: 0,
|
||||
screenshot: false,
|
||||
fullPageScreenshot: false,
|
||||
@@ -17,7 +17,7 @@ export const defaultCrawlerOptions = {
|
||||
|
||||
export const defaultCrawlPageOptions = {
|
||||
onlyMainContent: false,
|
||||
- includeHtml: false,
|
||||
+ includeHtml: true,
|
||||
removeTags: [],
|
||||
parsePDF: true
|
||||
}
|
||||
|
@@ -574,7 +574,7 @@ export class WebScraperDataProvider {
|
||||
options.crawlerOptions?.generateImgAltText ?? false;
|
||||
this.pageOptions = options.pageOptions ?? {
|
||||
onlyMainContent: false,
|
||||
- includeHtml: false,
|
||||
+ includeHtml: true,
|
||||
replaceAllPathsWithAbsolutePaths: false,
|
||||
parsePDF: true,
|
||||
removeTags: [],
|
||||
|
@@ -125,7 +125,7 @@ export async function scrapSingleUrl(
|
||||
pageOptions: PageOptions = {
|
||||
includeMarkdown: true,
|
||||
onlyMainContent: true,
|
||||
- includeHtml: false,
|
||||
+ includeHtml: true,
|
||||
includeRawHtml: false,
|
||||
waitFor: 0,
|
||||
screenshot: false,
|
||||
|
Loading…
x
Reference in New Issue
Block a user