mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-11 16:19:00 +08:00
Added fullPageScreenshot capabilities; WIP on the fire-engine side
This commit is contained in:
parent
789c6cf5d7
commit
3edc3a3d15
@ -84,6 +84,11 @@
|
||||
"description": "Include a screenshot of the top of the page that you are scraping.",
|
||||
"default": false
|
||||
},
|
||||
"fullPageScreenshot": {
|
||||
"type": "boolean",
|
||||
"description": "Include a full page screenshot of the page that you are scraping.",
|
||||
"default": false
|
||||
},
|
||||
"waitFor": {
|
||||
"type": "integer",
|
||||
"description": "Wait x amount of milliseconds for the page to load to fetch content",
|
||||
@ -317,6 +322,11 @@
|
||||
"description": "Include a screenshot of the top of the page that you are scraping.",
|
||||
"default": false
|
||||
},
|
||||
"fullPageScreenshot": {
|
||||
"type": "boolean",
|
||||
"description": "Include a full page screenshot of the page that you are scraping.",
|
||||
"default": false
|
||||
},
|
||||
"waitFor": {
|
||||
"type": "integer",
|
||||
"description": "Wait x amount of milliseconds for the page to load to fetch content",
|
||||
|
@ -7,6 +7,7 @@ export const defaultPageOptions = {
|
||||
includeHtml: false,
|
||||
waitFor: 0,
|
||||
screenshot: false,
|
||||
fullPageScreenshot: false,
|
||||
parsePDF: true
|
||||
};
|
||||
|
||||
|
@ -18,6 +18,7 @@ export type PageOptions = {
|
||||
fetchPageContent?: boolean;
|
||||
waitFor?: number;
|
||||
screenshot?: boolean;
|
||||
fullPageScreenshot?: boolean;
|
||||
headers?: Record<string, string>;
|
||||
replaceAllPathsWithAbsolutePaths?: boolean;
|
||||
parsePDF?: boolean;
|
||||
|
@ -11,6 +11,7 @@ import { Logger } from "../../../lib/logger";
|
||||
* @param url The URL to scrape
|
||||
* @param waitFor The time, in milliseconds, to wait for the page to load
|
||||
* @param screenshot Whether to take a screenshot
|
||||
* @param fullPageScreenshot Whether to take a full page screenshot
|
||||
* @param pageOptions The options for the page
|
||||
* @param headers The headers to send with the request
|
||||
* @param options The options for the request
|
||||
@ -20,6 +21,7 @@ export async function scrapWithFireEngine({
|
||||
url,
|
||||
waitFor = 0,
|
||||
screenshot = false,
|
||||
fullPageScreenshot = false,
|
||||
pageOptions = { parsePDF: true },
|
||||
fireEngineOptions = {},
|
||||
headers,
|
||||
@ -28,6 +30,7 @@ export async function scrapWithFireEngine({
|
||||
url: string;
|
||||
waitFor?: number;
|
||||
screenshot?: boolean;
|
||||
fullPageScreenshot?: boolean;
|
||||
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean };
|
||||
fireEngineOptions?: FireEngineOptions;
|
||||
headers?: Record<string, string>;
|
||||
@ -71,6 +74,7 @@ export async function scrapWithFireEngine({
|
||||
url: url,
|
||||
wait: waitParam,
|
||||
screenshot: screenshotParam,
|
||||
fullPageScreenshot: fullPageScreenshot,
|
||||
headers: headers,
|
||||
pageOptions: pageOptions,
|
||||
...fireEngineOptionsParam,
|
||||
|
@ -128,6 +128,7 @@ export async function scrapSingleUrl(
|
||||
includeRawHtml: false,
|
||||
waitFor: 0,
|
||||
screenshot: false,
|
||||
fullPageScreenshot: false,
|
||||
headers: undefined,
|
||||
},
|
||||
extractorOptions: ExtractorOptions = {
|
||||
@ -171,6 +172,7 @@ export async function scrapSingleUrl(
|
||||
url,
|
||||
waitFor: pageOptions.waitFor,
|
||||
screenshot: pageOptions.screenshot,
|
||||
fullPageScreenshot: pageOptions.fullPageScreenshot,
|
||||
pageOptions: pageOptions,
|
||||
headers: pageOptions.headers,
|
||||
fireEngineOptions: {
|
||||
|
Loading…
x
Reference in New Issue
Block a user