Added fullPageScreenshot capabilities; WIP on the fire-engine side

This commit is contained in:
rafaelsideguide 2024-08-05 18:17:37 -03:00
parent 789c6cf5d7
commit 3edc3a3d15
5 changed files with 18 additions and 0 deletions

View File

@@ -84,6 +84,11 @@
"description": "Include a screenshot of the top of the page that you are scraping.", "description": "Include a screenshot of the top of the page that you are scraping.",
"default": false "default": false
}, },
"fullPageScreenshot": {
"type": "boolean",
"description": "Include a full page screenshot of the page that you are scraping.",
"default": false
},
"waitFor": { "waitFor": {
"type": "integer", "type": "integer",
"description": "Wait x amount of milliseconds for the page to load to fetch content", "description": "Wait x amount of milliseconds for the page to load to fetch content",
@@ -317,6 +322,11 @@
"description": "Include a screenshot of the top of the page that you are scraping.", "description": "Include a screenshot of the top of the page that you are scraping.",
"default": false "default": false
}, },
"fullPageScreenshot": {
"type": "boolean",
"description": "Include a full page screenshot of the page that you are scraping.",
"default": false
},
"waitFor": { "waitFor": {
"type": "integer", "type": "integer",
"description": "Wait x amount of milliseconds for the page to load to fetch content", "description": "Wait x amount of milliseconds for the page to load to fetch content",

View File

@@ -7,6 +7,7 @@ export const defaultPageOptions = {
includeHtml: false, includeHtml: false,
waitFor: 0, waitFor: 0,
screenshot: false, screenshot: false,
fullPageScreenshot: false,
parsePDF: true parsePDF: true
}; };

View File

@@ -18,6 +18,7 @@ export type PageOptions = {
fetchPageContent?: boolean; fetchPageContent?: boolean;
waitFor?: number; waitFor?: number;
screenshot?: boolean; screenshot?: boolean;
fullPageScreenshot?: boolean;
headers?: Record<string, string>; headers?: Record<string, string>;
replaceAllPathsWithAbsolutePaths?: boolean; replaceAllPathsWithAbsolutePaths?: boolean;
parsePDF?: boolean; parsePDF?: boolean;

View File

@@ -11,6 +11,7 @@ import { Logger } from "../../../lib/logger";
* @param url The URL to scrape * @param url The URL to scrape
* @param waitFor The time to wait for the page to load * @param waitFor The time to wait for the page to load
* @param screenshot Whether to take a screenshot * @param screenshot Whether to take a screenshot
* @param fullPageScreenshot Whether to take a full page screenshot
* @param pageOptions The options for the page * @param pageOptions The options for the page
* @param headers The headers to send with the request * @param headers The headers to send with the request
* @param options The options for the request * @param options The options for the request
@@ -20,6 +21,7 @@ export async function scrapWithFireEngine({
url, url,
waitFor = 0, waitFor = 0,
screenshot = false, screenshot = false,
fullPageScreenshot = false,
pageOptions = { parsePDF: true }, pageOptions = { parsePDF: true },
fireEngineOptions = {}, fireEngineOptions = {},
headers, headers,
@@ -28,6 +30,7 @@ export async function scrapWithFireEngine({
url: string; url: string;
waitFor?: number; waitFor?: number;
screenshot?: boolean; screenshot?: boolean;
fullPageScreenshot?: boolean;
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean }; pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean };
fireEngineOptions?: FireEngineOptions; fireEngineOptions?: FireEngineOptions;
headers?: Record<string, string>; headers?: Record<string, string>;
@@ -71,6 +74,7 @@ export async function scrapWithFireEngine({
url: url, url: url,
wait: waitParam, wait: waitParam,
screenshot: screenshotParam, screenshot: screenshotParam,
fullPageScreenshot: fullPageScreenshot,
headers: headers, headers: headers,
pageOptions: pageOptions, pageOptions: pageOptions,
...fireEngineOptionsParam, ...fireEngineOptionsParam,

View File

@@ -128,6 +128,7 @@ export async function scrapSingleUrl(
includeRawHtml: false, includeRawHtml: false,
waitFor: 0, waitFor: 0,
screenshot: false, screenshot: false,
fullPageScreenshot: false,
headers: undefined, headers: undefined,
}, },
extractorOptions: ExtractorOptions = { extractorOptions: ExtractorOptions = {
@@ -171,6 +172,7 @@ export async function scrapSingleUrl(
url, url,
waitFor: pageOptions.waitFor, waitFor: pageOptions.waitFor,
screenshot: pageOptions.screenshot, screenshot: pageOptions.screenshot,
fullPageScreenshot: pageOptions.fullPageScreenshot,
pageOptions: pageOptions, pageOptions: pageOptions,
headers: pageOptions.headers, headers: pageOptions.headers,
fireEngineOptions: { fireEngineOptions: {