mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-07-31 15:42:00 +08:00
Nick: comments
This commit is contained in:
parent
90c54c32fd
commit
2d30cc6117
@@ -3,6 +3,13 @@ import { logScrape } from "../../../services/logging/scrape_log";
|
||||
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
|
||||
import { universalTimeout } from "../global";
|
||||
|
||||
|
||||
/**
|
||||
* Scrapes a URL with Axios
|
||||
* @param url The URL to scrape
|
||||
* @param pageOptions The options for the page
|
||||
* @returns The scraped content
|
||||
*/
|
||||
export async function scrapWithFetch(
|
||||
url: string,
|
||||
pageOptions: { parsePDF?: boolean } = { parsePDF: true }
|
||||
|
@@ -5,6 +5,16 @@ import { generateRequestParams } from "../single_url";
|
||||
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
|
||||
import { universalTimeout } from "../global";
|
||||
|
||||
/**
|
||||
* Scrapes a URL with Fire-Engine
|
||||
* @param url The URL to scrape
|
||||
* @param waitFor The time to wait for the page to load
|
||||
* @param screenshot Whether to take a screenshot
|
||||
* @param pageOptions The options for the page
|
||||
* @param headers The headers to send with the request
|
||||
* @param options The options for the request
|
||||
* @returns The scraped content
|
||||
*/
|
||||
export async function scrapWithFireEngine({
|
||||
url,
|
||||
waitFor = 0,
|
||||
|
@@ -4,6 +4,14 @@ import { generateRequestParams } from "../single_url";
|
||||
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
|
||||
import { universalTimeout } from "../global";
|
||||
|
||||
/**
|
||||
* Scrapes a URL with Playwright
|
||||
* @param url The URL to scrape
|
||||
* @param waitFor The time to wait for the page to load
|
||||
* @param headers The headers to send with the request
|
||||
* @param pageOptions The options for the page
|
||||
* @returns The scraped content
|
||||
*/
|
||||
export async function scrapWithPlaywright(
|
||||
url: string,
|
||||
waitFor: number = 0,
|
||||
|
@@ -4,7 +4,14 @@ import { fetchAndProcessPdf } from "../utils/pdfProcessor";
|
||||
import { universalTimeout } from "../global";
|
||||
import { ScrapingBeeClient } from "scrapingbee";
|
||||
|
||||
|
||||
/**
|
||||
* Scrapes a URL with ScrapingBee
|
||||
* @param url The URL to scrape
|
||||
* @param wait_browser The browser event to wait for
|
||||
* @param timeout The timeout for the scrape
|
||||
* @param pageOptions The options for the page
|
||||
* @returns The scraped content
|
||||
*/
|
||||
export async function scrapWithScrapingBee(
|
||||
url: string,
|
||||
wait_browser: string = "domcontentloaded",
|
||||
|
Loading…
x
Reference in New Issue
Block a user