Nick: comments

Nicolas 2024-07-03 18:01:54 -03:00
parent 90c54c32fd
commit 2d30cc6117
4 changed files with 33 additions and 1 deletion

@@ -3,6 +3,13 @@ import { logScrape } from "../../../services/logging/scrape_log";
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
import { universalTimeout } from "../global";
/**
* Scrapes a URL with Axios
* @param url The URL to scrape
* @param pageOptions The options for the page
* @returns The scraped content
*/
export async function scrapWithFetch(
url: string,
pageOptions: { parsePDF?: boolean } = { parsePDF: true }

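For reference, a minimal call-site sketch for scrapWithFetch based only on the documented parameters; the diff truncates the signature, so the return type and any remaining parameters are assumptions:

    // Hypothetical usage (inside an async function); assumed to resolve to the scraped content.
    const content = await scrapWithFetch("https://example.com", { parsePDF: true });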

@@ -5,6 +5,16 @@ import { generateRequestParams } from "../single_url";
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
import { universalTimeout } from "../global";
/**
* Scrapes a URL with Fire-Engine
* @param url The URL to scrape
* @param waitFor The time to wait for the page to load
* @param screenshot Whether to take a screenshot
* @param pageOptions The options for the page
* @param headers The headers to send with the request
* @param options The options for the request
* @returns The scraped content
*/
export async function scrapWithFireEngine({
url,
waitFor = 0,

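A hedged usage sketch for scrapWithFireEngine, built only from the @param list above; the shapes of pageOptions and options are not shown in this diff, so the values below are illustrative assumptions:

    // Hypothetical usage (inside an async function); values are illustrative.
    const content = await scrapWithFireEngine({
      url: "https://example.com",
      waitFor: 2000,                            // ms to wait for the page to load
      screenshot: false,                        // whether to take a screenshot
      headers: { "User-Agent": "firecrawl" },   // headers sent with the request
      pageOptions: {},                          // exact shape not shown in this diff
    });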

@@ -4,6 +4,14 @@ import { generateRequestParams } from "../single_url";
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
import { universalTimeout } from "../global";
/**
* Scrapes a URL with Playwright
* @param url The URL to scrape
* @param waitFor The time to wait for the page to load
* @param headers The headers to send with the request
* @param pageOptions The options for the page
* @returns The scraped content
*/
export async function scrapWithPlaywright(
url: string,
waitFor: number = 0,

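A hedged call-site sketch for scrapWithPlaywright, assuming the positional parameters follow the JSDoc order (url, waitFor, headers, pageOptions); the signature is truncated in this diff:

    // Hypothetical usage (inside an async function); values are illustrative.
    const content = await scrapWithPlaywright(
      "https://example.com",
      2000,                            // waitFor: ms to wait for the page to load
      { "User-Agent": "firecrawl" },   // headers sent with the request
      {},                              // pageOptions; exact shape not shown here
    );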

@@ -4,7 +4,14 @@ import { fetchAndProcessPdf } from "../utils/pdfProcessor";
import { universalTimeout } from "../global";
import { ScrapingBeeClient } from "scrapingbee";
/**
* Scrapes a URL with ScrapingBee
* @param url The URL to scrape
* @param wait_browser The browser event to wait for
* @param timeout The timeout for the scrape
* @param pageOptions The options for the page
* @returns The scraped content
*/
export async function scrapWithScrapingBee(
url: string,
wait_browser: string = "domcontentloaded",
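
A hedged call-site sketch for scrapWithScrapingBee, assuming the positional parameters follow the JSDoc order (url, wait_browser, timeout, pageOptions); the timeout unit and pageOptions shape are assumptions, since the signature is truncated here:

    // Hypothetical usage (inside an async function); values are illustrative.
    const content = await scrapWithScrapingBee(
      "https://example.com",
      "domcontentloaded",   // wait_browser: browser event to wait for
      15000,                // timeout (assumed to be in ms)
      {},                   // pageOptions; exact shape not shown here
    );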