mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-14 12:35:54 +08:00
Nick:
This commit is contained in:
parent
01fab6e036
commit
f82ca3be17
@ -19,6 +19,7 @@ import { scrapWithScrapingBee } from "./scrapers/scrapingBee";
|
|||||||
import { extractLinks } from "./utils/utils";
|
import { extractLinks } from "./utils/utils";
|
||||||
import { Logger } from "../../lib/logger";
|
import { Logger } from "../../lib/logger";
|
||||||
import { ScrapeEvents } from "../../lib/scrape-events";
|
import { ScrapeEvents } from "../../lib/scrape-events";
|
||||||
|
import { clientSideError } from "../../strings";
|
||||||
|
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
|
|
||||||
@ -311,7 +312,7 @@ export async function scrapSingleUrl(
|
|||||||
|
|
||||||
for (const scraper of scrapersInOrder) {
|
for (const scraper of scrapersInOrder) {
|
||||||
// If exists text coming from crawler, use it
|
// If exists text coming from crawler, use it
|
||||||
if (existingHtml && existingHtml.trim().length >= 100) {
|
if (existingHtml && existingHtml.trim().length >= 100 && !existingHtml.includes(clientSideError)) {
|
||||||
let cleanedHtml = removeUnwantedElements(existingHtml, pageOptions);
|
let cleanedHtml = removeUnwantedElements(existingHtml, pageOptions);
|
||||||
text = await parseMarkdown(cleanedHtml);
|
text = await parseMarkdown(cleanedHtml);
|
||||||
html = cleanedHtml;
|
html = cleanedHtml;
|
||||||
|
@ -1,2 +1,4 @@
|
|||||||
export const errorNoResults =
|
export const errorNoResults =
|
||||||
"No results found, please check the URL or contact us at help@mendable.ai to file a ticket.";
|
"No results found, please check the URL or contact us at help@mendable.ai to file a ticket.";
|
||||||
|
|
||||||
|
export const clientSideError = "client-side exception has occurred"
|
Loading…
x
Reference in New Issue
Block a user