mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-06 01:56:09 +08:00
fix(scrapeURL): includeTags/excludeTags
This commit is contained in:
parent
8d467c8ca7
commit
552d55c8fc
@@ -56,7 +56,7 @@ export const removeUnwantedElements = (
 ) => {
   const soup = load(html);
 
-  if (scrapeOptions.includeTags && scrapeOptions.includeTags.length > 0) {
+  if (scrapeOptions.includeTags && scrapeOptions.includeTags.filter(x => x.trim().length !== 0).length > 0) {
     // Create a new root element to hold the tags to keep
     const newRoot = load("<div></div>")("div");
     scrapeOptions.includeTags.forEach((tag) => {
@@ -69,7 +69,7 @@ export const removeUnwantedElements = (
 
   soup("script, style, noscript, meta, head").remove();
 
-  if (scrapeOptions.excludeTags && scrapeOptions.excludeTags.length > 0) {
+  if (scrapeOptions.excludeTags && scrapeOptions.excludeTags.filter(x => x.trim().length !== 0).length > 0) {
     scrapeOptions.excludeTags.forEach((tag) => {
       let elementsToRemove: Cheerio<AnyNode>;
       if (tag.startsWith("*") && tag.endsWith("*")) {
|
Loading…
x
Reference in New Issue
Block a user