mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-15 02:15:56 +08:00
fix: abuse of flooding elements
This commit is contained in:
parent
8b7af6d076
commit
6fb5df97cc
@ -50,6 +50,7 @@ export interface PageSnapshot {
|
|||||||
imgs?: ImgBrief[];
|
imgs?: ImgBrief[];
|
||||||
pdfs?: string[];
|
pdfs?: string[];
|
||||||
maxElemDepth?: number;
|
maxElemDepth?: number;
|
||||||
|
elemCount?: number;
|
||||||
childFrames?: PageSnapshot[];
|
childFrames?: PageSnapshot[];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,14 +118,21 @@ function briefPDFs() {
|
|||||||
return x.src === 'about:blank' ? document.location.href : x.src;
|
return x.src === 'about:blank' ? document.location.href : x.src;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
function getMaxDepthUsingTreeWalker(root) {
|
function getMaxDepthAndCountUsingTreeWalker(root) {
|
||||||
let maxDepth = 0;
|
let maxDepth = 0;
|
||||||
let currentDepth = 0;
|
let currentDepth = 0;
|
||||||
|
let elementCount = 0;
|
||||||
|
|
||||||
const treeWalker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT, null, false);
|
const treeWalker = document.createTreeWalker(
|
||||||
|
root,
|
||||||
|
NodeFilter.SHOW_ELEMENT,
|
||||||
|
(node) => (node.nodeName.toLowerCase() === 'svg') ? NodeFilter.FILTER_REJECT : NodeFilter.FILTER_ACCEPT,
|
||||||
|
false
|
||||||
|
);
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
maxDepth = Math.max(maxDepth, currentDepth);
|
maxDepth = Math.max(maxDepth, currentDepth);
|
||||||
|
elementCount++; // Increment the count for the current node
|
||||||
|
|
||||||
if (treeWalker.firstChild()) {
|
if (treeWalker.firstChild()) {
|
||||||
currentDepth++;
|
currentDepth++;
|
||||||
@ -140,7 +148,10 @@ function getMaxDepthUsingTreeWalker(root) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return maxDepth + 1;
|
return {
|
||||||
|
maxDepth: maxDepth + 1,
|
||||||
|
elementCount: elementCount
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function giveSnapshot(stopActiveSnapshot) {
|
function giveSnapshot(stopActiveSnapshot) {
|
||||||
@ -153,7 +164,7 @@ function giveSnapshot(stopActiveSnapshot) {
|
|||||||
} catch (err) {
|
} catch (err) {
|
||||||
void 0;
|
void 0;
|
||||||
}
|
}
|
||||||
|
const domAnalysis = getMaxDepthAndCountUsingTreeWalker(document.documentElement);
|
||||||
const r = {
|
const r = {
|
||||||
title: document.title,
|
title: document.title,
|
||||||
href: document.location.href,
|
href: document.location.href,
|
||||||
@ -162,7 +173,8 @@ function giveSnapshot(stopActiveSnapshot) {
|
|||||||
parsed: parsed,
|
parsed: parsed,
|
||||||
imgs: [],
|
imgs: [],
|
||||||
pdfs: briefPDFs(),
|
pdfs: briefPDFs(),
|
||||||
maxElemDepth: getMaxDepthUsingTreeWalker(document.documentElement)
|
maxElemDepth: domAnalysis.maxDepth,
|
||||||
|
elemCount: domAnalysis.elementCount,
|
||||||
};
|
};
|
||||||
if (parsed && parsed.content) {
|
if (parsed && parsed.content) {
|
||||||
const elem = document.createElement('div');
|
const elem = document.createElement('div');
|
||||||
@ -478,6 +490,10 @@ document.addEventListener('load', handlePageLoad);
|
|||||||
page.emit('abuse', { url, page, sn, reason: `DoS attack suspected: DOM tree too deep` });
|
page.emit('abuse', { url, page, sn, reason: `DoS attack suspected: DOM tree too deep` });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (s?.elemCount && s.elemCount > 15_000) {
|
||||||
|
page.emit('abuse', { url, page, sn, reason: `DoS attack suspected: too many DOM elements` });
|
||||||
|
return;
|
||||||
|
}
|
||||||
snapshot = s;
|
snapshot = s;
|
||||||
nextSnapshotDeferred.resolve(s);
|
nextSnapshotDeferred.resolve(s);
|
||||||
nextSnapshotDeferred = Defer();
|
nextSnapshotDeferred = Defer();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user