mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-16 06:45:59 +08:00
fix: image url
This commit is contained in:
parent
6f65083f8d
commit
4556954d17
@ -190,7 +190,12 @@ export class CrawlerHost extends RPCHost {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const src = linkPreferredSrc;
|
let src;
|
||||||
|
try {
|
||||||
|
src = new URL(linkPreferredSrc, nominalUrl).toString();
|
||||||
|
} catch (_err) {
|
||||||
|
void 0;
|
||||||
|
}
|
||||||
const alt = cleanAttribute(node.getAttribute('alt'));
|
const alt = cleanAttribute(node.getAttribute('alt'));
|
||||||
if (!src) {
|
if (!src) {
|
||||||
return '';
|
return '';
|
||||||
|
@ -206,7 +206,7 @@ function briefImgs(elem) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
src: linkPreferredSrc,
|
src: new URL(linkPreferredSrc, document.location.href).toString(),
|
||||||
loaded: x.complete,
|
loaded: x.complete,
|
||||||
width: x.width,
|
width: x.width,
|
||||||
height: x.height,
|
height: x.height,
|
||||||
@ -437,7 +437,17 @@ document.addEventListener('load', handlePageLoad);
|
|||||||
const textContent = elem.textContent;
|
const textContent = elem.textContent;
|
||||||
const cleanedText = textContent?.split('\n').map((x: any) => x.trimEnd()).join('\n').replace(/\n{3,}/g, '\n\n');
|
const cleanedText = textContent?.split('\n').map((x: any) => x.trimEnd()).join('\n').replace(/\n{3,}/g, '\n\n');
|
||||||
|
|
||||||
const imageTags = Array.from(elem.querySelectorAll('img[src],img[data-src]')).map((x: any) => [x.getAttribute('src'), x.getAttribute('data-src')]).flat().filter(Boolean);
|
const imageTags = Array.from(elem.querySelectorAll('img[src],img[data-src]'))
|
||||||
|
.map((x: any) => [x.getAttribute('src'), x.getAttribute('data-src')])
|
||||||
|
.flat()
|
||||||
|
.map((x) => {
|
||||||
|
try {
|
||||||
|
return new URL(x, snapshot.href).toString();
|
||||||
|
} catch (err) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.filter(Boolean);
|
||||||
|
|
||||||
const imageSet = new Set(imageTags);
|
const imageSet = new Set(imageTags);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user