mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader.git
synced 2025-08-20 16:39:10 +08:00
fix: pdf mode and google web cache
This commit is contained in:
parent
f6bbddcb48
commit
e9258af742
@ -136,7 +136,8 @@ export class SnapshotFormatter extends AsyncService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let pdfMode = false;
|
let pdfMode = false;
|
||||||
if (snapshot.pdfs?.length && !snapshot.title) {
|
// in case of Google Web Cache content
|
||||||
|
if (snapshot.pdfs?.length && (!snapshot.title || snapshot.title.startsWith('cache:'))) {
|
||||||
const pdf = await this.pdfExtractor.cachedExtract(snapshot.pdfs[0],
|
const pdf = await this.pdfExtractor.cachedExtract(snapshot.pdfs[0],
|
||||||
this.threadLocal.get('cacheTolerance')
|
this.threadLocal.get('cacheTolerance')
|
||||||
);
|
);
|
||||||
@ -330,7 +331,7 @@ export class SnapshotFormatter extends AsyncService {
|
|||||||
const n = code - 200;
|
const n = code - 200;
|
||||||
if (n < 0 || n >= 200) {
|
if (n < 0 || n >= 200) {
|
||||||
const text = snapshot.statusText || STATUS_CODES[code];
|
const text = snapshot.statusText || STATUS_CODES[code];
|
||||||
formatted.warning = `Target URL returned error ${code}${text? `: ${text}` : ''}`;
|
formatted.warning = `Target URL returned error ${code}${text ? `: ${text}` : ''}`;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user