mirror of
https://git.mirrors.martin98.com/https://github.com/jina-ai/reader
synced 2025-08-18 05:45:54 +08:00
fix: pdf cache
This commit is contained in:
parent
fd0b77285f
commit
1ba21da0c5
@ -281,18 +281,21 @@ export class PDFExtractor extends AsyncService {
|
|||||||
// Don't try again until the next day
|
// Don't try again until the next day
|
||||||
const expireMixin = extracted ? {} : { expireAt: new Date(Date.now() + 1000 * 3600 * 24) };
|
const expireMixin = extracted ? {} : { expireAt: new Date(Date.now() + 1000 * 3600 * 24) };
|
||||||
const theID = randomUUID();
|
const theID = randomUUID();
|
||||||
|
|
||||||
await this.firebaseObjectStorage.saveFile(`pdfs/${theID}`,
|
await this.firebaseObjectStorage.saveFile(`pdfs/${theID}`,
|
||||||
Buffer.from(JSON.stringify(extracted), 'utf-8'), { contentType: 'application/json' });
|
Buffer.from(JSON.stringify(extracted), 'utf-8'), { contentType: 'application/json' });
|
||||||
|
PDFContent.save(
|
||||||
await PDFContent.COLLECTION.doc(theID).set(
|
PDFContent.from({
|
||||||
{
|
_id: theID,
|
||||||
src: url.toString(),
|
src: url.toString(),
|
||||||
meta: extracted?.meta || {},
|
meta: extracted?.meta || {},
|
||||||
urlDigest: digest,
|
urlDigest: digest,
|
||||||
createdAt: new Date(),
|
createdAt: new Date(),
|
||||||
...expireMixin
|
...expireMixin
|
||||||
}, { merge: true }
|
}).degradeForFireStore()
|
||||||
);
|
).catch((r) => {
|
||||||
|
this.logger.warn(`Unable to cache PDF content for ${url}`, { err: r });
|
||||||
|
});
|
||||||
|
|
||||||
return extracted;
|
return extracted;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user