fix: set expireAt on image alt-text cache entries

This commit is contained in:
yanlong.wang 2024-04-16 15:20:46 +08:00
parent 4f284f51b6
commit 8a2b095bd7
No known key found for this signature in database
GPG Key ID: C0A623C0BADF9F37
2 changed files with 22 additions and 25 deletions

View File

@ -53,8 +53,6 @@ export class CrawlerHost extends RPCHost {
turnDownPlugins = [require('turndown-plugin-gfm').gfm]; turnDownPlugins = [require('turndown-plugin-gfm').gfm];
imageShortUrlPrefix?: string;
constructor( constructor(
protected globalLogger: Logger, protected globalLogger: Logger,
protected puppeteerControl: PuppeteerControl, protected puppeteerControl: PuppeteerControl,
@ -78,13 +76,13 @@ export class CrawlerHost extends RPCHost {
let contentText = ''; let contentText = '';
if (toBeTurnedToMd) { if (toBeTurnedToMd) {
const urlToAltMap: { [k: string]: { shortDigest: string, alt?: string; }; } = {}; const urlToAltMap: { [k: string]: string | undefined; } = {};
const tasks = (snapshot.imgs || []).map(async (x) => { const tasks = (snapshot.imgs || []).map(async (x) => {
const r = await this.altTextService.getAltTextAndShortDigest(x).catch((err)=> { const r = await this.altTextService.getAltText(x).catch((err: any) => {
this.logger.warn(`Failed to get alt text for ${x.src}`, { err: marshalErrorLike(err) }); this.logger.warn(`Failed to get alt text for ${x.src}`, { err: marshalErrorLike(err) });
return undefined; return undefined;
}); });
if (r) { if (r && x.src) {
urlToAltMap[x.src.trim()] = r; urlToAltMap[x.src.trim()] = r;
} }
}); });
@ -103,7 +101,7 @@ export class CrawlerHost extends RPCHost {
const mapped = urlToAltMap[src]; const mapped = urlToAltMap[src];
imgIdx++; imgIdx++;
if (mapped) { if (mapped) {
return `![Image ${imgIdx}: ${mapped.alt || alt}](${this.imageShortUrlPrefix ? `${this.imageShortUrlPrefix}/${mapped.shortDigest}` : src})`; return `![Image ${imgIdx}: ${mapped || alt}](${src})`;
} }
return `![Image ${imgIdx}: ${alt}](${src})`; return `![Image ${imgIdx}: ${alt}](${src})`;
} }
@ -115,7 +113,7 @@ export class CrawlerHost extends RPCHost {
if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) { if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) {
contentText = turnDownService.turndown(snapshot.html); contentText = turnDownService.turndown(snapshot.html);
} }
if (!contentText || (contentText.startsWith('<') && contentText.endsWith('>'))) { if (!contentText || (contentText.startsWith('<') || contentText.endsWith('>'))) {
contentText = snapshot.text; contentText = snapshot.text;
} }

View File

@ -44,32 +44,33 @@ export class AltTextService extends AsyncService {
} }
} }
async getAltTextAndShortDigest(imgBrief: ImgBrief) { async getAltText(imgBrief: ImgBrief) {
if (!imgBrief.src) { if (!imgBrief.src) {
return undefined; return undefined;
} }
if (imgBrief.alt) {
return imgBrief.alt;
}
const digest = md5Hasher.hash(imgBrief.src); const digest = md5Hasher.hash(imgBrief.src);
const shortDigest = Buffer.from(digest, 'hex').toString('base64url'); const shortDigest = Buffer.from(digest, 'hex').toString('base64url');
const existing = await ImgAlt.fromFirestore(shortDigest); const existing = await ImgAlt.fromFirestore(shortDigest);
if (existing?.generatedAlt) { if (existing) {
return { return existing.generatedAlt || existing.originalAlt || '';
shortDigest,
alt: existing.generatedAlt,
};
} }
let generatedCaption; let generatedCaption = '';
if (!imgBrief.alt) { try {
try { generatedCaption = await this.caption(imgBrief.src);
generatedCaption = await this.caption(imgBrief.src); } catch (err) {
} catch (err) { this.logger.warn(`Unable to generate alt text for ${imgBrief.src}`, { err });
this.logger.warn(`Unable to generate alt text for ${imgBrief.src}`, { err });
}
} }
// Don't try again until the next day
const expireMixin = generatedCaption ? {} : { expireAt: new Date(Date.now() + 1000 * 3600 * 24) };
await ImgAlt.COLLECTION.doc(shortDigest).set( await ImgAlt.COLLECTION.doc(shortDigest).set(
{ {
_id: shortDigest, _id: shortDigest,
@ -79,13 +80,11 @@ export class AltTextService extends AsyncService {
urlDigest: digest, urlDigest: digest,
originalAlt: imgBrief.alt || '', originalAlt: imgBrief.alt || '',
generatedAlt: generatedCaption || '', generatedAlt: generatedCaption || '',
createdAt: new Date() createdAt: new Date(),
...expireMixin
}, { merge: true } }, { merge: true }
); );
return { return generatedCaption;
shortDigest,
alt: generatedCaption,
};
} }
} }