diff --git a/package-lock.json b/package-lock.json index af61e06..08d38f7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -17,7 +17,7 @@ "axios": "^1.3.3", "bcrypt": "^5.1.0", "busboy": "^1.6.0", - "civkit": "^0.9.0-848ef4e", + "civkit": "^0.9.0-2570394", "core-js": "^3.37.1", "cors": "^2.8.5", "dayjs": "^1.11.9", @@ -4003,9 +4003,9 @@ } }, "node_modules/civkit": { - "version": "0.9.0-848ef4e", - "resolved": "https://registry.npmjs.org/civkit/-/civkit-0.9.0-848ef4e.tgz", - "integrity": "sha512-yxk5AKaiZSN4ntlwybVHYgUer402CSw06KzN7wvfaYra9evZkZ7MiFHGULqMnY7657k3CH0WV4n6jGfRj1Vpvw==", + "version": "0.9.0-2570394", + "resolved": "https://registry.npmjs.org/civkit/-/civkit-0.9.0-2570394.tgz", + "integrity": "sha512-w77agnElTEP6g+l66KhX1Ib9z7JXbR3FaR5/2yTUPIPjm32qsWkmKRvv0mZ83IcMSSmTjF9LxboYAliyTx7cIA==", "license": "AGPL", "dependencies": { "lodash": "^4.17.21", diff --git a/package.json b/package.json index a43500d..eee7f03 100644 --- a/package.json +++ b/package.json @@ -26,7 +26,7 @@ "axios": "^1.3.3", "bcrypt": "^5.1.0", "busboy": "^1.6.0", - "civkit": "^0.9.0-848ef4e", + "civkit": "^0.9.0-2570394", "core-js": "^3.37.1", "cors": "^2.8.5", "dayjs": "^1.11.9", diff --git a/src/api/crawler.ts b/src/api/crawler.ts index 982a4b0..ce1cb8f 100644 --- a/src/api/crawler.ts +++ b/src/api/crawler.ts @@ -1069,7 +1069,6 @@ export class CrawlerHost extends RPCHost { title: snapshot.title, content: snapshot.parsed?.textContent, url: presumedURL?.href || snapshot.href, - [Symbol.dispose]: () => undefined, }; Object.defineProperty(output, 'textRepresentation', { diff --git a/src/services/alt-text.ts b/src/services/alt-text.ts index 143cea9..55a5ba1 100644 --- a/src/services/alt-text.ts +++ b/src/services/alt-text.ts @@ -33,8 +33,11 @@ export class AltTextService extends AsyncService { try { const img = await this.canvasService.loadImage(url); const contentTypeHint = Reflect.get(img, 'contentType'); + if (Math.min(img.naturalHeight, img.naturalWidth) <= 1) { + return `A 
${img.naturalWidth}x${img.naturalHeight} image, likely a tracker probe`; + } if (Math.min(img.naturalHeight, img.naturalWidth) < 64) { - throw new AssertionFailureError({ message: `Image is too small to generate alt text for url ${url}` }); + return `A ${img.naturalWidth}x${img.naturalHeight} small image, likely a logo, icon or avatar`; } const resized = this.canvasService.fitImageToSquareBox(img, 1024); const exported = await this.canvasService.canvasToBuffer(resized, 'image/png'); @@ -63,6 +66,32 @@ export class AltTextService extends AsyncService { } const digest = md5Hasher.hash(imgBrief.src); const shortDigest = Buffer.from(digest, 'hex').toString('base64url'); + let dims: number[] = []; + do { + if (imgBrief.loaded) { + if (imgBrief.naturalWidth && imgBrief.naturalHeight) { + if (Math.min(imgBrief.naturalWidth, imgBrief.naturalHeight) < 64) { + dims = [imgBrief.naturalWidth, imgBrief.naturalHeight]; + break; + } + } + } + + if (imgBrief.width && imgBrief.height) { + if (Math.min(imgBrief.width, imgBrief.height) < 64) { + dims = [imgBrief.width, imgBrief.height]; + break; + } + } + + } while (false); + + if (Math.min(...dims) <= 1) { + return `A ${dims[0]}x${dims[1]} image, likely a tracker probe`; + } + if (Math.min(...dims) < 64) { + return `A ${dims[0]}x${dims[1]} small image, likely a logo, icon or avatar`; + } const existing = await ImgAlt.fromFirestore(shortDigest); @@ -102,4 +131,4 @@ export class AltTextService extends AsyncService { return generatedCaption; } -} +}; diff --git a/src/services/puppeteer.ts b/src/services/puppeteer.ts index 3b13f81..8bab0a7 100644 --- a/src/services/puppeteer.ts +++ b/src/services/puppeteer.ts @@ -407,18 +407,7 @@ function giveSnapshot(stopActiveSnapshot, overrideDomAnalysis) { if (document.baseURI !== r.href) { r.rebase = document.baseURI; } - r.imgs = briefImgs().filter((x)=> { - if (x.complete) { - if (Math.min(x.width, x.height, x.naturalWidth, x.naturalHeight) < 64) { - return false; - } - } - const m = 
Math.min(x.width, x.height); - if (m && m < 64) { - return false; - } - return true; - }); + r.imgs = briefImgs(); return r; } diff --git a/src/services/snapshot-formatter.ts b/src/services/snapshot-formatter.ts index 60c126b..f02fa4b 100644 --- a/src/services/snapshot-formatter.ts +++ b/src/services/snapshot-formatter.ts @@ -43,7 +43,7 @@ export interface FormattedPage { textRepresentation?: string; - [Symbol.dispose]: () => void; + [Symbol.dispose]?: () => void; } export const md5Hasher = new HashManager('md5', 'hex'); @@ -199,7 +199,6 @@ export class SnapshotFormatter extends AsyncService { description: (snapshot.description || '').trim(), url: nominalUrl?.toString() || snapshot.href?.trim(), publishedTime: snapshot.parsed?.publishedTime || undefined, - [Symbol.dispose]: () => { }, }; Object.assign(f, formatted); @@ -395,7 +394,6 @@ export class SnapshotFormatter extends AsyncService { url: nominalUrl?.toString() || snapshot.href?.trim(), content: contentText, publishedTime: snapshot.parsed?.publishedTime || undefined, - [Symbol.dispose]: () => { }, }; if (snapshot.status) {