diff --git a/backend/functions/package-lock.json b/backend/functions/package-lock.json index a20de7c..69795f9 100644 --- a/backend/functions/package-lock.json +++ b/backend/functions/package-lock.json @@ -15,13 +15,14 @@ "archiver": "^6.0.1", "axios": "^1.3.3", "bcrypt": "^5.1.0", + "busboy": "^1.6.0", "civkit": "^0.8.2-2eddf1b", "core-js": "^3.37.1", "cors": "^2.8.5", "dayjs": "^1.11.9", "express": "^4.19.2", "firebase-admin": "^12.1.0", - "firebase-functions": "^6.1.0", + "firebase-functions": "^6.1.1", "htmlparser2": "^9.0.0", "jose": "^5.1.0", "langdetect": "^0.2.1", @@ -48,6 +49,7 @@ "devDependencies": { "@types/archiver": "^5.3.4", "@types/bcrypt": "^5.0.0", + "@types/busboy": "^1.5.4", "@types/cors": "^2.8.17", "@types/generic-pool": "^3.8.1", "@types/node": "^20.14.13", @@ -2135,6 +2137,16 @@ "@types/node": "*" } }, + "node_modules/@types/busboy": { + "version": "1.5.4", + "resolved": "https://registry.npmjs.org/@types/busboy/-/busboy-1.5.4.tgz", + "integrity": "sha512-kG7WrUuAKK0NoyxfQHsVE6j1m01s6kMma64E+OZenQABMQyTJop1DumUWcLwAQ2JzpefU7PDYoRDKl8uZosFjw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/cacheable-request": { "version": "6.0.3", "resolved": "https://registry.npmjs.org/@types/cacheable-request/-/cacheable-request-6.0.3.tgz", @@ -3540,7 +3552,6 @@ "version": "1.6.0", "resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz", "integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==", - "optional": true, "dependencies": { "streamsearch": "^1.1.0" }, @@ -5539,9 +5550,9 @@ } }, "node_modules/firebase-functions": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/firebase-functions/-/firebase-functions-6.1.0.tgz", - "integrity": "sha512-7Gq7XpIA2qo9wKhYA9Ksb0v2bHfXD70zQwBJO6//Q624A7D9KAb449K6DM0swrCoPO7NGExbPf2eC7j7e+4+xA==", + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/firebase-functions/-/firebase-functions-6.1.1.tgz", + "integrity": "sha512-q+4zsQhX04YJUz6hqaiH/j5kixljPj0PMxkm8KN3juYp3I4NC6CZ4qfy5JRfwvV8VfXM2KkJrZuyJtLyZr97aw==", "license": "MIT", "dependencies": { "@types/cors": "^2.8.5", @@ -5557,7 +5568,7 @@ "node": ">=14.10.0" }, "peerDependencies": { - "firebase-admin": "^11.10.0 || ^12.0.0" + "firebase-admin": "^11.10.0 || ^12.0.0 || ^13.0.0" } }, "node_modules/firebase-functions-test": { @@ -10960,7 +10971,6 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz", "integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==", - "optional": true, "engines": { "node": ">=10.0.0" } diff --git a/backend/functions/package.json b/backend/functions/package.json index e3be929..6c14517 100644 --- a/backend/functions/package.json +++ b/backend/functions/package.json @@ -35,13 +35,14 @@ "archiver": "^6.0.1", "axios": "^1.3.3", "bcrypt": "^5.1.0", + "busboy": "^1.6.0", "civkit": "^0.8.2-2eddf1b", "core-js": "^3.37.1", "cors": "^2.8.5", "dayjs": "^1.11.9", "express": "^4.19.2", "firebase-admin": "^12.1.0", - "firebase-functions": "^6.1.0", + "firebase-functions": "^6.1.1", "htmlparser2": "^9.0.0", "jose": "^5.1.0", "langdetect": "^0.2.1", @@ -68,6 +69,7 @@ "devDependencies": { "@types/archiver": "^5.3.4", "@types/bcrypt": "^5.0.0", + "@types/busboy": "^1.5.4", "@types/cors": "^2.8.17", "@types/generic-pool": "^3.8.1", "@types/node": "^20.14.13", diff --git a/backend/functions/src/cloud-functions/crawler.ts b/backend/functions/src/cloud-functions/crawler.ts index 30af2ef..d235777 100644 --- a/backend/functions/src/cloud-functions/crawler.ts +++ b/backend/functions/src/cloud-functions/crawler.ts @@ -374,7 +374,8 @@ export class CrawlerHost extends RPCHost { const targetUrlFromGet = originPath.slice(1); if (crawlerOptions.pdf) { - url = `file://pdf.${md5Hasher.hash(crawlerOptions.pdf)}`; + const pdfBuf = crawlerOptions.pdf instanceof Blob ? await crawlerOptions.pdf.arrayBuffer().then((x) => Buffer.from(x)) : Buffer.from(crawlerOptions.pdf, 'base64'); + url = `file://pdf.${md5Hasher.hash(pdfBuf)}`; } else if (targetUrlFromGet) { url = targetUrlFromGet.trim(); } else if (crawlerOptions.url) { @@ -552,7 +553,9 @@ export class CrawlerHost extends RPCHost { } if (crawlerOpts?.pdf) { - const pdfDataUrl = `data:application/pdf;base64,${encodeURIComponent(crawlerOpts.pdf)}`; + + const pdfBuf = crawlerOpts.pdf instanceof Blob ? await crawlerOpts.pdf.arrayBuffer().then((x) => Buffer.from(x)) : Buffer.from(crawlerOpts.pdf, 'base64'); + const pdfDataUrl = `data:application/pdf;base64,${pdfBuf.toString('base64')}`; const fakeSnapshot = { href: urlToCrawl.toString(), html: `