From 080056e889d3deb7e8453740e0416042922ee4f2 Mon Sep 17 00:00:00 2001
From: Zhaofeng Miao <522856232@qq.com>
Date: Thu, 22 Aug 2024 14:56:09 +0800
Subject: [PATCH] feat: allow passing base64 encoded pdf
---
.../functions/src/cloud-functions/crawler.ts | 16 ++++++
.../functions/src/dto/scrapping-options.ts | 5 ++
backend/functions/src/services/pdf-extract.ts | 51 ++++++++++++++++---
3 files changed, 66 insertions(+), 6 deletions(-)
diff --git a/backend/functions/src/cloud-functions/crawler.ts b/backend/functions/src/cloud-functions/crawler.ts
index 7a81be4..f1a8fae 100644
--- a/backend/functions/src/cloud-functions/crawler.ts
+++ b/backend/functions/src/cloud-functions/crawler.ts
@@ -977,6 +977,22 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
return;
}
+
+ if (crawlerOpts?.pdf) {
+ const pdfDataUrl = `data:application/pdf;base64,${encodeURIComponent(crawlerOpts.pdf)}`;
+ const fakeSnapshot = {
+ href: urlToCrawl.toString(),
+ html: `