mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-15 01:26:09 +08:00
fix: Large document thumbnail display failed (#2763)
### What problem does this PR solve?

When a document's base64-encoded thumbnail is relatively large, storing it in MySQL causes the thumbnail to fail to display. This PR stores the document thumbnail in MinIO storage instead.

### Type of change

- [✓] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: chongchuanbing <chongchuanbing@gmail.com>
This commit is contained in:
parent
f7a73c5149
commit
485bfd6c08
@ -51,6 +51,7 @@ from api.utils.api_utils import get_json_result
|
|||||||
from rag.utils.storage_factory import STORAGE_IMPL
|
from rag.utils.storage_factory import STORAGE_IMPL
|
||||||
from api.utils.file_utils import filename_type, thumbnail, get_project_base_directory
|
from api.utils.file_utils import filename_type, thumbnail, get_project_base_directory
|
||||||
from api.utils.web_utils import html2pdf, is_valid_url
|
from api.utils.web_utils import html2pdf, is_valid_url
|
||||||
|
from api.contants import IMG_BASE64_PREFIX
|
||||||
|
|
||||||
|
|
||||||
@manager.route('/upload', methods=['POST'])
|
@manager.route('/upload', methods=['POST'])
|
||||||
@ -209,6 +210,11 @@ def list_docs():
|
|||||||
try:
|
try:
|
||||||
docs, tol = DocumentService.get_by_kb_id(
|
docs, tol = DocumentService.get_by_kb_id(
|
||||||
kb_id, page_number, items_per_page, orderby, desc, keywords)
|
kb_id, page_number, items_per_page, orderby, desc, keywords)
|
||||||
|
|
||||||
|
for doc_item in docs:
|
||||||
|
if doc_item['thumbnail'] and not doc_item['thumbnail'].startswith(IMG_BASE64_PREFIX):
|
||||||
|
doc_item['thumbnail'] = f'/v1/document/image/{kb_id}-{doc_item['thumbnail']}'
|
||||||
|
|
||||||
return get_json_result(data={"total": tol, "docs": docs})
|
return get_json_result(data={"total": tol, "docs": docs})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return server_error_response(e)
|
return server_error_response(e)
|
||||||
|
@ -13,4 +13,6 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
NAME_LENGTH_LIMIT = 2 ** 10
|
NAME_LENGTH_LIMIT = 2 ** 10
|
||||||
|
|
||||||
|
IMG_BASE64_PREFIX = 'data:image/png;base64,'
|
@ -26,7 +26,7 @@ from api.db.services.common_service import CommonService
|
|||||||
from api.db.services.document_service import DocumentService
|
from api.db.services.document_service import DocumentService
|
||||||
from api.db.services.file2document_service import File2DocumentService
|
from api.db.services.file2document_service import File2DocumentService
|
||||||
from api.utils import get_uuid
|
from api.utils import get_uuid
|
||||||
from api.utils.file_utils import filename_type, thumbnail
|
from api.utils.file_utils import filename_type, thumbnail_img
|
||||||
from rag.utils.storage_factory import STORAGE_IMPL
|
from rag.utils.storage_factory import STORAGE_IMPL
|
||||||
|
|
||||||
|
|
||||||
@ -354,8 +354,15 @@ class FileService(CommonService):
|
|||||||
location += "_"
|
location += "_"
|
||||||
blob = file.read()
|
blob = file.read()
|
||||||
STORAGE_IMPL.put(kb.id, location, blob)
|
STORAGE_IMPL.put(kb.id, location, blob)
|
||||||
|
|
||||||
|
doc_id = get_uuid()
|
||||||
|
|
||||||
|
img = thumbnail_img(filename, blob)
|
||||||
|
thumbnail_location = f'thumbnail_{doc_id}.png'
|
||||||
|
STORAGE_IMPL.put(kb.id, thumbnail_location, img)
|
||||||
|
|
||||||
doc = {
|
doc = {
|
||||||
"id": get_uuid(),
|
"id": doc_id,
|
||||||
"kb_id": kb.id,
|
"kb_id": kb.id,
|
||||||
"parser_id": self.get_parser(filetype, filename, kb.parser_id),
|
"parser_id": self.get_parser(filetype, filename, kb.parser_id),
|
||||||
"parser_config": kb.parser_config,
|
"parser_config": kb.parser_config,
|
||||||
@ -364,7 +371,7 @@ class FileService(CommonService):
|
|||||||
"name": filename,
|
"name": filename,
|
||||||
"location": location,
|
"location": location,
|
||||||
"size": len(blob),
|
"size": len(blob),
|
||||||
"thumbnail": thumbnail(filename, blob)
|
"thumbnail": thumbnail_location
|
||||||
}
|
}
|
||||||
DocumentService.insert(doc)
|
DocumentService.insert(doc)
|
||||||
|
|
||||||
|
@ -25,6 +25,7 @@ from cachetools import LRUCache, cached
|
|||||||
from ruamel.yaml import YAML
|
from ruamel.yaml import YAML
|
||||||
|
|
||||||
from api.db import FileType
|
from api.db import FileType
|
||||||
|
from api.contants import IMG_BASE64_PREFIX
|
||||||
|
|
||||||
PROJECT_BASE = os.getenv("RAG_PROJECT_BASE") or os.getenv("RAG_DEPLOY_BASE")
|
PROJECT_BASE = os.getenv("RAG_PROJECT_BASE") or os.getenv("RAG_DEPLOY_BASE")
|
||||||
RAG_BASE = os.getenv("RAG_BASE")
|
RAG_BASE = os.getenv("RAG_BASE")
|
||||||
@ -168,23 +169,20 @@ def filename_type(filename):
|
|||||||
|
|
||||||
return FileType.OTHER.value
|
return FileType.OTHER.value
|
||||||
|
|
||||||
|
def thumbnail_img(filename, blob):
|
||||||
def thumbnail(filename, blob):
|
|
||||||
filename = filename.lower()
|
filename = filename.lower()
|
||||||
if re.match(r".*\.pdf$", filename):
|
if re.match(r".*\.pdf$", filename):
|
||||||
pdf = pdfplumber.open(BytesIO(blob))
|
pdf = pdfplumber.open(BytesIO(blob))
|
||||||
buffered = BytesIO()
|
buffered = BytesIO()
|
||||||
pdf.pages[0].to_image(resolution=32).annotated.save(buffered, format="png")
|
pdf.pages[0].to_image(resolution=32).annotated.save(buffered, format="png")
|
||||||
return "data:image/png;base64," + \
|
return buffered.getvalue()
|
||||||
base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
||||||
|
|
||||||
if re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename):
|
if re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename):
|
||||||
image = Image.open(BytesIO(blob))
|
image = Image.open(BytesIO(blob))
|
||||||
image.thumbnail((30, 30))
|
image.thumbnail((30, 30))
|
||||||
buffered = BytesIO()
|
buffered = BytesIO()
|
||||||
image.save(buffered, format="png")
|
image.save(buffered, format="png")
|
||||||
return "data:image/png;base64," + \
|
return buffered.getvalue()
|
||||||
base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
||||||
|
|
||||||
if re.match(r".*\.(ppt|pptx)$", filename):
|
if re.match(r".*\.(ppt|pptx)$", filename):
|
||||||
import aspose.slides as slides
|
import aspose.slides as slides
|
||||||
@ -194,11 +192,15 @@ def thumbnail(filename, blob):
|
|||||||
buffered = BytesIO()
|
buffered = BytesIO()
|
||||||
presentation.slides[0].get_thumbnail(0.03, 0.03).save(
|
presentation.slides[0].get_thumbnail(0.03, 0.03).save(
|
||||||
buffered, drawing.imaging.ImageFormat.png)
|
buffered, drawing.imaging.ImageFormat.png)
|
||||||
return "data:image/png;base64," + \
|
return buffered.getvalue()
|
||||||
base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pass
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
def thumbnail(filename, blob):
    """Return the thumbnail of *blob* as a base64-encoded PNG data URI.

    Args:
        filename: Name of the source file; forwarded to ``thumbnail_img``.
        blob: Raw file content as bytes.

    Returns:
        ``IMG_BASE64_PREFIX`` followed by the base64 text of the thumbnail
        bytes, or ``None`` when ``thumbnail_img`` produced no image.
    """
    img = thumbnail_img(filename, blob)
    if img is None:
        # thumbnail_img returns None when it has no image to offer;
        # base64.b64encode(None) would raise TypeError, so keep the
        # "no thumbnail" result as None for callers.
        return None
    return IMG_BASE64_PREFIX + base64.b64encode(img).decode("utf-8")
|
||||||
|
|
||||||
def traversal_files(base):
|
def traversal_files(base):
|
||||||
for root, ds, fs in os.walk(base):
|
for root, ds, fs in os.walk(base):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user