mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-06-04 11:24:00 +08:00
Ensure thumbnails are smaller than 64K (#3722)
### What problem does this PR solve?

Ensure thumbnails are smaller than 64K. Closes #1443.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
parent
91f1814a87
commit
112ef42a19
@@ -170,29 +170,52 @@ def filename_type(filename):
|
|||||||
return FileType.OTHER.value
|
return FileType.OTHER.value
|
||||||
|
|
||||||
def thumbnail_img(filename, blob):
|
def thumbnail_img(filename, blob):
|
||||||
|
"""
|
||||||
|
MySQL TEXT max length is 65535 bytes
|
||||||
|
"""
|
||||||
filename = filename.lower()
|
filename = filename.lower()
|
||||||
if re.match(r".*\.pdf$", filename):
|
if re.match(r".*\.pdf$", filename):
|
||||||
pdf = pdfplumber.open(BytesIO(blob))
|
pdf = pdfplumber.open(BytesIO(blob))
|
||||||
buffered = BytesIO()
|
buffered = BytesIO()
|
||||||
pdf.pages[0].to_image(resolution=32).annotated.save(buffered, format="png")
|
resolution = 32
|
||||||
return buffered.getvalue()
|
img = None
|
||||||
|
for _ in range(10):
|
||||||
|
# https://github.com/jsvine/pdfplumber?tab=readme-ov-file#creating-a-pageimage-with-to_image
|
||||||
|
pdf.pages[0].to_image(resolution=resolution).annotated.save(buffered, format="png")
|
||||||
|
img = buffered.getvalue()
|
||||||
|
if len(img) >= 64000 and resolution >= 2:
|
||||||
|
resolution = resolution / 2
|
||||||
|
buffered = BytesIO()
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
return img
|
||||||
|
|
||||||
if re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename):
|
elif re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename):
|
||||||
image = Image.open(BytesIO(blob))
|
image = Image.open(BytesIO(blob))
|
||||||
image.thumbnail((30, 30))
|
image.thumbnail((30, 30))
|
||||||
buffered = BytesIO()
|
buffered = BytesIO()
|
||||||
image.save(buffered, format="png")
|
image.save(buffered, format="png")
|
||||||
return buffered.getvalue()
|
return buffered.getvalue()
|
||||||
|
|
||||||
if re.match(r".*\.(ppt|pptx)$", filename):
|
elif re.match(r".*\.(ppt|pptx)$", filename):
|
||||||
import aspose.slides as slides
|
import aspose.slides as slides
|
||||||
import aspose.pydrawing as drawing
|
import aspose.pydrawing as drawing
|
||||||
try:
|
try:
|
||||||
with slides.Presentation(BytesIO(blob)) as presentation:
|
with slides.Presentation(BytesIO(blob)) as presentation:
|
||||||
buffered = BytesIO()
|
buffered = BytesIO()
|
||||||
presentation.slides[0].get_thumbnail(0.03, 0.03).save(
|
scale = 0.03
|
||||||
|
img = None
|
||||||
|
for _ in range(10):
|
||||||
|
# https://reference.aspose.com/slides/python-net/aspose.slides/slide/get_thumbnail/#float-float
|
||||||
|
presentation.slides[0].get_thumbnail(scale, scale).save(
|
||||||
buffered, drawing.imaging.ImageFormat.png)
|
buffered, drawing.imaging.ImageFormat.png)
|
||||||
return buffered.getvalue()
|
img = buffered.getvalue()
|
||||||
|
if len(img) >= 64000:
|
||||||
|
scale = scale / 2.0
|
||||||
|
buffered = BytesIO()
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
return img
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
return None
|
return None
|
||||||
|
Loading…
x
Reference in New Issue
Block a user