mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-12 19:39:00 +08:00
### What problem does this PR solve? close #5277 by make sure the file close ### Type of change - [x] Performance Improvement --------- Signed-off-by: yihong0618 <zouzou0208@gmail.com>
This commit is contained in:
parent
d6836444c9
commit
8a2542157f
@ -188,6 +188,7 @@ def thumbnail_img(filename, blob):
|
||||
buffered = BytesIO()
|
||||
else:
|
||||
break
|
||||
pdf.close()
|
||||
return img
|
||||
|
||||
elif re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename):
|
||||
|
@ -950,7 +950,9 @@ class RAGFlowPdfParser:
|
||||
try:
|
||||
pdf = pdfplumber.open(
|
||||
fnm) if not binary else pdfplumber.open(BytesIO(binary))
|
||||
return len(pdf.pages)
|
||||
total_page = len(pdf.pages)
|
||||
pdf.close()
|
||||
return total_page
|
||||
except Exception:
|
||||
logging.exception("total_page_number")
|
||||
|
||||
@ -996,9 +998,12 @@ class RAGFlowPdfParser:
|
||||
dfs(outlines, 0)
|
||||
except Exception as e:
|
||||
logging.warning(f"Outlines exception: {e}")
|
||||
finally:
|
||||
self.pdf.close()
|
||||
if not self.outlines:
|
||||
logging.warning("Miss outlines")
|
||||
|
||||
|
||||
logging.debug("Images converted.")
|
||||
self.is_english = [re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(
|
||||
random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i]))))) for i in
|
||||
|
@ -42,6 +42,7 @@ def init_in_out(args):
|
||||
|
||||
for i, page in enumerate(images):
|
||||
outputs.append(os.path.split(fnm)[-1] + f"_{i}.jpg")
|
||||
pdf.close()
|
||||
|
||||
def images_and_outputs(fnm):
|
||||
nonlocal outputs, images
|
||||
|
@ -18,6 +18,7 @@
|
||||
# beartype_all(conf=BeartypeConf(violation_type=UserWarning)) # <-- emit warnings from all code
|
||||
import random
|
||||
import sys
|
||||
|
||||
from api.utils.log_utils import initRootLogger, get_project_base_directory
|
||||
from graphrag.general.index import WithCommunity, WithResolution, Dealer
|
||||
from graphrag.light.graph_extractor import GraphExtractor as LightKGExt
|
||||
|
Loading…
x
Reference in New Issue
Block a user