mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-13 03:09:04 +08:00
### What problem does this PR solve? Closes #5277 by making sure the PDF file is closed after use. ### Type of change - [x] Performance Improvement --------- Signed-off-by: yihong0618 <zouzou0208@gmail.com>
This commit is contained in:
parent
d6836444c9
commit
8a2542157f
@ -188,6 +188,7 @@ def thumbnail_img(filename, blob):
|
|||||||
buffered = BytesIO()
|
buffered = BytesIO()
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
pdf.close()
|
||||||
return img
|
return img
|
||||||
|
|
||||||
elif re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename):
|
elif re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename):
|
||||||
|
@ -950,7 +950,9 @@ class RAGFlowPdfParser:
|
|||||||
try:
|
try:
|
||||||
pdf = pdfplumber.open(
|
pdf = pdfplumber.open(
|
||||||
fnm) if not binary else pdfplumber.open(BytesIO(binary))
|
fnm) if not binary else pdfplumber.open(BytesIO(binary))
|
||||||
return len(pdf.pages)
|
total_page = len(pdf.pages)
|
||||||
|
pdf.close()
|
||||||
|
return total_page
|
||||||
except Exception:
|
except Exception:
|
||||||
logging.exception("total_page_number")
|
logging.exception("total_page_number")
|
||||||
|
|
||||||
@ -996,9 +998,12 @@ class RAGFlowPdfParser:
|
|||||||
dfs(outlines, 0)
|
dfs(outlines, 0)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"Outlines exception: {e}")
|
logging.warning(f"Outlines exception: {e}")
|
||||||
|
finally:
|
||||||
|
self.pdf.close()
|
||||||
if not self.outlines:
|
if not self.outlines:
|
||||||
logging.warning("Miss outlines")
|
logging.warning("Miss outlines")
|
||||||
|
|
||||||
|
|
||||||
logging.debug("Images converted.")
|
logging.debug("Images converted.")
|
||||||
self.is_english = [re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(
|
self.is_english = [re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(
|
||||||
random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i]))))) for i in
|
random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i]))))) for i in
|
||||||
|
@ -42,6 +42,7 @@ def init_in_out(args):
|
|||||||
|
|
||||||
for i, page in enumerate(images):
|
for i, page in enumerate(images):
|
||||||
outputs.append(os.path.split(fnm)[-1] + f"_{i}.jpg")
|
outputs.append(os.path.split(fnm)[-1] + f"_{i}.jpg")
|
||||||
|
pdf.close()
|
||||||
|
|
||||||
def images_and_outputs(fnm):
|
def images_and_outputs(fnm):
|
||||||
nonlocal outputs, images
|
nonlocal outputs, images
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
# beartype_all(conf=BeartypeConf(violation_type=UserWarning)) # <-- emit warnings from all code
|
# beartype_all(conf=BeartypeConf(violation_type=UserWarning)) # <-- emit warnings from all code
|
||||||
import random
|
import random
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from api.utils.log_utils import initRootLogger, get_project_base_directory
|
from api.utils.log_utils import initRootLogger, get_project_base_directory
|
||||||
from graphrag.general.index import WithCommunity, WithResolution, Dealer
|
from graphrag.general.index import WithCommunity, WithResolution, Dealer
|
||||||
from graphrag.light.graph_extractor import GraphExtractor as LightKGExt
|
from graphrag.light.graph_extractor import GraphExtractor as LightKGExt
|
||||||
|
Loading…
x
Reference in New Issue
Block a user