From a0a480b708d75c6b31b033e5a7e3e28c33f105f7 Mon Sep 17 00:00:00 2001 From: KevinHuSh Date: Wed, 10 Apr 2024 14:06:36 +0800 Subject: [PATCH] continue add layout model for 'laws' (#292) ### What problem does this PR solve? Issue link:#289 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- rag/app/laws.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rag/app/laws.py b/rag/app/laws.py index 4478696f5..ce0d17471 100644 --- a/rag/app/laws.py +++ b/rag/app/laws.py @@ -25,8 +25,7 @@ from rag.settings import cron_logger class Docx(DocxParser): def __init__(self): - self.model_speciess = ParserType.LAWS.value - super().__init__() + pass def __clean(self, line): line = re.sub(r"\u3000", " ", line).strip() @@ -52,6 +51,10 @@ class Docx(DocxParser): class Pdf(PdfParser): + def __init__(self): + self.model_speciess = ParserType.LAWS.value + super().__init__() + def __call__(self, filename, binary=None, from_page=0, to_page=100000, zoomin=3, callback=None): callback(msg="OCR is running...")