Continue adding the layout model for 'laws' (#292)

### What problem does this PR solve?

Issue link: #289

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
KevinHuSh 2024-04-10 14:06:36 +08:00 committed by GitHub
parent 243de6ac90
commit a0a480b708
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -25,8 +25,7 @@ from rag.settings import cron_logger
class Docx(DocxParser):
# Constructor for the Docx parser: stores ParserType.LAWS.value in
# self.model_speciess and then runs the base-class initialiser.
# NOTE(review): the assignment is placed before super().__init__(); presumably
# the base initialiser reads self.model_speciess -- confirm against DocxParser.
# NOTE(review): this is a diff hunk shown without +/- markers (header says
# 8 old lines -> 7 new lines), so one of the lines below may be a removed
# line. If both calls are live, the trailing `pass` is a redundant no-op.
def __init__(self):
self.model_speciess = ParserType.LAWS.value
super().__init__()
pass
def __clean(self, line):
line = re.sub(r"\u3000", " ", line).strip()
@ -52,6 +51,10 @@ class Docx(DocxParser):
class Pdf(PdfParser):
# Constructor for the Pdf parser: stores ParserType.LAWS.value in
# self.model_speciess and then runs the base-class initialiser.
# NOTE(review): the attribute is set before super().__init__(); presumably the
# base initialiser uses it to choose the layout model named in the commit
# title -- confirm against PdfParser, and keep this statement order.
def __init__(self):
self.model_speciess = ParserType.LAWS.value
super().__init__()
def __call__(self, filename, binary=None, from_page=0,
to_page=100000, zoomin=3, callback=None):
callback(msg="OCR is running...")