mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-07-14 05:01:46 +08:00
continue add layout model for 'laws' (#292)
### What problem does this PR solve?

Issue link: #289

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
parent
243de6ac90
commit
a0a480b708
@ -25,8 +25,7 @@ from rag.settings import cron_logger
|
|||||||
|
|
||||||
class Docx(DocxParser):
|
class Docx(DocxParser):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.model_speciess = ParserType.LAWS.value
|
pass
|
||||||
super().__init__()
|
|
||||||
|
|
||||||
def __clean(self, line):
|
def __clean(self, line):
|
||||||
line = re.sub(r"\u3000", " ", line).strip()
|
line = re.sub(r"\u3000", " ", line).strip()
|
||||||
@ -52,6 +51,10 @@ class Docx(DocxParser):
|
|||||||
|
|
||||||
|
|
||||||
class Pdf(PdfParser):
|
class Pdf(PdfParser):
|
||||||
|
def __init__(self):
|
||||||
|
self.model_speciess = ParserType.LAWS.value
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
def __call__(self, filename, binary=None, from_page=0,
|
def __call__(self, filename, binary=None, from_page=0,
|
||||||
to_page=100000, zoomin=3, callback=None):
|
to_page=100000, zoomin=3, callback=None):
|
||||||
callback(msg="OCR is running...")
|
callback(msg="OCR is running...")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user