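### What problem does this PR solve?

Renames the helper imported and called in `RAGFlowTxtParser.__call__` from `get_txt` to `get_text` (imported from `deepdoc.parser.utils`).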

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

This commit is contained in:
Kevin Hu 2024-09-29 13:20:02 +08:00 committed by GitHub
parent fc867cb959
commit daa65199e8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -10,13 +10,13 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
from deepdoc.parser.utils import get_txt from deepdoc.parser.utils import get_text
from rag.nlp import num_tokens_from_string from rag.nlp import num_tokens_from_string
class RAGFlowTxtParser: class RAGFlowTxtParser:
def __call__(self, fnm, binary=None, chunk_token_num=128, delimiter="\n!?;。;!?"): def __call__(self, fnm, binary=None, chunk_token_num=128, delimiter="\n!?;。;!?"):
txt = get_txt(fnm, binary) txt = get_text(fnm, binary)
return self.parser_txt(txt, chunk_token_num, delimiter) return self.parser_txt(txt, chunk_token_num, delimiter)
@classmethod @classmethod