mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-06-02 18:34:03 +08:00
Fix csv for TAG. (#4454)
### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
ecdb2a88bd
commit
e098fcf6ad
@@ -91,14 +91,14 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs):
|
|||||||
callback(0.1, "Start to parse.")
|
callback(0.1, "Start to parse.")
|
||||||
txt = get_text(filename, binary)
|
txt = get_text(filename, binary)
|
||||||
lines = txt.split("\n")
|
lines = txt.split("\n")
|
||||||
delimiter = "\t" if any("\t" in line for line in lines) else ","
|
|
||||||
|
|
||||||
fails = []
|
fails = []
|
||||||
content = ""
|
content = ""
|
||||||
res = []
|
res = []
|
||||||
reader = csv.reader(lines, delimiter=delimiter)
|
reader = csv.reader(lines)
|
||||||
|
|
||||||
for i, row in enumerate(reader):
|
for i, row in enumerate(reader):
|
||||||
|
row = [r.strip() for r in row if r.strip()]
|
||||||
if len(row) != 2:
|
if len(row) != 2:
|
||||||
content += "\n" + lines[i]
|
content += "\n" + lines[i]
|
||||||
elif len(row) == 2:
|
elif len(row) == 2:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user