diff --git a/api/utils/file_utils.py b/api/utils/file_utils.py index 2dedcdc7b..5ab61b63b 100644 --- a/api/utils/file_utils.py +++ b/api/utils/file_utils.py @@ -146,7 +146,7 @@ def rewrite_yaml_conf(conf_path, config): def rewrite_json_file(filepath, json_data): - with open(filepath, "w") as f: + with open(filepath, "w", encoding='utf-8') as f: json.dump(json_data, f, indent=4, separators=(",", ": ")) f.close() diff --git a/deepdoc/parser/resume/entities/schools.py b/deepdoc/parser/resume/entities/schools.py index d90d9fde0..598d7ae83 100644 --- a/deepdoc/parser/resume/entities/schools.py +++ b/deepdoc/parser/resume/entities/schools.py @@ -11,7 +11,10 @@ # limitations under the License. # -import os, json,re,copy +import os +import json +import re +import copy import pandas as pd current_file_path = os.path.dirname(os.path.abspath(__file__)) TBL = pd.read_csv(os.path.join(current_file_path, "res/schools.csv"), sep="\t", header=0).fillna("") @@ -23,7 +26,7 @@ GOOD_SCH = set([re.sub(r"[,. &()()]+", "", c) for c in GOOD_SCH]) def loadRank(fnm): global TBL TBL["rank"] = 1000000 - with open(fnm, "r",encoding='UTF-8') as f: + with open(fnm, "r", encoding='utf-8') as f: while True: l = f.readline() if not l:break @@ -32,7 +35,7 @@ def loadRank(fnm): nm,rk = l[0].strip(),int(l[1]) #assert len(TBL[((TBL.name_cn == nm) | (TBL.name_en == nm))]),f"<{nm}>" TBL.loc[((TBL.name_cn == nm) | (TBL.name_en == nm)), "rank"] = rk - except Exception as e: + except Exception: pass diff --git a/deepdoc/vision/t_ocr.py b/deepdoc/vision/t_ocr.py index 910b91be1..041c77107 100644 --- a/deepdoc/vision/t_ocr.py +++ b/deepdoc/vision/t_ocr.py @@ -41,7 +41,7 @@ def main(args): "score": 1} for b, t in bxs if b[0][0] <= b[1][0] and b[0][1] <= b[-1][1]] img = draw_box(images[i], bxs, ["ocr"], 1.) img.save(outputs[i], quality=95) - with open(outputs[i] + ".txt", "w+") as f: + with open(outputs[i] + ".txt", "w+", encoding='utf-8') as f: f.write("\n".join([o["text"] for o in bxs])) diff --git a/deepdoc/vision/t_recognizer.py b/deepdoc/vision/t_recognizer.py index e5d7a22ae..a5efc5dcd 100644 --- a/deepdoc/vision/t_recognizer.py +++ b/deepdoc/vision/t_recognizer.py @@ -50,7 +50,7 @@ def main(args): if args.mode.lower() == "tsr": #lyt = [t for t in lyt if t["type"] == "table column"] html = get_table_html(images[i], lyt, ocr) - with open(outputs[i] + ".html", "w+") as f: + with open(outputs[i] + ".html", "w+", encoding='utf-8') as f: f.write(html) lyt = [{ "type": t["label"], diff --git a/rag/benchmark.py b/rag/benchmark.py index dc48bed3e..1146d55bf 100644 --- a/rag/benchmark.py +++ b/rag/benchmark.py @@ -237,8 +237,8 @@ class Benchmark: scores = sorted(scores, key=lambda kk: kk[1]) for score in scores[:10]: f.write('- text: ' + str(texts[score[0]]) + '\t qrel: ' + str(score[1]) + '\n') - json.dump(qrels, open(os.path.join(file_path, dataset + '.qrels.json'), "w+"), indent=2) - json.dump(run, open(os.path.join(file_path, dataset + '.run.json'), "w+"), indent=2) + json.dump(qrels, open(os.path.join(file_path, dataset + '.qrels.json'), "w+", encoding='utf-8'), indent=2) + json.dump(run, open(os.path.join(file_path, dataset + '.run.json'), "w+", encoding='utf-8'), indent=2) print(os.path.join(file_path, dataset + '_result.md'), 'Saved!') def __call__(self, dataset, file_path, miracl_corpus=''):