diff --git a/assets/chattts_tokenizer/tokenizer_config.json b/assets/chattts_tokenizer/tokenizer_config.json new file mode 100644 index 0000000..b62fb7f --- /dev/null +++ b/assets/chattts_tokenizer/tokenizer_config.json @@ -0,0 +1,516 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100": { + "content": "[UNK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101": { + "content": "[CLS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "102": { + "content": "[SEP]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "103": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21128": { + "content": "[Sasr]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21129": { + "content": "[Pasr]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21130": { + "content": "[Easr]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21131": { + "content": "[Stts]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21132": { + "content": "[Ptts]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21133": { + "content": "[Etts]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21134": { + "content": "[Sbreak]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21135": { + "content": "[Pbreak]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21136": { + "content": "[Ebreak]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21137": { + "content": "[uv_break]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21138": { + "content": "[v_break]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21139": { + "content": "[lbreak]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21140": { + "content": "[llbreak]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21141": { + "content": "[undefine]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21142": { + "content": "[laugh]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21143": { + "content": "[spk_emb]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21144": { + "content": "[empty_spk]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21145": { + "content": "[music]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21146": { + "content": "[pure]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21147": { + "content": "[break_0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21148": { + "content": "[break_1]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21149": { + "content": "[break_2]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21150": { + "content": "[break_3]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21151": { + "content": "[break_4]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21152": { + "content": "[break_5]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21153": { + "content": "[break_6]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21154": { + "content": "[break_7]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21155": { + "content": "[laugh_0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21156": { + "content": "[laugh_1]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21157": { + "content": "[laugh_2]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21158": { + "content": "[oral_0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21159": { + "content": "[oral_1]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21160": { + "content": "[oral_2]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21161": { + "content": "[oral_3]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21162": { + "content": "[oral_4]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21163": { + "content": "[oral_5]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21164": { + "content": "[oral_6]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21165": { + "content": "[oral_7]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21166": { + "content": "[oral_8]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21167": { + "content": "[oral_9]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21168": { + "content": "[speed_0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21169": { + "content": "[speed_1]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21170": { + "content": "[speed_2]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21171": { + "content": "[speed_3]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21172": { + "content": "[speed_4]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21173": { + "content": "[speed_5]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21174": { + "content": "[speed_6]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21175": { + "content": "[speed_7]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21176": { + "content": "[speed_8]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21177": { + "content": "[speed_9]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "[Sasr]", + "[Pasr]", + "[Easr]", + "[Stts]", + "[Ptts]", + "[Etts]", + "[Sbreak]", + "[Pbreak]", + "[Ebreak]", + "[uv_break]", + "[v_break]", + "[lbreak]", + "[llbreak]", + "[undefine]", + "[laugh]", + "[spk_emb]", + "[empty_spk]", + "[music]", + "[pure]", + "[break_0]", + "[break_1]", + "[break_2]", + "[break_3]", + "[break_4]", + "[break_5]", + "[break_6]", + "[break_7]", + "[laugh_0]", + "[laugh_1]", + "[laugh_2]", + "[oral_0]", + "[oral_1]", + "[oral_2]", + "[oral_3]", + "[oral_4]", + "[oral_5]", + "[oral_6]", + "[oral_7]", + "[oral_8]", + "[oral_9]", + "[speed_0]", + "[speed_1]", + "[speed_2]", + "[speed_3]", + "[speed_4]", + "[speed_5]", + "[speed_6]", + "[speed_7]", + "[speed_8]", + "[speed_9]" + ], + "clean_up_tokenization_spaces": true, + "cls_token": "[CLS]", + "do_basic_tokenize": true, + "do_lower_case": true, + "mask_token": "[MASK]", + "max_length": 256, + "model_max_length": 1000000000000000019884624838656, + "never_split": null, + "pad_to_multiple_of": null, + "pad_token": "[PAD]", + "pad_token_type_id": 0, + "padding_side": "right", + "sep_token": "[SEP]", + "stride": 0, + "strip_accents": null, + "tokenize_chinese_chars": true, + "tokenizer_class": "BertTokenizer", + "truncation_side": "right", + "truncation_strategy": "longest_first", + "unk_token": "[UNK]" +}