diff --git a/assets/chatts_tokenizer/special_tokens_map.json b/assets/chatts_tokenizer/special_tokens_map.json new file mode 100644 index 0000000..42cfc10 --- /dev/null +++ b/assets/chatts_tokenizer/special_tokens_map.json @@ -0,0 +1,389 @@ +{ + "additional_special_tokens": [ + { + "content": "[Sasr]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[Pasr]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[Easr]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[Stts]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[Ptts]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[Etts]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[Sbreak]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[Pbreak]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[Ebreak]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[uv_break]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[v_break]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[lbreak]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[llbreak]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[undefine]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[laugh]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[spk_emb]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[empty_spk]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[music]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[pure]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[break_0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[break_1]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[break_2]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[break_3]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[break_4]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[break_5]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[break_6]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[break_7]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[laugh_0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[laugh_1]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[laugh_2]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[oral_0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[oral_1]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[oral_2]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[oral_3]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[oral_4]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[oral_5]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[oral_6]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[oral_7]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[oral_8]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[oral_9]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[speed_0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[speed_1]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[speed_2]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[speed_3]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[speed_4]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[speed_5]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[speed_6]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[speed_7]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[speed_8]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "[speed_9]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "cls_token": { + "content": "[CLS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "[SEP]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "[UNK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +}