diff --git a/modeling_internlm3.py b/modeling_internlm3.py index 6e877d0..eec4e6b 100644 --- a/modeling_internlm3.py +++ b/modeling_internlm3.py @@ -793,7 +793,7 @@ class InternLM3Model(InternLM3PreTrainedModel): Args: config: InternLM3Config """ - + _auto_class = "AutoModel" def __init__(self, config: InternLM3Config): super().__init__(config) self.padding_idx = config.pad_token_id @@ -1070,6 +1070,7 @@ class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ... class InternLM3ForCausalLM(InternLM3PreTrainedModel, GenerationMixin): + _auto_class = "AutoModelForCausalLM" _tied_weights_keys = ["lm_head.weight"] _tp_plan = {"lm_head": "colwise_rep"} diff --git a/tokenization_internlm3.py b/tokenization_internlm3.py index f68147f..fb919ad 100644 --- a/tokenization_internlm3.py +++ b/tokenization_internlm3.py @@ -67,7 +67,7 @@ class InternLM3Tokenizer(PreTrainedTokenizer): Whether or not to add an initial space to the input. This allows to treat the leading word just as any other word. Again, this should be set with `from_slow=True` to make sure it's taken into account. """ - + _auto_class = "AutoTokenizer" vocab_files_names = VOCAB_FILES_NAMES model_input_names = ["input_ids", "attention_mask"]