Transformers AttributeError

by XuehangCang - opened about 13 hours ago

{
    "name": "AttributeError",
    "message": "",
    "stack": "\u001b[31m---------------------------------------------------------------------------\u001b[39m\n\u001b[31mKeyError\u001b[39m                                  Traceback (most recent call last)\n\u001b[36mFile \u001b[39m\u001b[32m/app/fine-tuning/.venv/lib/python3.13/site-packages/transformers/tokenization_utils_base.py:286\u001b[39m, in \u001b[36mBatchEncoding.__getattr__\u001b[39m\u001b[34m(self, item)\u001b[39m\n\u001b[32m    285\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m286\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[30;43mself\u001b[39;49m\u001b[30;43m.\u001b[39;49m\u001b[30;43mdata\u001b[39;49m\u001b[30;43m[\u001b[39;49m\u001b[30;43mitem\u001b[39;49m\u001b[30;43m]\u001b[39;49m\n\u001b[32m    287\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\n\u001b[31mKeyError\u001b[39m: 'shape'\n\nDuring handling of the above exception, another exception occurred:\n\n\u001b[31mAttributeError\u001b[39m                            Traceback (most recent call last)\n\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 10\u001b[39m\n\u001b[32m      6\u001b[39m     enable_thinking=\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[32m      7\u001b[39m     return_tensors=\u001b[33m\"pt\"\u001b[39m,\n\u001b[32m      8\u001b[39m ).to(model.device)\n\u001b[32m      9\u001b[39m \n\u001b[32m---> \u001b[39m\u001b[32m10\u001b[39m outputs = model.generate(inputs, max_new_tokens=\u001b[32m128\u001b[39m)\n\u001b[32m     11\u001b[39m print(tokenizer.decode(outputs[\u001b[32m0\u001b[39m][inputs.shape[-\u001b[32m1\u001b[39m]:], skip_special_tokens=\u001b[38;5;28;01mTrue\u001b[39;00m))\n\n\u001b[36mFile \u001b[39m\u001b[32m/app/fine-tuning/.venv/lib/python3.13/site-packages/torch/utils/_contextlib.py:124\u001b[39m, in \u001b[36mcontext_decorator.<locals>.decorate_context\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m    120\u001b[39m 
\u001b[38;5;129m@functools\u001b[39m.wraps(func)\n\u001b[32m    121\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mdecorate_context\u001b[39m(*args, **kwargs):\n\u001b[32m    122\u001b[39m     \u001b[38;5;66;03m# pyrefly: ignore [bad-context-manager]\u001b[39;00m\n\u001b[32m    123\u001b[39m     \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[32m--> \u001b[39m\u001b[32m124\u001b[39m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[30;43mfunc\u001b[39;49m\u001b[30;43m(\u001b[39;49m\u001b[30;43m*\u001b[39;49m\u001b[30;43margs\u001b[39;49m\u001b[30;43m,\u001b[39;49m\u001b[30;43m \u001b[39;49m\u001b[30;43m*\u001b[39;49m\u001b[30;43m*\u001b[39;49m\u001b[30;43mkwargs\u001b[39;49m\u001b[30;43m)\u001b[39;49m\n\n\u001b[36mFile \u001b[39m\u001b[32m/app/fine-tuning/.venv/lib/python3.13/site-packages/transformers/generation/utils.py:2415\u001b[39m, in \u001b[36mGenerationMixin.generate\u001b[39m\u001b[34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, custom_generate, **kwargs)\u001b[39m\n\u001b[32m   2413\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33minputs_tensor\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m inspect.signature(decoding_method).parameters.keys():\n\u001b[32m   2414\u001b[39m     generation_mode_kwargs[\u001b[33m\"\u001b[39m\u001b[33minputs_tensor\u001b[39m\u001b[33m\"\u001b[39m] = inputs_tensor\n\u001b[32m-> \u001b[39m\u001b[32m2415\u001b[39m batch_size = \u001b[30;43minputs_tensor\u001b[39;49m\u001b[30;43m.\u001b[39;49m\u001b[30;43mshape\u001b[39;49m[\u001b[32m0\u001b[39m]\n\u001b[32m   2417\u001b[39m device = inputs_tensor.device\n\u001b[32m   2418\u001b[39m \u001b[38;5;28mself\u001b[39m._prepare_special_tokens(generation_config, kwargs_has_attention_mask, device=device)\n\n\u001b[36mFile 
\u001b[39m\u001b[32m/app/fine-tuning/.venv/lib/python3.13/site-packages/transformers/tokenization_utils_base.py:288\u001b[39m, in \u001b[36mBatchEncoding.__getattr__\u001b[39m\u001b[34m(self, item)\u001b[39m\n\u001b[32m    286\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.data[item]\n\u001b[32m    287\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m288\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m\n\n\u001b[31mAttributeError\u001b[39m: "
}

update

# Minimal chat-generation example for MiniCPM5-1B.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "openbmb/MiniCPM5-1B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    device_map="auto",
)

messages = [
    {"role": "user", "content": "Who are you? Please briefly introduce yourself."}
]
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    enable_thinking=False,
    # Request the dict-like result explicitly: the default for return_dict
    # differs between transformers v4 (False -> plain tensor) and v5 (True),
    # and the code below relies on the mapping form in both cases.
    return_dict=True,
    return_tensors="pt",
)
# Move every returned tensor (input_ids, attention_mask, ...) to the model's device.
inputs = {k: v.to(model.device) for k, v in inputs.items()}

# Unpack the mapping as keyword arguments; passing it positionally makes
# generate() treat it as a tensor and fail when it reads `.shape`.
outputs = model.generate(**inputs, max_new_tokens=128)

# generate() returns prompt + completion; slice off the prompt before decoding.
prompt_len = inputs["input_ids"].shape[-1]
print(tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True))

georgethrax

OpenBMB org about 10 hours ago

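Thanks for reporting this! The issue is caused by a breaking change in transformers v5.x: `apply_chat_template()` changed its `return_dict` default from `False` (v4) to `True` (v5), so it now returns a `BatchEncoding` (a dict-like object) rather than a plain tensor. Passing that object positionally to `model.generate(inputs, ...)` makes `generate` read `inputs.shape`, which a `BatchEncoding` does not have — hence the `AttributeError` in the traceback.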

The README has been updated with the fix:

 inputs = tokenizer.apply_chat_template(
     messages,
     tokenize=True,
     add_generation_prompt=True,
     enable_thinking=False,
+    return_dict=True,
     return_tensors="pt",
 ).to(model.device)

-outputs = model.generate(inputs, max_new_tokens=128)
-print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
+outputs = model.generate(**inputs, max_new_tokens=128)
+print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))

Because `return_dict=True` is now passed explicitly, the snippet no longer depends on the library's default and works on both transformers v4 and v5.

georgethrax changed discussion status to closed about 10 hours ago

Upload images, audio, and videos by dragging in the text input, pasting, or clicking here.

Tap or paste here to upload images

· Sign up or log in to comment