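The script below exports `deepseek-ai/DeepSeek-R1-Distill-Llama-8B` for AWS Neuron with `optimum-neuron`, copies the compiled NEFF artifacts next to the safetensors shards, adds the original bf16 checkpoint and tokenizer, and pushes the result to this repository.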
```python
import os
import glob
import shutil

import torch
from huggingface_hub import login
from optimum.neuron import NeuronModelForCausalLM
from transformers import AutoTokenizer, AutoModelForCausalLM
# Note: safetensors must be installed (pip install safetensors)
# for safe_serialization=True below.

hf_token = "hf_TBF"  # placeholder; substitute your own Hugging Face access token
sequence_length = 2048
auto_cast_type = "bf16"
batch_size = 1
num_cores = 2
hf_repo = "yahavb/DeepSeek-R1-Distill-Llama-8B-Tnx-vllm"
model_dir = "./DeepSeek-R1-Distill-Llama-8B-Tnx-Sharded"
model_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
login(hf_token, add_to_git_credential=True)

# Compilation settings: Neuron compiles for static input shapes,
# so batch size and sequence length are fixed at export time.
compiler_args = {"num_cores": num_cores, "auto_cast_type": auto_cast_type}
input_shapes = {"batch_size": batch_size, "sequence_length": sequence_length}
neuron_model = NeuronModelForCausalLM.from_pretrained(
    model_id,
    export=True,
    **compiler_args,
    **input_shapes,
)
neuron_model.save_pretrained(model_dir, safe_serialization=True, max_shard_size="10GB")

# Move the compiled NEFF artifacts up next to the model shards,
# then drop the now-empty "compiled" directory.
compiled_dir = os.path.join(model_dir, "compiled")
if os.path.isdir(compiled_dir):
    for neff_file in glob.glob(os.path.join(compiled_dir, "*.neff")):
        shutil.copy(neff_file, model_dir)
    shutil.rmtree(compiled_dir)
# Save the original bf16 checkpoint and tokenizer alongside the compiled artifacts.
orig_model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
orig_model.save_pretrained(model_dir, safe_serialization=True, max_shard_size="10GB")
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.save_pretrained(model_dir)
# Upload everything in model_dir to the Hub repository.
neuron_model.push_to_hub(model_dir, repository_id=hf_repo)
```
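
As a quick sanity check, the exported model can be reloaded from `model_dir` (or from the Hub repo) and run through `generate`. This is a minimal sketch assuming an instance with Neuron cores and the same `optimum-neuron` version used for export; the prompt and generation parameters are illustrative, not part of the export script above.

```python
from optimum.neuron import NeuronModelForCausalLM
from transformers import AutoTokenizer

model_dir = "./DeepSeek-R1-Distill-Llama-8B-Tnx-Sharded"

# Reload the already-compiled model; no export=True is needed this time.
model = NeuronModelForCausalLM.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# Illustrative prompt; output length is bounded by the compiled
# sequence_length of 2048.
inputs = tokenizer("What is the capital of France?", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```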