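The script below exports `deepseek-ai/DeepSeek-R1-Distill-Llama-8B` for AWS Neuron with `optimum-neuron`, copies the compiled NEFF artifacts next to the safetensors shards, adds the original bf16 checkpoint and tokenizer, and pushes the result to this repository.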
```python
import os
import glob
import shutil

import torch
from huggingface_hub import login
from optimum.neuron import NeuronModelForCausalLM
from transformers import AutoTokenizer, AutoModelForCausalLM
# Note: safetensors must be installed (pip install safetensors)
# for safe_serialization=True below.

hf_token = "hf_TBF"  # placeholder; substitute your own Hugging Face access token
sequence_length = 2048
auto_cast_type = "bf16"
batch_size = 1
num_cores = 2
hf_repo = "yahavb/DeepSeek-R1-Distill-Llama-8B-Tnx-vllm"
model_dir = "./DeepSeek-R1-Distill-Llama-8B-Tnx-Sharded"
model_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
login(hf_token, add_to_git_credential=True)

# Compilation settings: Neuron compiles for static input shapes,
# so batch size and sequence length are fixed at export time.
compiler_args = {"num_cores": num_cores, "auto_cast_type": auto_cast_type}
input_shapes = {"batch_size": batch_size, "sequence_length": sequence_length}
neuron_model = NeuronModelForCausalLM.from_pretrained(
    model_id,
    export=True,
    **compiler_args,
    **input_shapes,
)
neuron_model.save_pretrained(model_dir, safe_serialization=True, max_shard_size="10GB")

# Move the compiled NEFF artifacts up next to the model shards,
# then drop the now-empty "compiled" directory.
compiled_dir = os.path.join(model_dir, "compiled")
if os.path.isdir(compiled_dir):
    for neff_file in glob.glob(os.path.join(compiled_dir, "*.neff")):
        shutil.copy(neff_file, model_dir)
    shutil.rmtree(compiled_dir)
# Save the original bf16 checkpoint and tokenizer alongside the compiled artifacts.
orig_model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
orig_model.save_pretrained(model_dir, safe_serialization=True, max_shard_size="10GB")
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.save_pretrained(model_dir)
# Upload everything in model_dir to the Hub repository.
neuron_model.push_to_hub(model_dir, repository_id=hf_repo)
```
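
As a quick sanity check, the exported model can be reloaded from `model_dir` (or from the Hub repo) and run through `generate`. This is a minimal sketch assuming an instance with Neuron cores and the same `optimum-neuron` version used for export; the prompt and generation parameters are illustrative, not part of the export script above.

```python
from optimum.neuron import NeuronModelForCausalLM
from transformers import AutoTokenizer

model_dir = "./DeepSeek-R1-Distill-Llama-8B-Tnx-Sharded"

# Reload the already-compiled model; no export=True is needed this time.
model = NeuronModelForCausalLM.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# Illustrative prompt; output length is bounded by the compiled
# sequence_length of 2048.
inputs = tokenizer("What is the capital of France?", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```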