# Spaces: Runtime error
# Run one chat completion with a CTranslate2 int8 build of
# Mistral-7B-Instruct-v0.1 and print the decoded result.
import ctranslate2
import transformers
from huggingface_hub import snapshot_download

# Hub repository holding the CTranslate2 model files, and the repository the
# tokenizer is loaded from.
CT2_REPO_ID = "Praise2112/Mistral-7B-Instruct-v0.1-int8-ct2"
TOKENIZER_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.1"

# Download (or reuse the cached copy of) the model files and get their local path.
model_dir = snapshot_download(repo_id=CT2_REPO_ID)

# generator = ctranslate2.Generator(model_dir, device="cuda", compute_type="int8")  # GPU
generator = ctranslate2.Generator(model_dir, device="cpu", compute_type="int8")  # CPU
tokenizer = transformers.AutoTokenizer.from_pretrained(TOKENIZER_REPO_ID)

messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    {"role": "user", "content": "Do you have mayonnaise recipes?"},
]

# Render the chat template to text and tokenize it ourselves instead of asking
# for return_tensors="pt": the ids are only needed to look up token strings, so
# there is no reason to require PyTorch at runtime.
prompt = tokenizer.apply_chat_template(messages, tokenize=False)
# The rendered template already carries its special tokens, so do not let
# encode() add them a second time.
prompt_ids = tokenizer.encode(prompt, add_special_tokens=False)

# generate_batch() takes a batch of token-string sequences; this batch has one prompt.
model_inputs = [tokenizer.convert_ids_to_tokens(prompt_ids)]

generated_ids = generator.generate_batch(model_inputs, max_length=1000, sampling_topk=10)

# Keep the first hypothesis of each batch entry and turn its ids back into text.
decoded = [res.sequences_ids[0] for res in generated_ids]
decoded = tokenizer.batch_decode(decoded)
print(decoded[0])
# def speak(prompt):
#     # Tokenize the prompt and convert it to PyTorch tensors, then move them to the specified device
#     model_inputs = tokenizer([prompt], return_tensors="pt").to(device)
#     model.to(device)
#     # Conditionally generate text from the prompt using the model
#     generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
#     # Decode the generated ids into text and return the result
#     resulting_text = tokenizer.batch_decode(generated_ids)[0]
#     return resulting_text
# iface = gr.Interface(fn=speak, inputs="text", outputs="text")
# iface.launch()