talk-llama : add alpaca support (#668)

examples/talk-llama/prompts/talk-alpaca.txt
ADDED
@@ -0,0 +1,23 @@
+Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+### Instruction:
+
+Write a text transcript of a never ending dialog, where {0} interacts with an AI assistant named {1}.
+{1} is helpful, kind, honest, friendly, good at writing and never fails to answer {0}’s requests immediately and with details and precision.
+There are no annotations like (30 seconds passed...) or (to himself), just what {0} and {1} say aloud to each other.
+The transcript only includes text, it does not include markup like HTML and Markdown.
+{1} responds with short and concise answers.
+
+### Response:
+
+{0}{4} Hello, {1}!
+{1}{4} Hello {0}! How may I help you today?
+{0}{4} What time is it?
+{1}{4} It is {2} o'clock.
+{0}{4} What year is it?
+{1}{4} We are in {3}.
+{0}{4} What is a cat?
+{1}{4} A cat is a domestic species of small carnivorous mammal. It is the only domesticated species in the family Felidae.
+{0}{4} Name a color.
+{1}{4} Blue
+{0}{4}
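
The {N} tokens in this template are placeholders that talk-llama fills in at startup: {0} is the person's name (-p/--person), {1} the bot name, {2} and {3} the current time and year, and {4} is evidently the chat separator that follows a speaker name. A rough standalone sketch of that substitution follows; the replace_all helper and the ":" separator value are assumptions for illustration, not talk-llama's actual ::replace implementation.

// Standalone sketch of the placeholder substitution (illustrative only;
// talk-llama has its own ::replace helper in talk-llama.cpp).
#include <cstdio>
#include <string>

// replace every occurrence of needle in s with rep
static std::string replace_all(std::string s, const std::string & needle, const std::string & rep) {
    for (size_t pos = 0; (pos = s.find(needle, pos)) != std::string::npos; pos += rep.size()) {
        s.replace(pos, needle.size(), rep);
    }
    return s;
}

int main() {
    std::string prompt = "{0}{4} Hello, {1}!";

    prompt = replace_all(prompt, "{0}", "Georgi"); // {0}: person name (-p/--person)
    prompt = replace_all(prompt, "{1}", "LLaMA");  // {1}: bot name
    prompt = replace_all(prompt, "{4}", ":");      // {4}: chat separator (assumed value)

    printf("%s\n", prompt.c_str()); // prints: Georgi: Hello, LLaMA!
    return 0;
}

Keeping the placeholders in talk-alpaca.txt means the same template works for whatever person and bot names are passed on the command line.
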
examples/talk-llama/talk-llama.cpp
CHANGED
@@ -33,6 +33,8 @@ struct whisper_params {
     int32_t max_tokens = 32;
     int32_t audio_ctx = 0;
 
+    int32_t n_parts_llama = -1;
+
     float vad_thold = 0.6f;
     float freq_thold = 100.0f;
 
@@ -41,12 +43,14 @@ struct whisper_params {
     bool print_special = false;
     bool print_energy = false;
     bool no_timestamps = true;
+    bool verbose_prompt = false;
 
     std::string person = "Georgi";
     std::string language = "en";
     std::string model_wsp = "models/ggml-base.en.bin";
     std::string model_llama = "models/ggml-llama-7B.bin";
     std::string speak = "./examples/talk/speak.sh";
+    std::string prompt = "";
     std::string fname_out;
 };
 
@@ -67,15 +71,24 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
     else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
     else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
     else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
+    else if (arg == "--n-parts-llama") { params.n_parts_llama = std::stoi(argv[++i]); }
     else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
     else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
     else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
     else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
+    else if (arg == "--verbose-prompt") { params.verbose_prompt = true; }
     else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
     else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
     else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
     else if (arg == "-ml" || arg == "--model-llama") { params.model_llama = argv[++i]; }
     else if (arg == "-s" || arg == "--speak") { params.speak = argv[++i]; }
+    else if (arg == "--prompt-file") {
+        std::ifstream file(argv[++i]);
+        std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
+        if (params.prompt.back() == '\n') {
+            params.prompt.pop_back();
+        }
+    }
     else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
     else {
         fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
@@ -108,7 +121,10 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, "  -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
     fprintf(stderr, "  -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
     fprintf(stderr, "  -mg FILE, --model-llama [%-7s] llama model file\n", params.model_llama.c_str());
+    fprintf(stderr, "  --n-parts-llama N [%-7d] num parts in llama model file\n", params.n_parts_llama);
     fprintf(stderr, "  -s FILE, --speak TEXT [%-7s] command for TTS\n", params.speak.c_str());
+    fprintf(stderr, "  --prompt-file FNAME [%-7s] file with custom prompt to start dialog\n", "");
+    fprintf(stderr, "  --verbose-prompt [%-7s] print prompt at start\n", params.verbose_prompt ? "true" : "false");
     fprintf(stderr, "  -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
     fprintf(stderr, "\n");
 }
@@ -183,8 +199,7 @@ std::string transcribe(
 
 const std::string k_prompt_whisper = R"(A conversation with a person called {1}.)";
 
-
-const std::string k_prompt_llama = R"( Text transcript of a never ending dialog, where {0} interacts with an AI assistant named {1}.
+const std::string k_prompt_llama = R"(Text transcript of a never ending dialog, where {0} interacts with an AI assistant named {1}.
 {1} is helpful, kind, honest, friendly, good at writing and never fails to answer {0}’s requests immediately and with details and precision.
 There are no annotations like (30 seconds passed...) or (to himself), just what {0} and {1} say aloud to each other.
 The transcript only includes text, it does not include markup like HTML and Markdown.
@@ -227,6 +242,7 @@ int main(int argc, char ** argv) {
     lparams.n_ctx = 512;
     lparams.seed = 1;
     lparams.f16_kv = true;
+    lparams.n_parts = params.n_parts_llama;
 
     struct llama_context * ctx_llama = llama_init_from_file(params.model_llama.c_str(), lparams);
 
@@ -278,7 +294,10 @@
     const std::string prompt_whisper = ::replace(k_prompt_whisper, "{1}", bot_name);
 
     // construct the initial prompt for LLaMA inference
-    std::string prompt_llama = k_prompt_llama;
+    std::string prompt_llama = params.prompt.empty() ? k_prompt_llama : params.prompt;
+
+    // need to have leading ' '
+    prompt_llama.insert(0, 1, ' ');
 
     prompt_llama = ::replace(prompt_llama, "{0}", params.person);
     prompt_llama = ::replace(prompt_llama, "{1}", bot_name);
@@ -323,9 +342,11 @@
         return 1;
     }
 
-    printf("\n");
-    printf("%s", prompt_llama.c_str());
-    fflush(stdout);
+    if (params.verbose_prompt) {
+        fprintf(stdout, "\n");
+        fprintf(stdout, "%s", prompt_llama.c_str());
+        fflush(stdout);
+    }
 
     printf("%s : done! start speaking in the microphone\n", __func__);
     printf("\n");
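
The new --prompt-file option reads the whole file into params.prompt via std::istreambuf_iterator and trims one trailing newline; the loaded text then replaces the built-in k_prompt_llama before the {N} substitutions run. A minimal standalone sketch of the same read-and-trim idiom, assuming the hypothetical name read_prompt_file; the empty() check is an extra guard added here, since calling back() on an empty string is undefined behavior:

#include <algorithm>
#include <fstream>
#include <iterator>
#include <string>

// Standalone sketch of the --prompt-file read-and-trim idiom; the function
// name read_prompt_file is hypothetical, not part of the patch.
static std::string read_prompt_file(const std::string & fname) {
    std::ifstream file(fname);
    std::string prompt;

    std::copy(std::istreambuf_iterator<char>(file),
              std::istreambuf_iterator<char>(),
              std::back_inserter(prompt));

    // extra guard (not in the patch): back() on an empty string is undefined
    if (!prompt.empty() && prompt.back() == '\n') {
        prompt.pop_back();
    }

    return prompt;
}

With that in place, the example can be launched with something like talk-llama -mw models/ggml-base.en.bin -ml <alpaca-model.bin> --prompt-file examples/talk-llama/prompts/talk-alpaca.txt --verbose-prompt (model paths illustrative). For Alpaca checkpoints distributed as a single file, --n-parts-llama 1 may be needed to force a single-part load instead of the part count inferred from the model size.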