evanqjones committed on
Commit
cd8791b
·
unverified ·
1 Parent(s): 7e02444

talk-llama : add alpaca support (#668)

Browse files
examples/talk-llama/prompts/talk-alpaca.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Below is an instruction that describes a task. Write a response that appropriately completes the request.
2
+
3
+ ### Instruction:
4
+
5
+ Write a text transcript of a never ending dialog, where {0} interacts with an AI assistant named {1}.
6
+ {1} is helpful, kind, honest, friendly, good at writing and never fails to answer {0}’s requests immediately and with details and precision.
7
+ There are no annotations like (30 seconds passed...) or (to himself), just what {0} and {1} say aloud to each other.
8
+ The transcript only includes text, it does not include markup like HTML and Markdown.
9
+ {1} responds with short and concise answers.
10
+
11
+ ### Response:
12
+
13
+ {0}{4} Hello, {1}!
14
+ {1}{4} Hello {0}! How may I help you today?
15
+ {0}{4} What time is it?
16
+ {1}{4} It is {2} o'clock.
17
+ {0}{4} What year is it?
18
+ {1}{4} We are in {3}.
19
+ {0}{4} What is a cat?
20
+ {1}{4} A cat is a domestic species of small carnivorous mammal. It is the only domesticated species in the family Felidae.
21
+ {0}{4} Name a color.
22
+ {1}{4} Blue
23
+ {0}{4}
examples/talk-llama/talk-llama.cpp CHANGED
@@ -33,6 +33,8 @@ struct whisper_params {
33
  int32_t max_tokens = 32;
34
  int32_t audio_ctx = 0;
35
 
 
 
36
  float vad_thold = 0.6f;
37
  float freq_thold = 100.0f;
38
 
@@ -41,12 +43,14 @@ struct whisper_params {
41
  bool print_special = false;
42
  bool print_energy = false;
43
  bool no_timestamps = true;
 
44
 
45
  std::string person = "Georgi";
46
  std::string language = "en";
47
  std::string model_wsp = "models/ggml-base.en.bin";
48
  std::string model_llama = "models/ggml-llama-7B.bin";
49
  std::string speak = "./examples/talk/speak.sh";
 
50
  std::string fname_out;
51
  };
52
 
@@ -67,15 +71,24 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
67
  else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
68
  else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
69
  else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
 
70
  else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
71
  else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
72
  else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
73
  else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
 
74
  else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
75
  else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
76
  else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
77
  else if (arg == "-ml" || arg == "--model-llama") { params.model_llama = argv[++i]; }
78
  else if (arg == "-s" || arg == "--speak") { params.speak = argv[++i]; }
 
 
 
 
 
 
 
79
  else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
80
  else {
81
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
@@ -108,7 +121,10 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
108
  fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
109
  fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
110
  fprintf(stderr, " -mg FILE, --model-llama [%-7s] llama model file\n", params.model_llama.c_str());
 
111
  fprintf(stderr, " -s FILE, --speak TEXT [%-7s] command for TTS\n", params.speak.c_str());
 
 
112
  fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
113
  fprintf(stderr, "\n");
114
  }
@@ -183,8 +199,7 @@ std::string transcribe(
183
 
184
  const std::string k_prompt_whisper = R"(A conversation with a person called {1}.)";
185
 
186
- // need to have leading ' '
187
- const std::string k_prompt_llama = R"( Text transcript of a never ending dialog, where {0} interacts with an AI assistant named {1}.
188
  {1} is helpful, kind, honest, friendly, good at writing and never fails to answer {0}’s requests immediately and with details and precision.
189
  There are no annotations like (30 seconds passed...) or (to himself), just what {0} and {1} say aloud to each other.
190
  The transcript only includes text, it does not include markup like HTML and Markdown.
@@ -227,6 +242,7 @@ int main(int argc, char ** argv) {
227
  lparams.n_ctx = 512;
228
  lparams.seed = 1;
229
  lparams.f16_kv = true;
 
230
 
231
  struct llama_context * ctx_llama = llama_init_from_file(params.model_llama.c_str(), lparams);
232
 
@@ -278,7 +294,10 @@ int main(int argc, char ** argv) {
278
  const std::string prompt_whisper = ::replace(k_prompt_whisper, "{1}", bot_name);
279
 
280
  // construct the initial prompt for LLaMA inference
281
- std::string prompt_llama = k_prompt_llama;
 
 
 
282
 
283
  prompt_llama = ::replace(prompt_llama, "{0}", params.person);
284
  prompt_llama = ::replace(prompt_llama, "{1}", bot_name);
@@ -323,9 +342,11 @@ int main(int argc, char ** argv) {
323
  return 1;
324
  }
325
 
326
- //fprintf(stdout, "\n");
327
- //fprintf(stdout, "%s", prompt_llama.c_str());
328
- //fflush(stdout);
 
 
329
 
330
  printf("%s : done! start speaking in the microphone\n", __func__);
331
  printf("\n");
 
33
  int32_t max_tokens = 32;
34
  int32_t audio_ctx = 0;
35
 
36
+ int32_t n_parts_llama = -1;
37
+
38
  float vad_thold = 0.6f;
39
  float freq_thold = 100.0f;
40
 
 
43
  bool print_special = false;
44
  bool print_energy = false;
45
  bool no_timestamps = true;
46
+ bool verbose_prompt = false;
47
 
48
  std::string person = "Georgi";
49
  std::string language = "en";
50
  std::string model_wsp = "models/ggml-base.en.bin";
51
  std::string model_llama = "models/ggml-llama-7B.bin";
52
  std::string speak = "./examples/talk/speak.sh";
53
+ std::string prompt = "";
54
  std::string fname_out;
55
  };
56
 
 
71
  else if (arg == "-ac" || arg == "--audio-ctx") { params.audio_ctx = std::stoi(argv[++i]); }
72
  else if (arg == "-vth" || arg == "--vad-thold") { params.vad_thold = std::stof(argv[++i]); }
73
  else if (arg == "-fth" || arg == "--freq-thold") { params.freq_thold = std::stof(argv[++i]); }
74
+ else if (arg == "--n-parts-llama") { params.n_parts_llama = std::stoi(argv[++i]); }
75
  else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
76
  else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
77
  else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
78
  else if (arg == "-pe" || arg == "--print-energy") { params.print_energy = true; }
79
+ else if (arg == "--verbose-prompt") { params.verbose_prompt = true; }
80
  else if (arg == "-p" || arg == "--person") { params.person = argv[++i]; }
81
  else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
82
  else if (arg == "-mw" || arg == "--model-whisper") { params.model_wsp = argv[++i]; }
83
  else if (arg == "-ml" || arg == "--model-llama") { params.model_llama = argv[++i]; }
84
  else if (arg == "-s" || arg == "--speak") { params.speak = argv[++i]; }
85
+ else if (arg == "--prompt-file") {
86
+ std::ifstream file(argv[++i]);
87
+ std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
88
+ if (params.prompt.back() == '\n') {
89
+ params.prompt.pop_back();
90
+ }
91
+ }
92
  else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
93
  else {
94
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
 
121
  fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
122
  fprintf(stderr, " -mw FILE, --model-whisper [%-7s] whisper model file\n", params.model_wsp.c_str());
123
  fprintf(stderr, " -mg FILE, --model-llama [%-7s] llama model file\n", params.model_llama.c_str());
124
+ fprintf(stderr, " --n-parts-llama N [%-7d] num parts in llama model file\n", params.n_parts_llama);
125
  fprintf(stderr, " -s FILE, --speak TEXT [%-7s] command for TTS\n", params.speak.c_str());
126
+ fprintf(stderr, " --prompt-file FNAME [%-7s] file with custom prompt to start dialog\n", "");
127
+ fprintf(stderr, " --verbose-prompt [%-7s] print prompt at start\n", params.verbose_prompt ? "true" : "false");
128
  fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
129
  fprintf(stderr, "\n");
130
  }
 
199
 
200
  const std::string k_prompt_whisper = R"(A conversation with a person called {1}.)";
201
 
202
+ const std::string k_prompt_llama = R"(Text transcript of a never ending dialog, where {0} interacts with an AI assistant named {1}.
 
203
  {1} is helpful, kind, honest, friendly, good at writing and never fails to answer {0}’s requests immediately and with details and precision.
204
  There are no annotations like (30 seconds passed...) or (to himself), just what {0} and {1} say aloud to each other.
205
  The transcript only includes text, it does not include markup like HTML and Markdown.
 
242
  lparams.n_ctx = 512;
243
  lparams.seed = 1;
244
  lparams.f16_kv = true;
245
+ lparams.n_parts = params.n_parts_llama;
246
 
247
  struct llama_context * ctx_llama = llama_init_from_file(params.model_llama.c_str(), lparams);
248
 
 
294
  const std::string prompt_whisper = ::replace(k_prompt_whisper, "{1}", bot_name);
295
 
296
  // construct the initial prompt for LLaMA inference
297
+ std::string prompt_llama = params.prompt.empty() ? k_prompt_llama : params.prompt;
298
+
299
+ // need to have leading ' '
300
+ prompt_llama.insert(0, 1, ' ');
301
 
302
  prompt_llama = ::replace(prompt_llama, "{0}", params.person);
303
  prompt_llama = ::replace(prompt_llama, "{1}", bot_name);
 
342
  return 1;
343
  }
344
 
345
+ if (params.verbose_prompt) {
346
+ fprintf(stdout, "\n");
347
+ fprintf(stdout, "%s", prompt_llama.c_str());
348
+ fflush(stdout);
349
+ }
350
 
351
  printf("%s : done! start speaking in the microphone\n", __func__);
352
  printf("\n");