ggerganov committed on
Commit
3568438
unverified
1 Parent(s): 7c7a4d7

models : add "convert-h5-to-ggml.py" script (#157)

Converts Hugging Face transformers models to ggml.
Although the conversion itself succeeds, the resulting model does not work
for some reason; not sure why yet.
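
For reference, the script is invoked as shown in the usage string below; the paths here are illustrative:

    python models/convert-h5-to-ggml.py /path/to/hf-model /path/to/whisper-repo /path/to/output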

Files changed (1)
  1. models/convert-h5-to-ggml.py +184 -0
models/convert-h5-to-ggml.py ADDED
@@ -0,0 +1,184 @@
import io
import os
import sys
import struct
import json
import code
import torch
import numpy as np

from transformers import WhisperForConditionalGeneration

# map transformers (HF) tensor name fragments to their whisper.cpp counterparts;
# note: "encoder_attn" is the decoder's cross-attention, hence k_proj -> cross_attn.key
conv_map = {'self_attn_layer_norm': 'attn_ln',
            'encoder_attn.k_proj': 'cross_attn.key',
            'self_attn.out_proj': 'attn.out',
            'encoder_attn.out_proj': 'cross_attn.out',
            'self_attn.q_proj': 'attn.query',
            'encoder_attn.q_proj': 'cross_attn.query',
            'self_attn.v_proj': 'attn.value',
            'encoder_attn.v_proj': 'cross_attn.value',
            'encoder_attn_layer_norm': 'cross_attn_ln',
            'fc1': 'mlp.0',
            'fc2': 'mlp.2',
            'final_layer_norm': 'mlp_ln',
            'encoder.layer_norm.bias': 'encoder.ln_post.bias',
            'encoder.layer_norm.weight': 'encoder.ln_post.weight',
            'encoder.embed_positions.weight': 'encoder.positional_embedding',
            'decoder.layer_norm.bias': 'decoder.ln.bias',
            'decoder.layer_norm.weight': 'decoder.ln.weight',
            'decoder.embed_positions.weight': 'decoder.positional_embedding',
            'decoder.embed_tokens.weight': 'decoder.token_embedding.weight',
            }

# ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
def bytes_to_unicode():
    """
    Returns a list of utf-8 bytes and a corresponding list of unicode strings.
    The reversible bpe codes work on unicode strings.
    This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
    When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
    This is a significant percentage of your normal, say, 32K bpe vocab.
    To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
    And avoids mapping to whitespace/control characters the bpe code barfs on.
    """
    bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1))
    cs = bs[:]
    n = 0
    for b in range(2**8):
        if b not in bs:
            bs.append(b)
            cs.append(2**8+n)
            n += 1
    cs = [chr(n) for n in cs]
    return dict(zip(bs, cs))

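# Worked example of bytes_to_unicode (added for illustration): printable bytes map to
# themselves, e.g. byte 97 -> 'a', while excluded bytes are shifted past 255 in order.
# The space byte 0x20 is the 33rd excluded byte (after 0x00..0x1f), so it maps to
# chr(256 + 32) = 'Ġ', which is why GPT-2-style vocabs show 'Ġ' for a leading space.
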
if len(sys.argv) < 4:
    print("Usage: convert-h5-to-ggml.py dir_model path-to-whisper-repo dir-output [use-f32]\n")
    sys.exit(1)

dir_model = sys.argv[1]
dir_whisper = sys.argv[2]
dir_out = sys.argv[3]

with open(dir_model + "/vocab.json", "r") as f:
    encoder = json.load(f)
with open(dir_model + "/added_tokens.json", "r") as f:
    encoder_added = json.load(f)
with open(dir_model + "/config.json", "r") as f:
    hparams = json.load(f)

model = WhisperForConditionalGeneration.from_pretrained(dir_model)

#code.interact(local=locals())

# load the mel filter bank from the original OpenAI whisper repository
n_mels = hparams["num_mel_bins"]
with np.load(os.path.join(dir_whisper, "whisper/assets", "mel_filters.npz")) as f:
    filters = torch.from_numpy(f[f"mel_{n_mels}"])

dir_tokenizer = dir_model

fname_out = dir_out + "/ggml-model.bin"

with open(dir_tokenizer + "/vocab.json", "r", encoding="utf8") as f:
    tokens = json.load(f)

# default to 16-bit floats; a 5th command-line argument (use-f32) selects 32-bit
use_f16 = True
if len(sys.argv) > 4:
    use_f16 = False

fout = open(fname_out, "wb")

# hyperparameters, taken from the HF config.json
fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex
fout.write(struct.pack("i", hparams["vocab_size"]))
fout.write(struct.pack("i", hparams["max_source_positions"]))
fout.write(struct.pack("i", hparams["d_model"]))
fout.write(struct.pack("i", hparams["decoder_attention_heads"]))
fout.write(struct.pack("i", hparams["decoder_layers"]))
fout.write(struct.pack("i", hparams["max_length"]))
fout.write(struct.pack("i", hparams["d_model"]))
fout.write(struct.pack("i", hparams["encoder_attention_heads"]))
fout.write(struct.pack("i", hparams["encoder_layers"]))
fout.write(struct.pack("i", hparams["num_mel_bins"]))
fout.write(struct.pack("i", use_f16))

# mel filter bank: two int32 dims followed by the f32 coefficients
fout.write(struct.pack("i", filters.shape[0]))
fout.write(struct.pack("i", filters.shape[1]))
for i in range(filters.shape[0]):
    for j in range(filters.shape[1]):
        fout.write(struct.pack("f", filters[i][j]))

byte_encoder = bytes_to_unicode()
byte_decoder = {v:k for k, v in byte_encoder.items()}

# vocabulary: token count, then each token as length-prefixed raw bytes
fout.write(struct.pack("i", len(tokens)))

tokens = sorted(tokens.items(), key=lambda x: x[1])
for key in tokens:
    text = bytearray([byte_decoder[c] for c in key[0]])
    fout.write(struct.pack("i", len(text)))
    fout.write(text)

list_vars = model.state_dict()
for name in list_vars.keys():
    if name == "proj_out.weight":
        print('Skipping', name)
        continue

    src = name

    # translate the transformers name to the whisper.cpp name, e.g.
    # model.decoder.layers.0.self_attn.q_proj.weight -> decoder.blocks.0.attn.query.weight
    nn = name
    nn = nn.split(".")[1:]
    if nn[1] == "layers":
        nn[1] = "blocks"
        # self_attn.k_proj is self-attention in both the encoder and the decoder, so it
        # always maps to attn.key; cross-attention keys come from encoder_attn.k_proj
        if ".".join(nn[3:-1]) == "self_attn.k_proj":
            mapped = "attn.key"
        else:
            mapped = conv_map[".".join(nn[3:-1])]
        name = ".".join(nn[:3] + [mapped] + nn[-1:])
    else:
        name = ".".join(nn)
        name = conv_map[name] if name in conv_map else name

    print(src, ' -> ', name)
    data = list_vars[src].squeeze().numpy()
    data = data.astype(np.float16)

    # reshape conv bias from [n] to [n, 1]
    if name == "encoder.conv1.bias" or \
       name == "encoder.conv2.bias":
        data = data.reshape(data.shape[0], 1)
        print("  Reshaped variable: " + name + " to shape: ", data.shape)

    n_dims = len(data.shape)
    print(name, n_dims, data.shape)

    # looks like the whisper models are in f16 by default
    # so we need to convert the small tensors to f32 until we fully support f16 in ggml
    # ftype == 0 -> float32, ftype == 1 -> float16
    ftype = 1
    if use_f16:
        if n_dims < 2 or \
           name == "encoder.conv1.bias" or \
           name == "encoder.conv2.bias" or \
           name == "encoder.positional_embedding" or \
           name == "decoder.positional_embedding":
            print("  Converting to float32")
            data = data.astype(np.float32)
            ftype = 0
    else:
        data = data.astype(np.float32)
        ftype = 0

    # tensor header: n_dims, name length, ftype, then the dims (innermost first) and the name
    str_ = name.encode('utf-8')
    fout.write(struct.pack("iii", n_dims, len(str_), ftype))
    for i in range(n_dims):
        fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
    fout.write(str_)

    # tensor data
    data.tofile(fout)

fout.close()

print("Done. Output file: " + fname_out)
print("")
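
As a quick sanity check of the generated file, a minimal sketch along these lines can read the header back in the exact order it was packed above (the field labels follow the hparams keys used by the script and are not part of the format):

    import struct

    # read back the ggml header written by convert-h5-to-ggml.py
    keys = ["vocab_size", "max_source_positions", "d_model",
            "decoder_attention_heads", "decoder_layers", "max_length",
            "d_model (again)", "encoder_attention_heads", "encoder_layers",
            "num_mel_bins", "use_f16"]

    with open("ggml-model.bin", "rb") as f:
        magic, = struct.unpack("i", f.read(4))
        assert magic == 0x67676d6c, "bad magic"
        for k in keys:
            value, = struct.unpack("i", f.read(4))
            print(k, "=", value)
        # what follows in the file: mel filter dims + f32 data, the vocabulary,
        # and then one (header + data) record per tensor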