Update modules/vad/silero_vad.py
Browse files- modules/vad/silero_vad.py +13 -17
modules/vad/silero_vad.py
CHANGED
|
@@ -244,6 +244,17 @@ class SileroVAD:
|
|
| 244 |
f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
|
| 245 |
)
|
| 246 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
def restore_speech_timestamps(
|
| 248 |
self,
|
| 249 |
segments: List[dict],
|
|
@@ -256,23 +267,8 @@ class SileroVAD:
|
|
| 256 |
ts_map = SpeechTimestampsMap(speech_chunks, sampling_rate)
|
| 257 |
|
| 258 |
for segment in segments:
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
for word in segment["text"]:
|
| 262 |
-
# Ensure the word start and end times are resolved to the same chunk.
|
| 263 |
-
middle = (word.start + word.end) / 2
|
| 264 |
-
chunk_index = ts_map.get_chunk_index(middle)
|
| 265 |
-
word.start = ts_map.get_original_time(word.start, chunk_index)
|
| 266 |
-
word.end = ts_map.get_original_time(word.end, chunk_index)
|
| 267 |
-
words.append(word)
|
| 268 |
-
|
| 269 |
-
segment["start"] = words[0].start
|
| 270 |
-
segment["end"] = words[-1].end
|
| 271 |
-
segment["text"] = words
|
| 272 |
|
| 273 |
-
else:
|
| 274 |
-
segment["start"] = ts_map.get_original_time(segment["start"])
|
| 275 |
-
segment["end"] = ts_map.get_original_time(segment["end"])
|
| 276 |
-
|
| 277 |
return segments
|
| 278 |
|
|
|
|
| 244 |
f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
|
| 245 |
)
|
| 246 |
|
| 247 |
+
def restore_speech_timestamps(
|
| 248 |
+
self,
|
| 249 |
+
segments: List[dict],
|
| 250 |
+
speech_chunks: List[dict],
|
| 251 |
+
sampling_rate: Optional[int] = None,
|
| 252 |
+
) -> List[dict]:
|
| 253 |
+
if sampling_rate is None:
|
| 254 |
+
sampling_rate = self.sampling_rate
|
| 255 |
+
|
| 256 |
+
ts_map = SpeechTimestampsMap(speech_chunks, sampling_rate)
|
| 257 |
+
|
| 258 |
def restore_speech_timestamps(
|
| 259 |
self,
|
| 260 |
segments: List[dict],
|
|
|
|
| 267 |
ts_map = SpeechTimestampsMap(speech_chunks, sampling_rate)
|
| 268 |
|
| 269 |
for segment in segments:
|
| 270 |
+
segment["start"] = ts_map.get_original_time(segment["start"])
|
| 271 |
+
segment["end"] = ts_map.get_original_time(segment["end"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
return segments
|
| 274 |
|