Spaces:

LAP-DEV
/

Demo

Running

LAP-DEV committed on Feb 17

Commit

6543165

verified ·

1 Parent(s): 51cf04f

Update modules/vad/silero_vad.py

Files changed (1) hide show

modules/vad/silero_vad.py CHANGED Viewed

@@ -244,6 +244,17 @@ class SileroVAD:
             f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
         )
     def restore_speech_timestamps(
         self,
         segments: List[dict],
@@ -256,23 +267,8 @@ class SileroVAD:
         ts_map = SpeechTimestampsMap(speech_chunks, sampling_rate)
         for segment in segments:
-            if segment["text"]:
-                words = []
-                for word in segment["text"]:
-                    # Ensure the word start and end times are resolved to the same chunk.
-                    middle = (word.start + word.end) / 2
-                    chunk_index = ts_map.get_chunk_index(middle)
-                    word.start = ts_map.get_original_time(word.start, chunk_index)
-                    word.end = ts_map.get_original_time(word.end, chunk_index)
-                    words.append(word)
-                segment["start"] = words[0].start
-                segment["end"] = words[-1].end
-                segment["text"] = words
-            else:
-                segment["start"] = ts_map.get_original_time(segment["start"])
-                segment["end"] = ts_map.get_original_time(segment["end"])
         return segments

             f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
         )
+    def restore_speech_timestamps(
+        self,
+        segments: List[dict],
+        speech_chunks: List[dict],
+        sampling_rate: Optional[int] = None,
+    ) -> List[dict]:
+        if sampling_rate is None:
+            sampling_rate = self.sampling_rate
+        ts_map = SpeechTimestampsMap(speech_chunks, sampling_rate)
     def restore_speech_timestamps(
         self,
         segments: List[dict],
         ts_map = SpeechTimestampsMap(speech_chunks, sampling_rate)
         for segment in segments:
+            segment["start"] = ts_map.get_original_time(segment["start"])
+            segment["end"] = ts_map.get_original_time(segment["end"])
         return segments