Refactor project into 3 parts: Models, Control, Data

Still needs a README
2026-07-05 02:56:20 +08:00 · 2022-12-03 16:54:06 +08:00
parent b402f9dbdf
commit 74a3fc97d0
179 changed files with 197 additions and 27924 deletions
--- a/models/encoder/data_objects/utterance.py
+++ b/models/encoder/data_objects/utterance.py
@@ -0,0 +1,26 @@
+import numpy as np
+
+
+class Utterance:
+    """
+    Lightweight handle to a single utterance whose frames are stored on disk.
+
+    Only the two file paths are kept on the instance; the frames are loaded from disk every
+    time they are requested.
+    """
+
+    def __init__(self, frames_fpath, wave_fpath):
+        """
+        :param frames_fpath: path handed to np.load to read the frames of the utterance
+        :param wave_fpath: path stored alongside the frames; never read by this class
+        (presumably the source audio file -- confirm against the caller)
+        """
+        self.frames_fpath = frames_fpath
+        self.wave_fpath = wave_fpath
+
+    def get_frames(self):
+        """
+        Loads the frames of the utterance from disk.
+
+        :return: whatever np.load returns for frames_fpath
+        """
+        return np.load(self.frames_fpath)
+
+    def random_partial(self, n_frames):
+        """
+        Crops the frames into a partial utterance of n_frames
+
+        :param n_frames: The number of frames of the partial utterance
+        :return: the partial utterance frames and a tuple indicating the start and end of the
+        partial utterance in the complete utterance.
+        :raises ValueError: if the utterance holds fewer than n_frames frames
+        """
+        frames = self.get_frames()
+        n_total = frames.shape[0]
+        if n_total < n_frames:
+            # Fail with an explicit message instead of numpy's opaque "low >= high" error.
+            raise ValueError(
+                "Cannot crop %d frames from an utterance of %d frames" % (n_frames, n_total)
+            )
+        if n_total == n_frames:
+            # Only one crop is possible; no random draw is needed.
+            start = 0
+        else:
+            # The upper bound of randint is exclusive: the +1 makes the last valid offset
+            # (n_total - n_frames) reachable, so the final frame can be part of a partial.
+            start = np.random.randint(0, n_total - n_frames + 1)
+        end = start + n_frames
+        return frames[start:end], (start, end)