mirror of
https://github.com/babysor/Realtime-Voice-Clone-Chinese.git
synced 2026-04-14 10:30:14 +08:00
Refactor Project to 3 parts: Models, Control, Data
Need readme
This commit is contained in:
26
models/encoder/data_objects/utterance.py
Normal file
26
models/encoder/data_objects/utterance.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
class Utterance:
    """Handle to a single utterance whose frames are stored on disk.

    Only the two file paths are kept in memory; the frames are loaded from
    ``frames_fpath`` with ``np.load`` every time they are requested.
    """

    def __init__(self, frames_fpath, wave_fpath):
        """
        :param frames_fpath: path of the file holding the utterance frames,
        readable by ``np.load``
        :param wave_fpath: path of the corresponding wave file (only stored
        here, never read by this class)
        """
        self.frames_fpath = frames_fpath
        self.wave_fpath = wave_fpath

    def get_frames(self):
        """
        Loads the complete utterance frames from disk.

        :return: the array stored at ``self.frames_fpath``
        """
        return np.load(self.frames_fpath)

    def random_partial(self, n_frames):
        """
        Crops the frames into a partial utterance of n_frames

        :param n_frames: The number of frames of the partial utterance
        :return: the partial utterance frames and a tuple indicating the start and end of the
        partial utterance in the complete utterance.
        :raises ValueError: if the utterance has fewer than n_frames frames
        """
        frames = self.get_frames()
        total_frames = frames.shape[0]
        if n_frames > total_frames:
            raise ValueError(
                "Cannot crop a partial utterance of %d frames from an "
                "utterance of only %d frames" % (n_frames, total_frames)
            )

        # The upper bound of np.random.randint is exclusive, so add 1 to make
        # the last valid window (start == total_frames - n_frames) reachable.
        # This also covers total_frames == n_frames, where start is always 0.
        start = np.random.randint(0, total_frames - n_frames + 1)
        end = start + n_frames
        return frames[start:end], (start, end)
|
||||
Reference in New Issue
Block a user