mirror of
https://github.com/babysor/Realtime-Voice-Clone-Chinese.git
synced 2026-04-28 12:50:52 +08:00
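Make the dependencies easier to install: replace the deprecated NumPy aliases `np.bool` and `np.float` with the builtin `bool` and `float`, and pass `orig_sr`/`target_sr` to `librosa.resample` as keyword arguments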
This commit is contained in:
@@ -39,7 +39,7 @@ def preprocess_wav(fpath_or_wav: Union[str, Path, np.ndarray],
|
||||
|
||||
# Resample the wav if needed
|
||||
if source_sr is not None and source_sr != sampling_rate:
|
||||
wav = librosa.resample(wav, source_sr, sampling_rate)
|
||||
wav = librosa.resample(wav, orig_sr = source_sr, target_sr = sampling_rate)
|
||||
|
||||
# Apply the preprocessing: normalize volume and shorten long silences
|
||||
if normalize:
|
||||
@@ -99,7 +99,7 @@ def trim_long_silences(wav):
|
||||
return ret[width - 1:] / width
|
||||
|
||||
audio_mask = moving_average(voice_flags, vad_moving_average_width)
|
||||
audio_mask = np.round(audio_mask).astype(np.bool)
|
||||
audio_mask = np.round(audio_mask).astype(bool)
|
||||
|
||||
# Dilate the voiced regions
|
||||
audio_mask = binary_dilation(audio_mask, np.ones(vad_max_silence_length + 1))
|
||||
|
||||
@@ -21,7 +21,7 @@ colormap = np.array([
|
||||
[33, 0, 127],
|
||||
[0, 0, 0],
|
||||
[183, 183, 183],
|
||||
], dtype=np.float) / 255
|
||||
], dtype=float) / 255
|
||||
|
||||
|
||||
class Visualizations:
|
||||
|
||||
Reference in New Issue
Block a user