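Some changes to make it easier to install the dependencies (e.g. replacing the removed NumPy aliases `np.bool`/`np.float` with the builtins `bool`/`float`, and passing `orig_sr`/`target_sr` to `librosa.resample` as keyword arguments)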

2026-04-28 12:50:52 +08:00 · 2023-06-02 17:22:38 +08:00
parent b78d0d2a26
commit 9f1dbeeecc
13 changed files with 93 additions and 46 deletions
--- a/models/encoder/audio.py
+++ b/models/encoder/audio.py
@@ -39,7 +39,7 @@ def preprocess_wav(fpath_or_wav: Union[str, Path, np.ndarray],
    
    # Resample the wav if needed
    if source_sr is not None and source_sr != sampling_rate:
-        wav = librosa.resample(wav, source_sr, sampling_rate)
+        wav = librosa.resample(wav, orig_sr = source_sr, target_sr = sampling_rate)
        
    # Apply the preprocessing: normalize volume and shorten long silences 
    if normalize:
@@ -99,7 +99,7 @@ def trim_long_silences(wav):
        return ret[width - 1:] / width
    
    audio_mask = moving_average(voice_flags, vad_moving_average_width)
-    audio_mask = np.round(audio_mask).astype(np.bool)
+    audio_mask = np.round(audio_mask).astype(bool)
    
    # Dilate the voiced regions
    audio_mask = binary_dilation(audio_mask, np.ones(vad_max_silence_length + 1))
--- a/models/encoder/visualizations.py
+++ b/models/encoder/visualizations.py
@@ -21,7 +21,7 @@ colormap = np.array([
    [33, 0, 127],
    [0, 0, 0],
    [183, 183, 183],
-], dtype=np.float) / 255 
+], dtype=float) / 255 


 class Visualizations: