From b46e7a78667732114c22d4f5774c8481d6b75683 Mon Sep 17 00:00:00 2001 From: babysor00 Date: Fri, 1 Oct 2021 22:13:39 +0800 Subject: [PATCH] New web with selecting wav files --- web/__init__.py | 18 +++-- web/config/default.py | 1 + web/templates/index.html | 151 +++++++++++++++++++++++++++++++++++---- 3 files changed, 151 insertions(+), 19 deletions(-) diff --git a/web/__init__.py b/web/__init__.py index 8484d5c..330c91d 100644 --- a/web/__init__.py +++ b/web/__init__.py @@ -9,6 +9,7 @@ from vocoder.wavernn import inference as rnn_vocoder import numpy as np import re from scipy.io.wavfile import write +import librosa import io import base64 from flask_cors import CORS @@ -30,6 +31,7 @@ def webApp(): synthesizers = list(Path(syn_models_dirt).glob("**/*.pt")) synthesizers_cache = {} encoder.load_model(Path("encoder/saved_models/pretrained.pt")) + # rnn_vocoder.load_model(Path("vocoder/saved_models/pretrained/pretrained.pt")) gan_vocoder.load_model(Path("vocoder/saved_models/pretrained/g_hifigan.pt")) def pcm2float(sig, dtype='float32'): @@ -66,7 +68,6 @@ def webApp(): @app.route("/api/synthesize", methods=["POST"]) def synthesize(): # TODO Implementation with json to support more platform - # Load synthesizer if "synt_path" in request.form: synt_path = request.form["synt_path"] @@ -80,10 +81,16 @@ def webApp(): current_synt = synthesizers_cache[synt_path] print("using synthesizer model: " + str(synt_path)) # Load input wav - wav_base64 = request.form["upfile_b64"] - wav = base64.b64decode(bytes(wav_base64, 'utf-8')) - wav = pcm2float(np.frombuffer(wav, dtype=np.int16), dtype=np.float32) - encoder_wav = encoder.preprocess_wav(wav, 16000) + if "upfile_b64" in request.form: + wav_base64 = request.form["upfile_b64"] + wav = base64.b64decode(bytes(wav_base64, 'utf-8')) + wav = pcm2float(np.frombuffer(wav, dtype=np.int16), dtype=np.float32) + sample_rate = Synthesizer.sample_rate + else: + wav, sample_rate, = librosa.load(request.files['file']) + write("temp.wav", sample_rate, wav) #Make sure we get the correct wav + + encoder_wav = encoder.preprocess_wav(wav, sample_rate) embed, _, _ = encoder.embed_utterance(encoder_wav, return_partials=True) # Load input text @@ -100,6 +107,7 @@ def webApp(): embeds = [embed] * len(texts) specs = current_synt.synthesize_spectrograms(texts, embeds) spec = np.concatenate(specs, axis=1) + # wav = rnn_vocoder.infer_waveform(spec) wav = gan_vocoder.infer_waveform(spec) # Return cooked wav diff --git a/web/config/default.py b/web/config/default.py index 7892ae8..02149ab 100644 --- a/web/config/default.py +++ b/web/config/default.py @@ -5,3 +5,4 @@ PORT = 8080 MAX_CONTENT_PATH =1024 * 1024 * 4 # mp3文件大小限定不能超过4M SECRET_KEY = "mockingbird_key" WTF_CSRF_SECRET_KEY = "mockingbird_key" +TEMPLATES_AUTO_RELOAD = True \ No newline at end of file diff --git a/web/templates/index.html b/web/templates/index.html index 9493067..d8cfac4 100644 --- a/web/templates/index.html +++ b/web/templates/index.html @@ -38,22 +38,37 @@
-
请输入中文
+
1. 请输入中文
- -
+
+
2. 请直接录音,点击停止结束
-
- +
+
或上传音频
+ + +
+
+
+
3. 选择Synthesizer模型
+ + + +
+
+ +
+