New web UI with support for selecting wav files

This commit is contained in:
babysor00
2021-10-01 22:13:39 +08:00
parent 8a384a1191
commit b46e7a7866
3 changed files with 151 additions and 19 deletions

View File

@@ -9,6 +9,7 @@ from vocoder.wavernn import inference as rnn_vocoder
import numpy as np
import re
from scipy.io.wavfile import write
import librosa
import io
import base64
from flask_cors import CORS
@@ -30,6 +31,7 @@ def webApp():
synthesizers = list(Path(syn_models_dirt).glob("**/*.pt"))
synthesizers_cache = {}
encoder.load_model(Path("encoder/saved_models/pretrained.pt"))
# rnn_vocoder.load_model(Path("vocoder/saved_models/pretrained/pretrained.pt"))
gan_vocoder.load_model(Path("vocoder/saved_models/pretrained/g_hifigan.pt"))
def pcm2float(sig, dtype='float32'):
@@ -66,7 +68,6 @@ def webApp():
@app.route("/api/synthesize", methods=["POST"])
def synthesize():
# TODO Implementation with json to support more platform
# Load synthesizer
if "synt_path" in request.form:
synt_path = request.form["synt_path"]
@@ -80,10 +81,16 @@ def webApp():
current_synt = synthesizers_cache[synt_path]
print("using synthesizer model: " + str(synt_path))
# Load input wav
wav_base64 = request.form["upfile_b64"]
wav = base64.b64decode(bytes(wav_base64, 'utf-8'))
wav = pcm2float(np.frombuffer(wav, dtype=np.int16), dtype=np.float32)
encoder_wav = encoder.preprocess_wav(wav, 16000)
if "upfile_b64" in request.form:
wav_base64 = request.form["upfile_b64"]
wav = base64.b64decode(bytes(wav_base64, 'utf-8'))
wav = pcm2float(np.frombuffer(wav, dtype=np.int16), dtype=np.float32)
sample_rate = Synthesizer.sample_rate
else:
wav, sample_rate, = librosa.load(request.files['file'])
write("temp.wav", sample_rate, wav) #Make sure we get the correct wav
encoder_wav = encoder.preprocess_wav(wav, sample_rate)
embed, _, _ = encoder.embed_utterance(encoder_wav, return_partials=True)
# Load input text
@@ -100,6 +107,7 @@ def webApp():
embeds = [embed] * len(texts)
specs = current_synt.synthesize_spectrograms(texts, embeds)
spec = np.concatenate(specs, axis=1)
# wav = rnn_vocoder.infer_waveform(spec)
wav = gan_vocoder.infer_waveform(spec)
# Return cooked wav

View File

@@ -5,3 +5,4 @@ PORT = 8080
# Upload size cap in bytes: 1024 * 1024 * 4 = 4 MiB.
# NOTE(review): Flask enforces request-body limits through MAX_CONTENT_LENGTH;
# confirm that some code actually reads MAX_CONTENT_PATH, otherwise this cap
# is not applied.
MAX_CONTENT_PATH =1024 * 1024 * 4 # mp3 file size must not exceed 4M
# NOTE(review): both secrets below are the same hard-coded literal committed to
# source -- presumably placeholders; confirm they are overridden in deployment.
SECRET_KEY = "mockingbird_key"
WTF_CSRF_SECRET_KEY = "mockingbird_key"
# Presumably consumed as Flask's TEMPLATES_AUTO_RELOAD setting -- TODO confirm
# how this module is loaded into the app config.
TEMPLATES_AUTO_RELOAD = True

View File

@@ -38,22 +38,37 @@
</div>
<div style="margin-left: 5%;margin-top: 50px;width: 90%;">
<div style="font-size: larger;font-weight: bolder;">请输入中文</div>
<div style="font-size: larger;font-weight: bolder;">1. 请输入中文</div>
<textarea id="user_input_text"
style="border:1px solid #ccc; width: 100%; height: 100px; font-size: 15px; margin-top: 10px;"></textarea>
</div>
<div class="pd btns" style="margin-left: 5%;margin-top: 20px;width: 90%; text-align:right;">
<div class="pd btns" style="margin-left: 5%;margin-top: 20px;width: 90%; ">
<!-- <div>
<button onclick="recOpen()" style="margin-right:10px">打开录音,请求权限</button>
<button onclick="recClose()" style="margin-right:0">关闭录音,释放资源</button>
</div> -->
<div style="font-size: larger;font-weight: bolder;">2. 请直接录音,点击停止结束</div>
<button onclick="recStart()" >录制</button>
<button onclick="recStop()">停止</button>
<button onclick="recPlay()" >播放</button>
<button onclick="recUpload()" >上传</button>
</div>
<div class="pd btns" style="margin-left: 5%;margin-top: 20px;width: 90%; ">
<div style="font-size: larger;font-weight: bolder;">或上传音频</div>
<input type="file" id="fileInput" accept=".wav" />
<label for="fileInput">选择音频</label>
<div id="audio1"></div>
</div>
<div class="pd btns" style="margin-left: 5%;margin-top: 20px;width: 90%; ">
<div style="font-size: larger;font-weight: bolder;">3. 选择Synthesizer模型</div>
<span class="box">
<select id="select">
</select>
</span>
</div>
<div class="pd btns" style="margin-left: 5%;margin-top: 20px;width: 90%; text-align:right;">
<button id="upload" onclick="recUpload()">上传合成</button>
</div>
<!-- 波形绘制区域 -->
<!-- <div class="pd recpower">
<div style="height:40px;width:100%;background:#fff;position:relative;">
@@ -76,6 +91,37 @@
<script>
// Show an inline <audio> preview of the file picked in #fileInput.
// Wrapped in an IIFE so the remembered object URL does not become a global.
(function () {
    // Object URL backing the current preview, or null when nothing is shown.
    var previewUrl = null;

    $("#fileInput").change(function () {
        var files = $("#fileInput").get(0).files;

        // Always drop the previous preview first: the <audio> element is
        // removed before its URL is revoked, and a cleared selection no longer
        // leaves a stale player on screen.
        $('#audio1').empty();
        if (previewUrl) {
            // Release the blob reference held by the old object URL; without
            // this each new selection leaks one URL until the page unloads.
            URL.revokeObjectURL(previewUrl);
            previewUrl = null;
        }

        if (files.length === 0) {
            return;
        }

        previewUrl = URL.createObjectURL(files[0]);
        var audio = document.createElement('audio');
        audio.src = previewUrl;
        audio.controls = true;
        $('#audio1').append(audio);
    });
})();
// Ask the backend for the available synthesizer models and add one <option>
// per entry to the #select dropdown. Each entry's `name` becomes the visible
// label and its `path` becomes the option value. Failures are only logged.
fetch("/api/synthesizers", {
    method: 'get',
    headers: {
        "X-CSRFToken": "{{ csrf_token() }}"
    }
}).then(function (response) {
    // Successful responses are parsed as JSON; anything else is turned into
    // an error that carries the HTTP status text.
    if (response.ok) {
        return response.json();
    }
    throw Error(response.statusText);
}).then(function (synthesizers) {
    var dropdown = $("#select");
    for (var entry of synthesizers) {
        var item = document.createElement('option');
        item.text = entry.name;
        item.value = entry.path;
        dropdown.append(item);
    }
}).catch(function (failure) {
    console.log('Error: ' + failure.message);
});
var rec, wave, recBlob;
/** Call open to start the recorder and request microphone permission up front **/
var recOpen = function () {//一般在显示出录音按钮或相关的录音界面时进行此方法调用,后面用户点击开始录音时就能畅通无阻了
@@ -194,9 +240,15 @@
/** Upload **/
function recUpload() {
var blob = recBlob;
var blob
var loadedAudios = $("#fileInput").get(0).files
if (loadedAudios.length > 0) {
blob = loadedAudios[0];
} else {
blob = recBlob;
}
if (!blob) {
reclog("请先录音,然后停止后再上传", 1);
reclog("请先录音或选择音频,然后停止后再上传", 1);
return;
};
@@ -211,15 +263,18 @@
var csrftoken = "{{ csrf_token() }}";
var user_input_text = document.getElementById("user_input_text");
var input_text = user_input_text.value;
var postData = "";
postData += "mime=" + encodeURIComponent(blob.type);//告诉后端这个录音是什么格式的可能前后端都固定的mp3可以不用写
postData += "&upfile_b64=" + encodeURIComponent((/.+;\s*base64\s*,\s*(.+)$/i.exec(reader.result) || [])[1]) //录音文件内容后端进行base64解码成二进制
postData += "&text=" + encodeURIComponent(input_text);
var postData = new FormData();
postData.append("text", input_text)
postData.append("file", blob)
var sel = document.getElementById("select");
var path = sel.options[sel.selectedIndex].value;
if (!!path) {
postData.append("synt_path", path);
}
fetch(api, {
method: 'post',
headers: {
"Content-type": "application/x-www-form-urlencoded; charset=UTF-8",
"X-CSRFToken": csrftoken
},
body: postData
@@ -338,7 +393,6 @@
padding: 12px;
border-radius: 6px;
background: #fff;
--border: 1px solid #327de8;
box-shadow: 2px 2px 3px #aaa;
}
@@ -348,7 +402,7 @@
cursor: pointer;
border: none;
border-radius: 3px;
background: #327de8;
background: #5698c3;
color: #fff;
padding: 0 15px;
margin: 3px 10px 3px 0;
@@ -359,6 +413,13 @@
vertical-align: middle;
}
.btns #upload {
background: #5698c3;
color: #fff;
width: 100px;
height: 42px;
}
.btns button:active {
background: #5da1f5
}
@@ -379,6 +440,68 @@
padding: 2px 8px;
border-radius: 99px;
}
/* Hide the native file input visually; the adjacent <label> acts as the button. */
#fileInput {
    position: absolute;
    z-index: -1;
    overflow: hidden;
    opacity: 0;
    width: 0.1px;
    height: 0.1px;
}

/* The label that follows the hidden input, styled as a clickable button. */
#fileInput + label {
    display: inline-block;
    width: 70px;
    height: 36px;
    line-height: 36px;
    padding: 0 15px;
    border-radius: 4px;
    color: white;
    background-color: #5698c3;
    cursor: pointer; /* "hand" cursor */
}

/* Highlight the label on hover, or when the hidden input has focus. */
#fileInput:focus + label,
#fileInput + label:hover {
    background-color: #5da1f5;
}

/* Synthesizer model dropdown. */
.box select {
    width: 120px;
    padding: 8px;
    margin: 3px 10px 3px 0;
    border: none;
    border-radius: 4px;
    outline: none;
    font-size: 0.5em;
    color: white;
    background-color: #5698c3;
}

/* Decorative overlay for the dropdown wrapper.
   NOTE(review): this is absolutely positioned, but no `position` is set on
   .box in the rules visible here -- confirm a positioned ancestor exists.
   The "\f13a" glyph presumably expects an icon font -- TODO confirm. */
.box::before {
    content: "\f13a";
    position: absolute;
    top: 0;
    right: 0;
    width: 20%;
    height: 100%;
    text-align: center;
    font-size: 28px;
    line-height: 45px;
    color: rgba(255, 255, 255, 0.5);
    background-color: rgba(255, 255, 255, 0.1);
    pointer-events: none;
}

.box:hover::before {
    color: rgba(255, 255, 255, 0.6);
    background-color: rgba(255, 255, 255, 0.2);
}

.box select option {
    padding: 30px;
}
</style>
</body>