From d328ecba81e4e7968941e52572eb2e651247c09e Mon Sep 17 00:00:00 2001 From: babysor00 Date: Wed, 20 Oct 2021 00:27:13 +0800 Subject: [PATCH] Reconstruct UI of toolbox --- toolbox/__init__.py | 2 +- toolbox/assets/mb.png | Bin 0 -> 5748 bytes toolbox/ui.py | 136 +++++++++++++++++++++++------------------- 3 files changed, 77 insertions(+), 61 deletions(-) create mode 100644 toolbox/assets/mb.png diff --git a/toolbox/__init__.py b/toolbox/__init__.py index 7d67b52..d162a78 100644 --- a/toolbox/__init__.py +++ b/toolbox/__init__.py @@ -234,7 +234,7 @@ class Toolbox: texts = processed_texts embed = self.ui.selected_utterance.embed embeds = [embed] * len(texts) - specs = self.synthesizer.synthesize_spectrograms(texts, embeds, style_idx=int(self.ui.style_idx_textbox.text())) + specs = self.synthesizer.synthesize_spectrograms(texts, embeds, style_idx=int(self.ui.slider.value())) breaks = [spec.shape[1] for spec in specs] spec = np.concatenate(specs, axis=1) diff --git a/toolbox/assets/mb.png b/toolbox/assets/mb.png new file mode 100644 index 0000000000000000000000000000000000000000..abd804cab48147cdfafc4a385cf501322bca6e1c GIT binary patch literal 5748 zcmaJ_WmHt(*QUFrVdxk^hLo0Wq&s9_7!+j4p&39>K#-CiS`k6%ZV;p!=?-D&?hyX- z`}BT$Kb(8kK6~AL*F9@Ldq2;P($`fdC1NDPz`!8Y)KD=*?>^`SC%{5KI|=MA&>JS) zP+bY5Y=n6SZQ#67)KSF1sEjARx5Gu-gzg#^a10Ex-~TPlUbiAB1_mvmri!AmpUpwG zuQj#VOz#_?G!8sm;y=_C96d_=xJk0pjtchYL8apqN%YUhZIf~G$IazV&fZqoe(hnT zvWjWtH?WrgcGFhohbiC~C)72cJ*2`J%*=h>qyd7$5RXD{MyOwhz)_3vqip1H7@Ipy zUsTw?daSvk1aluguGDOzD2GBN$lQ?ZP7RE$h@;g>hgZSps;&)3k@ zHaLzh;k76$bG`KO0fA~W{HFEd+Us_HUAijY>Sj?7w022n@}BIP6zk(V)vnjCBA z@9z<{mcyyq`i-6r_jiyZE>W(B=cj4TGiAY2u$bZR?Kd&ZemG|8ZOSgLXWX+BBc*{e z&B_yRZLr9s7QvO$b?&?XUzWlk=4QyuI551b+j!oQ6BO)=EERBSub77EDRR2J|RNm(siq`QYY zB&`{_oNsFz;0`$%L)p?99N+n2n4dTv-{fUrOym(C)1@?Z-z5y=;?u|&B@8hlfS*qO-*Gstr@Z+ z>`$YgxUG;q$|07m@^6$Sya&a#T;c=n=eR&p+9Z<}mF*X0OFvb;S?gl>$II%}N)1$b z$04SQBOfM;(+yXJ^&b)%N@`VXf{jY}t5Wb5-G4 zUF+SS%*w3&4;Q8ChCDdc=C`EQKO|Z8^8^uw~rn&CzJaf4_opfX#>1_O77HgDndE^`Lfo9(%t2I`CbLS{h z-5m}Uukz7X9vN)>h!O~)M& z;*Xk9ej+IA?%K(@@y>RIlJ*+VA{eJcGe8Kaov8EUwKhDa5?^$YSl zYJ3(hLbL_{3piSC?RQgssg2k}3AnMcvV)MLX8hGtmR8(ZjvF7q_BX%OqA_)FV=XVV zTh@26564LDUdcYX8nG8uiu0>e27>G5&%MgOwmx8dPa0V3_)9YEg58L98%Hk;L5Hr$ z&dv^b{O3DaQKRLm|6E;F^>fhvY~^$3=>!*nm)iH&VAHbj9&CW&+%#BgZY&Ep8nxs zfTX+aFh_#n)zQl6G*WLg=LH)41e~0l*sAPOrD0A{U=OdMfUJV2$)?DA`p-{|4V|1$ z3__po5y@PEbj}#*Wv!iLGgtvdP%u$BHSBUXhAJO;l)12dKM^ zYYLzq>PVx7DfNT?U7`eJ?t!g+dC3Ej6$po$+C-Zbag4bO_SC{;mxrT<;s7#rg5S)_ zZSvLQU++ER00KXfc)jVXAWCdval6{Db(oax{;`ntDSzxo90zx%yiaB+Kjb0&;K2E& zuJlLqj7p6UpPdT)5SB=h^kI)O#b+(Ra6R&@6NYde$8(&%y*eVtetO*#%cLKB@1VXP zz!vJ~=a(?SwT-z77JIkNnpn7>9tT<7JSaF^1+}as9vOlqgV5l7T;I+RX)RVw<7^@|k0-XkvDK0+(}vJ`K3#D+r4U%;sP&Mi2-U8q88%P7|QFMABE z`>u(A>F>=xmJ5x{MZ)W_HztPXUH=AM$)@(1IEeRf+WpT^Uz8(l8=9KP=!Np!lTM?! zqKb`wP%+09|8=H5s6#)?)x*mRG(C?Im|D z`=~d8_k05YV@1hZ1p9|nga|q?2}<*vtM>g;NH!_8Ov&1$(xiItdwV4VzF89_jI8ly zzOj!QctvQPqxK??u(Wn~8pyigG?BZ_qe4;R8h5`sBeno~IdL$#6#>gC{|8)NOq8-Z~FWWYG?X@<>~Cg>8Hb!$%`?P?6_Zu zbZB^=ga^V#@HTn*=lw>+Z`%aC^Ke0@U2)}%7YE{Y=`Q5p6KO6Up5-QQVmuw|#OfAE zPM0le-vZ2H^`n;VZkS{#;lBEXy*sMIq*iUA;eC7Oin9EOk&*Wk<6pg7JI^%zyJ1nq z(o#XUQy(5b-Lyi>g}`S+XC&lARh{EWgZ6y=RN~ScsVUBZK?np96mg;$P>LAU?^}38s#e!Qr z&8_VwBG_MFU*FoAS*u^Y z5A*CA8FB561Hi(e4}n~D-yExBK7J&~P*5Vs%=*=fH&&$iWQktG#%4KzU0F9*uS*c4{WM2qZLg{{G!A7#4w1O88WOwJ5y1k|inp110d)JJ#Uq z(8d@J;b`1`jc||?5PUXeq)yYulnE{8yp7AvAPnK9b zp&_;cei)$VhujDyP>-oim0Hn-lH6UN`~^sRv9&v_>WdKtT<%*W!MXoLhC(0^eM-Zo zNJau&{`U6o#13;5&2UpEIBFzYTm5c3KR&(K>me{hhJjnngx_b2qI1X!&s{;#x;`MO!8!!@_nA5<%7EC203Q#p&TS4wQuEbE zO3w5_U~q6SbjyCeGbmI1_1y7+|zTGHnFm* zYaN>keFy<5vvmQBzc$LFHN}~h0*U5dJRp%mga`~}g9Hd5NEnTF*!!faBwjr=TD5XONIg1>Yo-%tjFFQLYRsR1-F&fyEK@nw zTApm`LsR~IztzT?vaEO2kzs87M-GrS{*ibXq?JW-8$0AolqK852UDbAkwX-<6Ld)w zo0gb&SSMCybWIfM6(%VVFj122j=g4KVR2+anqjS*8SYM(e)<#ZpNkvpVTwBw@2?Z8 zU$Cm+7T&1E*0#dPz|fC=JFaWpUXCbyee#FVkQJ7OAP% zE6G1LOq8zo1+AdXPz$%;QR|gsrhrwQP#Uy^vr-atf7pD)NNA_kfl(T1!QO(n@tmim ztEvGQ6nsPu_`S#}(zI z$vd24v+~zAgn3GA2MSK15pg8h*a}YuTD~iYeOxJ4z_0Oa zx)WVo>_N(TzABk(siT8~_BKYVx3w+{sXqJ5t<4?akIt(*JDLl&_B&UR*SI|L^75P# zxz}t|BUz0@UrMNWxw*%Mqz3ZQS19U6cwD_VWzX9BIhf8~uu>*cGCDr~4ga~q1;?ej zzF=WN0lEefkdqez3U#vwtRpDyu0-FAKtXU|aTkO(cq`jCeUM_7MN!_;)92wv#u@C?DM7r)irwWlCc`BS=y8mV`0ixhe-(wyHT$n9vd1z$_@xb7%WQ_ zXQ7;j*P)k(?Xy2i`H5hmsHi9*f#P2?PTlz%MeX06_+@B#IwBghdED*t%G-!&laNTw z6-4(q0)e2I6dNo+Lo?`Qo6`R7E+Zr304<5#mo7;rD26d5Z0==+j85m@dy-&1)%%yD z?P2IK3B#SZncXvax2{6-;QJGG)E{h}#&g~su;ey8rDX1+QYdK-A z9Dp2xz6Xtjx0ANHnJHvMRjaGE=Q;^EXwGvEuISNuZ)nCW#V@nnoSDACQ;;hP8J)7F z#`YMImy$v@4k_LbSfbf=MFYBVKr&KKri`oo?2_)QIJ92ocI=Pm_hvkIUR60!cxZ%g zSsrVBa7fmRGFV9b@(84G-9n2OU8G3(fFc*id6ZL13EH=F0LTNkV1Gn6td&1zi5;h# zgAy+DDs>37ubUT48XSn84jis;!72O1UJ0;n8bO$tm_(vm`8Y+qw#U$%p+h0O4cbrT zIiPZroZd!C-WB>WQAI_CS`_*fq4g|y7mUEE{ifc5c(3gPQb7DxD#OV?6jJ8AK7(|}s7yZ1gpu&&M zzDLm$FQ}f5Qq?<6yzY*sfuaSh3=mjkzL|ioQW4kYUQT|rb{%(mn-&lXX6E}P-<8j; z@GV^l2?u=nmH`dXuEGp6Jd@$ z&y`BrpN1E}fr*K^KcpRh_MVngE>7Z($e15GVLQDByV5Y>~;|C^m|t|C#TUmL4=q@)^g zZ<^}MQuqpDV+TfbueP1SyQXnWS}aRV`I6tpsud`0RUU_ITeDX#8gD?5YYmV14J9eN V&EL21(34^eO;uf$G9}xv{{hN{34QDataset"), i, 0) - browser_layout.addWidget(self.dataset_box, i + 1, 0) - self.speaker_box = QComboBox() - browser_layout.addWidget(QLabel("Speaker"), i, 1) - browser_layout.addWidget(self.speaker_box, i + 1, 1) - self.utterance_box = QComboBox() - browser_layout.addWidget(QLabel("Utterance"), i, 2) - browser_layout.addWidget(self.utterance_box, i + 1, 2) - self.browser_load_button = QPushButton("Load") - browser_layout.addWidget(self.browser_load_button, i + 1, 3) - i += 2 - # Random buttons + source_groupbox = QGroupBox('Source(源音频)') + source_layout = QGridLayout() + source_groupbox.setLayout(source_layout) + browser_layout.addWidget(source_groupbox, i, 0, 1, 4) + + self.dataset_box = QComboBox() + source_layout.addWidget(QLabel("Dataset(数据集):"), i, 0) + source_layout.addWidget(self.dataset_box, i, 1) self.random_dataset_button = QPushButton("Random") - browser_layout.addWidget(self.random_dataset_button, i, 0) + source_layout.addWidget(self.random_dataset_button, i, 2) + i += 1 + self.speaker_box = QComboBox() + source_layout.addWidget(QLabel("Speaker(说话者)"), i, 0) + source_layout.addWidget(self.speaker_box, i, 1) self.random_speaker_button = QPushButton("Random") - browser_layout.addWidget(self.random_speaker_button, i, 1) + source_layout.addWidget(self.random_speaker_button, i, 2) + i += 1 + self.utterance_box = QComboBox() + source_layout.addWidget(QLabel("Utterance(音频):"), i, 0) + source_layout.addWidget(self.utterance_box, i, 1) self.random_utterance_button = QPushButton("Random") - browser_layout.addWidget(self.random_utterance_button, i, 2) + source_layout.addWidget(self.random_utterance_button, i, 2) + + i += 1 + source_layout.addWidget(QLabel("Use(使用):"), i, 0) + self.browser_load_button = QPushButton("Load Above(加载上面)") + source_layout.addWidget(self.browser_load_button, i, 1, 1, 2) self.auto_next_checkbox = QCheckBox("Auto select next") self.auto_next_checkbox.setChecked(True) - browser_layout.addWidget(self.auto_next_checkbox, i, 3) - i += 1 + source_layout.addWidget(self.auto_next_checkbox, i+1, 1) + self.browser_browse_button = QPushButton("Browse(打开本地)") + source_layout.addWidget(self.browser_browse_button, i, 3) + self.record_button = QPushButton("Record(录音)") + source_layout.addWidget(self.record_button, i+1, 3) + i += 2 # Utterance box - browser_layout.addWidget(QLabel("Use embedding from:"), i, 0) + browser_layout.addWidget(QLabel("Current(当前):"), i, 0) self.utterance_history = QComboBox() - browser_layout.addWidget(self.utterance_history, i, 1, 1, 3) - i += 1 - - # Random & next utterance buttons - self.browser_browse_button = QPushButton("Browse") - browser_layout.addWidget(self.browser_browse_button, i, 0) - self.record_button = QPushButton("Record") - browser_layout.addWidget(self.record_button, i, 1) - self.play_button = QPushButton("Play") + browser_layout.addWidget(self.utterance_history, i, 1) + self.play_button = QPushButton("Play(播放)") browser_layout.addWidget(self.play_button, i, 2) - self.stop_button = QPushButton("Stop") + self.stop_button = QPushButton("Stop(暂停)") browser_layout.addWidget(self.stop_button, i, 3) - i += 1 + i += 1 + model_groupbox = QGroupBox('Models(模型选择)') + model_layout = QHBoxLayout() + model_groupbox.setLayout(model_layout) + browser_layout.addWidget(model_groupbox, i, 0, 1, 4) # Model and audio output selection self.encoder_box = QComboBox() - browser_layout.addWidget(QLabel("Encoder"), i, 0) - browser_layout.addWidget(self.encoder_box, i + 1, 0) + model_layout.addWidget(QLabel("Encoder:")) + model_layout.addWidget(self.encoder_box) self.synthesizer_box = QComboBox() - browser_layout.addWidget(QLabel("Synthesizer"), i, 1) - browser_layout.addWidget(self.synthesizer_box, i + 1, 1) + model_layout.addWidget(QLabel("Synthesizer:")) + model_layout.addWidget(self.synthesizer_box) self.vocoder_box = QComboBox() - browser_layout.addWidget(QLabel("Vocoder"), i, 2) - browser_layout.addWidget(self.vocoder_box, i + 1, 2) + model_layout.addWidget(QLabel("Vocoder:")) + model_layout.addWidget(self.vocoder_box) - self.audio_out_devices_cb=QComboBox() - browser_layout.addWidget(QLabel("Audio Output"), i, 3) - browser_layout.addWidget(self.audio_out_devices_cb, i + 1, 3) - i += 2 #Replay & Save Audio - browser_layout.addWidget(QLabel("Toolbox Output:"), i, 0) + i = 0 + output_layout.addWidget(QLabel("Toolbox Output:"), i, 0) self.waves_cb = QComboBox() self.waves_cb_model = QStringListModel() self.waves_cb.setModel(self.waves_cb_model) self.waves_cb.setToolTip("Select one of the last generated waves in this section for replaying or exporting") - browser_layout.addWidget(self.waves_cb, i, 1) + output_layout.addWidget(self.waves_cb, i, 1) self.replay_wav_button = QPushButton("Replay") self.replay_wav_button.setToolTip("Replay last generated vocoder") - browser_layout.addWidget(self.replay_wav_button, i, 2) + output_layout.addWidget(self.replay_wav_button, i, 2) self.export_wav_button = QPushButton("Export") self.export_wav_button.setToolTip("Save last generated vocoder audio in filesystem as a wav file") - browser_layout.addWidget(self.export_wav_button, i, 3) + output_layout.addWidget(self.export_wav_button, i, 3) + self.audio_out_devices_cb=QComboBox() i += 1 - + output_layout.addWidget(QLabel("Audio Output"), i, 0) + output_layout.addWidget(self.audio_out_devices_cb, i, 1) ## Embed & spectrograms vis_layout.addStretch() @@ -554,7 +567,6 @@ class UI(QDialog): for side in ["top", "right", "bottom", "left"]: ax.spines[side].set_visible(False) - ## Generation self.text_prompt = QPlainTextEdit(default_text) gen_layout.addWidget(self.text_prompt, stretch=1) @@ -576,10 +588,14 @@ class UI(QDialog): self.seed_textbox = QLineEdit() self.seed_textbox.setMaximumWidth(80) layout_seed.addWidget(self.seed_textbox, 0, 1) - layout_seed.addWidget(QLabel("Style#:(0~9)"), 0, 2) - self.style_idx_textbox = QLineEdit("-1") - self.style_idx_textbox.setMaximumWidth(80) - layout_seed.addWidget(self.style_idx_textbox, 0, 3) + self.slider = QSlider(Qt.Horizontal) + self.slider.setTickInterval(1) + self.slider.setFocusPolicy(Qt.NoFocus) + self.slider.setSingleStep(1) + self.slider.setRange(-1, 9) + self.slider.setValue(-1) + layout_seed.addWidget(QLabel("Style:"), 0, 2) + layout_seed.addWidget(self.slider, 0, 3) self.trim_silences_checkbox = QCheckBox("Enhance vocoder output") self.trim_silences_checkbox.setToolTip("When checked, trims excess silence in vocoder output." " This feature requires `webrtcvad` to be installed.") @@ -597,7 +613,7 @@ class UI(QDialog): ## Set the size of the window and of the elements - max_size = QDesktopWidget().availableGeometry(self).size() * 0.8 + max_size = QDesktopWidget().availableGeometry(self).size() * 0.5 self.resize(max_size) ## Finalize the display