diff --git a/synthesizer/inference.py b/synthesizer/inference.py index f89dbac..e5ab1bf 100644 --- a/synthesizer/inference.py +++ b/synthesizer/inference.py @@ -70,7 +70,7 @@ class Synthesizer: def synthesize_spectrograms(self, texts: List[str], embeddings: Union[np.ndarray, List[np.ndarray]], - return_alignments=False, style_idx=0, min_stop_token=5): + return_alignments=False, style_idx=0, min_stop_token=5, steps=2000): """ Synthesizes mel spectrograms from texts and speaker embeddings. @@ -125,7 +125,7 @@ class Synthesizer: speaker_embeddings = torch.tensor(speaker_embeds).float().to(self.device) # Inference - _, mels, alignments = self._model.generate(chars, speaker_embeddings, style_idx=style_idx, min_stop_token=min_stop_token) + _, mels, alignments = self._model.generate(chars, speaker_embeddings, style_idx=style_idx, min_stop_token=min_stop_token, steps=steps) mels = mels.detach().cpu().numpy() for m in mels: # Trim silence from end of each spectrogram diff --git a/synthesizer/models/tacotron.py b/synthesizer/models/tacotron.py index 0ed665f..e7c26f2 100644 --- a/synthesizer/models/tacotron.py +++ b/synthesizer/models/tacotron.py @@ -420,7 +420,7 @@ class Tacotron(nn.Module): return mel_outputs, linear, attn_scores, stop_outputs - def generate(self, x, speaker_embedding=None, steps=200, style_idx=0, min_stop_token=5): + def generate(self, x, speaker_embedding=None, steps=2000, style_idx=0, min_stop_token=5): self.eval() device = next(self.parameters()).device # use same device as parameters diff --git a/toolbox/__init__.py b/toolbox/__init__.py index 4517270..827833a 100644 --- a/toolbox/__init__.py +++ b/toolbox/__init__.py @@ -235,7 +235,7 @@ class Toolbox: embed = self.ui.selected_utterance.embed embeds = [embed] * len(texts) min_token = int(self.ui.token_slider.value()) - specs = self.synthesizer.synthesize_spectrograms(texts, embeds, style_idx=int(self.ui.style_slider.value()), min_stop_token=min_token) + specs = self.synthesizer.synthesize_spectrograms(texts, embeds, style_idx=int(self.ui.style_slider.value()), min_stop_token=min_token, steps=int(self.ui.length_slider.value())*200) breaks = [spec.shape[1] for spec in specs] spec = np.concatenate(specs, axis=1) diff --git a/toolbox/ui.py b/toolbox/ui.py index ae5d2bc..6c33418 100644 --- a/toolbox/ui.py +++ b/toolbox/ui.py @@ -618,6 +618,19 @@ class UI(QDialog): layout_seed.addWidget(self.token_value_label, 2, 1) layout_seed.addWidget(self.token_slider, 2, 3) + self.length_slider = QSlider(Qt.Horizontal) + self.length_slider.setTickInterval(1) + self.length_slider.setFocusPolicy(Qt.NoFocus) + self.length_slider.setSingleStep(1) + self.length_slider.setRange(1, 10) + self.length_value_label = QLabel("2") + self.length_slider.setValue(2) + layout_seed.addWidget(QLabel("MaxLength(最大句长):"), 3, 0) + + self.length_slider.valueChanged.connect(lambda s: self.length_value_label.setNum(s)) + layout_seed.addWidget(self.length_value_label, 3, 1) + layout_seed.addWidget(self.length_slider, 3, 3) + gen_layout.addLayout(layout_seed) self.loading_bar = QProgressBar()