mirror of
https://github.com/babysor/Realtime-Voice-Clone-Chinese.git
synced 2026-04-14 02:20:46 +08:00
【bugfix】 fix bug causing nonsense output for long texts 修复多段文字发音错误
This commit is contained in:
@@ -90,13 +90,10 @@ class Synthesizer:
|
||||
|
||||
simple_table([("Tacotron", str(tts_k) + "k"),
|
||||
("r", self._model.r)])
|
||||
|
||||
#convert chinese char to pinyin
|
||||
list_of_pinyin = lazy_pinyin(texts, style=Style.TONE3)
|
||||
texts = [" ".join([v for v in list_of_pinyin if v.strip()])]
|
||||
texts = [" ".join(lazy_pinyin(v, style=Style.TONE3)) for v in texts]
|
||||
|
||||
# Preprocess text inputs
|
||||
inputs = [text_to_sequence(text.strip(), hparams.tts_cleaner_names) for text in texts]
|
||||
inputs = [text_to_sequence(text, hparams.tts_cleaner_names) for text in texts]
|
||||
if not isinstance(embeddings, list):
|
||||
embeddings = [embeddings]
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ through Unidecode. For other data, you can modify _characters. See TRAINING_DATA
|
||||
|
||||
_pad = "_"
|
||||
_eos = "~"
|
||||
_characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz12340!\'(),-.:;? '
|
||||
_characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890!\'(),-.:;? '
|
||||
# Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters):
|
||||
#_arpabet = ["@' + s for s in cmudict.valid_symbols]
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ import traceback
|
||||
import sys
|
||||
import torch
|
||||
import librosa
|
||||
import re
|
||||
from audioread.exceptions import NoBackendError
|
||||
|
||||
# Use this directory structure for your datasets, or modify it to fit your needs
|
||||
@@ -224,6 +225,13 @@ class Toolbox:
|
||||
self.init_synthesizer()
|
||||
|
||||
texts = self.ui.text_prompt.toPlainText().split("\n")
|
||||
punctuation = '!,。、,' # punctuate and split/clean text
|
||||
processed_texts = []
|
||||
for text in texts:
|
||||
for processed_text in re.sub(r'[{}]+'.format(punctuation), '\n', text).split('\n'):
|
||||
if processed_text:
|
||||
processed_texts.append(processed_text.strip())
|
||||
texts = processed_texts
|
||||
embed = self.ui.selected_utterance.embed
|
||||
embeds = [embed] * len(texts)
|
||||
specs = self.synthesizer.synthesize_spectrograms(texts, embeds)
|
||||
|
||||
Reference in New Issue
Block a user