huge commit. assistant code is now semifunctional

This commit is contained in:
Looki2000
2023-12-01 21:48:56 +01:00
parent af6f2a13b7
commit f18e41476f
7 changed files with 501 additions and 168 deletions


@@ -1,5 +1,4 @@
 import os
-import torch
 import pyaudio
 from TTS.api import TTS
 from TTS.tts.configs.xtts_config import XttsConfig
@@ -7,27 +6,35 @@ from TTS.tts.models.xtts import Xtts
 from TTS.utils.generic_utils import get_user_data_dir
 import threading
 import time
+import re

-# Check if CUDA is available
-if torch.cuda.is_available():
-    print("Using CUDA")
-    device = "cuda"
-else:
-    print("Using CPU")
-    device = "cpu"

 model_name = "tts_models/multilingual/multi-dataset/xtts_v2"

-class TTSstream:
-    def __init__(self, speaker_wav):
+class TTSStream:
+    def __init__(self, speaker_wav=None, device=None):
         model_path = os.path.join(get_user_data_dir("tts"), model_name.replace("/", "--"))

+        if device is None:
+            import torch
+
+            # Check if CUDA is available
+            if torch.cuda.is_available():
+                print("Using CUDA")
+                device = "cuda"
+            else:
+                print("Using CPU")
+                device = "cpu"
+
         #print(model_path)

         print("Loading TTS model... ", end="")

         # download model if it doesn't exist
         if not os.path.exists(os.path.join(model_path, "config.json")):
-            print("Downloading model...")
+            print("Downloading model... ", end="")
             tts = TTS()
             tts.download_model_by_name(model_name=model_name)
@@ -43,11 +50,16 @@ class TTSstream:
         )
         self.model.to(device)
         print("Done!")

-        self.gpt_cond_latent, self.speaker_embedding = self.model.get_conditioning_latents(audio_path=speaker_wav)
+        if speaker_wav is not None:
+            #self.gpt_cond_latent, self.speaker_embedding = self.model.get_conditioning_latents(audio_path=speaker_wav)
+            self.change_speaker(speaker_wav)
+
+    def change_speaker(self, speaker_wav):
+        print("Loading speaker... ", end="")
+        self.gpt_cond_latent, self.speaker_embedding = self.model.get_conditioning_latents(audio_path=speaker_wav)
+        print("Done!")

     def _write_stream(self):
         # play first play_buffer_size samples and remove them from the buffer
@@ -58,6 +70,7 @@ class TTSstream:
                 self.stream.write(self.chunk)
             else:
                 if self.all_done:
+                    #self.thread_ended = True
                     break
                 time.sleep(0.01)
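For context: `_write_stream` is the consumer half of a producer/consumer pair. `tts_speak` appends raw audio bytes to the shared `self.chunks_bin` buffer while this thread drains it in fixed-size pieces, with `self.all_done` as the end-of-stream flag. A minimal standalone sketch of that pattern follows; the names here are hypothetical, and a lock is added for safety where the committed code relies on CPython's GIL instead:

import threading
import time

class ChunkPlayer:
    # Illustrative sketch, not part of the commit.
    def __init__(self, chunk_size=2048):
        self.chunk_size = chunk_size   # bytes per write, cf. play_buffer_size
        self.buffer = b""
        self.all_done = False
        self.lock = threading.Lock()   # the committed code relies on the GIL instead

    def _consume(self, sink):
        # mirror of _write_stream: drain fixed-size chunks until the producer
        # signals completion; a trailing partial chunk is dropped, as in the original
        while True:
            chunk = None
            with self.lock:
                if len(self.buffer) >= self.chunk_size:
                    chunk = self.buffer[:self.chunk_size]
                    self.buffer = self.buffer[self.chunk_size:]
            if chunk is not None:
                sink(chunk)            # stream.write(chunk) in the real code
            elif self.all_done:
                break
            else:
                time.sleep(0.01)       # buffer underrun: wait for the producer

    def play(self, chunks, sink):
        # mirror of tts_speak's inner loop: consume in a background thread
        # while appending freshly generated audio bytes to the shared buffer
        thread = threading.Thread(target=self._consume, args=(sink,))
        thread.start()
        for chunk in chunks:           # e.g. bytes from model.inference_stream(...)
            with self.lock:
                self.buffer += chunk
        self.all_done = True           # let the consumer drain and exit
        thread.join()

For example, ChunkPlayer(chunk_size=4).play([b"abcdefgh", b"ij"], sink=print) plays b"abcd" and b"efgh" and, like the original, silently drops the 2-byte tail.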
@@ -65,42 +78,60 @@
     def tts_speak(self, text):
         self.play_buffer_size = 512

-        chunks = self.model.inference_stream(
-            text,
-            "pl",
-            self.gpt_cond_latent,
-            self.speaker_embedding,
-            stream_chunk_size=20,
-        )
-
         # open pyaudio stream
         p = pyaudio.PyAudio()
         self.stream = p.open(format=pyaudio.paFloat32, channels=1, rate=24000, output=True)

-        self.chunks_bin = b""
-        self.all_done = False
-
-        # run write_stream as thread
-        thread = threading.Thread(target=self._write_stream)
-        thread.start()
-
-        while True:
-            try:
-                # read chunks from chunks generator as they are generated
-                for self.chunk in chunks:
-                    self.chunks_bin += self.chunk.cpu().numpy().astype("float32").tobytes()
-                break
-            # some weird error caused by coqui-tts
-            except:
-                print("Error occurred when generating audio stream. Retrying...")
-
-        self.all_done = True
-
-        # wait for thread to finish
-        thread.join()
+        # for each sentence ending with . or ! or ?
+        for text in re.split(r"(?<=[.!?])", text):
+            text = text.strip()
+
+            if len(text) == 0:
+                continue
+
+            chunks = self.model.inference_stream(
+                text,
+                "pl",
+                self.gpt_cond_latent,
+                self.speaker_embedding,
+                stream_chunk_size=20,
+            )
+
+            self.chunks_bin = b""
+            self.all_done = False
+
+            # run write_stream as thread
+            #self.thread_ended = False
+            thread = threading.Thread(target=self._write_stream)
+            thread.start()
+
+            while True:
+                try:
+                    # read chunks from chunks generator as they are generated
+                    for self.chunk in chunks:
+                        self.chunks_bin += self.chunk.cpu().numpy().astype("float32").tobytes()
+                    break
+                # some weird error caused by coqui-tts
+                except:
+                    print("Error occurred when generating audio stream. Retrying...")
+                    continue
+
+            self.all_done = True
+
+            # wait for thread to finish
+            thread.join()
+
+            # wait for thread ended
+            #while not self.thread_ended:
+            #    time.sleep(0.01)
+            #while True:
+            #    if self.thread_ended:
+            #        break
+            #    print("Waiting for thread to end...")
+            #    time.sleep(0.01)

         self.stream.close()
         p.terminate()
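Taken together: `tts_speak` now splits the input with `re.split(r"(?<=[.!?])", text)`, which keeps each terminator attached to its sentence and yields an empty piece when the text ends in a terminator (hence the `len(text) == 0` guard), then synthesizes and plays each sentence as its chunks arrive. A minimal usage sketch under assumed names; the module name and WAV paths are hypothetical, and note the language code is hard-wired to "pl" (Polish):

from tts_stream import TTSStream   # module name assumed; the filename is not shown in the diff

# device is auto-detected (CUDA if available) when not passed explicitly
tts = TTSStream(speaker_wav="voices/speaker.wav")

# each sentence is synthesized and streamed to the sound card in turn
tts.tts_speak("Cześć! Jak się masz? To jest test strumieniowania.")

# swap the reference voice without reloading the model
tts.change_speaker("voices/other_speaker.wav")
tts.tts_speak("Teraz mówię innym głosem.")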