v2.7.3

BBC-Esq · web-flow · commit 6c6bced5fc6b · 2023-12-18T21:58:42.000-05:00
diff --git a/src/bark_module.py b/src/bark_module.py
@@ -0,0 +1,123 @@
+import threading
+import queue
+from transformers import AutoProcessor, BarkModel
+import torch
+import numpy as np
+import re
+import time
+import pyaudio
+import gc
+
+class BarkAudio:
+    """Speak the text stored in ``chat_history.txt`` sentence by sentence with Bark.
+
+    One worker thread synthesizes audio for each sentence while a second
+    thread plays the sentences that are already finished, so playback of one
+    sentence overlaps with synthesis of the next.  ``None`` is the
+    end-of-stream sentinel on both queues.  An instance is single-use: the
+    model and processor are released once a run finishes or ``stop()`` is
+    called.
+    """
+
+    # A sentence ends at any run of ". ! ? ;" characters.
+    _SENTENCE_BOUNDARY = re.compile(r'[.!?;]+')
+
+    def __init__(self):
+        """Load the Bark processor/model and create the queues shared by the worker threads."""
+        self.processor = AutoProcessor.from_pretrained("suno/bark-small")
+        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        self.model = BarkModel.from_pretrained("suno/bark-small", torch_dtype=torch.float16).to(self.device)
+        if self.device == "cpu":
+            # Half-precision kernels are missing (or very slow) on many CPU
+            # builds of PyTorch, so run in float32 when no GPU is available.
+            self.model = self.model.float()
+        self.model = self.model.to_bettertransformer()
+        # self.model.enable_cpu_offload()
+
+        self.sentence_queue = queue.Queue()    # (audio, sampling_rate, sentence_num) tuples, then None
+        self.processing_queue = queue.Queue()  # text prompts, then None
+        self.start_time = None
+        self._stop_event = threading.Event()   # set by stop() to abort early
+
+    @classmethod
+    def _split_sentences(cls, text):
+        """Return the non-blank pieces of *text* obtained by splitting on sentence boundaries."""
+        return [piece for piece in cls._SENTENCE_BOUNDARY.split(text) if piece.strip()]
+
+    @staticmethod
+    def _to_int16(audio_array):
+        """Peak-normalize *audio_array* and convert it to 16-bit PCM samples.
+
+        The math is done in float32: in float16 the scale factor 32767 rounds
+        to 32768, so the loudest sample can overflow int16.  All-zero (or
+        empty) input yields silence instead of dividing by zero.
+        """
+        samples = np.asarray(audio_array, dtype=np.float32)
+        peak = float(np.max(np.abs(samples))) if samples.size else 0.0
+        if peak == 0.0:
+            return np.zeros(samples.shape, dtype=np.int16)
+        return (samples / peak * 32767).astype(np.int16)
+
+    def play_audio_thread(self):
+        """Play queued sentences until the ``None`` sentinel arrives, then release the model."""
+        # One PortAudio session for the whole run instead of one per sentence.
+        p = pyaudio.PyAudio()
+        try:
+            while True:
+                queue_item = self.sentence_queue.get()
+                if queue_item is None or self._stop_event.is_set():
+                    break
+
+                audio_array, sampling_rate, sentence_num = queue_item
+                elapsed_time = time.time() - self.start_time
+                print(f"({elapsed_time:.2f} seconds) Playing sentence #{sentence_num}")
+
+                stream = p.open(format=pyaudio.paInt16, channels=1, rate=sampling_rate, output=True)
+                try:
+                    stream.write(audio_array.tobytes())
+                    stream.stop_stream()
+                finally:
+                    # Close the stream even if the write fails.
+                    stream.close()
+        finally:
+            p.terminate()
+
+        # Not stop(): that would enqueue another sentinel nobody consumes.
+        self.release_resources()
+
+    def process_text_thread(self):
+        """Synthesize every sentence of each queued prompt and hand the audio to the playback queue.
+
+        Runs until a ``None`` prompt is received or ``stop()`` is called, and
+        always forwards a ``None`` sentinel to ``sentence_queue`` on exit so
+        the playback thread terminates as well.
+        """
+        sentence_count = 1
+        voice_preset = "v2/en_speaker_6"
+        try:
+            # Local references keep the sentence in flight working even if
+            # release_resources() drops the attributes concurrently.
+            model = self.model
+            processor = self.processor
+            sampling_rate = model.generation_config.sample_rate
+
+            while not self._stop_event.is_set():
+                text_prompt = self.processing_queue.get()
+                if text_prompt is None:
+                    break
+
+                for sentence in self._split_sentences(text_prompt):
+                    if self._stop_event.is_set():
+                        break
+
+                    elapsed_time = time.time() - self.start_time
+                    print(f"({elapsed_time:.2f} seconds) Processing sentence #{sentence_count}")
+
+                    inputs = processor(text=sentence, voice_preset=voice_preset, return_tensors="pt")
+
+                    with torch.no_grad():
+                        speech_output = model.generate(**inputs.to(self.device), do_sample=True)
+
+                    audio_array = self._to_int16(speech_output[0].cpu().numpy())
+
+                    self.sentence_queue.put((audio_array, sampling_rate, sentence_count))
+                    sentence_count += 1
+        finally:
+            # Tell the playback thread that no more audio is coming, even if
+            # synthesis failed; otherwise it would block on get() forever.
+            self.sentence_queue.put(None)
+
+    def run(self):
+        """Read ``chat_history.txt``, speak it, and return once playback has finished.
+
+        Raises:
+            OSError: if ``chat_history.txt`` cannot be opened (e.g. FileNotFoundError).
+        """
+        with open('chat_history.txt', 'r', encoding='utf-8') as file:
+            llm_response = file.read()
+
+        self.processing_queue.put(llm_response)
+        # End-of-input sentinel: without it both workers block on their
+        # queues forever and the joins below never return.
+        self.processing_queue.put(None)
+
+        self.start_time = time.time()
+
+        processing_thread = threading.Thread(target=self.process_text_thread)
+        playback_thread = threading.Thread(target=self.play_audio_thread)
+        processing_thread.start()
+        playback_thread.start()
+
+        processing_thread.join()
+        playback_thread.join()
+
+        # Both workers are done, so the model memory can really be reclaimed now.
+        self.release_resources()
+
+    def stop(self):
+        """Ask both worker threads to finish as soon as possible and release the model."""
+        self._stop_event.set()
+        # Wake up whichever thread is currently blocked on its queue.
+        self.processing_queue.put(None)
+        self.sentence_queue.put(None)
+
+        self.release_resources()
+
+    def release_resources(self):
+        """Drop the model and processor and return cached GPU memory; safe to call more than once."""
+        self.__dict__.pop("model", None)
+        self.__dict__.pop("processor", None)
+        # Collect first so freed tensors are actually returned by empty_cache().
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
+if __name__ == "__main__":
+    # Script entry point: load Bark and speak the contents of chat_history.txt.
+    BarkAudio().run()
+
+'''
+INSTRUCTIONS:
+
+(1) Bark consists of 4 models but only one is used at any given moment.  You can uncomment "self.model.enable_cpu_offload()"
+    to put 3 models into RAM and only the one being used into VRAM.  This saves VRAM at a significant speed cost.
+
+(2) Delete ", torch_dtype=torch.float16" verbatim to run the model in float32 instead of float16.  You must leave ".to(self.device)".
+
+(3) You can comment out "self.model = self.model.to_bettertransformer()" to NOT use "Better Transformer," which comes from Hugging Face's
+    Optimum library.  Only do this if Better Transformer isn't compatible with your system, but it should be, and it provides a 5-20% speedup.
+
+(4) Finally, to use the Bark full-size model remove "-small" on the two lines above; for example, it should read "suno/bark" instead.
+
+*** You can experiment with any combination of items (1)-(4) above to get the VRAM/speed/quality you want.  For example, using the
+    full-size Bark models but only at float16...or using the "-small" models but at full float32. ***
+'''
+
+'''
+INSTRUCTIONS:
+                    
+Go here for examples of different voices:
+
+https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c
+'''
diff --git a/src/gui.py b/src/gui.py
@@ -7,6 +7,7 @@
 import torch
 import yaml
 import sys
+import threading  # Import threading
 from initialize import main as initialize_system
 from metrics_bar import MetricsBar
 from download_model import download_embedding_model
@@ -19,6 +20,9 @@
 import voice_recorder_module
 from utilities import list_theme_files, make_theme_changer, load_stylesheet
 
+# Import BarkAudio from bark_module
+from bark_module import BarkAudio
+
 class DocQA_GUI(QWidget):
     def __init__(self):
         super().__init__()
@@ -93,9 +97,15 @@ def init_ui(self):
         self.submit_button.clicked.connect(self.on_submit_button_clicked)
         right_vbox.addWidget(self.submit_button)
 
+        # Test Embeddings checkbox and Bark button
+        checkbox_button_hbox = QHBoxLayout()
         self.test_embeddings_checkbox = QCheckBox("Test Embeddings")
         self.test_embeddings_checkbox.stateChanged.connect(self.on_test_embeddings_changed)
-        right_vbox.addWidget(self.test_embeddings_checkbox)
+        checkbox_button_hbox.addWidget(self.test_embeddings_checkbox)
+        bark_button = QPushButton("Bark")
+        bark_button.clicked.connect(self.on_bark_button_clicked)  # Connect to the new handler
+        checkbox_button_hbox.addWidget(bark_button)
+        right_vbox.addLayout(checkbox_button_hbox)
 
         # Create and add button row
         button_row_widget = self.create_button_row(self.on_submit_button_clicked)
@@ -196,6 +206,15 @@ def stop_recording():
 
         return row_widget
 
+    # Handler for the Bark button click
+    def on_bark_button_clicked(self):
+        """Start Bark text-to-speech on a background thread.
+
+        Model loading and synthesis are slow, so they run off the GUI thread
+        to keep the window responsive.  The thread is a daemon (threads it
+        spawns inherit that flag), so an unfinished Bark run cannot keep the
+        process alive after the window is closed.
+        """
+        threading.Thread(target=self.run_bark_module, daemon=True).start()
+
+    # Method to instantiate and run BarkAudio
+    def run_bark_module(self):
+        """Build a fresh BarkAudio instance and run it to completion."""
+        BarkAudio().run()
+
 if __name__ == '__main__':
     app = QApplication(sys.argv)
     app.setStyle(QStyleFactory.create('fusion'))
diff --git a/src/requirements.txt b/src/requirements.txt
@@ -18,5 +18,8 @@ PyAudio==0.2.14
 faster-whisper==0.10.0
 termcolor==2.3.0
 pypandoc==1.12
-pydub==0.25.1
-PyYAML==6.0.1
+PyYAML==6.0.1
+transformers==4.36.0
+accelerate==0.25.0
+optimum==1.15.0
+pydub==0.25.1
diff --git a/src/server_connector.py b/src/server_connector.py
@@ -173,11 +173,25 @@ def ask_local_chatgpt(query, persist_directory=PERSIST_DIRECTORY, client_setting
 
     response_json = connect_to_local_chatgpt(augmented_query)
 
+    full_response = []
+
     for chunk_message in response_json:
+        if full_response and isinstance(full_response[-1], str):
+            full_response[-1] += chunk_message
+        else:
+            full_response.append(chunk_message)
+
         yield chunk_message
 
+    # Save the full response to chat_history.txt
+    with open('chat_history.txt', 'w', encoding='utf-8') as file:
+        for message in full_response:
+            file.write(message)
+
     yield "\n\n"
     
+    # LLM's response complete
+    # format and append citations
     citations = format_metadata_as_citations(metadata_list)
     
     unique_citations = []
@@ -195,7 +209,6 @@ def ask_local_chatgpt(query, persist_directory=PERSIST_DIRECTORY, client_setting
 
     return {"answer": response_json, "sources": relevant_contexts}
 
-
 if __name__ == "__main__":
     user_input = "Your query here"
-    interact_with_chat(user_input)
+    interact_with_chat(user_input)