Skip to content

Commit fc854e7

Browse files
authored
Version 1.4.1 files.
1 parent b270562 commit fc854e7

File tree

9 files changed: +65 additions, −39 deletions

config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,4 @@ DOCUMENT_MAP:
2626
.txt: TextLoader
2727
.xls: UnstructuredExcelLoader
2828
.xlsx: UnstructuredExcelLoader
29-
EMBEDDING_MODEL_NAME: C:/PATH/Scripts/LM Search Vector Database_v1_working/Embedding_Models/BAAI--bge-base-en
29+
EMBEDDING_MODEL_NAME: C:/PATH/Scripts/LM Search Vector Database_v1_4 - working/Embedding_Models/hkunlp--instructor-large

document_chunker.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
from langchain.text_splitter import RecursiveCharacterTextSplitter
22

33
def split_documents(documents):
4-
54
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=400)
65
texts = text_splitter.split_documents(documents)
6+
77
return texts
8-

document_loader.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,22 +41,29 @@ def load_single_document(file_path: str) -> Document:
4141
return loader.load()[0]
4242

4343
def load_document_batch(filepaths):
44+
4445
with ThreadPoolExecutor(len(filepaths)) as exe:
4546
futures = [exe.submit(load_single_document, name) for name in filepaths]
4647
data_list = [future.result() for future in futures]
47-
return (data_list, filepaths)
48+
49+
return (data_list, filepaths)
4850

4951
def load_documents(source_dir: str) -> list[Document]:
52+
5053
all_files = os.listdir(source_dir)
5154
paths = [os.path.join(source_dir, file_path) for file_path in all_files if os.path.splitext(file_path)[1] in DOCUMENT_MAP.keys()]
5255

5356
n_workers = min(INGEST_THREADS, max(len(paths), 1))
5457
chunksize = round(len(paths) / n_workers)
5558
docs = []
59+
5660
with ProcessPoolExecutor(n_workers) as executor:
5761
futures = [executor.submit(load_document_batch, paths[i : (i + chunksize)]) for i in range(0, len(paths), chunksize)]
5862
for future in as_completed(futures):
5963
contents, _ = future.result()
6064
docs.extend(contents)
61-
65+
6266
return docs
67+
68+
if __name__ == "__main__":
69+
load_documents(SOURCE_DIRECTORY)

example.png

-2.51 KB
Loading

gui.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import tkinter as tk
2+
from tkinter import font as tkfont
23
from gui_table import create_table
34
import threading
45
from nvml import CudaVramLogic
@@ -41,9 +42,17 @@ def __init__(self, root):
4142
self.create_chromadb_button.pack(pady=5)
4243

4344
create_table(left_frame)
44-
45-
self.cuda_info_label = tk.Label(left_frame, text="CUDA & VRAM Info", font=("Segoe UI Historic", 10))
46-
self.cuda_info_label.pack(pady=5)
45+
46+
# GPU label
47+
self.gpu_info_label = tk.Label(left_frame, font=("Segoe UI Semibold", 16), foreground='green')
48+
self.gpu_info_label.pack(pady=1)
49+
50+
# VRAM label
51+
self.vram_info_label = tk.Label(left_frame, font=("Segoe UI Semibold", 16), foreground='blue')
52+
self.vram_info_label.pack(pady=1)
53+
54+
# Adjust CudaVramLogic initialization:
55+
self.cuda_logic = CudaVramLogic(self.vram_info_label, self.gpu_info_label, self.root)
4756

4857
main_pane.add(left_frame)
4958

@@ -75,8 +84,6 @@ def __init__(self, root):
7584
scroll2.pack(side=tk.RIGHT, fill=tk.Y)
7685
self.read_only_text.config(yscrollcommand=scroll2.set)
7786

78-
self.cuda_logic = CudaVramLogic(self.cuda_info_label, self.root)
79-
8087
self.center_window(root)
8188

8289
def center_window(self, root):

gui_logic.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from server_connector import interact_with_chat
99
import subprocess
1010
import server_connector
11+
import threading
1112

1213
def load_config():
1314
with open("config.yaml", 'r') as stream:
@@ -54,10 +55,13 @@ def download_embedding_model(self):
5455

5556
if selected_model:
5657
model_url = f"https://huggingface.co/{selected_model}"
57-
5858
target_directory = os.path.join("Embedding_Models", selected_model.replace("/", "--"))
5959

60-
subprocess.run(["git", "clone", model_url, target_directory])
60+
def download_model():
61+
subprocess.run(["git", "clone", model_url, target_directory])
62+
63+
download_thread = threading.Thread(target=download_model)
64+
download_thread.start()
6165

6266
def select_embedding_model_directory(self):
6367
initial_dir = 'Embedding_Models' if os.path.exists('Embedding_Models') else os.path.expanduser("~")
@@ -101,7 +105,12 @@ def create_chromadb(self):
101105

102106
if response:
103107
embedding_model_path = getattr(self, "embedding_model_directory", "")
104-
os.system(f'python ingest_improved.py "{embedding_model_path}"')
108+
109+
def run_create_chromadb(embedding_model_path):
110+
os.system(f'python ingest_improved.py "{embedding_model_path}"')
111+
112+
create_chromadb_thread = threading.Thread(target=run_create_chromadb, args=(embedding_model_path,))
113+
create_chromadb_thread.start()
105114

106115
def submit_query(self):
107116
current_dir = os.path.dirname(os.path.realpath(__file__))
@@ -120,14 +129,21 @@ def submit_query(self):
120129
return
121130

122131
query = self.gui.text_input.get("1.0", tk.END).strip()
123-
answer = interact_with_chat(query)
124-
self.gui.read_only_text.config(state=tk.NORMAL)
125-
self.gui.read_only_text.delete("1.0", tk.END)
126-
self.gui.read_only_text.insert(tk.END, answer)
127-
self.gui.read_only_text.config(state=tk.DISABLED)
132+
133+
# Move the chat interaction logic to a separate function
134+
def interact_with_chat_and_update_gui(query):
135+
answer = interact_with_chat(query)
136+
self.gui.read_only_text.config(state=tk.NORMAL)
137+
self.gui.read_only_text.delete("1.0", tk.END)
138+
self.gui.read_only_text.insert(tk.END, answer)
139+
self.gui.read_only_text.config(state=tk.DISABLED)
140+
141+
# Create a thread for chat interaction and GUI update
142+
chat_thread = threading.Thread(target=interact_with_chat_and_update_gui, args=(query,))
143+
chat_thread.start()
128144

129145
if __name__ == "__main__":
130146
root = tk.Tk()
131147
app = DocQA_GUI(root)
132148
logic = DocQA_Logic(app)
133-
root.mainloop()
149+
root.mainloop()

gui_table.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,13 @@ def create_table(parent_frame):
3131

3232
pro_tip_text = (
3333
"DO NOT have LM Studio running when creating the vector database. The VRAM numbers above refer to when creating "
34-
"the database. After it's created, run LM Studio and load your LLM (remember only Llama2-based models work "
35-
"currently when querying the database). To query the database, the embedding model will use about half the VRAM "
36-
"it used when creating it. Use the LARGEST embedding model you can possibly fit into VRAM while the LLM is loaded "
37-
"into LM Studio (remembering the half rule above). The quality of the embedding model is ACTUALLY MORE important "
38-
"than the size of the LLM. Experiment with low-quality LLMs and high-quality embedding models. EXAMPLE: q3_k_3 "
39-
"model + instructor-xl worked just fine together."
34+
"the database, which attempts to use multithreading, hence more VRAM used. After it's created, run LM Studio and "
35+
" load your LLM (remember only Llama2-based models work currently when querying the database). To query the database, "
36+
" the embedding model uses much less VRAM. Use the LARGEST embedding model you can possibly fit into VRAM while the "
37+
" LLM is loaded into LM Studio. The quality of the embedding model is ACTUALLY MORE important than the size of the LLM. "
38+
" Experiment with low-quality LLMs and high-quality embedding models. For example, q3_k_3 model + instructor-xl worked "
39+
" just fine together. If your text has a lot of technical jargon, a larger LLM might be better, but for everyday usage, "
40+
" strive to use as large of an embedding model as possible with as large of an LLM with the remaining VRAM."
4041
)
4142

4243
pro_tip_description = tk.Label(parent_frame, text=pro_tip_text, wraplength=400, justify="left")

ingest_improved.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import logging
22
import os
3-
import sys
43
import shutil
54
import torch
65
import yaml
@@ -45,7 +44,7 @@ def main():
4544
model_kwargs={"device": COMPUTE_DEVICE},
4645
)
4746

48-
# Delete current vector database before creating new one
47+
# Delete the current vector database before creating a new one
4948
if os.path.exists(PERSIST_DIRECTORY):
5049
shutil.rmtree(PERSIST_DIRECTORY)
5150
os.makedirs(PERSIST_DIRECTORY)
@@ -63,4 +62,5 @@ def main():
6362
logging.basicConfig(
6463
format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)s - %(message)s", level=logging.INFO
6564
)
65+
6666
main()

nvml.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from pynvml import *
22
from multiprocessing import Process, Pipe, Event
33
import time
4-
import tkinter as tk
54

65
def monitor_nvml(pipe, stop_event):
76
nvmlInit()
@@ -35,23 +34,20 @@ def stop_monitoring(p, stop_event):
3534
p.join()
3635

3736
class CudaVramLogic:
38-
def __init__(self, label, root):
39-
self.cuda_info_label = label
37+
def __init__(self, vram_label, gpu_label, root):
38+
self.vram_label = vram_label
39+
self.gpu_label = gpu_label
4040
self.root = root
4141
self.parent_conn, self.process, self.stop_event = start_monitoring()
42-
self.update_cuda_info()
42+
self.update_info()
4343

44-
def update_cuda_info(self):
44+
def update_info(self):
4545
if self.parent_conn.poll():
4646
memory_used_str, gpu_utilization = self.parent_conn.recv()
47-
info_text = f"Memory Used: {memory_used_str} | GPU Utilization: {gpu_utilization}"
48-
self.cuda_info_label.config(text=info_text)
49-
self.root.after(500, self.update_cuda_info)
47+
self.vram_label.config(text=f"VRAM: {memory_used_str}")
48+
self.gpu_label.config(text=f"GPU: {gpu_utilization}")
49+
self.root.after(500, self.update_info)
5050

5151
def stop_and_exit(self):
5252
stop_monitoring(self.process, self.stop_event)
5353
self.root.quit()
54-
55-
# If the script is executed directly, it will just run without outputting any metrics.
56-
if __name__ == "__main__":
57-
pass

0 commit comments

Comments
 (0)