3 files changed: +21 −8 lines changed

File 1 of 3 (YAML configuration file):

@@ -26,11 +26,18 @@ AVAILABLE_MODELS:
   - jinaai/jina-embeddings-v2-base-en
   - jinaai/jina-embeddings-v2-small-en
 COMPUTE_DEVICE: cpu
+Compute_Device:
+  available:
+  - cuda
+  - cpu
+  database_creation: cpu
+  database_query: cpu
 EMBEDDING_MODEL_NAME:
-chunk_overlap: 200
-chunk_size: 600
 database:
-  contexts: 15
+  chunk_overlap: 200
+  chunk_size: 750
+  contexts: 10
+  device: null
   similarity: 0.9
 embedding-models:
   bge:
@@ -41,7 +48,7 @@ embedding-models:
 server:
   api_key: ''
   connection_str: http://localhost:1234/v1
-  model_max_tokens: 512
+  model_max_tokens: -1
   model_temperature: 0.1
   prefix: '[INST]'
   suffix: '[/INST]'
@@ -53,5 +60,5 @@ styles:
   text: 'background-color: #092327; color: light gray; font: 12pt "Segoe UI Historic";'
 transcriber:
   device: cpu
-  model: base.en
-  quant: float32
+  model: small.en
+  quant: int8
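
For reference, the new top-level Compute_Device section would presumably read as follows once this change is merged. The key names and default values are taken from the diff above; the exact nesting is inferred, since the rendered diff does not preserve indentation.

Compute_Device:
  available:        # devices the application reports as usable
  - cuda
  - cpu
  database_creation: cpu
  database_query: cpu

Presumably, database_creation and database_query are each expected to be set to one of the entries listed under available.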

File 2 of 3 (HTML user-guide page):

@@ -116,7 +116,8 @@ <h3>Prefix and Suffix</h3>
 <h2>Embedding Models Settings</h2>
 <p>These settings apply only if you're using a model named <code>BGE</code> or <code>Instructor</code>. Tread carefully
 when adjusting these settings because it could hinder performance. You can search online on how to adjust these depending
-on the type of text being entered into the vector database.</p>
+on the type of text being entered into the vector database. Also, if you change the chunk size or overlap settings you must
+recreate the vector database for the changes to take effect.</p>

 <p>All other types of embedding models that my program uses don't require specialized settings.</p>

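
For readers of this help page, the chunk settings the added sentence refers to appear to live under the database section of the YAML configuration after this change, with the new defaults from the diff above. A minimal illustration (comments are editorial, not part of the file):

database:
  chunk_overlap: 200   # new default carried over from the old top-level key
  chunk_size: 750      # raised from 600 in this pull request

If either value is changed, the vector database has to be recreated before the new chunking takes effect.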

File 3 of 3 (HTML user-guide page):

@@ -102,6 +102,12 @@ <h2 style="color: #f0f0f0;" align="center">Manage VRAM</h2>
 database and a Ctranslate2 Whisper model for the transcription functionality. Therefore, it is important manage your
 memory to achieve the best performance.</p>

+<p>If you are tight on VRAM, do not have LM Studio running while the vector database is being created. I highly
+recommend that you choose "cpu" when querying the database and only use "cuda" or "mps" when creating the database.
+Creating the database takes 5,000 times more compute power, so it's worth using GPU acceleration (and hence VRAM).
+However, merely querying the database can easily be done on any CPU, and when you select "cpu" you're using system RAM
+rather than VRAM. The option to choose different compute devices for database creation versus querying is a recent addition.</p>
+
 <p>To save VRAM, unplug any secondary monitors from the GPU and plug them into graphics ports (e.g. HDMI or DisplayPort)
 coming directly from your motherboard. This will prevent these monitors from using your GPU. You will most likely want
 to keep your main monitor plugged in (e.g. for gaming).</p>
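
A minimal sketch of the setup the added paragraph recommends, using the Compute_Device keys introduced in this pull request: create the database with GPU acceleration, then query it on the CPU. This is an illustration rather than the shipped default (the config diff above defaults both keys to cpu), and note that while the paragraph mentions "mps", only cuda and cpu are listed under available in that diff.

Compute_Device:
  available:
  - cuda
  - cpu
  database_creation: cuda   # heavy one-time work: worth spending VRAM on GPU acceleration
  database_query: cpu       # lightweight lookups: run on the CPU and use system RAM

The idea, per the paragraph above, is that the embedding model only occupies VRAM while the database is being created, leaving the GPU free (e.g. for LM Studio) at query time.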
@@ -121,7 +127,6 @@ <h2 style="color: #f0f0f0;" align="center">Select an Appropriate Embedding Model
 <p><b>https://instructor-embedding.github.io/</b></p>
 <p><b>https://github.com/FlagOpen/FlagEmbedding</b></p>
 <p><b>https://huggingface.co/thenlper/gte-large</b></p>
-<p><b>https://huggingface.co/intfloat/multilingual-e5-large</b></p>
 <p><b>https://huggingface.co/jinaai/jina-embedding-l-en-v1</b></p>

 <h2 style="color: #f0f0f0;" align="center">Select the Appropriate Model Within LM Studio</h2>