1
# Catalog of selectable models. Every entry has the same shape:
#   "model":   the model identifier string (these look like "<org>/<name>"
#              hub repository ids -- confirm against the code that loads them)
#   "details": "description"  - short blurb for the model
#              "dimensions"   - integer, presumably the output vector width
#              "max_sequence" - integer, presumably the max input length in tokens
#              "size_mb"      - integer, size in megabytes per the key name
# Entries are grouped by publisher and kept in a fixed order.
# NOTE(review): instructor-large and instructor-xl are listed with 1024
# dimensions; their published configs appear to project to 768 -- confirm
# before relying on these two values.
AVAILABLE_MODELS = [
    {
        "details": {
            "description": "Well rounded & customizable.",
            "dimensions": 384,
            "max_sequence": 512,
            "size_mb": 134,
        },
        "model": "BAAI/bge-small-en-v1.5",
    },
    {
        "details": {
            "description": "Well rounded & customizable.",
            "dimensions": 768,
            "max_sequence": 512,
            "size_mb": 438,
        },
        "model": "BAAI/bge-base-en-v1.5",
    },
    {
        "details": {
            "description": "Well rounded & slight RAG improvement.",
            "dimensions": 768,
            "max_sequence": 512,
            "size_mb": 438,
        },
        "model": "BAAI/llm-embedder",
    },
    {
        "details": {
            "description": "Well rounded & customizable.",
            "dimensions": 1024,
            "max_sequence": 512,
            "size_mb": 1340,
        },
        "model": "BAAI/bge-large-en-v1.5",
    },
    {
        "details": {
            "description": "Well rounded & customizable.",
            "dimensions": 768,
            "max_sequence": 512,
            "size_mb": 439,
        },
        "model": "hkunlp/instructor-base",
    },
    {
        "details": {
            "description": "Well rounded & customizable.",
            "dimensions": 1024,
            "max_sequence": 512,
            "size_mb": 1340,
        },
        "model": "hkunlp/instructor-large",
    },
    {
        "details": {
            "description": "Well rounded & customizable.",
            "dimensions": 1024,
            "max_sequence": 512,
            "size_mb": 4960,
        },
        "model": "hkunlp/instructor-xl",
    },
    {
        "details": {
            "description": "Well rounded",
            "dimensions": 312,
            "max_sequence": 512,
            "size_mb": 58,
        },
        "model": "jinaai/jina-embedding-t-en-v1",
    },
    {
        "details": {
            "description": "Well rounded",
            "dimensions": 512,
            "max_sequence": 512,
            "size_mb": 141,
        },
        "model": "jinaai/jina-embedding-s-en-v1",
    },
    {
        "details": {
            "description": "Well rounded",
            "dimensions": 768,
            "max_sequence": 512,
            "size_mb": 439,
        },
        "model": "jinaai/jina-embedding-b-en-v1",
    },
    {
        "details": {
            "description": "Well rounded",
            "dimensions": 1024,
            "max_sequence": 512,
            "size_mb": 1340,
        },
        "model": "jinaai/jina-embedding-l-en-v1",
    },
    {
        "details": {
            "description": "Clustering or semantic search",
            "dimensions": 768,
            "max_sequence": 512,
            "size_mb": 329,
        },
        "model": "sentence-transformers/all-distilroberta-v1",
    },
    {
        "details": {
            "description": "Clustering or semantic search",
            "dimensions": 384,
            "max_sequence": 256,
            "size_mb": 91,
        },
        "model": "sentence-transformers/all-MiniLM-L6-v2",
    },
    {
        "details": {
            "description": "Clustering or semantic search",
            "dimensions": 768,
            "max_sequence": 384,
            "size_mb": 438,
        },
        "model": "sentence-transformers/all-mpnet-base-v2",
    },
    {
        "details": {
            "description": "Semantic search.",
            "dimensions": 768,
            "max_sequence": 512,
            "size_mb": 219,
        },
        "model": "sentence-transformers/gtr-t5-base",
    },
    {
        "details": {
            "description": "Semantic search.",
            "dimensions": 768,
            "max_sequence": 512,
            "size_mb": 670,
        },
        "model": "sentence-transformers/gtr-t5-large",
    },
    {
        "details": {
            "description": "Semantic search.",
            "dimensions": 768,
            "max_sequence": 512,
            "size_mb": 2480,
        },
        "model": "sentence-transformers/gtr-t5-xl",
    },
    {
        "details": {
            "description": "Clustering or semantic search",
            "dimensions": 768,
            "max_sequence": 512,
            "size_mb": 265,
        },
        "model": "sentence-transformers/msmarco-distilbert-base-v4",
    },
    {
        "details": {
            "description": "Semantic search.",
            "dimensions": 768,
            "max_sequence": 384,
            "size_mb": 265,
        },
        "model": "sentence-transformers/msmarco-distilbert-cos-v5",
    },
    {
        "details": {
            "description": "Clustering or semantic search",
            "dimensions": 384,
            "max_sequence": 512,
            "size_mb": 91,
        },
        "model": "sentence-transformers/msmarco-MiniLM-L-6-v3",
    },
    {
        "details": {
            "description": "Semantic search.",
            "dimensions": 384,
            "max_sequence": 384,
            "size_mb": 91,
        },
        "model": "sentence-transformers/msmarco-MiniLM-L6-cos-v5",
    },
    {
        "details": {
            "description": "Clustering or semantic search",
            "dimensions": 768,
            "max_sequence": 510,
            "size_mb": 499,
        },
        "model": "sentence-transformers/msmarco-roberta-base-v3",
    },
    {
        "details": {
            "description": "Semantic search.",
            "dimensions": 768,
            "max_sequence": 512,
            "size_mb": 265,
        },
        "model": "sentence-transformers/multi-qa-distilbert-cos-v1",
    },
    {
        "details": {
            "description": "Semantic search.",
            "dimensions": 384,
            "max_sequence": 512,
            "size_mb": 91,
        },
        "model": "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
    },
    {
        "details": {
            "description": "Semantic search.",
            "dimensions": 768,
            "max_sequence": 512,
            "size_mb": 438,
        },
        "model": "sentence-transformers/multi-qa-mpnet-base-cos-v1",
    },
    {
        "details": {
            "description": "Sentence similarity",
            "dimensions": 768,
            "max_sequence": 256,
            "size_mb": 219,
        },
        "model": "sentence-transformers/sentence-t5-base",
    },
    {
        "details": {
            "description": "Sentence similarity",
            "dimensions": 768,
            "max_sequence": 256,
            "size_mb": 670,
        },
        "model": "sentence-transformers/sentence-t5-large",
    },
    {
        "details": {
            "description": "Sentence similarity",
            "dimensions": 768,
            "max_sequence": 256,
            "size_mb": 2480,
        },
        "model": "sentence-transformers/sentence-t5-xl",
    },
    {
        "details": {
            "description": "Well rounded",
            "dimensions": 384,
            "max_sequence": 512,
            "size_mb": 67,
        },
        "model": "thenlper/gte-small",
    },
    {
        "details": {
            "description": "Well rounded",
            "dimensions": 768,
            "max_sequence": 512,
            "size_mb": 219,
        },
        "model": "thenlper/gte-base",
    },
    {
        "details": {
            "description": "Well rounded",
            "dimensions": 1024,
            "max_sequence": 512,
            "size_mb": 670,
        },
        "model": "thenlper/gte-large",
    },
]

# Maps a lowercase file extension (leading dot included) to the name of the
# loader to use for that file type. The values are plain strings, not classes;
# presumably the consumer resolves each name to a loader class -- confirm
# against the code that reads this table.
DOCUMENT_LOADERS = {
    ".pdf": "PyMuPDFLoader",
    ".docx": "Docx2txtLoader",
    ".txt": "TextLoader",
    ".enex": "EverNoteLoader",
    ".epub": "UnstructuredEPubLoader",
    # Both e-mail formats share one loader.
    ".eml": "UnstructuredEmailLoader",
    ".msg": "UnstructuredEmailLoader",
    ".csv": "UnstructuredCSVLoader",
    # All spreadsheet variants share one loader.
    ".xls": "UnstructuredExcelLoader",
    ".xlsx": "UnstructuredExcelLoader",
    ".xlsm": "UnstructuredExcelLoader",
    ".rtf": "UnstructuredRTFLoader",
    ".odt": "UnstructuredODTLoader",
    ".md": "UnstructuredMarkdownLoader",
}

# Whisper model names offered for selection, in ascending size order; the
# ".en" entries are the English-only variant of the name before them.
WHISPER_MODEL_NAMES = [
    "tiny",
    "tiny.en",
    "base",
    "base.en",
    "small",
    "small.en",
    "medium",
    "medium.en",
    "large-v2",
]

# Tooltip strings. The wording is user-facing text and is kept verbatim.

# Explains the option that returns matching chunks without querying the LLM.
CHUNKS_ONLY_TOOLTIP = "Only return relevant chunks without connecting to the LLM. Extremely useful to test the chunk size/overlap settings."

# Tells the user to wait for the full LLM response before using the speak feature.
SPEAK_RESPONSE_TOOLTIP = "Only click this after the LLM's entire response is received otherwise your computer might explode."

# Reminds the user to let the embedding-model download finish.
DOWNLOAD_EMBEDDING_MODEL_TOOLTIP = "Remember, wait until downloading is complete!"