Commit a5eaa11

v2.7.4
1 parent 862267b commit a5eaa11

File tree

1 file changed: +307 -0 lines changed

src/constants.py

Lines changed: 307 additions & 0 deletions

AVAILABLE_MODELS = [
    {
        'details': {
            'description': 'Well rounded & customizable.',
            'dimensions': 384,
            'max_sequence': 512,
            'size_mb': 134
        },
        'model': 'BAAI/bge-small-en-v1.5'
    },
    {
        'details': {
            'description': 'Well rounded & customizable.',
            'dimensions': 768,
            'max_sequence': 512,
            'size_mb': 438
        },
        'model': 'BAAI/bge-base-en-v1.5'
    },
    {
        'details': {
            'description': 'Well rounded & slight RAG improvement.',
            'dimensions': 768,
            'max_sequence': 512,
            'size_mb': 438
        },
        'model': 'BAAI/llm-embedder'
    },
    {
        'details': {
            'description': 'Well rounded & customizable.',
            'dimensions': 1024,
            'max_sequence': 512,
            'size_mb': 1340
        },
        'model': 'BAAI/bge-large-en-v1.5'
    },
    {
        'details': {
            'description': 'Well rounded & customizable.',
            'dimensions': 768,
            'max_sequence': 512,
            'size_mb': 439
        },
        'model': 'hkunlp/instructor-base'
    },
    {
        'details': {
            'description': 'Well rounded & customizable.',
            'dimensions': 1024,
            'max_sequence': 512,
            'size_mb': 1340
        },
        'model': 'hkunlp/instructor-large'
    },
    {
        'details': {
            'description': 'Well rounded & customizable.',
            'dimensions': 1024,
            'max_sequence': 512,
            'size_mb': 4960
        },
        'model': 'hkunlp/instructor-xl'
    },
    {
        'details': {
            'description': 'Well rounded',
            'dimensions': 312,
            'max_sequence': 512,
            'size_mb': 58
        },
        'model': 'jinaai/jina-embedding-t-en-v1'
    },
    {
        'details': {
            'description': 'Well rounded',
            'dimensions': 512,
            'max_sequence': 512,
            'size_mb': 141
        },
        'model': 'jinaai/jina-embedding-s-en-v1'
    },
    {
        'details': {
            'description': 'Well rounded',
            'dimensions': 768,
            'max_sequence': 512,
            'size_mb': 439
        },
        'model': 'jinaai/jina-embedding-b-en-v1'
    },
    {
        'details': {
            'description': 'Well rounded',
            'dimensions': 1024,
            'max_sequence': 512,
            'size_mb': 1340
        },
        'model': 'jinaai/jina-embedding-l-en-v1'
    },
    {
        'details': {
            'description': 'Clustering or semantic search',
            'dimensions': 768,
            'max_sequence': 512,
            'size_mb': 329
        },
        'model': 'sentence-transformers/all-distilroberta-v1'
    },
    {
        'details': {
            'description': 'Clustering or semantic search',
            'dimensions': 384,
            'max_sequence': 256,
            'size_mb': 91
        },
        'model': 'sentence-transformers/all-MiniLM-L6-v2'
    },
    {
        'details': {
            'description': 'Clustering or semantic search',
            'dimensions': 768,
            'max_sequence': 384,
            'size_mb': 438
        },
        'model': 'sentence-transformers/all-mpnet-base-v2'
    },
    {
        'details': {
            'description': 'Semantic search.',
            'dimensions': 768,
            'max_sequence': 512,
            'size_mb': 219
        },
        'model': 'sentence-transformers/gtr-t5-base'
    },
    {
        'details': {
            'description': 'Semantic search.',
            'dimensions': 768,
            'max_sequence': 512,
            'size_mb': 670
        },
        'model': 'sentence-transformers/gtr-t5-large'
    },
    {
        'details': {
            'description': 'Semantic search.',
            'dimensions': 768,
            'max_sequence': 512,
            'size_mb': 2480
        },
        'model': 'sentence-transformers/gtr-t5-xl'
    },
    {
        'details': {
            'description': 'Clustering or semantic search',
            'dimensions': 768,
            'max_sequence': 512,
            'size_mb': 265
        },
        'model': 'sentence-transformers/msmarco-distilbert-base-v4'
    },
    {
        'details': {
            'description': 'Semantic search.',
            'dimensions': 768,
            'max_sequence': 384,
            'size_mb': 265
        },
        'model': 'sentence-transformers/msmarco-distilbert-cos-v5'
    },
    {
        'details': {
            'description': 'Clustering or semantic search',
            'dimensions': 384,
            'max_sequence': 512,
            'size_mb': 91
        },
        'model': 'sentence-transformers/msmarco-MiniLM-L-6-v3'
    },
    {
        'details': {
            'description': 'Semantic search.',
            'dimensions': 384,
            'max_sequence': 384,
            'size_mb': 91
        },
        'model': 'sentence-transformers/msmarco-MiniLM-L6-cos-v5'
    },
    {
        'details': {
            'description': 'Clustering or semantic search',
            'dimensions': 768,
            'max_sequence': 510,
            'size_mb': 499
        },
        'model': 'sentence-transformers/msmarco-roberta-base-v3'
    },
    {
        'details': {
            'description': 'Semantic search.',
            'dimensions': 768,
            'max_sequence': 512,
            'size_mb': 265
        },
        'model': 'sentence-transformers/multi-qa-distilbert-cos-v1'
    },
    {
        'details': {
            'description': 'Semantic search.',
            'dimensions': 384,
            'max_sequence': 512,
            'size_mb': 91
        },
        'model': 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1'
    },
    {
        'details': {
            'description': 'Semantic search.',
            'dimensions': 768,
            'max_sequence': 512,
            'size_mb': 438
        },
        'model': 'sentence-transformers/multi-qa-mpnet-base-cos-v1'
    },
    {
        'details': {
            'description': 'Sentence similarity',
            'dimensions': 768,
            'max_sequence': 256,
            'size_mb': 219
        },
        'model': 'sentence-transformers/sentence-t5-base'
    },
    {
        'details': {
            'description': 'Sentence similarity',
            'dimensions': 768,
            'max_sequence': 256,
            'size_mb': 670
        },
        'model': 'sentence-transformers/sentence-t5-large'
    },
    {
        'details': {
            'description': 'Sentence similarity',
            'dimensions': 768,
            'max_sequence': 256,
            'size_mb': 2480
        },
        'model': 'sentence-transformers/sentence-t5-xl'
    },
    {
        'details': {
            'description': 'Well rounded',
            'dimensions': 384,
            'max_sequence': 512,
            'size_mb': 67
        },
        'model': 'thenlper/gte-small'
    },
    {
        'details': {
            'description': 'Well rounded',
            'dimensions': 768,
            'max_sequence': 512,
            'size_mb': 219
        },
        'model': 'thenlper/gte-base'
    },
    {
        'details': {
            'description': 'Well rounded',
            'dimensions': 1024,
            'max_sequence': 512,
            'size_mb': 670
        },
        'model': 'thenlper/gte-large'
    }
]
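
# Illustrative usage sketch (hypothetical helper, not part of the original commit):
# AVAILABLE_MODELS is a plain list of dicts, so a caller can look up an embedding
# model's details by its repository id.
def get_embedding_model_details(model_name):
    """Return the 'details' dict for a listed model id, or None if it is unknown."""
    for entry in AVAILABLE_MODELS:
        if entry['model'] == model_name:
            return entry['details']
    return None

# e.g. get_embedding_model_details('BAAI/bge-small-en-v1.5')['dimensions'] -> 384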

DOCUMENT_LOADERS = {
    ".pdf": "PyMuPDFLoader",
    ".docx": "Docx2txtLoader",
    ".txt": "TextLoader",
    ".enex": "EverNoteLoader",
    ".epub": "UnstructuredEPubLoader",
    ".eml": "UnstructuredEmailLoader",
    ".msg": "UnstructuredEmailLoader",
    ".csv": "UnstructuredCSVLoader",
    ".xls": "UnstructuredExcelLoader",
    ".xlsx": "UnstructuredExcelLoader",
    ".xlsm": "UnstructuredExcelLoader",
    ".rtf": "UnstructuredRTFLoader",
    ".odt": "UnstructuredODTLoader",
    ".md": "UnstructuredMarkdownLoader",
}
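
# Illustrative usage sketch (hypothetical helper, not part of the original commit):
# DOCUMENT_LOADERS maps a lowercase file extension to the name of the loader class
# that should handle it. Resolving that name to an actual class is left to the
# caller, since the import path depends on the installed document-loading library.
from pathlib import Path

def get_loader_name(file_path):
    """Return the loader class name for a file, or None for unsupported types."""
    return DOCUMENT_LOADERS.get(Path(file_path).suffix.lower())

# e.g. get_loader_name('notes/meeting.docx') -> 'Docx2txtLoader'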

# Whisper speech-to-text model names
WHISPER_MODEL_NAMES = ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v2"]
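
# Illustrative usage sketch (hypothetical helper, not part of the original commit):
# validate a user-supplied Whisper model name against the list above before loading.
def validate_whisper_model(name):
    if name not in WHISPER_MODEL_NAMES:
        raise ValueError(f"Unknown Whisper model '{name}'; valid options: {WHISPER_MODEL_NAMES}")
    return name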

CHUNKS_ONLY_TOOLTIP = "Only return relevant chunks without connecting to the LLM. Extremely useful to test the chunk size/overlap settings."

SPEAK_RESPONSE_TOOLTIP = "Only click this after the LLM's entire response is received; otherwise your computer might explode."

DOWNLOAD_EMBEDDING_MODEL_TOOLTIP = "Remember, wait until downloading is complete!"
