from utilities import my_cprint, get_model_native_precision, get_appropriate_dtype, supports_flash_attention
from constants import VECTOR_MODELS

- logging.basicConfig(level=logging.CRITICAL, force=True)
+ logging.basicConfig(level=logging.INFO, force=True)
# logging.basicConfig(level=logging.DEBUG, force=True)
logger = logging.getLogger(__name__)

@@ -51,6 +51,7 @@ def prepare_encode_kwargs(self):
    def create(self):
        prepared_kwargs = self.prepare_kwargs()
        prepared_encode_kwargs = self.prepare_encode_kwargs()
+
        return HuggingFaceEmbeddings(
            model_name=self.model_name,
            show_progress=not self.is_query,
@@ -143,12 +144,42 @@ def prepare_kwargs(self):
        return stella_kwargs


+ # class AlibabaEmbedding(BaseEmbeddingModel):
+ #     def prepare_kwargs(self):
+ #         ali_kwargs = deepcopy(self.model_kwargs)
+ #         compute_device = ali_kwargs.get("device", "").lower()
+ #         is_cuda = compute_device == "cuda"
+ #         use_xformers = is_cuda and supports_flash_attention()
+ #         ali_kwargs["tokenizer_kwargs"] = {
+ #             "padding": "longest",
+ #             "truncation": True,
+ #             "max_length": 8192
+ #         }
+ #         ali_kwargs["config_kwargs"] = {
+ #             "use_memory_efficient_attention": use_xformers,
+ #             "unpad_inputs": use_xformers,
+ #             "attn_implementation": "eager" if use_xformers else "sdpa"
+ #         }
+ #         return ali_kwargs
+
+ #     def prepare_encode_kwargs(self):
+ #         encode_kwargs = super().prepare_encode_kwargs()
+ #         encode_kwargs.update({
+ #             "padding": True,
+ #             "truncation": True,
+ #             "max_length": 8192
+ #         })
+ #         return encode_kwargs
+
+
class AlibabaEmbedding(BaseEmbeddingModel):
    def prepare_kwargs(self):
        ali_kwargs = deepcopy(self.model_kwargs)
+
        compute_device = ali_kwargs.get("device", "").lower()
        is_cuda = compute_device == "cuda"
        use_xformers = is_cuda and supports_flash_attention()
+
        ali_kwargs["tokenizer_kwargs"] = {
            "padding": "longest",
            "truncation": True,
@@ -171,6 +202,7 @@ def prepare_encode_kwargs(self):
        return encode_kwargs


+
def create_vector_db_in_process(database_name):
    create_vector_db = CreateVectorDB(database_name=database_name)
    create_vector_db.run()
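
Note: a minimal usage sketch of how the kwargs prepared above are presumably consumed, assuming the HuggingFaceEmbeddings used here is the langchain wrapper around sentence-transformers and that the prepared dicts are passed through unchanged. The import path, model id, device value, and encode kwargs below are illustrative assumptions, not taken from this commit.

# Illustrative sketch only -- not part of the diff above.
from langchain_huggingface import HuggingFaceEmbeddings  # assumed import path

use_xformers = True  # in the code above: CUDA device plus supports_flash_attention()

model_kwargs = {
    "device": "cuda",  # assumed device; a CPU device would disable the xformers path
    "tokenizer_kwargs": {"padding": "longest", "truncation": True, "max_length": 8192},
    "config_kwargs": {
        "use_memory_efficient_attention": use_xformers,
        "unpad_inputs": use_xformers,
        "attn_implementation": "eager" if use_xformers else "sdpa",
    },
}

embeddings = HuggingFaceEmbeddings(
    model_name="Alibaba-NLP/gte-large-en-v1.5",  # hypothetical checkpoint for illustration
    model_kwargs=model_kwargs,
    encode_kwargs={"padding": True, "truncation": True, "max_length": 8192},
    show_progress=True,
)
query_vector = embeddings.embed_query("example query")

The eager/sdpa toggle in the sketch mirrors the prepare_kwargs logic above: the memory-efficient (xformers) attention path is enabled only on CUDA devices that report flash-attention support.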