You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
f"fast-langdetect: MD5 hash verification failed for {model_path}, "
126
+
f"please check the integrity of the downloaded file from {FASTTEXT_LARGE_MODEL_URL}. "
127
+
"\n This may seriously reduce the prediction accuracy. "
128
+
"If you want to ignore this, please set `fast_langdetect.ft_detect.infer.VERIFY_FASTTEXT_LARGE_MODEL = None` "
129
+
)
90
130
ifnotmodel_path.exists():
91
-
raiseDetectError(f"FastText model file not found at {model_path}")
131
+
ifdownload_url:
132
+
download_model(download_url, model_path, proxy)
133
+
ifnotmodel_path.exists():
134
+
raiseDetectError(f"FastText model file not found at {model_path}")
92
135
93
136
try:
94
137
# Load FastText model
95
138
returnfasttext.load_model(str(model_path))
96
139
exceptExceptionase:
97
-
logger.error(f"fast-langdetect:Failed to load FastText model from {model_path}: {e}")
140
+
logger.warning(f"fast-langdetect:Failed to load FastText model from {model_path}: {e}")
98
141
raiseDetectError(f"Failed to load FastText model: {e}")
99
142
100
143
@@ -131,7 +174,7 @@ def load_model(
131
174
_model_cache.cache_model(cache_key, model)
132
175
returnmodel
133
176
exceptExceptionase:
134
-
logger.error(f"fast-langdetect:Failed to load model ({'low'iflow_memoryelse'high'} memory): {e}")
177
+
logger.warning(f"fast-langdetect:Failed to load model ({'low'iflow_memoryelse'high'} memory): {e}")
135
178
ifuse_strict_mode:
136
179
raiseDetectError("Failed to load FastText model.") frome
137
180
elifnotlow_memory:
@@ -149,12 +192,15 @@ def detect(
149
192
) ->Dict[str, Union[str, float]]:
150
193
"""
151
194
Detect the language of a text using FastText.
152
-
This function assumes to be given a single line of text. We split words on whitespace (space, newline, tab, vertical tab) and the control characters carriage return, formfeed and the null character.
153
-
If the model is not supervised, this function will throw a ValueError.
195
+
196
+
- You MUST remove newline characters (`\n`) from the text before calling this function; otherwise a ValueError is raised.
197
+
198
+
- In scenarios **where accuracy is important**, do not rely on the detection results of the small model; use `low_memory=False` to download and use the larger model.
199
+
154
200
:param text: The text for language detection
155
-
:param low_memory: Whether to use a memory-efficient model
201
+
:param low_memory: Whether to use the compressed version of the model (https://fasttext.cc/docs/en/language-identification.html)
156
202
:param model_download_proxy: Download proxy for the model if needed
157
-
:param use_strict_mode: If it was enabled, strictly loads large model or raises error if it fails
203
+
:param use_strict_mode: When this parameter is enabled, the fallback after loading failure will be disabled.
158
204
:return: A dictionary with detected language and confidence score
159
205
:raises LanguageDetectionError: If detection fails
160
206
"""
@@ -176,14 +222,26 @@ def detect(
176
222
defdetect_multilingual(
177
223
text: str,
178
224
*,
179
-
low_memory: bool=True,
225
+
low_memory: bool=False,
180
226
model_download_proxy: Optional[str] =None,
181
227
k: int=5,
182
228
threshold: float=0.0,
183
229
use_strict_mode: bool=False,
184
230
) ->List[Dict[str, Any]]:
185
231
"""
186
232
Detect the top-k probable languages for a given text.
233
+
234
+
- You MUST remove newline characters (`\n`) from the text before calling this function; otherwise a ValueError is raised.
235
+
236
+
- In scenarios **where accuracy is important**, do not rely on the detection results of the small model; use `low_memory=False` to download and use the larger model.
237
+
238
+
:param text: The text for language detection
239
+
:param low_memory: Whether to use the compressed version of the model (https://fasttext.cc/docs/en/language-identification.html)
240
+
:param model_download_proxy: Download proxy for the model if needed
241
+
:param k: Number of top languages to return
242
+
:param threshold: Minimum confidence score to consider
243
+
:param use_strict_mode: When this parameter is enabled, the fallback after loading failure will be disabled.
244
+
:return: A list of dictionaries with detected languages and confidence scores
0 commit comments