6
6
import hashlib
7
7
import logging
8
8
import os
9
- import tempfile
10
9
import platform
11
10
import re
12
11
import shutil
12
+ import tempfile
13
13
from pathlib import Path
14
14
from typing import Dict , List , Optional , Union , Any
15
15
@@ -143,29 +143,29 @@ def _load_windows_compatible(self, model_path: Path) -> Any:
143
143
:raises DetectError: If all loading strategies fail
144
144
"""
145
145
model_path_str = str (model_path .resolve ())
146
-
146
+
147
147
# Try to load model directly
148
148
try :
149
149
return fasttext .load_model (model_path_str )
150
150
except Exception as e :
151
151
logger .debug (f"fast-langdetect: Load model failed: { e } " )
152
-
152
+
153
153
# Try to load model using relative path
154
154
try :
155
155
cwd = Path .cwd ()
156
156
rel_path = os .path .relpath (model_path , cwd )
157
157
return fasttext .load_model (rel_path )
158
158
except Exception as e :
159
159
logger .debug (f"fast-langdetect: Failed to load model using relative path: { e } " )
160
-
160
+
161
161
# Use temporary file as last resort
162
162
logger .debug (f"fast-langdetect: Using temporary file to load model: { model_path } " )
163
163
tmp_path = None
164
164
try :
165
165
# Use mkstemp to create a temporary file; the descriptor is closed right away so the file can be reopened by path
166
166
tmp_fd , tmp_path = tempfile .mkstemp (suffix = '.bin' )
167
167
os .close (tmp_fd ) # Close file descriptor
168
-
168
+
169
169
# Copy model file to temporary location
170
170
shutil .copy2 (model_path , tmp_path )
171
171
return fasttext .load_model (tmp_path )
@@ -207,14 +207,14 @@ class LangDetectConfig:
207
207
"""
208
208
209
209
def __init__ (
210
- self ,
211
- cache_dir : Optional [str ] = None ,
212
- custom_model_path : Optional [str ] = None ,
213
- proxy : Optional [str ] = None ,
214
- allow_fallback : bool = True ,
215
- disable_verify : bool = False ,
216
- verify_hash : Optional [str ] = None ,
217
- normalize_input : bool = False ,
210
+ self ,
211
+ cache_dir : Optional [str ] = None ,
212
+ custom_model_path : Optional [str ] = None ,
213
+ proxy : Optional [str ] = None ,
214
+ allow_fallback : bool = True ,
215
+ disable_verify : bool = False ,
216
+ verify_hash : Optional [str ] = None ,
217
+ normalize_input : bool = False ,
218
218
):
219
219
self .cache_dir = cache_dir or CACHE_DIRECTORY
220
220
self .custom_model_path = custom_model_path
@@ -276,7 +276,7 @@ def _get_model(self, low_memory: bool = True) -> Any:
276
276
raise DetectError ("Failed to load model" ) from e
277
277
278
278
def detect (
279
- self , text : str , low_memory : bool = True
279
+ self , text : str , low_memory : bool = True
280
280
) -> Dict [str , Union [str , float ]]:
281
281
"""
282
282
Detect primary language of text.
@@ -302,11 +302,11 @@ def detect(
302
302
raise DetectError ("Language detection failed" ) from e
303
303
304
304
def detect_multilingual (
305
- self ,
306
- text : str ,
307
- low_memory : bool = False ,
308
- k : int = 5 ,
309
- threshold : float = 0.0 ,
305
+ self ,
306
+ text : str ,
307
+ low_memory : bool = False ,
308
+ k : int = 5 ,
309
+ threshold : float = 0.0 ,
310
310
) -> List [Dict [str , Any ]]:
311
311
"""
312
312
Detect multiple possible languages in text.
@@ -355,24 +355,24 @@ def _normalize_text(text: str, should_normalize: bool = False) -> str:
355
355
"""
356
356
if not should_normalize :
357
357
return text
358
-
358
+
359
359
# Check if text is all uppercase (or mostly uppercase)
360
360
if text .isupper () or (
361
- len (re .findall (r'[A-Z]' , text )) > 0.8 * len (re .findall (r'[A-Za-z]' , text ))
362
- and len (text ) > 5
361
+ len (re .findall (r'[A-Z]' , text )) > 0.8 * len (re .findall (r'[A-Za-z]' , text ))
362
+ and len (text ) > 5
363
363
):
364
364
return text .lower ()
365
-
365
+
366
366
return text
367
367
368
368
369
369
def detect (
370
- text : str ,
371
- * ,
372
- low_memory : bool = True ,
373
- model_download_proxy : Optional [str ] = None ,
374
- use_strict_mode : bool = False ,
375
- normalize_input : bool = True ,
370
+ text : str ,
371
+ * ,
372
+ low_memory : bool = True ,
373
+ model_download_proxy : Optional [str ] = None ,
374
+ use_strict_mode : bool = False ,
375
+ normalize_input : bool = True ,
376
376
) -> Dict [str , Union [str , float ]]:
377
377
"""
378
378
Simple interface for language detection.
@@ -396,7 +396,7 @@ def detect(
396
396
)
397
397
if model_download_proxy or use_strict_mode or normalize_input :
398
398
config = LangDetectConfig (
399
- proxy = model_download_proxy ,
399
+ proxy = model_download_proxy ,
400
400
allow_fallback = not use_strict_mode ,
401
401
normalize_input = normalize_input
402
402
)
@@ -406,14 +406,14 @@ def detect(
406
406
407
407
408
408
def detect_multilingual (
409
- text : str ,
410
- * ,
411
- low_memory : bool = False ,
412
- model_download_proxy : Optional [str ] = None ,
413
- k : int = 5 ,
414
- threshold : float = 0.0 ,
415
- use_strict_mode : bool = False ,
416
- normalize_input : bool = True ,
409
+ text : str ,
410
+ * ,
411
+ low_memory : bool = False ,
412
+ model_download_proxy : Optional [str ] = None ,
413
+ k : int = 5 ,
414
+ threshold : float = 0.0 ,
415
+ use_strict_mode : bool = False ,
416
+ normalize_input : bool = True ,
417
417
) -> List [Dict [str , Any ]]:
418
418
"""
419
419
Simple interface for multi-language detection.
@@ -439,7 +439,7 @@ def detect_multilingual(
439
439
)
440
440
if model_download_proxy or use_strict_mode or normalize_input :
441
441
config = LangDetectConfig (
442
- proxy = model_download_proxy ,
442
+ proxy = model_download_proxy ,
443
443
allow_fallback = not use_strict_mode ,
444
444
normalize_input = normalize_input
445
445
)
0 commit comments