@@ -77,8 +77,9 @@ def _checkTaxonomy(tid: Union[int, str]):
77
77
"""Check if a taxonomy ID is present in the taxonomy database"""
78
78
79
79
if not len (taxParents ):
80
- logger .fatal ("Taxonomy not loaded. \" loadTaxonomy()\" must be called first." )
81
- _die ("Taxonomy not loaded. \" loadTaxonomy()\" must be called first." )
80
+ logger .info ("Taxonomy not loaded. Call \" loadTaxonomy()\" first to avoid this message..." )
81
+ logger .info ("Loading taxonomy..." )
82
+ loadTaxonomy ()
82
83
83
84
if tid :
84
85
# tid must be in string type
@@ -103,7 +104,6 @@ def _taxid2fullLink(tid: Union[int, str]) -> dict:
103
104
104
105
Returns:
105
106
dict: A dictionary containing the full lineage of the target taxon.
106
-
107
107
"""
108
108
tid = _checkTaxonomy (tid )
109
109
if tid == "unknown" : return {}
@@ -371,7 +371,14 @@ def taxid2parent(tid: Union[int, str], norank: bool=False) -> str:
371
371
372
372
return tid
373
373
374
- def name2taxid (name , rank = None , superkingdom = None , fuzzy = True , cutoff = 0.7 , max_matches = 3 , reset = False , expand = True ) -> list :
374
+ def name2taxid (name : str ,
375
+ rank : str = None ,
376
+ superkingdom : str = None ,
377
+ fuzzy : bool = True ,
378
+ cutoff : float = 0.7 ,
379
+ max_matches : int = 3 ,
380
+ reset : bool = False ,
381
+ expand : bool = True ) -> list :
375
382
"""
376
383
Get the taxonomic ID of a given taxonomic name.
377
384
@@ -400,22 +407,35 @@ def name2taxid(name, rank=None, superkingdom=None, fuzzy=True, cutoff=0.7, max_m
400
407
names_dmp_file = taxonomy_dir + "/names.dmp"
401
408
402
409
if df_names is None and expand and os .path .isfile ( names_dmp_file ):
410
+ logging .debug (f"Loading { names_dmp_file } " )
403
411
df_names = pd .read_csv (names_dmp_file ,
404
- sep = '\t | \t ' ,
405
- engine = 'python' ,
406
- header = None ,
407
- names = ['taxid' , 'name' , 'annot' , 'type' ],
408
- usecols = [ 'taxid' , ' name'],
409
- index_col = 'name' )
412
+ sep = '\t ' ,
413
+ header = None ,
414
+ names = [ 'taxid' , 'sep1' , 'name' , 'sep2' , 'annot' , 'sep3' , 'type' , 'sep4' ] ,
415
+ usecols = ['taxid' ,'name' ],
416
+ index_col = ' name')
417
+ logging . debug ( f"names.dmp loaded" )
410
418
411
419
if not name in nameTid or reset :
412
420
matched_taxid = []
421
+ df_temp = None
422
+ logging .debug (f"Searching { name } ..." )
413
423
414
424
if expand :
415
- import difflib
416
- matches = difflib .get_close_matches (name , df_names .index , max_matches , cutoff )
417
- logger .debug (f'{ name } : { matches } ' )
418
- df_temp = df_names .loc [matches ,:]
425
+ if fuzzy == True :
426
+ import difflib
427
+ matches = difflib .get_close_matches (name , df_names .index , max_matches , cutoff )
428
+ logger .debug (f'{ name } : { matches } ' )
429
+ df_temp = df_names .loc [matches ,:]
430
+ else :
431
+ if name in df_names .index :
432
+ df_temp = df_names .loc [[name ],:]
433
+ else :
434
+ df_temp = df_names .head (0 )
435
+
436
+ if len (df_temp )== 0 :
437
+ nameTid [name ] = []
438
+ return nameTid [name ]
419
439
420
440
if rank :
421
441
df_temp ['rank' ] = df_temp .taxid .apply (taxid2rank )
@@ -429,8 +449,7 @@ def name2taxid(name, rank=None, superkingdom=None, fuzzy=True, cutoff=0.7, max_m
429
449
430
450
nameTid [name ] = df_temp .head (max_matches ).taxid .to_list ()
431
451
return nameTid [name ]
432
-
433
- else : # searching scientific names only
452
+ else : # The 'expand' flag is False, we will search scientific names only
434
453
for taxid in taxNames :
435
454
if fuzzy == True :
436
455
if not name in taxNames [taxid ]:
@@ -453,7 +472,10 @@ def name2taxid(name, rank=None, superkingdom=None, fuzzy=True, cutoff=0.7, max_m
453
472
nameTid [name ] = matched_taxid
454
473
return nameTid [name ][:max_matches ]
455
474
else :
456
- return nameTid [name ][:max_matches ]
475
+ if len (nameTid [name ]):
476
+ return nameTid [name ][:max_matches ]
477
+ else :
478
+ return []
457
479
458
480
def taxid2nameOnRank (tid : Union [int , str ], target_rank = None ) -> str :
459
481
"""
0 commit comments