@@ -140,6 +140,7 @@ def _taxid2lineage(tid: Union[int, str],
140
140
info = _autoVivification ()
141
141
level = {abbr : '' for abbr in abbr_to_major_level }
142
142
143
+ tid_orig = tid
143
144
rank = taxid2rank (tid )
144
145
orig_rank = rank
145
146
name = taxid2name (tid )
@@ -161,7 +162,7 @@ def _taxid2lineage(tid: Union[int, str],
161
162
rank = _getTaxRank (tid )
162
163
name = _getTaxName (tid )
163
164
164
- if name == 'root' : break
165
+ if name == 'root' : break
165
166
166
167
# try to get the closest "no_rank" taxa to "type" representing subtype/group (mainly for virus)
167
168
if guess_type == True :
@@ -175,30 +176,34 @@ def _taxid2lineage(tid: Union[int, str],
175
176
ranks = list (abbr_to_major_level .keys ())
176
177
ranks .reverse ()
177
178
idx = 0
178
-
179
+
179
180
# input taxid is a major rank
180
181
if orig_rank in major_level_to_abbr :
181
182
idx = ranks .index ( major_level_to_abbr [orig_rank ] )
182
183
# if not, find the next major rank
183
184
else :
184
- nmtid = taxid2nearestMajorTaxid ( tid )
185
+ nmtid = taxid2nearestMajorTaxid ( tid_orig )
185
186
nmrank = taxid2rank ( nmtid )
186
187
if nmrank == "root" :
187
188
idx = 7
188
189
else :
189
190
idx = ranks .index ( major_level_to_abbr [nmrank ] )
190
191
191
192
for lvl in ranks [idx :]:
192
- if all_major_rank == False :
193
- if not level [ lvl ]: continue
193
+ if not all_major_rank :
194
+ break
194
195
196
+ logging .debug (f'{ lvl } : { level } ' )
195
197
if not level [lvl ]:
196
- level [lvl ] = "%s - no_%s_rank" % (last ,lvl )
197
- info [abbr_to_major_level [lvl ]]["name" ] = "%s - no_%s_rank" % (last ,lvl )
198
- info [abbr_to_major_level [lvl ]]["taxid" ] = 0
198
+ level [lvl ] = f'{ last } - no_{ lvl } _rank'
199
+ info [abbr_to_major_level [lvl ]]['name' ] = f'{ last } - no_{ lvl } _rank'
200
+ info [abbr_to_major_level [lvl ]]['taxid' ] = 0
201
+ logging .debug (f'no { lvl } - { level [lvl ]} ' )
199
202
200
203
last = level [lvl ]
201
204
205
+ logging .debug (f'{ info } ' )
206
+
202
207
if print_strain == True :
203
208
if orig_rank == "strain" :
204
209
info ["strain" ]["name" ] = str_name
@@ -640,7 +645,7 @@ def taxid2nearestMajorTaxid(tid: Union[int, str]) -> str:
640
645
def taxid2lineage (tid : Union [int , str ], all_major_rank = True , print_strain = False , space2underscore = False , sep = "|" ) -> str :
641
646
"""
642
647
Returns the taxonomic lineage for a given taxonomic identifier (tid) as a formatted string.
643
-
648
+
644
649
Parameters:
645
650
tid (Union[int, str]): A taxonomic identifier, which can be either an integer or a string.
646
651
all_major_rank (bool): If True, major ranks that have no entry in the lineage are filled with a "<previous name> - no_<rank>_rank" placeholder; if False, no placeholders are added for missing major ranks. Default is True.
@@ -650,9 +655,8 @@ def taxid2lineage(tid: Union[int, str], all_major_rank=True, print_strain=False,
650
655
651
656
Returns:
652
657
str: A formatted string containing the taxonomic lineage information, with each rank separated by the specified separator (default is "|").
653
-
654
658
"""
655
-
659
+
656
660
lineage = _taxid2lineage ( tid , all_major_rank , print_strain , space2underscore )
657
661
texts = []
658
662
for rank in major_level_to_abbr :
@@ -755,7 +759,7 @@ def acc2taxid_raw(acc: str, accession2taxid_file: Optional[str] = None) -> str:
755
759
756
760
return accTid [acc ]
757
761
758
- def acc2taxid (acc : str , type : Optional [str ]= 'nucl' ) -> str :
762
+ def acc2taxid (acc : str , type : Optional [str ] = 'nucl' , mapping_file : Optional [ str ] = None ) -> str :
759
763
"""
760
764
Get the taxonomy ID for a given accession.
761
765
@@ -789,6 +793,11 @@ def acc2taxid(acc: str, type: Optional[str]='nucl') -> str:
789
793
f'{ taxonomy_dir } /accession2taxid/pdb.accession2taxid'
790
794
]
791
795
796
+ if mapping_file :
797
+ acc2taxid_files = [mapping_file ]
798
+
799
+ logger .debug ( f"type: { type } ; acc2taxid_files: { acc2taxid_files } " )
800
+
792
801
# check if accession2taxid files exist
793
802
avail_acc2taxid_files = []
794
803
0 commit comments