Skip to content

Commit c430e03

Browse files
committed
Merge branch 'main' of github.com:poeli/taxonomy
2 parents 1b3a919 + da063da commit c430e03

File tree

2 files changed

+23
-14
lines changed

2 files changed

+23
-14
lines changed

src/detaxa/__main__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def acc2taxid(accession, mapping, debug):
134134
datefmt='%Y-%m-%d %H:%M',
135135
)
136136

137-
print(t.acc2taxid(accession, mapping))
137+
print(t.acc2taxid(accession, mapping_file=mapping))
138138

139139
@cli.command()
140140
@click.option('-d', '--database',
@@ -189,4 +189,4 @@ def update(database, accnucl, accwgs, accprot, accpdb, accdead, debug):
189189

190190

191191
if __name__ == '__main__':
192-
cli()
192+
cli()

src/detaxa/taxonomy.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ def _taxid2lineage(tid: Union[int, str],
140140
info = _autoVivification()
141141
level = {abbr: '' for abbr in abbr_to_major_level}
142142

143+
tid_orig = tid
143144
rank = taxid2rank(tid)
144145
orig_rank = rank
145146
name = taxid2name(tid)
@@ -161,7 +162,7 @@ def _taxid2lineage(tid: Union[int, str],
161162
rank = _getTaxRank(tid)
162163
name = _getTaxName(tid)
163164

164-
if name == 'root': break
165+
if name == 'root' : break
165166

166167
# try to get the closest "no_rank" taxa to "type" representing subtype/group (mainly for virus)
167168
if guess_type==True:
@@ -175,30 +176,34 @@ def _taxid2lineage(tid: Union[int, str],
175176
ranks = list(abbr_to_major_level.keys())
176177
ranks.reverse()
177178
idx = 0
178-
179+
179180
# input taxid is a major rank
180181
if orig_rank in major_level_to_abbr:
181182
idx = ranks.index( major_level_to_abbr[orig_rank] )
182183
# if not, find the next major rank
183184
else:
184-
nmtid = taxid2nearestMajorTaxid( tid )
185+
nmtid = taxid2nearestMajorTaxid( tid_orig )
185186
nmrank = taxid2rank( nmtid )
186187
if nmrank == "root":
187188
idx = 7
188189
else:
189190
idx = ranks.index( major_level_to_abbr[nmrank] )
190191

191192
for lvl in ranks[idx:]:
192-
if all_major_rank == False:
193-
if not level[lvl]: continue
193+
if not all_major_rank:
194+
break
194195

196+
logging.debug(f'{lvl}: {level}')
195197
if not level[lvl]:
196-
level[lvl] = "%s - no_%s_rank"%(last,lvl)
197-
info[abbr_to_major_level[lvl]]["name"] = "%s - no_%s_rank"%(last,lvl)
198-
info[abbr_to_major_level[lvl]]["taxid"] = 0
198+
level[lvl] = f'{last} - no_{lvl}_rank'
199+
info[abbr_to_major_level[lvl]]['name'] = f'{last} - no_{lvl}_rank'
200+
info[abbr_to_major_level[lvl]]['taxid'] = 0
201+
logging.debug(f'no {lvl} - {level[lvl]}')
199202

200203
last=level[lvl]
201204

205+
logging.debug(f'{info}')
206+
202207
if print_strain==True:
203208
if orig_rank == "strain":
204209
info["strain"]["name"] = str_name
@@ -640,7 +645,7 @@ def taxid2nearestMajorTaxid(tid: Union[int, str]) -> str:
640645
def taxid2lineage(tid: Union[int, str], all_major_rank=True, print_strain=False, space2underscore=False, sep="|") -> str:
641646
"""
642647
Returns the taxonomic lineage for a given taxonomic identifier (tid) as a formatted string.
643-
648+
644649
Parameters:
645650
tid (Union[int, str]): A taxonomic identifier, which can be either an integer or a string.
646651
all_major_rank (bool): If True, all major taxonomic ranks will be included in the lineage; if False, only the lowest common ancestor will be included. Default is True.
@@ -650,9 +655,8 @@ def taxid2lineage(tid: Union[int, str], all_major_rank=True, print_strain=False,
650655
651656
Returns:
652657
str: A formatted string containing the taxonomic lineage information, with each rank separated by the specified separator (default is "|").
653-
654658
"""
655-
659+
656660
lineage = _taxid2lineage( tid, all_major_rank, print_strain, space2underscore)
657661
texts = []
658662
for rank in major_level_to_abbr:
@@ -755,7 +759,7 @@ def acc2taxid_raw(acc: str, accession2taxid_file: Optional[str] = None) -> str:
755759

756760
return accTid[acc]
757761

758-
def acc2taxid(acc: str, type: Optional[str]='nucl') -> str:
762+
def acc2taxid(acc: str, type: Optional[str] = 'nucl', mapping_file: Optional[str] = None) -> str:
759763
"""
760764
Get the taxonomy ID for a given accession.
761765
@@ -789,6 +793,11 @@ def acc2taxid(acc: str, type: Optional[str]='nucl') -> str:
789793
f'{taxonomy_dir}/accession2taxid/pdb.accession2taxid'
790794
]
791795

796+
if mapping_file:
797+
acc2taxid_files = [mapping_file]
798+
799+
logger.debug( f"type: {type}; acc2taxid_files: {acc2taxid_files}" )
800+
792801
# check if accession2taxid files exist
793802
avail_acc2taxid_files = []
794803

0 commit comments

Comments
 (0)