Skip to content

Commit 0d9028c

Browse files
chasemc and Sidduppal authored
Fix NA issues after Pandas update (#361)
* 🐛 fixes recursive_dbscan/pandas NA
* Fix hdbscan binning
* Update version

---------

Co-authored-by: Siddharth Uppal <suppal3@wisc.edu>
1 parent 695c09c commit 0d9028c

File tree

4 files changed: +16 -4 lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
2.2.2
1+
2.2.3

autometa/binning/recursive_dbscan.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -186,7 +186,11 @@ def recursive_dbscan(
186186
coverage_stddev_cutoff=coverage_stddev_cutoff,
187187
gc_content_stddev_cutoff=gc_content_stddev_cutoff,
188188
)
189-
median_completeness = filtered_df.completeness.median()
189+
if filtered_df.empty:
190+
median_completeness = float("-inf")
191+
else:
192+
median_completeness = filtered_df.completeness.median()
193+
190194
if median_completeness >= best_median:
191195
best_median = median_completeness
192196
best_df = df
@@ -379,7 +383,11 @@ def recursive_hdbscan(
379383
coverage_stddev_cutoff=coverage_stddev_cutoff,
380384
gc_content_stddev_cutoff=gc_content_stddev_cutoff,
381385
)
382-
median_completeness = filtered_df.completeness.median()
386+
if filtered_df.empty:
387+
median_completeness = float("-inf")
388+
else:
389+
median_completeness = filtered_df.completeness.median()
390+
383391
if median_completeness >= best_median:
384392
best_median = median_completeness
385393
best_df = df

autometa/binning/utilities.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,8 @@
3131

3232
import pandas as pd
3333

34+
import numpy as np
35+
3436
from typing import Iterable, Tuple
3537

3638
from autometa.taxonomy.database import TaxonomyDatabase
@@ -183,7 +185,9 @@ def add_metrics(
183185
# redundant_marker_count = cluster_marker_counts.gt(1).sum(axis=1)
184186
# calculate completeness and purity and std. dev. metrics
185187
completeness = present_marker_count / reference_markers_count * 100
188+
completeness = completeness.where(~np.isnan(completeness),pd.NA)
186189
purity = single_copy_marker_count / present_marker_count * 100
190+
purity = purity.where(~np.isnan(purity), pd.NA)
187191
coverage_stddev = main_grouped_by_cluster.coverage.std()
188192
gc_content_stddev = main_grouped_by_cluster.gc_content.std()
189193
# merge metrics with given dataframe

nextflow.config

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ manifest {
1212
doi = "https://doi.org/10.1093/nar/gkz148"
1313
mainScript = "main.nf"
1414
nextflowVersion = ">=21.04.0"
15-
version = "2.2.2"
15+
version = "2.2.3"
1616
}
1717

1818

0 commit comments

Comments
 (0)