Skip to content

Commit 5bfceba

Browse files
committed
Fix bug #26 in diagnosis.py to avoid overflow in data integrity check.
1 parent ef63762 commit 5bfceba

File tree

3 files changed

+32
-14
lines changed

3 files changed

+32
-14
lines changed

src/gsMap/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,4 +2,4 @@
22
Genetics-informed pathogenic spatial mapping
33
"""
44

5-
__version__ = "1.73.1"
5+
__version__ = "1.73.2"

src/gsMap/diagnosis.py

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,8 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
5151
mk_score = mk_score.loc[trait_ldsc_result.index]
5252

5353
# Filter out genes with no variation
54-
non_zero_std_cols = mk_score.columns[mk_score.std() > 0]
55-
mk_score = mk_score.loc[:, non_zero_std_cols]
54+
has_variation = (~mk_score.eq(mk_score.iloc[0], axis=1)).any()
55+
mk_score = mk_score.loc[:, has_variation]
5656

5757
logger.info("Calculating correlation between gene marker scores and trait logp-values...")
5858
corr = mk_score.corrwith(trait_ldsc_result["logp"])
@@ -69,10 +69,6 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
6969
}
7070
)
7171

72-
# Filter based on median GSS score
73-
high_GSS_Gene_annotation_pair = high_GSS_Gene_annotation_pair[
74-
high_GSS_Gene_annotation_pair["Median_GSS"] >= 1.0
75-
]
7672
high_GSS_Gene_annotation_pair = high_GSS_Gene_annotation_pair.merge(
7773
corr, left_on="Gene", right_index=True
7874
)
@@ -161,6 +157,20 @@ def generate_manhattan_plot(config: DiagnosisConfig):
161157
+ gwas_data_to_plot["Annotation"].astype(str)
162158
)
163159

160+
# Verify data integrity
161+
if gwas_data_with_gene_annotation_sort.empty:
162+
logger.error("Filtered GWAS data is empty, cannot create Manhattan plot")
163+
return
164+
165+
if len(gwas_data_to_plot) == 0:
166+
logger.error("No SNPs passed filtering criteria for Manhattan plot")
167+
return
168+
169+
# Log some diagnostic information
170+
logger.info(f"Creating Manhattan plot with {len(gwas_data_to_plot)} SNPs")
171+
logger.info(f"Columns available: {list(gwas_data_to_plot.columns)}")
172+
logger.info(f"Chromosome column values: {gwas_data_to_plot['CHR'].unique()}")
173+
164174
fig = ManhattanPlot(
165175
dataframe=gwas_data_to_plot,
166176
title="gsMap Diagnosis Manhattan Plot",

src/gsMap/utils/manhattan_plot.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -308,13 +308,21 @@ def __init__(
308308
self.index = "INDEX"
309309
self.pos = "POSITION"
310310

311-
# Fixes the bug where one chromosome is missing by adding a sequential
312-
# index column.
313-
idx = 0
314-
for i in self.data[chrm].unique():
315-
idx = idx + 1
316-
self.data.loc[self.data[chrm] == i, self.index] = int(idx)
317-
# Set the type to be the same as provided for chrm column
311+
self.data[self.index] = 0 # Initialize with zeros as default value
312+
313+
if not self.data.empty and len(self.data[chrm].unique()) > 0:
314+
idx = 0
315+
for i in self.data[chrm].unique():
316+
idx = idx + 1
317+
self.data.loc[self.data[chrm] == i, self.index] = int(idx)
318+
else:
319+
import logging
320+
321+
logger = logging.getLogger("gsMap.utils.manhattan_plot")
322+
logger.warning(
323+
"No chromosome data found or empty dataframe when creating Manhattan plot"
324+
)
325+
318326
self.data[self.index] = self.data[self.index].astype(self.data[chrm].dtype)
319327

320328
# This section sets up positions and ticks. Ticks should be placed in

0 commit comments

Comments
 (0)