Fix bug #26 in diagnosis.py to avoid overflow in data integrity check.

Ganten-Hornby · Ganten-Hornby · commit 5bfcebaee615 · 2025-04-21T20:52:51.000+08:00
diff --git a/src/gsMap/__init__.py b/src/gsMap/__init__.py
@@ -2,4 +2,4 @@
 Genetics-informed pathogenic spatial mapping
 """
 
-__version__ = "1.73.1"
+__version__ = "1.73.2"
diff --git a/src/gsMap/diagnosis.py b/src/gsMap/diagnosis.py
@@ -51,8 +51,8 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
     mk_score = mk_score.loc[trait_ldsc_result.index]
 
     # Filter out genes with no variation
-    non_zero_std_cols = mk_score.columns[mk_score.std() > 0]
-    mk_score = mk_score.loc[:, non_zero_std_cols]
+    has_variation = (~mk_score.eq(mk_score.iloc[0], axis=1)).any()
+    mk_score = mk_score.loc[:, has_variation]
 
     logger.info("Calculating correlation between gene marker scores and trait logp-values...")
     corr = mk_score.corrwith(trait_ldsc_result["logp"])
@@ -69,10 +69,6 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
         }
     )
 
-    # Filter based on median GSS score
-    high_GSS_Gene_annotation_pair = high_GSS_Gene_annotation_pair[
-        high_GSS_Gene_annotation_pair["Median_GSS"] >= 1.0
-    ]
     high_GSS_Gene_annotation_pair = high_GSS_Gene_annotation_pair.merge(
         corr, left_on="Gene", right_index=True
     )
@@ -161,6 +157,20 @@ def generate_manhattan_plot(config: DiagnosisConfig):
         + gwas_data_to_plot["Annotation"].astype(str)
     )
 
+    # Verify data integrity
+    if gwas_data_with_gene_annotation_sort.empty:
+        logger.error("Filtered GWAS data is empty, cannot create Manhattan plot")
+        return
+
+    if len(gwas_data_to_plot) == 0:
+        logger.error("No SNPs passed filtering criteria for Manhattan plot")
+        return
+
+    # Log some diagnostic information
+    logger.info(f"Creating Manhattan plot with {len(gwas_data_to_plot)} SNPs")
+    logger.info(f"Columns available: {list(gwas_data_to_plot.columns)}")
+    logger.info(f"Chromosome column values: {gwas_data_to_plot['CHR'].unique()}")
+
     fig = ManhattanPlot(
         dataframe=gwas_data_to_plot,
         title="gsMap Diagnosis Manhattan Plot",
diff --git a/src/gsMap/utils/manhattan_plot.py b/src/gsMap/utils/manhattan_plot.py
@@ -308,13 +308,21 @@ def __init__(
         self.index = "INDEX"
         self.pos = "POSITION"
 
-        # Fixes the bug where one chromosome is missing by adding a sequential
-        # index column.
-        idx = 0
-        for i in self.data[chrm].unique():
-            idx = idx + 1
-            self.data.loc[self.data[chrm] == i, self.index] = int(idx)
-        # Set the type to be the same as provided for chrm column
+        self.data[self.index] = 0  # Initialize with zeros as default value
+
+        if not self.data.empty and len(self.data[chrm].unique()) > 0:
+            idx = 0
+            for i in self.data[chrm].unique():
+                idx = idx + 1
+                self.data.loc[self.data[chrm] == i, self.index] = int(idx)
+        else:
+            import logging
+
+            logger = logging.getLogger("gsMap.utils.manhattan_plot")
+            logger.warning(
+                "No chromosome data found or empty dataframe when creating Manhattan plot"
+            )
+
         self.data[self.index] = self.data[self.index].astype(self.data[chrm].dtype)
 
         # This section sets up positions and ticks. Ticks should be placed in