Skip to content

Commit 9094367

Browse files
committed
Enhance sccomp_proportional_fold_change function to support complex interaction categories in from/to parameters. Improved handling for both two-factor and three-factor interactions, ensuring correct new_data structure creation. Added comprehensive unit tests to validate functionality across various interaction scenarios and error handling for invalid inputs.
1 parent 1867901 commit 9094367

File tree

3 files changed

+338
-6
lines changed

3 files changed

+338
-6
lines changed

R/sccomp_proportional_fold_change.R

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,23 +66,44 @@ sccomp_proportional_fold_change <- function(.data, formula_composition, from, to
6666
#' @export
6767
#'
6868
#' @importFrom glue glue
69+
#' @importFrom stringr str_split
6970
#'
7071
sccomp_proportional_fold_change.sccomp_tbl = function(.data, formula_composition, from, to){
7172

72-
my_factor = parse_formula(formula_composition)
73+
my_factors = parse_formula(formula_composition)
7374

7475
# Get the sample column name from the original data
7576
.sample = attr(.data, ".sample")
7677

78+
# Handle interaction categories by parsing the from/to strings
79+
if (length(my_factors) > 1) {
80+
# For interactions, parse the category strings
81+
from_parts <- str_split(from, ":")[[1]]
82+
to_parts <- str_split(to, ":")[[1]]
83+
84+
# Create new_data with individual factor columns
85+
new_data <- tibble(
86+
!!quo_name(.sample) := c(to, from)
87+
)
88+
89+
# Add each factor column
90+
for (i in seq_along(my_factors)) {
91+
new_data <- new_data %>%
92+
mutate(!!my_factors[i] := c(to_parts[i], from_parts[i]))
93+
}
94+
} else {
95+
# For single factor, use the original approach
96+
new_data <- tibble(
97+
!!quo_name(.sample) := c(to, from),
98+
!!my_factors := c(to, from)
99+
)
100+
}
101+
77102
# Predict the composition for the specified conditions
78103
.data |>
79104
sccomp_predict(
80105
formula_composition = formula_composition,
81-
new_data =
82-
tibble(
83-
!!quo_name(.sample) := c(to, from),
84-
!!my_factor := c(to, from)
85-
)
106+
new_data = new_data
86107
) |>
87108

88109
# Nest the predicted data by cell group

inst/NEWS.rd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
\section{News in version 2.1.18, Bioconductor 3.22 Release}{
55
\itemize{
66
\item {Fixed issue with sccomp_proportional_fold_change function for interaction models.} The function now properly handles complex models with interactions by correctly creating the new_data structure that sccomp_predict expects. This resolves GitHub issue #193 where the function would fail with the error "your new_data might be malformed" when working with interaction models. The fix ensures that the sample column name is dynamically retrieved from the model attributes rather than hardcoded, making the function more robust and flexible.
7+
\item {Enhanced interaction category support in sccomp_proportional_fold_change.} The function now properly handles interaction categories in the from/to parameters, supporting both two-factor interactions (e.g., "treatment:followup") and three-factor interactions (e.g., "treatment:followup:B"). The function automatically parses interaction strings and creates the correct new_data structure with individual factor columns.
8+
\item {Added comprehensive unit tests for sccomp_proportional_fold_change.} A new test file (test-sccomp_proportional_fold_change.R) has been added with 20 different test scenarios covering simple models, interaction models, error handling, and edge cases. The tests progress from simple single-factor models to complex three-factor interaction models, ensuring the function works correctly across all use cases. New tests specifically cover interaction categories in from/to parameters.
79
}}
810

911
\section{News in version 2.1.14, Bioconductor 3.22 Release}{

tests/testthat/test-sccomp_proportional_fold_change.R

Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,4 +670,313 @@ test_that("sccomp_proportional_fold_change handles identical conditions", {
670670
expect_s3_class(result, "tbl_df")
671671
expect_equal(nrow(result), 3)
672672
expect_true(is.numeric(result$proportion_fold_change))
673+
})
674+
675+
# Test 16: Interaction in from/to categories - two-factor interaction
676+
test_that("sccomp_proportional_fold_change works with interaction categories in from/to", {
677+
skip_cmdstan()
678+
679+
# Create test data with two factors that will have interactions
680+
test_data <- data.frame(
681+
sample = rep(paste0("s", 1:8), each = 3),
682+
cell_group = rep(c("cell1", "cell2", "cell3"), times = 8),
683+
treatment = rep(c("control", "treatment"), each = 12),
684+
timepoint = rep(c("baseline", "followup"), each = 6, times = 2),
685+
count = as.integer(c(
686+
# control-baseline
687+
10, 20, 15,
688+
12, 22, 17,
689+
# control-followup
690+
15, 25, 20,
691+
18, 28, 23,
692+
# treatment-baseline
693+
20, 30, 25,
694+
22, 32, 27,
695+
# treatment-followup
696+
30, 40, 35,
697+
35, 45, 40
698+
))
699+
)
700+
701+
# Fit model with interaction
702+
estimate <- sccomp_estimate(
703+
test_data,
704+
formula_composition = ~ treatment * timepoint,
705+
formula_variability = ~ 1,
706+
sample = "sample",
707+
cell_group = "cell_group",
708+
abundance = "count",
709+
cores = 1,
710+
verbose = FALSE
711+
)
712+
713+
# Test proportional fold change with interaction categories
714+
result <- sccomp_proportional_fold_change(
715+
estimate,
716+
formula_composition = ~ treatment * timepoint,
717+
from = "control:baseline",
718+
to = "treatment:followup"
719+
)
720+
721+
# Basic expectations
722+
expect_s3_class(result, "tbl_df")
723+
expect_equal(nrow(result), 3)
724+
expect_true(is.numeric(result$proportion_fold_change))
725+
expect_true(all(result$proportion_fold_change > 0)) # Should be positive for treatment effect
726+
expect_true(is.character(result$statement))
727+
expect_true(is.numeric(result$average_uncertainty))
728+
})
729+
730+
# Test 17: Interaction in from/to categories - three-factor interaction
731+
test_that("sccomp_proportional_fold_change works with three-factor interaction categories", {
732+
skip_cmdstan()
733+
734+
# Create test data with three factors
735+
test_data <- data.frame(
736+
sample = rep(paste0("s", 1:16), each = 3),
737+
cell_group = rep(c("cell1", "cell2", "cell3"), times = 16),
738+
treatment = rep(c("control", "treatment"), each = 24),
739+
timepoint = rep(c("baseline", "followup"), each = 12, times = 2),
740+
cohort = rep(c("A", "B"), each = 6, times = 4),
741+
count = as.integer(c(
742+
# control-baseline-A
743+
10, 20, 15,
744+
12, 22, 17,
745+
# control-baseline-B
746+
11, 21, 16,
747+
13, 23, 18,
748+
# control-followup-A
749+
15, 25, 20,
750+
18, 28, 23,
751+
# control-followup-B
752+
16, 26, 21,
753+
19, 29, 24,
754+
# treatment-baseline-A
755+
20, 30, 25,
756+
22, 32, 27,
757+
# treatment-baseline-B
758+
21, 31, 26,
759+
23, 33, 28,
760+
# treatment-followup-A
761+
30, 40, 35,
762+
35, 45, 40,
763+
# treatment-followup-B
764+
31, 41, 36,
765+
36, 46, 41
766+
))
767+
)
768+
769+
# Fit model with three-factor interaction
770+
estimate <- sccomp_estimate(
771+
test_data,
772+
formula_composition = ~ treatment * timepoint * cohort,
773+
formula_variability = ~ 1,
774+
sample = "sample",
775+
cell_group = "cell_group",
776+
abundance = "count",
777+
cores = 1,
778+
verbose = FALSE
779+
)
780+
781+
# Test proportional fold change with three-factor interaction categories
782+
result <- sccomp_proportional_fold_change(
783+
estimate,
784+
formula_composition = ~ treatment * timepoint * cohort,
785+
from = "control:baseline:A",
786+
to = "treatment:followup:B"
787+
)
788+
789+
# Basic expectations
790+
expect_s3_class(result, "tbl_df")
791+
expect_equal(nrow(result), 3)
792+
expect_true(is.numeric(result$proportion_fold_change))
793+
expect_true(all(result$proportion_fold_change > 0)) # Should be positive for treatment effect
794+
expect_true(is.character(result$statement))
795+
expect_true(is.numeric(result$average_uncertainty))
796+
})
797+
798+
# Test 18: Interaction categories with different factor orders
799+
test_that("sccomp_proportional_fold_change works with different factor orders in interaction", {
800+
skip_cmdstan()
801+
802+
# Create test data
803+
test_data <- data.frame(
804+
sample = rep(paste0("s", 1:8), each = 3),
805+
cell_group = rep(c("cell1", "cell2", "cell3"), times = 8),
806+
treatment = rep(c("control", "treatment"), each = 12),
807+
timepoint = rep(c("baseline", "followup"), each = 6, times = 2),
808+
count = as.integer(c(
809+
# control-baseline
810+
10, 20, 15,
811+
12, 22, 17,
812+
# control-followup
813+
15, 25, 20,
814+
18, 28, 23,
815+
# treatment-baseline
816+
20, 30, 25,
817+
22, 32, 27,
818+
# treatment-followup
819+
30, 40, 35,
820+
35, 45, 40
821+
))
822+
)
823+
824+
# Fit model with interaction
825+
estimate <- sccomp_estimate(
826+
test_data,
827+
formula_composition = ~ treatment * timepoint,
828+
formula_variability = ~ 1,
829+
sample = "sample",
830+
cell_group = "cell_group",
831+
abundance = "count",
832+
cores = 1,
833+
verbose = FALSE
834+
)
835+
836+
# Test with different factor orders in the interaction
837+
result1 <- sccomp_proportional_fold_change(
838+
estimate,
839+
formula_composition = ~ treatment * timepoint,
840+
from = "control:baseline",
841+
to = "treatment:followup"
842+
)
843+
844+
result2 <- sccomp_proportional_fold_change(
845+
estimate,
846+
formula_composition = ~ treatment * timepoint,
847+
from = "baseline:control",
848+
to = "followup:treatment"
849+
)
850+
851+
# Both should work and produce similar results
852+
expect_s3_class(result1, "tbl_df")
853+
expect_s3_class(result2, "tbl_df")
854+
expect_equal(nrow(result1), 3)
855+
expect_equal(nrow(result2), 3)
856+
expect_true(is.numeric(result1$proportion_fold_change))
857+
expect_true(is.numeric(result2$proportion_fold_change))
858+
})
859+
860+
# Test 19: Complex interaction with nested factors
861+
test_that("sccomp_proportional_fold_change works with complex nested interactions", {
862+
skip_cmdstan()
863+
864+
# Create test data with nested structure
865+
test_data <- data.frame(
866+
sample = rep(paste0("s", 1:12), each = 3),
867+
cell_group = rep(c("cell1", "cell2", "cell3"), times = 12),
868+
treatment = rep(c("control", "treatment"), each = 18),
869+
timepoint = rep(c("baseline", "followup"), each = 9, times = 2),
870+
batch = rep(c("batch1", "batch2"), each = 3, times = 6),
871+
count = as.integer(c(
872+
# control-baseline-batch1
873+
10, 20, 15,
874+
12, 22, 17,
875+
11, 21, 16,
876+
# control-baseline-batch2
877+
13, 23, 18,
878+
14, 24, 19,
879+
15, 25, 20,
880+
# control-followup-batch1
881+
18, 28, 23,
882+
20, 30, 25,
883+
19, 29, 24,
884+
# control-followup-batch2
885+
21, 31, 26,
886+
22, 32, 27,
887+
23, 33, 28,
888+
# treatment-baseline-batch1
889+
25, 35, 30,
890+
27, 37, 32,
891+
26, 36, 31,
892+
# treatment-baseline-batch2
893+
28, 38, 33,
894+
29, 39, 34,
895+
30, 40, 35,
896+
# treatment-followup-batch1
897+
35, 45, 40,
898+
37, 47, 42,
899+
36, 46, 41,
900+
# treatment-followup-batch2
901+
38, 48, 43,
902+
39, 49, 44,
903+
40, 50, 45
904+
))
905+
)
906+
907+
# Fit model with complex interaction
908+
estimate <- sccomp_estimate(
909+
test_data,
910+
formula_composition = ~ treatment * timepoint * batch,
911+
formula_variability = ~ 1,
912+
sample = "sample",
913+
cell_group = "cell_group",
914+
abundance = "count",
915+
cores = 1,
916+
verbose = FALSE
917+
)
918+
919+
# Test with complex interaction categories
920+
result <- sccomp_proportional_fold_change(
921+
estimate,
922+
formula_composition = ~ treatment * timepoint * batch,
923+
from = "control:baseline:batch1",
924+
to = "treatment:followup:batch2"
925+
)
926+
927+
# Basic expectations
928+
expect_s3_class(result, "tbl_df")
929+
expect_equal(nrow(result), 3)
930+
expect_true(is.numeric(result$proportion_fold_change))
931+
expect_true(all(result$proportion_fold_change > 0)) # Should be positive for treatment effect
932+
expect_true(is.character(result$statement))
933+
expect_true(is.numeric(result$average_uncertainty))
934+
})
935+
936+
# Test 20: Error handling for invalid interaction categories
937+
test_that("sccomp_proportional_fold_change handles invalid interaction categories gracefully", {
938+
skip_cmdstan()
939+
940+
# Create test data
941+
test_data <- data.frame(
942+
sample = rep(paste0("s", 1:4), each = 3),
943+
cell_group = rep(c("cell1", "cell2", "cell3"), times = 4),
944+
treatment = rep(c("control", "treatment"), each = 6),
945+
timepoint = rep(c("baseline", "followup"), each = 3, times = 2),
946+
count = as.integer(c(10, 20, 15, 15, 25, 20, 12, 22, 17, 18, 28, 23))
947+
)
948+
949+
# Fit model with interaction
950+
estimate <- sccomp_estimate(
951+
test_data,
952+
formula_composition = ~ treatment * timepoint,
953+
formula_variability = ~ 1,
954+
sample = "sample",
955+
cell_group = "cell_group",
956+
abundance = "count",
957+
cores = 1,
958+
verbose = FALSE
959+
)
960+
961+
# Test with invalid interaction category
962+
expect_error(
963+
sccomp_proportional_fold_change(
964+
estimate,
965+
formula_composition = ~ treatment * timepoint,
966+
from = "invalid:category",
967+
to = "treatment:followup"
968+
),
969+
regexp = "Error in.*sccomp_predict"
970+
)
971+
972+
# Test with missing factor in interaction
973+
expect_error(
974+
sccomp_proportional_fold_change(
975+
estimate,
976+
formula_composition = ~ treatment * timepoint,
977+
from = "control",
978+
to = "treatment:followup"
979+
),
980+
regexp = "Error in.*sccomp_predict"
981+
)
673982
})

0 commit comments

Comments
 (0)