Skip to content

Commit 9606d62

Browse files
committed
update case studies
1 parent 320be6f commit 9606d62

File tree

2 files changed

+108
-50
lines changed

2 files changed

+108
-50
lines changed

inst/case_study/case_study_of_CEBPB.R

Lines changed: 75 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ library(gplots)
44
# CEBPB motifs in TFregulomeR
55
CEBPB_record <- dataBrowser(tf = "CEBPB")
66

7-
# (Meth)Motif logo for all CEBPB in TFregulomeR compendium for Figure S1
7+
# (Meth)Motif logo for all CEBPB in TFregulomeR compendium for Supplementary Figure 1
88
for (id in CEBPB_record$ID){
99
motif_matrix <- searchMotif(id = id)
1010
plotLogo(MM_object = motif_matrix)
@@ -31,18 +31,18 @@ for (i in seq(1,16,1)){
3131
}
3232
K562_CEBPB_all_peaks$sum <- apply(K562_CEBPB_all_peaks[,CEBPB_record$cell_tissue_name],1,sum)
3333

34-
# Peak numbers, MethMotif logs and read enrichments for 16 K562 CEBPB sub-ensembles for Figure S2 and Figure 1A
34+
# Peak numbers, MethMotif logos and read enrichments for 16 K562 CEBPB sub-ensembles for Supplementary Figure 2 and Figure 1A
3535
sub_ensemble_peak_num <- c()
36-
sub_ensemble_read_score <- as.data.frame(matrix(nrow = nrow(K562_CEBPB_all_peaks),
36+
sub_ensemble_read_score <- as.data.frame(matrix(nrow = nrow(K562_CEBPB_all_peaks),
3737
ncol = 2))
3838
colnames(sub_ensemble_read_score) <- c("cell_type_num", "read_fold_change")
3939
sub_ensemble_read_score$cell_type_num <- K562_CEBPB_all_peaks$sum
4040
for (i in seq(1,16,1)){
4141
peak_subset_i <- K562_CEBPB_all_peaks[which(K562_CEBPB_all_peaks$sum==i),]
4242
sub_ensemble_peak_num <- c(sub_ensemble_peak_num, nrow(peak_subset_i))
43-
sub_ensemble_read_score[which(sub_ensemble_read_score$cell_type_num==i),'read_fold_change'] <-
43+
sub_ensemble_read_score[which(sub_ensemble_read_score$cell_type_num==i),'read_fold_change'] <-
4444
peak_subset_i$tag_fold_change
45-
45+
4646
# MethMotif logo
4747
common_peak_i <- commonPeaks(user_target_peak_list = list(peak_subset_i[,1:5]),
4848
user_target_peak_id = "MM1_HSA_K562_CEBPB",
@@ -56,7 +56,7 @@ for (i in seq(1,16,1)){
5656
sub_ensemble_read_score$cell_type_num <- factor(sub_ensemble_read_score$cell_type_num,
5757
levels = seq(1,16,1))
5858
pdf("read_enrichment_scores_across_16_K562_CEBPB_sub-ensembles.pdf")
59-
boxplot(read_fold_change~cell_type_num,sub_ensemble_read_score,
59+
boxplot(read_fold_change~cell_type_num,sub_ensemble_read_score,
6060
xlab = "number of cell types sharing the peaks",
6161
ylab = "Read enrichment score", outline=FALSE)
6262
dev.off()
@@ -84,19 +84,19 @@ cofactor_16_subsets_matrix <- cofactor_16_subsets_res$intersection_matrix
8484
cofactor_16_subsets_matrix_t <- as.data.frame(t(cofactor_16_subsets_matrix))
8585
# filter out cofactors whose binding percentages are at most 5 in all sub-ensembles
8686
cofactor_16_subsets_matrix_filtered <- cofactor_16_subsets_matrix_t[!(cofactor_16_subsets_matrix_t$user_peak_x1<=5 &
87-
cofactor_16_subsets_matrix_t$user_peak_x2<=5 &
88-
cofactor_16_subsets_matrix_t$user_peak_x3<=5 &
87+
cofactor_16_subsets_matrix_t$user_peak_x2<=5 &
88+
cofactor_16_subsets_matrix_t$user_peak_x3<=5 &
8989
cofactor_16_subsets_matrix_t$user_peak_x4<=5 &
90-
cofactor_16_subsets_matrix_t$user_peak_x5<=5 &
91-
cofactor_16_subsets_matrix_t$user_peak_x6<=5 &
92-
cofactor_16_subsets_matrix_t$user_peak_x7<=5 &
93-
cofactor_16_subsets_matrix_t$user_peak_x8<=5 &
90+
cofactor_16_subsets_matrix_t$user_peak_x5<=5 &
91+
cofactor_16_subsets_matrix_t$user_peak_x6<=5 &
92+
cofactor_16_subsets_matrix_t$user_peak_x7<=5 &
93+
cofactor_16_subsets_matrix_t$user_peak_x8<=5 &
9494
cofactor_16_subsets_matrix_t$user_peak_x9<=5 &
95-
cofactor_16_subsets_matrix_t$user_peak_x10<=5 &
96-
cofactor_16_subsets_matrix_t$user_peak_x11<=5 &
97-
cofactor_16_subsets_matrix_t$user_peak_x12<=5 &
98-
cofactor_16_subsets_matrix_t$user_peak_x13<=5 &
99-
cofactor_16_subsets_matrix_t$user_peak_x14<=5 &
95+
cofactor_16_subsets_matrix_t$user_peak_x10<=5 &
96+
cofactor_16_subsets_matrix_t$user_peak_x11<=5 &
97+
cofactor_16_subsets_matrix_t$user_peak_x12<=5 &
98+
cofactor_16_subsets_matrix_t$user_peak_x13<=5 &
99+
cofactor_16_subsets_matrix_t$user_peak_x14<=5 &
100100
cofactor_16_subsets_matrix_t$user_peak_x15<=5 &
101101
cofactor_16_subsets_matrix_t$user_peak_x16<=5),]
102102
color <- colorRampPalette(c("white","#D46A6A", "#801515", "#550000"))
@@ -133,50 +133,50 @@ for (i in seq(1,16,1)){
133133
tag_density_value = "median",
134134
return_methylation_profile = TRUE,
135135
angle_of_methylation_profile = "x")
136-
136+
137137
meth_matrix_i <- cofactor_in_subsets_res_i$methylation_profile_matrix
138138
CEBPB_CEBPD_meth_i <- meth_matrix_i["MM1_HSA_K562_CEBPB","MM1_HSA_K562_CEBPD"][[1]]
139139
CEBPB_CEBPD_meth_value_i <- sum(CEBPB_CEBPD_meth_i[9:10])*100/sum(CEBPB_CEBPD_meth_i)
140140
CEBPB_CEBPD_meth_value <- c(CEBPB_CEBPD_meth_value, CEBPB_CEBPD_meth_value_i)
141-
141+
142142
CEBPB_ATF4_meth_i <- meth_matrix_i["MM1_HSA_K562_CEBPB","MM1_HSA_K562_ATF4"][[1]]
143143
CEBPB_ATF4_meth_value_i <- sum(CEBPB_ATF4_meth_i[9:10])*100/sum(CEBPB_ATF4_meth_i)
144144
CEBPB_ATF4_meth_value <- c(CEBPB_ATF4_meth_value, CEBPB_ATF4_meth_value_i)
145-
145+
146146
tag_density_median_i <- cofactor_in_subsets_res_i$tag_density_matrix
147-
CEBPB_CEBPD_tag_median <- c(CEBPB_CEBPD_tag_median,
147+
CEBPB_CEBPD_tag_median <- c(CEBPB_CEBPD_tag_median,
148148
tag_density_median_i["MM1_HSA_K562_CEBPB",
149149
"MM1_HSA_K562_CEBPD"])
150-
CEBPB_ATF4_tag_median <- c(CEBPB_ATF4_tag_median,
150+
CEBPB_ATF4_tag_median <- c(CEBPB_ATF4_tag_median,
151151
tag_density_median_i["MM1_HSA_K562_CEBPB",
152152
"MM1_HSA_K562_ATF4"])
153-
153+
154154
cofactor_tag_q1_i <- intersectPeakMatrixResult(intersectPeakMatrix = cofactor_in_subsets_i,
155155
return_tag_density = TRUE,
156156
angle_of_tag_density = "x",
157157
tag_density_value = "quartile_25")
158158
tag_density_q1_i <- cofactor_tag_q1_i$tag_density_matrix
159-
CEBPB_CEBPD_tag_q1 <- c(CEBPB_CEBPD_tag_q1,
159+
CEBPB_CEBPD_tag_q1 <- c(CEBPB_CEBPD_tag_q1,
160160
tag_density_q1_i["MM1_HSA_K562_CEBPB",
161161
"MM1_HSA_K562_CEBPD"])
162-
CEBPB_ATF4_tag_q1 <- c(CEBPB_ATF4_tag_q1,
162+
CEBPB_ATF4_tag_q1 <- c(CEBPB_ATF4_tag_q1,
163163
tag_density_q1_i["MM1_HSA_K562_CEBPB",
164164
"MM1_HSA_K562_ATF4"])
165165
cofactor_tag_q3_i <- intersectPeakMatrixResult(intersectPeakMatrix = cofactor_in_subsets_i,
166166
return_tag_density = TRUE,
167167
tag_density_value = "quartile_75",
168168
angle_of_tag_density = "x")
169169
tag_density_q3_i <- cofactor_tag_q3_i$tag_density_matrix
170-
CEBPB_CEBPD_tag_q3 <- c(CEBPB_CEBPD_tag_q3,
170+
CEBPB_CEBPD_tag_q3 <- c(CEBPB_CEBPD_tag_q3,
171171
tag_density_q3_i["MM1_HSA_K562_CEBPB",
172172
"MM1_HSA_K562_CEBPD"])
173-
CEBPB_ATF4_tag_q3 <- c(CEBPB_ATF4_tag_q3,
173+
CEBPB_ATF4_tag_q3 <- c(CEBPB_ATF4_tag_q3,
174174
tag_density_q3_i["MM1_HSA_K562_CEBPB",
175175
"MM1_HSA_K562_ATF4"])
176176
}
177177
pdf("mCG_percentage_of_CEBPB-CEPBD_and_CEBPB-ATF4_in_16_subsets.pdf")
178178
plot(x = seq(1,16,1),
179-
y = CEBPB_CEBPD_meth_value,
179+
y = CEBPB_CEBPD_meth_value,
180180
type="l", ylim = c(0,30), xlim = c(0,17), ylab = "5mC percentage (%)",
181181
xlab = "number of shared cell types", col="blue")
182182
lines(x = seq(1,16,1),
@@ -185,7 +185,7 @@ dev.off()
185185

186186
pdf("read_enrichments_of_CEBPB-CEPBD_and_CEBPB-ATF4_in_16_subsets.pdf")
187187
plot(x = seq(1,16,1),
188-
y = CEBPB_CEBPD_tag_median,
188+
y = CEBPB_CEBPD_tag_median,
189189
type="l", ylim = c(0,100), xlim = c(0,17), ylab = "tag density median",
190190
xlab = "number of shared cell types", col="blue")
191191
points(x = seq(1,16,1),
@@ -216,13 +216,13 @@ K562_exclusivePeak_output <- exclusivePeaks(target_peak_id = "MM1_HSA_K562_CEBPB
216216
motif_only_for_target_peak = TRUE,
217217
excluded_peak_id = CEBPB_record_ID_noK562,
218218
motif_only_for_excluded_peak = TRUE)
219-
K562_exclusivePeak_result <- exclusivePeakResult(exclusivePeaks = K562_exclusivePeak_output,
219+
K562_exclusivePeak_result <- exclusivePeakResult(exclusivePeaks = K562_exclusivePeak_output,
220220
return_exclusive_peak_sites = TRUE)
221221
K562_exclusivePeak_peak <- K562_exclusivePeak_result$exclusive_peak_list$MM1_HSA_K562_CEBPB_exclusive_peaks
222222

223223
K562_exclusivePeak_with_ATF4_output <- commonPeaks(user_target_peak_list = list(K562_exclusivePeak_peak),
224224
user_target_peak_id = "MM1_HSA_K562_CEBPB",
225-
compared_peak_id = "MM1_HSA_K562_ATF4",
225+
compared_peak_id = "MM1_HSA_K562_ATF4",
226226
motif_only_for_compared_peak = TRUE)
227227
K562_exclusivePeak_with_ATF4_res <- commonPeakResult(commonPeaks = K562_exclusivePeak_with_ATF4_output,
228228
return_common_peak_sites = TRUE,
@@ -232,7 +232,7 @@ K562_exclusivePeak_without_ATF4_peaks <- K562_exclusivePeak_peak[!(K562_exclusiv
232232

233233
K562_exclusivePeak_without_ATF4_output <- commonPeaks(user_target_peak_list = list(K562_exclusivePeak_without_ATF4_peaks),
234234
user_target_peak_id = "MM1_HSA_K562_CEBPB",
235-
compared_peak_id = "MM1_HSA_K562_CEBPB",
235+
compared_peak_id = "MM1_HSA_K562_CEBPB",
236236
motif_only_for_compared_peak = TRUE)
237237
commonPeakResult(commonPeaks = K562_exclusivePeak_without_ATF4_output,
238238
save_MethMotif_logo = TRUE)
@@ -258,51 +258,51 @@ K562_commonPeak_without_CEBPB_peaks <- K562_commonPeak_peak[!(K562_commonPeak_pe
258258

259259
K562_commonPeak_without_ATF4_output <- commonPeaks(user_target_peak_list = list(K562_commonPeak_without_CEBPB_peaks),
260260
user_target_peak_id = "MM1_HSA_K562_CEBPB",
261-
compared_peak_id = "MM1_HSA_K562_CEBPB",
261+
compared_peak_id = "MM1_HSA_K562_CEBPB",
262262
motif_only_for_compared_peak = TRUE)
263263
commonPeakResult(commonPeaks = K562_commonPeak_without_ATF4_output,
264264
save_MethMotif_logo = TRUE)
265265

266266

267-
# motif in shared and exclusive CEBPB targets in all other 15 cell types (Figure S3)
267+
# motif in shared and exclusive CEBPB targets in all other 15 cell types (Supplementary Figure 3)
268268
for (i in CEBPB_record$ID){
269269
common_i <- commonPeaks(target_peak_id = i,
270-
motif_only_for_target_peak = TRUE,
270+
motif_only_for_target_peak = TRUE,
271271
compared_peak_id = CEBPB_record$ID,
272272
motif_only_for_compared_peak = TRUE)
273273
common_i_res <- commonPeakResult(commonPeaks = common_i,
274274
save_MethMotif_logo = TRUE)
275-
275+
276276
# exclude ID i from all CEBPB IDs
277277
cebpd_id_no_i <- CEBPB_record$ID[!(CEBPB_record$ID %in% i)]
278278
exclusive_i <- exclusivePeaks(target_peak_id = i,
279279
motif_only_for_target_peak = TRUE,
280-
excluded_peak_id = cebpd_id_no_i,
280+
excluded_peak_id = cebpd_id_no_i,
281281
motif_only_for_excluded_peak = TRUE)
282-
exclusive_i_res <- exclusivePeakResult(exclusivePeaks = exclusive_i,
282+
exclusive_i_res <- exclusivePeakResult(exclusivePeaks = exclusive_i,
283283
save_MethMotif_logo = TRUE)
284284
}
285285

286286

287-
# functions of CEBPB/CEBPD and CEBPB/ATF4 targets in K562 (Figure S4)
287+
# functions of CEBPB/CEBPD and CEBPB/ATF4 targets in K562 (Supplementary Figure 4)
288288
# load required package for GREAT annotation
289289
library(rGREAT)
290290
# load required package for genomic conversion from hg38 to hg19
291291
library(liftOver)
292292
# all CEBPB-CEBPD co-binding regions in K562
293293
K562_CEBPB_CEBPD <- commonPeaks(target_peak_id = "MM1_HSA_K562_CEBPB",
294294
motif_only_for_target_peak = TRUE,
295-
compared_peak_id = "MM1_HSA_K562_CEBPD",
295+
compared_peak_id = "MM1_HSA_K562_CEBPD",
296296
motif_only_for_compared_peak = TRUE)
297297
K562_CEBPB_CEBPD_res <- commonPeakResult(commonPeaks = K562_CEBPB_CEBPD,
298-
return_common_peak_sites = TRUE,
299-
save_MethMotif_logo = TRUE,
298+
return_common_peak_sites = TRUE,
299+
save_MethMotif_logo = TRUE,
300300
return_summary = TRUE)
301301
K562_CEBPB_CEBPD_res$peak_summary
302302
#> percentage_in_original_inputs(%)
303303
#> MM1_HSA_K562_CEBPB_common_peaks 6.532727
304304
K562_CEBPB_CEBPD_peak <- K562_CEBPB_CEBPD_res$common_peak_list$MM1_HSA_K562_CEBPB_common_peaks
305-
K562_CEBPB_CEBPD_great <- greatAnnotate(peaks = K562_CEBPB_CEBPD_peak,
305+
K562_CEBPB_CEBPD_great <- greatAnnotate(peaks = K562_CEBPB_CEBPD_peak,
306306
return_annotation = TRUE)
307307
K562_CEBPB_CEBPD_great_bp <- K562_CEBPB_CEBPD_great[which(K562_CEBPB_CEBPD_great$category=="BP"),]
308308

@@ -311,9 +311,9 @@ K562_CEBPB_ATF4 <- commonPeaks(target_peak_id = "MM1_HSA_K562_CEBPB",
311311
motif_only_for_target_peak = TRUE,
312312
compared_peak_id = "MM1_HSA_K562_ATF4",
313313
motif_only_for_compared_peak = TRUE)
314-
K562_CEBPB_ATF4_res <- commonPeakResult(commonPeaks = K562_CEBPB_ATF4,
315-
return_common_peak_sites = TRUE,
316-
save_MethMotif_logo = TRUE,
314+
K562_CEBPB_ATF4_res <- commonPeakResult(commonPeaks = K562_CEBPB_ATF4,
315+
return_common_peak_sites = TRUE,
316+
save_MethMotif_logo = TRUE,
317317
return_summary = TRUE)
318318
K562_CEBPB_ATF4_res$peak_summary
319319
#> percentage_in_original_inputs(%)
@@ -323,3 +323,32 @@ K562_CEBPB_ATF4_great <- greatAnnotate(peaks = K562_CEBPB_ATF4_peak, return_anno
323323
K562_CEBPB_ATF4_great_bp <- K562_CEBPB_ATF4_great[which(K562_CEBPB_ATF4_great$category=="BP"),]
324324

325325

326+
327+
# identify CEBPB motif in K562 ATF4 peaks (Supplementary Figure 5A)
328+
# 1) plot ATF4 overall motif logo
329+
ATF4_motif <- searchMotif(id="MM1_HSA_K562_ATF4")
330+
plotLogo(MM_object = ATF4_motif)
331+
# 2) get ATF4 peaks co-bound by CEBPB
332+
ATF4_with_CEBPB <- commonPeaks(target_peak_id = "MM1_HSA_K562_ATF4",
333+
motif_only_for_target_peak = TRUE,
334+
compared_peak_id = "MM1_HSA_K562_CEBPB",
335+
motif_only_for_compared_peak = TRUE)
336+
ATF4_with_CEBPB_res <- commonPeakResult(commonPeaks = ATF4_with_CEBPB,
337+
return_common_peak_sites = TRUE,
338+
save_MethMotif_logo = TRUE)
339+
# ATF4 peak sites co-bound by CEBPB, taken from the common-peak result above
ATF4_peaks_with_CEBPB <- ATF4_with_CEBPB_res$common_peak_list$MM1_HSA_K562_ATF4_common_peaks
340+
341+
K562_TFBS <- dataBrowser(cell_tissue_name = "K562")
342+
# 3) get all K562 PWM IDs except CEBPB and ATF4
343+
K562_TFBS_no_CEBPB_ATF4 <- K562_TFBS$ID[(K562_TFBS$ID != "MM1_HSA_K562_ATF4" &
344+
K562_TFBS$ID != "MM1_HSA_K562_CEBPB")]
345+
# 4) filter out the peaks also co-bound by other TFs in the ATF4-CEBPB co-binding peaks obtained at step 2.
346+
ATF4_peaks_with_CEBPB_no_other <- exclusivePeaks(user_target_peak_list = list(ATF4_peaks_with_CEBPB),
347+
user_target_peak_id = "MM1_HSA_K562_ATF4",
348+
excluded_peak_id = K562_TFBS_no_CEBPB_ATF4,
349+
motif_only_for_excluded_peak = TRUE)
350+
351+
ATF4_peaks_with_CEBPB_no_other_res <- exclusivePeakResult(exclusivePeaks = ATF4_peaks_with_CEBPB_no_other,
352+
save_MethMotif_logo = TRUE)
353+
354+

0 commit comments

Comments
 (0)