@@ -4,7 +4,7 @@ library(gplots)
4
4
# CEBPB motifs in TFregulomeR
5
5
CEBPB_record <- dataBrowser(tf = " CEBPB" )
6
6
7
- # (Meth)Motif logo for all CEBPB in TFregulomeR compendium for Figure S1
7
+ # (Meth)Motif logo for all CEBPB in TFregulomeR compendium for Supplementary Figure 1
8
8
for (id in CEBPB_record $ ID ){
9
9
motif_matrix <- searchMotif(id = id )
10
10
plotLogo(MM_object = motif_matrix )
@@ -31,18 +31,18 @@ for (i in seq(1,16,1)){
31
31
}
32
32
K562_CEBPB_all_peaks $ sum <- apply(K562_CEBPB_all_peaks [,CEBPB_record $ cell_tissue_name ],1 ,sum )
33
33
34
- # Peak numbers, MethMotif logs and read enrichments for 16 K562 CEBPB sub-ensembles for Figure S2 and Figure 1A
34
+ # Peak numbers, MethMotif logos and read enrichments for 16 K562 CEBPB sub-ensembles for Supplementary Figure 2 and Figure 1A
35
35
sub_ensemble_peak_num <- c()
36
- sub_ensemble_read_score <- as.data.frame(matrix (nrow = nrow(K562_CEBPB_all_peaks ),
36
+ sub_ensemble_read_score <- as.data.frame(matrix (nrow = nrow(K562_CEBPB_all_peaks ),
37
37
ncol = 2 ))
38
38
colnames(sub_ensemble_read_score ) <- c(" cell_type_num" , " read_fold_change" )
39
39
sub_ensemble_read_score $ cell_type_num <- K562_CEBPB_all_peaks $ sum
40
40
for (i in seq(1 ,16 ,1 )){
41
41
peak_subset_i <- K562_CEBPB_all_peaks [which(K562_CEBPB_all_peaks $ sum == i ),]
42
42
sub_ensemble_peak_num <- c(sub_ensemble_peak_num , nrow(peak_subset_i ))
43
- sub_ensemble_read_score [which(sub_ensemble_read_score $ cell_type_num == i ),' read_fold_change' ] <-
43
+ sub_ensemble_read_score [which(sub_ensemble_read_score $ cell_type_num == i ),' read_fold_change' ] <-
44
44
peak_subset_i $ tag_fold_change
45
-
45
+
46
46
# MethMotif logo
47
47
common_peak_i <- commonPeaks(user_target_peak_list = list (peak_subset_i [,1 : 5 ]),
48
48
user_target_peak_id = " MM1_HSA_K562_CEBPB" ,
@@ -56,7 +56,7 @@ for (i in seq(1,16,1)){
56
56
sub_ensemble_read_score $ cell_type_num <- factor (sub_ensemble_read_score $ cell_type_num ,
57
57
levels = seq(1 ,16 ,1 ))
58
58
pdf(" read_enrichment_scores_across_16_K562_CEBPB_sub-ensembles.pdf" )
59
- boxplot(read_fold_change ~ cell_type_num ,sub_ensemble_read_score ,
59
+ boxplot(read_fold_change ~ cell_type_num ,sub_ensemble_read_score ,
60
60
xlab = " number of cell types sharing the peaks" ,
61
61
ylab = " Read enrichment score" , outline = FALSE )
62
62
dev.off()
@@ -84,19 +84,19 @@ cofactor_16_subsets_matrix <- cofactor_16_subsets_res$intersection_matrix
84
84
cofactor_16_subsets_matrix_t <- as.data.frame(t(cofactor_16_subsets_matrix ))
85
85
# filter out cofactors whose binding percentages are at most 5 (<= 5) in all 16 sub-ensembles
86
86
cofactor_16_subsets_matrix_filtered <- cofactor_16_subsets_matrix_t [! (cofactor_16_subsets_matrix_t $ user_peak_x1 < = 5 &
87
- cofactor_16_subsets_matrix_t $ user_peak_x2 < = 5 &
88
- cofactor_16_subsets_matrix_t $ user_peak_x3 < = 5 &
87
+ cofactor_16_subsets_matrix_t $ user_peak_x2 < = 5 &
88
+ cofactor_16_subsets_matrix_t $ user_peak_x3 < = 5 &
89
89
cofactor_16_subsets_matrix_t $ user_peak_x4 < = 5 &
90
- cofactor_16_subsets_matrix_t $ user_peak_x5 < = 5 &
91
- cofactor_16_subsets_matrix_t $ user_peak_x6 < = 5 &
92
- cofactor_16_subsets_matrix_t $ user_peak_x7 < = 5 &
93
- cofactor_16_subsets_matrix_t $ user_peak_x8 < = 5 &
90
+ cofactor_16_subsets_matrix_t $ user_peak_x5 < = 5 &
91
+ cofactor_16_subsets_matrix_t $ user_peak_x6 < = 5 &
92
+ cofactor_16_subsets_matrix_t $ user_peak_x7 < = 5 &
93
+ cofactor_16_subsets_matrix_t $ user_peak_x8 < = 5 &
94
94
cofactor_16_subsets_matrix_t $ user_peak_x9 < = 5 &
95
- cofactor_16_subsets_matrix_t $ user_peak_x10 < = 5 &
96
- cofactor_16_subsets_matrix_t $ user_peak_x11 < = 5 &
97
- cofactor_16_subsets_matrix_t $ user_peak_x12 < = 5 &
98
- cofactor_16_subsets_matrix_t $ user_peak_x13 < = 5 &
99
- cofactor_16_subsets_matrix_t $ user_peak_x14 < = 5 &
95
+ cofactor_16_subsets_matrix_t $ user_peak_x10 < = 5 &
96
+ cofactor_16_subsets_matrix_t $ user_peak_x11 < = 5 &
97
+ cofactor_16_subsets_matrix_t $ user_peak_x12 < = 5 &
98
+ cofactor_16_subsets_matrix_t $ user_peak_x13 < = 5 &
99
+ cofactor_16_subsets_matrix_t $ user_peak_x14 < = 5 &
100
100
cofactor_16_subsets_matrix_t $ user_peak_x15 < = 5 &
101
101
cofactor_16_subsets_matrix_t $ user_peak_x16 < = 5 ),]
102
102
color <- colorRampPalette(c(" white" ," #D46A6A" , " #801515" , " #550000" ))
@@ -133,50 +133,50 @@ for (i in seq(1,16,1)){
133
133
tag_density_value = " median" ,
134
134
return_methylation_profile = TRUE ,
135
135
angle_of_methylation_profile = " x" )
136
-
136
+
137
137
meth_matrix_i <- cofactor_in_subsets_res_i $ methylation_profile_matrix
138
138
CEBPB_CEBPD_meth_i <- meth_matrix_i [" MM1_HSA_K562_CEBPB" ," MM1_HSA_K562_CEBPD" ][[1 ]]
139
139
CEBPB_CEBPD_meth_value_i <- sum(CEBPB_CEBPD_meth_i [9 : 10 ])* 100 / sum(CEBPB_CEBPD_meth_i )
140
140
CEBPB_CEBPD_meth_value <- c(CEBPB_CEBPD_meth_value , CEBPB_CEBPD_meth_value_i )
141
-
141
+
142
142
CEBPB_ATF4_meth_i <- meth_matrix_i [" MM1_HSA_K562_CEBPB" ," MM1_HSA_K562_ATF4" ][[1 ]]
143
143
CEBPB_ATF4_meth_value_i <- sum(CEBPB_ATF4_meth_i [9 : 10 ])* 100 / sum(CEBPB_ATF4_meth_i )
144
144
CEBPB_ATF4_meth_value <- c(CEBPB_ATF4_meth_value , CEBPB_ATF4_meth_value_i )
145
-
145
+
146
146
tag_density_median_i <- cofactor_in_subsets_res_i $ tag_density_matrix
147
- CEBPB_CEBPD_tag_median <- c(CEBPB_CEBPD_tag_median ,
147
+ CEBPB_CEBPD_tag_median <- c(CEBPB_CEBPD_tag_median ,
148
148
tag_density_median_i [" MM1_HSA_K562_CEBPB" ,
149
149
" MM1_HSA_K562_CEBPD" ])
150
- CEBPB_ATF4_tag_median <- c(CEBPB_ATF4_tag_median ,
150
+ CEBPB_ATF4_tag_median <- c(CEBPB_ATF4_tag_median ,
151
151
tag_density_median_i [" MM1_HSA_K562_CEBPB" ,
152
152
" MM1_HSA_K562_ATF4" ])
153
-
153
+
154
154
cofactor_tag_q1_i <- intersectPeakMatrixResult(intersectPeakMatrix = cofactor_in_subsets_i ,
155
155
return_tag_density = TRUE ,
156
156
angle_of_tag_density = " x" ,
157
157
tag_density_value = " quartile_25" )
158
158
tag_density_q1_i <- cofactor_tag_q1_i $ tag_density_matrix
159
- CEBPB_CEBPD_tag_q1 <- c(CEBPB_CEBPD_tag_q1 ,
159
+ CEBPB_CEBPD_tag_q1 <- c(CEBPB_CEBPD_tag_q1 ,
160
160
tag_density_q1_i [" MM1_HSA_K562_CEBPB" ,
161
161
" MM1_HSA_K562_CEBPD" ])
162
- CEBPB_ATF4_tag_q1 <- c(CEBPB_ATF4_tag_q1 ,
162
+ CEBPB_ATF4_tag_q1 <- c(CEBPB_ATF4_tag_q1 ,
163
163
tag_density_q1_i [" MM1_HSA_K562_CEBPB" ,
164
164
" MM1_HSA_K562_ATF4" ])
165
165
cofactor_tag_q3_i <- intersectPeakMatrixResult(intersectPeakMatrix = cofactor_in_subsets_i ,
166
166
return_tag_density = TRUE ,
167
167
tag_density_value = " quartile_75" ,
168
168
angle_of_tag_density = " x" )
169
169
tag_density_q3_i <- cofactor_tag_q3_i $ tag_density_matrix
170
- CEBPB_CEBPD_tag_q3 <- c(CEBPB_CEBPD_tag_q3 ,
170
+ CEBPB_CEBPD_tag_q3 <- c(CEBPB_CEBPD_tag_q3 ,
171
171
tag_density_q3_i [" MM1_HSA_K562_CEBPB" ,
172
172
" MM1_HSA_K562_CEBPD" ])
173
- CEBPB_ATF4_tag_q3 <- c(CEBPB_ATF4_tag_q3 ,
173
+ CEBPB_ATF4_tag_q3 <- c(CEBPB_ATF4_tag_q3 ,
174
174
tag_density_q3_i [" MM1_HSA_K562_CEBPB" ,
175
175
" MM1_HSA_K562_ATF4" ])
176
176
}
177
177
pdf(" mCG_percentage_of_CEBPB-CEPBD_and_CEBPB-ATF4_in_16_subsets.pdf" )
178
178
plot(x = seq(1 ,16 ,1 ),
179
- y = CEBPB_CEBPD_meth_value ,
179
+ y = CEBPB_CEBPD_meth_value ,
180
180
type = " l" , ylim = c(0 ,30 ), xlim = c(0 ,17 ), ylab = " 5mC percentage (%)" ,
181
181
xlab = " number of shared cell types" , col = " blue" )
182
182
lines(x = seq(1 ,16 ,1 ),
@@ -185,7 +185,7 @@ dev.off()
185
185
186
186
pdf(" read_enrichments_of_CEBPB-CEPBD_and_CEBPB-ATF4_in_16_subsets.pdf" )
187
187
plot(x = seq(1 ,16 ,1 ),
188
- y = CEBPB_CEBPD_tag_median ,
188
+ y = CEBPB_CEBPD_tag_median ,
189
189
type = " l" , ylim = c(0 ,100 ), xlim = c(0 ,17 ), ylab = " tag density median" ,
190
190
xlab = " number of shared cell types" , col = " blue" )
191
191
points(x = seq(1 ,16 ,1 ),
@@ -216,13 +216,13 @@ K562_exclusivePeak_output <- exclusivePeaks(target_peak_id = "MM1_HSA_K562_CEBPB
216
216
motif_only_for_target_peak = TRUE ,
217
217
excluded_peak_id = CEBPB_record_ID_noK562 ,
218
218
motif_only_for_excluded_peak = TRUE )
219
- K562_exclusivePeak_result <- exclusivePeakResult(exclusivePeaks = K562_exclusivePeak_output ,
219
+ K562_exclusivePeak_result <- exclusivePeakResult(exclusivePeaks = K562_exclusivePeak_output ,
220
220
return_exclusive_peak_sites = TRUE )
221
221
K562_exclusivePeak_peak <- K562_exclusivePeak_result $ exclusive_peak_list $ MM1_HSA_K562_CEBPB_exclusive_peaks
222
222
223
223
K562_exclusivePeak_with_ATF4_output <- commonPeaks(user_target_peak_list = list (K562_exclusivePeak_peak ),
224
224
user_target_peak_id = " MM1_HSA_K562_CEBPB" ,
225
- compared_peak_id = " MM1_HSA_K562_ATF4" ,
225
+ compared_peak_id = " MM1_HSA_K562_ATF4" ,
226
226
motif_only_for_compared_peak = TRUE )
227
227
K562_exclusivePeak_with_ATF4_res <- commonPeakResult(commonPeaks = K562_exclusivePeak_with_ATF4_output ,
228
228
return_common_peak_sites = TRUE ,
@@ -232,7 +232,7 @@ K562_exclusivePeak_without_ATF4_peaks <- K562_exclusivePeak_peak[!(K562_exclusiv
232
232
233
233
K562_exclusivePeak_without_ATF4_output <- commonPeaks(user_target_peak_list = list (K562_exclusivePeak_without_ATF4_peaks ),
234
234
user_target_peak_id = " MM1_HSA_K562_CEBPB" ,
235
- compared_peak_id = " MM1_HSA_K562_CEBPB" ,
235
+ compared_peak_id = " MM1_HSA_K562_CEBPB" ,
236
236
motif_only_for_compared_peak = TRUE )
237
237
commonPeakResult(commonPeaks = K562_exclusivePeak_without_ATF4_output ,
238
238
save_MethMotif_logo = TRUE )
@@ -258,51 +258,51 @@ K562_commonPeak_without_CEBPB_peaks <- K562_commonPeak_peak[!(K562_commonPeak_pe
258
258
259
259
K562_commonPeak_without_ATF4_output <- commonPeaks(user_target_peak_list = list (K562_commonPeak_without_CEBPB_peaks ),
260
260
user_target_peak_id = " MM1_HSA_K562_CEBPB" ,
261
- compared_peak_id = " MM1_HSA_K562_CEBPB" ,
261
+ compared_peak_id = " MM1_HSA_K562_CEBPB" ,
262
262
motif_only_for_compared_peak = TRUE )
263
263
commonPeakResult(commonPeaks = K562_commonPeak_without_ATF4_output ,
264
264
save_MethMotif_logo = TRUE )
265
265
266
266
267
- # motif in shared and exclusive CEBPB targets in all other 15 cell types (Figure S3 )
267
+ # motif in shared and exclusive CEBPB targets in all other 15 cell types (Supplementary Figure 3 )
268
268
for (i in CEBPB_record $ ID ){
269
269
common_i <- commonPeaks(target_peak_id = i ,
270
- motif_only_for_target_peak = TRUE ,
270
+ motif_only_for_target_peak = TRUE ,
271
271
compared_peak_id = CEBPB_record $ ID ,
272
272
motif_only_for_compared_peak = TRUE )
273
273
common_i_res <- commonPeakResult(commonPeaks = common_i ,
274
274
save_MethMotif_logo = TRUE )
275
-
275
+
276
276
# exclude ID i from all CEBPB IDs
277
277
cebpd_id_no_i <- CEBPB_record $ ID [! (CEBPB_record $ ID %in% i )]
278
278
exclusive_i <- exclusivePeaks(target_peak_id = i ,
279
279
motif_only_for_target_peak = TRUE ,
280
- excluded_peak_id = cebpd_id_no_i ,
280
+ excluded_peak_id = cebpd_id_no_i ,
281
281
motif_only_for_excluded_peak = TRUE )
282
- exclusive_i_res <- exclusivePeakResult(exclusivePeaks = exclusive_i ,
282
+ exclusive_i_res <- exclusivePeakResult(exclusivePeaks = exclusive_i ,
283
283
save_MethMotif_logo = TRUE )
284
284
}
285
285
286
286
287
- # functions of CEBPB/CEBPD and CEBPB/ATF4 targets in K562 (Figure S4 )
287
+ # functions of CEBPB/CEBPD and CEBPB/ATF4 targets in K562 (Supplementary Figure 4 )
288
288
# load required package for GREAT annotation
289
289
library(rGREAT )
290
290
# load required package for genomic conversion from hg38 to hg19
291
291
library(liftOver )
292
292
# all CEBPB-CEBPD co-binding regions in K562
293
293
K562_CEBPB_CEBPD <- commonPeaks(target_peak_id = " MM1_HSA_K562_CEBPB" ,
294
294
motif_only_for_target_peak = TRUE ,
295
- compared_peak_id = " MM1_HSA_K562_CEBPD" ,
295
+ compared_peak_id = " MM1_HSA_K562_CEBPD" ,
296
296
motif_only_for_compared_peak = TRUE )
297
297
K562_CEBPB_CEBPD_res <- commonPeakResult(commonPeaks = K562_CEBPB_CEBPD ,
298
- return_common_peak_sites = TRUE ,
299
- save_MethMotif_logo = TRUE ,
298
+ return_common_peak_sites = TRUE ,
299
+ save_MethMotif_logo = TRUE ,
300
300
return_summary = TRUE )
301
301
K562_CEBPB_CEBPD_res $ peak_summary
302
302
# > percentage_in_original_inputs(%)
303
303
# > MM1_HSA_K562_CEBPB_common_peaks 6.532727
304
304
K562_CEBPB_CEBPD_peak <- K562_CEBPB_CEBPD_res $ common_peak_list $ MM1_HSA_K562_CEBPB_common_peaks
305
- K562_CEBPB_CEBPD_great <- greatAnnotate(peaks = K562_CEBPB_CEBPD_peak ,
305
+ K562_CEBPB_CEBPD_great <- greatAnnotate(peaks = K562_CEBPB_CEBPD_peak ,
306
306
return_annotation = TRUE )
307
307
K562_CEBPB_CEBPD_great_bp <- K562_CEBPB_CEBPD_great [which(K562_CEBPB_CEBPD_great $ category == " BP" ),]
308
308
@@ -311,9 +311,9 @@ K562_CEBPB_ATF4 <- commonPeaks(target_peak_id = "MM1_HSA_K562_CEBPB",
311
311
motif_only_for_target_peak = TRUE ,
312
312
compared_peak_id = " MM1_HSA_K562_ATF4" ,
313
313
motif_only_for_compared_peak = TRUE )
314
- K562_CEBPB_ATF4_res <- commonPeakResult(commonPeaks = K562_CEBPB_ATF4 ,
315
- return_common_peak_sites = TRUE ,
316
- save_MethMotif_logo = TRUE ,
314
+ K562_CEBPB_ATF4_res <- commonPeakResult(commonPeaks = K562_CEBPB_ATF4 ,
315
+ return_common_peak_sites = TRUE ,
316
+ save_MethMotif_logo = TRUE ,
317
317
return_summary = TRUE )
318
318
K562_CEBPB_ATF4_res $ peak_summary
319
319
# > percentage_in_original_inputs(%)
@@ -323,3 +323,32 @@ K562_CEBPB_ATF4_great <- greatAnnotate(peaks = K562_CEBPB_ATF4_peak, return_anno
323
323
K562_CEBPB_ATF4_great_bp <- K562_CEBPB_ATF4_great [which(K562_CEBPB_ATF4_great $ category == " BP" ),]
324
324
325
325
326
+
327
+ # identify the CEBPB motif in K562 ATF4 peaks (Supplementary Figure 5A)
328
+ # 1) plot the overall ATF4 motif logo
329
+ ATF4_motif <- searchMotif(id = " MM1_HSA_K562_ATF4" )
330
+ plotLogo(MM_object = ATF4_motif )
331
+ # 2) get ATF4 peaks co-bound by CEBPB
332
+ ATF4_with_CEBPB <- commonPeaks(target_peak_id = " MM1_HSA_K562_ATF4" ,
333
+ motif_only_for_target_peak = TRUE ,
334
+ compared_peak_id = " MM1_HSA_K562_CEBPB" ,
335
+ motif_only_for_compared_peak = TRUE )
336
+ ATF4_with_CEBPB_res <- commonPeakResult(commonPeaks = ATF4_with_CEBPB ,
337
+ return_common_peak_sites = TRUE ,
338
+ save_MethMotif_logo = TRUE )
339
+ ATF4_peaks_with_CEBPB = ATF4_with_CEBPB_res $ common_peak_list $ MM1_HSA_K562_ATF4_common_peaks
340
+
341
+ K562_TFBS <- dataBrowser(cell_tissue_name = " K562" )
342
+ # 3) get all K562 PWM IDs except CEBPB and ATF4
343
+ K562_TFBS_no_CEBPB_ATF4 <- K562_TFBS $ ID [(K562_TFBS $ ID != " MM1_HSA_K562_ATF4" &
344
+ K562_TFBS $ ID != " MM1_HSA_K562_CEBPB" )]
345
+ # 4) from the ATF4-CEBPB co-bound peaks obtained in step 2, remove those that are also bound by any other K562 TF
346
+ ATF4_peaks_with_CEBPB_no_other <- exclusivePeaks(user_target_peak_list = list (ATF4_peaks_with_CEBPB ),
347
+ user_target_peak_id = " MM1_HSA_K562_ATF4" ,
348
+ excluded_peak_id = K562_TFBS_no_CEBPB_ATF4 ,
349
+ motif_only_for_excluded_peak = TRUE )
350
+
351
+ ATF4_peaks_with_CEBPB_no_other_res <- exclusivePeakResult(exclusivePeaks = ATF4_peaks_with_CEBPB_no_other ,
352
+ save_MethMotif_logo = TRUE )
353
+
354
+
0 commit comments