@@ -117,12 +117,13 @@ sub runSomaticVariantCallers {
117
117
open MERGE_SH, " >$bash_file " or die " cannot open file $bash_file \n " ;
118
118
print MERGE_SH " #!/bin/bash\n\n " ;
119
119
print MERGE_SH " echo \" Start Merge\t\" `date` `uname -n` >> $sample_tumor_log_dir /merge.log\n\n " ;
120
+ print MERGE_SH " module load $opt {GATK_JAVA_MODULE}\n " ;
120
121
121
122
# Merge vcfs
122
123
my $invcf ;
123
- my $outvcf = " $sample_tumor_out_dir /$sample_tumor_name \_ merged_somatics .vcf" ;
124
+ my $outvcf = " $sample_tumor_out_dir /$sample_tumor_name \_ somatics .vcf" ;
124
125
print MERGE_SH " java -Xmx" .$opt {SOMVARMERGE_MEM }." G -Djava.io.tmpdir=$sample_tumor_tmp_dir -jar $opt {GATK_PATH}/GenomeAnalysisTK.jar -T CombineVariants -R $opt {GENOME} -o $outvcf --genotypemergeoption uniquify " ;
125
- if ($opt {SOMVAR_STRELKA } eq " yes" ){ print MERGE_SH " -V:strelka $sample_tumor_out_dir /strelka/passed.somatic.merged.vcf " ; }
126
+ if ($opt {SOMVAR_STRELKA } eq " yes" ){ print MERGE_SH " -V:strelka $sample_tumor_out_dir /strelka/passed.somatic.merged.processed. vcf " ; }
126
127
if ($opt {SOMVAR_VARSCAN } eq " yes" ){ print MERGE_SH " -V:varscan $sample_tumor_out_dir /varscan/$sample_tumor_name .merged.Somatic.hc.vcf " ; }
127
128
if ($opt {SOMVAR_FREEBAYES } eq " yes" ){ print MERGE_SH " -V:freebayes $sample_tumor_out_dir /freebayes/$sample_tumor_name \_ somatic_filtered.vcf " ; }
128
129
if ($opt {SOMVAR_MUTECT } eq " yes" ){ print MERGE_SH " -V:mutect $sample_tumor_out_dir /mutect/$sample_tumor_name \_ mutect_passed.vcf " ;}
@@ -131,7 +132,7 @@ sub runSomaticVariantCallers {
131
132
# Filter vcf on target
132
133
if ($opt {SOMVAR_TARGETS }){
133
134
$invcf = $outvcf ;
134
- $outvcf = " $sample_tumor_out_dir /$sample_tumor_name \_ filtered_merged_somatics .vcf" ;
135
+ $outvcf = " $sample_tumor_out_dir /$sample_tumor_name \_ filtered_somatics .vcf" ;
135
136
print MERGE_SH " java -Xmx" .$opt {SOMVARMERGE_MEM }." G -Djava.io.tmpdir=$sample_tumor_tmp_dir -jar $opt {GATK_PATH}/GenomeAnalysisTK.jar -T SelectVariants -R $opt {GENOME} -L $opt {SOMVAR_TARGETS} -V $invcf -o $outvcf \n " ;
136
137
print MERGE_SH " rm $invcf *\n\n " ;
137
138
}
@@ -164,11 +165,22 @@ sub runSomaticVariantCallers {
164
165
}
165
166
166
167
# # Melt somatic vcf
167
- $invcf = $outvcf ;
168
- my $suffix = " _melted.vcf" ;
169
- $outvcf =~ s / .vcf/ $suffix / ;
170
- print MERGE_SH " python $opt {IAP_PATH}/scripts/melt_somatic_vcf.py -t $sample_tumor -v $invcf > $outvcf \n\n " ;
171
-
168
+ if ($opt {SOMVAR_MELT } eq " yes" ){
169
+ $invcf = $outvcf ;
170
+ my $suffix = " _melted.vcf" ;
171
+ $outvcf =~ s / .vcf/ $suffix / ;
172
+ print MERGE_SH " python $opt {IAP_PATH}/scripts/melt_somatic_vcf.py -t $sample_tumor -v $invcf > $outvcf \n\n " ;
173
+ }
174
+
175
+ # # Filter PON
176
+ if ($opt {SOMVAR_PONFILE }){
177
+ $invcf = $outvcf ;
178
+ my $suffix = " _PON.vcf" ;
179
+ $outvcf =~ s / .vcf/ $suffix / ;
180
+ print MERGE_SH " python $opt {IAP_PATH}/scripts/annotatePON.py -p $opt {SOMVAR_PONFILE} -i $invcf -o - | $opt {BCFTOOLS_PATH}/bcftools filter -e 'PON_COUNT!=\" .\" && MIN(PON_COUNT) > 5' -s PON -m+ -o $outvcf \n " ;
181
+ print MERGE_SH " $opt {IGVTOOLS_PATH}/igvtools index $outvcf \n\n " ;
182
+ }
183
+
172
184
# # Check output files
173
185
print MERGE_SH " if [ \"\$ (tail -n 1 $preAnnotateVCF | cut -f 1,2)\" = \"\$ (tail -n 1 $outvcf | cut -f 1,2)\" -a -s $preAnnotateVCF -a -s $outvcf ]\n " ;
174
186
print MERGE_SH " then\n " ;
@@ -226,15 +238,23 @@ sub runStrelka {
226
238
# Check strelka completed
227
239
print STRELKA_SH " \t if [ -f $strelka_out_dir /task.complete ]\n " ;
228
240
print STRELKA_SH " \t then\n " ;
241
+ print STRELKA_SH " \t\t module load $opt {GATK_JAVA_MODULE}\n " ;
229
242
print STRELKA_SH " \t\t java -Xmx" .$opt {STRELKA_MEM }." G -jar $opt {GATK_PATH}/GenomeAnalysisTK.jar -T CombineVariants -R $opt {GENOME} --genotypemergeoption unsorted -V:snp results/passed.somatic.snvs.vcf -V:indel results/passed.somatic.indels.vcf -o passed.somatic.merged.vcf\n " ;
230
- # print STRELKA_SH "\t\tperl -p -e 's/\\t([A-Z][A-Z]:)/\\tGT:\$1/g' passed.somatic.merged.vcf | perl -p -e 's/(:T[UO]R?)\\t/\$1\\t0\\/0:/g' | perl -p -e 's/(:\\d+,\\d+)\\t/\$1\\t0\\/1:/g' | perl -p -e 's/(#CHROM.*)/##StrelkaGATKCompatibility=Added GT fields to strelka calls for gatk compatibility.\\n\$1/g' > tmp.vcf\n";
231
- print STRELKA_SH " \t\t awk -F'\t ' -v OFS='\t ' '/^#CHROM/ { print \" ##StrelkaGATKCompatibility=Added GT fields to strelka calls for gatk compatibility.\" ; } /^[^#]/ { \$ 9 = \" GT:\" \$ 9; \$ 10 = \" 0/0:\" \$ 10; \$ 11 = \" 0/1:\" \$ 11; } { print }' passed.somatic.merged.vcf | python $opt {IAP_PATH}/scripts/filterStrelka.py > tmp.vcf\n " ;
232
- print STRELKA_SH " \t\t mv tmp.vcf passed.somatic.merged.vcf\n " ;
233
243
print STRELKA_SH " \t\t rm -r chromosomes/ \n " ;
234
- print STRELKA_SH " \t\t touch $log_dir /strelka.done\n " ;
244
+
245
+ # Strelka hmftools postprocess
246
+ print STRELKA_SH " \t\t guixr load-profile $opt {HMFTOOLS_PROFILE} -- << EOF\n " ;
247
+ print STRELKA_SH " java -jar \\\$ GUIX_JARPATH/strelka-post-process.jar -v passed.somatic.merged.vcf -hc_bed $opt {GIAB_HIGH_CONFIDENCE_BED} -t $sample_tumor -o passed.somatic.merged.processed.vcf\n " ;
248
+ print STRELKA_SH " EOF\n " ;
249
+ print STRELKA_SH " \t\t if [ -s passed.somatic.merged.vcf -a -s passed.somatic.merged.processed.vcf -a -s passed.somatic.merged.vcf.idx -a -s passed.somatic.merged.processed.vcf.idx ]\n " ;
250
+ print STRELKA_SH " \t\t then\n " ;
251
+ print STRELKA_SH " \t\t\t touch $log_dir /strelka.done\n " ;
252
+ print STRELKA_SH " \t\t else\n " ;
253
+ print STRELKA_SH " \t\t\t echo \" ERROR: Strelka or hmftools postprocess failed.\" >&2\n " ;
254
+ print STRELKA_SH " \t\t fi\n " ;
235
255
print STRELKA_SH " \t fi\n\n " ;
256
+
236
257
print STRELKA_SH " \t echo \" End Strelka\t\" `date` \"\t $sample_ref_bam \t $sample_tumor_bam \t\" `uname -n` >> $log_dir /strelka.log\n\n " ;
237
-
238
258
print STRELKA_SH " else\n " ;
239
259
print STRELKA_SH " \t echo \" ERROR: $sample_tumor_bam or $sample_ref_bam does not exist.\" >&2\n " ;
240
260
print STRELKA_SH " fi\n " ;
@@ -417,6 +437,7 @@ sub runVarscan {
417
437
print VARSCAN_SH " \t java -Xmx" .$opt {VARSCAN_MEM }." G -jar $opt {VARSCAN_PATH} processSomatic $sample_tumor_name .snp.vcf $opt {VARSCAN_POSTSETTINGS}\n\n " ;
418
438
419
439
# merge varscan hc snps and indels
440
+ print VARSCAN_SH " \t module load $opt {GATK_JAVA_MODULE}\n " ;
420
441
print VARSCAN_SH " \t java -Xmx" .$opt {VARSCAN_MEM }." G -jar $opt {GATK_PATH}/GenomeAnalysisTK.jar -T CombineVariants -R $opt {GENOME} --genotypemergeoption unsorted -o $sample_tumor_name .merged.Somatic.hc.vcf -V:snp $sample_tumor_name .snp.Somatic.hc.vcf -V:indel $sample_tumor_name .indel.Somatic.hc.vcf\n " ;
421
442
print VARSCAN_SH " \t sed -i 's/SSC/VS_SSC/' $sample_tumor_name .merged.Somatic.hc.vcf\n\n " ; # to resolve merge conflict with FB vcfs
422
443
@@ -500,6 +521,7 @@ sub runFreeBayes {
500
521
print FREEBAYES_SH " then\n " ;
501
522
print FREEBAYES_SH " \t echo \" Start Freebayes\t\" `date` \"\t $chr \t $sample_ref_bam \t $sample_tumor_bam \t\" `uname -n` >> $log_dir /freebayes.log\n\n " ;
502
523
print FREEBAYES_SH " \t $freebayes_command \n " ;
524
+ print FREEBAYES_SH " \t module load $opt {GATK_JAVA_MODULE}\n " ;
503
525
print FREEBAYES_SH " \t $sort_uniq_filter_command \n " ;
504
526
print FREEBAYES_SH " \t $mv_command \n\n " ;
505
527
print FREEBAYES_SH " \t echo \" End Freebayes\t\" `date` \"\t $chr $sample_ref_bam \t $sample_tumor_bam \t\" `uname -n` >> $log_dir /freebayes.log\n " ;
0 commit comments