Skip to content

Commit 5f69f2e

Browse files
authored
Merge pull request #111 from UMCUGenetics/release_v2.6.1
Release v2.6.1
2 parents 9340658 + 4e7e088 commit 5f69f2e

12 files changed

+80
-33
lines changed

IAP/baseRecal.pm

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ sub runBaseRecalibration {
5858
else { $command .= "-knownSites $knownFile " }
5959
}
6060
}
61+
### Generate QC option
62+
if($opt{BASERECALIBRATION_QC} eq 'yes'){
63+
$command .= "-generateQC ";
64+
}
6165
### retry option
6266
if($opt{QUEUE_RETRY} eq 'yes'){
6367
$command .= "-retry 1 ";
@@ -112,9 +116,11 @@ sub runBaseRecalibration {
112116
print BASERECALFS_SH "\t\tmv $opt{OUTPUT_DIR}/$sample/tmp/$outBam $opt{OUTPUT_DIR}/$sample/mapping/\n";
113117
print BASERECALFS_SH "\t\tmv $opt{OUTPUT_DIR}/$sample/tmp/$outBai $opt{OUTPUT_DIR}/$sample/mapping/\n";
114118
print BASERECALFS_SH "\t\tcp $opt{OUTPUT_DIR}/$sample/mapping/$outBai $opt{OUTPUT_DIR}/$sample/mapping/$outBamBai\n";
115-
print BASERECALFS_SH "\t\tmv $opt{OUTPUT_DIR}/$sample/tmp/*_baseRecalibration.pdf $opt{OUTPUT_DIR}/$sample/logs/\n";
116-
print BASERECALFS_SH "\t\tmv $opt{OUTPUT_DIR}/$sample/tmp/*_post_recal_data.table $opt{OUTPUT_DIR}/$sample/logs/\n";
117-
print BASERECALFS_SH "\t\tmv $opt{OUTPUT_DIR}/$sample/tmp/*_recal_data.table $opt{OUTPUT_DIR}/$sample/logs/\n";
119+
if($opt{BASERECALIBRATION_QC} eq 'yes'){
120+
print BASERECALFS_SH "\t\tmv $opt{OUTPUT_DIR}/$sample/tmp/*_baseRecalibration.pdf $opt{OUTPUT_DIR}/$sample/logs/\n";
121+
print BASERECALFS_SH "\t\tmv $opt{OUTPUT_DIR}/$sample/tmp/*_post_recal_data.table $opt{OUTPUT_DIR}/$sample/logs/\n";
122+
print BASERECALFS_SH "\t\tmv $opt{OUTPUT_DIR}/$sample/tmp/*_recal_data.table $opt{OUTPUT_DIR}/$sample/logs/\n";
123+
}
118124

119125
print BASERECALFS_SH "\t\ttouch $opt{OUTPUT_DIR}/$sample/logs/BaseRecalibration_$sample.done\n";
120126
print BASERECALFS_SH "\telse\n";

IAP/check.pm

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,11 +335,18 @@ sub runCheck {
335335
}
336336
}
337337
}
338+
339+
print BASH "\n\tcd $opt{OUTPUT_DIR}\n";
340+
341+
# Run cleanup script if set to yes
342+
if($opt{CHECKING_CLEANUP} eq "yes"){
343+
# Write both stdout and stderr of the cleanup script to one log. "2>&1" duplicates the already-open stdout descriptor; opening the same file separately with ">" and "2>" truncates it twice and lets the two streams overwrite each other.
print BASH "\tpython $opt{CHECKING_CLEANUP_SCRIPT} > logs/checking_cleanup.log 2>&1\n";
344+
}
345+
338346
# Send email.
339347
print BASH "\tmail -s \"IAP DONE $runName\" \"$opt{MAIL}\" < $logFile\n";
340348

341349
# Create md5sum.txt
342-
print BASH "\n\tcd $opt{OUTPUT_DIR}\n";
343350
print BASH "\tfind . -type f \\( ! -iname \"md5sum.txt\" \\) -exec md5sum \"{}\" \\; > md5sum.txt\n";
344351

345352
print BASH "fi\n";

IAP/vcfutils.pm

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ sub runVcfUtils {
125125
print VCFUTILS_SH "$opt{BCFTOOLS_PATH}/bcftools view -f PASS $opt{OUTPUT_DIR}/$vcf | $opt{BCFTOOLS_PATH}/bcftools roh -s $sample -O s $opt{ROH_SETTINGS} - > $sample\_ROH_sites.txt\n";
126126
print VCFUTILS_SH "$opt{BCFTOOLS_PATH}/bcftools view -f PASS $opt{OUTPUT_DIR}/$vcf | $opt{BCFTOOLS_PATH}/bcftools roh -s $sample -O r $opt{ROH_SETTINGS} - > $sample\_ROH_regions.txt\n";
127127
#print VCFUTILS_SH "python $opt{IAP_PATH}/scripts/get_roh_regions.py $sample\_ROH.txt > $sample\_ROH_regions.txt\n"; # Not needed anymore.
128-
print VCFUTILS_SH "\tmv $sample\_ROH*.txt $output_dir/\n";
128+
print VCFUTILS_SH "mv $sample\_ROH*.txt $output_dir/\n";
129129
print VCFUTILS_SH "if [ -s $output_dir/$sample\_ROH_sites.txt -a -s $output_dir/$sample\_ROH_regions.txt ]; then\n";
130130
print VCFUTILS_SH "\ttouch $opt{OUTPUT_DIR}/logs/ROH_$sample.done\n";
131131
print VCFUTILS_SH "else\n";
@@ -149,15 +149,24 @@ sub runVcfUtils {
149149
print VCFUTILS_SH "cd $opt{OUTPUT_DIR}/tmp/\n";
150150
my $output_vcf = $vcf;
151151
$output_vcf =~ s/$runName/$sample/g;
152-
print VCFUTILS_SH "java -Xmx".$opt{VCFUTILS_MEM}."G -jar $opt{GATK_PATH}/GenomeAnalysisTK.jar -T SelectVariants -R $opt{GENOME} -V $opt{OUTPUT_DIR}/$vcf -o $output_vcf -sn $sample\n";
153-
## Check output
154-
print VCFUTILS_SH "if [ \"\$(tail -n 1 $opt{OUTPUT_DIR}/$vcf | cut -f 1,2)\" = \"\$(tail -n 1 $output_vcf | cut -f 1,2)\" ]\n";
152+
my $input_vcf = "$opt{OUTPUT_DIR}/$vcf";
153+
print VCFUTILS_SH "if [ -s $input_vcf ];\n";
155154
print VCFUTILS_SH "then\n";
156-
print VCFUTILS_SH "\tmv $output_vcf $output_dir/\n";
157-
print VCFUTILS_SH "\ttouch $opt{OUTPUT_DIR}/logs/SINGLE_SAMPLE_VCF_$sample.done\n";
155+
print VCFUTILS_SH "\tjava -Xmx".$opt{VCFUTILS_MEM}."G -jar $opt{GATK_PATH}/GenomeAnalysisTK.jar -T SelectVariants -R $opt{GENOME} -V $input_vcf -o $output_vcf -sn $sample\n";
156+
157+
## Check output
158+
print VCFUTILS_SH "\tif [ \"\$(tail -n 1 $input_vcf | cut -f 1,2)\" = \"\$(tail -n 1 $output_vcf | cut -f 1,2)\" ]\n";
159+
print VCFUTILS_SH "\tthen\n";
160+
print VCFUTILS_SH "\t\tmv $output_vcf $output_dir/\n";
161+
print VCFUTILS_SH "\t\ttouch $opt{OUTPUT_DIR}/logs/SINGLE_SAMPLE_VCF_$sample.done\n";
162+
print VCFUTILS_SH "\telse\n";
163+
print VCFUTILS_SH "\t\tfailed=true\n";
164+
print VCFUTILS_SH "\tfi\n";
165+
158166
print VCFUTILS_SH "else\n";
159167
print VCFUTILS_SH "\tfailed=true\n";
160168
print VCFUTILS_SH "fi\n\n";
169+
161170
}
162171
}
163172
}

QScripts/BaseRecalibrator.scala

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ class Realigner extends QScript {
2828
@Input(doc ="Database of known polymorphic sites to skip over in the recalibration algorithm", shortName="knownSites", required=false)
2929
var knownFiles: List[File] = Nil
3030

31+
@Argument(doc="Generate QC tables and plots to analyse covariation remaining after recalibration.", shortName="generateQC", required=false)
32+
var generateQC: Boolean = false
33+
3134
// This trait allows us set the variables below in one place,
3235
// and then reuse this trait on each CommandLineGATK function below.
3336
trait BR_Arguments extends CommandLineGATK {
@@ -43,38 +46,39 @@ class Realigner extends QScript {
4346

4447
// Analyze patterns of covariation in the sequence dataset
4548
baseRecalibrator.input_file :+= bamFile
46-
if(knownFiles != Nil){
47-
baseRecalibrator.knownSites = knownFiles
48-
}
4949
baseRecalibrator.out = swapExt(bamFile, ".bam", "_recal_data.table")
50-
5150
baseRecalibrator.scatterCount = numScatters
5251
baseRecalibrator.nct = numCPUThreads
53-
54-
// Do a second pass to analyze covariation remaining after recalibration
55-
baseRecalibratorSecond.input_file :+= bamFile
5652
if(knownFiles != Nil){
57-
baseRecalibratorSecond.knownSites = knownFiles
53+
baseRecalibrator.knownSites = knownFiles
5854
}
59-
baseRecalibratorSecond.BQSR = baseRecalibrator.out
60-
baseRecalibratorSecond.out = swapExt(bamFile, ".bam", "_post_recal_data.table")
61-
62-
baseRecalibratorSecond.scatterCount = numScatters
63-
baseRecalibratorSecond.nct = numCPUThreads
64-
65-
// Generate before and after plots
66-
analyzeCovariates.before = baseRecalibrator.out
67-
analyzeCovariates.after = baseRecalibratorSecond.out
68-
analyzeCovariates.plots = swapExt(baseRecalibrator.out, "recal_data.table", "baseRecalibration.pdf")
6955

7056
// Apply the recalibration to your sequence data
7157
printReads.input_file :+= bamFile
7258
printReads.BQSR = baseRecalibrator.out
7359
printReads.out = swapExt(bamFile, "bam", "recalibrated.bam")
74-
7560
printReads.scatterCount = numScatters
7661
printReads.nct = numCPUThreads
7762

78-
add(baseRecalibrator,baseRecalibratorSecond,analyzeCovariates,printReads)
63+
add(baseRecalibrator,printReads)
64+
65+
if (generateQC == true) {
66+
// Do a second pass to analyze covariation remaining after recalibration
67+
baseRecalibratorSecond.input_file :+= bamFile
68+
baseRecalibratorSecond.BQSR = baseRecalibrator.out
69+
baseRecalibratorSecond.out = swapExt(bamFile, ".bam", "_post_recal_data.table")
70+
baseRecalibratorSecond.scatterCount = numScatters
71+
baseRecalibratorSecond.nct = numCPUThreads
72+
if(knownFiles != Nil){
73+
baseRecalibratorSecond.knownSites = knownFiles
74+
}
75+
76+
// Generate before and after plots
77+
analyzeCovariates.before = baseRecalibrator.out
78+
analyzeCovariates.after = baseRecalibratorSecond.out
79+
analyzeCovariates.plots = swapExt(baseRecalibrator.out, "recal_data.table", "baseRecalibration.pdf")
80+
81+
add(baseRecalibratorSecond,analyzeCovariates)
82+
}
7983
}
8084
}

QScripts/HaplotypeCaller.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ class VariantCaller extends QScript {
125125
genotypeGVCFs.scatterCount = numScatters
126126
genotypeGVCFs.num_threads = numCPUThreads
127127
genotypeGVCFs.out = outputFilename + ".raw_variants.vcf"
128+
129+
genotypeGVCFs.stand_emit_conf = standEmitConf
130+
genotypeGVCFs.stand_call_conf = standCallConf
128131

129132
// Optional input
130133
if (dbsnpFile != null) {
@@ -205,7 +208,10 @@ class VariantCaller extends QScript {
205208
genotypeGVCFs.scatterCount = numScatters
206209
genotypeGVCFs.num_threads = numCPUThreads
207210
genotypeGVCFs.out = outputFilename + ".raw_variants.vcf"
208-
211+
212+
genotypeGVCFs.stand_emit_conf = standEmitConf
213+
genotypeGVCFs.stand_call_conf = standCallConf
214+
209215
// Optional input
210216
if (dbsnpFile != null) {
211217
genotypeGVCFs.D = dbsnpFile

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
## IAP
1+
## IAP [![DOI](https://zenodo.org/badge/19342535.svg)](https://zenodo.org/badge/latestdoi/19342535)
22
Illumina analysis pipeline.
33

44
## Download
@@ -212,6 +212,7 @@ BASERECALIBRATION_MEM maximum_memory
212212
BASERECALIBRATION_SCALA QScripts/BaseRecalibrator.scala
213213
BASERECALIBRATION_SCATTER number_of_scatters
214214
BASERECALIBRATION_KNOWN GATK_bundle/1000G_phase1.indels.b37.vcf GATK_bundle/dbsnp_137.b37.vcf GATK_bundle/Mills_and_1000G_gold_standard.indels.b37.vcf | common indel and snp files supplied by gatk
215+
BASERECALIBRATION_QC no/yes | generate QC data to analyse covariation remaining after recalibration
215216
216217
####CALLING CLUSTER CONFIGURATION####
217218
CALLING_MASTERQUEUE queue_name
@@ -463,5 +464,7 @@ CHECKING_QUEUE queue_name
463464
CHECKING_TIME estimated runtime
464465
CHECKING_THREADS number_of_threads
465466
CHECKING_RM list,of,files,to,remove
467+
CHECKING_CLEANUP yes/no
468+
CHECKING_CLEANUP_SCRIPT /path/to/cleanup_script.py
466469
467470
```

illumina_pipeline.pl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,7 @@ sub checkConfig{
521521
if(! $opt{BASERECALIBRATION_TIME}){ print "ERROR: No BASERECALIBRATION_TIME option found in config files.\n"; $checkFailed = 1; }
522522
if(! $opt{BASERECALIBRATION_SCALA}){ print "ERROR: No BASERECALIBRATION_SCALA option found in config files.\n"; $checkFailed = 1; }
523523
if(! $opt{BASERECALIBRATION_SCATTER}){ print "ERROR: No BASERECALIBRATION_SCATTER option found in config files.\n"; $checkFailed = 1; }
524+
if(! $opt{BASERECALIBRATION_QC}){ print "ERROR: No BASERECALIBRATION_QC option found in config files.\n"; $checkFailed = 1; }
524525
if(! $opt{QUEUE_RETRY}){ print "ERROR: No QUEUE_RETRY option found in config files.\n"; $checkFailed = 1; }
525526
if(! $opt{FLAGSTAT_QUEUE}){ print "ERROR: No FLAGSTAT_QUEUE option found in config files.\n"; $checkFailed = 1; }
526527
if(! $opt{FLAGSTAT_THREADS}){ print "ERROR: No FLAGSTAT_THREADS option found in config files.\n"; $checkFailed = 1; }
@@ -844,6 +845,10 @@ sub checkConfig{
844845
if(! $opt{CHECKING_MEM}){ print "ERROR: No CHECKING_MEM found in .ini file\n"; $checkFailed = 1; }
845846
if(! $opt{CHECKING_TIME}){ print "ERROR: No CHECKING_TIME found in .ini file\n"; $checkFailed = 1; }
846847
if(! $opt{CHECKING_RM}){ print "ERROR: No CHECKING_RM found in .ini file\n"; $checkFailed = 1; }
848+
if(! $opt{CHECKING_CLEANUP}){ print "ERROR: No CHECKING_CLEANUP found in .ini file\n"; $checkFailed = 1; }
849+
if($opt{CHECKING_CLEANUP} eq "yes"){
850+
if(! $opt{CHECKING_CLEANUP_SCRIPT}){ print "ERROR: No CHECKING_CLEANUP_SCRIPT found in .ini file\n"; $checkFailed = 1; }
851+
}
847852
}
848853

849854
if ($checkFailed) {

settings/CPCT.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,3 +327,4 @@ CHECKING_TIME 24:0:0
327327
CHECKING_THREADS 1
328328
CHECKING_MEM 10
329329
CHECKING_RM tmp,*.pileup.gz,*.pileup.gz.tbi,*.recalibrated.bam,*.recalibrated.bai,*.recalibrated.bam.bai
330+
CHECKING_CLEANUP no

settings/UMCU_Genome_somatic.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,3 +328,4 @@ CHECKING_TIME 24:0:0
328328
CHECKING_THREADS 1
329329
CHECKING_MEM 10
330330
CHECKING_RM tmp,*.pileup.gz,*.pileup.gz.tbi,*.recalibrated.bam,*.recalibrated.bai,*.recalibrated.bam.bai
331+
CHECKING_CLEANUP no

settings/UMCU_MIPS.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,3 +166,4 @@ CHECKING_TIME 2:0:0
166166
CHECKING_THREADS 1
167167
CHECKING_MEM 10
168168
CHECKING_RM tmp
169+
CHECKING_CLEANUP no

0 commit comments

Comments
 (0)