Skip to content

Commit 7b039bd

Browse files
committed
Merge branch 'release-v2.3.0'
2 parents 6aaf4bf + daa8ea6 commit 7b039bd

17 files changed

+615
-176
lines changed

QScripts/HaplotypeCaller.scala

Lines changed: 167 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,9 @@ package org.broadinstitute.gatk.queue.qscripts
33
import org.broadinstitute.gatk.queue.QScript
44
import org.broadinstitute.gatk.queue.extensions.gatk._
55

6-
class VariantCaller extends QScript {
7-
// Create an alias 'qscript' to be able to access variables in the VariantCaller.
8-
// 'qscript' is now the same as 'VariantCaller.this'
9-
qscript =>
6+
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode
107

8+
class VariantCaller extends QScript {
119
// Required arguments. All initialized to empty values.
1210
@Input(doc="The reference file for the bam files.", shortName="R", required=true)
1311
var referenceFile: File = _
@@ -16,7 +14,7 @@ class VariantCaller extends QScript {
1614
var bamFiles: List[File] = Nil
1715

1816
@Input(doc="Output core filename.", shortName="O", required=true)
19-
var out: File = _
17+
var outputFilename: File = _
2018

2119
@Argument(doc="Maxmem.", shortName="mem", required=true)
2220
var maxMem: Int = _
@@ -46,33 +44,175 @@ class VariantCaller extends QScript {
4644
@Argument(doc="Ploidy (number of chromosomes) per sample", shortName="ploidy", required=false)
4745
var samplePloidy: Int = 2
4846

47+
@Argument(doc="", shortName="gvcf", required=false)
48+
var gvcf: Boolean = false
49+
50+
@Argument(doc="", shortName="sexAware", required=false)
51+
var sexAware: Boolean = false
52+
53+
@Argument(doc="", shortName="sex", required=false)
54+
var sexes: List[String] = Nil
55+
4956
def script() {
50-
val haplotypeCaller = new HaplotypeCaller
57+
// Define common settings for original HC, gvcf HC and sex aware gvcf HC.
58+
trait HC_Arguments extends HaplotypeCaller {
59+
this.reference_sequence = referenceFile
60+
61+
this.scatterCount = numScatters
62+
this.memoryLimit = maxMem
63+
this.num_cpu_threads_per_data_thread = numCPUThreads
5164

52-
// All required input
53-
haplotypeCaller.input_file = bamFiles
54-
haplotypeCaller.reference_sequence = referenceFile
55-
haplotypeCaller.out = qscript.out + ".raw_variants.vcf"
65+
this.stand_emit_conf = standEmitConf
66+
this.stand_call_conf = standCallConf
67+
}
5668

57-
haplotypeCaller.scatterCount = numScatters
58-
haplotypeCaller.memoryLimit = maxMem
59-
haplotypeCaller.num_cpu_threads_per_data_thread = numCPUThreads
69+
// original HC
70+
if (gvcf == false && sexAware == false) {
71+
val haplotypeCaller = new HaplotypeCaller with HC_Arguments
6072

61-
haplotypeCaller.stand_emit_conf = standEmitConf
62-
haplotypeCaller.stand_call_conf = standCallConf
73+
// All required input
74+
haplotypeCaller.input_file = bamFiles
75+
haplotypeCaller.out = outputFilename + ".raw_variants.vcf"
6376

64-
// Optional input
65-
if (dbsnpFile != null) {
66-
haplotypeCaller.D = dbsnpFile
77+
// Optional input
78+
if (dbsnpFile != null) {
79+
haplotypeCaller.D = dbsnpFile
80+
}
81+
82+
if (targetFile != null) {
83+
haplotypeCaller.L :+= targetFile
84+
haplotypeCaller.ip = intervalPadding
85+
}
86+
87+
haplotypeCaller.sample_ploidy = samplePloidy
88+
89+
//add function to queue
90+
add(haplotypeCaller)
6791
}
68-
if (targetFile != null) {
69-
haplotypeCaller.L :+= targetFile
70-
haplotypeCaller.ip = intervalPadding
92+
93+
// GVCF HC
94+
else if (gvcf == true && sexAware == false) {
95+
var gvcfFiles : List[File] = Nil
96+
97+
// Make gvcf per bam file
98+
for (bamFile <- bamFiles) {
99+
val haplotypeCaller = new HaplotypeCaller with HC_Arguments
100+
101+
// All required input
102+
haplotypeCaller.input_file :+= bamFile
103+
haplotypeCaller.out = swapExt(bamFile, "bam", "g.vcf.gz")
104+
105+
// gVCF settings
106+
haplotypeCaller.emitRefConfidence = ReferenceConfidenceMode.GVCF
107+
108+
// Optional input
109+
if (targetFile != null) {
110+
haplotypeCaller.L :+= targetFile
111+
haplotypeCaller.ip = intervalPadding
112+
}
113+
114+
haplotypeCaller.sample_ploidy = samplePloidy
115+
116+
//add function to queue
117+
gvcfFiles :+= haplotypeCaller.out
118+
add(haplotypeCaller)
119+
}
120+
121+
//Joint genotyping
122+
val genotypeGVCFs = new GenotypeGVCFs
123+
genotypeGVCFs.V = gvcfFiles
124+
genotypeGVCFs.reference_sequence = referenceFile
125+
genotypeGVCFs.scatterCount = numScatters
126+
genotypeGVCFs.num_threads = numCPUThreads
127+
genotypeGVCFs.out = outputFilename + ".raw_variants.vcf"
128+
129+
// Optional input
130+
if (dbsnpFile != null) {
131+
genotypeGVCFs.D = dbsnpFile
132+
}
133+
134+
if (targetFile != null) {
135+
genotypeGVCFs.L :+= targetFile
136+
genotypeGVCFs.ip = intervalPadding
137+
}
138+
// Add function to queue
139+
add(genotypeGVCFs)
71140
}
72-
73-
haplotypeCaller.sample_ploidy = samplePloidy
74-
75-
//add function to queue
76-
add(haplotypeCaller)
141+
142+
// GVCF HC Sexaware HUMAN ONLY
143+
else if (gvcf == true && sexAware == true) {
144+
var gvcfFiles : List[File] = Nil
145+
146+
// Make gvcf per bam file
147+
for ((bamFile, sex) <- (bamFiles,sexes).zipped){
148+
149+
// Set common settings
150+
trait HC_Arguments extends HaplotypeCaller {
151+
this.reference_sequence = referenceFile
152+
153+
this.scatterCount = numScatters
154+
this.memoryLimit = maxMem
155+
this.num_cpu_threads_per_data_thread = numCPUThreads
156+
157+
this.stand_emit_conf = standEmitConf
158+
this.stand_call_conf = standCallConf
159+
160+
this.input_file :+= bamFile
161+
162+
this.emitRefConfidence = ReferenceConfidenceMode.GVCF
163+
}
164+
165+
val haplotypeCallerAutosome = new HaplotypeCaller with HC_Arguments
166+
val haplotypeCallerAllosome = new HaplotypeCaller with HC_Arguments
167+
168+
// HUMAN AUTOSOME
169+
haplotypeCallerAutosome.sample_ploidy = 2
170+
haplotypeCallerAutosome.intervalsString = List("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","MT")
171+
haplotypeCallerAutosome.out = swapExt(bamFile, "bam", "Autosome.g.vcf.gz")
172+
173+
// HUMAN ALLOSOME
174+
haplotypeCallerAllosome.out = swapExt(bamFile, "bam", "Allosome.g.vcf.gz")
175+
// Sex aware
176+
if ( sex == "male" ){
177+
haplotypeCallerAllosome.intervalsString = List("X","Y")
178+
haplotypeCallerAllosome.sample_ploidy = 1
179+
} else if ( sex == "female" ){
180+
haplotypeCallerAllosome.intervalsString = List("X")
181+
haplotypeCallerAllosome.sample_ploidy = 2
182+
}
183+
184+
//add functions to queue
185+
add(haplotypeCallerAutosome,haplotypeCallerAllosome)
186+
187+
// Combine gvcfs -> probably can use CatVariants
188+
val combineGVCF = new CatVariants
189+
combineGVCF.reference = referenceFile
190+
combineGVCF.V :+= haplotypeCallerAutosome.out
191+
combineGVCF.V :+= haplotypeCallerAllosome.out
192+
combineGVCF.out = swapExt(bamFile, "bam", "g.vcf.gz")
193+
combineGVCF.assumeSorted = true
194+
gvcfFiles :+= combineGVCF.out
195+
196+
//add function to queue
197+
add(combineGVCF)
198+
gvcfFiles :+= combineGVCF.out
199+
}
200+
201+
//Joint genotyping
202+
val genotypeGVCFs = new GenotypeGVCFs
203+
genotypeGVCFs.V = gvcfFiles
204+
genotypeGVCFs.reference_sequence = referenceFile
205+
genotypeGVCFs.scatterCount = numScatters
206+
genotypeGVCFs.num_threads = numCPUThreads
207+
genotypeGVCFs.out = outputFilename + ".raw_variants.vcf"
208+
209+
// Optional input
210+
if (dbsnpFile != null) {
211+
genotypeGVCFs.D = dbsnpFile
212+
}
213+
214+
// Add function to queue
215+
add(genotypeGVCFs)
216+
}
77217
}
78-
}
218+
}

QScripts/HaplotypeCaller_gVCF.scala

Lines changed: 0 additions & 111 deletions
This file was deleted.

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,8 @@ CALLING_THREADS number_of_threads
216216
CALLING_MEM maximum_memory
217217
CALLING_SCATTER number_of_scatters
218218
CALLING_SCALA QScripts/HaplotypeCaller.scala
219-
CALLING_GVCF no/yes | Set to yes if gvcf scala is used.
219+
CALLING_GVCF no/yes
220+
CALLING_SEXAWARE no/yes | Enable sex aware calling, only in combination with gvcf mode and human data
220221
CALLING_DBSNP GATK_bundle/dbsnp_137.b37.vcf | common snp file supplied by gatk
221222
CALLING_STANDCALLCONF 30 | The minimum phred-scaled confidence threshold at which variants should be called. Gatk default = 30
222223
CALLING_STANDEMITCONF 15 | The minimum phred-scaled confidence threshold at which variants should be emitted (and filtered with LowQual if less than the calling threshold) Gatk default = 15
@@ -371,7 +372,6 @@ BAF_THREADS number_of_threads
371372
BAF_MEM maximum_memory
372373
BIOVCF_PATH /path/to/biovcf/bin
373374
BAF_SNPS /path/to/CytoScanHD/CytoScanHD_hg19_SNPs_sorted.bed
374-
BAF_PLOTSCRIPT /path/to/IAP/scripts/makeBAFplot.R
375375
376376
#### CALLABLE LOCI CLUSTER CONFIGURATION####
377377
CALLABLE_LOCI_QUEUE queue_name

illumina_baf.pm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ sub runBAF {
125125
if (-e "$log_dir/BAF_PLOT_$sample.done"){
126126
print "WARNING: $log_dir/BAF_PLOT._$sample done exists, skipping BAF plot for $sample \n";
127127
} else {
128-
$command = "Rscript $opt{BAF_PLOTSCRIPT} $tmp_dir $output_dir/$output_baf ";
128+
$command = "Rscript $opt{IAP_PATH}/scripts/makeBAFplot.R $tmp_dir $output_dir/$output_baf ";
129129
print BAF_SH "echo \"Start BAF plotting\t\" `date` \"\t\" `uname -n` >> $log_dir/BAF_$sample.log\n";
130130
print BAF_SH "if [ -s $output_dir/$output_baf -a -e $log_dir/BAF_FILE_$sample.done ]\n";
131131
print BAF_SH "then\n";

illumina_baseRecal.pm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ sub runBaseRecalibration {
4747
my $jobNative = &jobNative(\%opt,"BASERECALIBRATION");
4848
$command .= "-jobQueue $opt{BASERECALIBRATION_QUEUE} -jobNative \"$jobNative\" -jobRunner GridEngine -jobReport $opt{OUTPUT_DIR}/$sample/logs/BaseRecalibration.jobReport.txt "; #Queue options
4949
# baseRecalibration options
50-
$command .= "-S $opt{BASERECALIBRATION_SCALA} -R $opt{GENOME} -I $opt{OUTPUT_DIR}/$sample/mapping/$inBam -mem $opt{BASERECALIBRATION_MEM} -nct $opt{BASERECALIBRATION_THREADS} -nsc $opt{BASERECALIBRATION_SCATTER} ";
50+
$command .= "-S $opt{IAP_PATH}/$opt{BASERECALIBRATION_SCALA} -R $opt{GENOME} -I $opt{OUTPUT_DIR}/$sample/mapping/$inBam -mem $opt{BASERECALIBRATION_MEM} -nct $opt{BASERECALIBRATION_THREADS} -nsc $opt{BASERECALIBRATION_SCATTER} ";
5151

5252
### Parsing known files and add them to $command.
5353
my @knownFiles;

0 commit comments

Comments
 (0)