Skip to content

Commit f03fb9e

Browse files
authored
Merge pull request #85 from CuppenResearch/release-v2.4.0
Release v2.4.0
2 parents 7b039bd + a2c05b7 commit f03fb9e

15 files changed

+547
-78
lines changed

LICENSE

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
The MIT License (MIT)
2+
3+
Copyright (c) 2016 Cuppen Research
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.
22+

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ perl illumina_pipeline.pl /path/to/output_dir/settings.config>
4747
- [IGVtools](https://www.broadinstitute.org/igv/igvtools)
4848
- [Contra](http://contra-cnv.sourceforge.net/)
4949
- [FREEC](http://bioinfo-out.curie.fr/projects/freec/)
50+
- [QDNAseq](https://github.com/ccagc/QDNAseq) (used tag: v1.9.2-HMF.1)
5051
- [Varscan](http://varscan.sourceforge.net/)
5152
- [Strelka](https://sites.google.com/site/strelkasomaticvariantcaller/)
5253
- [Freebayes](https://github.com/ekg/freebayes)
@@ -338,6 +339,14 @@ CNVCHECK_MEM maximum_memory
338339
CNV_MODE sample_control
339340
CNV_TARGETS /path/to/target.bed | Optional, use for targeted data e.g. exome.
340341
342+
## QDNASEQ
343+
CNV_QDNASEQ yes
344+
QDNASEQ_QUEUE queue_name
345+
QDNASEQ_TIME estimated runtime
346+
QDNASEQ_THREADS number of threads
347+
QDNASEQ_MEM maximum memory
348+
QDNASEQ_PATH /hpc/local/CentOS7/cog_bioinf/QDNAseq_v1.9.2-HMF.1
349+
341350
## Contra
342351
CNV_CONTRA yes/no
343352
CONTRA_THREADS number_of_threads
@@ -435,5 +444,6 @@ NIPT_MASTER_THREADS number_of_threads
435444
CHECKING_QUEUE queue_name
436445
CHECKING_TIME estimated runtime
437446
CHECKING_THREADS number_of_threads
447+
CHECKING_RM list,of,files,to,remove
438448
439449
```

illumina_check.pm

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -306,9 +306,11 @@ sub runCheck {
306306
print BASH "else\n";
307307
print BASH "\techo \"The pipeline completed successfully. The md5sum file will be created.\">>$logFile\n";
308308

309-
# Remove all tmp folders and empty logs except .done files if pipeline completed successfully
310-
print BASH "\trm -r $opt{OUTPUT_DIR}/tmp\n";
311-
print BASH "\trm -r $opt{OUTPUT_DIR}/*/tmp\n";
309+
# Remove files/folders based on CHECKING_RM and empty logs except .done files if pipeline completed successfully
310+
my @removeFiles = split(",", $opt{CHECKING_RM});
311+
foreach my $removeFile (@removeFiles){
312+
print BASH "\tfind $opt{OUTPUT_DIR} -name $removeFile -print0 | xargs -0 rm -r -- \n";
313+
}
312314
print BASH "\tfind $opt{OUTPUT_DIR}/logs -size 0 -not -name \"*.done\" -delete\n";
313315
print BASH "\tfind $opt{OUTPUT_DIR}/*/logs -size 0 -not -name \"*.done\" -delete\n";
314316
print BASH "\tfind $opt{OUTPUT_DIR}/somaticVariants/*/logs -size 0 -not -name \"*.done\" -delete\n";
@@ -321,12 +323,6 @@ sub runCheck {
321323
}
322324
}
323325
}
324-
if($opt{SOMATIC_VARIANTS} eq "yes" && $opt{SOMVAR_VARSCAN} eq "yes"){
325-
foreach my $sample (@{$opt{SOMATIC_SAMPLES_UNIQ}}){
326-
print BASH "\trm $opt{OUTPUT_DIR}/$sample/mapping/$sample*.pileup.gz\n";
327-
print BASH "\trm $opt{OUTPUT_DIR}/$sample/mapping/$sample*.pileup.gz.tbi\n";
328-
}
329-
}
330326
# Send email.
331327
print BASH "\tmail -s \"IAP DONE $runName\" \"$opt{MAIL}\" < $logFile\n";
332328

illumina_copyNumber.pm

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,14 +151,27 @@ sub runCopyNumberTools {
151151
my $freec_job = runFreec($sample, $sample_out_dir, $sample_job_dir, $sample_log_dir, $sample_bam, "", \@running_jobs, \%opt);
152152
if($freec_job){push(@cnv_jobs, $freec_job)};
153153
}
154+
if($opt{CNV_QDNASEQ} eq "yes"){
155+
print "\n###SCHEDULING QDNASEQ####\n";
156+
my $qdnaseq_job = runqDNAseq($sample, $sample_out_dir, $sample_job_dir, $sample_log_dir, $sample_bam, \@running_jobs, \%opt);
157+
if($qdnaseq_job){push(@cnv_jobs, $qdnaseq_job)};
158+
}
159+
160+
154161
## Check copy number analysis
155162
my $job_id = "CHECK_".$sample."_".get_job_id();
156163
my $bash_file = $sample_job_dir."/".$job_id.".sh";
157164

158165
open CHECK_SH, ">$bash_file" or die "cannot open file $bash_file \n";
159166
print CHECK_SH "#!/bin/bash\n\n";
160167
print CHECK_SH "echo \"Start Check\t\" `date` `uname -n` >> $sample_log_dir/check.log\n\n";
161-
print CHECK_SH "if [[ -f $sample_log_dir/freec.done ]]\n";
168+
if($opt{CNV_QDNASEQ} eq "yes" && $opt{CNV_FREEC} eq "yes"){
169+
print CHECK_SH "if [ -f $sample_log_dir/freec.done -a -f $sample_log_dir/qdnaseq.done ]\n";
170+
} elsif ($opt{CNV_QDNASEQ} eq "yes" && $opt{CNV_FREEC} eq "no") {
171+
print CHECK_SH "if [ -f $sample_log_dir/qdnaseq.done ]\n";
172+
} elsif ($opt{CNV_QDNASEQ} eq "no" && $opt{CNV_FREEC} eq "yes") {
173+
print CHECK_SH "if [ -f $sample_log_dir/freec.done ]\n";
174+
}
162175
print CHECK_SH "then\n";
163176
print CHECK_SH "\ttouch $sample_log_dir/$sample.done\n";
164177
print CHECK_SH "fi\n\n";
@@ -177,6 +190,62 @@ sub runCopyNumberTools {
177190
}
178191

179192
### Copy number analysis tools
193+
sub runqDNAseq {
194+
###
195+
# Run qDNAseq
196+
###
197+
my ($sample_name, $out_dir, $job_dir, $log_dir, $sample_bam, $running_jobs, $opt) = (@_);
198+
my @running_jobs = @{$running_jobs};
199+
my %opt = %{$opt};
200+
201+
# Skip qdnseq if done file exists
202+
if (-e "$log_dir/qdnaseq.done"){
203+
print "WARNING: $log_dir/qdnaseq.done exists, skipping \n";
204+
return;
205+
}
206+
207+
## Create qdnaseq output directory
208+
my $qdnaseq_out_dir = "$out_dir/qdnaseq";
209+
if(! -e $qdnaseq_out_dir){
210+
make_path($qdnaseq_out_dir) or die "Couldn't create directory: $qdnaseq_out_dir\n";
211+
}
212+
213+
my $command = "Rscript $opt{IAP_PATH}/scripts/run_QDNAseq.R -qdnaseq_path $opt{QDNASEQ_PATH} ";
214+
$command .= "-s $sample_name ";
215+
$command .= "-b $sample_bam ";
216+
217+
## Create qDNAseq bash script
218+
my $job_id = "QDNASEQ_".$sample_name."_".get_job_id();
219+
my $bash_file = $job_dir."/".$job_id.".sh";
220+
221+
open QDNASEQ_SH, ">$bash_file" or die "cannot open file $bash_file \n";
222+
print QDNASEQ_SH "#!/bin/bash\n\n";
223+
print QDNASEQ_SH "if [ -s $sample_bam ]\n";
224+
print QDNASEQ_SH "then\n";
225+
print QDNASEQ_SH "\techo \"Start QDNASEQ\t\" `date` \"\t $sample_bam\t\" `uname -n` >> $log_dir/qdnaseq.log\n";
226+
print QDNASEQ_SH "\tcd $qdnaseq_out_dir\n";
227+
print QDNASEQ_SH "\t$command\n";
228+
print QDNASEQ_SH "\tif [ -s $qdnaseq_out_dir/$sample_name*.vcf ]\n";
229+
print QDNASEQ_SH "\tthen\n";
230+
print QDNASEQ_SH "\t\ttouch $log_dir/qdnaseq.done\n";
231+
print QDNASEQ_SH "\tfi\n";
232+
print QDNASEQ_SH "\techo \"End QDNASEQ\t\" `date` \"\t $sample_bam\t\" `uname -n` >> $log_dir/qdnaseq.log\n";
233+
print QDNASEQ_SH "else\n";
234+
print QDNASEQ_SH "\techo \"ERROR: Input bam files do not exist.\" >> $log_dir/qdnaseq.log\n";
235+
print QDNASEQ_SH "fi\n";
236+
close QDNASEQ_SH;
237+
238+
## Run job
239+
my $qsub = &qsubTemplate(\%opt,"QDNASEQ");
240+
if ( @running_jobs ){
241+
system "$qsub -o $log_dir -e $log_dir -N $job_id -hold_jid ".join(",",@running_jobs)." $bash_file";
242+
} else {
243+
system "$qsub -o $log_dir -e $log_dir -N $job_id $bash_file";
244+
}
245+
return $job_id;
246+
}
247+
248+
180249
sub runFreec {
181250
###
182251
# Run freec and plot result
@@ -185,7 +254,7 @@ sub runFreec {
185254
my @running_jobs = @{$running_jobs};
186255
my %opt = %{$opt};
187256

188-
## Skip Contra if .done file exist
257+
## Skip Freec if .done file exist
189258
if (-e "$log_dir/freec.done"){
190259
print "WARNING: $log_dir/freec.done exists, skipping \n";
191260
return;

illumina_pipeline.pl

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ sub checkConfig{
594594
if(! $opt{FREEBAYES_TIME}){ print "ERROR: No FREEBAYES_TIME option found in config files.\n"; $checkFailed = 1; }
595595
if(! $opt{FREEBAYES_SETTINGS}){ print "ERROR: No FREEBAYES_SETTINGS option found in config files.\n"; $checkFailed = 1; }
596596
if(! $opt{FREEBAYES_SOMATICFILTER}){ print "ERROR: No FREEBAYES_SOMATICFILTER option found in config files.\n"; $checkFailed = 1; }
597-
if(! $opt{FREEBAYES_GERMLINEFILTER}){ print "ERROR: No FREEBAYES_GERMLINEFILTER option found in config files.\n"; $checkFailed = 1; }
597+
#if(! $opt{FREEBAYES_GERMLINEFILTER}){ print "ERROR: No FREEBAYES_GERMLINEFILTER option found in config files.\n"; $checkFailed = 1; }
598598
}
599599
if(! $opt{SOMVAR_MUTECT}){ print "ERROR: No SOMVAR_MUTECT option found in config files.\n"; $checkFailed = 1; }
600600
if($opt{SOMVAR_MUTECT} eq "yes"){
@@ -656,6 +656,14 @@ sub checkConfig{
656656
if(! $opt{FREEC_WINDOW}){ print "ERROR: No FREEC_WINDOW option found in config files.\n"; $checkFailed = 1; }
657657
if(! $opt{FREEC_TELOCENTROMERIC}){ print "ERROR: No FREEC_TELOCENTROMERIC option found in config files.\n"; $checkFailed = 1; }
658658
}
659+
if(! $opt{CNV_QDNASEQ}){ print "ERROR: No CNV_QDNASEQ in config files.\n"; $checkFailed = 1; }
660+
if($opt{CNV_QDNASEQ} eq "yes"){
661+
if(! $opt{QDNASEQ_PATH}){ print "ERROR: No QDNASEQ_PATH option found in config files.\n"; $checkFailed = 1; }
662+
if(! $opt{QDNASEQ_QUEUE}){ print "ERROR: No QDNASEQ_QUEUE option found in config files.\n"; $checkFailed = 1; }
663+
if(! $opt{QDNASEQ_THREADS}){ print "ERROR: No QDNASEQ_THREADS option found in config files.\n"; $checkFailed = 1; }
664+
if(! $opt{QDNASEQ_MEM}){ print "ERROR: No QDNASEQ_MEM option found in config files.\n"; $checkFailed = 1; }
665+
if(! $opt{QDNASEQ_TIME}){ print "ERROR: No QDNASEQ_TIME option found in config files.\n"; $checkFailed = 1; }
666+
}
659667
}
660668
## SV_CALLING
661669
if($opt{SV_CALLING} eq "yes"){
@@ -793,6 +801,7 @@ sub checkConfig{
793801
if(! $opt{CHECKING_THREADS}){ print "ERROR: No CHECKING_THREADS found in .ini file\n"; $checkFailed = 1; }
794802
if(! $opt{CHECKING_MEM}){ print "ERROR: No CHECKING_MEM found in .ini file\n"; $checkFailed = 1; }
795803
if(! $opt{CHECKING_TIME}){ print "ERROR: No CHECKING_TIME found in .ini file\n"; $checkFailed = 1; }
804+
if(! $opt{CHECKING_RM}){ print "ERROR: No CHECKING_RM found in .ini file\n"; $checkFailed = 1; }
796805
}
797806

798807
if ($checkFailed) {

illumina_somaticVariants.pm

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,13 @@ sub runStrelka {
215215
print STRELKA_SH "\techo \"Start Strelka\t\" `date` \"\t $sample_ref_bam \t $sample_tumor_bam\t\" `uname -n` >> $log_dir/strelka.log\n\n";
216216

217217
# Run Strelka
218-
print STRELKA_SH "\t$opt{STRELKA_PATH}/bin/configureStrelkaWorkflow.pl --tumor $sample_tumor_bam --normal $sample_ref_bam --ref $opt{GENOME} --config $opt{STRELKA_INI} --output-dir $strelka_out_dir\n\n";
218+
if (-e $strelka_out_dir){
219+
print STRELKA_SH "\trm -r $strelka_out_dir \n";
220+
}
221+
print STRELKA_SH "\t$opt{STRELKA_PATH}/bin/configureStrelkaWorkflow.pl --tumor $sample_tumor_bam --normal $sample_ref_bam --ref $opt{GENOME} --config $opt{IAP_PATH}/$opt{STRELKA_INI} --output-dir $strelka_out_dir\n\n";
219222

220223
print STRELKA_SH "\tcd $strelka_out_dir\n";
221-
print STRELKA_SH "\tmake -j 8\n\n";
224+
print STRELKA_SH "\tmake -j $opt{STRELKA_THREADS}\n\n";
222225

223226
# Check strelka completed
224227
print STRELKA_SH "\tif [ -f $strelka_out_dir/task.complete ]\n";
@@ -542,26 +545,13 @@ sub runFreeBayes {
542545
# Uniqify freebayes output
543546
print FREEBAYES_SH "\tuniq $freebayes_out_dir/$sample_tumor_name.vcf > $freebayes_out_dir/$sample_tumor_name.uniq.vcf\n";
544547
print FREEBAYES_SH "\tmv $freebayes_out_dir/$sample_tumor_name.uniq.vcf $freebayes_out_dir/$sample_tumor_name.vcf\n\n";
545-
546-
# get sample ids
547-
print FREEBAYES_SH "\tsample_R=`grep -P \"^#CHROM\" $freebayes_out_dir/$sample_tumor_name.vcf | cut -f 10`\n";
548-
print FREEBAYES_SH "\tsample_T=`grep -P \"^#CHROM\" $freebayes_out_dir/$sample_tumor_name.vcf | cut -f 11`\n\n";
549-
550-
# annotate somatic and germline scores
551-
print FREEBAYES_SH "\t$opt{VCFLIB_PATH}/vcfsamplediff VT \$sample_R \$sample_T $freebayes_out_dir/$sample_tumor_name.vcf > $freebayes_out_dir/$sample_tumor_name\_VTannot.vcf\n";
552-
print FREEBAYES_SH "\tsed -i 's/SSC/FB_SSC/' $freebayes_out_dir/$sample_tumor_name\_VTannot.vcf\n"; # to resolve merge conflicts with varscan vcfs
553-
print FREEBAYES_SH "\tgrep -P \"^#\" $freebayes_out_dir/$sample_tumor_name\_VTannot.vcf > $freebayes_out_dir/$sample_tumor_name\_germline.vcf\n";
554-
print FREEBAYES_SH "\tgrep -P \"^#\" $freebayes_out_dir/$sample_tumor_name\_VTannot.vcf > $freebayes_out_dir/$sample_tumor_name\_somatic.vcf\n";
555-
print FREEBAYES_SH "\tgrep -i \"VT=germline\" $freebayes_out_dir/$sample_tumor_name\_VTannot.vcf >> $freebayes_out_dir/$sample_tumor_name\_germline.vcf\n";
556-
print FREEBAYES_SH "\tgrep -i \"VT=somatic\" $freebayes_out_dir/$sample_tumor_name\_VTannot.vcf >> $freebayes_out_dir/$sample_tumor_name\_somatic.vcf\n";
557-
print FREEBAYES_SH "\trm $freebayes_out_dir/$sample_tumor_name\_VTannot.vcf\n\n";
558-
559-
# Filter
560-
print FREEBAYES_SH "\tcat $freebayes_out_dir/$sample_tumor_name\_somatic.vcf | java -Xmx".$opt{FREEBAYES_MEM}."G -jar $opt{SNPEFF_PATH}/SnpSift.jar filter \"$opt{FREEBAYES_SOMATICFILTER}\" > $freebayes_out_dir/$sample_tumor_name\_somatic_filtered.vcf\n";
561-
print FREEBAYES_SH "\tcat $freebayes_out_dir/$sample_tumor_name\_germline.vcf | java -Xmx".$opt{FREEBAYES_MEM}."G -jar $opt{SNPEFF_PATH}/SnpSift.jar filter \"$opt{FREEBAYES_GERMLINEFILTER}\" > $freebayes_out_dir/$sample_tumor_name\_germline_filtered.vcf\n\n";
548+
549+
# Filter freebayes vcf
550+
print FREEBAYES_SH "python $opt{IAP_PATH}/scripts/filterFreebayes.py -v $freebayes_out_dir/$sample_tumor_name.vcf |";
551+
print FREEBAYES_SH "java -Xmx".$opt{FREEBAYES_MEM}."G -jar $opt{SNPEFF_PATH}/SnpSift.jar filter \"$opt{FREEBAYES_SOMATICFILTER}\" > $freebayes_out_dir/$sample_tumor_name\_somatic_filtered.vcf \n";
562552

563553
#Check freebayes completed
564-
print FREEBAYES_SH "\tif [ -s $freebayes_out_dir/$sample_tumor_name\_somatic_filtered.vcf -a -s $freebayes_out_dir/$sample_tumor_name\_germline_filtered.vcf ]\n";
554+
print FREEBAYES_SH "\tif [ -s $freebayes_out_dir/$sample_tumor_name\_somatic_filtered.vcf ]\n";
565555
print FREEBAYES_SH "\tthen\n";
566556
print FREEBAYES_SH "\t\t$rm_command\n";
567557
print FREEBAYES_SH "\t\ttouch $log_dir/freebayes.done\n\n"; ## Check on complete output!!!

scripts/filterFreebayes.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#!/usr/bin/env python
2+
3+
"""
4+
filterFreebayes.py
5+
Applies BCBIO developed filters to the freebayes somatic output.
6+
2 main filters applied:
7+
1. LOD Tumor, LOD Normal > 3.5
8+
2. Freq of Tumor / Freq Normal > 2.7
9+
"""
10+
11+
import argparse
12+
import re
13+
14+
TUMOR_PARTS_INDEX = 10
15+
NORMAL_PARTS_INDEX = 9
16+
17+
# Thresholds are like phred scores, so 3.5 = phred35
18+
TUMOR_THRESHOLD = 3.5
19+
NORMAL_THRESHOLD = 3.5
20+
21+
# Code copied from BCBIO Freebayes pipeline.
22+
def customFilterFreebayes(vcf_file):
23+
with open(vcf_file, "r") as f:
24+
stripped_lines = (line.strip("\n") for line in f)
25+
somatic_lines = (line for line in stripped_lines if _check_line(line))
26+
print "\n".join(somatic_lines)
27+
28+
### Filtering
29+
30+
def _check_line(line):
31+
parts = line.split("\t")
32+
return line.startswith("#") or (_check_lods(parts, TUMOR_THRESHOLD, NORMAL_THRESHOLD) and _check_freqs(parts))
33+
34+
def _check_lods(parts, tumor_thresh, normal_thresh):
35+
"""Ensure likelihoods for tumor and normal pass thresholds.
36+
37+
Skipped if no FreeBayes GL annotations available.
38+
"""
39+
try:
40+
gl_index = parts[8].split(":").index("GL")
41+
except ValueError:
42+
return True
43+
try:
44+
tumor_gls = [float(x) for x in parts[TUMOR_PARTS_INDEX].split(":")[gl_index].split(",") if x != "."]
45+
if tumor_gls:
46+
tumor_lod = max(tumor_gls[i] - tumor_gls[0] for i in range(1, len(tumor_gls)))
47+
else:
48+
tumor_lod = -1.0
49+
# No GL information, no tumor call (so fail it)
50+
except IndexError:
51+
tumor_lod = -1.0
52+
try:
53+
normal_gls = [float(x) for x in parts[NORMAL_PARTS_INDEX].split(":")[gl_index].split(",") if x != "."]
54+
if normal_gls:
55+
normal_lod = min(normal_gls[0] - normal_gls[i] for i in range(1, len(normal_gls)))
56+
else:
57+
normal_lod = normal_thresh
58+
# No GL inofmration, no normal call (so pass it)
59+
except IndexError:
60+
normal_lod = normal_thresh
61+
return normal_lod >= normal_thresh and tumor_lod >= tumor_thresh
62+
63+
def _check_freqs(parts):
64+
"""Ensure frequency of tumor to normal passes a reasonable threshold.
65+
66+
Avoids calling low frequency tumors also present at low frequency in normals,
67+
which indicates a contamination or persistent error.
68+
"""
69+
thresh_ratio = 2.7
70+
try: # FreeBayes
71+
ao_index = parts[8].split(":").index("AO")
72+
ro_index = parts[8].split(":").index("RO")
73+
except ValueError:
74+
ao_index, ro_index = None, None
75+
try: # VarDict
76+
af_index = parts[8].split(":").index("AF")
77+
except ValueError:
78+
af_index = None
79+
if af_index is None and ao_index is None:
80+
raise NotImplementedError("Unexpected format annotations: %s" % parts[0])
81+
def _calc_freq(item):
82+
try:
83+
if ao_index is not None and ro_index is not None:
84+
ao = sum([int(x) for x in item.split(":")[ao_index].split(",")])
85+
ro = int(item.split(":")[ro_index])
86+
freq = ao / float(ao + ro)
87+
elif af_index is not None:
88+
freq = float(item.split(":")[af_index])
89+
except (IndexError, ValueError, ZeroDivisionError):
90+
freq = 0.0
91+
return freq
92+
tumor_freq, normal_freq = _calc_freq(parts[TUMOR_PARTS_INDEX]), _calc_freq(parts[NORMAL_PARTS_INDEX])
93+
return normal_freq <= 0.001 or normal_freq <= tumor_freq / thresh_ratio
94+
95+
96+
if __name__ == "__main__":
97+
parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog,max_help_position=100, width=200))
98+
99+
required_named = parser.add_argument_group("required named arguments")
100+
required_named.add_argument("-v", "--vcf_file", help="path/to/file.vcf", required=True)
101+
102+
args = parser.parse_args()
103+
customFilterFreebayes(args.vcf_file)

scripts/makeBAFplot.R

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ f2 <- function(x) sum(unlist(x)[2:length(x)])
1818
f1 <- function(x) sum(unlist(x))
1919

2020
determine_peaks <- function(region) {
21-
# ADD CHECK FOR NUMBER OF ROWS >= 50?
2221
quant <- quantile(region$baf)
2322
if(quant[2]==quant[4]) {
2423
# LOW COMPLEXITY
@@ -35,6 +34,10 @@ determine_peaks <- function(region) {
3534
tp<-turnpoints(ts_y)
3635
peaks <- data.frame(baf=d$x[tp$tppos], dens=d$y[tp$tppos])
3736
df <- data.frame(subset(peaks, dens>=2.0))
37+
if (nrow(df)<2) {
38+
# complex region
39+
df <- data.frame(subset(peaks, dens>=1.0))
40+
}
3841
df$CHROM <- region$CHROM[1]
3942
df$POS <- mean(region$POS)
4043
df$CALL <- NA

0 commit comments

Comments
 (0)