Skip to content
This repository was archived by the owner on Aug 21, 2018. It is now read-only.

Commit 58b87c5

Browse files
authored
Merge pull request #115 from ewels/master
Strandedness - fixes and new additions
2 parents c3d8526 + d9a822d commit 58b87c5

File tree

3 files changed

+47
-43
lines changed

3 files changed

+47
-43
lines changed

conf/uppmax.config

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ process {
8686
}
8787
$featureCounts {
8888
module = ['bioinfo-tools', 'subread/1.5.1']
89-
}
89+
}
9090
$stringtieFPKM {
9191
module = ['bioinfo-tools', 'StringTie/1.3.3']
9292
}
@@ -108,7 +108,7 @@ params {
108108
clusterOptions = false
109109
rlocation = "$HOME/R/nxtflow_libs/"
110110
saveReference = true
111-
reverse_stranded=true
111+
reverse_stranded = true
112112
// illumina iGenomes reference file paths on UPPMAX
113113
genomes {
114114
'GRCh37' {

docs/usage.md

Lines changed: 23 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,29 @@ all files as single end. The file path should be in quotation marks to prevent s
3030

3131
If left unspecified, the pipeline will assume that the data is in a directory called `data` in the working directory.
3232

33+
### Library strandedness
34+
Three command line flags / config parameters set the library strandedness for a run:
35+
36+
* `--forward_stranded`
37+
* `--reverse_stranded`
38+
* `--unstranded`
39+
40+
If not set, the pipeline will be run as unstranded. The UPPMAX configuration file sets `reverse_stranded` to true by default.
41+
Use `--unstranded` or `--forward_stranded` to overwrite this.
42+
43+
These flags affect the commands used for several steps in the pipeline - namely HISAT2, featureCounts, RSeQC (`RPKM_saturation.py`)
44+
and StringTie:
45+
* `--forward_stranded`
46+
* HISAT2: `--rna-strandness F` / `--rna-strandness FR`
47+
* featureCounts: `-s 1`
48+
* RSeQC: `-d ++,--` / `-d 1++,1--,2+-,2-+`
49+
* StringTie: `--fr`
50+
* `--reverse_stranded`
51+
* HISAT2: `--rna-strandness R` / `--rna-strandness RF`
52+
* featureCounts: `-s 2`
53+
* RSeQC: `-d +-,-+` / `-d 1+-,1-+,2++,2--`
54+
* StringTie: `--rf`
55+
3356
## Alignment tool
3457
By default, the pipeline uses [STAR](https://github.com/alexdobin/STAR) to align the raw FastQ reads
3558
to the reference genome. STAR is fast and common, but requires a great deal of RAM to run, typically
@@ -123,16 +146,6 @@ for common RNA-seq library preparation kits.
123146
| `--pico` | SMARTer Stranded Total RNA-Seq Kit - Pico Input | 3 | 0 | 0 | 3 |
124147

125148

126-
### strand direction for StringTie and feturecounts
127-
The strandedness of the library can be set py using the `--forward_stranded` and `--reverse_stranded` flags. Both are set as
128-
`false` by default but reversed is set to true in the Uppmax config file. If you library instead is forward oriented simply specify the`--forward_stranded` flag.
129-
130-
e.g.
131-
```groovy
132-
`--forward_stranded`
133-
```
134-
135-
136149
## Job Resources
137150
### Automatic resubmission
138151
Each step in the pipeline has a default set of requirements for number of CPUs,
@@ -170,24 +183,6 @@ The output directory where the results will be saved.
170183
### `--sampleLevel`
171184
Used to turn of the edgeR MDS and heatmap. Set automatically when running on fewer than 3 samples.
172185

173-
### `--strandRule`
174-
Some RSeQC jobs need to know the stranded nature of the library. By default, the pipeline will use
175-
`++,--` for single end libraries and `1+-,1-+,2++,2--` for paired end libraries. These codes are for
176-
strand specific libraries (antisense). `1+-,1-+,2++,2--` decodes as:
177-
178-
* Reads 1 mapped to `+` => parental gene on `+`
179-
* Reads 1 mapped to `-` => parental gene on `-`
180-
* Reads 2 mapped to `+` => parental gene on `-`
181-
* Reads 2 mapped to `-` => parental gene on `+`
182-
183-
Use this parameter to override these defaults. For example, if your data is paired end and strand specific,
184-
but same-sense to the reference, you could run:
185-
186-
```bash
187-
nextflow run NGI-RNAseq/main.nf --strandRule '1++,1--,2+-,2-+'
188-
```
189-
Use `--strandRule 'none'` if your data is not strand specific.
190-
191186
### `--rlocation`
192187
Some steps in the pipeline run R with required modules. By default, the pipeline will install
193188
these modules to `~/R/nxtflow_libs/` if not present. You can specify what path to use with this

main.nf

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ vim: syntax=groovy
1111
#### Authors
1212
Phil Ewels @ewels <phil.ewels@scilifelab.se>
1313
Rickard Hammarén @Hammarn <rickard.hammaren@scilifelab.se>
14-
Docker and AWS integration by
15-
Denis Moreno @Galithil <denis.moreno@scilifelab.se>
14+
Docker and AWS integration by
15+
Denis Moreno @Galithil <denis.moreno@scilifelab.se>
1616
----------------------------------------------------------------------------------------
1717
*/
1818

@@ -29,6 +29,7 @@ params.project = false
2929
params.genome = false
3030
params.forward_stranded = false
3131
params.reverse_stranded = false
32+
params.unstranded = false
3233
params.star_index = params.genome ? params.genomes[ params.genome ].star ?: false : false
3334
params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
3435
params.gtf = params.genome ? params.genomes[ params.genome ].gtf ?: false : false
@@ -53,7 +54,6 @@ if (params.rlocation){
5354

5455
def single
5556
params.sampleLevel = false
56-
params.strandRule = false
5757

5858
// Custom trimming options
5959
params.clip_r1 = 0
@@ -140,6 +140,7 @@ if(params.aligner == 'star'){
140140
if(params.gtf) log.info "GTF Annotation : ${params.gtf}"
141141
else if(params.download_gtf) log.info "GTF URL : ${params.download_gtf}"
142142
if(params.bed12) log.info "BED Annotation : ${params.bed12}"
143+
log.info "Strandedness : ${params.unstranded ? 'None' : params.forward_stranded ? 'Forward' : params.reverse_stranded ? 'Reverse' : 'None'}"
143144
log.info "Current home : $HOME"
144145
log.info "Current user : $USER"
145146
log.info "Current path : $PWD"
@@ -489,11 +490,18 @@ if(params.aligner == 'hisat2'){
489490
script:
490491
index_base = hs2_indices[0].toString() - ~/.\d.ht2/
491492
prefix = reads[0].toString() - ~/(_R1)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/
493+
def rnastrandness = ''
494+
if (params.forward_stranded && !params.unstranded){
495+
rnastrandness = single ? '--rna-strandness F' : '--rna-strandness FR'
496+
} else if (params.reverse_stranded && !params.unstranded){
497+
rnastrandness = single ? '--rna-strandness R' : '--rna-strandness RF'
498+
}
492499
if (single) {
493500
"""
494501
set -o pipefail # Capture exit codes from HISAT2, not samtools
495502
hisat2 -x $index_base \\
496503
-U $reads \\
504+
$rnastrandness \\
497505
--known-splicesite-infile $alignment_splicesites \\
498506
-p ${task.cpus} \\
499507
--met-stderr \\
@@ -506,6 +514,7 @@ if(params.aligner == 'hisat2'){
506514
hisat2 -x $index_base \\
507515
-1 ${reads[0]} \\
508516
-2 ${reads[1]} \\
517+
$rnastrandness \\
509518
--known-splicesite-infile $alignment_splicesites \\
510519
--no-mixed \\
511520
--no-discordant \\
@@ -582,12 +591,12 @@ process rseqc {
582591

583592
script:
584593
def strandRule = ''
585-
if (params.forward_stranded){
586-
strandRule = params.strandRule ?: (single ? '-d +-,-+' : '-d 1+-,1-+,2++,2–')
587-
} else if (params.reverse_stranded){
588-
strandRule = params.strandRule ?: (single ? '-d ++,--' : '-d 1+-,1-+,2++,2--')
594+
if (params.forward_stranded && !params.unstranded){
595+
strandRule = single ? '-d ++,--' : '-d 1++,1--,2+-,2-+'
596+
} else if (params.reverse_stranded && !params.unstranded){
597+
strandRule = single ? '-d +-,-+' : '-d 1+-,1-+,2++,2--'
589598
}
590-
"""
599+
"""
591600
samtools index $bam_rseqc
592601
infer_experiment.py -i $bam_rseqc -r $bed12 > ${bam_rseqc.baseName}.infer_experiment.txt
593602
RPKM_saturation.py -i $bam_rseqc -r $bed12 $strandRule -o ${bam_rseqc.baseName}.RPKM_saturation
@@ -718,10 +727,10 @@ process featureCounts {
718727

719728
script:
720729
def featureCounts_direction = 0
721-
if (params.reverse_stranded){
722-
featureCounts_direction = 2
723-
} else if (params.forward_stranded) {
730+
if (params.forward_stranded && !params.unstranded) {
724731
featureCounts_direction = 1
732+
} else if (params.reverse_stranded && !params.unstranded){
733+
featureCounts_direction = 2
725734
}
726735
"""
727736
featureCounts -a $gtf -g gene_id -o ${bam_featurecounts.baseName}_gene.featureCounts.txt -p -s $featureCounts_direction $bam_featurecounts
@@ -775,9 +784,9 @@ process stringtieFPKM {
775784

776785
script:
777786
def StringTie_direction = ''
778-
if (params.forward_stranded){
787+
if (params.forward_stranded && !params.unstranded){
779788
StringTie_direction = "--fr"
780-
} else if (!params.reverse_stranded){
789+
} else if (!params.reverse_stranded && !params.unstranded){
781790
StringTie_direction = "--rf"
782791
}
783792
"""

0 commit comments

Comments
 (0)