Merge pull request #117 from ewels/master

Hammarn · web-flow · commit adc816b0ea6c · 2017-04-11T14:08:37.000+02:00
Don't save intermediate files by default
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,4 +1,33 @@
 # NGI-RNAseq
 
+## [1.0.2](https://github.com/SciLifeLab/NGI-RNAseq/releases/tag/1.0.2) - 2017-04-11
+A couple of tweaks to help the pipeline in production:
+
+* Trimmed FastQ files and intermediate BAM files are no longer saved by default
+  * This is configurable in the config or with `--saveTrimmed` / `--saveAlignedIntermediates`
+* featureCounts merge process uses `.collect()` for better consistency
+
+## [1.0.1](https://github.com/SciLifeLab/NGI-RNAseq/releases/tag/1.0.1) - 2017-04-10
+This release includes a bugfix for the last major release relating to the strandedness of `RSEQC`.
+
+* Single end reverse is now correctly `+-,-+`
+* Single end forward is now correctly `++,--`
+* PE forward is now correctly `1++,1--,2+-,2-+`
+
 ## [1.0](https://github.com/SciLifeLab/NGI-RNAseq/releases/tag/1.0) - 2017-04-05
-* Initial production release 
+The pipeline has now been validated for use in our production work.
+This version includes some new features:
+
+* The output from featureCounts is now merged into a single table and supplied alongside the individual reports.
+* markDuplicates JVM memory is now automatically scaled based on the process memory
+* An `html` file with results documentation is now generated and supplied amongst the results
+* It's now possible to configure the pipeline for different stranded libraries with just a simple CL flag.
+* Additional support and documentation for platforms other than Uppmax, including C3SE.
+* Numerous minor tweaks and improvements.
+
+## [0.3](https://github.com/SciLifeLab/NGI-RNAseq/releases/tag/0.3) - 2016-12-13
+In order to properly validate this pipeline and take it into production we need to tag a stable release.
+I've tagged specific software versions in the `uppmax.config` file.
+
+## [0.2](https://github.com/SciLifeLab/NGI-RNAseq/releases/tag/0.2) - 2016-10-14
+First (semi-)stable release of the new NGI-RNAseq pipeline, as we head towards deployment in production.
diff --git a/docs/usage.md b/docs/usage.md
@@ -123,6 +123,15 @@ and BED12 files will then be generated from these downloaded files.
 Supply this parameter to save any generated reference genome files to your results folder.
 These can then be used for future pipeline runs, reducing processing times.
 
+### `--saveTrimmed`
+By default, trimmed FastQ files will not be saved to the results directory. Specify this
+flag (or set to true in your config file) to copy these files when complete.
+
+### `--saveAlignedIntermediates`
+As above, by default intermediate BAM files will not be saved. The final BAM files created
+after the Picard MarkDuplicates step are always saved. Set to true to also copy out BAM
+files from STAR / HISAT2 and sorting steps.
+
 ## Adapter Trimming
 If specific additional trimming is required (for example, from additional tags),
 you can use any of the following command line parameters. These affect the command
diff --git a/main.nf b/main.nf
@@ -22,7 +22,7 @@ vim: syntax=groovy
  */
 
 // Pipeline version
-version = 1.0
+version = '1.0.2'
 
 // Configurable variables
 params.project = false
@@ -41,6 +41,8 @@ params.download_fasta = false
 params.download_gtf = false
 params.hisatBuildMemory = 200 // Required amount of memory in GB to build HISAT2 index with splice sites
 params.saveReference = false
+params.saveTrimmed = false
+params.saveAlignedIntermediates = false
 params.reads = "data/*{1,2}.fastq.gz"
 params.outdir = './results'
 
@@ -148,6 +150,9 @@ log.info "R libraries    : ${params.rlocation}"
 log.info "Script dir     : $baseDir"
 log.info "Working dir    : $workDir"
 log.info "Output dir     : ${params.outdir}"
+log.info "Save Reference : ${params.saveReference}"
+log.info "Save Trimmed   : ${params.saveTrimmed}"
+log.info "Save Intermeds : ${params.saveAlignedIntermediates}"
 if( params.pico       ) log.info "Trim Profile   : SMARTer Stranded Total RNA-Seq Kit - Pico Input"
 if( params.clip_r1 > 0) log.info "Trim R1        : ${params.clip_r1}"
 if( params.clip_r2 > 0) log.info "Trim R2        : ${params.clip_r2}"
@@ -380,7 +385,7 @@ process trim_galore {
         saveAs: {filename ->
             if (filename.indexOf("_fastqc") > 0) "FastQC/$filename"
             else if (filename.indexOf("trimming_report.txt") > 0) "logs/$filename"
-            else "$filename"
+            else params.saveTrimmed ? filename : null
         }
 
     input:
@@ -434,7 +439,10 @@ if(params.aligner == 'star'){
     process star {
         tag "$prefix"
         publishDir "${params.outdir}/STAR", mode: 'copy',
-            saveAs: {filename -> filename.indexOf(".out") > 0 ? "logs/$filename" : "$filename"}
+            saveAs: {filename ->
+                if (filename.indexOf(".out") > 0) "logs/$filename"
+                else params.saveAlignedIntermediates ? filename : null
+            }
 
         input:
         file reads from trimmed_reads
@@ -476,7 +484,10 @@ if(params.aligner == 'hisat2'){
     process hisat2Align {
         tag "$prefix"
         publishDir "${params.outdir}/HISAT2", mode: 'copy',
-            saveAs: {filename -> filename.indexOf("_log.txt") > 0 ? "logs/$filename" : "aligned/$filename"}
+            saveAs: {filename ->
+                if (filename.indexOf("_log.txt") > 0) "logs/$filename"
+                else params.saveAlignedIntermediates ? filename : null
+            }
 
         input:
         file reads from trimmed_reads
@@ -528,7 +539,8 @@ if(params.aligner == 'hisat2'){
 
     process hisat2_sortOutput {
         tag "${hisat2_bam.baseName}"
-        publishDir "${params.outdir}/HISAT2/aligned_sorted", mode: 'copy'
+        publishDir "${params.outdir}/HISAT2", mode: 'copy',
+            saveAs: {filename -> params.saveAlignedIntermediates ? "aligned_sorted/$filename" : null }
 
         input:
         file hisat2_bam
@@ -748,7 +760,7 @@ process merge_featureCounts {
     publishDir "${params.outdir}/featureCounts", mode: 'copy'
 
     input:
-    file input_files from featureCounts_to_merge.toList()
+    file input_files from featureCounts_to_merge.collect()
 
     output:
     file 'merged_gene_counts.txt'