Skip to content

Commit bb0ab0c

Browse files
authored
Update Snakefile
More documentations
1 parent 219a37e commit bb0ab0c

File tree

1 file changed

+17
-0
lines changed

1 file changed

+17
-0
lines changed

Snakefile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ rule pre_QC:
7878
"""
7979

8080
# Run BBMap's Clumpify to remove optical duplicates pre-mapping
81+
## Please adjust the 'dupedist' value according to your sequencing platform in the scripts/clumpify_OpDup.sh file
82+
## (recommendations included within the script).
8183
rule clumpify:
8284
input:
8385
trimmed_1 = rules.pre_QC.output.trimmed_1
@@ -177,6 +179,7 @@ rule addRG:
177179
sh scripts/addRG.sh {input.pass2_bam} {params.meta} scripts/PICARD_addRG.sh
178180
"""
179181

182+
# Filter BAM files based on quality with samtools
180183
rule filterBAM:
181184
input:
182185
withRG = rules.addRG.output.withRG
@@ -194,6 +197,7 @@ rule filterBAM:
194197
samtools view -bq 20 {input.withRG} > {output.filt}
195198
"""
196199

200+
# Sort BAM files, then mark and remove duplicates
197201
rule sortMarkDup:
198202
input:
199203
filt = rules.filterBAM.output.filt
@@ -211,6 +215,7 @@ rule sortMarkDup:
211215
sh scripts/sortMarkDup.sh {input.filt}
212216
"""
213217

218+
# Splits reads that contain Ns in their cigar string
214219
rule splitN:
215220
input:
216221
nodup = rules.sortMarkDup.output.nodup
@@ -231,6 +236,7 @@ rule splitN:
231236
sh scripts/splitncigar.sh {input.nodup} {params.genome_fa}
232237
"""
233238

239+
# Perform base recalibration (get the recalibration table)
234240
rule baseRecalib:
235241
input:
236242
nodup = rules.splitN.output.ncigar
@@ -253,6 +259,7 @@ rule baseRecalib:
253259
sh scripts/base_recalibrator.sh {input.nodup} {params.genome_fa} {params.indel1} {params.indel2}
254260
"""
255261

262+
# Apply base recalibration
256263
rule applybqsr:
257264
input:
258265
nodup = rules.splitN.output.ncigar,
@@ -274,6 +281,7 @@ rule applybqsr:
274281
sh scripts/apply_bqsr.sh {input.nodup} {params.genome_fa} {input.recaltab}
275282
"""
276283

284+
# Call germline SNPs and indels via local re-assembly of haplotypes
277285
rule haploCall:
278286
input:
279287
finalbam = rules.applybqsr.output.finalbam
@@ -294,6 +302,7 @@ rule haploCall:
294302
sh scripts/haploCall.sh {input.finalbam} {params.genome_fa}
295303
"""
296304

305+
# Get list of GVCF files for merging
297306
rule getListGeno:
298307
input:
299308
expand("vars/{name}first.g.vcf.gz", name = SAMPLES["name"])
@@ -306,6 +315,7 @@ rule getListGeno:
306315
echo "{input}" | sed 's/vars/-V vars/g' > {output.gvcf_list}
307316
"""
308317

318+
# Merge GVCFs and call genotype
309319
rule genotype:
310320
input:
311321
gvcf_list = rules.getListGeno.output.gvcf_list
@@ -328,6 +338,7 @@ rule genotype:
328338
sh scripts/genotypegvcfs.sh {input.gvcf_list} {params.genome_fa} {params.interval}
329339
"""
330340

341+
# Initial filtering of variants called
331342
rule filGen:
332343
input:
333344
genotyped = rules.genotype.output.genotyped
@@ -348,6 +359,7 @@ rule filGen:
348359
sh scripts/variantFil.sh {input.genotyped} {params.genome_fa}
349360
"""
350361

362+
# Select variants based on previous filtering
351363
rule selGen:
352364
input:
353365
varFil = rules.filGen.output.varFil
@@ -368,6 +380,7 @@ rule selGen:
368380
sh scripts/variantSel.sh {input.varFil} {params.genome_fa}
369381
"""
370382

383+
# Clean up VCF file
371384
rule plink_clean:
372385
input:
373386
varSel = rules.selGen.output.varSel
@@ -385,6 +398,7 @@ rule plink_clean:
385398
Rscript scripts/clean_data.R VCF/Genotyped_filterOut VCF/Genotyped_filterOut_clean
386399
"""
387400

401+
# Filter based on MAF<0.05
388402
rule qc_plink:
389403
input:
390404
bim_clean = rules.plink_clean.output.bim_clean
@@ -399,6 +413,7 @@ rule qc_plink:
399413
sh scripts/qc_all_basic.sh VCF/Genotyped_filterOut_clean VCF/Genotyped_filterOut_clean_maf005 0.05 0 0.1 0.2
400414
"""
401415

416+
# Compare with HapMap3 reads and take overlaps
402417
rule hapmap3:
403418
input:
404419
bim_maf = rules.qc_plink.output.bim_maf
@@ -423,6 +438,7 @@ rule hapmap3:
423438
plink --bfile ./VCF/Genotyped_filterOut_clean_maf005 --extract {output.hm3_list} --make-bed --out ./VCF/Genotyped_filterOut_clean_maf005_hm3
424439
"""
425440

441+
# Filter based on ld<0.05
426442
rule ld:
427443
input:
428444
hm3_bim = rules.hapmap3.output.hm3_bim
@@ -440,6 +456,7 @@ rule ld:
440456
sh scripts/ld_thinning_pruning.sh VCF/Genotyped_filterOut_clean_maf005_hm3 VCF/Genotyped_filterOut_clean_maf005_hm3_ld005 1000 50 0.05 {params.ld_reg}
441457
"""
442458

459+
# Generate genetic PCs
443460
rule pca:
444461
input:
445462
ld_bim = rules.ld.output.ld_bim

0 commit comments

Comments
 (0)