Skip to content

Commit da2ddad

Browse files
committed
Fixing RAM issues with MAFToGVCF. This should reduce RAM requirements by 3x.
1 parent 7f5a36b commit da2ddad

File tree

2 files changed

+32
-10
lines changed

2 files changed

+32
-10
lines changed

src/main/kotlin/biokotlin/genome/MAFToGVCF.kt

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -114,18 +114,19 @@ class MAFToGVCF {
114114
val sampleName = variantsMap.keys.first()
115115

116116
// sort the variants by contig and position.
117-
val variants = variantsMap.values.first().sortedWith(VariantContextComparator(contigList))
117+
val variants = variantsMap.values.first().sortedBy { Position(it.chr,it.startPos) }
118118

119-
exportVariantContext(sampleName, variants, gvcfOutput, refSeqs)
119+
exportVariantContext(sampleName, variants, gvcfOutput, refSeqs, outJustGT, delAsSymbolic, maxDeletionSize)
120120
if (compressAndIndex) {
121121
// compress and index the file with bgzip and tabix.
122122
compressAndIndexFile(gvcfOutput)
123123
}
124124
} else if (variantsMap.size == 2) {
125125
val outputNames = twoOutputFiles(gvcfOutput)
126126
variantsMap.entries.forEachIndexed { index, (name, variants) ->
127-
val sortedVariants = variants.sortedWith(VariantContextComparator(contigList))
128-
val outputFile = exportVariantContext(name, sortedVariants, outputNames[index], refSeqs)
127+
val sortedVariants = variants.sortedBy { Position(it.chr,it.startPos) }
128+
129+
val outputFile = exportVariantContext(name, sortedVariants, outputNames[index], refSeqs, outJustGT, delAsSymbolic, maxDeletionSize)
129130
if (compressAndIndex) {
130131
compressAndIndexFile(outputNames[index])
131132
}
@@ -149,7 +150,7 @@ class MAFToGVCF {
149150
delAsSymbolic: Boolean = false,
150151
maxDeletionSize: Int = 0,
151152
anchorwaveLegacy: Boolean = false
152-
): Map<String, List<VariantContext>> {
153+
): Map<String, List<AssemblyVariantInfo>> {
153154

154155
val mafRecords = loadMAFRecords(mafFile)
155156
return if (twoGvcfs) {
@@ -246,7 +247,7 @@ class MAFToGVCF {
246247
delAsSymbolic: Boolean,
247248
maxDeletionSize: Int,
248249
anchorwaveLegacy: Boolean = false
249-
): List<VariantContext> {
250+
): List<AssemblyVariantInfo> {
250251
var variantInfos = mutableListOf<AssemblyVariantInfo>()
251252

252253
for (record in mafRecords) {
@@ -263,7 +264,7 @@ class MAFToGVCF {
263264
variantInfos = fillInMissingVariantBlocks(variantInfos, refGenomeSequence, true)
264265
}
265266

266-
return createVariantContextsFromInfo(sampleName, variantInfos, outJustGT, delAsSymbolic, maxDeletionSize)
267+
return variantInfos
267268
}
268269

269270
fun removeRefBlocks(variantInfos: MutableList<AssemblyVariantInfo>) : MutableList<AssemblyVariantInfo> {
@@ -1117,9 +1118,12 @@ class MAFToGVCF {
11171118
*/
11181119
fun exportVariantContext(
11191120
sampleName: String,
1120-
variantContexts: List<VariantContext>,
1121+
variantContexts: List<AssemblyVariantInfo>,
11211122
outputFileName: String,
1122-
refGenomeSequence: Map<String, NucSeq>
1123+
refGenomeSequence: Map<String, NucSeq>,
1124+
outputJustGT: Boolean,
1125+
delAsSymbolic: Boolean,
1126+
maxDeletionSize: Int
11231127
) {
11241128
val writer = VariantContextWriterBuilder()
11251129
.unsetOption(Options.INDEX_ON_THE_FLY)
@@ -1132,7 +1136,7 @@ class MAFToGVCF {
11321136
addSequenceDictionary(header, refGenomeSequence)
11331137
writer.writeHeader(header)
11341138
for (variant in variantContexts) {
1135-
writer.add(variant)
1139+
writer.add(convertVariantInfoToContext(sampleName, variant, outputJustGT, delAsSymbolic, maxDeletionSize))
11361140
}
11371141

11381142
writer.close()

src/test/kotlin/biokotlin/genome/MAFToGVCFTest.kt

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -619,6 +619,24 @@ class MAFToGVCFTest : StringSpec({
619619

620620
}
621621
}
622+
623+
"TestSort" {
624+
val chr1 = "1"
625+
val chr2 = "2"
626+
val chr10 = "10"
627+
628+
val asmInfos = listOf(AssemblyVariantInfo(chr1,10,20,"0","A","T",true, intArrayOf(10,20), "chr1", 10, 20, "+"),
629+
AssemblyVariantInfo(chr2,10,20,"0","A","T",true, intArrayOf(10,20), "chr2", 10, 20, "+"),
630+
AssemblyVariantInfo(chr10,10,20,"0","A","T",true, intArrayOf(10,20), "chr10", 10, 20, "+"),
631+
AssemblyVariantInfo(chr2,30,40,"0","A","T",true, intArrayOf(10,20), "chr2", 10, 20, "+"),
632+
AssemblyVariantInfo(chr1,1,9,"0","A","T",true, intArrayOf(10,20), "chr1", 10, 20, "+"),
633+
)
634+
635+
val expectedInfos = listOf(Pair(chr1,1), Pair(chr1,10), Pair(chr2,10), Pair(chr2,30), Pair(chr10,10))
636+
637+
val sorted = asmInfos.sortedBy { Position(it.chr,it.startPos) }
638+
sorted.map { Pair(it.chr,it.startPos) } shouldBe expectedInfos
639+
}
622640

623641
})
624642

0 commit comments

Comments
 (0)