@@ -21,8 +21,10 @@ class MAFToGVCFTest : StringSpec({
21
21
val diploidRefFile = " ${testingDir} /CML103DiploidTest.fa"
22
22
val mafFile = " ${testingDir} /B97.maf"
23
23
val mafFileInverted = " ${testingDir} /B97_inverted.maf"
24
+ val mafFileInvertedLegacy = " ${testingDir} /B97_inverted_Legacy.maf"
24
25
val diploidMafFile = " ${testingDir} /B97diploid.maf"
25
26
val mafFileNs = " ${testingDir} /mafWithNs.maf"
27
+ val mafFileNsLegacy = " ${testingDir} /mafWithNs_Legacy.maf"
26
28
27
29
val truthGVCFFile = " ${testingDir} /B97_truth.gvcf"
28
30
val truthGVCFFileInverted = " ${testingDir} /B97_truth_inverted.gvcf"
@@ -84,6 +86,7 @@ class MAFToGVCFTest : StringSpec({
84
86
createTruthGVCFFile(truthGVCFFile)
85
87
86
88
createInvertedMAFFile(mafFileInverted)
89
+ createInvertedMAFFileLegacy(mafFileInvertedLegacy)
87
90
createTruthInversionGVCFFile(truthGVCFFileInverted)
88
91
89
92
// Create the known overlapping GVCF truth file for 1st diploid
@@ -99,6 +102,7 @@ class MAFToGVCFTest : StringSpec({
99
102
// Create ref, maf, and truth GVCF for Ns testing
100
103
createRefWithN(refNFile)
101
104
createNsMAFFile(mafFileNs)
105
+ createNsMAFFileLegacy(mafFileNsLegacy)
102
106
createTruthNs(truthGVCFFileNs)
103
107
104
108
@@ -313,8 +317,8 @@ class MAFToGVCFTest : StringSpec({
313
317
}
314
318
}
315
319
316
- " TestInversion " {
317
- MAFToGVCF ().createGVCFfromMAF(mafFileInverted ,refFile,outputFileInverted,sampleName,fillGaps=false,compressAndIndex=false)
320
+ " TestInversionLegacy " {
321
+ MAFToGVCF ().createGVCFfromMAF(mafFileInvertedLegacy ,refFile,outputFileInverted,sampleName,fillGaps=false,compressAndIndex=false, anchorwaveLegacy = true )
318
322
319
323
val truthVariantIterator = VCFFileReader (File (truthGVCFFileInverted),false).iterator()
320
324
val truthVariants = mutableListOf<VariantContext >()
@@ -625,6 +629,51 @@ class MAFToGVCFTest : StringSpec({
625
629
}
626
630
}
627
631
// Converts the legacy-coordinate Ns MAF with anchorwaveLegacy = true and checks that every
// record written to the output GVCF matches the record at the same contig/start in the Ns
// truth GVCF.
" testMAFsWithNsLegacy" {
    // NOTE(review): outputFileNs is also the output path used by "testMAFsWithNs"; this is
    // only safe while the two tests never run concurrently - confirm.
    MAFToGVCF().createGVCFfromMAF(
        mafFileNsLegacy,
        refNFile,
        outputFileNs,
        sampleName,
        fillGaps = false,
        compressAndIndex = false,
        anchorwaveLegacy = true
    )

    // use { } closes each reader once its records have been copied into a list; the
    // previous version left both readers open.
    val truthVariants = VCFFileReader(File(truthGVCFFileNs), false).use { it.toList() }
    val truthMap = truthVariants.associateBy { Position(it.contig, it.start) }
    val outputVariants = VCFFileReader(File(outputFileNs), false).use { it.toList() }

    outputVariants.size shouldBe truthVariants.size

    for (variant in outputVariants) {
        // Single lookup; a missing truth record fails the test with the offending position.
        val matchingTruth = truthMap[Position(variant.contig, variant.start)]
            ?: fail("No matching variant found: ${variant.contig}:${variant.start}")

        // Comparing the values directly (instead of `(a == b) shouldBe true`) makes a
        // failure report the actual and expected values.

        // Check END
        variant.end shouldBe matchingTruth.end
        // Check alleles
        variant.alleles shouldBe matchingTruth.alleles
        // Check GT
        variant.getGenotype(0).genotypeString shouldBe matchingTruth.getGenotype(0).genotypeString
        // Check AD (primitive arrays are compared by content explicitly)
        (matchingTruth.getGenotype(0).ad contentEquals variant.getGenotype(0).ad) shouldBe true

        // NOTE(review): the attribute keys below are kept exactly as written, including the
        // trailing space. If the GVCF keys have no trailing space these lookups presumably
        // return null on both sides and the checks pass vacuously - confirm.
        // Check ASM Contig
        variant.getAttribute("ASM_Chr ") shouldBe matchingTruth.getAttribute("ASM_Chr ")
        // Check ASM Start
        variant.getAttribute("ASM_Start ") shouldBe matchingTruth.getAttribute("ASM_Start ")
        // Check ASM END
        variant.getAttribute("ASM_End ") shouldBe matchingTruth.getAttribute("ASM_End ")
        // Check ASM Strand
        variant.getAttribute("ASM_Strand ") shouldBe matchingTruth.getAttribute("ASM_Strand ")
    }
}
676
+
628
677
" testMAFsWithNs" {
629
678
MAFToGVCF ().createGVCFfromMAF(mafFileNs,refNFile,outputFileNs,sampleName,fillGaps=false,compressAndIndex=false)
630
679
@@ -661,6 +710,7 @@ class MAFToGVCFTest : StringSpec({
661
710
// Check ASM Contig
662
711
(matchingTruth.getAttribute("ASM_Chr ") == variant.getAttribute("ASM_Chr ")) shouldBe true
663
712
// Check ASM Start
713
+ println("ASM_Start : ${matchingTruth.getAttribute("ASM_Start ")} == ${variant.getAttribute("ASM_Start ")}")
664
714
(matchingTruth.getAttribute("ASM_Start ") == variant.getAttribute("ASM_Start ")) shouldBe true
665
715
// Check ASM END
666
716
(matchingTruth.getAttribute("ASM_End ") == variant.getAttribute("ASM_End ")) shouldBe true
@@ -781,6 +831,30 @@ fun createMAFFileWithEIQlines(outputFile: String) {
781
831
}
782
832
}
783
833
/**
 * Writes a small MAF fixture to [outputFile].
 *
 * The file consists of a header line followed by four alignment blocks. In the third block the
 * second sequence (chr6) is on the '-' strand; the fourth block uses chr10 as its first sequence.
 *
 * @param outputFile path of the file to create or overwrite.
 */
fun createInvertedMAFFile(outputFile: String) {
    // Chunks are written in order, unchanged, so the resulting file is the plain concatenation.
    val chunks = listOf(
        " ##maf version=1 scoring=Tba.v8\n\n ",

        " a\t score=23262.0\n ",
        " s\t chr7\t 12\t 38\t +\t 158545518\t AAA-GGGAATGTTAACCAAATGA---ATTGTCTCTTACGGTG\n ",
        " s\t chr4\t 81344243\t 40\t +\t 187371129\t -AA-GGGGATGCTAAGCCAATGAGTTGTTGTCTCTCAATGTG\n\n ",

        " a\t score=5062.0\n ",
        " s\t chr7\t 450\t 6\t +\t 158545518\t TAAAGAT---GGGT\n ",
        " s\t chr4\t 81444246\t 6\t +\t 187371129\t TAAGGATCCC---T\n\n ",

        " a\t score=6636.0\n ",
        " s\t chr1\t 0\t 40\t +\t 158545518\t -----GCAGCTGAAAACAGTCAATCTTACACACTTGGGGCCTACT\n ",
        " s\t chr6\t 97794583\t 45\t - 151104725\t AAAAAGACAGCTGAAAATATCAATCTTACACACTTGGGGCCTACT\n\n ",

        // we need a chr10 in here to test sorting the maf records
        " a\t score=6636.0\n ",
        " s\t chr10\t 0\t 40\t +\t 158545518\t -----GCAGCTGAAAACAGTCAATCTTACACACTTGGGGCCTACT\n ",
        " s\t chr6\t 53310097\t 45\t + 151104725\t AAAAAGACAGCTGAAAATATCAATCTTACACACTTGGGGCCTACT\n\n "
    )

    File(outputFile).bufferedWriter().use { writer ->
        chunks.forEach { chunk -> writer.write(chunk) }
    }
}
856
+
857
+ fun createInvertedMAFFileLegacy (outputFile : String ) {
784
858
File (outputFile).bufferedWriter().use {output ->
785
859
output.write(" ##maf version=1 scoring=Tba.v8\n\n " )
786
860
@@ -850,28 +924,46 @@ fun createMAFFileWithEIQlines(outputFile: String) {
850
924
}
851
925
}
852
926
853
/**
 * Writes a MAF test fixture whose aligned sequences contain many N bases to [outputFile].
 *
 * The file has a header line and three alignment blocks: one on Chr01 and two on Chr02.
 * In the last block the second sequence is on the '-' strand with a start value of 2.
 *
 * @param outputFile path of the file to create or overwrite.
 */
fun createNsMAFFile(outputFile: String) {
    // Written strictly in this order; the file content is the concatenation of the chunks.
    val chunks = arrayOf(
        " ##maf version 1\n\n ",

        " a\t score=12\n ",
        " s\t Chr01\t 0\t 129\t +\t 129\t TGTCGACTCAGCTC---CACACTCG---ACTCCNCTACGCATCACNCNNNCCTACTCTACACACTCCACCACACA--CTCTCGT----CGTACGTGCG---CGTAGAG--CGA--GATCGACTACCC--ATCAG--GGCTCAGCTG------AGC------TCG\n ",
        " s\t Chr01\t 0\t 156\t +\t 184\t TGTCNNNTCACNNNGTACTCCACACGAANNNTCNCTNCGCATCACNCNNNNNNACTCTAC--NNNN----ACACAAANNNTCNNATAACGTACGTANNNNNGGTAGAGTTNNNNNGATCG--NNNNNNNATCAANNNNNNGAGCTGTCNNNNAGNNNNNATTCG\n\n ",

        " a\t score=12\n ",
        " s\t Chr02\t 2\t 20\t +\t 113\t NNNGCTAGCTAGCTCA-------GCGC\n ",
        " s\t Chr02\t 10\t 25\t +\t 160\t NNNNN--GCTAGCTNNNNNNTATGCGC\n\n ",

        " a\t score=12\n ",
        " s\t Chr02\t 22\t 66\t +\t 113\t ACACCTGTGTGCAGCTGCTTACGGGGCGCGCCCCATCTCGCGGGGCTCATGCGAACCNNNCGCATG\n ",
        " s\t Chr02\t 2 \t 58\t -\t 160\t ACA--NNNNTGCAAC--NNNNGGNNNNGNNNN--TTCTCGCGGNNNN--NNNNGNCCNNNCNNNNN\n\n "
    )

    File(outputFile).bufferedWriter().use { writer ->
        for (chunk in chunks) {
            writer.write(chunk)
        }
    }
}
949
/**
 * Writes an N-rich MAF test fixture to [outputFile] for the conversion that is run with
 * `anchorwaveLegacy = true`.
 *
 * The file has a header line and three alignment blocks: one on Chr01 and two on Chr02.
 * In the last block the second sequence is on the '-' strand with a start value of 100.
 * NOTE(review): presumably 100 is the reverse-strand start as older AnchorWave output reports
 * it - confirm against the anchorwaveLegacy handling in MAFToGVCF.
 *
 * @param outputFile path of the file to create or overwrite.
 */
fun createNsMAFFileLegacy(outputFile: String) {
    // Written strictly in this order; the file content is the concatenation of the chunks.
    val chunks = sequenceOf(
        " ##maf version 1\n\n ",

        " a\t score=12\n ",
        " s\t Chr01\t 0\t 129\t +\t 129\t TGTCGACTCAGCTC---CACACTCG---ACTCCNCTACGCATCACNCNNNCCTACTCTACACACTCCACCACACA--CTCTCGT----CGTACGTGCG---CGTAGAG--CGA--GATCGACTACCC--ATCAG--GGCTCAGCTG------AGC------TCG\n ",
        " s\t Chr01\t 0\t 156\t +\t 184\t TGTCNNNTCACNNNGTACTCCACACGAANNNTCNCTNCGCATCACNCNNNNNNACTCTAC--NNNN----ACACAAANNNTCNNATAACGTACGTANNNNNGGTAGAGTTNNNNNGATCG--NNNNNNNATCAANNNNNNGAGCTGTCNNNNAGNNNNNATTCG\n\n ",

        " a\t score=12\n ",
        " s\t Chr02\t 2\t 20\t +\t 113\t NNNGCTAGCTAGCTCA-------GCGC\n ",
        " s\t Chr02\t 10\t 25\t +\t 160\t NNNNN--GCTAGCTNNNNNNTATGCGC\n\n ",

        " a\t score=12\n ",
        " s\t Chr02\t 22\t 66\t +\t 113\t ACACCTGTGTGCAGCTGCTTACGGGGCGCGCCCCATCTCGCGGGGCTCATGCGAACCNNNCGCATG\n ",
        " s\t Chr02\t 100\t 58\t -\t 160\t ACA--NNNNTGCAAC--NNNNGGNNNNGNNNN--TTCTCGCGGNNNN--NNNNGNCCNNNCNNNNN\n\n "
    )

    File(outputFile).bufferedWriter().use { writer ->
        chunks.forEach(writer::write)
    }
}
874
- }
875
967
876
968
/* *
877
969
* Simple function to create a simple MAF file used for testing. This covers most of the edge cases we have run into.
0 commit comments