Skip to content

Commit dd77d4e

Browse files
authored
Merge pull request #32 from maize-genetics/fasta-gzip-ext
Fasta gzip ext
2 parents: 7ef6fdc + 88ced7c · commit dd77d4e

File tree

6 files changed: +36 additions, -14 deletions

src/main/kotlin/biokotlin/genome/GenomicFeatures.kt

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@ package biokotlin.genome
33

44
import biokotlin.seq.NucSeqRecord
55
import biokotlin.seqIO.NucSeqIO
6-
import org.jetbrains.kotlinx.dataframe.*
6+
import biokotlin.util.bufferedReader
7+
import org.jetbrains.kotlinx.dataframe.ColumnsContainer
8+
import org.jetbrains.kotlinx.dataframe.DataColumn
9+
import org.jetbrains.kotlinx.dataframe.DataFrame
10+
import org.jetbrains.kotlinx.dataframe.DataRow
711
import org.jetbrains.kotlinx.dataframe.api.*
8-
import java.io.File
912

1013
/**
1114
* The GenomicFeatures class processes data from a GFF formatted file.
@@ -97,7 +100,7 @@ class GenomicFeatures(val gffFile:String, val refFasta:String? = null) {
97100

98101
var totalCount = 0
99102
var batchCount = 0
100-
val gffLines = File(gffFile).bufferedReader().readLines()
103+
val gffLines = bufferedReader(gffFile).readLines()
101104
println("readGffToLists: number of file lines read: ${gffLines.size}")
102105
for (line in gffLines) {
103106
totalCount++

src/main/kotlin/biokotlin/genome/MAFProcessingUtils.kt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import com.google.common.collect.RangeMap
66
import com.google.common.collect.Sets
77
import com.google.common.collect.TreeRangeMap
88
import io.github.oshai.kotlinlogging.KotlinLogging
9-
import io.github.oshai.kotlinlogging.KotlinLogging.logger
109
import org.jetbrains.kotlinx.dataframe.DataFrame
1110
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
1211
import java.io.BufferedReader
@@ -87,8 +86,8 @@ fun mergeWiggleFiles(file1:String, file2:String, contig:String, outputFile:Stri
8786
// Take 2 wiggle files - must be the same length. Merge the values from the 2
8887
// into a new file.
8988

90-
val file1Lines = File(file1).bufferedReader().readLines()
91-
val file2Lines = File(file2).bufferedReader().readLines()
89+
val file1Lines = bufferedReader(file1).readLines()
90+
val file2Lines = bufferedReader(file2).readLines()
9291

9392
check(file1Lines.size == file2Lines.size) {"mergeWiggleFiles: ERROR, ${file1} size ${file1Lines.size} does not match ${file2} size ${file2Lines.size}"}
9493

src/main/kotlin/biokotlin/genome/Ranges.kt

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,13 @@ import biokotlin.seq.NucSeq
99
import biokotlin.seq.NucSeqRecord
1010
import biokotlin.seq.ProteinSeq
1111
import biokotlin.seq.SeqRecord
12+
import biokotlin.util.bufferedReader
1213
import com.google.common.collect.*
1314
import org.jetbrains.kotlinx.dataframe.DataFrame
1415
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
1516
import java.io.ByteArrayOutputStream
1617
import java.io.File
1718
import java.util.*
18-
import kotlin.Comparator
19-
import kotlin.collections.HashMap
2019

2120
/**
2221
* This class defines Biokotlin ranges as well as functions that may be run against
@@ -860,8 +859,7 @@ fun findNegativePeaks(positive: NucSeq, rangeList: List<SRange>, pairingFunc: (N
860859
fun fastaToNucSeq (fasta: String): Map<String, NucSeq> {
861860
val chromNucSeqMap = HashMap<String,NucSeq>()
862861
try {
863-
val file = File(fasta)
864-
file.bufferedReader().use { br ->
862+
bufferedReader(fasta).use { br ->
865863
var currChrom: String = "-1"
866864
var currSeq = ByteArrayOutputStream()
867865
var line = br.readLine()

src/main/kotlin/biokotlin/kmer/KmerIO.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package biokotlin.kmer
22

3+
import biokotlin.util.bufferedReader
34
import it.unimi.dsi.fastutil.BigArrays
45
import net.jpountz.lz4.LZ4FrameInputStream
56
import net.jpountz.lz4.LZ4FrameOutputStream
@@ -28,7 +29,7 @@ class KmerIO(filename: String, isCompressed: Boolean = true): Iterator<Pair<Kmer
2829
reader = if (isCompressed) {
2930
BufferedReader(InputStreamReader(LZ4FrameInputStream(FileInputStream(File(filename)))))
3031
} else {
31-
File(filename).bufferedReader()
32+
bufferedReader(filename)
3233
}
3334

3435
/*

src/main/kotlin/biokotlin/seqIO/FastqIO.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@ package biokotlin.seqIO
33
import biokotlin.seq.NucSeqRecord
44
import biokotlin.seq.Seq
55
import biokotlin.seq.SeqRecord
6+
import biokotlin.util.bufferedReader
67
import com.google.common.collect.ImmutableMap
78
import kotlinx.coroutines.*
89
import kotlinx.coroutines.channels.Channel
910
import java.io.BufferedReader
10-
import java.io.File
1111

1212
/**
1313
[FastqIO] implements a [SequenceIterator] for a FASTQ file at path [filename]
@@ -100,7 +100,7 @@ class FastqIO(val filename: String) : SequenceIterator {
100100
String, String>>) {
101101

102102
try {
103-
File(filename).bufferedReader().use { reader ->
103+
bufferedReader(filename).use { reader ->
104104
var line = reader.readLine()
105105
var lineNumber = 1
106106
while (line != null) {

src/main/kotlin/biokotlin/seqIO/SeqIO.kt

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,28 @@ import java.io.File
66

77

88
enum class SeqFormat(val suffixes: List<String>) {
9-
fasta(listOf("fa", "fasta", "fa.gz", "fasta.gz")),
9+
// https://en.wikipedia.org/wiki/FASTA_format
10+
// .fasta, .fas, .fa, .fna, .ffn, .faa, .mpfa, .frn
11+
fasta(
12+
listOf(
13+
"fa",
14+
"fasta",
15+
"fa.gz",
16+
"fasta.gz",
17+
"fas",
18+
"fas.gz",
19+
"fna",
20+
"fna.gz",
21+
"ffn",
22+
"ffn.gz",
23+
"faa",
24+
"faa.gz",
25+
"mpfa",
26+
"mpfa.gz",
27+
"frn",
28+
"frn.gz"
29+
)
30+
),
1031
fastq(listOf("fq", "fastq", "fq.gz", "fastq.gz"))
1132
}
1233

0 commit comments

Comments (0)