maize-genetics · tcasstevens · Mar 13, 2025 · Mar 12, 2025 · Mar 13, 2025 · Mar 13, 2025
diff --git a/build.gradle.kts b/build.gradle.kts
@@ -114,7 +114,6 @@ dependencies {
     implementation("org.jgrapht:jgrapht-core:1.5.1")
 
 
-    implementation("io.github.oshai:kotlin-logging-jvm:5.0.0")
     implementation(group = "ch.qos.logback", name = "logback-classic", version = "1.2.6")
     implementation("it.unimi.dsi:fastutil:8.5.12")
     implementation("org.lz4:lz4-java:1.8.0")

diff --git a/src/main/kotlin/biokotlin/featureTree/Graph.kt b/src/main/kotlin/biokotlin/featureTree/Graph.kt
@@ -1,11 +1,9 @@
 package biokotlin.featureTree
 
 import biokotlin.util.bufferedReader
-import io.github.oshai.kotlinlogging.KotlinLogging
 import kotlinx.coroutines.async
 import kotlinx.coroutines.awaitAll
 import kotlinx.coroutines.runBlocking
-import java.io.FileReader
 import java.util.*
 import java.util.concurrent.ConcurrentHashMap
 import java.util.concurrent.ConcurrentMap
@@ -354,6 +352,29 @@ internal class Graph private constructor(
             assert { invariants() }
         }
 
+        /**
+         * Links this node to each node of [newParents] that is not already one of its parents.
+         *
+         * Every newly linked parent is appended to `parents`, and this node is appended to the end of
+         * that parent's `children`. A node that is already a parent, or that occurs more than once in
+         * [newParents], is linked only once, so calling this repeatedly with the same nodes does not
+         * create duplicate links.
+         *
+         * @param newParents candidate parents; entries that are already parents are ignored
+         */
+        fun addParents(newParents: List<Node>) {
+            // Membership is checked against both the existing parents and the ones collected so far, so
+            // a node repeated inside newParents cannot be linked twice.
+            val parentsNotAdded = mutableListOf<Node>()
+            for (candidate in newParents) {
+                if (candidate !in parents && candidate !in parentsNotAdded) parentsNotAdded.add(candidate)
+            }
+            parents.addAll(parentsNotAdded)
+            parentsNotAdded.forEach { it.children.addLast(this) }
+            incrementTopo()
+            assert { invariants() }
+        }
+
         /**
          * Puts node and all orphaned descendants into a deleted state where they cannot be read from nor written to.
          */
@@ -831,6 +837,27 @@ internal class Graph private constructor(
     fun containsName(name: String): Boolean = byName.contains(name)
 
     companion object {
+
+        /**
+         * Creates the [ParseException] describing a malformed line of the file being parsed.
+         *
+         * Every argument is handed to the [ParseException] constructor unchanged and in the same order.
+         *
+         * @param lineNumber number of the offending line
+         * @param line text of the offending line
+         * @param textCorrector the text-correcting function in effect, or `null` if there is none
+         * @param file the file being parsed
+         * @param helpText explanation of what is wrong with the line
+         * @return the exception; the caller is responsible for throwing it
+         */
+        fun parseException(
+            lineNumber: Int,
+            line: String,
+            textCorrector: ((String) -> String)?,
+            file: String,
+            helpText: String
+        ): ParseException = ParseException(lineNumber, line, textCorrector, file, helpText)
+
         /**
          * Returns a graph representation of the file.
          * @see [Genome.fromFile]
@@ -845,83 +862,283 @@ internal class Graph private constructor(
         ): Graph {
             // PLANNED: concurrent reading of ### directive
 
-            val graph = Graph(multipleParentage)
-            modifySchema?.invoke(graph.schema)
+            val graph = getGraph(file, textCorrecter, multipleParentage, modifySchema)
+
             bufferedReader(file).useLines { lines ->
                 var lineCounter = 0
+                var commentCounter = 0
                 for (line in lines) {
+
                     lineCounter++
                     if (line.isEmpty() || line.isBlank()) continue //skip blank lines
                     // PLANNED: comment support
                     if (line.startsWith("#")) {
-                        // This has been known to print over 4000 lines of comments in a single file, which is not useful.
-                        //logger.info { "Comments not yet supported. Comment at line $lineCounter discarded: $line" }
+                        commentCounter++
+                        if (commentCounter == 1) {
+                            println("Comments not yet supported. Comment at line $lineCounter discarded: $line")
+                        }
                         continue
                     }
 
                     val corrected = textCorrecter?.invoke(line) ?: line
 
-                    fun parseException(helpText: String): ParseException {
-                        return ParseException(lineCounter, line, textCorrecter, file, helpText)
+                    val split = corrected.split("\t")
+
+                    if (split.size != 9) throw parseException(
+                        lineCounter,
+                        line,
+                        textCorrecter,
+                        file,
+                        "Should contain 9 tab-delineated columns. $corrected"
+                    )
+
+                    val seqid = split[0]
+                    val source = split[1]
+                    val type = split[2]
+                    val start = split[3].toIntOrNull()
+                        ?: throw parseException(
+                            lineCounter,
+                            line,
+                            textCorrecter,
+                            file,
+                            "Cannot parse start ${split[3]} into an integer."
+                        )
+                    val end = split[4].toIntOrNull()
+                        ?: throw parseException(
+                            lineCounter,
+                            line,
+                            textCorrecter,
+                            file,
+                            "Cannot parse start ${split[4]} into an integer."
+                        )
+                    val score = split[5].toDoubleOrNull()
+                    val strand =
+                        Strand.fromString(split[6]) ?: throw parseException(
+                            lineCounter,
+                            line,
+                            textCorrecter,
+                            file,
+                            "Cannot parse ${split[6]} into a strand."
+                        )
+                    val phase =
+                        Phase.fromString(split[7]) ?: throw parseException(
+                            lineCounter,
+                            line,
+                            textCorrecter,
+                            file,
+                            "Cannot parse ${split[7]} into a phase."
+                        )
+                    if (!split[8].trimEnd(';').split(';').map { it.split('=').first() }.allUnique()) {
+                        throw parseException(
+                            lineCounter,
+                            line,
+                            textCorrecter,
+                            file,
+                            "Cannot have multiple instances of the same tag"
+                        )
+                    }
+                    val attributes = split[8].trimEnd(';').split(';').associate {
+                        val tagValue = it.split('=')
+                        if (tagValue.size != 2)
+                            throw parseException(
+                                lineCounter,
+                                line,
+                                textCorrecter,
+                                file,
+                                "All distinct attributes must be separated by a ; character."
+                            )
+                        val values = tagValue[1].split(',')
+                        tagValue[0] to values
+                    }
+
+                    if ((attributes["ID"]?.size ?: 0) > 1) throw parseException(
+                        lineCounter,
+                        line,
+                        textCorrecter,
+                        file,
+                        "Cannot have multiple IDs."
+                    )
+                    val id = attributes["ID"]?.get(0)
+
+                    val parentIDs = attributes["Parent"]
+                    val parents = parentIDs?.map {
+                        graph.byID(it)
+                            ?: throw parseException(
+                                lineCounter,
+                                line,
+                                textCorrecter,
+                                file,
+                                "Contains Parent attribute $it, which is not the ID of a previous line."
+                            )
+                    } ?: listOf(graph.root)
+                    val resolvedParents = if (parentResolver == null || parents.size <= 1) {
+                        parents
+                    } else {
+                        listOf(parents[parentResolver(corrected, parents.map { IFeature(it as DataNode) })])
                     }
 
+                    if (resolvedParents.size > 1 && !multipleParentage)
+                        throw parseException(
+                            lineCounter,
+                            line,
+                            textCorrecter,
+                            file,
+                            "Must enable multipleParentage to have features with multiple parents"
+                        )
+
+                    if (id == null) {
+                        graph.DataNode(
+                            resolvedParents.toMutableList(), LinkedList(), Data(
+                                seqid,
+                                source,
+                                type,
+                                mutableListOf(start..end),
+                                score,
+                                strand,
+                                mutableListOf(phase),
+                                attributes.toMutableMap()
+                            )
+                        )
+                    } else { // nodes with an ID where created in the first pass
+                        val node = graph.byID(id)!!
+                        node.addParents(resolvedParents)
+                    }
+
+                }
+            }
+            return graph
+        }
+
+        /**
+         * First pass through GFF file to get nodes.
+         */
+        private fun getGraph(
+            file: String,
+            textCorrecter: ((String) -> String)?, // PLANNED: robust convenience function framework
+            // parentResolver: ParentResolver?,
+            multipleParentage: Boolean,
+            modifySchema: (TypeSchema.() -> Unit)?
+        ): Graph {
+            // PLANNED: concurrent reading of ### directive
+
+            val graph = Graph(multipleParentage)
+            modifySchema?.invoke(graph.schema)
+            bufferedReader(file).useLines { lines ->
+                var lineCounter = 0
+                var commentCounter = 0
+                for (line in lines) {
+                    lineCounter++
+                    if (line.isEmpty() || line.isBlank()) continue //skip blank lines
+                    // PLANNED: comment support
+                    if (line.startsWith("#")) {
+                        commentCounter++
+                        if (commentCounter == 1) {
+                            println("Comments not yet supported. Comment at line $lineCounter discarded: $line")
+                        }
+                        continue
+                    }
+
+                    val corrected = textCorrecter?.invoke(line) ?: line
+
                     val split = corrected.split("\t")
 
-                    if (split.size != 9) throw parseException("Should contain 9 tab-delineated columns. ${corrected}")
+                    if (split.size != 9) throw parseException(
+                        lineCounter,
+                        line,
+                        textCorrecter,
+                        file,
+                        "Should contain 9 tab-delineated columns. $corrected"
+                    )
 
                     val seqid = split[0]
                     val source = split[1]
                     val type = split[2]
                     val start = split[3].toIntOrNull()
-                        ?: throw parseException("Cannot parse start ${split[3]} into an integer.")
+                        ?: throw parseException(
+                            lineCounter,
+                            line,
+                            textCorrecter,
+                            file,
+                            "Cannot parse start ${split[3]} into an integer."
+                        )
                     val end = split[4].toIntOrNull()
-                        ?: throw parseException("Cannot parse start ${split[4]} into an integer.")
+                        ?: throw parseException(
+                            lineCounter,
+                            line,
+                            textCorrecter,
+                            file,
+                            "Cannot parse start ${split[4]} into an integer."
+                        )
                     val score = split[5].toDoubleOrNull()
                     val strand =
-                        Strand.fromString(split[6]) ?: throw parseException("Cannot parse ${split[6]} into a strand.")
+                        Strand.fromString(split[6]) ?: throw parseException(
+                            lineCounter,
+                            line,
+                            textCorrecter,
+                            file,
+                            "Cannot parse ${split[6]} into a strand."
+                        )
                     val phase =
-                        Phase.fromString(split[7]) ?: throw parseException("Cannot parse ${split[7]} into a phase.")
-                    if (!split[8].trimEnd(';').split(';').map { it.split('=').first() }.allUnique() ) {
-                        throw parseException("Cannot have multiple instances of the same tag")
+                        Phase.fromString(split[7]) ?: throw parseException(
+                            lineCounter,
+                            line,
+                            textCorrecter,
+                            file,
+                            "Cannot parse ${split[7]} into a phase."
+                        )
+                    if (!split[8].trimEnd(';').split(';').map { it.split('=').first() }.allUnique()) {
+                        throw parseException(
+                            lineCounter,
+                            line,
+                            textCorrecter,
+                            file,
+                            "Cannot have multiple instances of the same tag"
+                        )
                     }
                     val attributes = split[8].trimEnd(';').split(';').associate {
                         val tagValue = it.split('=')
                         if (tagValue.size != 2)
-                            throw parseException("All distinct attributes must be separated by a ; character.")
+                            throw parseException(
+                                lineCounter,
+                                line,
+                                textCorrecter,
+                                file,
+                                "All distinct attributes must be separated by a ; character."
+                            )
                         val values = tagValue[1].split(',')
                         tagValue[0] to values
                     }
 
-                    if ((attributes["ID"]?.size ?: 0) > 1) throw parseException("Cannot have multiple IDs.")
+                    if ((attributes["ID"]?.size ?: 0) > 1) throw parseException(
+                        lineCounter,
+                        line,
+                        textCorrecter,
+                        file,
+                        "Cannot have multiple IDs."
+                    )
                     val id = attributes["ID"]?.get(0)
                     if (id != null) {
                         val existing = graph.byID(id)
                         if (existing != null) {
                             val compatible =
                                 existing.seqid == seqid || existing.source == source || existing.type == type ||
                                         existing.score == score || existing.strand == strand
-                            if (!compatible) throw parseException("Shares ID \"$id\" with $existing but they are not compatible.")
+                            if (!compatible) throw parseException(
+                                lineCounter,
+                                line,
+                                textCorrecter,
+                                file,
+                                "Shares ID \"$id\" with $existing but they are not compatible."
+                            )
                             existing.addDiscontinuity(start..end, phase)
                             continue
                         }
-                    }
-
-                    val parentIDs = attributes["Parent"]
-                    val parents = parentIDs?.map {
-                        graph.byID(it)
-                            ?: throw parseException("Contains Parent attribute $it, which is not the ID of a previous line.")
-                    } ?: listOf(graph.root)
-                    val resolvedParents = if (parentResolver == null || parents.size <= 1) {
-                        parents
                     } else {
-                        listOf(parents[parentResolver(corrected, parents.map { IFeature(it as DataNode) })])
+                        continue
                     }
 
-                    if (resolvedParents.size > 1 && !multipleParentage)
-                        throw parseException("Must enable multipleParentage to have features with multiple parents")
-
                     graph.DataNode(
-                        resolvedParents.toMutableList(), LinkedList(), Data(
+                        mutableListOf(), LinkedList(), Data(
                             seqid,
                             source,
                             type,
@@ -934,9 +1151,9 @@ internal class Graph private constructor(
                     )
                 }
             }
+
             return graph
         }
-    }
-}
 
-private val logger = KotlinLogging.logger {}
+    }
+}
diff --git a/src/main/kotlin/biokotlin/genome/MAFProcessingUtils.kt b/src/main/kotlin/biokotlin/genome/MAFProcessingUtils.kt
@@ -5,7 +5,6 @@ import com.google.common.collect.Range
 import com.google.common.collect.RangeMap
 import com.google.common.collect.Sets
 import com.google.common.collect.TreeRangeMap
-import io.github.oshai.kotlinlogging.KotlinLogging
 import org.jetbrains.kotlinx.dataframe.DataFrame
 import org.jetbrains.kotlinx.dataframe.api.toDataFrame
 import java.io.BufferedReader
@@ -29,7 +28,6 @@ import java.util.stream.Collectors
 // Data class to be used when creating a dataFrame for chrom percent coverage statistics
 // This may be used if can get Kotlin DataFrame vs Krangl DataFrame to work.
 data class ChromStats(val contig: String, val numRegionBPs: Int, val percentCov: Double, val percentId: Double)
-private val logger = KotlinLogging.logger {}
 fun createWiggleFilesFromCoverageIdentity(coverage:IntArray, identity:IntArray, contig:String, outputDir:String) {
 
     // There will be 2 wiggle files created: 1 for identity and 1 for coverage