1
1
package biokotlin.featureTree
2
2
3
3
import biokotlin.util.bufferedReader
4
- import io.github.oshai.kotlinlogging.KotlinLogging
5
4
import kotlinx.coroutines.async
6
5
import kotlinx.coroutines.awaitAll
7
6
import kotlinx.coroutines.runBlocking
8
- import java.io.FileReader
9
7
import java.util.*
10
8
import java.util.concurrent.ConcurrentHashMap
11
9
import java.util.concurrent.ConcurrentMap
@@ -354,6 +352,14 @@ internal class Graph private constructor(
354
352
assert { invariants() }
355
353
}
356
354
355
/**
 * Links this node to every node in [newParents] that is not already one of its parents.
 *
 * Each parent is linked at most once: entries already present in `parents` are skipped and
 * repeated entries within [newParents] are collapsed, so a repeated parent cannot produce a
 * duplicate parent/child edge. Every newly linked parent also gets this node appended to the
 * end of its `children`.
 *
 * @param newParents candidate parents; may overlap with the existing parents or contain repeats
 */
fun addParents(newParents: List<Node>) {
    // The filter alone only compares against parents present before this call, so distinct()
    // is needed to stop the same parent being added twice when it is repeated in newParents.
    val parentsNotAdded = newParents.filter { it !in parents }.distinct()
    parents.addAll(parentsNotAdded)
    parentsNotAdded.forEach { it.children.addLast(this) }
    // NOTE(review): presumably refreshes topological bookkeeping after the edge change — confirm.
    incrementTopo()
    assert { invariants() }
}
362
+
357
363
/* *
358
364
* Puts node and all orphaned descendants into a deleted state where they cannot be read from nor written to.
359
365
*/
@@ -831,6 +837,17 @@ internal class Graph private constructor(
831
837
fun containsName (name : String ): Boolean = byName.contains(name)
832
838
833
839
companion object {
840
+
841
/**
 * Builds a [ParseException] from the supplied parsing context.
 *
 * The arguments are forwarded to the [ParseException] constructor unchanged and in the same order.
 *
 * @param lineNumber number of the line being parsed when the problem was found
 * @param line the line as read from the file
 * @param textCorrector the correction function that was applied to the line, or `null` if none
 * @param file the file being parsed
 * @param helpText description of what is wrong with the line
 * @return the newly constructed exception; it is returned, not thrown
 */
fun parseException(
    lineNumber: Int,
    line: String,
    textCorrector: ((String) -> String)?,
    file: String,
    helpText: String
): ParseException = ParseException(lineNumber, line, textCorrector, file, helpText)
850
+
834
851
/* *
835
852
* Returns a graph representation of the file.
836
853
* @see [Genome.fromFile]
@@ -845,83 +862,283 @@ internal class Graph private constructor(
845
862
): Graph {
846
863
// PLANNED: concurrent reading of ### directive
847
864
848
- val graph = Graph ( multipleParentage)
849
- modifySchema?.invoke(graph.schema)
865
+ val graph = getGraph(file, textCorrecter, multipleParentage, modifySchema )
866
+
850
867
bufferedReader(file).useLines { lines ->
851
868
var lineCounter = 0
869
+ var commentCounter = 0
852
870
for (line in lines) {
871
+
853
872
lineCounter++
854
873
if (line.isEmpty() || line.isBlank()) continue // skip blank lines
855
874
// PLANNED: comment support
856
875
if (line.startsWith(" #" )) {
857
- // This has been known to print over 4000 lines of comments in a single file, which is not useful.
858
- // logger.info { "Comments not yet supported. Comment at line $lineCounter discarded: $line" }
876
+ commentCounter++
877
+ if (commentCounter == 1 ) {
878
+ println (" Comments not yet supported. Comment at line $lineCounter discarded: $line " )
879
+ }
859
880
continue
860
881
}
861
882
862
883
val corrected = textCorrecter?.invoke(line) ? : line
863
884
864
- fun parseException (helpText : String ): ParseException {
865
- return ParseException (lineCounter, line, textCorrecter, file, helpText)
885
+ val split = corrected.split(" \t " )
886
+
887
+ if (split.size != 9 ) throw parseException(
888
+ lineCounter,
889
+ line,
890
+ textCorrecter,
891
+ file,
892
+ " Should contain 9 tab-delineated columns. $corrected "
893
+ )
894
+
895
+ val seqid = split[0 ]
896
+ val source = split[1 ]
897
+ val type = split[2 ]
898
+ val start = split[3 ].toIntOrNull()
899
+ ? : throw parseException(
900
+ lineCounter,
901
+ line,
902
+ textCorrecter,
903
+ file,
904
+ " Cannot parse start ${split[3 ]} into an integer."
905
+ )
906
+ val end = split[4 ].toIntOrNull()
907
+ ? : throw parseException(
908
+ lineCounter,
909
+ line,
910
+ textCorrecter,
911
+ file,
912
+ " Cannot parse start ${split[4 ]} into an integer."
913
+ )
914
+ val score = split[5 ].toDoubleOrNull()
915
+ val strand =
916
+ Strand .fromString(split[6 ]) ? : throw parseException(
917
+ lineCounter,
918
+ line,
919
+ textCorrecter,
920
+ file,
921
+ " Cannot parse ${split[6 ]} into a strand."
922
+ )
923
+ val phase =
924
+ Phase .fromString(split[7 ]) ? : throw parseException(
925
+ lineCounter,
926
+ line,
927
+ textCorrecter,
928
+ file,
929
+ " Cannot parse ${split[7 ]} into a phase."
930
+ )
931
+ if (! split[8 ].trimEnd(' ;' ).split(' ;' ).map { it.split(' =' ).first() }.allUnique()) {
932
+ throw parseException(
933
+ lineCounter,
934
+ line,
935
+ textCorrecter,
936
+ file,
937
+ " Cannot have multiple instances of the same tag"
938
+ )
939
+ }
940
+ val attributes = split[8 ].trimEnd(' ;' ).split(' ;' ).associate {
941
+ val tagValue = it.split(' =' )
942
+ if (tagValue.size != 2 )
943
+ throw parseException(
944
+ lineCounter,
945
+ line,
946
+ textCorrecter,
947
+ file,
948
+ " All distinct attributes must be separated by a ; character."
949
+ )
950
+ val values = tagValue[1 ].split(' ,' )
951
+ tagValue[0 ] to values
952
+ }
953
+
954
+ if ((attributes[" ID" ]?.size ? : 0 ) > 1 ) throw parseException(
955
+ lineCounter,
956
+ line,
957
+ textCorrecter,
958
+ file,
959
+ " Cannot have multiple IDs."
960
+ )
961
+ val id = attributes[" ID" ]?.get(0 )
962
+
963
+ val parentIDs = attributes[" Parent" ]
964
+ val parents = parentIDs?.map {
965
+ graph.byID(it)
966
+ ? : throw parseException(
967
+ lineCounter,
968
+ line,
969
+ textCorrecter,
970
+ file,
971
+ " Contains Parent attribute $it , which is not the ID of a previous line."
972
+ )
973
+ } ? : listOf (graph.root)
974
+ val resolvedParents = if (parentResolver == null || parents.size <= 1 ) {
975
+ parents
976
+ } else {
977
+ listOf (parents[parentResolver(corrected, parents.map { IFeature (it as DataNode ) })])
866
978
}
867
979
980
+ if (resolvedParents.size > 1 && ! multipleParentage)
981
+ throw parseException(
982
+ lineCounter,
983
+ line,
984
+ textCorrecter,
985
+ file,
986
+ " Must enable multipleParentage to have features with multiple parents"
987
+ )
988
+
989
+ if (id == null ) {
990
+ graph.DataNode (
991
+ resolvedParents.toMutableList(), LinkedList (), Data (
992
+ seqid,
993
+ source,
994
+ type,
995
+ mutableListOf (start.. end),
996
+ score,
997
+ strand,
998
+ mutableListOf (phase),
999
+ attributes.toMutableMap()
1000
+ )
1001
+ )
1002
+ } else { // nodes with an ID were created in the first pass
1003
+ val node = graph.byID(id)!!
1004
+ node.addParents(resolvedParents)
1005
+ }
1006
+
1007
+ }
1008
+ }
1009
+ return graph
1010
+ }
1011
+
1012
+ /* *
1013
+ * First pass through GFF file to get nodes.
1014
+ */
1015
+ private fun getGraph (
1016
+ file : String ,
1017
+ textCorrecter : ((String ) -> String )? , // PLANNED: robust convenience function framework
1018
+ // parentResolver: ParentResolver?,
1019
+ multipleParentage : Boolean ,
1020
+ modifySchema : (TypeSchema .() -> Unit )?
1021
+ ): Graph {
1022
+ // PLANNED: concurrent reading of ### directive
1023
+
1024
+ val graph = Graph (multipleParentage)
1025
+ modifySchema?.invoke(graph.schema)
1026
+ bufferedReader(file).useLines { lines ->
1027
+ var lineCounter = 0
1028
+ var commentCounter = 0
1029
+ for (line in lines) {
1030
+ lineCounter++
1031
+ if (line.isEmpty() || line.isBlank()) continue // skip blank lines
1032
+ // PLANNED: comment support
1033
+ if (line.startsWith(" #" )) {
1034
+ commentCounter++
1035
+ if (commentCounter == 1 ) {
1036
+ println (" Comments not yet supported. Comment at line $lineCounter discarded: $line " )
1037
+ }
1038
+ continue
1039
+ }
1040
+
1041
+ val corrected = textCorrecter?.invoke(line) ? : line
1042
+
868
1043
val split = corrected.split(" \t " )
869
1044
870
- if (split.size != 9 ) throw parseException(" Should contain 9 tab-delineated columns. ${corrected} " )
1045
+ if (split.size != 9 ) throw parseException(
1046
+ lineCounter,
1047
+ line,
1048
+ textCorrecter,
1049
+ file,
1050
+ " Should contain 9 tab-delineated columns. $corrected "
1051
+ )
871
1052
872
1053
val seqid = split[0 ]
873
1054
val source = split[1 ]
874
1055
val type = split[2 ]
875
1056
val start = split[3 ].toIntOrNull()
876
- ? : throw parseException(" Cannot parse start ${split[3 ]} into an integer." )
1057
+ ? : throw parseException(
1058
+ lineCounter,
1059
+ line,
1060
+ textCorrecter,
1061
+ file,
1062
+ " Cannot parse start ${split[3 ]} into an integer."
1063
+ )
877
1064
val end = split[4 ].toIntOrNull()
878
- ? : throw parseException(" Cannot parse start ${split[4 ]} into an integer." )
1065
+ ? : throw parseException(
1066
+ lineCounter,
1067
+ line,
1068
+ textCorrecter,
1069
+ file,
1070
+ " Cannot parse start ${split[4 ]} into an integer."
1071
+ )
879
1072
val score = split[5 ].toDoubleOrNull()
880
1073
val strand =
881
- Strand .fromString(split[6 ]) ? : throw parseException(" Cannot parse ${split[6 ]} into a strand." )
1074
+ Strand .fromString(split[6 ]) ? : throw parseException(
1075
+ lineCounter,
1076
+ line,
1077
+ textCorrecter,
1078
+ file,
1079
+ " Cannot parse ${split[6 ]} into a strand."
1080
+ )
882
1081
val phase =
883
- Phase .fromString(split[7 ]) ? : throw parseException(" Cannot parse ${split[7 ]} into a phase." )
884
- if (! split[8 ].trimEnd(' ;' ).split(' ;' ).map { it.split(' =' ).first() }.allUnique() ) {
885
- throw parseException(" Cannot have multiple instances of the same tag" )
1082
+ Phase .fromString(split[7 ]) ? : throw parseException(
1083
+ lineCounter,
1084
+ line,
1085
+ textCorrecter,
1086
+ file,
1087
+ " Cannot parse ${split[7 ]} into a phase."
1088
+ )
1089
+ if (! split[8 ].trimEnd(' ;' ).split(' ;' ).map { it.split(' =' ).first() }.allUnique()) {
1090
+ throw parseException(
1091
+ lineCounter,
1092
+ line,
1093
+ textCorrecter,
1094
+ file,
1095
+ " Cannot have multiple instances of the same tag"
1096
+ )
886
1097
}
887
1098
val attributes = split[8 ].trimEnd(' ;' ).split(' ;' ).associate {
888
1099
val tagValue = it.split(' =' )
889
1100
if (tagValue.size != 2 )
890
- throw parseException(" All distinct attributes must be separated by a ; character." )
1101
+ throw parseException(
1102
+ lineCounter,
1103
+ line,
1104
+ textCorrecter,
1105
+ file,
1106
+ " All distinct attributes must be separated by a ; character."
1107
+ )
891
1108
val values = tagValue[1 ].split(' ,' )
892
1109
tagValue[0 ] to values
893
1110
}
894
1111
895
- if ((attributes[" ID" ]?.size ? : 0 ) > 1 ) throw parseException(" Cannot have multiple IDs." )
1112
+ if ((attributes[" ID" ]?.size ? : 0 ) > 1 ) throw parseException(
1113
+ lineCounter,
1114
+ line,
1115
+ textCorrecter,
1116
+ file,
1117
+ " Cannot have multiple IDs."
1118
+ )
896
1119
val id = attributes[" ID" ]?.get(0 )
897
1120
if (id != null ) {
898
1121
val existing = graph.byID(id)
899
1122
if (existing != null ) {
900
1123
val compatible =
901
1124
existing.seqid == seqid || existing.source == source || existing.type == type ||
902
1125
existing.score == score || existing.strand == strand
903
- if (! compatible) throw parseException(" Shares ID \" $id \" with $existing but they are not compatible." )
1126
+ if (! compatible) throw parseException(
1127
+ lineCounter,
1128
+ line,
1129
+ textCorrecter,
1130
+ file,
1131
+ " Shares ID \" $id \" with $existing but they are not compatible."
1132
+ )
904
1133
existing.addDiscontinuity(start.. end, phase)
905
1134
continue
906
1135
}
907
- }
908
-
909
- val parentIDs = attributes[" Parent" ]
910
- val parents = parentIDs?.map {
911
- graph.byID(it)
912
- ? : throw parseException(" Contains Parent attribute $it , which is not the ID of a previous line." )
913
- } ? : listOf (graph.root)
914
- val resolvedParents = if (parentResolver == null || parents.size <= 1 ) {
915
- parents
916
1136
} else {
917
- listOf (parents[parentResolver(corrected, parents.map { IFeature (it as DataNode ) })])
1137
+ continue
918
1138
}
919
1139
920
- if (resolvedParents.size > 1 && ! multipleParentage)
921
- throw parseException(" Must enable multipleParentage to have features with multiple parents" )
922
-
923
1140
graph.DataNode (
924
- resolvedParents.toMutableList (), LinkedList (), Data (
1141
+ mutableListOf (), LinkedList (), Data (
925
1142
seqid,
926
1143
source,
927
1144
type,
@@ -934,9 +1151,9 @@ internal class Graph private constructor(
934
1151
)
935
1152
}
936
1153
}
1154
+
937
1155
return graph
938
1156
}
939
- }
940
- }
941
1157
942
- private val logger = KotlinLogging .logger {}
1158
+ }
1159
+ }
0 commit comments