File tree Expand file tree Collapse file tree 4 files changed +24
-0
lines changed Expand file tree Collapse file tree 4 files changed +24
-0
lines changed Original file line number Diff line number Diff line change
1
+ 0.2.2 2023-09-06
2
+ - Fix behaviour for end of text character positions
3
+ when no end of sentence occurred before.
4
+
1
5
0.2.1 2023-09-05
2
6
- Add english tokenizer.
3
7
- Fix buffer bug.
Original file line number Diff line number Diff line change @@ -1018,6 +1018,10 @@ PARSECHAR:
1018
1018
1019
1019
if eot {
1020
1020
eot = false
1021
+ if ! sentenceEnd {
1022
+ sentenceEnd = true
1023
+ w .SentenceEnd (buffc )
1024
+ }
1021
1025
textEnd = true
1022
1026
w .TextEnd (0 )
1023
1027
if DEBUG {
Original file line number Diff line number Diff line change @@ -592,6 +592,10 @@ PARSECHARM:
592
592
593
593
if eot {
594
594
eot = false
595
+ if ! sentenceEnd {
596
+ sentenceEnd = true
597
+ w .SentenceEnd (buffc )
598
+ }
595
599
textEnd = true
596
600
w .TextEnd (buffc )
597
601
rewindBuffer = true
Original file line number Diff line number Diff line change @@ -85,6 +85,18 @@ func TestTokenWriterFromOptions(t *testing.T) {
85
85
matStr = w .String ()
86
86
assert .Equal ("1 5 5 6\n 1 6\n 0 3 3 4\n 0 4\n " , matStr )
87
87
88
+ w .Reset ()
89
+ mat .TransduceTokenWriter (strings .NewReader ("Tree\n \x04 \n " ), tws )
90
+
91
+ matStr = w .String ()
92
+ assert .Equal ("0 4\n 0 4\n " , matStr )
93
+
94
+ w .Reset ()
95
+ mat .TransduceTokenWriter (strings .NewReader ("Tree.\n \x04 \n " ), tws )
96
+
97
+ matStr = w .String ()
98
+ assert .Equal ("0 4 4 5\n 0 5\n " , matStr )
99
+
88
100
//
89
101
// Write sentence offsets without token offsets
90
102
tws = NewTokenWriter (w , SENTENCE_POS | NEWLINE_AFTER_EOT )
You can’t perform that action at this time.
0 commit comments