Skip to content

Commit 2921403

Browse files
committed
Edits for better handling of slightly tricky unprintable UTF-8
1 parent 43a9c20 commit 2921403

File tree

2 files changed

+19
-5
lines changed

2 files changed

+19
-5
lines changed

convex-core/src/main/java/convex/core/data/StringShort.java

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -228,10 +228,18 @@ protected void printEscaped(BlobBuilder sb, long start, long end) {
228228
// ASCII range, might be escape character
229229
Text.writeEscapedByte(sb,b);
230230
} else {
231-
CVMChar ch=get(i);
232-
if (ch==null) ch=CVMChar.BAD_CHARACTER;
233-
sb.append(ch);
234-
i+=CVMChar.utfLength(ch.getCodePoint())-1;
231+
int cp=charAt(i);
232+
if (cp<0) {
233+
sb.append(CVMChar.BAD_CHARACTER);
234+
i+=1; // skip one byte? or should we error correct?
235+
} else {
236+
// need to copy exactly one UTF character
237+
int len=CVMChar.utfLength(cp);
238+
for (int j=0; j<len; j++) {
239+
sb.append(byteAt(i+j));
240+
}
241+
i+=len-1;
242+
}
235243
}
236244
}
237245
return;

convex-core/src/test/java/convex/core/lang/ReaderTest.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import static convex.test.Assertions.*;
44
import static org.junit.jupiter.api.Assertions.assertEquals;
5+
import static org.junit.jupiter.api.Assertions.assertNotEquals;
56
import static org.junit.jupiter.api.Assertions.assertNull;
67
import static org.junit.jupiter.api.Assertions.assertSame;
78
import static org.junit.jupiter.api.Assertions.assertThrows;
@@ -276,10 +277,15 @@ public void testResolve() {
276277
}
277278

278279
@Test public void testUnprintablePrint() {
279-
AString bad=Strings.create(Blob.fromHex("ff"));
280+
AString bad=Strings.create(Blob.fromHex("ff")); // bad UTF-8
280281
AString pbad=RT.print(bad);
281282
AString expected=Strings.create("\"\uFFFD\"");
282283
assertEquals(expected,pbad);
284+
285+
String ps=pbad.toString();
286+
assertNotEquals(bad,Reader.read(ps)); // not reproducing the bad UTF-8
287+
assertEquals(pbad,Reader.read(ps).print()); // printed version should round trip
288+
doReadPrintTest(ps);
283289
}
284290

285291
@Test

0 commit comments

Comments
 (0)