Skip to content

Commit 43a9c20

Browse files
committed
Better print handling for bad UTF-8 characters
1 parent b5ec68e commit 43a9c20

File tree

4 files changed

+20
-2
lines changed

4 files changed

+20
-2
lines changed

convex-core/src/main/java/convex/core/data/AString.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ public boolean print(BlobBuilder sb, long limit) {
4141
long avail=limit-sb.count();
4242
if (avail>0) {
4343
sb.append('"');
44-
sb.append(slice(0,avail-1));
44+
printEscaped(sb,0,avail-1);
4545
}
4646
return false;
4747
}

convex-core/src/main/java/convex/core/data/StringShort.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import java.nio.charset.StandardCharsets;
44

5+
import convex.core.data.prim.CVMChar;
56
import convex.core.data.util.BlobBuilder;
67
import convex.core.exceptions.InvalidDataException;
78
import convex.core.text.Text;
@@ -223,7 +224,15 @@ protected void printEscaped(BlobBuilder sb, long start, long end) {
223224
if ((start<0)||(start>end)||(end>n)) throw new IllegalArgumentException(Errors.badRange(start, end));
224225
for (long i=start; i<end; i++) {
225226
byte b=data.byteAtUnchecked(i);
226-
Text.writeEscapedByte(sb,b);
227+
if (b>=0) {
228+
// ASCII range, might be escape character
229+
Text.writeEscapedByte(sb,b);
230+
} else {
231+
CVMChar ch=get(i);
232+
if (ch==null) ch=CVMChar.BAD_CHARACTER;
233+
sb.append(ch);
234+
i+=CVMChar.utfLength(ch.getCodePoint())-1;
235+
}
227236
}
228237
return;
229238
}

convex-core/src/main/java/convex/core/data/prim/CVMChar.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ public final class CVMChar extends APrimitive implements Comparable<CVMChar> {
2626
public static int MAX_CODEPOINT=0x10ffff; // 21 bits max Unicode value
2727
public static CVMChar MAX_VALUE=create(MAX_CODEPOINT); // 21 bits max Unicode value
2828

29+
public static CVMChar BAD_CHARACTER=create(0xFFFD);
30+
2931
private static final int CACHE_SIZE=256;
3032

3133
private static final CVMChar[] cache=new CVMChar[CACHE_SIZE];

convex-core/src/test/java/convex/core/lang/ReaderTest.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,13 @@ public void testResolve() {
275275
assertParseException(() -> Reader.read("@(foo)"));
276276
}
277277

278+
// Regression test for printing strings that contain invalid UTF-8.
// Builds a string from the single byte 0xFF (never valid in well-formed UTF-8)
// and asserts that its printed form is the replacement character U+FFFD
// wrapped in double quotes.
@Test public void testUnprintablePrint() {
279+
AString bad=Strings.create(Blob.fromHex("ff"));
280+
AString pbad=RT.print(bad);
281+
AString expected=Strings.create("\"\uFFFD\"");
282+
assertEquals(expected,pbad);
283+
}
284+
278285
@Test
279286
public void testTooManyClosingParens() {
280287
// See #244

0 commit comments

Comments
 (0)