Skip to content

Commit 3c8477b

Browse files
committed
Better handling of Java escaped Strings, remove commons-text dependency
1 parent 1231dba commit 3c8477b

File tree

7 files changed

+134
-17
lines changed

7 files changed

+134
-17
lines changed

convex-core/src/main/java/convex/core/lang/reader/AntlrReader.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
import convex.core.lang.reader.antlr.ConvexParser.TagContext;
7373
import convex.core.lang.reader.antlr.ConvexParser.TaggedFormContext;
7474
import convex.core.lang.reader.antlr.ConvexParser.VectorContext;
75+
import convex.core.text.Text;
7576
import convex.core.util.Utils;
7677

7778
public class AntlrReader {
@@ -446,7 +447,7 @@ public void exitString(StringContext ctx) {
446447
String s=ctx.getStop().getText();
447448
int n=s.length();
448449
s=s.substring(1, n-1); // skip surrounding double quotes
449-
s=ReaderUtils.unescapeString(s);
450+
s=Text.unescapeJava(s);
450451
push(Strings.create(s));
451452
}
452453

convex-core/src/main/java/convex/core/lang/reader/ReaderUtils.java

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22

33
import java.util.HashMap;
44

5-
import org.apache.commons.text.StringEscapeUtils; // TODO: Kill dependency?
6-
75
import convex.core.data.ACell;
86
import convex.core.data.AHashMap;
97
import convex.core.data.AMap;
@@ -58,18 +56,7 @@ public static Symbol getQuotingSymbol(String s) {
5856
return quotingSymbols.get(s);
5957
}
6058

61-
/**
62-
* Unescapes a string according to Java rules
63-
* @param s String to unescape
64-
* @return Unescaped string
65-
*/
66-
public static String unescapeString(String s) {
67-
return StringEscapeUtils.unescapeJava(s);
68-
}
6959

70-
public static String escapeString(String s) {
71-
return StringEscapeUtils.escapeJava(s);
72-
}
7360

7461
private static final HashMap<String,ACell> specialLiterals=Maps.hashMapOf(
7562
"##NaN",CVMDouble.NaN,

convex-core/src/main/java/convex/core/text/Text.java

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import convex.core.Coin;
1111
import convex.core.data.prim.CVMDouble;
1212
import convex.core.data.util.BlobBuilder;
13+
import convex.core.exceptions.TODOException;
14+
import convex.core.util.Utils;
1315

1416
public class Text {
1517
private static final int WHITESPACE_LENGTH = 32;
@@ -191,7 +193,90 @@ public static String repeat(char c, int count) {
191193
Arrays.fill(cs,c);
192194
return new String(cs);
193195
}
196+
197+
public static String escapeJava(String s) {
198+
throw new TODOException();
199+
}
194200

201+
/**
202+
* Unescapes a Java string
203+
* @param st String to unescape
204+
* @return Unescaped string, or null if not a valid Java String
205+
*/
206+
public static String unescapeJava(String st) {
207+
StringBuilder sb = null;
208+
int n=st.length();
195209

210+
for (int i = 0; i < n; i++) {
211+
int startPos=i;
212+
char ch = st.charAt(i);
213+
if ((ch == '\\')&&(i+1<n)) {
214+
char nextChar = st.charAt(i + 1);
215+
216+
// Check for octal escape, consumes 1-3 octal chars greedily
217+
if (nextChar >= '0' && nextChar <= '7') {
218+
int code=Utils.octalVal(nextChar);
219+
for (int j=i+2; j<i+4; j++) {
220+
if (j>=n) break;
221+
int v=Utils.octalVal(st.charAt(j));
222+
if (v<0) break; // no more octal
223+
if (code>=32) break; // wouldn't be valid, JLS maximum octal value of 377
224+
code=code*8+v;
225+
i++;
226+
}
227+
ch=(char) code;
228+
} else {
229+
switch (nextChar) {
230+
case '\\':
231+
ch = '\\';
232+
break;
233+
case 'b':
234+
ch = '\b';
235+
break;
236+
case 'f':
237+
ch = '\f';
238+
break;
239+
case 'n':
240+
ch = '\n';
241+
break;
242+
case 'r':
243+
ch = '\r';
244+
break;
245+
case 't':
246+
ch = '\t';
247+
break;
248+
case '\"':
249+
ch = '\"';
250+
break;
251+
case '\'':
252+
ch = '\'';
253+
break;
254+
// Hex Unicode: u????
255+
case 'u':
256+
if (i+6 > n) {
257+
return null; // insufficient chars for unicode
258+
}
259+
int cp=0;
260+
for (int j=0; j<4; j++) {
261+
int v=Utils.hexVal(st.charAt(i+j+2));
262+
if (v<0) { // not a hex value
263+
return null;
264+
}
265+
cp=cp*16+Utils.hexVal(st.charAt(i+j+2));
266+
}
267+
if (sb==null) sb=new StringBuilder(st.substring(0, startPos));
268+
sb.append(Character.toChars(cp));
269+
i += 5; // skip extra 5 chars on top of loop increment
270+
continue;
271+
}
272+
}
273+
i++; // skip a char, since we consumed nextChar
274+
}
275+
// We are appending a single char
276+
if (sb==null) sb=new StringBuilder(st.substring(0, startPos));
277+
sb.append(ch);
278+
}
279+
return (sb==null)?st:sb.toString();
280+
}
196281

197282
}

convex-core/src/main/java/convex/core/util/Utils.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ public static BigInteger hexToBigInt(String hex) {
286286
/**
287287
* Gets the value of a single hex character e.g. hexVal('c') => 12
288288
*
289-
* @param c Character representing a hex digit
289+
* @param c Character possibly representing a hex digit
290290
* @return int in the range 0..15 inclusive, or -1 if not a hex char
291291
*/
292292
public static int hexVal(char c) {
@@ -303,6 +303,20 @@ public static int hexVal(char c) {
303303

304304
return -1;
305305
}
306+
307+
/**
308+
* Gets the value of a single octal character e.g. octalVal('6') => 6
309+
*
310+
* @param c Character possibly representing an octal digit
311+
* @return int in the range 0..7 inclusive, or -1 if not an octal char
312+
*/
313+
public static int octalVal(char c) {
314+
int v = (int) c;
315+
316+
if ((v<48)||(v>55)) return -1; // out of possible range
317+
318+
return v-48;
319+
}
306320

307321
/**
308322
* Converts a byte array of length N to a hex string of length 2N

convex-core/src/main/java/module-info.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
exports convex.dlfs;
2323

2424
requires transitive org.antlr.antlr4.runtime;
25-
requires org.apache.commons.text;
2625
requires org.bouncycastle.pkix;
2726
requires transitive org.bouncycastle.provider;
2827
requires org.bouncycastle.util;

convex-core/src/test/java/convex/core/lang/ReaderTest.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import convex.core.data.prim.CVMDouble;
3333
import convex.core.data.prim.CVMLong;
3434
import convex.core.exceptions.ParseException;
35+
import convex.core.text.Text;
3536
import convex.test.Samples;
3637

3738
/**
@@ -243,7 +244,6 @@ public void testStrings() {
243244

244245
// Multi-line String
245246
assertEquals(Strings.create("\n"), Reader.read("\"\n\""));
246-
247247
}
248248

249249
@Test
@@ -412,6 +412,17 @@ public void doIdempotencyTest(ACell cell) {
412412
doReadPrintTest("^{} 0xa89e59cc8ab9fc6a13785a37938c85b306b24663415effc01063a6e25ef52ebcd3647d3a77e0a33908a372146fdccab6");
413413
}
414414

415+
/**
416+
* Test cases for strings with Java escapes
417+
*/
418+
@Test public void testJavaEscapes() {
419+
doEscapeTest("!0\\","\\410\\");
420+
}
421+
422+
private void doEscapeTest(String raw, String escaped) {
423+
assertEquals(raw,Text.unescapeJava(escaped));
424+
}
425+
415426
/**
416427
* Test cases that should read and print identically
417428
*/

convex-core/src/test/java/convex/core/text/TextTest.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package convex.core.text;
22

33
import static org.junit.Assert.assertEquals;
4+
import static org.junit.jupiter.api.Assertions.assertNull;
45

56
import java.text.ParseException;
67

@@ -21,6 +22,25 @@ public void testWhiteSpace() {
2122
checkWhiteSpace(97);
2223
checkWhiteSpace(100);
2324
}
25+
26+
@Test
27+
public void testUnescapeJava() {
28+
assertEquals("foo",Text.unescapeJava("foo"));
29+
30+
assertEquals("\\",Text.unescapeJava("\\"));
31+
32+
assertEquals("zzAzzB",Text.unescapeJava("zz\\u0041zz\\u0042"));
33+
assertNull(Text.unescapeJava("\\u"));
34+
assertNull(Text.unescapeJava("\\u0x0x")); // not valid unicode
35+
assertNull(Text.unescapeJava("\\u012")); // not valid unicode (only 3 chars)
36+
37+
// octal escapes
38+
assertEquals("a%b",Text.unescapeJava("a\\45b"));
39+
assertEquals("\19",Text.unescapeJava("\\19"));
40+
assertEquals("\0\0",Text.unescapeJava("\\0\\0"));
41+
assertEquals("\1\11\111",Text.unescapeJava("\\1\\11\\111"));
42+
assertEquals("!0",Text.unescapeJava("\\410")); // Yeah, JLS has max octal value of 377. Don't ask why...
43+
}
2444

2545
private void checkWhiteSpace(int len) {
2646
String s = Text.whiteSpace(len);

0 commit comments

Comments
 (0)