Skip to content

Commit 58e4344

Browse files
Merge pull request #227 from dispatch/1.0/encode-emoji
[1.0.x] Correctly url encode emoji in path segments
2 parents 0e4eb5d + c62507e commit 58e4344

File tree

3 files changed

+30
-22
lines changed

3 files changed

+30
-22
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ scala:
77
- 2.11.12
88
- 2.12.7
99
jdk:
10-
- oraclejdk8
10+
- openjdk8
1111
matrix:
1212
include:
1313
- jdk: openjdk11

core/src/main/scala/uri.scala

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -50,30 +50,33 @@ object UriEncode {
5050
def pchar = unreserved ++ (
5151
':' :: '@' :: '&' :: '=' :: '+' :: '$' :: ',' :: Nil
5252
)
53-
val segmentValid = (';' +: pchar).toSet
53+
val segmentValid: Set[Char] = (';' +: pchar).toSet
5454

55-
private val validMarkers = (0 to segmentValid.max.toInt).map(i => segmentValid(i.toChar)).toArray
56-
private def isValidChar(ch: Char) = (ch < validMarkers.length) && validMarkers(ch.toInt)
55+
// There are likely more optimal ways of doing this calculation, however
56+
// it seems unlikely that long path segments are often on the hot path
57+
// of a request in such a way that they can't be cached. If that proves
58+
// not to be true, then we can revisit.
59+
private def isValidChar(b: Byte) = {
60+
segmentValid.contains(b.toChar)
61+
}
5762

5863
def path(pathSegment: String, encoding: String = "UTF-8") = {
59-
if (pathSegment.forall(isValidChar)) {
64+
val pathBytes = pathSegment.getBytes(encoding)
65+
66+
if (pathBytes.forall(isValidChar)) {
6067
pathSegment
61-
}
62-
else {
68+
} else {
6369
val sb = new StringBuilder(pathSegment.length << 1)
6470

65-
pathSegment foreach { ch =>
66-
if (isValidChar(ch)) {
67-
sb.append(ch)
68-
}
69-
else {
70-
ch.toString.getBytes(encoding) foreach { b =>
71-
val hi = (b >>> 4) & 0xf
72-
val lo = b & 0xf
73-
sb.append('%')
74-
.append((if (hi > 9) hi + '7' else hi + '0').toChar)
75-
.append((if (lo > 9) lo + '7' else lo + '0').toChar)
76-
}
71+
pathBytes.foreach { b =>
72+
if (isValidChar(b)) {
73+
sb.append(b.toChar)
74+
} else {
75+
val hi = (b >>> 4) & 0xf
76+
val lo = b & 0xf
77+
sb.append('%')
78+
.append((if (hi > 9) hi + '7' else hi + '0').toChar)
79+
.append((if (lo > 9) lo + '7' else lo + '0').toChar)
7780
}
7881
}
7982

core/src/test/scala/uri.scala

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,26 @@
11
package dispatch.spec
22

33
import org.scalacheck._
4-
import org.scalacheck.Prop.BooleanOperators
4+
import org.scalacheck.Prop._
55

66
object UriSpecification extends Properties("Uri") {
77
/** java.net.URLDecoder should *NOT* be used for testing URI segment decoding
88
* because it implements completely different functionality: query parameter decoding
99
*/
10-
property("encode-decode") = Prop.forAll { (path: String) =>
10+
property("Encodes and decodes basic strings") = Prop.forAll { (path: String) =>
1111
!path.contains(":") ==> {
1212
new java.net.URI(dispatch.UriEncode.path(path)).getPath == path
1313
} // else Prop.throws(classOf[java.net.URISyntaxException])
1414
}
1515

1616
/** if there is nothing to escape, encoder must return original reference */
17-
property("noop") = Prop.forAll(Gen.choose(0,100)) { (n: Int) =>
17+
property("Does nothing if there's nothing to encode") = Prop.forAll(Gen.choose(0,100)) { (n: Int) =>
1818
val path = "A" * n
1919
dispatch.UriEncode.path(path) eq path
2020
}
21+
22+
property("Encodes emoji correctly") = forAll(Gen.const("unused")) { (sample: String) =>
23+
val path = "roma🇮🇹"
24+
new java.net.URI(dispatch.UriEncode.path(path)).getPath == (path)
25+
}
2126
}

0 commit comments

Comments
 (0)