|
| 1 | +import Foundation |
| 2 | + |
| 3 | +// MARK: Escaping |
| 4 | + |
| 5 | +// ref: https://github.com/alexisakers/HTMLString/tree/main |
| 6 | +// Due to Cocoapods dependency issues, it is not possible to declare dependencies directly in the Pod Spec. |
| 7 | +// Therefore, the source code has been directly included in the project, and all copyright and intellectual property rights belong to the original author at https://github.com/alexisakers/HTMLString. |
| 8 | + |
extension String {

    ///
    /// Returns a copy of the current `String` where every Unicode scalar that matches an entry of
    /// `HTMLStringMappings.unsafeUnicodeCharacters` is replaced by a decimal HTML entity.
    /// Use this when the page is Unicode encoded (UTF-16 or UTF-8).
    ///
    /// The check is made per Unicode scalar rather than per `Character`, so an unsafe scalar is
    /// still escaped when a combining mark follows it (for example `<` followed by U+0301, which
    /// Swift groups into a single `Character` that is not equal to `<`).
    ///
    /// ### Examples
    ///
    /// | String | Result | Format |
    /// |--------|--------|--------|
    /// | `&` | `&#38;` | Decimal entity (part of the Unicode special characters) |
    /// | `Σ` | `Σ` | Not escaped (Unicode compliant) |
    /// | `🇺🇸` | `🇺🇸` | Not escaped (Unicode compliant) |
    /// | `a` | `a` | Not escaped (alphanumerical) |
    ///
    /// - Returns: The escaped copy of the receiver.
    ///

    public func addingUnicodeEntities() -> String {
        var result = ""

        for scalar in unicodeScalars {
            if HTMLStringMappings.unsafeUnicodeCharacters.contains(Character(scalar)) {
                // One of the required escapes for security reasons. The code point is taken from
                // the scalar itself: `Character.asciiValue` is nil for a non-ASCII character that
                // merely compares equal to a set member, so force-unwrapping it could trap.
                result.append(contentsOf: "&#\(scalar.value);")
            } else {
                // Not a required escape, the scalar is copied through unchanged.
                result.unicodeScalars.append(scalar)
            }
        }

        return result
    }

    ///
    /// Returns a copy of the current `String` where every character incompatible with HTML ASCII
    /// encoding is replaced by a decimal HTML entity.
    ///
    /// ASCII characters are escaped only when they match an entry of
    /// `HTMLStringMappings.unsafeUnicodeCharacters`; every other character is written as one
    /// decimal entity per Unicode scalar it contains.
    ///
    /// ### Examples
    ///
    /// | String | Result | Format |
    /// |--------|--------|--------|
    /// | `&` | `&#38;` | Decimal entity |
    /// | `Σ` | `&#931;` | Decimal entity |
    /// | `🇺🇸` | `&#127482;&#127480;` | Combined decimal entities (extended grapheme cluster) |
    /// | `a` | `a` | Not escaped (alphanumerical) |
    ///
    /// ### Performance
    ///
    /// If your webpage is unicode encoded (UTF-16 or UTF-8) use `addingUnicodeEntities` instead,
    /// as it is faster and produces a less bloated and more readable HTML.
    ///
    /// - Returns: The escaped copy of the receiver.
    ///

    public func addingASCIIEntities() -> String {
        var result = ""

        for character in self {
            if let asciiValue = character.asciiValue {
                if HTMLStringMappings.unsafeUnicodeCharacters.contains(character) {
                    // One of the required escapes for security reasons
                    result.append(contentsOf: "&#\(asciiValue);")
                } else {
                    // Not a required escape, no need to replace the character
                    result.append(character)
                }
            } else {
                // Not an ASCII character: emit one decimal entity per scalar of the cluster.
                let escape = character.unicodeScalars.reduce(into: "") { $0 += "&#\($1.value);" }
                result.append(contentsOf: escape)
            }
        }

        return result
    }
}
| 82 | + |
| 83 | +// MARK: - Unescaping |
| 84 | + |
extension String {

    ///
    /// Returns a copy of the receiver in which every recognised HTML entity has been replaced by
    /// the text it stands for. Anything that is not a recognised entity is copied unchanged.
    ///
    /// An entity candidate is the text between an `&` and the next `;`. When several `&` appear
    /// before that `;`, only the last one starts the candidate. The candidate is first tried as a
    /// numeric reference, then looked up in `HTMLStringMappings.unescapingTable`.
    ///
    /// ### Examples
    ///
    /// | String | Result | Format |
    /// |--------|--------|--------|
    /// | `&amp;` | `&` | Keyword entity |
    /// | `&#931;` | `Σ` | Decimal entity |
    /// | `&#x10d;` | `č` | Hexadecimal entity |
    /// | `&#127482;&#127480;` | `🇺🇸` | Combined decimal entities (extended grapheme cluster) |
    /// | `a` | `a` | Not an entity |
    /// | `&` | `&` | Not an entity |
    ///
    /// - Returns: The unescaped copy of the receiver.
    ///

    public func removingHTMLEntities() -> String {
        var output = ""
        var cursor = startIndex

        while let firstAmpersand = self[cursor...].firstIndex(of: "&") {
            // Walk forward to the next ";" while remembering the most recent "&", so that
            // "Fish & chips &amp; sauce" yields the candidate "amp" and not " chips &amp".
            var candidateStart = firstAmpersand
            var scan = index(after: firstAmpersand)

            while scan != endIndex {
                let current = self[scan]
                if current == ";" {
                    break
                }
                if current == "&" {
                    candidateStart = scan
                }
                formIndex(after: &scan)
            }

            // No terminating semicolon anywhere ahead: nothing left to unescape.
            guard scan != endIndex else {
                output.append(contentsOf: self[cursor...])
                return output
            }

            // Everything before the candidate's "&" is plain text.
            output.append(contentsOf: self[cursor..<candidateStart])

            let candidate = self[index(after: candidateStart)..<scan]

            if let numeric = candidate.unescapeAsNumber() {
                output.append(contentsOf: numeric)
                cursor = index(after: scan)
            } else if let named = HTMLStringMappings.unescapingTable[String(candidate)] {
                output.append(contentsOf: named)
                cursor = index(after: scan)
            } else {
                // Unknown entity: keep the "&" literally and resume right after it, so the
                // remaining text (including the ";") is processed as ordinary content.
                output.append(self[candidateStart])
                cursor = index(after: candidateStart)
            }
        }

        output.append(contentsOf: self[cursor...])

        return output
    }
}
| 152 | + |
| 153 | +// MARK: - Helpers |
| 154 | + |
extension StringProtocol {

    /// Unescapes the receiver as a numeric character reference, if possible.
    ///
    /// The receiver is the text found between `&` and `;`, for example `#931` (decimal) or
    /// `#x10d` / `#X10D` (hexadecimal).
    ///
    /// - Returns: A string holding the single Unicode scalar designated by the number, or `nil`
    ///   when the receiver does not start with `#`, has no digits, carries a sign, is not a valid
    ///   number in the detected radix, or does not designate a valid Unicode scalar.
    fileprivate func unescapeAsNumber() -> String? {
        guard hasPrefix("#") else { return nil }

        let content = dropFirst()

        // Both checks must look at the text after "#"; the receiver itself always starts with "#".
        let isHexadecimal = content.hasPrefix("x") || content.hasPrefix("X")
        let digits = isHexadecimal ? content.dropFirst() : content

        // `UInt32.init(_:radix:)` accepts a leading "+" or "-", which is not part of a numeric
        // character reference, so signed (and empty) digit strings are rejected up front.
        guard let firstDigit = digits.first, firstDigit != "+", firstDigit != "-" else {
            return nil
        }

        guard let codePoint = UInt32(digits, radix: isHexadecimal ? 16 : 10),
              let scalar = UnicodeScalar(codePoint) else {
            return nil
        }

        return String(scalar)
    }
}
0 commit comments