Skip to content

Commit 249b044

Browse files
committed
Treat more special characters as trailing delimiters (#3)
* `;` is now treated the same as `,` and `:`.
* `<` and `>` now also need to match, the same as other brackets.
* `/` can still appear within or at the end of a URL, but if it is within a group of other delimiters, it behaves as a delimiter.

Together, these new rules cause `">`, `"/>` and `");` to be excluded from the end of links, while hopefully not interfering with the overall heuristics too much.
1 parent 5160add commit 249b044

File tree

2 files changed

+41
-1
lines changed

2 files changed

+41
-1
lines changed

src/main/java/org/nibor/autolink/internal/UrlScanner.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ private int findLast(CharSequence input, int beginIndex) {
5454
int round = 0;
5555
int square = 0;
5656
int curly = 0;
57+
int angle = 0;
5758
boolean doubleQuote = false;
5859
boolean singleQuote = false;
5960
int last = beginIndex;
@@ -67,13 +68,22 @@ private int findLast(CharSequence input, int beginIndex) {
6768
case '\u000B':
6869
case '\f':
6970
case '\r':
71+
// These can never be part of a URL, so stop now
7072
break loop;
7173
case '?':
7274
case '!':
7375
case '.':
7476
case ',':
7577
case ':':
78+
case ';':
79+
// These may be part of a URL, but not at its end
7680
continue loop;
81+
case '/':
82+
// A slash may appear within a URL or at its end, but not at the end if the previous character cannot itself end a URL
83+
if (last != i - 1) {
84+
continue loop;
85+
}
86+
break;
7787
case '(':
7888
round++;
7989
break;
@@ -92,6 +102,12 @@ private int findLast(CharSequence input, int beginIndex) {
92102
case '}':
93103
curly--;
94104
break;
105+
case '<':
106+
angle++;
107+
break;
108+
case '>':
109+
angle--;
110+
break;
95111
case '"':
96112
doubleQuote = !doubleQuote;
97113
break;
@@ -102,7 +118,7 @@ private int findLast(CharSequence input, int beginIndex) {
102118
last = i;
103119
continue loop;
104120
}
105-
if (round >= 0 && square >= 0 && curly >= 0 && !doubleQuote && !singleQuote) {
121+
if (round >= 0 && square >= 0 && curly >= 0 && angle >= 0 && !doubleQuote && !singleQuote) {
106122
last = i;
107123
}
108124
}

src/test/java/org/nibor/autolink/AutolinkUrlTest.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,15 @@ public void delimiterSeparation() {
8787
assertLinked("http://example.org/:", "|http://example.org/|:");
8888
assertLinked("http://example.org/?", "|http://example.org/|?");
8989
assertLinked("http://example.org/!", "|http://example.org/|!");
90+
assertLinked("http://example.org/;", "|http://example.org/|;");
9091
}
9192

9293
@Test
9394
public void matchingPunctuation() {
9495
assertLinked("http://example.org/a(b)", "|http://example.org/a(b)|");
9596
assertLinked("http://example.org/a[b]", "|http://example.org/a[b]|");
9697
assertLinked("http://example.org/a{b}", "|http://example.org/a{b}|");
98+
assertLinked("http://example.org/a<b>", "|http://example.org/a<b>|");
9799
assertLinked("http://example.org/a\"b\"", "|http://example.org/a\"b\"|");
98100
assertLinked("http://example.org/a'b'", "|http://example.org/a'b'|");
99101
assertLinked("(http://example.org/)", "(|http://example.org/|)");
@@ -110,11 +112,33 @@ public void matchingPunctuationTricky() {
110112
assertLinked("[(http://example.org/)]", "[(|http://example.org/|)]");
111113
assertLinked("(http://example.org/).", "(|http://example.org/|).");
112114
assertLinked("(http://example.org/.)", "(|http://example.org/|.)");
115+
assertLinked("http://example.org/>", "|http://example.org/|>");
113116
// not sure about these:
114117
assertLinked("http://example.org/(", "|http://example.org/(|");
115118
assertLinked("http://example.org/]()", "|http://example.org/|]()");
116119
}
117120

121+
@Test
122+
public void html() {
123+
assertLinked("http://example.org\">", "|http://example.org|\">");
124+
assertLinked("http://example.org'>", "|http://example.org|'>");
125+
assertLinked("http://example.org\"/>", "|http://example.org|\"/>");
126+
assertLinked("http://example.org'/>", "|http://example.org|'/>");
127+
}
128+
129+
@Test
130+
public void css() {
131+
assertLinked("http://example.org\");", "|http://example.org|\");");
132+
assertLinked("http://example.org');", "|http://example.org|');");
133+
}
134+
135+
@Test
136+
public void slash() {
137+
assertLinked("http://example.org/", "|http://example.org/|");
138+
assertLinked("http://example.org/a/", "|http://example.org/a/|");
139+
assertLinked("http://example.org//", "|http://example.org//|");
140+
}
141+
118142
@Test
119143
public void multiple() {
120144
assertLinked("http://one.org/ http://two.org/", "|http://one.org/| |http://two.org/|");

0 commit comments

Comments
 (0)