|
23 | 23 | import java.util.ArrayList;
|
24 | 24 | import java.util.Collections;
|
25 | 25 | import java.util.HashMap;
|
| 26 | +import java.util.HashSet; |
26 | 27 | import java.util.List;
|
27 | 28 | import java.util.Map;
|
| 29 | +import java.util.Set; |
28 | 30 | import java.util.UUID;
|
29 | 31 | import java.util.regex.Matcher;
|
30 | 32 | import java.util.regex.Pattern;
|
@@ -189,6 +191,7 @@ public String getRedirectUrl() {
|
189 | 191 | private static final Vulnerability VULN = Vulnerabilities.getDefault().get("wasc_38");
|
190 | 192 |
|
191 | 193 | private static final Logger LOGGER = LogManager.getLogger(ExternalRedirectScanRule.class);
|
| 194 | + private static final List<String> JS_PRE_CHECKS = List.of("window.", "location.", "location="); |
192 | 195 |
|
193 | 196 | private int payloadCount;
|
194 | 197 |
|
@@ -489,11 +492,176 @@ private static RedirectType isRedirected(String payload, HttpMessage msg) {
|
489 | 492 | }
|
490 | 493 |
|
491 | 494 | private static boolean isRedirectPresent(Pattern pattern, String value) {
|
492 |
| - Matcher matcher = pattern.matcher(value); |
| 495 | + // Ensure the value has something we're interested in before dealing with comments |
| 496 | + if (!StringUtils.containsIgnoreCase(value, SITE_HOST) |
| 497 | + && JS_PRE_CHECKS.stream() |
| 498 | + .noneMatch(chk -> StringUtils.containsIgnoreCase(value, chk))) { |
| 499 | + return false; |
| 500 | + } |
| 501 | + Set<String> extractedComments = extractJsComments(value); |
| 502 | + String[] valueWithoutComments = {value}; |
| 503 | + extractedComments.forEach( |
| 504 | + comment -> valueWithoutComments[0] = valueWithoutComments[0].replace(comment, "")); |
| 505 | + |
| 506 | + Matcher matcher = pattern.matcher(valueWithoutComments[0]); |
| 507 | + |
493 | 508 | return matcher.find()
|
494 | 509 | && StringUtils.startsWithIgnoreCase(matcher.group(1), HttpHeader.HTTP);
|
495 | 510 | }
|
496 | 511 |
|
| 512 | + private static Set<String> extractJsComments(String js) { |
| 513 | + // Some of the escapes in the comments below are double because of Java requirements |
| 514 | + Set<String> comments = new HashSet<>(); |
| 515 | + |
| 516 | + final int n = js.length(); |
| 517 | + boolean inSingle = false; // '...' |
| 518 | + boolean inDouble = false; // "..." |
| 519 | + int i = 0; |
| 520 | + |
| 521 | + while (i < n) { |
| 522 | + char c = js.charAt(i); |
| 523 | + |
| 524 | + // Inside a quoted string? Only look for the matching quote, consuming full escapes. |
| 525 | + if (inSingle || inDouble) { |
| 526 | + if (c == '\\') { |
| 527 | + i = consumeJsEscape(js, i); // Returns index of the last char of the escape |
| 528 | + } else if (inSingle && c == '\'') { |
| 529 | + inSingle = false; |
| 530 | + } else if (inDouble && c == '"') { |
| 531 | + inDouble = false; |
| 532 | + } |
| 533 | + i++; |
| 534 | + continue; |
| 535 | + } |
| 536 | + |
| 537 | + // Not inside a string: maybe we’re entering one? |
| 538 | + if (c == '\'') { |
| 539 | + inSingle = true; |
| 540 | + i++; |
| 541 | + continue; |
| 542 | + } |
| 543 | + if (c == '"') { |
| 544 | + inDouble = true; |
| 545 | + i++; |
| 546 | + continue; |
| 547 | + } |
| 548 | + |
| 549 | + // Not in a string: check for comments |
| 550 | + if (c == '/' && i + 1 < n) { |
| 551 | + char d = js.charAt(i + 1); |
| 552 | + |
| 553 | + // Single-line //... |
| 554 | + if (d == '/') { |
| 555 | + int end = i + 2; |
| 556 | + while (end < n && !isJsLineTerminator(js.charAt(end))) end++; |
| 557 | + comments.add(js.substring(i, end)); |
| 558 | + i = end; // position at line break (or end) |
| 559 | + continue; |
| 560 | + } |
| 561 | + |
| 562 | + // Multi-line /* ... */ |
| 563 | + if (d == '*') { |
| 564 | + int end = js.indexOf("*/", i + 2); |
| 565 | + if (end == -1) { |
| 566 | + // Unterminated: consume to end |
| 567 | + comments.add(js.substring(i)); |
| 568 | + i = n; |
| 569 | + } else { |
| 570 | + comments.add(js.substring(i, end + 2)); |
| 571 | + i = end + 2; |
| 572 | + } |
| 573 | + continue; |
| 574 | + } |
| 575 | + } |
| 576 | + |
| 577 | + // Otherwise, just move on. |
| 578 | + i++; |
| 579 | + } |
| 580 | + |
| 581 | + return comments; |
| 582 | + } |
| 583 | + |
| 584 | + /** |
| 585 | + * Consumes a full JS escape sequence starting at the backslash. Returns the index of the last |
| 586 | + * character that belongs to the escape. Handles: \n, \r, \t, \b, \f, \v, \0, \', \", \\, |
| 587 | + * line-continuations, \xHH, \uFFFF, \\u{...} |
| 588 | + */ |
| 589 | + private static int consumeJsEscape(String s, int backslash) { |
| 590 | + int n = s.length(); |
| 591 | + int i = backslash; |
| 592 | + if (i + 1 >= n) { |
| 593 | + return i; // Nothing to consume after '\' |
| 594 | + } |
| 595 | + |
| 596 | + char e = s.charAt(i + 1); |
| 597 | + |
| 598 | + // Line continuation: backslash followed by a line terminator |
| 599 | + if (isJsLineTerminator(e)) { |
| 600 | + // Consume \r\n as a unit if present |
| 601 | + if (e == '\r' && i + 2 < n && s.charAt(i + 2) == '\n') { |
| 602 | + return i + 2; |
| 603 | + } |
| 604 | + return i + 1; |
| 605 | + } |
| 606 | + |
| 607 | + // \xHH (2 hex digits) |
| 608 | + if (e == 'x' || e == 'X') { |
| 609 | + int j = i + 2; |
| 610 | + int consumed = 0; |
| 611 | + while (j < n && consumed < 2 && isHexDigit(s.charAt(j))) { |
| 612 | + j++; |
| 613 | + consumed++; |
| 614 | + } |
| 615 | + // Even if malformed, we stop at the last hex digit we found |
| 616 | + return j - 1; |
| 617 | + } |
| 618 | + |
| 619 | + // \uFFFF or \\u{...} |
| 620 | + if (e == 'u' || e == 'U') { |
| 621 | + int j = i + 2; |
| 622 | + if (j < n && s.charAt(j) == '{') { |
| 623 | + // \\u{hex+} |
| 624 | + j++; |
| 625 | + while (j < n && isHexDigit(s.charAt(j))) { |
| 626 | + j++; |
| 627 | + } |
| 628 | + if (j < n && s.charAt(j) == '}') j++; // Close if present |
| 629 | + return j - 1; // End of } or last hex if malformed |
| 630 | + } else { |
| 631 | + // \\uHHHH (exactly 4 hex if well-formed) |
| 632 | + int consumed = 0; |
| 633 | + while (j < n && consumed < 4 && isHexDigit(s.charAt(j))) { |
| 634 | + j++; |
| 635 | + consumed++; |
| 636 | + } |
| 637 | + return j - 1; |
| 638 | + } |
| 639 | + } |
| 640 | + |
| 641 | + // Octal escapes (legacy). Consume up to 3 octal digits if present. |
| 642 | + if (e >= '0' && e <= '7') { |
| 643 | + int j = i + 1; |
| 644 | + int consumed = 0; |
| 645 | + while (j < n && consumed < 3 && s.charAt(j) >= '0' && s.charAt(j) <= '7') { |
| 646 | + j++; |
| 647 | + consumed++; |
| 648 | + } |
| 649 | + return j - 1; |
| 650 | + } |
| 651 | + |
| 652 | + // Simple one-char escapes: \n \r \t \b \f \v \0 \' \" \\ |
| 653 | + return i + 1; |
| 654 | + } |
| 655 | + |
| 656 | + private static boolean isHexDigit(char c) { |
| 657 | + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); |
| 658 | + } |
| 659 | + |
| 660 | + private static boolean isJsLineTerminator(char c) { |
| 661 | + // JS line terminators: LF, CR, LS, PS |
| 662 | + return c == '\n' || c == '\r' || c == '\u2028' || c == '\u2029'; |
| 663 | + } |
| 664 | + |
497 | 665 | /**
|
498 | 666 | * Give back the risk associated to this vulnerability (high)
|
499 | 667 | *
|
|
0 commit comments