src: remove explicit UTF-8 validity check in url

This step was never part of the URL Standard's host parser algorithm, and is rendered unnecessary now that IDNA conversion errors are no longer ignored.

PR-URL: https://github.com/nodejs/node/pull/11859
Refs: c2a302c50b3787666339371 "src: do not ignore IDNA conversion error"
Refs: https://url.spec.whatwg.org/#concept-host-parser
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Daijiro Wachi <daijiro.wachi@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
2017-03-14 23:41:57 -07:00 · 2017-03-14 23:41:57 -07:00 · d099f8e317
commit d099f8e317
parent 4cdb0e89d8
1 changed file with 0 additions and 30 deletions
--- a/src/node_url.cc
+++ b/src/node_url.cc
@@ -15,11 +15,6 @@
 #include <stdio.h>
 #include <cmath>

-#if defined(NODE_HAVE_I18N_SUPPORT)
-#include <unicode/utf8.h>
-#include <unicode/utf.h>
-#endif
-
 #define UNICODE_REPLACEMENT_CHARACTER 0xFFFD

 namespace node {
@@ -113,21 +108,6 @@ namespace url {
    output->assign(*buf, buf.length());
    return true;
  }
-
-  // Unfortunately there's not really a better way to do this.
-  // Iterate through each encoded codepoint and verify that
-  // it is a valid unicode codepoint.
-  static bool IsValidUTF8(std::string* input) {
-    const char* p = input->c_str();
-    int32_t len = input->length();
-    for (int32_t i = 0; i < len;) {
-      UChar32 c;
-      U8_NEXT_UNSAFE(p, i, c);
-      if (!U_IS_UNICODE_CHAR(c))
-        return false;
-    }
-    return true;
-  }
 #else
  // Intentional non-ops if ICU is not present.
  static bool ToUnicode(std::string* input, std::string* output) {
@@ -139,10 +119,6 @@ namespace url {
    *output = *input;
    return true;
  }
-
-  static bool IsValidUTF8(std::string* input) {
-    return true;
-  }
 #endif

  // If a UTF-16 character is a low/trailing surrogate.
@@ -395,12 +371,6 @@ namespace url {
    if (PercentDecode(input, length, &decoded) < 0)
      goto end;

-    // If there are any invalid UTF8 byte sequences, we have to fail.
-    // Unfortunately this means iterating through the string and checking
-    // each decoded codepoint.
-    if (!IsValidUTF8(&decoded))
-      goto end;
-
    // Then we have to punycode toASCII
    if (!ToASCII(&decoded, &decoded))
      goto end;