src: remove explicit UTF-8 validity check in url
This step was never part of the URL Standard's host parser algorithm, and it became unnecessary once IDNA conversion errors stopped being ignored.

PR-URL: https://github.com/nodejs/node/pull/11859
Refs: c2a302c50b3787666339371 "src: do not ignore IDNA conversion error"
Refs: https://url.spec.whatwg.org/#concept-host-parser
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Daijiro Wachi <daijiro.wachi@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
This commit is contained in:
parent
4cdb0e89d8
commit
d099f8e317
@ -15,11 +15,6 @@
|
||||
#include <stdio.h>
|
||||
#include <cmath>
|
||||
|
||||
#if defined(NODE_HAVE_I18N_SUPPORT)
|
||||
#include <unicode/utf8.h>
|
||||
#include <unicode/utf.h>
|
||||
#endif
|
||||
|
||||
#define UNICODE_REPLACEMENT_CHARACTER 0xFFFD
|
||||
|
||||
namespace node {
|
||||
@ -113,21 +108,6 @@ namespace url {
|
||||
output->assign(*buf, buf.length());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Unfortunately there's not really a better way to do this.
|
||||
// Iterate through each encoded codepoint and verify that
|
||||
// it is a valid unicode codepoint.
|
||||
static bool IsValidUTF8(std::string* input) {
|
||||
const char* p = input->c_str();
|
||||
int32_t len = input->length();
|
||||
for (int32_t i = 0; i < len;) {
|
||||
UChar32 c;
|
||||
U8_NEXT_UNSAFE(p, i, c);
|
||||
if (!U_IS_UNICODE_CHAR(c))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
// Intentional no-ops if ICU is not present.
|
||||
static bool ToUnicode(std::string* input, std::string* output) {
|
||||
@ -139,10 +119,6 @@ namespace url {
|
||||
*output = *input;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Fallback used when ICU is not compiled in: no validation is performed and
// every input is reported as valid.
static bool IsValidUTF8(std::string* input) {
  static_cast<void>(input);  // Deliberately unused in this build.
  const bool accepted = true;
  return accepted;
}
|
||||
#endif
|
||||
|
||||
// If a UTF-16 character is a low/trailing surrogate.
|
||||
@ -395,12 +371,6 @@ namespace url {
|
||||
if (PercentDecode(input, length, &decoded) < 0)
|
||||
goto end;
|
||||
|
||||
// If there are any invalid UTF8 byte sequences, we have to fail.
|
||||
// Unfortunately this means iterating through the string and checking
|
||||
// each decoded codepoint.
|
||||
if (!IsValidUTF8(&decoded))
|
||||
goto end;
|
||||
|
||||
// Then we have to punycode toASCII
|
||||
if (!ToASCII(&decoded, &decoded))
|
||||
goto end;
|
||||
|
Loading…
x
Reference in New Issue
Block a user