src: fix ucs-2 buffer encoding regression
StringBytes::Write() did a plain memcpy() when is_extern is true but that's wrong when the source is a two-byte string and the destination a one-byte or UTF-8 string. The impact is limited to strings > 1,031,913 bytes because those are normally the only strings that are externalized, although the use of the 'externalize strings' extension (--expose_externalize_string) can also trigger it. This commit also cleans up the bytes versus characters confusion in StringBytes::Write() because that was closely intertwined with the UCS-2 encoding regression. One wasn't fixable without the other. Fixes: https://github.com/iojs/io.js/issues/1024 Fixes: https://github.com/joyent/node/issues/8683 PR-URL: https://github.com/iojs/io.js/pull/1042 Reviewed-By: Trevor Norris <trev.norris@gmail.com>
This commit is contained in:
parent
2eda2d6096
commit
1640dedb3b
@ -415,9 +415,6 @@ void StringWrite(const FunctionCallbackInfo<Value>& args) {
|
|||||||
if (max_length == 0)
|
if (max_length == 0)
|
||||||
return args.GetReturnValue().Set(0);
|
return args.GetReturnValue().Set(0);
|
||||||
|
|
||||||
if (encoding == UCS2)
|
|
||||||
max_length = max_length / 2;
|
|
||||||
|
|
||||||
if (offset >= obj_length)
|
if (offset >= obj_length)
|
||||||
return env->ThrowRangeError("Offset is out of bounds");
|
return env->ThrowRangeError("Offset is out of bounds");
|
||||||
|
|
||||||
|
@ -279,13 +279,15 @@ size_t StringBytes::Write(Isolate* isolate,
|
|||||||
int* chars_written) {
|
int* chars_written) {
|
||||||
HandleScope scope(isolate);
|
HandleScope scope(isolate);
|
||||||
const char* data = nullptr;
|
const char* data = nullptr;
|
||||||
size_t len = 0;
|
size_t nbytes = 0;
|
||||||
bool is_extern = GetExternalParts(isolate, val, &data, &len);
|
const bool is_extern = GetExternalParts(isolate, val, &data, &nbytes);
|
||||||
size_t extlen = len;
|
const size_t external_nbytes = nbytes;
|
||||||
|
|
||||||
CHECK(val->IsString() == true);
|
CHECK(val->IsString() == true);
|
||||||
Local<String> str = val.As<String>();
|
Local<String> str = val.As<String>();
|
||||||
len = len < buflen ? len : buflen;
|
|
||||||
|
if (nbytes > buflen)
|
||||||
|
nbytes = buflen;
|
||||||
|
|
||||||
int flags = String::HINT_MANY_WRITES_EXPECTED |
|
int flags = String::HINT_MANY_WRITES_EXPECTED |
|
||||||
String::NO_NULL_TERMINATION |
|
String::NO_NULL_TERMINATION |
|
||||||
@ -295,67 +297,65 @@ size_t StringBytes::Write(Isolate* isolate,
|
|||||||
case ASCII:
|
case ASCII:
|
||||||
case BINARY:
|
case BINARY:
|
||||||
case BUFFER:
|
case BUFFER:
|
||||||
if (is_extern)
|
if (is_extern && str->IsOneByte()) {
|
||||||
memcpy(buf, data, len);
|
memcpy(buf, data, nbytes);
|
||||||
else
|
} else {
|
||||||
len = str->WriteOneByte(reinterpret_cast<uint8_t*>(buf),
|
uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
|
||||||
0,
|
nbytes = str->WriteOneByte(dst, 0, buflen, flags);
|
||||||
buflen,
|
}
|
||||||
flags);
|
|
||||||
if (chars_written != nullptr)
|
if (chars_written != nullptr)
|
||||||
*chars_written = len;
|
*chars_written = nbytes;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case UTF8:
|
case UTF8:
|
||||||
if (is_extern)
|
nbytes = str->WriteUtf8(buf, buflen, chars_written, flags);
|
||||||
// TODO(tjfontaine) should this validate invalid surrogate pairs as
|
|
||||||
// well?
|
|
||||||
memcpy(buf, data, len);
|
|
||||||
else
|
|
||||||
len = str->WriteUtf8(buf, buflen, chars_written, flags);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case UCS2:
|
case UCS2: {
|
||||||
if (is_extern)
|
uint16_t* const dst = reinterpret_cast<uint16_t*>(buf);
|
||||||
memcpy(buf, data, len);
|
size_t nchars;
|
||||||
else
|
if (is_extern && !str->IsOneByte()) {
|
||||||
len = str->Write(reinterpret_cast<uint16_t*>(buf), 0, buflen, flags);
|
memcpy(buf, data, nbytes);
|
||||||
|
nchars = nbytes / sizeof(*dst);
|
||||||
|
} else {
|
||||||
|
nchars = buflen / sizeof(*dst);
|
||||||
|
nchars = str->Write(dst, 0, nchars, flags);
|
||||||
|
nbytes = nchars * sizeof(*dst);
|
||||||
|
}
|
||||||
if (IsBigEndian()) {
|
if (IsBigEndian()) {
|
||||||
// Node's "ucs2" encoding wants LE character data stored in
|
// Node's "ucs2" encoding wants LE character data stored in
|
||||||
// the Buffer, so we need to reorder on BE platforms. See
|
// the Buffer, so we need to reorder on BE platforms. See
|
||||||
// http://nodejs.org/api/buffer.html regarding Node's "ucs2"
|
// http://nodejs.org/api/buffer.html regarding Node's "ucs2"
|
||||||
// encoding specification
|
// encoding specification
|
||||||
uint16_t* buf16 = reinterpret_cast<uint16_t*>(buf);
|
for (size_t i = 0; i < nchars; i++)
|
||||||
for (size_t i = 0; i < len; i++) {
|
dst[i] = dst[i] << 8 | dst[i] >> 8;
|
||||||
buf16[i] = (buf16[i] << 8) | (buf16[i] >> 8);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (chars_written != nullptr)
|
if (chars_written != nullptr)
|
||||||
*chars_written = len;
|
*chars_written = nchars;
|
||||||
len = len * sizeof(uint16_t);
|
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case BASE64:
|
case BASE64:
|
||||||
if (is_extern) {
|
if (is_extern) {
|
||||||
len = base64_decode(buf, buflen, data, extlen);
|
nbytes = base64_decode(buf, buflen, data, external_nbytes);
|
||||||
} else {
|
} else {
|
||||||
String::Value value(str);
|
String::Value value(str);
|
||||||
len = base64_decode(buf, buflen, *value, value.length());
|
nbytes = base64_decode(buf, buflen, *value, value.length());
|
||||||
}
|
}
|
||||||
if (chars_written != nullptr) {
|
if (chars_written != nullptr) {
|
||||||
*chars_written = len;
|
*chars_written = nbytes;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case HEX:
|
case HEX:
|
||||||
if (is_extern) {
|
if (is_extern) {
|
||||||
len = hex_decode(buf, buflen, data, extlen);
|
nbytes = hex_decode(buf, buflen, data, external_nbytes);
|
||||||
} else {
|
} else {
|
||||||
String::Value value(str);
|
String::Value value(str);
|
||||||
len = hex_decode(buf, buflen, *value, value.length());
|
nbytes = hex_decode(buf, buflen, *value, value.length());
|
||||||
}
|
}
|
||||||
if (chars_written != nullptr) {
|
if (chars_written != nullptr) {
|
||||||
*chars_written = len * 2;
|
*chars_written = nbytes;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -364,7 +364,7 @@ size_t StringBytes::Write(Isolate* isolate,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return len;
|
return nbytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -106,3 +106,16 @@ var PRE_3OF4_APEX = Math.ceil((EXTERN_APEX / 4) * 3) - RADIOS;
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
})();
|
})();
|
||||||
|
|
||||||
|
// https://github.com/iojs/io.js/issues/1024
|
||||||
|
(function() {
|
||||||
|
var a = Array(1 << 20).join('x');
|
||||||
|
var b = Buffer(a, 'ucs2').toString('ucs2');
|
||||||
|
var c = Buffer(b, 'utf8').toString('utf8');
|
||||||
|
|
||||||
|
assert.equal(a.length, b.length);
|
||||||
|
assert.equal(b.length, c.length);
|
||||||
|
|
||||||
|
assert.equal(a, b);
|
||||||
|
assert.equal(b, c);
|
||||||
|
})();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user