src: fix ucs-2 buffer encoding regression

StringBytes::Write() did a plain memcpy() when is_extern is true but
that's wrong when the source is a two-byte string and the destination
a one-byte or UTF-8 string.

The impact is limited to strings > 1,031,913 bytes because those are
normally the only strings that are externalized, although the use of
the 'externalize strings' extension (--expose_externalize_string) can
also trigger it.

This commit also cleans up the bytes versus characters confusion in
StringBytes::Write() because that was closely intertwined with the
UCS-2 encoding regression.  One wasn't fixable without the other.

Fixes: https://github.com/iojs/io.js/issues/1024
Fixes: https://github.com/joyent/node/issues/8683
PR-URL: https://github.com/iojs/io.js/pull/1042
Reviewed-By: Trevor Norris <trev.norris@gmail.com>
This commit is contained in:
Ben Noordhuis 2015-03-03 15:44:54 +01:00
parent 2eda2d6096
commit 1640dedb3b
3 changed files with 49 additions and 39 deletions

View File

@ -415,9 +415,6 @@ void StringWrite(const FunctionCallbackInfo<Value>& args) {
if (max_length == 0) if (max_length == 0)
return args.GetReturnValue().Set(0); return args.GetReturnValue().Set(0);
if (encoding == UCS2)
max_length = max_length / 2;
if (offset >= obj_length) if (offset >= obj_length)
return env->ThrowRangeError("Offset is out of bounds"); return env->ThrowRangeError("Offset is out of bounds");

View File

@ -279,13 +279,15 @@ size_t StringBytes::Write(Isolate* isolate,
int* chars_written) { int* chars_written) {
HandleScope scope(isolate); HandleScope scope(isolate);
const char* data = nullptr; const char* data = nullptr;
size_t len = 0; size_t nbytes = 0;
bool is_extern = GetExternalParts(isolate, val, &data, &len); const bool is_extern = GetExternalParts(isolate, val, &data, &nbytes);
size_t extlen = len; const size_t external_nbytes = nbytes;
CHECK(val->IsString() == true); CHECK(val->IsString() == true);
Local<String> str = val.As<String>(); Local<String> str = val.As<String>();
len = len < buflen ? len : buflen;
if (nbytes > buflen)
nbytes = buflen;
int flags = String::HINT_MANY_WRITES_EXPECTED | int flags = String::HINT_MANY_WRITES_EXPECTED |
String::NO_NULL_TERMINATION | String::NO_NULL_TERMINATION |
@ -295,67 +297,65 @@ size_t StringBytes::Write(Isolate* isolate,
case ASCII: case ASCII:
case BINARY: case BINARY:
case BUFFER: case BUFFER:
if (is_extern) if (is_extern && str->IsOneByte()) {
memcpy(buf, data, len); memcpy(buf, data, nbytes);
else } else {
len = str->WriteOneByte(reinterpret_cast<uint8_t*>(buf), uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
0, nbytes = str->WriteOneByte(dst, 0, buflen, flags);
buflen, }
flags);
if (chars_written != nullptr) if (chars_written != nullptr)
*chars_written = len; *chars_written = nbytes;
break; break;
case UTF8: case UTF8:
if (is_extern) nbytes = str->WriteUtf8(buf, buflen, chars_written, flags);
// TODO(tjfontaine) should this validate invalid surrogate pairs as
// well?
memcpy(buf, data, len);
else
len = str->WriteUtf8(buf, buflen, chars_written, flags);
break; break;
case UCS2: case UCS2: {
if (is_extern) uint16_t* const dst = reinterpret_cast<uint16_t*>(buf);
memcpy(buf, data, len); size_t nchars;
else if (is_extern && !str->IsOneByte()) {
len = str->Write(reinterpret_cast<uint16_t*>(buf), 0, buflen, flags); memcpy(buf, data, nbytes);
nchars = nbytes / sizeof(*dst);
} else {
nchars = buflen / sizeof(*dst);
nchars = str->Write(dst, 0, nchars, flags);
nbytes = nchars * sizeof(*dst);
}
if (IsBigEndian()) { if (IsBigEndian()) {
// Node's "ucs2" encoding wants LE character data stored in // Node's "ucs2" encoding wants LE character data stored in
// the Buffer, so we need to reorder on BE platforms. See // the Buffer, so we need to reorder on BE platforms. See
// http://nodejs.org/api/buffer.html regarding Node's "ucs2" // http://nodejs.org/api/buffer.html regarding Node's "ucs2"
// encoding specification // encoding specification
uint16_t* buf16 = reinterpret_cast<uint16_t*>(buf); for (size_t i = 0; i < nchars; i++)
for (size_t i = 0; i < len; i++) { dst[i] = dst[i] << 8 | dst[i] >> 8;
buf16[i] = (buf16[i] << 8) | (buf16[i] >> 8);
}
} }
if (chars_written != nullptr) if (chars_written != nullptr)
*chars_written = len; *chars_written = nchars;
len = len * sizeof(uint16_t);
break; break;
}
case BASE64: case BASE64:
if (is_extern) { if (is_extern) {
len = base64_decode(buf, buflen, data, extlen); nbytes = base64_decode(buf, buflen, data, external_nbytes);
} else { } else {
String::Value value(str); String::Value value(str);
len = base64_decode(buf, buflen, *value, value.length()); nbytes = base64_decode(buf, buflen, *value, value.length());
} }
if (chars_written != nullptr) { if (chars_written != nullptr) {
*chars_written = len; *chars_written = nbytes;
} }
break; break;
case HEX: case HEX:
if (is_extern) { if (is_extern) {
len = hex_decode(buf, buflen, data, extlen); nbytes = hex_decode(buf, buflen, data, external_nbytes);
} else { } else {
String::Value value(str); String::Value value(str);
len = hex_decode(buf, buflen, *value, value.length()); nbytes = hex_decode(buf, buflen, *value, value.length());
} }
if (chars_written != nullptr) { if (chars_written != nullptr) {
*chars_written = len * 2; *chars_written = nbytes;
} }
break; break;
@ -364,7 +364,7 @@ size_t StringBytes::Write(Isolate* isolate,
break; break;
} }
return len; return nbytes;
} }

View File

@ -106,3 +106,16 @@ var PRE_3OF4_APEX = Math.ceil((EXTERN_APEX / 4) * 3) - RADIOS;
} }
} }
})(); })();
// https://github.com/iojs/io.js/issues/1024
(function() {
var a = Array(1 << 20).join('x');
var b = Buffer(a, 'ucs2').toString('ucs2');
var c = Buffer(b, 'utf8').toString('utf8');
assert.equal(a.length, b.length);
assert.equal(b.length, c.length);
assert.equal(a, b);
assert.equal(b, c);
})();