src: fix fs.write() externalized string handling
* Respect `encoding` argument when the string is externalized. * Copy the string when the write request can outlive the externalized string. This commit removes `StringBytes::GetExternalParts()` because it is fundamentally broken. Fixes: https://github.com/nodejs/node/issues/18146 PR-URL: https://github.com/nodejs/node/pull/18216 Reviewed-By: Colin Ihrig <cjihrig@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
This commit is contained in:
parent
0b1841d83f
commit
b2b9d11a14
@ -39,6 +39,7 @@
|
||||
# include <io.h>
|
||||
#endif
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace node {
|
||||
@ -1019,40 +1020,53 @@ static void WriteString(const FunctionCallbackInfo<Value>& args) {
|
||||
|
||||
CHECK(args[0]->IsInt32());
|
||||
|
||||
std::unique_ptr<char[]> delete_on_return;
|
||||
Local<Value> req;
|
||||
Local<Value> string = args[1];
|
||||
Local<Value> value = args[1];
|
||||
int fd = args[0]->Int32Value();
|
||||
char* buf = nullptr;
|
||||
int64_t pos;
|
||||
size_t len;
|
||||
const int64_t pos = GET_OFFSET(args[2]);
|
||||
const auto enc = ParseEncoding(env->isolate(), args[3], UTF8);
|
||||
const auto is_async = args[4]->IsObject();
|
||||
|
||||
// will assign buf and len if string was external
|
||||
if (!StringBytes::GetExternalParts(string,
|
||||
const_cast<const char**>(&buf),
|
||||
&len)) {
|
||||
enum encoding enc = ParseEncoding(env->isolate(), args[3], UTF8);
|
||||
len = StringBytes::StorageSize(env->isolate(), string, enc);
|
||||
// Avoid copying the string when it is externalized but only when:
|
||||
// 1. The target encoding is compatible with the string's encoding, and
|
||||
// 2. The write is synchronous, otherwise the string might get neutered
|
||||
// while the request is in flight, and
|
||||
// 3. For UCS2, when the host system is little-endian. Big-endian systems
|
||||
// need to call StringBytes::Write() to ensure proper byte swapping.
|
||||
// The const_casts are conceptually sound: memory is read but not written.
|
||||
if (!is_async && value->IsString()) {
|
||||
auto string = value.As<String>();
|
||||
if ((enc == ASCII || enc == LATIN1) && string->IsExternalOneByte()) {
|
||||
auto ext = string->GetExternalOneByteStringResource();
|
||||
buf = const_cast<char*>(ext->data());
|
||||
len = ext->length();
|
||||
} else if (enc == UCS2 && IsLittleEndian() && string->IsExternal()) {
|
||||
auto ext = string->GetExternalStringResource();
|
||||
buf = reinterpret_cast<char*>(const_cast<uint16_t*>(ext->data()));
|
||||
len = ext->length() * sizeof(*ext->data());
|
||||
}
|
||||
}
|
||||
|
||||
if (buf == nullptr) {
|
||||
len = StringBytes::StorageSize(env->isolate(), value, enc);
|
||||
buf = new char[len];
|
||||
// SYNC_CALL returns on error. Make sure to always free the memory.
|
||||
if (!is_async) delete_on_return.reset(buf);
|
||||
// StorageSize may return too large a char, so correct the actual length
|
||||
// by the write size
|
||||
len = StringBytes::Write(env->isolate(), buf, len, args[1], enc);
|
||||
}
|
||||
pos = GET_OFFSET(args[2]);
|
||||
|
||||
uv_buf_t uvbuf = uv_buf_init(const_cast<char*>(buf), len);
|
||||
uv_buf_t uvbuf = uv_buf_init(buf, len);
|
||||
|
||||
if (args[4]->IsObject()) {
|
||||
if (is_async) {
|
||||
CHECK_EQ(args.Length(), 5);
|
||||
AsyncCall(env, args, "write", UTF8, AfterInteger,
|
||||
uv_fs_write, fd, &uvbuf, 1, pos);
|
||||
} else {
|
||||
// SYNC_CALL returns on error. Make sure to always free the memory.
|
||||
struct Delete {
|
||||
inline explicit Delete(char* pointer) : pointer_(pointer) {}
|
||||
inline ~Delete() { delete[] pointer_; }
|
||||
char* const pointer_;
|
||||
};
|
||||
Delete delete_on_return(buf);
|
||||
SYNC_CALL(write, nullptr, fd, &uvbuf, 1, pos)
|
||||
return args.GetReturnValue().Set(SYNC_RESULT);
|
||||
}
|
||||
|
@ -27,6 +27,8 @@
|
||||
|
||||
#include <limits.h>
|
||||
#include <string.h> // memcpy
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
// When creating strings >= this length v8's gc spins up and consumes
|
||||
@ -269,39 +271,6 @@ static size_t hex_decode(char* buf,
|
||||
}
|
||||
|
||||
|
||||
bool StringBytes::GetExternalParts(Local<Value> val,
|
||||
const char** data,
|
||||
size_t* len) {
|
||||
if (Buffer::HasInstance(val)) {
|
||||
*data = Buffer::Data(val);
|
||||
*len = Buffer::Length(val);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!val->IsString())
|
||||
return false;
|
||||
|
||||
Local<String> str = val.As<String>();
|
||||
|
||||
if (str->IsExternalOneByte()) {
|
||||
const String::ExternalOneByteStringResource* ext;
|
||||
ext = str->GetExternalOneByteStringResource();
|
||||
*data = ext->data();
|
||||
*len = ext->length();
|
||||
return true;
|
||||
|
||||
} else if (str->IsExternal()) {
|
||||
const String::ExternalStringResource* ext;
|
||||
ext = str->GetExternalStringResource();
|
||||
*data = reinterpret_cast<const char*>(ext->data());
|
||||
*len = ext->length() * sizeof(*ext->data());
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
size_t StringBytes::WriteUCS2(char* buf,
|
||||
size_t buflen,
|
||||
Local<String> str,
|
||||
@ -347,17 +316,15 @@ size_t StringBytes::Write(Isolate* isolate,
|
||||
enum encoding encoding,
|
||||
int* chars_written) {
|
||||
HandleScope scope(isolate);
|
||||
const char* data = nullptr;
|
||||
size_t nbytes = 0;
|
||||
const bool is_extern = GetExternalParts(val, &data, &nbytes);
|
||||
const size_t external_nbytes = nbytes;
|
||||
size_t nbytes;
|
||||
int nchars;
|
||||
|
||||
if (chars_written == nullptr)
|
||||
chars_written = &nchars;
|
||||
|
||||
CHECK(val->IsString() == true);
|
||||
Local<String> str = val.As<String>();
|
||||
|
||||
if (nbytes > buflen)
|
||||
nbytes = buflen;
|
||||
|
||||
int flags = String::HINT_MANY_WRITES_EXPECTED |
|
||||
String::NO_NULL_TERMINATION |
|
||||
String::REPLACE_INVALID_UTF8;
|
||||
@ -365,13 +332,14 @@ size_t StringBytes::Write(Isolate* isolate,
|
||||
switch (encoding) {
|
||||
case ASCII:
|
||||
case LATIN1:
|
||||
if (is_extern && str->IsOneByte()) {
|
||||
memcpy(buf, data, nbytes);
|
||||
if (str->IsExternalOneByte()) {
|
||||
auto ext = str->GetExternalOneByteStringResource();
|
||||
nbytes = std::min(buflen, ext->length());
|
||||
memcpy(buf, ext->data(), nbytes);
|
||||
} else {
|
||||
uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
|
||||
nbytes = str->WriteOneByte(dst, 0, buflen, flags);
|
||||
}
|
||||
if (chars_written != nullptr)
|
||||
*chars_written = nbytes;
|
||||
break;
|
||||
|
||||
@ -383,14 +351,8 @@ size_t StringBytes::Write(Isolate* isolate,
|
||||
case UCS2: {
|
||||
size_t nchars;
|
||||
|
||||
if (is_extern && !str->IsOneByte()) {
|
||||
memcpy(buf, data, nbytes);
|
||||
nchars = nbytes / sizeof(uint16_t);
|
||||
} else {
|
||||
nbytes = WriteUCS2(buf, buflen, str, flags, &nchars);
|
||||
}
|
||||
if (chars_written != nullptr)
|
||||
*chars_written = nchars;
|
||||
*chars_written = static_cast<int>(nchars);
|
||||
|
||||
// Node's "ucs2" encoding wants LE character data stored in
|
||||
// the Buffer, so we need to reorder on BE platforms. See
|
||||
@ -403,27 +365,25 @@ size_t StringBytes::Write(Isolate* isolate,
|
||||
}
|
||||
|
||||
case BASE64:
|
||||
if (is_extern) {
|
||||
nbytes = base64_decode(buf, buflen, data, external_nbytes);
|
||||
if (str->IsExternalOneByte()) {
|
||||
auto ext = str->GetExternalOneByteStringResource();
|
||||
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
|
||||
} else {
|
||||
String::Value value(str);
|
||||
nbytes = base64_decode(buf, buflen, *value, value.length());
|
||||
}
|
||||
if (chars_written != nullptr) {
|
||||
*chars_written = nbytes;
|
||||
}
|
||||
break;
|
||||
|
||||
case HEX:
|
||||
if (is_extern) {
|
||||
nbytes = hex_decode(buf, buflen, data, external_nbytes);
|
||||
if (str->IsExternalOneByte()) {
|
||||
auto ext = str->GetExternalOneByteStringResource();
|
||||
nbytes = hex_decode(buf, buflen, ext->data(), ext->length());
|
||||
} else {
|
||||
String::Value value(str);
|
||||
nbytes = hex_decode(buf, buflen, *value, value.length());
|
||||
}
|
||||
if (chars_written != nullptr) {
|
||||
*chars_written = nbytes;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -500,49 +460,34 @@ size_t StringBytes::Size(Isolate* isolate,
|
||||
Local<Value> val,
|
||||
enum encoding encoding) {
|
||||
HandleScope scope(isolate);
|
||||
size_t data_size = 0;
|
||||
bool is_buffer = Buffer::HasInstance(val);
|
||||
|
||||
if (is_buffer && (encoding == BUFFER || encoding == LATIN1))
|
||||
if (Buffer::HasInstance(val) && (encoding == BUFFER || encoding == LATIN1))
|
||||
return Buffer::Length(val);
|
||||
|
||||
const char* data;
|
||||
if (GetExternalParts(val, &data, &data_size))
|
||||
return data_size;
|
||||
|
||||
Local<String> str = val->ToString(isolate);
|
||||
|
||||
switch (encoding) {
|
||||
case ASCII:
|
||||
case LATIN1:
|
||||
data_size = str->Length();
|
||||
break;
|
||||
return str->Length();
|
||||
|
||||
case BUFFER:
|
||||
case UTF8:
|
||||
data_size = str->Utf8Length();
|
||||
break;
|
||||
return str->Utf8Length();
|
||||
|
||||
case UCS2:
|
||||
data_size = str->Length() * sizeof(uint16_t);
|
||||
break;
|
||||
return str->Length() * sizeof(uint16_t);
|
||||
|
||||
case BASE64: {
|
||||
String::Value value(str);
|
||||
data_size = base64_decoded_size(*value, value.length());
|
||||
break;
|
||||
return base64_decoded_size(*value, value.length());
|
||||
}
|
||||
|
||||
case HEX:
|
||||
data_size = str->Length() / 2;
|
||||
break;
|
||||
|
||||
default:
|
||||
CHECK(0 && "unknown encoding");
|
||||
break;
|
||||
return str->Length() / 2;
|
||||
}
|
||||
|
||||
return data_size;
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
||||
|
@ -81,12 +81,6 @@ class StringBytes {
|
||||
v8::Local<v8::Value> val,
|
||||
enum encoding enc);
|
||||
|
||||
// If the string is external then assign external properties to data and len,
|
||||
// then return true. If not return false.
|
||||
static bool GetExternalParts(v8::Local<v8::Value> val,
|
||||
const char** data,
|
||||
size_t* len);
|
||||
|
||||
// Write the bytes from the string or buffer into the char*
|
||||
// returns the number of bytes written, which will always be
|
||||
// <= buflen. Use StorageSize/Size first to know how much
|
||||
|
@ -19,6 +19,7 @@
|
||||
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
// USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
// Flags: --expose_externalize_string
|
||||
'use strict';
|
||||
const common = require('../common');
|
||||
const assert = require('assert');
|
||||
@ -30,6 +31,49 @@ const fn3 = path.join(common.tmpDir, 'write3.txt');
|
||||
const expected = 'ümlaut.';
|
||||
const constants = fs.constants;
|
||||
|
||||
/* eslint-disable no-undef */
|
||||
common.allowGlobals(externalizeString, isOneByteString, x);
|
||||
|
||||
{
|
||||
const expected = 'ümlaut eins'; // Must be a unique string.
|
||||
externalizeString(expected);
|
||||
assert.strictEqual(true, isOneByteString(expected));
|
||||
const fd = fs.openSync(fn, 'w');
|
||||
fs.writeSync(fd, expected, 0, 'latin1');
|
||||
fs.closeSync(fd);
|
||||
assert.strictEqual(expected, fs.readFileSync(fn, 'latin1'));
|
||||
}
|
||||
|
||||
{
|
||||
const expected = 'ümlaut zwei'; // Must be a unique string.
|
||||
externalizeString(expected);
|
||||
assert.strictEqual(true, isOneByteString(expected));
|
||||
const fd = fs.openSync(fn, 'w');
|
||||
fs.writeSync(fd, expected, 0, 'utf8');
|
||||
fs.closeSync(fd);
|
||||
assert.strictEqual(expected, fs.readFileSync(fn, 'utf8'));
|
||||
}
|
||||
|
||||
{
|
||||
const expected = '中文 1'; // Must be a unique string.
|
||||
externalizeString(expected);
|
||||
assert.strictEqual(false, isOneByteString(expected));
|
||||
const fd = fs.openSync(fn, 'w');
|
||||
fs.writeSync(fd, expected, 0, 'ucs2');
|
||||
fs.closeSync(fd);
|
||||
assert.strictEqual(expected, fs.readFileSync(fn, 'ucs2'));
|
||||
}
|
||||
|
||||
{
|
||||
const expected = '中文 2'; // Must be a unique string.
|
||||
externalizeString(expected);
|
||||
assert.strictEqual(false, isOneByteString(expected));
|
||||
const fd = fs.openSync(fn, 'w');
|
||||
fs.writeSync(fd, expected, 0, 'utf8');
|
||||
fs.closeSync(fd);
|
||||
assert.strictEqual(expected, fs.readFileSync(fn, 'utf8'));
|
||||
}
|
||||
|
||||
common.refreshTmpDir();
|
||||
|
||||
fs.open(fn, 'w', 0o644, common.mustCall(function(err, fd) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user