src: fix fs.write() externalized string handling
* Respect `encoding` argument when the string is externalized. * Copy the string when the write request can outlive the externalized string. This commit removes `StringBytes::GetExternalParts()` because it is fundamentally broken. Fixes: https://github.com/nodejs/node/issues/18146 PR-URL: https://github.com/nodejs/node/pull/18216 Reviewed-By: Colin Ihrig <cjihrig@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
This commit is contained in:
parent
0b1841d83f
commit
b2b9d11a14
@ -39,6 +39,7 @@
|
|||||||
# include <io.h>
|
# include <io.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace node {
|
namespace node {
|
||||||
@ -1019,40 +1020,53 @@ static void WriteString(const FunctionCallbackInfo<Value>& args) {
|
|||||||
|
|
||||||
CHECK(args[0]->IsInt32());
|
CHECK(args[0]->IsInt32());
|
||||||
|
|
||||||
|
std::unique_ptr<char[]> delete_on_return;
|
||||||
Local<Value> req;
|
Local<Value> req;
|
||||||
Local<Value> string = args[1];
|
Local<Value> value = args[1];
|
||||||
int fd = args[0]->Int32Value();
|
int fd = args[0]->Int32Value();
|
||||||
char* buf = nullptr;
|
char* buf = nullptr;
|
||||||
int64_t pos;
|
|
||||||
size_t len;
|
size_t len;
|
||||||
|
const int64_t pos = GET_OFFSET(args[2]);
|
||||||
|
const auto enc = ParseEncoding(env->isolate(), args[3], UTF8);
|
||||||
|
const auto is_async = args[4]->IsObject();
|
||||||
|
|
||||||
// will assign buf and len if string was external
|
// Avoid copying the string when it is externalized but only when:
|
||||||
if (!StringBytes::GetExternalParts(string,
|
// 1. The target encoding is compatible with the string's encoding, and
|
||||||
const_cast<const char**>(&buf),
|
// 2. The write is synchronous, otherwise the string might get neutered
|
||||||
&len)) {
|
// while the request is in flight, and
|
||||||
enum encoding enc = ParseEncoding(env->isolate(), args[3], UTF8);
|
// 3. For UCS2, when the host system is little-endian. Big-endian systems
|
||||||
len = StringBytes::StorageSize(env->isolate(), string, enc);
|
// need to call StringBytes::Write() to ensure proper byte swapping.
|
||||||
|
// The const_casts are conceptually sound: memory is read but not written.
|
||||||
|
if (!is_async && value->IsString()) {
|
||||||
|
auto string = value.As<String>();
|
||||||
|
if ((enc == ASCII || enc == LATIN1) && string->IsExternalOneByte()) {
|
||||||
|
auto ext = string->GetExternalOneByteStringResource();
|
||||||
|
buf = const_cast<char*>(ext->data());
|
||||||
|
len = ext->length();
|
||||||
|
} else if (enc == UCS2 && IsLittleEndian() && string->IsExternal()) {
|
||||||
|
auto ext = string->GetExternalStringResource();
|
||||||
|
buf = reinterpret_cast<char*>(const_cast<uint16_t*>(ext->data()));
|
||||||
|
len = ext->length() * sizeof(*ext->data());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (buf == nullptr) {
|
||||||
|
len = StringBytes::StorageSize(env->isolate(), value, enc);
|
||||||
buf = new char[len];
|
buf = new char[len];
|
||||||
|
// SYNC_CALL returns on error. Make sure to always free the memory.
|
||||||
|
if (!is_async) delete_on_return.reset(buf);
|
||||||
// StorageSize may return too large a char, so correct the actual length
|
// StorageSize may return too large a char, so correct the actual length
|
||||||
// by the write size
|
// by the write size
|
||||||
len = StringBytes::Write(env->isolate(), buf, len, args[1], enc);
|
len = StringBytes::Write(env->isolate(), buf, len, args[1], enc);
|
||||||
}
|
}
|
||||||
pos = GET_OFFSET(args[2]);
|
|
||||||
|
|
||||||
uv_buf_t uvbuf = uv_buf_init(const_cast<char*>(buf), len);
|
uv_buf_t uvbuf = uv_buf_init(buf, len);
|
||||||
|
|
||||||
if (args[4]->IsObject()) {
|
if (is_async) {
|
||||||
CHECK_EQ(args.Length(), 5);
|
CHECK_EQ(args.Length(), 5);
|
||||||
AsyncCall(env, args, "write", UTF8, AfterInteger,
|
AsyncCall(env, args, "write", UTF8, AfterInteger,
|
||||||
uv_fs_write, fd, &uvbuf, 1, pos);
|
uv_fs_write, fd, &uvbuf, 1, pos);
|
||||||
} else {
|
} else {
|
||||||
// SYNC_CALL returns on error. Make sure to always free the memory.
|
|
||||||
struct Delete {
|
|
||||||
inline explicit Delete(char* pointer) : pointer_(pointer) {}
|
|
||||||
inline ~Delete() { delete[] pointer_; }
|
|
||||||
char* const pointer_;
|
|
||||||
};
|
|
||||||
Delete delete_on_return(buf);
|
|
||||||
SYNC_CALL(write, nullptr, fd, &uvbuf, 1, pos)
|
SYNC_CALL(write, nullptr, fd, &uvbuf, 1, pos)
|
||||||
return args.GetReturnValue().Set(SYNC_RESULT);
|
return args.GetReturnValue().Set(SYNC_RESULT);
|
||||||
}
|
}
|
||||||
|
@ -27,6 +27,8 @@
|
|||||||
|
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <string.h> // memcpy
|
#include <string.h> // memcpy
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
// When creating strings >= this length v8's gc spins up and consumes
|
// When creating strings >= this length v8's gc spins up and consumes
|
||||||
@ -269,39 +271,6 @@ static size_t hex_decode(char* buf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool StringBytes::GetExternalParts(Local<Value> val,
|
|
||||||
const char** data,
|
|
||||||
size_t* len) {
|
|
||||||
if (Buffer::HasInstance(val)) {
|
|
||||||
*data = Buffer::Data(val);
|
|
||||||
*len = Buffer::Length(val);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!val->IsString())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
Local<String> str = val.As<String>();
|
|
||||||
|
|
||||||
if (str->IsExternalOneByte()) {
|
|
||||||
const String::ExternalOneByteStringResource* ext;
|
|
||||||
ext = str->GetExternalOneByteStringResource();
|
|
||||||
*data = ext->data();
|
|
||||||
*len = ext->length();
|
|
||||||
return true;
|
|
||||||
|
|
||||||
} else if (str->IsExternal()) {
|
|
||||||
const String::ExternalStringResource* ext;
|
|
||||||
ext = str->GetExternalStringResource();
|
|
||||||
*data = reinterpret_cast<const char*>(ext->data());
|
|
||||||
*len = ext->length() * sizeof(*ext->data());
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
size_t StringBytes::WriteUCS2(char* buf,
|
size_t StringBytes::WriteUCS2(char* buf,
|
||||||
size_t buflen,
|
size_t buflen,
|
||||||
Local<String> str,
|
Local<String> str,
|
||||||
@ -347,17 +316,15 @@ size_t StringBytes::Write(Isolate* isolate,
|
|||||||
enum encoding encoding,
|
enum encoding encoding,
|
||||||
int* chars_written) {
|
int* chars_written) {
|
||||||
HandleScope scope(isolate);
|
HandleScope scope(isolate);
|
||||||
const char* data = nullptr;
|
size_t nbytes;
|
||||||
size_t nbytes = 0;
|
int nchars;
|
||||||
const bool is_extern = GetExternalParts(val, &data, &nbytes);
|
|
||||||
const size_t external_nbytes = nbytes;
|
if (chars_written == nullptr)
|
||||||
|
chars_written = &nchars;
|
||||||
|
|
||||||
CHECK(val->IsString() == true);
|
CHECK(val->IsString() == true);
|
||||||
Local<String> str = val.As<String>();
|
Local<String> str = val.As<String>();
|
||||||
|
|
||||||
if (nbytes > buflen)
|
|
||||||
nbytes = buflen;
|
|
||||||
|
|
||||||
int flags = String::HINT_MANY_WRITES_EXPECTED |
|
int flags = String::HINT_MANY_WRITES_EXPECTED |
|
||||||
String::NO_NULL_TERMINATION |
|
String::NO_NULL_TERMINATION |
|
||||||
String::REPLACE_INVALID_UTF8;
|
String::REPLACE_INVALID_UTF8;
|
||||||
@ -365,14 +332,15 @@ size_t StringBytes::Write(Isolate* isolate,
|
|||||||
switch (encoding) {
|
switch (encoding) {
|
||||||
case ASCII:
|
case ASCII:
|
||||||
case LATIN1:
|
case LATIN1:
|
||||||
if (is_extern && str->IsOneByte()) {
|
if (str->IsExternalOneByte()) {
|
||||||
memcpy(buf, data, nbytes);
|
auto ext = str->GetExternalOneByteStringResource();
|
||||||
|
nbytes = std::min(buflen, ext->length());
|
||||||
|
memcpy(buf, ext->data(), nbytes);
|
||||||
} else {
|
} else {
|
||||||
uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
|
uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
|
||||||
nbytes = str->WriteOneByte(dst, 0, buflen, flags);
|
nbytes = str->WriteOneByte(dst, 0, buflen, flags);
|
||||||
}
|
}
|
||||||
if (chars_written != nullptr)
|
*chars_written = nbytes;
|
||||||
*chars_written = nbytes;
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case BUFFER:
|
case BUFFER:
|
||||||
@ -383,14 +351,8 @@ size_t StringBytes::Write(Isolate* isolate,
|
|||||||
case UCS2: {
|
case UCS2: {
|
||||||
size_t nchars;
|
size_t nchars;
|
||||||
|
|
||||||
if (is_extern && !str->IsOneByte()) {
|
nbytes = WriteUCS2(buf, buflen, str, flags, &nchars);
|
||||||
memcpy(buf, data, nbytes);
|
*chars_written = static_cast<int>(nchars);
|
||||||
nchars = nbytes / sizeof(uint16_t);
|
|
||||||
} else {
|
|
||||||
nbytes = WriteUCS2(buf, buflen, str, flags, &nchars);
|
|
||||||
}
|
|
||||||
if (chars_written != nullptr)
|
|
||||||
*chars_written = nchars;
|
|
||||||
|
|
||||||
// Node's "ucs2" encoding wants LE character data stored in
|
// Node's "ucs2" encoding wants LE character data stored in
|
||||||
// the Buffer, so we need to reorder on BE platforms. See
|
// the Buffer, so we need to reorder on BE platforms. See
|
||||||
@ -403,27 +365,25 @@ size_t StringBytes::Write(Isolate* isolate,
|
|||||||
}
|
}
|
||||||
|
|
||||||
case BASE64:
|
case BASE64:
|
||||||
if (is_extern) {
|
if (str->IsExternalOneByte()) {
|
||||||
nbytes = base64_decode(buf, buflen, data, external_nbytes);
|
auto ext = str->GetExternalOneByteStringResource();
|
||||||
|
nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
|
||||||
} else {
|
} else {
|
||||||
String::Value value(str);
|
String::Value value(str);
|
||||||
nbytes = base64_decode(buf, buflen, *value, value.length());
|
nbytes = base64_decode(buf, buflen, *value, value.length());
|
||||||
}
|
}
|
||||||
if (chars_written != nullptr) {
|
*chars_written = nbytes;
|
||||||
*chars_written = nbytes;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case HEX:
|
case HEX:
|
||||||
if (is_extern) {
|
if (str->IsExternalOneByte()) {
|
||||||
nbytes = hex_decode(buf, buflen, data, external_nbytes);
|
auto ext = str->GetExternalOneByteStringResource();
|
||||||
|
nbytes = hex_decode(buf, buflen, ext->data(), ext->length());
|
||||||
} else {
|
} else {
|
||||||
String::Value value(str);
|
String::Value value(str);
|
||||||
nbytes = hex_decode(buf, buflen, *value, value.length());
|
nbytes = hex_decode(buf, buflen, *value, value.length());
|
||||||
}
|
}
|
||||||
if (chars_written != nullptr) {
|
*chars_written = nbytes;
|
||||||
*chars_written = nbytes;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
@ -500,49 +460,34 @@ size_t StringBytes::Size(Isolate* isolate,
|
|||||||
Local<Value> val,
|
Local<Value> val,
|
||||||
enum encoding encoding) {
|
enum encoding encoding) {
|
||||||
HandleScope scope(isolate);
|
HandleScope scope(isolate);
|
||||||
size_t data_size = 0;
|
|
||||||
bool is_buffer = Buffer::HasInstance(val);
|
|
||||||
|
|
||||||
if (is_buffer && (encoding == BUFFER || encoding == LATIN1))
|
if (Buffer::HasInstance(val) && (encoding == BUFFER || encoding == LATIN1))
|
||||||
return Buffer::Length(val);
|
return Buffer::Length(val);
|
||||||
|
|
||||||
const char* data;
|
|
||||||
if (GetExternalParts(val, &data, &data_size))
|
|
||||||
return data_size;
|
|
||||||
|
|
||||||
Local<String> str = val->ToString(isolate);
|
Local<String> str = val->ToString(isolate);
|
||||||
|
|
||||||
switch (encoding) {
|
switch (encoding) {
|
||||||
case ASCII:
|
case ASCII:
|
||||||
case LATIN1:
|
case LATIN1:
|
||||||
data_size = str->Length();
|
return str->Length();
|
||||||
break;
|
|
||||||
|
|
||||||
case BUFFER:
|
case BUFFER:
|
||||||
case UTF8:
|
case UTF8:
|
||||||
data_size = str->Utf8Length();
|
return str->Utf8Length();
|
||||||
break;
|
|
||||||
|
|
||||||
case UCS2:
|
case UCS2:
|
||||||
data_size = str->Length() * sizeof(uint16_t);
|
return str->Length() * sizeof(uint16_t);
|
||||||
break;
|
|
||||||
|
|
||||||
case BASE64: {
|
case BASE64: {
|
||||||
String::Value value(str);
|
String::Value value(str);
|
||||||
data_size = base64_decoded_size(*value, value.length());
|
return base64_decoded_size(*value, value.length());
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
case HEX:
|
case HEX:
|
||||||
data_size = str->Length() / 2;
|
return str->Length() / 2;
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
CHECK(0 && "unknown encoding");
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return data_size;
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -81,12 +81,6 @@ class StringBytes {
|
|||||||
v8::Local<v8::Value> val,
|
v8::Local<v8::Value> val,
|
||||||
enum encoding enc);
|
enum encoding enc);
|
||||||
|
|
||||||
// If the string is external then assign external properties to data and len,
|
|
||||||
// then return true. If not return false.
|
|
||||||
static bool GetExternalParts(v8::Local<v8::Value> val,
|
|
||||||
const char** data,
|
|
||||||
size_t* len);
|
|
||||||
|
|
||||||
// Write the bytes from the string or buffer into the char*
|
// Write the bytes from the string or buffer into the char*
|
||||||
// returns the number of bytes written, which will always be
|
// returns the number of bytes written, which will always be
|
||||||
// <= buflen. Use StorageSize/Size first to know how much
|
// <= buflen. Use StorageSize/Size first to know how much
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||||
// USE OR OTHER DEALINGS IN THE SOFTWARE.
|
// USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
// Flags: --expose_externalize_string
|
||||||
'use strict';
|
'use strict';
|
||||||
const common = require('../common');
|
const common = require('../common');
|
||||||
const assert = require('assert');
|
const assert = require('assert');
|
||||||
@ -30,6 +31,49 @@ const fn3 = path.join(common.tmpDir, 'write3.txt');
|
|||||||
const expected = 'ümlaut.';
|
const expected = 'ümlaut.';
|
||||||
const constants = fs.constants;
|
const constants = fs.constants;
|
||||||
|
|
||||||
|
/* eslint-disable no-undef */
|
||||||
|
common.allowGlobals(externalizeString, isOneByteString, x);
|
||||||
|
|
||||||
|
{
|
||||||
|
const expected = 'ümlaut eins'; // Must be a unique string.
|
||||||
|
externalizeString(expected);
|
||||||
|
assert.strictEqual(true, isOneByteString(expected));
|
||||||
|
const fd = fs.openSync(fn, 'w');
|
||||||
|
fs.writeSync(fd, expected, 0, 'latin1');
|
||||||
|
fs.closeSync(fd);
|
||||||
|
assert.strictEqual(expected, fs.readFileSync(fn, 'latin1'));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const expected = 'ümlaut zwei'; // Must be a unique string.
|
||||||
|
externalizeString(expected);
|
||||||
|
assert.strictEqual(true, isOneByteString(expected));
|
||||||
|
const fd = fs.openSync(fn, 'w');
|
||||||
|
fs.writeSync(fd, expected, 0, 'utf8');
|
||||||
|
fs.closeSync(fd);
|
||||||
|
assert.strictEqual(expected, fs.readFileSync(fn, 'utf8'));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const expected = '中文 1'; // Must be a unique string.
|
||||||
|
externalizeString(expected);
|
||||||
|
assert.strictEqual(false, isOneByteString(expected));
|
||||||
|
const fd = fs.openSync(fn, 'w');
|
||||||
|
fs.writeSync(fd, expected, 0, 'ucs2');
|
||||||
|
fs.closeSync(fd);
|
||||||
|
assert.strictEqual(expected, fs.readFileSync(fn, 'ucs2'));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const expected = '中文 2'; // Must be a unique string.
|
||||||
|
externalizeString(expected);
|
||||||
|
assert.strictEqual(false, isOneByteString(expected));
|
||||||
|
const fd = fs.openSync(fn, 'w');
|
||||||
|
fs.writeSync(fd, expected, 0, 'utf8');
|
||||||
|
fs.closeSync(fd);
|
||||||
|
assert.strictEqual(expected, fs.readFileSync(fn, 'utf8'));
|
||||||
|
}
|
||||||
|
|
||||||
common.refreshTmpDir();
|
common.refreshTmpDir();
|
||||||
|
|
||||||
fs.open(fn, 'w', 0o644, common.mustCall(function(err, fd) {
|
fs.open(fn, 'w', 0o644, common.mustCall(function(err, fd) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user