buffer: add buffer.transcode
Add buffer.transcode(source, from, to) method. Primarily uses ICU to transcode a buffer's content from one of Node.js' supported encodings to another. Originally part of a proposal to add a new unicode module. Decided to refactor the approach towards individual PRs without a new module. Refs: https://github.com/nodejs/node/pull/8075 PR-URL: https://github.com/nodejs/node/pull/9038 Reviewed-By: Anna Henningsen <anna@addaleax.net>
This commit is contained in:
parent
1cf55f806b
commit
e8eaaa7724
@ -2302,6 +2302,33 @@ added: v3.0.0
|
|||||||
On 32-bit architectures, this value is `(2^30)-1` (~1GB).
|
On 32-bit architectures, this value is `(2^30)-1` (~1GB).
|
||||||
On 64-bit architectures, this value is `(2^31)-1` (~2GB).
|
On 64-bit architectures, this value is `(2^31)-1` (~2GB).
|
||||||
|
|
||||||
|
## buffer.transcode(source, fromEnc, toEnc)
|
||||||
|
<!-- YAML
|
||||||
|
added: REPLACEME
|
||||||
|
-->
|
||||||
|
|
||||||
|
* `source` {Buffer} A `Buffer` instance
|
||||||
|
* `fromEnc` {String} The current encoding
|
||||||
|
* `toEnc` {String} The target encoding
|
||||||
|
|
||||||
|
Re-encodes the given `Buffer` instance from one character encoding to another.
|
||||||
|
Returns a new `Buffer` instance.
|
||||||
|
|
||||||
|
Throws if `fromEnc` or `toEnc` specifies an invalid character encoding or if
|
||||||
|
conversion from `fromEnc` to `toEnc` is not permitted.
|
||||||
|
|
||||||
|
The transcoding process will use substitution characters if a given byte
|
||||||
|
sequence cannot be adequately represented in the target encoding. For instance:
|
||||||
|
|
||||||
|
```js
|
||||||
|
const newBuf = buffer.transcode(Buffer.from('€'), 'utf8', 'ascii');
|
||||||
|
console.log(newBuf.toString('ascii'));
|
||||||
|
// prints '?'
|
||||||
|
```
|
||||||
|
|
||||||
|
Because the Euro (`€`) sign is not representable in US-ASCII, it is replaced
|
||||||
|
with `?` in the transcoded `Buffer`.
|
||||||
|
|
||||||
## Class: SlowBuffer
|
## Class: SlowBuffer
|
||||||
<!-- YAML
|
<!-- YAML
|
||||||
deprecated: v6.0.0
|
deprecated: v6.0.0
|
||||||
|
@ -1360,3 +1360,7 @@ Buffer.prototype.swap64 = function swap64() {
|
|||||||
};
|
};
|
||||||
|
|
||||||
Buffer.prototype.toLocaleString = Buffer.prototype.toString;
|
Buffer.prototype.toLocaleString = Buffer.prototype.toString;
|
||||||
|
|
||||||
|
// Put this at the end because internal/buffer has a circular
|
||||||
|
// dependency on Buffer.
|
||||||
|
exports.transcode = require('internal/buffer').transcode;
|
||||||
|
30
lib/internal/buffer.js
Normal file
30
lib/internal/buffer.js
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
// When the `hasIntl` flag of the 'config' binding is falsy, stop evaluating
// this module here; `exports.transcode` is then never assigned.
if (!process.binding('config').hasIntl) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const normalizeEncoding = require('internal/util').normalizeEncoding;
|
||||||
|
const Buffer = require('buffer').Buffer;
|
||||||
|
|
||||||
|
const icu = process.binding('icu');
|
||||||
|
|
||||||
|
// Transcodes the Buffer from one encoding to another, returning a new
|
||||||
|
// Buffer instance.
|
||||||
|
exports.transcode = function transcode(source, fromEncoding, toEncoding) {
|
||||||
|
if (!Buffer.isBuffer(source))
|
||||||
|
throw new TypeError('"source" argument must be a Buffer');
|
||||||
|
if (source.length === 0) return Buffer.alloc(0);
|
||||||
|
|
||||||
|
fromEncoding = normalizeEncoding(fromEncoding) || fromEncoding;
|
||||||
|
toEncoding = normalizeEncoding(toEncoding) || toEncoding;
|
||||||
|
const result = icu.transcode(source, fromEncoding, toEncoding);
|
||||||
|
if (Buffer.isBuffer(result))
|
||||||
|
return result;
|
||||||
|
|
||||||
|
const code = icu.icuErrName(result);
|
||||||
|
const err = new Error(`Unable to transcode Buffer [${code}]`);
|
||||||
|
err.code = code;
|
||||||
|
err.errno = result;
|
||||||
|
throw err;
|
||||||
|
};
|
1
node.gyp
1
node.gyp
@ -74,6 +74,7 @@
|
|||||||
'lib/v8.js',
|
'lib/v8.js',
|
||||||
'lib/vm.js',
|
'lib/vm.js',
|
||||||
'lib/zlib.js',
|
'lib/zlib.js',
|
||||||
|
'lib/internal/buffer.js',
|
||||||
'lib/internal/child_process.js',
|
'lib/internal/child_process.js',
|
||||||
'lib/internal/cluster.js',
|
'lib/internal/cluster.js',
|
||||||
'lib/internal/freelist.js',
|
'lib/internal/freelist.js',
|
||||||
|
@ -22,23 +22,6 @@
|
|||||||
if (!(r)) return env->ThrowRangeError("out of range index"); \
|
if (!(r)) return env->ThrowRangeError("out of range index"); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define THROW_AND_RETURN_UNLESS_BUFFER(env, obj) \
|
|
||||||
do { \
|
|
||||||
if (!HasInstance(obj)) \
|
|
||||||
return env->ThrowTypeError("argument should be a Buffer"); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
#define SPREAD_ARG(val, name) \
|
|
||||||
CHECK((val)->IsUint8Array()); \
|
|
||||||
Local<Uint8Array> name = (val).As<Uint8Array>(); \
|
|
||||||
ArrayBuffer::Contents name##_c = name->Buffer()->GetContents(); \
|
|
||||||
const size_t name##_offset = name->ByteOffset(); \
|
|
||||||
const size_t name##_length = name->ByteLength(); \
|
|
||||||
char* const name##_data = \
|
|
||||||
static_cast<char*>(name##_c.Data()) + name##_offset; \
|
|
||||||
if (name##_length > 0) \
|
|
||||||
CHECK_NE(name##_data, nullptr);
|
|
||||||
|
|
||||||
#define SLICE_START_END(start_arg, end_arg, end_max) \
|
#define SLICE_START_END(start_arg, end_arg, end_max) \
|
||||||
size_t start; \
|
size_t start; \
|
||||||
size_t end; \
|
size_t end; \
|
||||||
@ -448,7 +431,7 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) {
|
|||||||
Isolate* isolate = env->isolate();
|
Isolate* isolate = env->isolate();
|
||||||
|
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
|
THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
|
||||||
SPREAD_ARG(args.This(), ts_obj);
|
SPREAD_BUFFER_ARG(args.This(), ts_obj);
|
||||||
|
|
||||||
if (ts_obj_length == 0)
|
if (ts_obj_length == 0)
|
||||||
return args.GetReturnValue().SetEmptyString();
|
return args.GetReturnValue().SetEmptyString();
|
||||||
@ -465,7 +448,7 @@ void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) {
|
|||||||
Environment* env = Environment::GetCurrent(args);
|
Environment* env = Environment::GetCurrent(args);
|
||||||
|
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
|
THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
|
||||||
SPREAD_ARG(args.This(), ts_obj);
|
SPREAD_BUFFER_ARG(args.This(), ts_obj);
|
||||||
|
|
||||||
if (ts_obj_length == 0)
|
if (ts_obj_length == 0)
|
||||||
return args.GetReturnValue().SetEmptyString();
|
return args.GetReturnValue().SetEmptyString();
|
||||||
@ -543,8 +526,8 @@ void Copy(const FunctionCallbackInfo<Value> &args) {
|
|||||||
THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
|
THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
||||||
Local<Object> target_obj = args[0].As<Object>();
|
Local<Object> target_obj = args[0].As<Object>();
|
||||||
SPREAD_ARG(args.This(), ts_obj);
|
SPREAD_BUFFER_ARG(args.This(), ts_obj);
|
||||||
SPREAD_ARG(target_obj, target);
|
SPREAD_BUFFER_ARG(target_obj, target);
|
||||||
|
|
||||||
size_t target_start;
|
size_t target_start;
|
||||||
size_t source_start;
|
size_t source_start;
|
||||||
@ -577,7 +560,7 @@ void Fill(const FunctionCallbackInfo<Value>& args) {
|
|||||||
Environment* env = Environment::GetCurrent(args);
|
Environment* env = Environment::GetCurrent(args);
|
||||||
|
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
||||||
SPREAD_ARG(args[0], ts_obj);
|
SPREAD_BUFFER_ARG(args[0], ts_obj);
|
||||||
|
|
||||||
size_t start = args[2]->Uint32Value();
|
size_t start = args[2]->Uint32Value();
|
||||||
size_t end = args[3]->Uint32Value();
|
size_t end = args[3]->Uint32Value();
|
||||||
@ -590,7 +573,7 @@ void Fill(const FunctionCallbackInfo<Value>& args) {
|
|||||||
|
|
||||||
// First check if Buffer has been passed.
|
// First check if Buffer has been passed.
|
||||||
if (Buffer::HasInstance(args[1])) {
|
if (Buffer::HasInstance(args[1])) {
|
||||||
SPREAD_ARG(args[1], fill_obj);
|
SPREAD_BUFFER_ARG(args[1], fill_obj);
|
||||||
str_length = fill_obj_length;
|
str_length = fill_obj_length;
|
||||||
memcpy(ts_obj_data + start, fill_obj_data, MIN(str_length, fill_length));
|
memcpy(ts_obj_data + start, fill_obj_data, MIN(str_length, fill_length));
|
||||||
goto start_fill;
|
goto start_fill;
|
||||||
@ -669,7 +652,7 @@ void StringWrite(const FunctionCallbackInfo<Value>& args) {
|
|||||||
Environment* env = Environment::GetCurrent(args);
|
Environment* env = Environment::GetCurrent(args);
|
||||||
|
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
|
THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
|
||||||
SPREAD_ARG(args.This(), ts_obj);
|
SPREAD_BUFFER_ARG(args.This(), ts_obj);
|
||||||
|
|
||||||
if (!args[0]->IsString())
|
if (!args[0]->IsString())
|
||||||
return env->ThrowTypeError("Argument must be a string");
|
return env->ThrowTypeError("Argument must be a string");
|
||||||
@ -747,7 +730,7 @@ static inline void Swizzle(char* start, unsigned int len) {
|
|||||||
template <typename T, enum Endianness endianness>
|
template <typename T, enum Endianness endianness>
|
||||||
void ReadFloatGeneric(const FunctionCallbackInfo<Value>& args) {
|
void ReadFloatGeneric(const FunctionCallbackInfo<Value>& args) {
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
|
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
|
||||||
SPREAD_ARG(args[0], ts_obj);
|
SPREAD_BUFFER_ARG(args[0], ts_obj);
|
||||||
|
|
||||||
uint32_t offset = args[1]->Uint32Value();
|
uint32_t offset = args[1]->Uint32Value();
|
||||||
CHECK_LE(offset + sizeof(T), ts_obj_length);
|
CHECK_LE(offset + sizeof(T), ts_obj_length);
|
||||||
@ -881,8 +864,8 @@ void CompareOffset(const FunctionCallbackInfo<Value> &args) {
|
|||||||
|
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(env, args[1]);
|
THROW_AND_RETURN_UNLESS_BUFFER(env, args[1]);
|
||||||
SPREAD_ARG(args[0], ts_obj);
|
SPREAD_BUFFER_ARG(args[0], ts_obj);
|
||||||
SPREAD_ARG(args[1], target);
|
SPREAD_BUFFER_ARG(args[1], target);
|
||||||
|
|
||||||
size_t target_start;
|
size_t target_start;
|
||||||
size_t source_start;
|
size_t source_start;
|
||||||
@ -921,8 +904,8 @@ void Compare(const FunctionCallbackInfo<Value> &args) {
|
|||||||
|
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(env, args[1]);
|
THROW_AND_RETURN_UNLESS_BUFFER(env, args[1]);
|
||||||
SPREAD_ARG(args[0], obj_a);
|
SPREAD_BUFFER_ARG(args[0], obj_a);
|
||||||
SPREAD_ARG(args[1], obj_b);
|
SPREAD_BUFFER_ARG(args[1], obj_b);
|
||||||
|
|
||||||
size_t cmp_length = MIN(obj_a_length, obj_b_length);
|
size_t cmp_length = MIN(obj_a_length, obj_b_length);
|
||||||
|
|
||||||
@ -977,7 +960,7 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
|
|||||||
UTF8);
|
UTF8);
|
||||||
|
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
|
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
|
||||||
SPREAD_ARG(args[0], ts_obj);
|
SPREAD_BUFFER_ARG(args[0], ts_obj);
|
||||||
|
|
||||||
Local<String> needle = args[1].As<String>();
|
Local<String> needle = args[1].As<String>();
|
||||||
int64_t offset_i64 = args[2]->IntegerValue();
|
int64_t offset_i64 = args[2]->IntegerValue();
|
||||||
@ -1084,8 +1067,8 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
|
|||||||
|
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
|
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[1]);
|
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[1]);
|
||||||
SPREAD_ARG(args[0], ts_obj);
|
SPREAD_BUFFER_ARG(args[0], ts_obj);
|
||||||
SPREAD_ARG(args[1], buf);
|
SPREAD_BUFFER_ARG(args[1], buf);
|
||||||
int64_t offset_i64 = args[2]->IntegerValue();
|
int64_t offset_i64 = args[2]->IntegerValue();
|
||||||
bool is_forward = args[4]->IsTrue();
|
bool is_forward = args[4]->IsTrue();
|
||||||
|
|
||||||
@ -1143,7 +1126,7 @@ void IndexOfNumber(const FunctionCallbackInfo<Value>& args) {
|
|||||||
ASSERT(args[3]->IsBoolean());
|
ASSERT(args[3]->IsBoolean());
|
||||||
|
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
|
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
|
||||||
SPREAD_ARG(args[0], ts_obj);
|
SPREAD_BUFFER_ARG(args[0], ts_obj);
|
||||||
|
|
||||||
uint32_t needle = args[1]->Uint32Value();
|
uint32_t needle = args[1]->Uint32Value();
|
||||||
int64_t offset_i64 = args[2]->IntegerValue();
|
int64_t offset_i64 = args[2]->IntegerValue();
|
||||||
@ -1171,7 +1154,7 @@ void IndexOfNumber(const FunctionCallbackInfo<Value>& args) {
|
|||||||
void Swap16(const FunctionCallbackInfo<Value>& args) {
|
void Swap16(const FunctionCallbackInfo<Value>& args) {
|
||||||
Environment* env = Environment::GetCurrent(args);
|
Environment* env = Environment::GetCurrent(args);
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
||||||
SPREAD_ARG(args[0], ts_obj);
|
SPREAD_BUFFER_ARG(args[0], ts_obj);
|
||||||
SwapBytes16(ts_obj_data, ts_obj_length);
|
SwapBytes16(ts_obj_data, ts_obj_length);
|
||||||
args.GetReturnValue().Set(args[0]);
|
args.GetReturnValue().Set(args[0]);
|
||||||
}
|
}
|
||||||
@ -1180,7 +1163,7 @@ void Swap16(const FunctionCallbackInfo<Value>& args) {
|
|||||||
void Swap32(const FunctionCallbackInfo<Value>& args) {
|
void Swap32(const FunctionCallbackInfo<Value>& args) {
|
||||||
Environment* env = Environment::GetCurrent(args);
|
Environment* env = Environment::GetCurrent(args);
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
||||||
SPREAD_ARG(args[0], ts_obj);
|
SPREAD_BUFFER_ARG(args[0], ts_obj);
|
||||||
SwapBytes32(ts_obj_data, ts_obj_length);
|
SwapBytes32(ts_obj_data, ts_obj_length);
|
||||||
args.GetReturnValue().Set(args[0]);
|
args.GetReturnValue().Set(args[0]);
|
||||||
}
|
}
|
||||||
@ -1189,7 +1172,7 @@ void Swap32(const FunctionCallbackInfo<Value>& args) {
|
|||||||
void Swap64(const FunctionCallbackInfo<Value>& args) {
|
void Swap64(const FunctionCallbackInfo<Value>& args) {
|
||||||
Environment* env = Environment::GetCurrent(args);
|
Environment* env = Environment::GetCurrent(args);
|
||||||
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
||||||
SPREAD_ARG(args[0], ts_obj);
|
SPREAD_BUFFER_ARG(args[0], ts_obj);
|
||||||
SwapBytes64(ts_obj_data, ts_obj_length);
|
SwapBytes64(ts_obj_data, ts_obj_length);
|
||||||
args.GetReturnValue().Set(args[0]);
|
args.GetReturnValue().Set(args[0]);
|
||||||
}
|
}
|
||||||
|
280
src/node_i18n.cc
280
src/node_i18n.cc
@ -24,6 +24,7 @@
|
|||||||
#if defined(NODE_HAVE_I18N_SUPPORT)
|
#if defined(NODE_HAVE_I18N_SUPPORT)
|
||||||
|
|
||||||
#include "node.h"
|
#include "node.h"
|
||||||
|
#include "node_buffer.h"
|
||||||
#include "env.h"
|
#include "env.h"
|
||||||
#include "env-inl.h"
|
#include "env-inl.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
@ -34,6 +35,10 @@
|
|||||||
#include <unicode/uchar.h>
|
#include <unicode/uchar.h>
|
||||||
#include <unicode/udata.h>
|
#include <unicode/udata.h>
|
||||||
#include <unicode/uidna.h>
|
#include <unicode/uidna.h>
|
||||||
|
#include <unicode/utypes.h>
|
||||||
|
#include <unicode/ucnv.h>
|
||||||
|
#include <unicode/utf8.h>
|
||||||
|
#include <unicode/utf16.h>
|
||||||
|
|
||||||
#ifdef NODE_HAVE_SMALL_ICU
|
#ifdef NODE_HAVE_SMALL_ICU
|
||||||
/* if this is defined, we have a 'secondary' entry point.
|
/* if this is defined, we have a 'secondary' entry point.
|
||||||
@ -54,7 +59,9 @@ namespace node {
|
|||||||
|
|
||||||
using v8::Context;
|
using v8::Context;
|
||||||
using v8::FunctionCallbackInfo;
|
using v8::FunctionCallbackInfo;
|
||||||
|
using v8::Isolate;
|
||||||
using v8::Local;
|
using v8::Local;
|
||||||
|
using v8::MaybeLocal;
|
||||||
using v8::Object;
|
using v8::Object;
|
||||||
using v8::String;
|
using v8::String;
|
||||||
using v8::Value;
|
using v8::Value;
|
||||||
@ -63,6 +70,275 @@ bool flag_icu_data_dir = false;
|
|||||||
|
|
||||||
namespace i18n {
|
namespace i18n {
|
||||||
|
|
||||||
|
// Element capacity of the fixed-size MaybeStackBuffer destinations used by the
// UTF-8 <-> UCS-2 transcoders below; also passed as the initial destination
// capacity to u_strFromUTF8() / u_strToUTF8().
const size_t kStorageSize = 1024;
|
||||||
|
|
||||||
|
// TODO(jasnell): This could potentially become a member of MaybeStackBuffer
|
||||||
|
// at some point in the future. Care would need to be taken with the
|
||||||
|
// MaybeStackBuffer<UChar> variant below.
|
||||||
|
MaybeLocal<Object> AsBuffer(Isolate* isolate,
|
||||||
|
MaybeStackBuffer<char>* buf,
|
||||||
|
size_t len) {
|
||||||
|
if (buf->IsAllocated()) {
|
||||||
|
MaybeLocal<Object> ret = Buffer::New(isolate, buf->out(), len);
|
||||||
|
if (!ret.IsEmpty()) buf->Release();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
return Buffer::Copy(isolate, buf->out(), len);
|
||||||
|
}
|
||||||
|
|
||||||
|
// UChar flavour of AsBuffer(): exposes the first `len` BYTES of `buf` as a
// Buffer object. Returns an empty MaybeLocal if the Buffer could not be
// created.
MaybeLocal<Object> AsBuffer(Isolate* isolate,
                            MaybeStackBuffer<UChar>* buf,
                            size_t len) {
  char* const bytes = reinterpret_cast<char*>(**buf);
  const bool separately_allocated = buf->IsAllocated();

  // Separately allocated storage is handed to Buffer::New directly; storage
  // embedded in the MaybeStackBuffer is copied.
  MaybeLocal<Object> ret = separately_allocated
      ? Buffer::New(isolate, bytes, len)
      : Buffer::Copy(isolate, bytes, len);
  if (separately_allocated && !ret.IsEmpty())
    buf->Release();

  if (ret.IsEmpty() || !IsBigEndian())
    return ret;

  // On big-endian hosts the bytes of every 16-bit unit in the resulting
  // Buffer are swapped in place.
  SPREAD_BUFFER_ARG(ret.ToLocalChecked(), out);
  SwapBytes16(out_data, out_length);
  return ret;
}
|
||||||
|
|
||||||
|
struct Converter {
|
||||||
|
explicit Converter(const char* name, const char* sub = NULL)
|
||||||
|
: conv(nullptr) {
|
||||||
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
conv = ucnv_open(name, &status);
|
||||||
|
CHECK(U_SUCCESS(status));
|
||||||
|
if (sub != NULL) {
|
||||||
|
ucnv_setSubstChars(conv, sub, strlen(sub), &status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
~Converter() {
|
||||||
|
ucnv_close(conv);
|
||||||
|
}
|
||||||
|
|
||||||
|
UConverter* conv;
|
||||||
|
};
|
||||||
|
|
||||||
|
// One-Shot Converters
|
||||||
|
|
||||||
|
// Copies raw UCS-2 bytes from `data` into `dest` so they can be handed to ICU
// as UChar units.
//
//   dest             destination; storage for `length_in_chars` elements is
//                    requested before copying
//   data, length     source bytes and their count
//   length_in_chars  number of whole UChar units to make room for
//
// On big-endian hosts the copied bytes are swapped pairwise in place.
void CopySourceBuffer(MaybeStackBuffer<UChar>* dest,
                      const char* data,
                      const size_t length,
                      const size_t length_in_chars) {
  dest->AllocateSufficientStorage(length_in_chars);
  char* dst = reinterpret_cast<char*>(**dest);

  // Never copy more bytes than the storage that was just requested. With an
  // odd `length` the callers compute length_in_chars = length / 2, so copying
  // `length` bytes would write one byte past the requested storage and give
  // SwapBytes16() an odd byte count. The dangling byte cannot form a UChar
  // and is dropped.
  const size_t whole_bytes = length_in_chars * sizeof(UChar);
  const size_t nbytes = whole_bytes < length ? whole_bytes : length;

  memcpy(dst, data, nbytes);
  if (IsBigEndian()) {
    SwapBytes16(dst, nbytes);
  }
}
|
||||||
|
|
||||||
|
// Common signature of the one-shot transcoders below, so the binding can pick
// one at run time and call it through a pointer. `status` receives the ICU
// status of the conversion.
using TranscodeFunc = MaybeLocal<Object> (*)(Isolate* isolate,
                                             const char* fromEncoding,
                                             const char* toEncoding,
                                             const char* source,
                                             const size_t source_length,
                                             UErrorCode* status);
|
||||||
|
|
||||||
|
// Generic one-shot transcoder: converts `source_length` bytes at `source` from
// `fromEncoding` to `toEncoding` with ucnv_convertEx(). The target converter
// is opened with "?" as its substitution sequence.
//
// Returns the converted bytes as a Buffer, or an empty MaybeLocal when ICU
// reports a failure in `*status` (or the Buffer cannot be created).
MaybeLocal<Object> Transcode(Isolate* isolate,
                             const char* fromEncoding,
                             const char* toEncoding,
                             const char* source,
                             const size_t source_length,
                             UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<char> result;
  Converter to(toEncoding, "?");
  Converter from(fromEncoding);

  // Size the output from the target converter's maximum bytes per character.
  const uint32_t limit = source_length * ucnv_getMaxCharSize(to.conv);
  result.AllocateSufficientStorage(limit);

  // ucnv_convertEx() advances `target` past the last byte it wrote.
  char* target = *result;
  ucnv_convertEx(to.conv, from.conv, &target, target + limit,
                 &source, source + source_length, nullptr, nullptr,
                 nullptr, nullptr, true, true, status);
  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  return AsBuffer(isolate, &result, target - &result[0]);
}
|
||||||
|
|
||||||
|
// Converts `source_length` bytes in `fromEncoding` to UCS-2 with
// ucnv_toUChars(). The binding selects this for ASCII and Latin-1 sources;
// `toEncoding` is unused and exists only to satisfy TranscodeFunc.
//
// Returns the converted text as a Buffer, or an empty MaybeLocal when ICU
// reports a failure in `*status` (or the Buffer cannot be created).
MaybeLocal<Object> TranscodeToUcs2(Isolate* isolate,
                                   const char* fromEncoding,
                                   const char* toEncoding,
                                   const char* source,
                                   const size_t source_length,
                                   UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeLocal<Object> ret;
  // One UChar of room per source byte (assumes the constructor argument is an
  // element count, as AllocateSufficientStorage is used in CopySourceBuffer
  // -- TODO confirm).
  MaybeStackBuffer<UChar> destbuf(source_length);
  Converter from(fromEncoding);

  // The destination capacity is expressed in UChars, and the number of UChars
  // actually produced is what ucnv_toUChars() returns. Previously the size of
  // a pointer (sizeof(*destbuf), i.e. sizeof(UChar*)) was multiplied in, which
  // overstated both the capacity given to ICU and the byte length of the
  // returned Buffer.
  const int32_t written = ucnv_toUChars(from.conv, *destbuf, source_length,
                                        source, source_length, status);
  if (U_SUCCESS(*status))
    ret = AsBuffer(isolate, &destbuf, written * sizeof(UChar));
  return ret;
}
|
||||||
|
|
||||||
|
// Converts UCS-2 input to `toEncoding` with ucnv_fromUChars(); the target
// converter uses "?" as its substitution sequence. `fromEncoding` is unused
// and exists only to satisfy TranscodeFunc.
//
// Returns the converted bytes as a Buffer, or an empty MaybeLocal when ICU
// reports a failure in `*status` (or the Buffer cannot be created).
MaybeLocal<Object> TranscodeFromUcs2(Isolate* isolate,
                                     const char* fromEncoding,
                                     const char* toEncoding,
                                     const char* source,
                                     const size_t source_length,
                                     UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<UChar> units;
  Converter to(toEncoding, "?");

  // Stage the raw bytes as UChar units for ICU.
  const size_t unit_count = source_length / sizeof(UChar);
  CopySourceBuffer(&units, source, source_length, unit_count);

  // The output is given one byte of room per input unit.
  MaybeStackBuffer<char> encoded(unit_count);
  const uint32_t encoded_length = ucnv_fromUChars(to.conv, *encoded,
                                                  unit_count, *units,
                                                  unit_count, status);
  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  return AsBuffer(isolate, &encoded, encoded_length);
}
|
||||||
|
|
||||||
|
// Converts UTF-8 input to UCS-2 with u_strFromUTF8(). The first attempt
// targets the fixed kStorageSize buffer; if ICU reports a buffer overflow the
// buffer is grown to the length ICU asked for and the conversion is repeated.
// `fromEncoding` and `toEncoding` are unused and exist only to satisfy
// TranscodeFunc.
//
// Returns the converted text as a Buffer, or an empty MaybeLocal when ICU
// reports a failure in `*status` (or the Buffer cannot be created).
MaybeLocal<Object> TranscodeUcs2FromUtf8(Isolate* isolate,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<UChar, kStorageSize> destbuf;
  int32_t unit_count;

  u_strFromUTF8(*destbuf, kStorageSize, &unit_count,
                source, source_length, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // `unit_count` now holds the length ICU needs; retry with that much room.
    *status = U_ZERO_ERROR;
    destbuf.AllocateSufficientStorage(unit_count);
    u_strFromUTF8(*destbuf, unit_count, &unit_count,
                  source, source_length, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  // AsBuffer() takes a byte length, hence the scaling by the unit size.
  return AsBuffer(isolate, &destbuf, unit_count * sizeof(**destbuf));
}
|
||||||
|
|
||||||
|
// Converts UCS-2 input to UTF-8 with u_strToUTF8(). The first attempt targets
// the fixed kStorageSize buffer; if ICU reports a buffer overflow the buffer
// is grown to the length ICU asked for and the conversion is repeated.
// `fromEncoding` and `toEncoding` are unused and exist only to satisfy
// TranscodeFunc.
//
// Returns the converted bytes as a Buffer, or an empty MaybeLocal when ICU
// reports a failure in `*status` (or the Buffer cannot be created).
MaybeLocal<Object> TranscodeUtf8FromUcs2(Isolate* isolate,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeLocal<Object> ret;
  const size_t length_in_chars = source_length / sizeof(UChar);
  int32_t result_length;
  MaybeStackBuffer<UChar> sourcebuf;
  MaybeStackBuffer<char, kStorageSize> destbuf;

  // Stage the raw bytes as UChar units for ICU.
  CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars);

  u_strToUTF8(*destbuf, kStorageSize, &result_length,
              *sourcebuf, length_in_chars, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // `result_length` now holds the length ICU needs; retry with that room.
    *status = U_ZERO_ERROR;
    destbuf.AllocateSufficientStorage(result_length);
    u_strToUTF8(*destbuf, result_length, &result_length, *sourcebuf,
                length_in_chars, status);
  }

  // Both the first-try and the retry path go through AsBuffer(), which only
  // releases the storage from `destbuf` once the Buffer exists. The retry path
  // used to call Release() unconditionally after Buffer::New(), so a failed
  // Buffer creation left the allocation owned by nobody.
  if (U_SUCCESS(*status))
    ret = AsBuffer(isolate, &destbuf, result_length);
  return ret;
}
|
||||||
|
|
||||||
|
const char* EncodingName(const enum encoding encoding) {
|
||||||
|
switch (encoding) {
|
||||||
|
case ASCII: return "us-ascii";
|
||||||
|
case LATIN1: return "iso8859-1";
|
||||||
|
case UCS2: return "utf16le";
|
||||||
|
case UTF8: return "utf-8";
|
||||||
|
default: return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SupportedEncoding(const enum encoding encoding) {
|
||||||
|
switch (encoding) {
|
||||||
|
case ASCII:
|
||||||
|
case LATIN1:
|
||||||
|
case UCS2:
|
||||||
|
case UTF8: return true;
|
||||||
|
default: return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Transcode(const FunctionCallbackInfo<Value>&args) {
|
||||||
|
Environment* env = Environment::GetCurrent(args);
|
||||||
|
Isolate* isolate = env->isolate();
|
||||||
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
MaybeLocal<Object> result;
|
||||||
|
|
||||||
|
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
|
||||||
|
SPREAD_BUFFER_ARG(args[0], ts_obj);
|
||||||
|
const enum encoding fromEncoding = ParseEncoding(isolate, args[1], BUFFER);
|
||||||
|
const enum encoding toEncoding = ParseEncoding(isolate, args[2], BUFFER);
|
||||||
|
|
||||||
|
if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) {
|
||||||
|
TranscodeFunc tfn = &Transcode;
|
||||||
|
switch (fromEncoding) {
|
||||||
|
case ASCII:
|
||||||
|
case LATIN1:
|
||||||
|
if (toEncoding == UCS2)
|
||||||
|
tfn = &TranscodeToUcs2;
|
||||||
|
break;
|
||||||
|
case UTF8:
|
||||||
|
if (toEncoding == UCS2)
|
||||||
|
tfn = &TranscodeUcs2FromUtf8;
|
||||||
|
break;
|
||||||
|
case UCS2:
|
||||||
|
switch (toEncoding) {
|
||||||
|
case UCS2:
|
||||||
|
tfn = &Transcode;
|
||||||
|
break;
|
||||||
|
case UTF8:
|
||||||
|
tfn = &TranscodeUtf8FromUcs2;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
tfn = TranscodeFromUcs2;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// This should not happen because of the SupportedEncoding checks
|
||||||
|
ABORT();
|
||||||
|
}
|
||||||
|
|
||||||
|
result = tfn(isolate, EncodingName(fromEncoding), EncodingName(toEncoding),
|
||||||
|
ts_obj_data, ts_obj_length, &status);
|
||||||
|
} else {
|
||||||
|
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result.IsEmpty())
|
||||||
|
return args.GetReturnValue().Set(status);
|
||||||
|
|
||||||
|
return args.GetReturnValue().Set(result.ToLocalChecked());
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ICUErrorName(const FunctionCallbackInfo<Value>& args) {
|
||||||
|
Environment* env = Environment::GetCurrent(args);
|
||||||
|
UErrorCode status = static_cast<UErrorCode>(args[0]->Int32Value());
|
||||||
|
args.GetReturnValue().Set(
|
||||||
|
String::NewFromUtf8(env->isolate(),
|
||||||
|
u_errorName(status),
|
||||||
|
v8::NewStringType::kNormal).ToLocalChecked());
|
||||||
|
}
|
||||||
|
|
||||||
bool InitializeICUDirectory(const char* icu_data_path) {
|
bool InitializeICUDirectory(const char* icu_data_path) {
|
||||||
if (icu_data_path != nullptr) {
|
if (icu_data_path != nullptr) {
|
||||||
flag_icu_data_dir = true;
|
flag_icu_data_dir = true;
|
||||||
@ -282,6 +558,10 @@ void Init(Local<Object> target,
|
|||||||
env->SetMethod(target, "toUnicode", ToUnicode);
|
env->SetMethod(target, "toUnicode", ToUnicode);
|
||||||
env->SetMethod(target, "toASCII", ToASCII);
|
env->SetMethod(target, "toASCII", ToASCII);
|
||||||
env->SetMethod(target, "getStringWidth", GetStringWidth);
|
env->SetMethod(target, "getStringWidth", GetStringWidth);
|
||||||
|
|
||||||
|
// One-shot converters
|
||||||
|
env->SetMethod(target, "icuErrName", ICUErrorName);
|
||||||
|
env->SetMethod(target, "transcode", Transcode);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace i18n
|
} // namespace i18n
|
||||||
|
27
src/util.h
27
src/util.h
@ -343,6 +343,15 @@ class MaybeStackBuffer {
|
|||||||
buf_ = nullptr;
|
buf_ = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsAllocated() {
|
||||||
|
return buf_ != buf_st_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Makes the object forget whatever storage it was pointing at: the length is
// reset to zero and the pointer is aimed back at the embedded stack array.
// Nothing is freed here.
void Release() {
  length_ = 0;
  buf_ = buf_st_;
}
|
||||||
|
|
||||||
MaybeStackBuffer() : length_(0), buf_(buf_st_) {
|
MaybeStackBuffer() : length_(0), buf_(buf_st_) {
|
||||||
// Default to a zero-length, null-terminated buffer.
|
// Default to a zero-length, null-terminated buffer.
|
||||||
buf_[0] = T();
|
buf_[0] = T();
|
||||||
@ -378,6 +387,24 @@ class BufferValue : public MaybeStackBuffer<char> {
|
|||||||
explicit BufferValue(v8::Isolate* isolate, v8::Local<v8::Value> value);
|
explicit BufferValue(v8::Isolate* isolate, v8::Local<v8::Value> value);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Early-exit guard for functions that require a Buffer argument: when `obj`
// is not a Buffer instance, throws a TypeError through `env` and returns from
// the enclosing function. `env` is parenthesized so any pointer-valued
// expression can be passed safely.
#define THROW_AND_RETURN_UNLESS_BUFFER(env, obj)                            \
  do {                                                                      \
    if (!Buffer::HasInstance(obj))                                          \
      return (env)->ThrowTypeError("argument should be a Buffer");          \
  } while (0)
|
||||||
|
|
||||||
|
// Unpacks a Uint8Array value into locals named after `name`:
//   name          the value as v8::Local<v8::Uint8Array>
//   name##_c      contents of the backing ArrayBuffer
//   name##_offset byte offset of the view into the backing store
//   name##_length byte length of the view
//   name##_data   pointer to the first byte of the view
// Aborts via CHECK if `val` is not a Uint8Array, or if a non-empty view has a
// null data pointer. Every V8 type is fully qualified so the macro does not
// depend on using-declarations at the expansion site. It declares variables,
// so it must be used at statement level inside a block.
#define SPREAD_BUFFER_ARG(val, name)                                          \
  CHECK((val)->IsUint8Array());                                               \
  v8::Local<v8::Uint8Array> name = (val).As<v8::Uint8Array>();                \
  v8::ArrayBuffer::Contents name##_c = name->Buffer()->GetContents();         \
  const size_t name##_offset = name->ByteOffset();                            \
  const size_t name##_length = name->ByteLength();                            \
  char* const name##_data =                                                   \
      static_cast<char*>(name##_c.Data()) + name##_offset;                    \
  if (name##_length > 0)                                                      \
    CHECK_NE(name##_data, nullptr);
|
||||||
|
|
||||||
|
|
||||||
} // namespace node
|
} // namespace node
|
||||||
|
|
||||||
#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
|
#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
|
||||||
|
48
test/parallel/test-icu-transcode.js
Normal file
48
test/parallel/test-icu-transcode.js
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
require('../common');
|
||||||
|
const buffer = require('buffer');
|
||||||
|
const assert = require('assert');
|
||||||
|
|
||||||
|
const orig = Buffer.from('tést €', 'utf8');
|
||||||
|
|
||||||
|
// Test Transcoding
|
||||||
|
const tests = {
|
||||||
|
'latin1': [0x74, 0xe9, 0x73, 0x74, 0x20, 0x3f],
|
||||||
|
'ascii': [0x74, 0x3f, 0x73, 0x74, 0x20, 0x3f],
|
||||||
|
'ucs2': [0x74, 0x00, 0xe9, 0x00, 0x73,
|
||||||
|
0x00, 0x74, 0x00, 0x20, 0x00,
|
||||||
|
0xac, 0x20]
|
||||||
|
};
|
||||||
|
|
||||||
|
for (const test in tests) {
|
||||||
|
const dest = buffer.transcode(orig, 'utf8', test);
|
||||||
|
assert.strictEqual(dest.length, tests[test].length);
|
||||||
|
for (var n = 0; n < tests[test].length; n++)
|
||||||
|
assert.strictEqual(dest[n], tests[test][n]);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const dest = buffer.transcode(Buffer.from(tests.ucs2), 'ucs2', 'utf8');
|
||||||
|
assert.strictEqual(dest.toString(), orig.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const utf8 = Buffer.from('€'.repeat(4000), 'utf8');
|
||||||
|
const ucs2 = Buffer.from('€'.repeat(4000), 'ucs2');
|
||||||
|
const utf8_to_ucs2 = buffer.transcode(utf8, 'utf8', 'ucs2');
|
||||||
|
const ucs2_to_utf8 = buffer.transcode(ucs2, 'ucs2', 'utf8');
|
||||||
|
assert.deepStrictEqual(utf8, ucs2_to_utf8);
|
||||||
|
assert.deepStrictEqual(ucs2, utf8_to_ucs2);
|
||||||
|
assert.strictEqual(ucs2_to_utf8.toString('utf8'),
|
||||||
|
utf8_to_ucs2.toString('ucs2'));
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.throws(
|
||||||
|
() => buffer.transcode(Buffer.from('a'), 'b', 'utf8'),
|
||||||
|
/Unable to transcode Buffer \[U_ILLEGAL_ARGUMENT_ERROR\]/
|
||||||
|
);
|
||||||
|
assert.throws(
|
||||||
|
() => buffer.transcode(Buffer.from('a'), 'uf8', 'b'),
|
||||||
|
/Unable to transcode Buffer \[U_ILLEGAL_ARGUMENT_ERROR\]/
|
||||||
|
);
|
@ -20,9 +20,7 @@
|
|||||||
'type': 'none',
|
'type': 'none',
|
||||||
'toolsets': [ 'target' ],
|
'toolsets': [ 'target' ],
|
||||||
'direct_dependent_settings': {
|
'direct_dependent_settings': {
|
||||||
'defines': [
|
'defines': []
|
||||||
'UCONFIG_NO_CONVERSION=1',
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user