string_decoder: reimplement in C++

Implement the string decoder in C++. The perks are a decent speed boost (for decoding, whereas creation shows some performance degradation), the fact that this can now be used more easily to add native decoding support to C++ streams, and (arguably) more readable variable names. PR-URL: https://github.com/nodejs/node/pull/18537 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
2018-02-01 02:28:39 +01:00 · 2018-02-01 02:28:39 +01:00 · 180af17b52
commit 180af17b52
parent de848ac1e0
7 changed files with 491 additions and 251 deletions
--- a/lib/string_decoder.js
+++ b/lib/string_decoder.js
@ -22,10 +22,23 @@
 'use strict';
 const { Buffer } = require('buffer');
 const {
  kIncompleteCharactersStart,
  kIncompleteCharactersEnd,
  kMissingBytes,
  kBufferedBytes,
  kEncodingField,
  kSize,
  decode,
  flush,
  encodings
 } = internalBinding('string_decoder');
 const internalUtil = require('internal/util');
 const errors = require('internal/errors');
 const isEncoding = Buffer[internalUtil.kIsEncodingSymbol];
 const kNativeDecoder = Symbol('kNativeDecoder');
 // Do not cache `Buffer.isEncoding` when checking encoding names as some
 // modules monkey-patch it to support additional encodings
 function normalizeEncoding(enc) {
@ -36,258 +49,54 @@ function normalizeEncoding(enc) {
  return nenc || enc;
 }
 // Reverse lookup table: maps each encoding name exported by the native
 // binding to its position in the `encodings` list.
 const encodingsMap = {};
 for (let index = 0; index < encodings.length; index += 1) {
  const encodingName = encodings[index];
  encodingsMap[encodingName] = index;
 }
 // StringDecoder provides an interface for efficiently splitting a series of
 // buffers into a series of JS strings without breaking apart multi-byte
 // characters.
-exports.StringDecoder = StringDecoder;
+class StringDecoder {
-function StringDecoder(encoding) {
+  constructor(encoding) {
    this.encoding = normalizeEncoding(encoding);
-  var nb;
+    this[kNativeDecoder] = Buffer.alloc(kSize);
-  switch (this.encoding) {
+    this[kNativeDecoder][kEncodingField] = encodingsMap[this.encoding];
    case 'utf16le':
      this.text = utf16Text;
      this.end = utf16End;
      nb = 4;
      break;
    case 'utf8':
      this.fillLast = utf8FillLast;
      nb = 4;
      break;
    case 'base64':
      this.text = base64Text;
      this.end = base64End;
      nb = 3;
      break;
    default:
      this.write = simpleWrite;
      this.end = simpleEnd;
      return;
  }
  this.lastNeed = 0;
  this.lastTotal = 0;
  this.lastChar = Buffer.allocUnsafe(nb);
  }
-StringDecoder.prototype.write = function(buf) {
+  write(buf) {
-  if (buf.length === 0)
+    if (typeof buf === 'string')
-    return '';
+      return buf;
-  var r;
+    if (!ArrayBuffer.isView(buf))
-  var i;
+      throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'buf',
-  if (this.lastNeed) {
+                                 ['Buffer', 'Uint8Array', 'ArrayBufferView']);
-    r = this.fillLast(buf);
+    return decode(this[kNativeDecoder], buf);
    if (r === undefined)
      return '';
    i = this.lastNeed;
    this.lastNeed = 0;
  } else {
    i = 0;
  }
  if (i < buf.length)
    return (r ? r + this.text(buf, i) : this.text(buf, i));
  return r || '';
 };
 StringDecoder.prototype.end = utf8End;
 // Returns only complete characters in a Buffer
 StringDecoder.prototype.text = utf8Text;
 // Tries to finish a buffered partial (non-UTF-8) character with bytes taken
 // from `buf`. Returns the decoded character once enough bytes are available;
 // otherwise stores what `buf` offers, lowers `lastNeed` accordingly and
 // returns undefined.
 StringDecoder.prototype.fillLast = function(buf) {
  const writeOffset = this.lastTotal - this.lastNeed;
  const canFinish = this.lastNeed <= buf.length;
  if (!canFinish) {
    // Not enough input yet: keep everything we got and wait for more.
    buf.copy(this.lastChar, writeOffset, 0, buf.length);
    this.lastNeed -= buf.length;
    return undefined;
  }
  buf.copy(this.lastChar, writeOffset, 0, this.lastNeed);
  return this.lastChar.toString(this.encoding, 0, this.lastTotal);
 };
 // Classifies a single UTF-8 byte.
 // Returns 0 for ASCII, the total character length (2, 3 or 4) for a lead
 // byte, -1 for a continuation byte and -2 for anything else (invalid).
 function utf8CheckByte(byte) {
  if (byte <= 0x7F)
    return 0;
  if (byte >> 5 === 0x06)
    return 2;
  if (byte >> 4 === 0x0E)
    return 3;
  if (byte >> 3 === 0x1E)
    return 4;
  const isContinuation = byte >> 6 === 0x02;
  if (isContinuation)
    return -1;
  return -2;
 }
-// Checks at most 3 bytes at the end of a Buffer in order to detect an
+  end(buf) {
-// incomplete multi-byte UTF-8 character. The total number of bytes (2, 3, or 4)
+    let ret = '';
-// needed to complete the UTF-8 character (if applicable) are returned.
+    if (buf !== undefined)
-function utf8CheckIncomplete(self, buf, i) {
+      ret = this.write(buf);
-  var j = buf.length - 1;
+    if (this[kNativeDecoder][kBufferedBytes] > 0)
-  if (j < i)
+      ret += flush(this[kNativeDecoder]);
-    return 0;
+    return ret;
  var nb = utf8CheckByte(buf[j]);
  if (nb >= 0) {
    if (nb > 0)
      self.lastNeed = nb - 1;
    return nb;
  }
  if (--j < i || nb === -2)
    return 0;
  nb = utf8CheckByte(buf[j]);
  if (nb >= 0) {
    if (nb > 0)
      self.lastNeed = nb - 2;
    return nb;
  }
  if (--j < i || nb === -2)
    return 0;
  nb = utf8CheckByte(buf[j]);
  if (nb >= 0) {
    if (nb > 0) {
      if (nb === 2)
        nb = 0;
      else
        self.lastNeed = nb - 3;
    }
    return nb;
  }
  return 0;
  }
-// Validates as many continuation bytes for a multi-byte UTF-8 character as
+  /* Everything below this line is undocumented legacy stuff. */
-// needed or are available. If we see a non-continuation byte where we expect
+
-// one, we "replace" the validated continuation bytes we've seen so far with
+  text(buf, offset) {
-// a single UTF-8 replacement character ('\ufffd'), to match v8's UTF-8 decoding
+    this[kNativeDecoder][kMissingBytes] = 0;
-// behavior. The continuation byte check is included three times in the case
+    this[kNativeDecoder][kBufferedBytes] = 0;
-// where all of the continuation bytes for a character exist in the same buffer.
+    return this.write(buf.slice(offset));
 // It is also done this way as a slight performance increase instead of using a
 // loop.
 function utf8CheckExtraBytes(self, buf, p) {
  if ((buf[0] & 0xC0) !== 0x80) {
    self.lastNeed = 0;
    return '\ufffd';
  }
  if (self.lastNeed > 1 && buf.length > 1) {
    if ((buf[1] & 0xC0) !== 0x80) {
      self.lastNeed = 1;
      return '\ufffd';
    }
    if (self.lastNeed > 2 && buf.length > 2) {
      if ((buf[2] & 0xC0) !== 0x80) {
        self.lastNeed = 2;
        return '\ufffd';
  }
  get lastTotal() {
    return this[kNativeDecoder][kBufferedBytes] + this.lastNeed;
  }
  get lastChar() {
    return this[kNativeDecoder].subarray(kIncompleteCharactersStart,
                                         kIncompleteCharactersEnd);
  }
 }
-// Attempts to complete a multi-byte UTF-8 character using bytes from a Buffer.
+exports.StringDecoder = StringDecoder;
 // Tries to finish a buffered multi-byte UTF-8 character with bytes from `buf`.
 // Returns whatever utf8CheckExtraBytes() reports when it reports anything,
 // the decoded character once `lastNeed` bytes are available, and undefined
 // when more input is still required.
 function utf8FillLast(buf) {
  // Must be computed up front: the validation helper may modify `lastNeed`.
  const writeOffset = this.lastTotal - this.lastNeed;
  const replacement = utf8CheckExtraBytes(this, buf, writeOffset);
  if (replacement !== undefined)
    return replacement;
  const canFinish = this.lastNeed <= buf.length;
  if (!canFinish) {
    buf.copy(this.lastChar, writeOffset, 0, buf.length);
    this.lastNeed -= buf.length;
    return undefined;
  }
  buf.copy(this.lastChar, writeOffset, 0, this.lastNeed);
  return this.lastChar.toString(this.encoding, 0, this.lastTotal);
 }
 // Decodes every complete UTF-8 character in `buf` starting at index `i`.
 // When the buffer stops in the middle of a character, the bytes of that
 // character are stashed in `lastChar` until the rest arrives.
 function utf8Text(buf, i) {
  const charLength = utf8CheckIncomplete(this, buf, i);
  if (this.lastNeed) {
    this.lastTotal = charLength;
    // Everything from `cut` onwards belongs to the unfinished character.
    const cut = buf.length - (charLength - this.lastNeed);
    buf.copy(this.lastChar, 0, cut);
    return buf.toString('utf8', i, cut);
  }
  return buf.toString('utf8', i);
 }
 // Finishes a UTF-8 stream: decodes the optional final `buf` and, if a
 // character is still incomplete, appends one replacement character and
 // resets the pending-character bookkeeping.
 function utf8End(buf) {
  let tail = '';
  if (buf && buf.length)
    tail = this.write(buf);
  if (!this.lastNeed)
    return tail;
  this.lastNeed = 0;
  this.lastTotal = 0;
  return tail + '\ufffd';
 }
 // Decodes UTF-16LE text from `buf` starting at index `i`.
 // Two situations leave a character unfinished: an odd number of bytes (half
 // a code unit is missing) and an even number of bytes that ends on a
 // leading/high surrogate (the trailing surrogate is missing). In both cases
 // the dangling bytes are stored in `lastChar` for the next chunk.
 function utf16Text(buf, i) {
  const lastIndex = buf.length - 1;
  const isEven = (buf.length - i) % 2 === 0;
  if (!isEven) {
    this.lastNeed = 1;
    this.lastTotal = 2;
    this.lastChar[0] = buf[lastIndex];
    return buf.toString('utf16le', i, lastIndex);
  }

  const text = buf.toString('utf16le', i);
  if (!text)
    return text;

  const finalUnit = text.charCodeAt(text.length - 1);
  const endsOnHighSurrogate = finalUnit >= 0xD800 && finalUnit <= 0xDBFF;
  if (!endsOnHighSurrogate)
    return text;

  this.lastNeed = 2;
  this.lastTotal = 4;
  this.lastChar[0] = buf[lastIndex - 1];
  this.lastChar[1] = buf[lastIndex];
  return text.slice(0, -1);
 }
 // Finishes a UTF-16LE stream. No replacement character is added for a
 // dangling partial character; the buffered bytes are simply handed to the
 // regular decoder and whatever it produces is appended.
 function utf16End(buf) {
  let tail = '';
  if (buf && buf.length)
    tail = this.write(buf);
  if (!this.lastNeed)
    return tail;
  const bufferedLength = this.lastTotal - this.lastNeed;
  this.lastNeed = 0;
  this.lastTotal = 0;
  return tail + this.lastChar.toString('utf16le', 0, bufferedLength);
 }
 // Base64-encodes the bytes of `buf` from index `i`, three bytes at a time.
 // One or two leftover bytes that do not fill a 3-byte group are kept in
 // `lastChar` so they can be combined with the next chunk.
 function base64Text(buf, i) {
  const leftover = (buf.length - i) % 3;
  if (leftover === 0)
    return buf.toString('base64', i);

  const lastIndex = buf.length - 1;
  this.lastNeed = 3 - leftover;
  this.lastTotal = 3;
  if (leftover !== 1) {
    this.lastChar[0] = buf[lastIndex - 1];
    this.lastChar[1] = buf[lastIndex];
  } else {
    this.lastChar[0] = buf[lastIndex];
  }
  return buf.toString('base64', i, buf.length - leftover);
 }
 // Finishes a base64 stream: decodes the optional final `buf`, then encodes
 // any leftover bytes still held in `lastChar` and resets the bookkeeping.
 function base64End(buf) {
  let tail = '';
  if (buf && buf.length)
    tail = this.write(buf);
  if (!this.lastNeed)
    return tail;
  const bufferedLength = 3 - this.lastNeed;
  this.lastNeed = 0;
  this.lastTotal = 0;
  return tail + this.lastChar.toString('base64', 0, bufferedLength);
 }
 // Single-byte encodings (e.g. ascii, latin1, hex) never split a character
 // across chunks, so the buffer is converted directly.
 function simpleWrite(buf) {
  const decoded = buf.toString(this.encoding);
  return decoded;
 }
 // Finishes a single-byte-encoding stream: decodes the optional final `buf`;
 // there is never any buffered state to flush.
 function simpleEnd(buf) {
  if (!buf || !buf.length)
    return '';
  return this.write(buf);
 }
--- a/node.gyp
+++ b/node.gyp
@ -326,6 +326,7 @@
        'src/signal_wrap.cc',
        'src/spawn_sync.cc',
        'src/string_bytes.cc',
        'src/string_decoder.cc',
        'src/string_search.cc',
        'src/stream_base.cc',
        'src/stream_wrap.cc',
@ -379,6 +380,8 @@
        'src/req_wrap.h',
        'src/req_wrap-inl.h',
        'src/string_bytes.h',
        'src/string_decoder.h',
        'src/string_decoder-inl.h',
        'src/stream_base.h',
        'src/stream_base-inl.h',
        'src/stream_wrap.h',
@ -989,6 +992,7 @@
        '<(obj_path)<(obj_separator)node_url.<(obj_suffix)',
        '<(obj_path)<(obj_separator)util.<(obj_suffix)',
        '<(obj_path)<(obj_separator)string_bytes.<(obj_suffix)',
        '<(obj_path)<(obj_separator)string_decoder.<(obj_suffix)',
        '<(obj_path)<(obj_separator)string_search.<(obj_suffix)',
        '<(obj_path)<(obj_separator)stream_base.<(obj_suffix)',
        '<(obj_path)<(obj_separator)node_constants.<(obj_suffix)',
--- a/src/node_internals.h
+++ b/src/node_internals.h
@ -120,6 +120,7 @@ struct sockaddr;
    V(signal_wrap)                                                            \
    V(spawn_sync)                                                             \
    V(stream_wrap)                                                            \
    V(string_decoder)                                                         \
    V(tcp_wrap)                                                               \
    V(timer_wrap)                                                             \
    V(trace_events)                                                           \
--- a/src/string_decoder-inl.h
+++ b/src/string_decoder-inl.h
@ -0,0 +1,38 @@
 #ifndef SRC_STRING_DECODER_INL_H_
 #define SRC_STRING_DECODER_INL_H_
 #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
 #include "string_decoder.h"
 #include "util.h"
 namespace node {
 // Selects the encoding used for subsequent decoding and discards any
 // partially buffered character by zeroing both byte counters.
 void StringDecoder::SetEncoding(enum encoding encoding) {
  state_[kEncodingField] = encoding;
  state_[kMissingBytes] = 0;
  state_[kBufferedBytes] = 0;
 }
 // Returns the encoding currently stored in the state array.
 enum encoding StringDecoder::Encoding() const {
  const uint8_t raw_encoding = state_[kEncodingField];
  return static_cast<enum encoding>(raw_encoding);
 }
 // Number of bytes of an unfinished character currently held in the
 // incomplete-character buffer.
 unsigned StringDecoder::BufferedBytes() const {
  return static_cast<unsigned>(state_[kBufferedBytes]);
 }
 // Number of bytes still required to complete the buffered character.
 unsigned StringDecoder::MissingBytes() const {
  return static_cast<unsigned>(state_[kMissingBytes]);
 }
 // Returns a pointer to the first byte of the region of the state array that
 // stores the bytes of an unfinished character.
 char* StringDecoder::IncompleteCharacterBuffer() {
  uint8_t* first_byte = &state_[kIncompleteCharactersStart];
  return reinterpret_cast<char*>(first_byte);
 }
 }  // namespace node
 #endif  // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
 #endif   // SRC_STRING_DECODER_INL_H_
--- a/src/string_decoder.cc
+++ b/src/string_decoder.cc
@ -0,0 +1,334 @@
 #include "string_decoder-inl.h"
 #include "string_bytes.h"
 #include "node_internals.h"
 #include "node_buffer.h"
 using v8::Array;
 using v8::Context;
 using v8::FunctionCallbackInfo;
 using v8::Integer;
 using v8::Isolate;
 using v8::Local;
 using v8::MaybeLocal;
 using v8::Object;
 using v8::String;
 using v8::Value;
 namespace node {
 namespace {
 // Creates a JS string from `length` bytes at `data`, interpreted according to
 // `encoding`. UTF-8 is handed directly to V8; every other encoding goes
 // through StringBytes::Encode(), and a failure there is thrown on `isolate`
 // before the empty result is returned.
 MaybeLocal<String> MakeString(Isolate* isolate,
                               const char* data,
                               size_t length,
                               enum encoding encoding) {
  if (encoding == UTF8) {
    // Nothing below applies to UTF-8, so return V8's result as-is.
    return String::NewFromUtf8(isolate,
                               data,
                               v8::NewStringType::kNormal,
                               length);
  }

  Local<Value> error;
  MaybeLocal<Value> encoded;
  if (encoding != UCS2) {
    encoded = StringBytes::Encode(isolate, data, length, encoding, &error);
  } else {
 #ifdef DEBUG
    CHECK_EQ(reinterpret_cast<uintptr_t>(data) % 2, 0);
    CHECK_EQ(length % 2, 0);
 #endif
    // UCS2 input is passed on as 16-bit code units: half as many units as
    // there are bytes.
    const uint16_t* code_units = reinterpret_cast<const uint16_t*>(data);
    encoded = StringBytes::Encode(isolate, code_units, length / 2, &error);
  }

  if (encoded.IsEmpty()) {
    CHECK(!error.IsEmpty());
    isolate->ThrowException(error);
  }
 #ifdef DEBUG
  CHECK(encoded.IsEmpty() || encoded.ToLocalChecked()->IsString());
 #endif
  return encoded.FromMaybe(Local<Value>()).As<String>();
 }
 }  // anonymous namespace
 // Decodes `*nread_ptr` bytes starting at `data` using the current encoding.
 //
 // For UTF8, UCS2 and BASE64 the decoder is stateful: bytes of a character
 // that is cut off at the end of the chunk are kept in the incomplete
 // character buffer, and a character left over from an earlier call is
 // completed first and prepended to the result. `*nread_ptr` is increased by
 // the buffered bytes when such a character is completed and decreased by the
 // bytes that are held back at the end of this chunk.
 //
 // For the remaining encodings (checked to be ASCII, HEX or LATIN1) the input
 // is converted directly and `*nread_ptr` is left untouched.
 //
 // Returns an empty MaybeLocal when creating one of the strings fails.
 MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
                                             const char* data,
                                             size_t* nread_ptr) {
  Local<String> prepend, body;
  // Local working copy; counts the bytes of `data` not yet consumed.
  size_t nread = *nread_ptr;
  if (Encoding() == UTF8 || Encoding() == UCS2 || Encoding() == BASE64) {
    // See if we want bytes to finish a character from the previous
    // chunk; if so, copy the new bytes to the missing bytes buffer
    // and create a small string from it that is to be prepended to the
    // main body.
    if (MissingBytes() > 0) {
      // There are never more bytes missing than the pre-calculated maximum.
      CHECK_LE(MissingBytes() + BufferedBytes(),
               kIncompleteCharactersEnd);
      if (Encoding() == UTF8) {
        // For UTF-8, we need special treatment to align with the V8 decoder:
        // If an incomplete character is found at a chunk boundary, we turn
        // that character into a single invalid one.
        for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
          if ((data[i] & 0xC0) != 0x80) {
            // This byte is not a continuation byte even though it should have
            // been one.
            // Act as if there was a 1-byte incomplete character, which does
            // not make sense but works here because we know it's invalid.
            state_[kMissingBytes] = 0;
            state_[kBufferedBytes] = 1;
            data += i;
            nread -= i;
            break;
          }
        }
      }
      size_t found_bytes =
          std::min(nread, static_cast<size_t>(MissingBytes()));
      memcpy(IncompleteCharacterBuffer() + BufferedBytes(),
             data,
             found_bytes);
      // Adjust the two buffers.
      data += found_bytes;
      nread -= found_bytes;
      state_[kMissingBytes] -= found_bytes;
      state_[kBufferedBytes] += found_bytes;
      if (LIKELY(MissingBytes() == 0)) {
        // If no more bytes are missing, create a small string that we
        // will later prepend.
        if (!MakeString(isolate,
                        IncompleteCharacterBuffer(),
                        BufferedBytes(),
                        Encoding()).ToLocal(&prepend)) {
          return MaybeLocal<String>();
        }
        // The bytes buffered by earlier calls are now part of the output.
        *nread_ptr += BufferedBytes();
        // No more buffered bytes.
        state_[kBufferedBytes] = 0;
      }
    }
    // It could be that trying to finish the previous chunk already
    // consumed all data that we received in this chunk.
    if (UNLIKELY(nread == 0)) {
      body = !prepend.IsEmpty() ? prepend : String::Empty(isolate);
      prepend = Local<String>();
    } else {
 #ifdef DEBUG
      // If not, that means there is no character left to finish at this point.
      CHECK_EQ(MissingBytes(), 0);
      CHECK_EQ(BufferedBytes(), 0);
 #endif
      // See whether there is a character that we may have to cut off and
      // finish when receiving the next chunk.
      if (Encoding() == UTF8 && data[nread - 1] & 0x80) {
        // This is UTF-8 encoded data and we ended on a non-ASCII UTF-8 byte.
        // This means we'll need to figure out where the character to which
        // the byte belongs begins.
        // Walk backwards from the last byte; kBufferedBytes counts the bytes
        // visited so far. Every path through the loop body ends in `break`
        // once a lead byte, the start of the data or the 4-byte limit is hit.
        for (size_t i = nread - 1; ; --i) {
 #ifdef DEBUG
          CHECK_LT(i, nread);
 #endif
          state_[kBufferedBytes]++;
          if ((data[i] & 0xC0) == 0x80) {
            // This byte does not start a character (a "trailing" byte).
            if (state_[kBufferedBytes] >= 4 || i == 0) {
              // We either have 4 or more trailing bytes (which means
              // the current character would not be inside the range for
              // valid Unicode, and in particular cannot be represented
              // through JavaScript's UTF-16-based approach to strings), or the
              // current buffer does not contain the start of an UTF-8 character
              // at all. Either way, this is invalid UTF8 and we can just
              // let the engine's decoder handle it.
              state_[kBufferedBytes] = 0;
              break;
            }
          } else {
            // Found the first byte of a UTF-8 character. By looking at the
            // upper bits we can tell how long the character *should* be.
            // kMissingBytes temporarily holds that total length; the bytes
            // already seen are subtracted further down.
            if ((data[i] & 0xE0) == 0xC0) {
              state_[kMissingBytes] = 2;
            } else if ((data[i] & 0xF0) == 0xE0) {
              state_[kMissingBytes] = 3;
            } else if ((data[i] & 0xF8) == 0xF0) {
              state_[kMissingBytes] = 4;
            } else {
              // This lead byte would indicate a character outside of the
              // representable range.
              state_[kBufferedBytes] = 0;
              break;
            }
            if (BufferedBytes() >= MissingBytes()) {
              // Received more or exactly as many trailing bytes than the lead
              // character would indicate. In the "==" case, we have valid
              // data and don't need to slice anything off;
              // in the ">" case, this is invalid UTF-8 anyway.
              state_[kMissingBytes] = 0;
              state_[kBufferedBytes] = 0;
            }
            state_[kMissingBytes] -= state_[kBufferedBytes];
            break;
          }
        }
      } else if (Encoding() == UCS2) {
        if ((nread % 2) == 1) {
          // We got half a codepoint, and need the second byte of it.
          state_[kBufferedBytes] = 1;
          state_[kMissingBytes] = 1;
        } else if ((data[nread - 1] & 0xFC) == 0xD8) {
          // Half a split UTF-16 character.
          state_[kBufferedBytes] = 2;
          state_[kMissingBytes] = 2;
        }
      } else if (Encoding() == BASE64) {
        // Hold back the bytes that do not fill a complete 3-byte group.
        state_[kBufferedBytes] = nread % 3;
        if (state_[kBufferedBytes] > 0)
          state_[kMissingBytes] = 3 - BufferedBytes();
      }
      if (BufferedBytes() > 0) {
        // Copy the requested number of buffered bytes from the end of the
        // input into the incomplete character buffer.
        nread -= BufferedBytes();
        *nread_ptr -= BufferedBytes();
        memcpy(IncompleteCharacterBuffer(), data + nread, BufferedBytes());
      }
      if (nread > 0) {
        if (!MakeString(isolate, data, nread, Encoding()).ToLocal(&body))
          return MaybeLocal<String>();
      } else {
        body = String::Empty(isolate);
      }
    }
    if (prepend.IsEmpty()) {
      return body;
    } else {
      return String::Concat(prepend, body);
    }
  } else {
    CHECK(Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1);
    return MakeString(isolate, data, nread, Encoding());
  }
 }
 // Emits whatever is still sitting in the incomplete character buffer as a
 // string and clears the byte counters. Returns the empty string when nothing
 // is buffered.
 MaybeLocal<String> StringDecoder::FlushData(Isolate* isolate) {
  const enum encoding current = Encoding();

  // The stateless encodings must never have left anything behind.
  if (current == ASCII || current == HEX || current == LATIN1) {
    CHECK_EQ(MissingBytes(), 0);
    CHECK_EQ(BufferedBytes(), 0);
  }

  const bool has_odd_ucs2_byte = current == UCS2 && BufferedBytes() % 2 == 1;
  if (has_odd_ucs2_byte) {
    // Ignore a single trailing byte, like the JS decoder does.
    state_[kMissingBytes]--;
    state_[kBufferedBytes]--;
  }

  if (BufferedBytes() == 0)
    return String::Empty(isolate);

  MaybeLocal<String> flushed = MakeString(isolate,
                                          IncompleteCharacterBuffer(),
                                          BufferedBytes(),
                                          current);
  state_[kBufferedBytes] = 0;
  state_[kMissingBytes] = 0;
  return flushed;
 }
 namespace {
 void DecodeData(const FunctionCallbackInfo<Value>& args) {
  StringDecoder* decoder =
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
  CHECK_NE(decoder, nullptr);
  size_t nread = Buffer::Length(args[1]);
  MaybeLocal<String> ret =
      decoder->DecodeData(args.GetIsolate(), Buffer::Data(args[1]), &nread);
  if (!ret.IsEmpty())
    args.GetReturnValue().Set(ret.ToLocalChecked());
 }
 void FlushData(const FunctionCallbackInfo<Value>& args) {
  StringDecoder* decoder =
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
  CHECK_NE(decoder, nullptr);
  MaybeLocal<String> ret = decoder->FlushData(args.GetIsolate());
  if (!ret.IsEmpty())
    args.GetReturnValue().Set(ret.ToLocalChecked());
 }
 // Fills the `string_decoder` binding object: the indices of the state fields,
 // the list of encoding names (indexed by their enum value), the size of the
 // native StringDecoder object as `kSize`, and the `decode`/`flush` functions.
 void InitializeStringDecoder(Local<Object> target,
                              Local<Value> unused,
                              Local<Context> context) {
  Environment* env = Environment::GetCurrent(context);
  Isolate* isolate = env->isolate();

  // Every plain property on `target` is defined through this one helper.
  auto define_on_target = [&](Local<String> key, Local<Value> value) {
    target->Set(context, key, value).FromJust();
  };

 #define SET_DECODER_CONSTANT(name)                                            \
  define_on_target(FIXED_ONE_BYTE_STRING(isolate, #name),                     \
                   Integer::New(isolate, StringDecoder::name))

  SET_DECODER_CONSTANT(kIncompleteCharactersStart);
  SET_DECODER_CONSTANT(kIncompleteCharactersEnd);
  SET_DECODER_CONSTANT(kMissingBytes);
  SET_DECODER_CONSTANT(kBufferedBytes);
  SET_DECODER_CONSTANT(kEncodingField);
  SET_DECODER_CONSTANT(kNumFields);

  Local<Array> encoding_names = Array::New(isolate);
 #define ADD_TO_ENCODINGS_ARRAY(cname, jsname)                                 \
  encoding_names->Set(context,                                                \
                      static_cast<int32_t>(cname),                            \
                      FIXED_ONE_BYTE_STRING(isolate, jsname)).FromJust()

  ADD_TO_ENCODINGS_ARRAY(ASCII, "ascii");
  ADD_TO_ENCODINGS_ARRAY(UTF8, "utf8");
  ADD_TO_ENCODINGS_ARRAY(BASE64, "base64");
  ADD_TO_ENCODINGS_ARRAY(UCS2, "utf16le");
  ADD_TO_ENCODINGS_ARRAY(HEX, "hex");
  ADD_TO_ENCODINGS_ARRAY(BUFFER, "buffer");
  ADD_TO_ENCODINGS_ARRAY(LATIN1, "latin1");

  define_on_target(FIXED_ONE_BYTE_STRING(isolate, "encodings"),
                   encoding_names);
  define_on_target(FIXED_ONE_BYTE_STRING(isolate, "kSize"),
                   Integer::New(isolate, sizeof(StringDecoder)));

  env->SetMethod(target, "decode", DecodeData);
  env->SetMethod(target, "flush", FlushData);
 }
 }  // anonymous namespace
 }  // namespace node
 NODE_MODULE_CONTEXT_AWARE_INTERNAL(string_decoder,
                                   node::InitializeStringDecoder)
--- a/src/string_decoder.h
+++ b/src/string_decoder.h
@ -0,0 +1,50 @@
 #ifndef SRC_STRING_DECODER_H_
 #define SRC_STRING_DECODER_H_
 #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
 #include "node.h"
 namespace node {
 // Incremental decoder that turns chunks of bytes into JS strings while
 // keeping characters that are split across chunk boundaries intact.
 // All state lives in the small `state_` byte array, addressed through the
 // `Fields` indices below.
 class StringDecoder {
 public:
  // A new decoder starts out with its encoding field set to BUFFER.
  StringDecoder() { state_[kEncodingField] = BUFFER; }
  // Sets the encoding and resets the buffered/missing byte counters.
  inline void SetEncoding(enum encoding encoding);
  // Returns the encoding stored in the state array.
  inline enum encoding Encoding() const;
  // Pointer to the start of the bytes of a not-yet-complete character.
  inline char* IncompleteCharacterBuffer();
  // Number of bytes still needed to complete the buffered character.
  inline unsigned MissingBytes() const;
  // Number of bytes currently stored in the incomplete character buffer.
  inline unsigned BufferedBytes() const;
  // Decode a string from the specified encoding.
  // The value pointed to by `nread` will be modified to reflect that
  // less data may have been read because it ended on an incomplete character
  // and more data may have been read because a previously incomplete character
  // was finished.
  v8::MaybeLocal<v8::String> DecodeData(v8::Isolate* isolate,
                                        const char* data,
                                        size_t* nread);
  // Flush an incomplete character. For character encodings like UTF8 this
  // means printing replacement characters, but for e.g. Base64 the returned
  // string contains more data.
  v8::MaybeLocal<v8::String> FlushData(v8::Isolate* isolate);
  // Indices into `state_`.
  enum Fields {
    // [kIncompleteCharactersStart, kIncompleteCharactersEnd) holds the bytes
    // of an incomplete character; the end index is exclusive, which is why
    // it shares its value with the next field.
    kIncompleteCharactersStart = 0,
    kIncompleteCharactersEnd = 4,
    // Count of bytes still missing from the buffered character.
    kMissingBytes = 4,
    // Count of bytes currently buffered.
    kBufferedBytes = 5,
    // The active `enum encoding` value, stored as a single byte.
    kEncodingField = 6,
    // Total number of bytes in `state_`.
    kNumFields = 7
  };
 private:
  // Zero-initialized; the constructor then sets the encoding field.
  uint8_t state_[kNumFields] = {};
 };
 }  // namespace node
 #endif  // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
 #endif   // SRC_STRING_DECODER_H_
--- a/test/parallel/test-string-decoder.js
+++ b/test/parallel/test-string-decoder.js
@ -128,6 +128,10 @@ assert.strictEqual(decoder.write(Buffer.from('3DD8', 'hex')), '');
 assert.strictEqual(decoder.write(Buffer.from('4D', 'hex')), '');
 assert.strictEqual(decoder.end(), '\ud83d');
 decoder = new StringDecoder('utf16le');
 assert.strictEqual(decoder.write(Buffer.from('3DD84D', 'hex')), '\ud83d');
 assert.strictEqual(decoder.end(), '');
 common.expectsError(
  () => new StringDecoder(1),
  {