string_decoder: Fix failures from new test cases
This patch simplifies the implementation of StringDecoder, fixes the failures from the new test cases, and no longer relies on V8's WriteUtf8 function to encode individual surrogates.
This commit is contained in:
parent
22b839845c
commit
9fbd0f0f7d
@ -57,29 +57,29 @@ var StringDecoder = exports.StringDecoder = function(encoding) {
|
|||||||
|
|
||||||
StringDecoder.prototype.write = function(buffer) {
|
StringDecoder.prototype.write = function(buffer) {
|
||||||
var charStr = '';
|
var charStr = '';
|
||||||
var offset = 0;
|
|
||||||
|
|
||||||
// if our last write ended with an incomplete multibyte character
|
// if our last write ended with an incomplete multibyte character
|
||||||
while (this.charLength) {
|
while (this.charLength) {
|
||||||
// determine how many remaining bytes this buffer has to offer for this char
|
// determine how many remaining bytes this buffer has to offer for this char
|
||||||
var i = (buffer.length >= this.charLength - this.charReceived) ?
|
var available = (buffer.length >= this.charLength - this.charReceived) ?
|
||||||
this.charLength - this.charReceived :
|
this.charLength - this.charReceived :
|
||||||
buffer.length;
|
buffer.length;
|
||||||
|
|
||||||
// add the new bytes to the char buffer
|
// add the new bytes to the char buffer
|
||||||
buffer.copy(this.charBuffer, this.charReceived, offset, i);
|
buffer.copy(this.charBuffer, this.charReceived, 0, available);
|
||||||
this.charReceived += (i - offset);
|
this.charReceived += available;
|
||||||
offset = i;
|
|
||||||
|
|
||||||
if (this.charReceived < this.charLength) {
|
if (this.charReceived < this.charLength) {
|
||||||
// still not enough chars in this buffer? wait for more ...
|
// still not enough chars in this buffer? wait for more ...
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// remove bytes belonging to the current character from the buffer
|
||||||
|
buffer = buffer.slice(available, buffer.length);
|
||||||
|
|
||||||
// get the character that was split
|
// get the character that was split
|
||||||
charStr = this.charBuffer.slice(0, this.charLength).toString(this.encoding);
|
charStr = this.charBuffer.slice(0, this.charLength).toString(this.encoding);
|
||||||
|
|
||||||
// lead surrogate (D800-DBFF) is also the incomplete character
|
// CESU-8: lead surrogate (D800-DBFF) is also the incomplete character
|
||||||
var charCode = charStr.charCodeAt(charStr.length - 1);
|
var charCode = charStr.charCodeAt(charStr.length - 1);
|
||||||
if (charCode >= 0xD800 && charCode <= 0xDBFF) {
|
if (charCode >= 0xD800 && charCode <= 0xDBFF) {
|
||||||
this.charLength += this.surrogateSize;
|
this.charLength += this.surrogateSize;
|
||||||
@ -89,34 +89,33 @@ StringDecoder.prototype.write = function(buffer) {
|
|||||||
this.charReceived = this.charLength = 0;
|
this.charReceived = this.charLength = 0;
|
||||||
|
|
||||||
// if there are no more bytes in this buffer, just emit our char
|
// if there are no more bytes in this buffer, just emit our char
|
||||||
if (i == buffer.length) return charStr;
|
if (buffer.length === 0) {
|
||||||
|
return charStr;
|
||||||
// otherwise cut off the characters end from the beginning of this buffer
|
}
|
||||||
buffer = buffer.slice(i, buffer.length);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
var lenIncomplete = this.detectIncompleteChar(buffer);
|
// determine and set charLength / charReceived
|
||||||
|
this.detectIncompleteChar(buffer);
|
||||||
|
|
||||||
var end = buffer.length;
|
var end = buffer.length;
|
||||||
if (this.charLength) {
|
if (this.charLength) {
|
||||||
// buffer the incomplete character bytes we got
|
// buffer the incomplete character bytes we got
|
||||||
buffer.copy(this.charBuffer, 0, buffer.length - lenIncomplete, end);
|
buffer.copy(this.charBuffer, 0, buffer.length - this.charReceived, end);
|
||||||
this.charReceived = lenIncomplete;
|
end -= this.charReceived;
|
||||||
end -= lenIncomplete;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
charStr += buffer.toString(this.encoding, 0, end);
|
charStr += buffer.toString(this.encoding, 0, end);
|
||||||
|
|
||||||
var end = charStr.length - 1;
|
var end = charStr.length - 1;
|
||||||
var charCode = charStr.charCodeAt(end);
|
var charCode = charStr.charCodeAt(end);
|
||||||
// lead surrogate (D800-DBFF) is also the incomplete character
|
// CESU-8: lead surrogate (D800-DBFF) is also the incomplete character
|
||||||
if (charCode >= 0xD800 && charCode <= 0xDBFF) {
|
if (charCode >= 0xD800 && charCode <= 0xDBFF) {
|
||||||
var size = this.surrogateSize;
|
var size = this.surrogateSize;
|
||||||
this.charLength += size;
|
this.charLength += size;
|
||||||
this.charReceived += size;
|
this.charReceived += size;
|
||||||
this.charBuffer.copy(this.charBuffer, size, 0, size);
|
this.charBuffer.copy(this.charBuffer, size, 0, size);
|
||||||
this.charBuffer.write(charStr.charAt(charStr.length - 1), this.encoding);
|
buffer.copy(this.charBuffer, 0, 0, size);
|
||||||
return charStr.substring(0, end);
|
return charStr.substring(0, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -153,8 +152,7 @@ StringDecoder.prototype.detectIncompleteChar = function(buffer) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
this.charReceived = i;
|
||||||
return i;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
StringDecoder.prototype.end = function(buffer) {
|
StringDecoder.prototype.end = function(buffer) {
|
||||||
@ -177,13 +175,11 @@ function passThroughWrite(buffer) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function utf16DetectIncompleteChar(buffer) {
|
function utf16DetectIncompleteChar(buffer) {
|
||||||
var incomplete = this.charReceived = buffer.length % 2;
|
this.charReceived = buffer.length % 2;
|
||||||
this.charLength = incomplete ? 2 : 0;
|
this.charLength = this.charReceived ? 2 : 0;
|
||||||
return incomplete;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function base64DetectIncompleteChar(buffer) {
|
function base64DetectIncompleteChar(buffer) {
|
||||||
var incomplete = this.charReceived = buffer.length % 3;
|
this.charReceived = buffer.length % 3;
|
||||||
this.charLength = incomplete ? 3 : 0;
|
this.charLength = this.charReceived ? 3 : 0;
|
||||||
return incomplete;
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user