streams2: Make Transform streams pull-style
That is, the transform is triggered by a _read, not by a _write. This way, backpressure works properly.
This commit is contained in:
parent
9b5abe5bfe
commit
3b59fd70f4
@ -146,6 +146,7 @@ Readable.prototype.read = function(n) {
|
||||
// probably kick off another stream.read(), which can trigger
|
||||
// another _read(n,cb) before this one returns!
|
||||
if (state.length < state.lowWaterMark) {
|
||||
state.reading = true;
|
||||
this._read(state.bufferSize, onread.bind(this));
|
||||
return;
|
||||
}
|
||||
|
@ -19,6 +19,7 @@
|
||||
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
// USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
|
||||
// a transform stream is a readable/writable stream where you do
|
||||
// something with the data. Sometimes it's called a "filter",
|
||||
// but that's not a great name for it, since that implies a thing where
|
||||
@ -29,6 +30,39 @@
|
||||
// necessarily symmetric or synchronous transformation. For example,
|
||||
// a zlib stream might take multiple plain-text writes(), and then
|
||||
// emit a single compressed chunk some time in the future.
|
||||
//
|
||||
// Here's how this works:
|
||||
//
|
||||
// The Transform stream has all the aspects of the readable and writable
|
||||
// stream classes. When you write(chunk), that calls _write(chunk,cb)
|
||||
// internally, and returns false if there's a lot of pending writes
|
||||
// buffered up. When you call read(), that calls _read(n,cb) until
|
||||
// there's enough pending readable data buffered up.
|
||||
//
|
||||
// In a transform stream, the written data is placed in a buffer. When
|
||||
// _read(n,cb) is called, it transforms the queued up data, calling the
|
||||
// buffered _write cb's as it consumes chunks. If consuming a single
|
||||
// written chunk would result in multiple output chunks, then the first
|
||||
// outputted bit calls the readcb, and subsequent chunks just go into
|
||||
// the read buffer, and will cause it to emit 'readable' if necessary.
|
||||
//
|
||||
// This way, back-pressure is actually determined by the reading side,
|
||||
// since _read has to be called to start processing a new chunk. However,
|
||||
// a pathological inflate type of transform can cause excessive buffering
|
||||
// here. For example, imagine a stream where every byte of input is
|
||||
// interpreted as an integer from 0-255, and then results in that many
|
||||
// bytes of output. Writing the 4 bytes {ff,ff,ff,ff} would result in
|
||||
// 1kb of data being output. In this case, you could write a very small
|
||||
// amount of input, and end up with a very large amount of output. In
|
||||
// such a pathological inflating mechanism, there'd be no way to tell
|
||||
// the system to stop doing the transform. A single 4MB write could
|
||||
// cause the system to run out of memory.
|
||||
//
|
||||
// However, even in such a pathological case, only a single written chunk
|
||||
// would be consumed, and then the rest would wait (un-transformed) until
|
||||
// the results of the previous transformed chunk were consumed. Because
|
||||
// the transform happens on-demand, it will only transform as much as is
|
||||
// necessary to fill the readable buffer to the specified lowWaterMark.
|
||||
|
||||
module.exports = Transform;
|
||||
|
||||
@ -36,12 +70,21 @@ var Duplex = require('_stream_duplex');
|
||||
var util = require('util');
|
||||
util.inherits(Transform, Duplex);
|
||||
|
||||
function TransformState() {
|
||||
this.buffer = [];
|
||||
this.transforming = false;
|
||||
this.pendingReadCb = null;
|
||||
}
|
||||
|
||||
function Transform(options) {
|
||||
Duplex.call(this, options);
|
||||
|
||||
// bind output so that it can be passed around as a regular function.
|
||||
this._output = this._output.bind(this);
|
||||
|
||||
// the queue of _write chunks that are pending being transformed
|
||||
this._transformState = new TransformState();
|
||||
|
||||
// when the writable side finishes, then flush out anything remaining.
|
||||
this.once('finish', function() {
|
||||
if ('function' === typeof this._flush)
|
||||
@ -65,33 +108,65 @@ Transform.prototype._transform = function(chunk, output, cb) {
|
||||
throw new Error('not implemented');
|
||||
};
|
||||
|
||||
|
||||
Transform.prototype._write = function(chunk, cb) {
|
||||
this._transform(chunk, this._output, cb);
|
||||
var ts = this._transformState;
|
||||
ts.buffer.push([chunk, cb]);
|
||||
|
||||
// now we have something to transform, if we were waiting for it.
|
||||
if (ts.pendingReadCb && !ts.transforming) {
|
||||
var readcb = ts.pendingReadCb;
|
||||
ts.pendingReadCb = null;
|
||||
this._read(-1, readcb);
|
||||
}
|
||||
};
|
||||
|
||||
Transform.prototype._read = function(n, cb) {
|
||||
Transform.prototype._read = function(n, readcb) {
|
||||
var ws = this._writableState;
|
||||
var rs = this._readableState;
|
||||
var ts = this._transformState;
|
||||
|
||||
// basically a no-op, since the _transform will fill the
|
||||
// _readableState.buffer and emit 'readable' for us, and set ended
|
||||
// Usually, we want to just not call the cb, and set the reading
|
||||
// flag to false, so that another _read will happen next time,
|
||||
// but no state changes.
|
||||
rs.reading = false;
|
||||
if (ts.pendingReadCb)
|
||||
throw new Error('_read while _read already in progress');
|
||||
|
||||
// however, if the writable side has ended, and its buffer is clear,
|
||||
// then that means that the input has all been consumed, and no more
|
||||
// will ever be provided. treat this as an EOF, and pass back 0 bytes.
|
||||
if ((ws.ended || ws.ending) && ws.length === 0)
|
||||
cb();
|
||||
ts.pendingReadCb = readcb;
|
||||
|
||||
// if there's nothing pending, then we just wait.
|
||||
// if we're already transforming, then also just hold on a sec.
|
||||
// we've already stashed the readcb, so we can come back later
|
||||
// when we have something to transform
|
||||
if (ts.buffer.length === 0 || ts.transforming)
|
||||
return;
|
||||
|
||||
// go ahead and transform that thing, now that someone wants it
|
||||
var req = ts.buffer.shift();
|
||||
var chunk = req[0];
|
||||
var writecb = req[1];
|
||||
var output = this._output;
|
||||
ts.transforming = true;
|
||||
this._transform(chunk, output, function(er, data) {
|
||||
ts.transforming = false;
|
||||
if (data)
|
||||
output(data);
|
||||
writecb(er);
|
||||
}.bind(this));
|
||||
};
|
||||
|
||||
Transform.prototype._output = function(chunk) {
|
||||
if (!chunk || !chunk.length)
|
||||
return;
|
||||
|
||||
// if we've got a pending readcb, then just call that,
|
||||
// and let Readable take care of it. If not, then we fill
|
||||
// the readable buffer ourselves, and emit whatever's needed.
|
||||
var ts = this._transformState;
|
||||
var readcb = ts.pendingReadCb;
|
||||
if (readcb) {
|
||||
ts.pendingReadCb = null;
|
||||
readcb(null, chunk);
|
||||
return;
|
||||
}
|
||||
|
||||
// otherwise, it's up to us to fill the rs buffer.
|
||||
var state = this._readableState;
|
||||
var len = state.length;
|
||||
state.buffer.push(chunk);
|
||||
@ -110,6 +185,18 @@ function done(er) {
|
||||
// that nothing more will ever be provided
|
||||
var ws = this._writableState;
|
||||
var rs = this._readableState;
|
||||
var ts = this._transformState;
|
||||
|
||||
if (ws.length)
|
||||
throw new Error('calling transform done when ws.length != 0');
|
||||
|
||||
if (ts.transforming)
|
||||
throw new Error('calling transform done when still transforming');
|
||||
|
||||
// if we were waiting on a read, let them know that it isn't coming.
|
||||
var readcb = ts.pendingReadCb;
|
||||
if (readcb)
|
||||
return readcb();
|
||||
|
||||
rs.ended = true;
|
||||
// we may have gotten a 'null' read before, and since there is
|
||||
@ -117,7 +204,6 @@ function done(er) {
|
||||
// now so that the consumer knows to pick up the tail bits.
|
||||
if (rs.length && rs.needReadable)
|
||||
this.emit('readable');
|
||||
else if (rs.length === 0) {
|
||||
else if (rs.length === 0)
|
||||
this.emit('end');
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user