buffer: add Buffer.prototype.lastIndexOf()

* Remove unnecessary templating from SearchString

  SearchString used to have separate PatternChar and SubjectChar template type arguments, apparently to support things like searching for an 8-bit string inside a 16-bit string or vice versa. However, SearchString is only used from node_buffer.cc, where PatternChar and SubjectChar are always the same. Since this is extra complexity that's unused and untested (simplifying to a single Char template argument still compiles and didn't break any unit tests), I removed it.

* Use Boyer-Moore[-Horspool] for both indexOf and lastIndexOf

  Add test cases for lastIndexOf. Test the fallback from BMH to Boyer-Moore, which looks like it was totally untested before.

* Extra bounds checks in node_buffer.cc
* Extra asserts in string_search.h
* Buffer.lastIndexOf: clean up, enforce consistency w/ String.lastIndexOf
* Polyfill memrchr(3) for non-GNU systems

PR-URL: https://github.com/nodejs/node/pull/4846
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Trevor Norris <trev.norris@gmail.com>
2016-01-28 22:12:09 +01:00 · 2016-01-28 22:12:09 +01:00 · 6c1e5ad3ab
commit 6c1e5ad3ab
parent d5922bd7a9
5 changed files with 471 additions and 252 deletions
--- a/doc/api/buffer.md
+++ b/doc/api/buffer.md
@ -988,6 +988,46 @@ for (var key of buf.keys()) {
 //   5
 ```

+### buf.lastIndexOf(value[, byteOffset][, encoding])
+
+* `value` {String|Buffer|Number}
+* `byteOffset` {Number} Default: `buf.length - 1`
+* `encoding` {String} Default: `'utf8'`
+* Return: {Number}
+
+Identical to [`Buffer#indexOf()`][], but searches the Buffer from back to front
+instead of front to back. Returns the starting index position of `value` in
+Buffer or `-1` if the Buffer does not contain `value`. The `value` can be a
+String, Buffer or Number. Strings are by default interpreted as UTF-8. If
+`byteOffset` is provided, the last match that begins at or before
+`byteOffset` is returned.
+
+```js
+const buf = new Buffer('this buffer is a buffer');
+
+buf.lastIndexOf('this');
+  // returns 0
+buf.lastIndexOf('buffer');
+  // returns 17
+buf.lastIndexOf(new Buffer('buffer'));
+  // returns 17
+buf.lastIndexOf(97); // ascii for 'a'
+  // returns 15
+buf.lastIndexOf(new Buffer('yolo'));
+  // returns -1
+buf.lastIndexOf('buffer', 5)
+  // returns 5
+buf.lastIndexOf('buffer', 4)
+  // returns -1
+
+const utf16Buffer = new Buffer('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2');
+
+utf16Buffer.lastIndexOf('\u03a3', null, 'ucs2');
+  // returns 6
+utf16Buffer.lastIndexOf('\u03a3', -5, 'ucs2');
+  // returns 4
+```
+
 ### buf.length

 * {Number}
--- a/lib/buffer.js
+++ b/lib/buffer.js
@ -598,7 +598,48 @@ Buffer.prototype.compare = function compare(target,
  return binding.compareOffset(this, target, start, thisStart, end, thisEnd);
 };

-function slowIndexOf(buffer, val, byteOffset, encoding) {
+
+// Finds either the first index of `val` in `buffer` at offset >= `byteOffset`,
+// OR the last index of `val` in `buffer` at offset <= `byteOffset`.
+//
+// Arguments:
+// - buffer - a Buffer to search
+// - val - a string, Buffer, or number
+// - byteOffset - an index into `buffer`; will be clamped to an int32
+// - encoding - an optional encoding, relevant if val is a string
+// - dir - true for indexOf, false for lastIndexOf
+function bidirectionalIndexOf(buffer, val, byteOffset, encoding, dir) {
+  if (typeof byteOffset === 'string') {
+    encoding = byteOffset;
+    byteOffset = undefined;
+  } else if (byteOffset > 0x7fffffff) {
+    byteOffset = 0x7fffffff;
+  } else if (byteOffset < -0x80000000) {
+    byteOffset = -0x80000000;
+  }
+  byteOffset = +byteOffset;  // Coerce to Number.
+  if (isNaN(byteOffset)) {
+    // If the offset is undefined, null, NaN, "foo", etc, search whole buffer.
+    byteOffset = dir ? 0 : (buffer.length - 1);
+  }
+  dir = !!dir;  // Cast to bool.
+
+  if (typeof val === 'string') {
+    if (encoding === undefined) {
+      return binding.indexOfString(buffer, val, byteOffset, encoding, dir);
+    }
+    return slowIndexOf(buffer, val, byteOffset, encoding, dir);
+  } else if (val instanceof Buffer) {
+    return binding.indexOfBuffer(buffer, val, byteOffset, encoding, dir);
+  } else if (typeof val === 'number') {
+    return binding.indexOfNumber(buffer, val, byteOffset, dir);
+  }
+
+  throw new TypeError('"val" argument must be string, number or Buffer');
+}
+
+
+function slowIndexOf(buffer, val, byteOffset, encoding, dir) {
  var loweredCase = false;
  for (;;) {
    switch (encoding) {
@ -609,13 +650,13 @@ function slowIndexOf(buffer, val, byteOffset, encoding) {
      case 'utf16le':
      case 'utf-16le':
      case 'binary':
-        return binding.indexOfString(buffer, val, byteOffset, encoding);
+        return binding.indexOfString(buffer, val, byteOffset, encoding, dir);

      case 'base64':
      case 'ascii':
      case 'hex':
        return binding.indexOfBuffer(
-            buffer, Buffer.from(val, encoding), byteOffset, encoding);
+            buffer, Buffer.from(val, encoding), byteOffset, encoding, dir);

      default:
        if (loweredCase) {
@ -628,29 +669,14 @@ function slowIndexOf(buffer, val, byteOffset, encoding) {
  }
 }

+
 Buffer.prototype.indexOf = function indexOf(val, byteOffset, encoding) {
-  if (typeof byteOffset === 'string') {
-    encoding = byteOffset;
-    byteOffset = 0;
-  } else if (byteOffset > 0x7fffffff) {
-    byteOffset = 0x7fffffff;
-  } else if (byteOffset < -0x80000000) {
-    byteOffset = -0x80000000;
-  }
-  byteOffset >>= 0;
+  return bidirectionalIndexOf(this, val, byteOffset, encoding, true);
+};

-  if (typeof val === 'string') {
-    if (encoding === undefined) {
-      return binding.indexOfString(this, val, byteOffset, encoding);
-    }
-    return slowIndexOf(this, val, byteOffset, encoding);
-  } else if (val instanceof Buffer) {
-    return binding.indexOfBuffer(this, val, byteOffset, encoding);
-  } else if (typeof val === 'number') {
-    return binding.indexOfNumber(this, val, byteOffset);
-  }

-  throw new TypeError('"val" argument must be string, number or Buffer');
+Buffer.prototype.lastIndexOf = function lastIndexOf(val, byteOffset, encoding) {
+  return bidirectionalIndexOf(this, val, byteOffset, encoding, false);
 };


--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@ -943,9 +943,44 @@ void Compare(const FunctionCallbackInfo<Value> &args) {
 }


+// Computes the offset for starting an indexOf or lastIndexOf search.
+// Returns either a valid offset in [0...<length - 1>], i.e. inside the Buffer,
+// or -1 to signal that there is no possible match.
+int64_t IndexOfOffset(size_t length, int64_t offset_i64, bool is_forward) {
+  int64_t length_i64 = static_cast<int64_t>(length);
+  if (length_i64 == 0) {
+    // Empty buffer, no match.
+    return -1;
+  }
+  if (offset_i64 < 0) {
+    if (offset_i64 + length_i64 >= 0) {
+      // Negative offsets count backwards from the end of the buffer.
+      return length_i64 + offset_i64;
+    } else if (is_forward) {
+      // indexOf from before the start of the buffer: search the whole buffer.
+      return 0;
+    } else {
+      // lastIndexOf from before the start of the buffer: no match.
+      return -1;
+    }
+  } else {
+    if (offset_i64 < length_i64) {
+      // Valid positive offset.
+      return offset_i64;
+    } else if (is_forward) {
+      // indexOf from past the end of the buffer: no match.
+      return -1;
+    } else {
+      // lastIndexOf from past the end of the buffer: search the whole buffer.
+      return length_i64 - 1;
+    }
+  }
+}
+
 void IndexOfString(const FunctionCallbackInfo<Value>& args) {
  ASSERT(args[1]->IsString());
  ASSERT(args[2]->IsNumber());
+  ASSERT(args[4]->IsBoolean());

  enum encoding enc = ParseEncoding(args.GetIsolate(),
                                    args[3],
@ -955,31 +990,26 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
  SPREAD_ARG(args[0], ts_obj);

  Local<String> needle = args[1].As<String>();
+  int64_t offset_i64 = args[2]->IntegerValue();
+  bool is_forward = args[4]->IsTrue();
+
  const char* haystack = ts_obj_data;
  const size_t haystack_length = ts_obj_length;
  // Extended latin-1 characters are 2 bytes in Utf8.
  const size_t needle_length =
      enc == BINARY ? needle->Length() : needle->Utf8Length();

-
  if (needle_length == 0 || haystack_length == 0) {
    return args.GetReturnValue().Set(-1);
  }

-  int64_t offset_i64 = args[2]->IntegerValue();
-  size_t offset = 0;
-
-  if (offset_i64 < 0) {
-    if (offset_i64 + static_cast<int64_t>(haystack_length) < 0) {
-      offset = 0;
-    } else {
-      offset = static_cast<size_t>(haystack_length + offset_i64);
+  int64_t opt_offset = IndexOfOffset(haystack_length, offset_i64, is_forward);
+  if (opt_offset <= -1) {
+    return args.GetReturnValue().Set(-1);
  }
-  } else {
-    offset = static_cast<size_t>(offset_i64);
-  }
-
-  if (haystack_length < offset || needle_length + offset > haystack_length) {
+  size_t offset = static_cast<size_t>(opt_offset);
+  CHECK_LT(offset, haystack_length);
+  if (is_forward && needle_length + offset > haystack_length) {
    return args.GetReturnValue().Set(-1);
  }

@ -1007,13 +1037,15 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
                            haystack_length / 2,
                            decoded_string,
                            decoder.size() / 2,
-                            offset / 2);
+                            offset / 2,
+                            is_forward);
    } else {
      result = SearchString(reinterpret_cast<const uint16_t*>(haystack),
                            haystack_length / 2,
                            reinterpret_cast<const uint16_t*>(*needle_value),
                            needle_value.length(),
-                            offset / 2);
+                            offset / 2,
+                            is_forward);
    }
    result *= 2;
  } else if (enc == UTF8) {
@ -1025,7 +1057,8 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
                          haystack_length,
                          reinterpret_cast<const uint8_t*>(*needle_value),
                          needle_length,
-                          offset);
+                          offset,
+                          is_forward);
  } else if (enc == BINARY) {
    uint8_t* needle_data = static_cast<uint8_t*>(malloc(needle_length));
    if (needle_data == nullptr) {
@ -1038,7 +1071,8 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
                          haystack_length,
                          needle_data,
                          needle_length,
-                          offset);
+                          offset,
+                          is_forward);
    free(needle_data);
  }

@ -1049,17 +1083,18 @@ void IndexOfString(const FunctionCallbackInfo<Value>& args) {
 void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
  ASSERT(args[1]->IsObject());
  ASSERT(args[2]->IsNumber());
+  ASSERT(args[4]->IsBoolean());

  enum encoding enc = ParseEncoding(args.GetIsolate(),
                                    args[3],
                                    UTF8);

  THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
+  THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[1]);
  SPREAD_ARG(args[0], ts_obj);
  SPREAD_ARG(args[1], buf);
-
-  if (buf_length > 0)
-    CHECK_NE(buf_data, nullptr);
+  int64_t offset_i64 = args[2]->IntegerValue();
+  bool is_forward = args[4]->IsTrue();

  const char* haystack = ts_obj_data;
  const size_t haystack_length = ts_obj_length;
@ -1070,19 +1105,13 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
    return args.GetReturnValue().Set(-1);
  }

-  int64_t offset_i64 = args[2]->IntegerValue();
-  size_t offset = 0;
-
-  if (offset_i64 < 0) {
-    if (offset_i64 + static_cast<int64_t>(haystack_length) < 0)
-      offset = 0;
-    else
-      offset = static_cast<size_t>(haystack_length + offset_i64);
-  } else {
-    offset = static_cast<size_t>(offset_i64);
+  int64_t opt_offset = IndexOfOffset(haystack_length, offset_i64, is_forward);
+  if (opt_offset <= -1) {
+    return args.GetReturnValue().Set(-1);
  }
-
-  if (haystack_length < offset || needle_length + offset > haystack_length) {
+  size_t offset = static_cast<size_t>(opt_offset);
+  CHECK_LT(offset, haystack_length);
+  if (is_forward && needle_length + offset > haystack_length) {
    return args.GetReturnValue().Set(-1);
  }

@ -1097,7 +1126,8 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
        haystack_length / 2,
        reinterpret_cast<const uint16_t*>(needle),
        needle_length / 2,
-        offset / 2);
+        offset / 2,
+        is_forward);
    result *= 2;
  } else {
    result = SearchString(
@ -1105,7 +1135,8 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
        haystack_length,
        reinterpret_cast<const uint8_t*>(needle),
        needle_length,
-        offset);
+        offset,
+        is_forward);
  }

  args.GetReturnValue().Set(
@ -1115,28 +1146,29 @@ void IndexOfBuffer(const FunctionCallbackInfo<Value>& args) {
 void IndexOfNumber(const FunctionCallbackInfo<Value>& args) {
  ASSERT(args[1]->IsNumber());
  ASSERT(args[2]->IsNumber());
+  ASSERT(args[3]->IsBoolean());

  THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
  SPREAD_ARG(args[0], ts_obj);

  uint32_t needle = args[1]->Uint32Value();
  int64_t offset_i64 = args[2]->IntegerValue();
-  size_t offset;
+  bool is_forward = args[3]->IsTrue();

-  if (offset_i64 < 0) {
-    if (offset_i64 + static_cast<int64_t>(ts_obj_length) < 0)
-      offset = 0;
-    else
-      offset = static_cast<size_t>(ts_obj_length + offset_i64);
-  } else {
-    offset = static_cast<size_t>(offset_i64);
-  }
-
-  if (ts_obj_length == 0 || offset + 1 > ts_obj_length)
+  int64_t opt_offset = IndexOfOffset(ts_obj_length, offset_i64, is_forward);
+  if (opt_offset <= -1) {
    return args.GetReturnValue().Set(-1);
+  }
+  size_t offset = static_cast<size_t>(opt_offset);
+  CHECK_LT(offset, ts_obj_length);

-  void* ptr = memchr(ts_obj_data + offset, needle, ts_obj_length - offset);
-  char* ptr_char = static_cast<char*>(ptr);
+  const void* ptr;
+  if (is_forward) {
+    ptr = memchr(ts_obj_data + offset, needle, ts_obj_length - offset);
+  } else {
+    ptr = node::stringsearch::MemrchrFill(ts_obj_data, needle, offset + 1);
+  }
+  const char* ptr_char = static_cast<const char*>(ptr);
  args.GetReturnValue().Set(ptr ? static_cast<int>(ptr_char - ts_obj_data)
                                : -1);
 }
--- a/src/string_search.h
+++ b/src/string_search.h
@ -21,60 +21,35 @@ T Max(T a, T b) {

 static const uint32_t kMaxOneByteCharCodeU = 0xff;

-
-static inline size_t NonOneByteStart(const uint16_t* chars, size_t length) {
-  const uint16_t* limit = chars + length;
-  const uint16_t* start = chars;
-  while (chars < limit) {
-    if (*chars > kMaxOneByteCharCodeU)
-      return static_cast<size_t>(chars - start);
-    ++chars;
-  }
-  return static_cast<size_t>(chars - start);
-}
-
-
-static inline bool IsOneByte(const uint16_t* chars, size_t length) {
-  return NonOneByteStart(chars, length) >= length;
-}
-
-
 template <typename T>
 class Vector {
 public:
-  Vector(T* data, size_t length) : start_(data), length_(length) {
+  Vector(T* data, size_t length, bool isForward)
+      : start_(data), length_(length), is_forward_(isForward) {
    ASSERT(length > 0 && data != nullptr);
  }

-  // Returns the length of the vector.
+  // Returns the start of the memory range.
+  // For vector v this is NOT necessarily &v[0], see forward().
+  const T* start() const { return start_; }
+
+  // Returns the length of the vector, in characters.
  size_t length() const { return length_; }

-  T* start() const { return start_; }
+  // Returns true if the Vector is front-to-back, false if back-to-front.
+  // In the latter case, v[0] corresponds to the *end* of the memory range.
+  size_t forward() const { return is_forward_; }

  // Access individual vector elements - checks bounds in debug mode.
  T& operator[](size_t index) const {
    ASSERT(0 <= index && index < length_);
-    return start_[index];
-  }
-
-  const T& at(size_t index) const { return operator[](index); }
-
-  bool operator==(const Vector<T>& other) const {
-    if (length_ != other.length_)
-      return false;
-    if (start_ == other.start_)
-      return true;
-    for (size_t i = 0; i < length_; ++i) {
-      if (start_[i] != other.start_[i]) {
-        return false;
-      }
-    }
-    return true;
+    return start_[is_forward_ ? index : (length_ - index - 1)];
  }

 private:
  T* start_;
  size_t length_;
+  bool is_forward_;
 };


@ -114,31 +89,17 @@ class StringSearchBase {
  // Table used temporarily while building the BoyerMoore good suffix
  // shift table.
  static int kSuffixTable[kBMMaxShift + 1];
-
-  static inline bool IsOneByteString(Vector<const uint8_t> string) {
-    return true;
-  }
-
-  static inline bool IsOneByteString(Vector<const uint16_t> string) {
-    return IsOneByte(string.start(), string.length());
-  }
 };

-template <typename PatternChar, typename SubjectChar>
+template <typename Char>
 class StringSearch : private StringSearchBase {
 public:
-  explicit StringSearch(Vector<const PatternChar> pattern)
+  explicit StringSearch(Vector<const Char> pattern)
      : pattern_(pattern), start_(0) {
    if (pattern.length() >= kBMMaxShift) {
      start_ = pattern.length() - kBMMaxShift;
    }

-    if (sizeof(PatternChar) > sizeof(SubjectChar)) {
-      if (!IsOneByteString(pattern_)) {
-        strategy_ = &FailSearch;
-        return;
-      }
-    }
    size_t pattern_length = pattern_.length();
    CHECK_GT(pattern_length, 0);
    if (pattern_length < kBMMinPatternLength) {
@ -152,12 +113,12 @@ class StringSearch : private StringSearchBase {
    strategy_ = &InitialSearch;
  }

-  size_t Search(Vector<const SubjectChar> subject, size_t index) {
+  size_t Search(Vector<const Char> subject, size_t index) {
    return strategy_(this, subject, index);
  }

  static inline int AlphabetSize() {
-    if (sizeof(PatternChar) == 1) {
+    if (sizeof(Char) == 1) {
      // Latin1 needle.
      return kLatin1AlphabetSize;
    } else {
@ -165,42 +126,42 @@ class StringSearch : private StringSearchBase {
      return kUC16AlphabetSize;
    }

-    static_assert(sizeof(PatternChar) == sizeof(uint8_t) ||
-                      sizeof(PatternChar) == sizeof(uint16_t),
-                  "sizeof(PatternChar) == sizeof(uint16_t) || sizeof(uint8_t)");
+    static_assert(sizeof(Char) == sizeof(uint8_t) ||
+                  sizeof(Char) == sizeof(uint16_t),
+                  "sizeof(Char) == sizeof(uint16_t) || sizeof(uint8_t)");
  }

 private:
  typedef size_t (*SearchFunction)(  // NOLINT - it's not a cast!
-      StringSearch<PatternChar, SubjectChar>*,
-      Vector<const SubjectChar>,
+      StringSearch<Char>*,
+      Vector<const Char>,
      size_t);

-  static size_t FailSearch(StringSearch<PatternChar, SubjectChar>*,
-                           Vector<const SubjectChar> subject,
+  static size_t FailSearch(StringSearch<Char>*,
+                           Vector<const Char> subject,
                           size_t) {
    return subject.length();
  }

-  static size_t SingleCharSearch(StringSearch<PatternChar, SubjectChar>* search,
-                                 Vector<const SubjectChar> subject,
+  static size_t SingleCharSearch(StringSearch<Char>* search,
+                                 Vector<const Char> subject,
                                 size_t start_index);

-  static size_t LinearSearch(StringSearch<PatternChar, SubjectChar>* search,
-                             Vector<const SubjectChar> subject,
+  static size_t LinearSearch(StringSearch<Char>* search,
+                             Vector<const Char> subject,
                             size_t start_index);

-  static size_t InitialSearch(StringSearch<PatternChar, SubjectChar>* search,
-                              Vector<const SubjectChar> subject,
+  static size_t InitialSearch(StringSearch<Char>* search,
+                              Vector<const Char> subject,
                              size_t start_index);

  static size_t BoyerMooreHorspoolSearch(
-      StringSearch<PatternChar, SubjectChar>* search,
-      Vector<const SubjectChar> subject,
+      StringSearch<Char>* search,
+      Vector<const Char> subject,
      size_t start_index);

-  static size_t BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search,
-                                 Vector<const SubjectChar> subject,
+  static size_t BoyerMooreSearch(StringSearch<Char>* search,
+                                 Vector<const Char> subject,
                                 size_t start_index);

  void PopulateBoyerMooreHorspoolTable();
@ -214,16 +175,10 @@ class StringSearch : private StringSearchBase {
  }

  static inline int CharOccurrence(int* bad_char_occurrence,
-                                   SubjectChar char_code) {
-    if (sizeof(SubjectChar) == 1) {
+                                   Char char_code) {
+    if (sizeof(Char) == 1) {
      return bad_char_occurrence[static_cast<int>(char_code)];
    }
-    if (sizeof(PatternChar) == 1) {
-      if (exceedsOneByte(char_code)) {
-        return -1;
-      }
-      return bad_char_occurrence[static_cast<unsigned int>(char_code)];
-    }
    // Both pattern and subject are UC16. Reduce character to equivalence class.
    int equiv_class = char_code % kUC16AlphabetSize;
    return bad_char_occurrence[equiv_class];
@ -250,7 +205,7 @@ class StringSearch : private StringSearchBase {
  }

  // The pattern to search for.
-  Vector<const PatternChar> pattern_;
+  Vector<const Char> pattern_;
  // Pointer to implementation of the search.
  SearchFunction strategy_;
  // Cache value of Max(0, pattern_length() - kBMMaxShift)
@ -274,111 +229,138 @@ inline uint8_t GetHighestValueByte(uint16_t character) {
 inline uint8_t GetHighestValueByte(uint8_t character) { return character; }


-template <typename PatternChar, typename SubjectChar>
-inline size_t FindFirstCharacter(Vector<const PatternChar> pattern,
-                              Vector<const SubjectChar> subject, size_t index) {
-  const PatternChar pattern_first_char = pattern[0];
+// Searches for a byte value in a memory buffer, back to front.
+// Uses memrchr(3) on systems which support it, for speed.
+// Falls back to a vanilla for loop on non-GNU systems such as Windows.
+inline const void* MemrchrFill(const void* haystack, uint8_t needle,
+                               size_t haystack_len) {
+#ifdef _GNU_SOURCE
+  return memrchr(haystack, needle, haystack_len);
+#else
+  const uint8_t* haystack8 = static_cast<const uint8_t*>(haystack);
+  for (size_t i = haystack_len - 1; i != static_cast<size_t>(-1); i--) {
+    if (haystack8[i] == needle) {
+      return haystack8 + i;
+    }
+  }
+  return nullptr;
+#endif
+}
+
+
+// Finds the first occurrence of *two-byte* character pattern[0] in the string
+// `subject`. Does not check that the whole pattern matches.
+template <typename Char>
+inline size_t FindFirstCharacter(Vector<const Char> pattern,
+                              Vector<const Char> subject, size_t index) {
+  const Char pattern_first_char = pattern[0];
  const size_t max_n = (subject.length() - pattern.length() + 1);

+  // For speed, search for the more `rare` of the two bytes in pattern[0]
+  // using memchr / memrchr (which are much faster than a simple for loop).
  const uint8_t search_byte = GetHighestValueByte(pattern_first_char);
-  const SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char);
  size_t pos = index;
  do {
-    const SubjectChar* char_pos = reinterpret_cast<const SubjectChar*>(
-        memchr(subject.start() + pos, search_byte,
-               (max_n - pos) * sizeof(SubjectChar)));
+    size_t bytes_to_search;
+    const void* void_pos;
+    if (subject.forward()) {
+      // Assert that bytes_to_search won't overflow
+      CHECK_LE(pos, max_n);
+      CHECK_LE(max_n - pos, SIZE_MAX / sizeof(Char));
+      bytes_to_search = (max_n - pos) * sizeof(Char);
+      void_pos = memchr(subject.start() + pos, search_byte, bytes_to_search);
+    } else {
+      CHECK_LE(pos, subject.length());
+      CHECK_LE(subject.length() - pos, SIZE_MAX / sizeof(Char));
+      bytes_to_search = (subject.length() - pos) * sizeof(Char);
+      void_pos = MemrchrFill(subject.start(), search_byte, bytes_to_search);
+    }
+    const Char* char_pos = static_cast<const Char*>(void_pos);
    if (char_pos == nullptr)
      return subject.length();
-    char_pos = AlignDown(char_pos, sizeof(SubjectChar));
-    pos = static_cast<size_t>(char_pos - subject.start());
-    if (subject[pos] == search_char)
+
+    // Then, for each match, verify that the full two bytes match pattern[0].
+    char_pos = AlignDown(char_pos, sizeof(Char));
+    size_t raw_pos = static_cast<size_t>(char_pos - subject.start());
+    pos = subject.forward() ? raw_pos : (subject.length() - raw_pos - 1);
+    if (subject[pos] == pattern_first_char) {
+      // Match found, hooray.
      return pos;
+    }
+    // Search byte matched, but the other byte of pattern[0] didn't. Keep going.
  } while (++pos < max_n);

  return subject.length();
 }


+// Finds the first occurrence of the byte pattern[0] in string `subject`.
+// Does not verify that the whole pattern matches.
 template <>
 inline size_t FindFirstCharacter(Vector<const uint8_t> pattern,
                                 Vector<const uint8_t> subject,
                                 size_t index) {
  const uint8_t pattern_first_char = pattern[0];
+  const size_t subj_len = subject.length();
  const size_t max_n = (subject.length() - pattern.length() + 1);

-  const uint8_t* char_pos = reinterpret_cast<const uint8_t*>(
-      memchr(subject.start() + index, pattern_first_char, max_n - index));
-  if (char_pos == nullptr)
-    return subject.length();
-  return static_cast<size_t>(char_pos - subject.start());
+  const void* pos;
+  if (subject.forward()) {
+    pos = memchr(subject.start() + index, pattern_first_char, max_n - index);
+  } else {
+    pos = MemrchrFill(subject.start(), pattern_first_char, subj_len - index);
+  }
+  const uint8_t* char_pos = static_cast<const uint8_t*>(pos);
+  if (char_pos == nullptr) {
+    return subj_len;
+  }
+
+  size_t raw_pos = static_cast<size_t>(char_pos - subject.start());
+  return subject.forward() ? raw_pos : (subj_len - raw_pos - 1);
 }

 //---------------------------------------------------------------------
 // Single Character Pattern Search Strategy
 //---------------------------------------------------------------------

-template <typename PatternChar, typename SubjectChar>
-size_t StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
-    StringSearch<PatternChar, SubjectChar>* search,
-    Vector<const SubjectChar> subject,
+template <typename Char>
+size_t StringSearch<Char>::SingleCharSearch(
+    StringSearch<Char>* search,
+    Vector<const Char> subject,
    size_t index) {
  CHECK_EQ(1, search->pattern_.length());
-  PatternChar pattern_first_char = search->pattern_[0];
-
-  if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
  return FindFirstCharacter(search->pattern_, subject, index);
-  } else {
-    if (sizeof(PatternChar) > sizeof(SubjectChar)) {
-      if (exceedsOneByte(pattern_first_char)) {
-        return -1;
-      }
-    }
-    return FindFirstCharacter(search->pattern_, subject, index);
-  }
 }

 //---------------------------------------------------------------------
 // Linear Search Strategy
 //---------------------------------------------------------------------

-template <typename PatternChar, typename SubjectChar>
-inline bool CharCompare(const PatternChar* pattern,
-                        const SubjectChar* subject,
-                        size_t length) {
-  ASSERT_GT(length, 0);
-  size_t pos = 0;
-  do {
-    if (pattern[pos] != subject[pos]) {
-      return false;
-    }
-    pos++;
-  } while (pos < length);
-  return true;
-}
-
 // Simple linear search for short patterns. Never bails out.
-template <typename PatternChar, typename SubjectChar>
-size_t StringSearch<PatternChar, SubjectChar>::LinearSearch(
-    StringSearch<PatternChar, SubjectChar>* search,
-    Vector<const SubjectChar> subject,
+template <typename Char>
+size_t StringSearch<Char>::LinearSearch(
+    StringSearch<Char>* search,
+    Vector<const Char> subject,
    size_t index) {
-  Vector<const PatternChar> pattern = search->pattern_;
+  Vector<const Char> pattern = search->pattern_;
  CHECK_GT(pattern.length(), 1);
  const size_t pattern_length = pattern.length();
-  size_t i = index;
  const size_t n = subject.length() - pattern_length;
-  while (i <= n) {
+  for (size_t i = index; i <= n; i++) {
    i = FindFirstCharacter(pattern, subject, i);
    if (i == subject.length())
      return subject.length();
    ASSERT_LE(i, n);
-    i++;

-    // Loop extracted to separate function to allow using return to do
-    // a deeper break.
-    if (CharCompare(pattern.start() + 1, subject.start() + i,
-                    pattern_length - 1)) {
-      return i - 1;
+    bool matches = true;
+    for (size_t j = 1; j < pattern_length; j++) {
+      if (pattern[j] != subject[i + j]) {
+        matches = false;
+        break;
+      }
+    }
+    if (matches) {
+      return i;
    }
  }
  return subject.length();
@ -388,12 +370,12 @@ size_t StringSearch<PatternChar, SubjectChar>::LinearSearch(
 // Boyer-Moore string search
 //---------------------------------------------------------------------

-template <typename PatternChar, typename SubjectChar>
-size_t StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
-    StringSearch<PatternChar, SubjectChar>* search,
-    Vector<const SubjectChar> subject,
+template <typename Char>
+size_t StringSearch<Char>::BoyerMooreSearch(
+    StringSearch<Char>* search,
+    Vector<const Char> subject,
    size_t start_index) {
-  Vector<const PatternChar> pattern = search->pattern_;
+  Vector<const Char> pattern = search->pattern_;
  const size_t subject_length = subject.length();
  const size_t pattern_length = pattern.length();
  // Only preprocess at most kBMMaxShift last characters of pattern.
@ -402,7 +384,7 @@ size_t StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
  int* bad_char_occurence = search->bad_char_table();
  int* good_suffix_shift = search->good_suffix_shift_table();

-  PatternChar last_char = pattern[pattern_length - 1];
+  Char last_char = pattern[pattern_length - 1];
  size_t index = start_index;
  // Continue search from i.
  while (index <= subject_length - pattern_length) {
@ -426,7 +408,7 @@ size_t StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
      // Fall back on BMH shift.
      index += pattern_length - 1 -
               CharOccurrence(bad_char_occurence,
-                              static_cast<SubjectChar>(last_char));
+                              static_cast<Char>(last_char));
    } else {
      int gs_shift = good_suffix_shift[j + 1];
      int bc_occ = CharOccurrence(bad_char_occurence, c);
@ -441,10 +423,10 @@ size_t StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
  return subject.length();
 }

-template <typename PatternChar, typename SubjectChar>
-void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
+template <typename Char>
+void StringSearch<Char>::PopulateBoyerMooreTable() {
  const size_t pattern_length = pattern_.length();
-  const PatternChar* pattern = pattern_.start();
+  Vector<const Char> pattern = pattern_;
  // Only look at the last kBMMaxShift characters of pattern (from start_
  // to pattern_length).
  const size_t start = start_;
@ -467,12 +449,12 @@ void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
  }

  // Find suffixes.
-  PatternChar last_char = pattern[pattern_length - 1];
+  Char last_char = pattern_[pattern_length - 1];
  size_t suffix = pattern_length + 1;
  {
    size_t i = pattern_length;
    while (i > start) {
-      PatternChar c = pattern[i - 1];
+      Char c = pattern[i - 1];
      while (suffix <= pattern_length && c != pattern[suffix - 1]) {
        if (static_cast<size_t>(shift_table[suffix]) == length) {
          shift_table[suffix] = suffix - i;
@ -511,22 +493,22 @@ void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
 // Boyer-Moore-Horspool string search.
 //---------------------------------------------------------------------

-template <typename PatternChar, typename SubjectChar>
-size_t StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
-    StringSearch<PatternChar, SubjectChar>* search,
-    Vector<const SubjectChar> subject,
+template <typename Char>
+size_t StringSearch<Char>::BoyerMooreHorspoolSearch(
+    StringSearch<Char>* search,
+    Vector<const Char> subject,
    size_t start_index) {
-  Vector<const PatternChar> pattern = search->pattern_;
+  Vector<const Char> pattern = search->pattern_;
  const size_t subject_length = subject.length();
  const size_t pattern_length = pattern.length();
  int* char_occurrences = search->bad_char_table();
  int64_t badness = -pattern_length;

  // How bad we are doing without a good-suffix table.
-  PatternChar last_char = pattern[pattern_length - 1];
+  Char last_char = pattern[pattern_length - 1];
  int last_char_shift =
      pattern_length - 1 -
-      CharOccurrence(char_occurrences, static_cast<SubjectChar>(last_char));
+      CharOccurrence(char_occurrences, static_cast<Char>(last_char));

  // Perform search
  size_t index = start_index;  // No matches found prior to this index.
@ -564,8 +546,8 @@ size_t StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
  return subject.length();
 }

-template <typename PatternChar, typename SubjectChar>
-void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
+template <typename Char>
+void StringSearch<Char>::PopulateBoyerMooreHorspoolTable() {
  const size_t pattern_length = pattern_.length();

  int* bad_char_occurrence = bad_char_table();
@ -585,8 +567,8 @@ void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
    }
  }
  for (size_t i = start; i < pattern_length - 1; i++) {
-    PatternChar c = pattern_[i];
-    int bucket = (sizeof(PatternChar) == 1) ? c : c % AlphabetSize();
+    Char c = pattern_[i];
+    int bucket = (sizeof(Char) == 1) ? c : c % AlphabetSize();
    bad_char_occurrence[bucket] = i;
  }
 }
@ -597,12 +579,12 @@ void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {

 // Simple linear search for short patterns, which bails out if the string
 // isn't found very early in the subject. Upgrades to BoyerMooreHorspool.
-template <typename PatternChar, typename SubjectChar>
-size_t StringSearch<PatternChar, SubjectChar>::InitialSearch(
-    StringSearch<PatternChar, SubjectChar>* search,
-    Vector<const SubjectChar> subject,
+template <typename Char>
+size_t StringSearch<Char>::InitialSearch(
+    StringSearch<Char>* search,
+    Vector<const Char> subject,
    size_t index) {
-  Vector<const PatternChar> pattern = search->pattern_;
+  Vector<const Char> pattern = search->pattern_;
  const size_t pattern_length = pattern.length();
  // Badness is a count of how much work we have done.  When we have
  // done enough work we decide it's probably worth switching to a better
@ -642,11 +624,11 @@ size_t StringSearch<PatternChar, SubjectChar>::InitialSearch(
 // If searching multiple times for the same pattern, a search
 // object should be constructed once and the Search function then called
 // for each search.
-template <typename SubjectChar, typename PatternChar>
-size_t SearchString(Vector<const SubjectChar> subject,
-                    Vector<const PatternChar> pattern,
+template <typename Char>
+size_t SearchString(Vector<const Char> subject,
+                    Vector<const Char> pattern,
                    size_t start_index) {
-  StringSearch<PatternChar, SubjectChar> search(pattern);
+  StringSearch<Char> search(pattern);
  return search.Search(subject, start_index);
 }
 }
@ -655,16 +637,38 @@ size_t SearchString(Vector<const SubjectChar> subject,
 namespace node {
 using node::stringsearch::Vector;

-template <typename SubjectChar, typename PatternChar>
-size_t SearchString(const SubjectChar* haystack,
+template <typename Char>
+size_t SearchString(const Char* haystack,
                    size_t haystack_length,
-                    const PatternChar* needle,
+                    const Char* needle,
                    size_t needle_length,
-                    size_t start_index) {
-  return node::stringsearch::SearchString(
-      Vector<const SubjectChar>(haystack, haystack_length),
-      Vector<const PatternChar>(needle, needle_length),
-      start_index);
+                    size_t start_index,
+                    bool is_forward) {
+  // To do a reverse search (lastIndexOf instead of indexOf) without redundant
+  // code, create two vectors that are reversed views into the input strings.
+  // For example, v_needle[0] would return the *last* character of the needle.
+  // So we're searching for the first instance of rev(needle) in rev(haystack).
+  Vector<const Char> v_needle = Vector<const Char>(
+      needle, needle_length, is_forward);
+  Vector<const Char> v_haystack = Vector<const Char>(
+      haystack, haystack_length, is_forward);
+  ASSERT(haystack_length >= needle_length);
+  size_t diff = haystack_length - needle_length;
+  size_t relative_start_index;
+  if (is_forward) {
+    relative_start_index = start_index;
+  } else if (diff < start_index) {
+    relative_start_index = 0;
+  } else {
+    relative_start_index = diff - start_index;
+  }
+  size_t pos = node::stringsearch::SearchString(
+      v_haystack, v_needle, relative_start_index);
+  if (pos == haystack_length) {
+    // not found
+    return pos;
+  }
+  return is_forward ? pos : (haystack_length - needle_length - pos);
 }
 }  // namespace node

--- a/test/parallel/test-buffer-indexof.js
+++ b/test/parallel/test-buffer-indexof.js
@ -282,3 +282,120 @@ assert.throws(function() {
 assert.throws(function() {
  b.indexOf([]);
 });
+
+// All code for handling encodings is shared between Buffer.indexOf and
+// Buffer.lastIndexOf, so only testing the separate lastIndexOf semantics.
+
+// Test lastIndexOf basic functionality; Buffer b contains 'abcdef'.
+// lastIndexOf string:
+assert.equal(b.lastIndexOf('a'), 0);
+assert.equal(b.lastIndexOf('a', 1), 0);
+assert.equal(b.lastIndexOf('b', 1), 1);
+assert.equal(b.lastIndexOf('c', 1), -1);
+assert.equal(b.lastIndexOf('a', -1), 0);
+assert.equal(b.lastIndexOf('a', -4), 0);
+assert.equal(b.lastIndexOf('a', -b.length), 0);
+assert.equal(b.lastIndexOf('a', -b.length - 1), -1);
+assert.equal(b.lastIndexOf('a', NaN), 0);
+assert.equal(b.lastIndexOf('a', -Infinity), -1);
+assert.equal(b.lastIndexOf('a', Infinity), 0);
+// lastIndexOf Buffer:
+assert.equal(b.lastIndexOf(buf_a), 0);
+assert.equal(b.lastIndexOf(buf_a, 1), 0);
+assert.equal(b.lastIndexOf(buf_a, -1), 0);
+assert.equal(b.lastIndexOf(buf_a, -4), 0);
+assert.equal(b.lastIndexOf(buf_a, -b.length), 0);
+assert.equal(b.lastIndexOf(buf_a, -b.length - 1), -1);
+assert.equal(b.lastIndexOf(buf_a, NaN), 0);
+assert.equal(b.lastIndexOf(buf_a, -Infinity), -1);
+assert.equal(b.lastIndexOf(buf_a, Infinity), 0);
+assert.equal(b.lastIndexOf(buf_bc), 1);
+assert.equal(b.lastIndexOf(buf_bc, 2), 1);
+assert.equal(b.lastIndexOf(buf_bc, -1), 1);
+assert.equal(b.lastIndexOf(buf_bc, -3), 1);
+assert.equal(b.lastIndexOf(buf_bc, -5), 1);
+assert.equal(b.lastIndexOf(buf_bc, -6), -1);
+assert.equal(b.lastIndexOf(buf_bc, NaN), 1);
+assert.equal(b.lastIndexOf(buf_bc, -Infinity), -1);
+assert.equal(b.lastIndexOf(buf_bc, Infinity), 1);
+assert.equal(b.lastIndexOf(buf_f), b.length - 1);
+assert.equal(b.lastIndexOf(buf_z), -1);
+assert.equal(b.lastIndexOf(buf_empty), -1);
+assert.equal(b.lastIndexOf(buf_empty, 1), -1);
+assert.equal(b.lastIndexOf(buf_empty, b.length + 1), -1);
+assert.equal(b.lastIndexOf(buf_empty, Infinity), -1);
+// lastIndexOf number:
+assert.equal(b.lastIndexOf(0x61), 0);
+assert.equal(b.lastIndexOf(0x61, 1), 0);
+assert.equal(b.lastIndexOf(0x61, -1), 0);
+assert.equal(b.lastIndexOf(0x61, -4), 0);
+assert.equal(b.lastIndexOf(0x61, -b.length), 0);
+assert.equal(b.lastIndexOf(0x61, -b.length - 1), -1);
+assert.equal(b.lastIndexOf(0x61, NaN), 0);
+assert.equal(b.lastIndexOf(0x61, -Infinity), -1);
+assert.equal(b.lastIndexOf(0x61, Infinity), 0);
+assert.equal(b.lastIndexOf(0x0), -1);
+
+// Test weird offset arguments.
+// Behaviour should match String.lastIndexOf:
+assert.equal(b.lastIndexOf('b', 0), -1);
+assert.equal(b.lastIndexOf('b', undefined), 1);
+assert.equal(b.lastIndexOf('b', null), -1);
+assert.equal(b.lastIndexOf('b', {}), 1);
+assert.equal(b.lastIndexOf('b', []), -1);
+assert.equal(b.lastIndexOf('b', [2]), 1);
+
+// Test lastIndexOf on a longer buffer:
+var bufferString = new Buffer('a man a plan a canal panama');
+assert.equal(15, bufferString.lastIndexOf('canal'));
+assert.equal(21, bufferString.lastIndexOf('panama'));
+assert.equal(0, bufferString.lastIndexOf('a man a plan a canal panama'));
+assert.equal(-1, bufferString.lastIndexOf('a man a plan a canal mexico'));
+assert.equal(13, bufferString.lastIndexOf('a '));
+assert.equal(13, bufferString.lastIndexOf('a ', 13));
+assert.equal(6, bufferString.lastIndexOf('a ', 12));
+assert.equal(0, bufferString.lastIndexOf('a ', 5));
+assert.equal(13, bufferString.lastIndexOf('a ', -1));
+assert.equal(0, bufferString.lastIndexOf('a ', -27));
+assert.equal(-1, bufferString.lastIndexOf('a ', -28));
+
+// The above tests test the LINEAR and SINGLE-CHAR strategies.
+// Now, we test the BOYER-MOORE-HORSPOOL strategy.
+// Test lastIndexOf on a long buffer with multiple matches:
+pattern = 'JABACABADABACABA';
+assert.equal(1535, longBufferString.lastIndexOf(pattern));
+assert.equal(1535, longBufferString.lastIndexOf(pattern, 1535));
+assert.equal(511, longBufferString.lastIndexOf(pattern, 1534));
+
+// Finally, give it a really long input to trigger fallback from BMH to
+// regular BOYER-MOORE (which has better worst-case complexity).
+
+// Generate a really long Thue-Morse sequence of 'yolo' and 'swag',
+// "yolo swag swag yolo swag yolo yolo swag" ..., goes on for about 5MB.
+// This is hard to search because it all looks similar, but never repeats.
+
+// countBits returns the number of set bits in the binary representation of n.
+function countBits(n) {
+  for (var count = 0; n > 0; count++) {
+    n = n & (n - 1); // clear the lowest set bit
+  }
+  return count;
+}
+var parts = [];
+for (var i = 0; i < 1000000; i++) {
+  parts.push((countBits(i) % 2 === 0) ? 'yolo' : 'swag');
+}
+var reallyLong = new Buffer(parts.join(' '));
+assert.equal('yolo swag swag yolo', reallyLong.slice(0, 19).toString());
+
+// Expensive reverse searches. Stress test lastIndexOf:
+pattern = reallyLong.slice(0, 100000);  // First 1/50th of the buffer.
+assert.equal(4751360, reallyLong.lastIndexOf(pattern));
+assert.equal(3932160, reallyLong.lastIndexOf(pattern, 4000000));
+assert.equal(2949120, reallyLong.lastIndexOf(pattern, 3000000));
+pattern = reallyLong.slice(100000, 200000);  // Second 1/50th.
+assert.equal(4728480, reallyLong.lastIndexOf(pattern));
+pattern = reallyLong.slice(0, 1000000);  // First 1/5th.
+assert.equal(3932160, reallyLong.lastIndexOf(pattern));
+pattern = reallyLong.slice(0, 2000000);  // First 2/5ths.
+assert.equal(0, reallyLong.lastIndexOf(pattern));