array.c: improve operations on small arrays
[Feature #13884]

Reduce number of memory allocations for "and", "or" and "diff"
operations on small arrays.

Very often, arrays are used to filter parameters and to select
interesting items from 2 collections, and very often these
collections are small enough, for example:

```ruby
SAFE_COLUMNS = [:id, :title, :created_at]

def columns
  @all_columns & SAFE_COLUMNS
end
```

In this patch, I got rid of unnecessary memory allocations for
small arrays when "and", "or" and "diff" operations are performed.

name             | HEAD  | PATCH
-----------------+------:+------:
array_small_and  | 0.615 | 0.263
array_small_diff | 0.676 | 0.282
array_small_or   | 0.953 | 0.463

name             | PATCH
-----------------+------:
array_small_and  | 2.343
array_small_diff | 2.392
array_small_or   | 2.056

name             | HEAD  | PATCH
-----------------+------:+------:
array_small_and  | 1.429 | 1.005
array_small_diff | 1.493 | 0.878
array_small_or   | 1.672 | 1.152

name             | PATCH
-----------------+------:
array_small_and  | 1.422
array_small_diff | 1.700
array_small_or   | 1.452

Author: Dmitry Bochkarev <dimabochkarev@gmail.com>

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@60057 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
ff6573b696
commit
9bc73cd81f
54
array.c
54
array.c
@ -30,6 +30,7 @@ VALUE rb_cArray;
|
|||||||
|
|
||||||
#define ARY_DEFAULT_SIZE 16
|
#define ARY_DEFAULT_SIZE 16
|
||||||
#define ARY_MAX_SIZE (LONG_MAX / (int)sizeof(VALUE))
|
#define ARY_MAX_SIZE (LONG_MAX / (int)sizeof(VALUE))
|
||||||
|
#define SMALL_ARRAY_LEN 16
|
||||||
|
|
||||||
# define ARY_SHARED_P(ary) \
|
# define ARY_SHARED_P(ary) \
|
||||||
(assert(!FL_TEST((ary), ELTS_SHARED) || !FL_TEST((ary), RARRAY_EMBED_FLAG)), \
|
(assert(!FL_TEST((ary), ELTS_SHARED) || !FL_TEST((ary), RARRAY_EMBED_FLAG)), \
|
||||||
@ -3985,6 +3986,20 @@ rb_ary_includes(VALUE ary, VALUE item)
|
|||||||
return Qfalse;
|
return Qfalse;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static VALUE
|
||||||
|
rb_ary_includes_by_eql(VALUE ary, VALUE item)
|
||||||
|
{
|
||||||
|
long i;
|
||||||
|
VALUE e;
|
||||||
|
|
||||||
|
for (i=0; i<RARRAY_LEN(ary); i++) {
|
||||||
|
e = RARRAY_AREF(ary, i);
|
||||||
|
if (rb_eql(item, e)) {
|
||||||
|
return Qtrue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Qfalse;
|
||||||
|
}
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
recursive_cmp(VALUE ary1, VALUE ary2, int recur)
|
recursive_cmp(VALUE ary1, VALUE ary2, int recur)
|
||||||
@ -4135,9 +4150,19 @@ rb_ary_diff(VALUE ary1, VALUE ary2)
|
|||||||
VALUE hash;
|
VALUE hash;
|
||||||
long i;
|
long i;
|
||||||
|
|
||||||
hash = ary_make_hash(to_ary(ary2));
|
ary2 = to_ary(ary2);
|
||||||
ary3 = rb_ary_new();
|
ary3 = rb_ary_new();
|
||||||
|
|
||||||
|
if (RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) {
|
||||||
|
for (i=0; i<RARRAY_LEN(ary1); i++) {
|
||||||
|
VALUE elt = rb_ary_elt(ary1, i);
|
||||||
|
if (rb_ary_includes_by_eql(ary2, elt)) continue;
|
||||||
|
rb_ary_push(ary3, elt);
|
||||||
|
}
|
||||||
|
return ary3;
|
||||||
|
}
|
||||||
|
|
||||||
|
hash = ary_make_hash(ary2);
|
||||||
for (i=0; i<RARRAY_LEN(ary1); i++) {
|
for (i=0; i<RARRAY_LEN(ary1); i++) {
|
||||||
if (st_lookup(rb_hash_tbl_raw(hash), RARRAY_AREF(ary1, i), 0)) continue;
|
if (st_lookup(rb_hash_tbl_raw(hash), RARRAY_AREF(ary1, i), 0)) continue;
|
||||||
rb_ary_push(ary3, rb_ary_elt(ary1, i));
|
rb_ary_push(ary3, rb_ary_elt(ary1, i));
|
||||||
@ -4173,6 +4198,17 @@ rb_ary_and(VALUE ary1, VALUE ary2)
|
|||||||
ary2 = to_ary(ary2);
|
ary2 = to_ary(ary2);
|
||||||
ary3 = rb_ary_new();
|
ary3 = rb_ary_new();
|
||||||
if (RARRAY_LEN(ary2) == 0) return ary3;
|
if (RARRAY_LEN(ary2) == 0) return ary3;
|
||||||
|
|
||||||
|
if (RARRAY_LEN(ary1) <= SMALL_ARRAY_LEN && RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) {
|
||||||
|
for (i=0; i<RARRAY_LEN(ary1); i++) {
|
||||||
|
v = RARRAY_AREF(ary1, i);
|
||||||
|
if (!rb_ary_includes_by_eql(ary2, v)) continue;
|
||||||
|
if (rb_ary_includes_by_eql(ary3, v)) continue;
|
||||||
|
rb_ary_push(ary3, v);
|
||||||
|
}
|
||||||
|
return ary3;
|
||||||
|
}
|
||||||
|
|
||||||
hash = ary_make_hash(ary2);
|
hash = ary_make_hash(ary2);
|
||||||
table = rb_hash_tbl_raw(hash);
|
table = rb_hash_tbl_raw(hash);
|
||||||
|
|
||||||
@ -4218,8 +4254,22 @@ rb_ary_or(VALUE ary1, VALUE ary2)
|
|||||||
long i;
|
long i;
|
||||||
|
|
||||||
ary2 = to_ary(ary2);
|
ary2 = to_ary(ary2);
|
||||||
hash = ary_make_hash(ary1);
|
if (RARRAY_LEN(ary1) + RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) {
|
||||||
|
ary3 = rb_ary_new();
|
||||||
|
for (i=0; i<RARRAY_LEN(ary1); i++) {
|
||||||
|
VALUE elt = rb_ary_elt(ary1, i);
|
||||||
|
if (rb_ary_includes_by_eql(ary3, elt)) continue;
|
||||||
|
rb_ary_push(ary3, elt);
|
||||||
|
}
|
||||||
|
for (i=0; i<RARRAY_LEN(ary2); i++) {
|
||||||
|
VALUE elt = rb_ary_elt(ary2, i);
|
||||||
|
if (rb_ary_includes_by_eql(ary3, elt)) continue;
|
||||||
|
rb_ary_push(ary3, elt);
|
||||||
|
}
|
||||||
|
return ary3;
|
||||||
|
}
|
||||||
|
|
||||||
|
hash = ary_make_hash(ary1);
|
||||||
for (i=0; i<RARRAY_LEN(ary2); i++) {
|
for (i=0; i<RARRAY_LEN(ary2); i++) {
|
||||||
VALUE elt = RARRAY_AREF(ary2, i);
|
VALUE elt = RARRAY_AREF(ary2, i);
|
||||||
if (!st_update(RHASH_TBL_RAW(hash), (st_data_t)elt, ary_hash_orset, (st_data_t)elt)) {
|
if (!st_update(RHASH_TBL_RAW(hash), (st_data_t)elt, ary_hash_orset, (st_data_t)elt)) {
|
||||||
|
17
benchmark/bm_array_small_and.rb
Normal file
17
benchmark/bm_array_small_and.rb
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Benchmark: Array#& over every pairing of small array lengths.
#
# Tunables are read from the environment:
#   SMALL_ARRAY_MIN         smallest array length            (default 0)
#   SMALL_ARRAY_MAX         largest array length; also the
#                           argument handed to rand          (default 16)
#   SMALL_ARRAY_ITERATIONS  passes over the whole pair table (default 100)
MIN_SIZE = ENV.fetch('SMALL_ARRAY_MIN') { 0 }.to_i
MAX_SIZE = ENV.fetch('SMALL_ARRAY_MAX') { 16 }.to_i
ITERATIONS = ENV.fetch('SMALL_ARRAY_ITERATIONS') { 100 }.to_i

# ARRAYS is a table of rows; each row holds [left, right] pairs where the
# row fixes the length of `left` and the column fixes the length of `right`.
ARRAYS = (MIN_SIZE..MAX_SIZE).map { |left_len|
  (MIN_SIZE..MAX_SIZE).map { |right_len|
    left = Array.new(left_len) { rand(MAX_SIZE) }
    right = Array.new(right_len) { rand(MAX_SIZE) }
    [left, right]
  }
}

# The result of each operation is discarded; only the work is of interest.
ITERATIONS.times do
  ARRAYS.each do |row|
    row.each do |(left, right)|
      left & right
    end
  end
end
|
17
benchmark/bm_array_small_diff.rb
Normal file
17
benchmark/bm_array_small_diff.rb
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Benchmark: Array#- over every pairing of small array lengths.
#
# Tunables are read from the environment:
#   SMALL_ARRAY_MIN         smallest array length            (default 0)
#   SMALL_ARRAY_MAX         largest array length; also the
#                           argument handed to rand          (default 16)
#   SMALL_ARRAY_ITERATIONS  passes over the whole pair table (default 100)
MIN_SIZE = ENV.fetch('SMALL_ARRAY_MIN') { 0 }.to_i
MAX_SIZE = ENV.fetch('SMALL_ARRAY_MAX') { 16 }.to_i
ITERATIONS = ENV.fetch('SMALL_ARRAY_ITERATIONS') { 100 }.to_i

# ARRAYS is a table of rows; each row holds [left, right] pairs where the
# row fixes the length of `left` and the column fixes the length of `right`.
ARRAYS = (MIN_SIZE..MAX_SIZE).map { |left_len|
  (MIN_SIZE..MAX_SIZE).map { |right_len|
    left = Array.new(left_len) { rand(MAX_SIZE) }
    right = Array.new(right_len) { rand(MAX_SIZE) }
    [left, right]
  }
}

# The result of each operation is discarded; only the work is of interest.
ITERATIONS.times do
  ARRAYS.each do |row|
    row.each do |(left, right)|
      left - right
    end
  end
end
|
17
benchmark/bm_array_small_or.rb
Normal file
17
benchmark/bm_array_small_or.rb
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Benchmark: Array#| over every pairing of small array lengths.
#
# Tunables are read from the environment:
#   SMALL_ARRAY_MIN         smallest array length            (default 0)
#   SMALL_ARRAY_MAX         largest array length; also the
#                           argument handed to rand          (default 16)
#   SMALL_ARRAY_ITERATIONS  passes over the whole pair table (default 100)
MIN_SIZE = ENV.fetch('SMALL_ARRAY_MIN') { 0 }.to_i
MAX_SIZE = ENV.fetch('SMALL_ARRAY_MAX') { 16 }.to_i
ITERATIONS = ENV.fetch('SMALL_ARRAY_ITERATIONS') { 100 }.to_i

# ARRAYS is a table of rows; each row holds [left, right] pairs where the
# row fixes the length of `left` and the column fixes the length of `right`.
ARRAYS = (MIN_SIZE..MAX_SIZE).map { |left_len|
  (MIN_SIZE..MAX_SIZE).map { |right_len|
    left = Array.new(left_len) { rand(MAX_SIZE) }
    right = Array.new(right_len) { rand(MAX_SIZE) }
    [left, right]
  }
}

# The result of each operation is discarded; only the work is of interest.
ITERATIONS.times do
  ARRAYS.each do |row|
    row.each do |(left, right)|
      left | right
    end
  end
end
|
@ -49,17 +49,18 @@ describe "Array#&" do
|
|||||||
|
|
||||||
obj1 = mock('1')
|
obj1 = mock('1')
|
||||||
obj2 = mock('2')
|
obj2 = mock('2')
|
||||||
obj1.should_receive(:hash).at_least(1).and_return(0)
|
obj1.stub!(:hash).and_return(0)
|
||||||
obj2.should_receive(:hash).at_least(1).and_return(0)
|
obj2.stub!(:hash).and_return(0)
|
||||||
obj1.should_receive(:eql?).at_least(1).and_return(true)
|
obj1.should_receive(:eql?).at_least(1).and_return(true)
|
||||||
|
obj2.should_receive(:eql?).at_least(1).and_return(true)
|
||||||
|
|
||||||
([obj1] & [obj2]).should == [obj1]
|
([obj1] & [obj2]).should == [obj1]
|
||||||
([obj1, obj1, obj2, obj2] & [obj2]).should == [obj1]
|
([obj1, obj1, obj2, obj2] & [obj2]).should == [obj1]
|
||||||
|
|
||||||
obj1 = mock('3')
|
obj1 = mock('3')
|
||||||
obj2 = mock('4')
|
obj2 = mock('4')
|
||||||
obj1.should_receive(:hash).at_least(1).and_return(0)
|
obj1.stub!(:hash).and_return(0)
|
||||||
obj2.should_receive(:hash).at_least(1).and_return(0)
|
obj2.stub!(:hash).and_return(0)
|
||||||
obj1.should_receive(:eql?).at_least(1).and_return(false)
|
obj1.should_receive(:eql?).at_least(1).and_return(false)
|
||||||
|
|
||||||
([obj1] & [obj2]).should == []
|
([obj1] & [obj2]).should == []
|
||||||
@ -78,7 +79,7 @@ describe "Array#&" do
|
|||||||
|
|
||||||
it "properly handles an identical item even when its #eql? isn't reflexive" do
|
it "properly handles an identical item even when its #eql? isn't reflexive" do
|
||||||
x = mock('x')
|
x = mock('x')
|
||||||
x.should_receive(:hash).at_least(1).and_return(42)
|
x.stub!(:hash).and_return(42)
|
||||||
x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI.
|
x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI.
|
||||||
|
|
||||||
([x] & [x]).should == [x]
|
([x] & [x]).should == [x]
|
||||||
|
@ -46,8 +46,8 @@ describe "Array#-" do
|
|||||||
it "removes an item identified as equivalent via #hash and #eql?" do
|
it "removes an item identified as equivalent via #hash and #eql?" do
|
||||||
obj1 = mock('1')
|
obj1 = mock('1')
|
||||||
obj2 = mock('2')
|
obj2 = mock('2')
|
||||||
obj1.should_receive(:hash).at_least(1).and_return(0)
|
obj1.stub!(:hash).and_return(0)
|
||||||
obj2.should_receive(:hash).at_least(1).and_return(0)
|
obj2.stub!(:hash).and_return(0)
|
||||||
obj1.should_receive(:eql?).at_least(1).and_return(true)
|
obj1.should_receive(:eql?).at_least(1).and_return(true)
|
||||||
|
|
||||||
([obj1] - [obj2]).should == []
|
([obj1] - [obj2]).should == []
|
||||||
@ -57,8 +57,8 @@ describe "Array#-" do
|
|||||||
it "doesn't remove an item with the same hash but not #eql?" do
|
it "doesn't remove an item with the same hash but not #eql?" do
|
||||||
obj1 = mock('1')
|
obj1 = mock('1')
|
||||||
obj2 = mock('2')
|
obj2 = mock('2')
|
||||||
obj1.should_receive(:hash).at_least(1).and_return(0)
|
obj1.stub!(:hash).and_return(0)
|
||||||
obj2.should_receive(:hash).at_least(1).and_return(0)
|
obj2.stub!(:hash).and_return(0)
|
||||||
obj1.should_receive(:eql?).at_least(1).and_return(false)
|
obj1.should_receive(:eql?).at_least(1).and_return(false)
|
||||||
|
|
||||||
([obj1] - [obj2]).should == [obj1]
|
([obj1] - [obj2]).should == [obj1]
|
||||||
@ -67,7 +67,7 @@ describe "Array#-" do
|
|||||||
|
|
||||||
it "removes an identical item even when its #eql? isn't reflexive" do
|
it "removes an identical item even when its #eql? isn't reflexive" do
|
||||||
x = mock('x')
|
x = mock('x')
|
||||||
x.should_receive(:hash).at_least(1).and_return(42)
|
x.stub!(:hash).and_return(42)
|
||||||
x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI.
|
x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI.
|
||||||
|
|
||||||
([x] - [x]).should == []
|
([x] - [x]).should == []
|
||||||
|
@ -45,8 +45,8 @@ describe "Array#|" do
|
|||||||
|
|
||||||
obj1 = mock('1')
|
obj1 = mock('1')
|
||||||
obj2 = mock('2')
|
obj2 = mock('2')
|
||||||
obj1.should_receive(:hash).at_least(1).and_return(0)
|
obj1.stub!(:hash).and_return(0)
|
||||||
obj2.should_receive(:hash).at_least(1).and_return(0)
|
obj2.stub!(:hash).and_return(0)
|
||||||
obj2.should_receive(:eql?).at_least(1).and_return(true)
|
obj2.should_receive(:eql?).at_least(1).and_return(true)
|
||||||
|
|
||||||
([obj1] | [obj2]).should == [obj1]
|
([obj1] | [obj2]).should == [obj1]
|
||||||
@ -54,8 +54,8 @@ describe "Array#|" do
|
|||||||
|
|
||||||
obj1 = mock('3')
|
obj1 = mock('3')
|
||||||
obj2 = mock('4')
|
obj2 = mock('4')
|
||||||
obj1.should_receive(:hash).at_least(1).and_return(0)
|
obj1.stub!(:hash).and_return(0)
|
||||||
obj2.should_receive(:hash).at_least(1).and_return(0)
|
obj2.stub!(:hash).and_return(0)
|
||||||
obj2.should_receive(:eql?).at_least(1).and_return(false)
|
obj2.should_receive(:eql?).at_least(1).and_return(false)
|
||||||
|
|
||||||
([obj1] | [obj2]).should == [obj1, obj2]
|
([obj1] | [obj2]).should == [obj1, obj2]
|
||||||
@ -74,7 +74,7 @@ describe "Array#|" do
|
|||||||
|
|
||||||
it "properly handles an identical item even when its #eql? isn't reflexive" do
|
it "properly handles an identical item even when its #eql? isn't reflexive" do
|
||||||
x = mock('x')
|
x = mock('x')
|
||||||
x.should_receive(:hash).at_least(1).and_return(42)
|
x.stub!(:hash).and_return(42)
|
||||||
x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI.
|
x.stub!(:eql?).and_return(false) # Stubbed for clarity and latitude in implementation; not actually sent by MRI.
|
||||||
|
|
||||||
([x] | [x]).should == [x]
|
([x] | [x]).should == [x]
|
||||||
|
@ -224,6 +224,13 @@ class TestArray < Test::Unit::TestCase
|
|||||||
assert_equal(@cls[], @cls[ 1, 2, 3 ] & @cls[ 4, 5, 6 ])
|
assert_equal(@cls[], @cls[ 1, 2, 3 ] & @cls[ 4, 5, 6 ])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_AND_big_array # '&'
  # Long operands are short patterns repeated 64 times.
  with_dupes = @cls[1, 1, 3, 5] * 64
  low_values = @cls[1, 2, 3] * 64
  high_values = @cls[4, 5, 6] * 64

  # The intersection is de-duplicated and follows the receiver's order.
  assert_equal(@cls[1, 3], with_dupes & low_values)

  # An empty operand on either side yields an empty result.
  assert_equal(@cls[], with_dupes & @cls[])
  assert_equal(@cls[], @cls[] & low_values)

  # Disjoint operands yield an empty result.
  assert_equal(@cls[], low_values & high_values)
end
|
||||||
|
|
||||||
def test_MUL # '*'
|
def test_MUL # '*'
|
||||||
assert_equal(@cls[], @cls[]*3)
|
assert_equal(@cls[], @cls[]*3)
|
||||||
assert_equal(@cls[1, 1, 1], @cls[1]*3)
|
assert_equal(@cls[1, 1, 1], @cls[1]*3)
|
||||||
@ -260,6 +267,18 @@ class TestArray < Test::Unit::TestCase
|
|||||||
assert_equal(@cls[1, 2, 3], @cls[1, 2, 3] - @cls[4, 5, 6])
|
assert_equal(@cls[1, 2, 3], @cls[1, 2, 3] - @cls[4, 5, 6])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_MINUS_big_array # '-'
  removed = @cls[2, 3, 4, 5] * 64

  assert_equal(@cls[1] * 64, @cls[1, 2, 3, 4, 5] * 64 - removed)

  # Ruby 1.8 feature change: repeated receiver elements that survive the
  # difference are all kept (the expectation used to be a single @cls[1]).
  assert_equal(@cls[1, 1, 1, 1] * 64, @cls[1, 2, 1, 3, 1, 4, 1, 5] * 64 - removed)

  ones = @cls[]
  1000.times do
    ones << 1
  end
  assert_equal(1000, ones.length)

  # Same rule for a long run of one value: nothing is collapsed.
  assert_equal(@cls[1] * 1000, ones - @cls[2])
end
|
||||||
|
|
||||||
def test_LSHIFT # '<<'
|
def test_LSHIFT # '<<'
|
||||||
a = @cls[]
|
a = @cls[]
|
||||||
a << 1
|
a << 1
|
||||||
@ -1837,6 +1856,31 @@ class TestArray < Test::Unit::TestCase
|
|||||||
assert_equal([obj1], [obj1]|[obj2])
|
assert_equal([obj1], [obj1]|[obj2])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_OR_big_in_order
  # Anonymous class whose instances all share hash 0 and report eql? as
  # true for any argument, so two distinct instances count as duplicates.
  always_eql = Class.new do
    attr_reader :name

    def initialize(name)
      @name = name
    end

    def inspect
      "test_OR_in_order(#{@name})"
    end

    def hash
      0
    end

    def eql?(_other)
      true
    end
  end

  first = always_eql.new("1")
  second = always_eql.new("2")

  # The union must keep the first-seen object, i.e. the receiver's element.
  assert_equal([first], [first] * 64 | [second] * 64)
end
|
||||||
|
|
||||||
|
def test_OR_big_array # '|'
  # Long operands are short patterns repeated 64 times; the union is
  # de-duplicated.
  assert_equal(@cls[1, 2], @cls[1] * 64 | @cls[2] * 64)
  assert_equal(@cls[1, 2], @cls[1, 2] * 64 | @cls[1, 2] * 64)

  shorter = (1..64).to_a
  longer = (1..128).to_a
  union = shorter | longer

  # The union equals the longer operand but is a distinct object.
  assert_equal(union, longer)
  assert_not_same(union, longer)

  # Neither operand was modified by the operation.
  assert_equal((1..64).to_a, shorter)
  assert_equal((1..128).to_a, longer)
end
|
||||||
|
|
||||||
def test_combination
|
def test_combination
|
||||||
a = @cls[]
|
a = @cls[]
|
||||||
assert_equal(1, a.combination(0).size)
|
assert_equal(1, a.combination(0).size)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user