Avoid unnecessary array allocations for f(arg, *arg, **arg, **arg), f(*arg, a: lvar), and other calls

The `f(arg, *arg, **arg, **arg)` case was previously not optimized.
The optimizer didn't optimize this case because of the multiple
keyword splats, and the compiler didn't optimize it because the
`f(*arg, **arg, **arg)` optimization added in
0ee3960685e283d8e75149a8777eb0109d41509a didn't apply.

I found it difficult to apply this optimization without changing
the `setup_args_core` API, since by the time you get to the ARGSCAT
case, you don't know whether you were called recursively or directly,
so I'm not sure if it was possible to know at that point whether the
array allocation could be avoided.

This changes the dup_rest argument in `setup_args_core` from an int
to a pointer to int.  This allows us to track whether we have allocated
a caller side array for multiple splats or splat+post across
recursive calls. Check the pointed value (*dup_rest) to determine the
`splatarray` argument. If dup_rest is 1, then use `splatarray true`
(caller-side array allocation), then set *dup_rest back to 0, ensuring
only a single `splatarray true` per method call.

Before calling `setup_args_core`, check whether the array allocation
can be avoided safely using `splatarray false`.  Optimizable cases are:

```
// f(*arg)
SPLAT

// f(1, *arg)
ARGSCAT
 LIST

// f(*arg, **arg)
ARGSPUSH
 SPLAT
 HASH nd_brace=0

// f(1, *arg, **arg)
ARGSPUSH
  ARGSCAT
   LIST
  HASH nd_brace=0
```

If so, dup_rest is set to 0 instead of 1 to avoid the allocation.

After calling `setup_args_core`, check the flag. If the flag
includes `VM_CALL_ARGS_SPLAT`, and the pointed value has changed,
indicating `splatarray true` was used, then also set
`VM_CALL_ARGS_SPLAT_MUT` in the flag.

My initial attempt at this broke the `f(*ary, &ary.pop)` test,
because we were not duplicating the ary in the splat even though
it was modified later (evaluation order issue). The initial attempt
would also break `f(*ary, **ary.pop)` or `f(*ary, kw: ary.pop)` cases
for the same reason. I added test cases for those evaluation
order issues.

Add setup_args_dup_rest_p static function that checks that a given
node is safe.  Call that on the block pass node to determine if
the block pass node is safe.  Also call it on each of the hash
key/value nodes to test that they are safe.  If any are not safe,
then set dup_rest = 1 so that `splatarray true` will be used to
avoid the evaluation order issue.

This new approach has the effect of optimizing most cases of
literal keywords after positional splats.  Previously, only
static keyword hashes after positional splats avoided array
allocation for the splat.  Now, most dynamic keyword hashes
after positional splats also avoid array allocation.

Add allocation tests for dynamic keyword hashes after
positional splats.

setup_args_dup_rest_p is currently fairly conservative. It
could definitely be expanded to handle additional node types
to reduce allocations in additional cases.
This commit is contained in:
Jeremy Evans 2024-07-11 16:16:40 -07:00
parent ca0dae25ed
commit 3de20efc30
Notes: git 2024-07-19 05:17:42 +00:00
3 changed files with 169 additions and 41 deletions

137
compile.c
View File

@ -6305,7 +6305,7 @@ keyword_node_single_splat_p(NODE *kwnode)
static int
setup_args_core(rb_iseq_t *iseq, LINK_ANCHOR *const args, const NODE *argn,
int dup_rest, unsigned int *flag_ptr, struct rb_callinfo_kwarg **kwarg_ptr)
unsigned int *dup_rest, unsigned int *flag_ptr, struct rb_callinfo_kwarg **kwarg_ptr)
{
if (!argn) return 0;
@ -6331,17 +6331,15 @@ setup_args_core(rb_iseq_t *iseq, LINK_ANCHOR *const args, const NODE *argn,
case NODE_SPLAT: {
// f(*a)
NO_CHECK(COMPILE(args, "args (splat)", RNODE_SPLAT(argn)->nd_head));
ADD_INSN1(args, argn, splatarray, RBOOL(dup_rest));
if (flag_ptr) {
*flag_ptr |= VM_CALL_ARGS_SPLAT;
if (dup_rest) *flag_ptr |= VM_CALL_ARGS_SPLAT_MUT;
}
ADD_INSN1(args, argn, splatarray, RBOOL(*dup_rest));
if (*dup_rest) *dup_rest = 0;
if (flag_ptr) *flag_ptr |= VM_CALL_ARGS_SPLAT;
RUBY_ASSERT(flag_ptr == NULL || (*flag_ptr & VM_CALL_KW_SPLAT) == 0);
return 1;
}
case NODE_ARGSCAT: {
if (flag_ptr) *flag_ptr |= VM_CALL_ARGS_SPLAT | VM_CALL_ARGS_SPLAT_MUT;
int argc = setup_args_core(iseq, args, RNODE_ARGSCAT(argn)->nd_head, 1, NULL, NULL);
if (flag_ptr) *flag_ptr |= VM_CALL_ARGS_SPLAT;
int argc = setup_args_core(iseq, args, RNODE_ARGSCAT(argn)->nd_head, dup_rest, NULL, NULL);
bool args_pushed = false;
if (nd_type_p(RNODE_ARGSCAT(argn)->nd_body, NODE_LIST)) {
@ -6356,7 +6354,8 @@ setup_args_core(rb_iseq_t *iseq, LINK_ANCHOR *const args, const NODE *argn,
}
if (nd_type_p(RNODE_ARGSCAT(argn)->nd_head, NODE_LIST)) {
ADD_INSN1(args, argn, splatarray, Qtrue);
ADD_INSN1(args, argn, splatarray, RBOOL(*dup_rest));
if (*dup_rest) *dup_rest = 0;
argc += 1;
}
else if (!args_pushed) {
@ -6376,17 +6375,7 @@ setup_args_core(rb_iseq_t *iseq, LINK_ANCHOR *const args, const NODE *argn,
}
case NODE_ARGSPUSH: {
if (flag_ptr) *flag_ptr |= VM_CALL_ARGS_SPLAT;
int recurse_dup_rest = 1;
if (nd_type_p(RNODE_ARGSPUSH(argn)->nd_head, NODE_SPLAT) &&
nd_type_p(RNODE_ARGSPUSH(argn)->nd_body, NODE_HASH) &&
!RNODE_HASH(RNODE_ARGSPUSH(argn)->nd_body)->nd_brace) {
recurse_dup_rest = 0;
}
else if (flag_ptr) {
*flag_ptr |= VM_CALL_ARGS_SPLAT_MUT;
}
int argc = setup_args_core(iseq, args, RNODE_ARGSPUSH(argn)->nd_head, recurse_dup_rest, NULL, NULL);
int argc = setup_args_core(iseq, args, RNODE_ARGSPUSH(argn)->nd_head, dup_rest, NULL, NULL);
if (nd_type_p(RNODE_ARGSPUSH(argn)->nd_body, NODE_LIST)) {
int rest_len = compile_args(iseq, args, RNODE_ARGSPUSH(argn)->nd_body, &kwnode);
@ -6422,13 +6411,110 @@ setup_args_core(rb_iseq_t *iseq, LINK_ANCHOR *const args, const NODE *argn,
}
}
/*
 * Mark a call as having a mutable splat array when appropriate.
 * Per the commit description above: after setup_args_core runs, if the call
 * uses a positional splat (VM_CALL_ARGS_SPLAT) and the tracked dup_rest value
 * changed (indicating `splatarray true` was emitted, i.e. a caller-side array
 * allocation), the splat array is safe to mutate, so VM_CALL_ARGS_SPLAT_MUT
 * is added to the flag.
 */
static void
setup_args_splat_mut(unsigned int *flag, int dup_rest, int initial_dup_rest)
{
    /* Nothing to do unless setup_args_core consumed the dup request. */
    if (dup_rest == initial_dup_rest) return;
    if (*flag & VM_CALL_ARGS_SPLAT) {
        *flag |= VM_CALL_ARGS_SPLAT_MUT;
    }
}
/*
 * Return true if evaluating ARGN might have side effects that could modify an
 * already-splatted array, in which case the caller must use `splatarray true`
 * (duplicating the splat array) to preserve evaluation-order semantics.
 * Returns false only for node types whose evaluation is side-effect free
 * (bare variable/constant reads, literals, self, lambda construction,
 * regexp back-references).  Deliberately conservative: any node type not
 * explicitly listed is treated as potentially mutating.
 */
static bool
setup_args_dup_rest_p(const NODE *argn)
{
    /* A constant path (Foo::Bar) is as safe as its leftmost receiver; walk
     * down the head chain iteratively instead of recursing as before. */
    for (;;) {
        switch (nd_type(argn)) {
          case NODE_COLON2:
            argn = RNODE_COLON2(argn)->nd_head;
            continue;
          /* Side-effect-free reads and literals: no dup required. */
          case NODE_LVAR:
          case NODE_DVAR:
          case NODE_GVAR:
          case NODE_IVAR:
          case NODE_CVAR:
          case NODE_CONST:
          case NODE_COLON3:
          case NODE_INTEGER:
          case NODE_FLOAT:
          case NODE_RATIONAL:
          case NODE_IMAGINARY:
          case NODE_STR:
          case NODE_SYM:
          case NODE_REGX:
          case NODE_SELF:
          case NODE_NIL:
          case NODE_TRUE:
          case NODE_FALSE:
          case NODE_LAMBDA:
          case NODE_NTH_REF:
          case NODE_BACK_REF:
            return false;
          default:
            /* Method calls, assignments, etc. may mutate the splat array. */
            return true;
        }
    }
}
static VALUE
setup_args(rb_iseq_t *iseq, LINK_ANCHOR *const args, const NODE *argn,
unsigned int *flag, struct rb_callinfo_kwarg **keywords)
{
VALUE ret;
unsigned int dup_rest = 1, initial_dup_rest;
if (argn) {
const NODE *check_arg = nd_type_p(argn, NODE_BLOCK_PASS) ?
RNODE_BLOCK_PASS(argn)->nd_head : argn;
if (check_arg) {
switch(nd_type(check_arg)) {
case(NODE_SPLAT):
// avoid caller side array allocation for f(*arg)
dup_rest = 0;
break;
case(NODE_ARGSCAT):
// avoid caller side array allocation for f(1, *arg)
dup_rest = !nd_type_p(RNODE_ARGSCAT(check_arg)->nd_head, NODE_LIST);
break;
case(NODE_ARGSPUSH):
// avoid caller side array allocation for f(*arg, **hash) and f(1, *arg, **hash)
dup_rest = !((nd_type_p(RNODE_ARGSPUSH(check_arg)->nd_head, NODE_SPLAT) ||
(nd_type_p(RNODE_ARGSPUSH(check_arg)->nd_head, NODE_ARGSCAT) &&
nd_type_p(RNODE_ARGSCAT(RNODE_ARGSPUSH(check_arg)->nd_head)->nd_head, NODE_LIST))) &&
nd_type_p(RNODE_ARGSPUSH(check_arg)->nd_body, NODE_HASH) &&
!RNODE_HASH(RNODE_ARGSPUSH(check_arg)->nd_body)->nd_brace);
if (!dup_rest) {
// require allocation for keyword key/value/splat that may modify splatted argument
NODE *node = RNODE_HASH(RNODE_ARGSPUSH(check_arg)->nd_body)->nd_head;
while (node) {
NODE *key_node = RNODE_LIST(node)->nd_head;
if (key_node && setup_args_dup_rest_p(key_node)) {
dup_rest = 1;
break;
}
node = RNODE_LIST(node)->nd_next;
NODE *value_node = RNODE_LIST(node)->nd_head;
if (setup_args_dup_rest_p(value_node)) {
dup_rest = 1;
break;
}
node = RNODE_LIST(node)->nd_next;
}
}
break;
default:
break;
}
}
if (!dup_rest && (check_arg != argn) && setup_args_dup_rest_p(RNODE_BLOCK_PASS(argn)->nd_body)) {
// require allocation for block pass that may modify splatted argument
dup_rest = 1;
}
}
initial_dup_rest = dup_rest;
if (argn && nd_type_p(argn, NODE_BLOCK_PASS)) {
unsigned int dup_rest = 1;
DECL_ANCHOR(arg_block);
INIT_ANCHOR(arg_block);
@ -6445,12 +6531,13 @@ setup_args(rb_iseq_t *iseq, LINK_ANCHOR *const args, const NODE *argn,
// foo(x, y, ...)
// ^^^^
if (nd_type_p(arg_node, NODE_ARGSCAT)) {
argc += setup_args_core(iseq, args, RNODE_ARGSCAT(arg_node)->nd_head, dup_rest, flag, keywords);
argc += setup_args_core(iseq, args, RNODE_ARGSCAT(arg_node)->nd_head, &dup_rest, flag, keywords);
}
*flag |= VM_CALL_FORWARDING;
ADD_GETLOCAL(args, argn, idx, get_lvar_level(iseq));
setup_args_splat_mut(flag, dup_rest, initial_dup_rest);
return INT2FIX(argc);
}
else {
@ -6466,15 +6553,15 @@ setup_args(rb_iseq_t *iseq, LINK_ANCHOR *const args, const NODE *argn,
if (iobj->insn_id == BIN(getblockparam)) {
iobj->insn_id = BIN(getblockparamproxy);
}
dup_rest = 0;
}
}
ret = INT2FIX(setup_args_core(iseq, args, RNODE_BLOCK_PASS(argn)->nd_head, dup_rest, flag, keywords));
ret = INT2FIX(setup_args_core(iseq, args, RNODE_BLOCK_PASS(argn)->nd_head, &dup_rest, flag, keywords));
ADD_SEQ(args, arg_block);
}
else {
ret = INT2FIX(setup_args_core(iseq, args, argn, 0, flag, keywords));
ret = INT2FIX(setup_args_core(iseq, args, argn, &dup_rest, flag, keywords));
}
setup_args_splat_mut(flag, dup_rest, initial_dup_rest);
return ret;
}

View File

@ -123,7 +123,7 @@ class TestAllocation < Test::Unit::TestCase
check_allocations(0, 0, "required(*r2k_empty_array1#{block})")
check_allocations(0, 1, "required(*r2k_array#{block})")
# Currently allocates 1 array unnecessarily due to splatarray true
# Currently allocates 1 array unnecessarily
check_allocations(1, 1, "required(*empty_array, **hash1, **empty_hash#{block})")
RUBY
end
@ -148,7 +148,7 @@ class TestAllocation < Test::Unit::TestCase
check_allocations(0, 0, "optional(*r2k_empty_array1#{block})")
check_allocations(0, 1, "optional(*r2k_array#{block})")
# Currently allocates 1 array unnecessarily due to splatarray true
# Currently allocates 1 array unnecessarily
check_allocations(1, 1, "optional(*empty_array, **hash1, **empty_hash#{block})")
RUBY
end
@ -308,7 +308,6 @@ class TestAllocation < Test::Unit::TestCase
check_allocations(1, 1, "keyword_splat(*r2k_empty_array#{block})")
check_allocations(1, 1, "keyword_splat(*r2k_array#{block})")
# Currently allocates 1 array unnecessarily due to splatarray true
check_allocations(0, 1, "keyword_splat(*empty_array, a: 2, **empty_hash#{block})")
check_allocations(0, 1, "keyword_splat(*empty_array, **hash1, **empty_hash#{block})")
RUBY
@ -382,10 +381,8 @@ class TestAllocation < Test::Unit::TestCase
check_allocations(0, 0, "required_and_keyword(*r2k_empty_array1#{block})")
check_allocations(1, 1, "required_and_keyword(*r2k_array1#{block})")
# Currently allocates 1 array unnecessarily due to splatarray true
check_allocations(1, 1, "required_and_keyword(1, *empty_array, a: 2, **empty_hash#{block})")
check_allocations(1, 1, "required_and_keyword(1, *empty_array, **hash1, **empty_hash#{block})")
check_allocations(0, 1, "required_and_keyword(1, *empty_array, a: 2, **empty_hash#{block})")
check_allocations(0, 1, "required_and_keyword(1, *empty_array, **hash1, **empty_hash#{block})")
check_allocations(0, 1, "required_and_keyword(*array1, **empty_hash, a: 2#{block})")
check_allocations(0, 1, "required_and_keyword(*array1, **hash1, **empty_hash#{block})")
check_allocations(0, 0, "required_and_keyword(*array1, **nil#{block})")
@ -474,9 +471,8 @@ class TestAllocation < Test::Unit::TestCase
check_allocations(1, 1, "required_and_keyword_splat(*r2k_empty_array1#{block})")
check_allocations(1, 1, "required_and_keyword_splat(*r2k_array1#{block})")
# Currently allocates 1 array unnecessarily due to splatarray true
check_allocations(1, 1, "required_and_keyword_splat(1, *empty_array, a: 2, **empty_hash#{block})")
check_allocations(1, 1, "required_and_keyword_splat(1, *empty_array, **hash1, **empty_hash#{block})")
check_allocations(0, 1, "required_and_keyword_splat(1, *empty_array, a: 2, **empty_hash#{block})")
check_allocations(0, 1, "required_and_keyword_splat(1, *empty_array, **hash1, **empty_hash#{block})")
check_allocations(0, 1, "required_and_keyword_splat(*array1, **empty_hash, a: 2#{block})")
check_allocations(0, 1, "required_and_keyword_splat(*array1, **hash1, **empty_hash#{block})")
check_allocations(0, 1, "required_and_keyword_splat(*array1, **nil#{block})")
@ -654,8 +650,8 @@ class TestAllocation < Test::Unit::TestCase
check_allocations(1, 1, "argument_forwarding(*array1, *empty_array, a: 2, **empty_hash#{block})")
check_allocations(1, 1, "argument_forwarding(*array1, *empty_array, **hash1, **empty_hash#{block})")
check_allocations(1, 1, "argument_forwarding(1, *empty_array, a: 2, **empty_hash#{block})")
check_allocations(1, 1, "argument_forwarding(1, *empty_array, **hash1, **empty_hash#{block})")
check_allocations(0, 1, "argument_forwarding(1, *empty_array, a: 2, **empty_hash#{block})")
check_allocations(0, 1, "argument_forwarding(1, *empty_array, **hash1, **empty_hash#{block})")
check_allocations(0, 1, "argument_forwarding(*array1, **empty_hash, a: 2#{block})")
check_allocations(0, 1, "argument_forwarding(*array1, **hash1, **empty_hash#{block})")
check_allocations(0, 0, "argument_forwarding(*array1, **nil#{block})")
@ -700,8 +696,8 @@ class TestAllocation < Test::Unit::TestCase
check_allocations(1, 1, "argument_forwarding(*array1, *empty_array, a: 2, **empty_hash#{block})")
check_allocations(1, 1, "argument_forwarding(*array1, *empty_array, **hash1, **empty_hash#{block})")
check_allocations(1, 1, "argument_forwarding(1, *empty_array, a: 2, **empty_hash#{block})")
check_allocations(1, 1, "argument_forwarding(1, *empty_array, **hash1, **empty_hash#{block})")
check_allocations(0, 1, "argument_forwarding(1, *empty_array, a: 2, **empty_hash#{block})")
check_allocations(0, 1, "argument_forwarding(1, *empty_array, **hash1, **empty_hash#{block})")
check_allocations(0, 1, "argument_forwarding(*array1, **empty_hash, a: 2#{block})")
check_allocations(0, 1, "argument_forwarding(*array1, **hash1, **empty_hash#{block})")
check_allocations(0, 0, "argument_forwarding(*array1, **nil#{block})")
@ -763,6 +759,35 @@ class TestAllocation < Test::Unit::TestCase
RUBY
end
def test_no_array_allocation_with_splat_and_nonstatic_keywords
check_allocations(<<~RUBY)
def self.keyword(a: nil, b: nil#{block}); end
check_allocations(0, 1, "keyword(*empty_array, a: empty_array#{block})") # LVAR
check_allocations(0, 1, "->{keyword(*empty_array, a: empty_array#{block})}.call") # DVAR
check_allocations(0, 1, "$x = empty_array; keyword(*empty_array, a: $x#{block})") # GVAR
check_allocations(0, 1, "@x = empty_array; keyword(*empty_array, a: @x#{block})") # IVAR
check_allocations(0, 1, "self.class.const_set(:X, empty_array); keyword(*empty_array, a: X#{block})") # CONST
check_allocations(0, 1, "keyword(*empty_array, a: Object::X#{block})") # COLON2
check_allocations(0, 1, "keyword(*empty_array, a: ::X#{block})") # COLON3
check_allocations(0, 1, "T = self; #{'B = block' unless block.empty?}; class Object; @@x = X; T.keyword(*X, a: @@x#{', &B' unless block.empty?}) end") # CVAR
check_allocations(0, 1, "keyword(*empty_array, a: empty_array, b: 1#{block})") # INTEGER
check_allocations(0, 1, "keyword(*empty_array, a: empty_array, b: 1.0#{block})") # FLOAT
check_allocations(0, 1, "keyword(*empty_array, a: empty_array, b: 1.0r#{block})") # RATIONAL
check_allocations(0, 1, "keyword(*empty_array, a: empty_array, b: 1.0i#{block})") # IMAGINARY
check_allocations(0, 1, "keyword(*empty_array, a: empty_array, b: 'a'#{block})") # STR
check_allocations(0, 1, "keyword(*empty_array, a: empty_array, b: :b#{block})") # SYM
check_allocations(0, 1, "keyword(*empty_array, a: empty_array, b: /a/#{block})") # REGX
check_allocations(0, 1, "keyword(*empty_array, a: self#{block})") # SELF
check_allocations(0, 1, "keyword(*empty_array, a: empty_array, b: nil#{block})") # NIL
check_allocations(0, 1, "keyword(*empty_array, a: empty_array, b: true#{block})") # TRUE
check_allocations(0, 1, "keyword(*empty_array, a: empty_array, b: false#{block})") # FALSE
check_allocations(0, 1, "keyword(*empty_array, a: ->{}#{block})") # LAMBDA
check_allocations(0, 1, "keyword(*empty_array, a: $1#{block})") # NTH_REF
check_allocations(0, 1, "keyword(*empty_array, a: $`#{block})") # BACK_REF
RUBY
end
class WithBlock < self
def block
', &block'

View File

@ -303,7 +303,7 @@ class TestCall < Test::Unit::TestCase
assert_syntax_error(%q{h[*a, 2, b: 5, **kw] += 1}, message)
end
def test_call_splat_order
def test_call_splat_post_order
bug12860 = '[ruby-core:77701] [Bug# 12860]'
ary = [1, 2]
assert_equal([1, 2, 1], aaa(*ary, ary.shift), bug12860)
@ -311,7 +311,7 @@ class TestCall < Test::Unit::TestCase
assert_equal([0, 1, 2, 1], aaa(0, *ary, ary.shift), bug12860)
end
def test_call_block_order
def test_call_splat_block_order
bug16504 = '[ruby-core:96769] [Bug# 16504]'
b = proc{}
ary = [1, 2, b]
@ -320,6 +320,22 @@ class TestCall < Test::Unit::TestCase
assert_equal([0, 1, 2, b], aaa(0, *ary, &ary.pop), bug16504)
end
def test_call_splat_kw_order
b = {}
ary = [1, 2, b]
assert_equal([1, 2, b, {a: b}], aaa(*ary, a: ary.pop))
ary = [1, 2, b]
assert_equal([0, 1, 2, b, {a: b}], aaa(0, *ary, a: ary.pop))
end
def test_call_splat_kw_splat_order
b = {}
ary = [1, 2, b]
assert_equal([1, 2, b], aaa(*ary, **ary.pop))
ary = [1, 2, b]
assert_equal([0, 1, 2, b], aaa(0, *ary, **ary.pop))
end
def test_call_args_splat_with_nonhash_keyword_splat
o = Object.new
def o.to_hash; {a: 1} end