Expand opt_newarray_send to support Array#pack with buffer keyword arg

Use an enum for the method arg instead of needing to add an id
that doesn't map to an actual method name.

$ ruby --dump=insns -e 'b = "x"; [v].pack("E*", buffer: b)'

before:

```
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,34)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] b@0
0000 putchilledstring                       "x"                       (   1)[Li]
0002 setlocal_WC_0                          b@0
0004 putself
0005 opt_send_without_block                 <calldata!mid:v, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0007 newarray                               1
0009 putchilledstring                       "E*"
0011 getlocal_WC_0                          b@0
0013 opt_send_without_block                 <calldata!mid:pack, argc:2, kw:[#<Symbol:0x000000000023110c>], KWARG>
0015 leave
```

after:

```
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,34)>
local table (size: 1, argc: 0 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
[ 1] b@0
0000 putchilledstring                       "x"                       (   1)[Li]
0002 setlocal_WC_0                          b@0
0004 putself
0005 opt_send_without_block                 <calldata!mid:v, argc:0, FCALL|VCALL|ARGS_SIMPLE>
0007 putchilledstring                       "E*"
0009 getlocal                               b@0, 0
0012 opt_newarray_send                      3, 5
0015 leave
```
This commit is contained in:
Randy Stauner 2024-07-20 10:03:02 -07:00 committed by Alan Wu
parent 86a762ce56
commit acbb8d4fb5
Notes: git 2024-07-29 20:27:16 +00:00
12 changed files with 205 additions and 40 deletions

View File

@ -236,6 +236,48 @@ tests = [
end end
[3, x = 2, 1].min [3, x = 2, 1].min
}, },
# Array literal + #pack: first with only a format string, then with the
# buffer: keyword (the packed bytes are appended after the existing "x").
[ 'opt_newarray_send', %q{ v = 1.23; [v, v*2].pack("E*").unpack("E*") == [v, v*2] }, ],
[ 'opt_newarray_send', %q{ v = 4.56; b = +"x"; [v, v*2].pack("E*", buffer: b); b[1..].unpack("E*") == [v, v*2] }, ],
[ 'opt_newarray_send', <<-'},', ], # {
v = 7.89;
b = +"x";
class Array
alias _pack pack
def pack(s, buffer: nil, prefix: "y")
buffer ||= +"b"
buffer << prefix
_pack(s, buffer: buffer)
end
end
tests = []
ret = [v].pack("E*", prefix: "z")
tests << (ret[0..1] == "bz")
tests << (ret[2..].unpack("E*") == [v])
ret = [v].pack("E*")
tests << (ret[0..1] == "by")
tests << (ret[2..].unpack("E*") == [v])
[v, v*2, v*3].pack("E*", buffer: b)
tests << (b[0..1] == "xy")
tests << (b[2..].unpack("E*") == [v, v*2, v*3])
class Array
def pack(_fmt, buffer:) = buffer
end
b = nil
tests << [v].pack("E*", buffer: b).nil?
class Array
def pack(_fmt, **kw) = kw.empty?
end
tests << [v].pack("E*") == true
tests.all? or puts tests
},
[ 'throw', %q{ false.tap { break true } }, ], [ 'throw', %q{ false.tap { break true } }, ],
[ 'branchif', %q{ x = nil; x ||= true }, ], [ 'branchif', %q{ x = nil; x ||= true }, ],

View File

@ -5185,3 +5185,19 @@ end
test test
RUBY RUBY
# Array literal + #pack round trip, once with only a format string and once
# with the buffer: keyword (the packed bytes land after the existing "x").
assert_equal '[true, true]', <<~'RUBY'
def pack
v = 1.23
[v, v*2, v*3].pack("E*").unpack("E*") == [v, v*2, v*3]
end
def with_buffer
v = 4.56
b = +"x"
[v, v*2, v*3].pack("E*", buffer: b)
b[1..].unpack("E*") == [v, v*2, v*3]
end
[pack, with_buffer]
RUBY

View File

@ -3996,30 +3996,38 @@ iseq_specialized_instruction(rb_iseq_t *iseq, INSN *iobj)
if (IS_INSN_ID(iobj, newarray) && iobj->link.next && if (IS_INSN_ID(iobj, newarray) && iobj->link.next &&
IS_INSN(iobj->link.next)) { IS_INSN(iobj->link.next)) {
/* /*
* [a, b, ...].max/min -> a, b, c, opt_newarray_max/min * [a, b, ...].max/min -> a, b, c, opt_newarray_send max/min
*/ */
INSN *niobj = (INSN *)iobj->link.next; INSN *niobj = (INSN *)iobj->link.next;
if (IS_INSN_ID(niobj, send)) { if (IS_INSN_ID(niobj, send)) {
const struct rb_callinfo *ci = (struct rb_callinfo *)OPERAND_AT(niobj, 0); const struct rb_callinfo *ci = (struct rb_callinfo *)OPERAND_AT(niobj, 0);
if (vm_ci_simple(ci) && vm_ci_argc(ci) == 0) { if (vm_ci_simple(ci) && vm_ci_argc(ci) == 0) {
VALUE method = INT2FIX(0);
switch (vm_ci_mid(ci)) { switch (vm_ci_mid(ci)) {
case idMax: case idMax:
method = INT2FIX(VM_OPT_NEWARRAY_SEND_MAX);
break;
case idMin: case idMin:
method = INT2FIX(VM_OPT_NEWARRAY_SEND_MIN);
break;
case idHash: case idHash:
{ method = INT2FIX(VM_OPT_NEWARRAY_SEND_HASH);
break;
}
if (method != INT2FIX(0)) {
VALUE num = iobj->operands[0]; VALUE num = iobj->operands[0];
int operand_len = insn_len(BIN(opt_newarray_send)) - 1; int operand_len = insn_len(BIN(opt_newarray_send)) - 1;
iobj->insn_id = BIN(opt_newarray_send); iobj->insn_id = BIN(opt_newarray_send);
iobj->operands = compile_data_calloc2(iseq, operand_len, sizeof(VALUE)); iobj->operands = compile_data_calloc2(iseq, operand_len, sizeof(VALUE));
iobj->operands[0] = num; iobj->operands[0] = num;
iobj->operands[1] = rb_id2sym(vm_ci_mid(ci)); iobj->operands[1] = method;
iobj->operand_size = operand_len; iobj->operand_size = operand_len;
ELEM_REMOVE(&niobj->link); ELEM_REMOVE(&niobj->link);
return COMPILE_OK; return COMPILE_OK;
} }
} }
} }
}
else if ((IS_INSN_ID(niobj, putstring) || IS_INSN_ID(niobj, putchilledstring) || else if ((IS_INSN_ID(niobj, putstring) || IS_INSN_ID(niobj, putchilledstring) ||
(IS_INSN_ID(niobj, putobject) && RB_TYPE_P(OPERAND_AT(niobj, 0), T_STRING))) && (IS_INSN_ID(niobj, putobject) && RB_TYPE_P(OPERAND_AT(niobj, 0), T_STRING))) &&
IS_NEXT_INSN_ID(&niobj->link, send)) { IS_NEXT_INSN_ID(&niobj->link, send)) {
@ -4030,7 +4038,7 @@ iseq_specialized_instruction(rb_iseq_t *iseq, INSN *iobj)
iobj->insn_id = BIN(opt_newarray_send); iobj->insn_id = BIN(opt_newarray_send);
iobj->operands = compile_data_calloc2(iseq, operand_len, sizeof(VALUE)); iobj->operands = compile_data_calloc2(iseq, operand_len, sizeof(VALUE));
iobj->operands[0] = FIXNUM_INC(num, 1); iobj->operands[0] = FIXNUM_INC(num, 1);
iobj->operands[1] = rb_id2sym(vm_ci_mid(ci)); iobj->operands[1] = INT2FIX(VM_OPT_NEWARRAY_SEND_PACK);
iobj->operand_size = operand_len; iobj->operand_size = operand_len;
ELEM_REMOVE(&iobj->link); ELEM_REMOVE(&iobj->link);
ELEM_REMOVE(niobj->link.next); ELEM_REMOVE(niobj->link.next);
@ -4038,6 +4046,32 @@ iseq_specialized_instruction(rb_iseq_t *iseq, INSN *iobj)
return COMPILE_OK; return COMPILE_OK;
} }
} }
// newarray n, putchilledstring "E", getlocal b, send :pack with {buffer: b}
// -> putchilledstring "E", getlocal b, opt_newarray_send n+2, VM_OPT_NEWARRAY_SEND_PACK_BUFFER
else if ((IS_INSN_ID(niobj, putstring) || IS_INSN_ID(niobj, putchilledstring) ||
(IS_INSN_ID(niobj, putobject) && RB_TYPE_P(OPERAND_AT(niobj, 0), T_STRING))) &&
IS_NEXT_INSN_ID(&niobj->link, getlocal) &&
(niobj->link.next && IS_NEXT_INSN_ID(niobj->link.next, send))) {
const struct rb_callinfo *ci = (struct rb_callinfo *)OPERAND_AT((INSN *)(niobj->link.next)->next, 0);
const struct rb_callinfo_kwarg *kwarg = vm_ci_kwarg(ci);
if (vm_ci_mid(ci) == idPack && vm_ci_argc(ci) == 2 &&
(kwarg && kwarg->keyword_len == 1 && kwarg->keywords[0] == rb_id2sym(idBuffer))) {
VALUE num = iobj->operands[0];
int operand_len = insn_len(BIN(opt_newarray_send)) - 1;
iobj->insn_id = BIN(opt_newarray_send);
iobj->operands = compile_data_calloc2(iseq, operand_len, sizeof(VALUE));
iobj->operands[0] = FIXNUM_INC(num, 2);
iobj->operands[1] = INT2FIX(VM_OPT_NEWARRAY_SEND_PACK_BUFFER);
iobj->operand_size = operand_len;
// Remove the "send" insn.
ELEM_REMOVE((niobj->link.next)->next);
// Remove the modified insn from its original "newarray" position...
ELEM_REMOVE(&iobj->link);
// and insert it after the buffer insn.
ELEM_INSERT_NEXT(niobj->link.next, &iobj->link);
return COMPILE_OK;
}
}
} }
if (IS_INSN_ID(iobj, send)) { if (IS_INSN_ID(iobj, send)) {

View File

@ -60,6 +60,7 @@ firstline, predefined = __LINE__+1, %[\
nil nil
path path
pack pack
buffer
_ UScore _ UScore

View File

@ -983,7 +983,7 @@ opt_str_uminus
DEFINE_INSN DEFINE_INSN
opt_newarray_send opt_newarray_send
(rb_num_t num, ID method) (rb_num_t num, rb_num_t method)
(...) (...)
(VALUE val) (VALUE val)
/* This instruction typically has no funcalls. But it compares array /* This instruction typically has no funcalls. But it compares array
@ -995,17 +995,20 @@ opt_newarray_send
// attr rb_snum_t comptime_sp_inc = 1 - (rb_snum_t)num; // attr rb_snum_t comptime_sp_inc = 1 - (rb_snum_t)num;
{ {
switch(method) { switch(method) {
case idHash: case VM_OPT_NEWARRAY_SEND_HASH:
val = vm_opt_newarray_hash(ec, num, STACK_ADDR_FROM_TOP(num)); val = vm_opt_newarray_hash(ec, num, STACK_ADDR_FROM_TOP(num));
break; break;
case idMin: case VM_OPT_NEWARRAY_SEND_MIN:
val = vm_opt_newarray_min(ec, num, STACK_ADDR_FROM_TOP(num)); val = vm_opt_newarray_min(ec, num, STACK_ADDR_FROM_TOP(num));
break; break;
case idMax: case VM_OPT_NEWARRAY_SEND_MAX:
val = vm_opt_newarray_max(ec, num, STACK_ADDR_FROM_TOP(num)); val = vm_opt_newarray_max(ec, num, STACK_ADDR_FROM_TOP(num));
break; break;
case idPack: case VM_OPT_NEWARRAY_SEND_PACK:
val = rb_vm_opt_newarray_pack(ec, (long)num-1, STACK_ADDR_FROM_TOP(num), TOPN(0)); val = vm_opt_newarray_pack_buffer(ec, (long)num-1, STACK_ADDR_FROM_TOP(num), TOPN(0), Qundef);
break;
case VM_OPT_NEWARRAY_SEND_PACK_BUFFER:
val = vm_opt_newarray_pack_buffer(ec, (long)num-2, STACK_ADDR_FROM_TOP(num), TOPN(1), TOPN(0));
break; break;
default: default:
rb_bug("unreachable"); rb_bug("unreachable");

View File

@ -913,4 +913,27 @@ EXPECTED
assert_equal "oh no", v assert_equal "oh no", v
end; end;
end end
# A [..].pack(fmt, buffer: b) call site must still dispatch to a redefined
# Array#pack and forward the buffer: keyword to it. The redefined method
# appends " no" to the buffer, so the call is expected to return "oh no".
def test_monkey_pack_buffer
assert_separately([], <<-'end;')
$-w = false
class Array
alias :old_pack :pack
def pack _, buffer:; buffer << " no"; end
end
def test
b = +"oh"
[2 ** 15].pack('n', buffer: b)
end
v = test
class Array
alias :pack :old_pack
end
assert_equal "oh no", v
end;
end
end end

View File

@ -1284,6 +1284,14 @@ enum vm_check_match_type {
#define VM_CHECKMATCH_TYPE_MASK 0x03 #define VM_CHECKMATCH_TYPE_MASK 0x03
#define VM_CHECKMATCH_ARRAY 0x04 #define VM_CHECKMATCH_ARRAY 0x04
/*
 * Selector stored as the second operand of the opt_newarray_send
 * instruction; it names the Array method that the instruction stands for.
 *
 * Values start at 1: the compiler uses INT2FIX(0) as its "no matching
 * method" marker while choosing a selector, so 0 must stay unused.
 * The same numeric values are exported to the YJIT bindings.
 */
enum vm_opt_newarray_send_type {
/* [a, b, ...].max */
VM_OPT_NEWARRAY_SEND_MAX = 1,
/* [a, b, ...].min */
VM_OPT_NEWARRAY_SEND_MIN = 2,
/* [a, b, ...].hash */
VM_OPT_NEWARRAY_SEND_HASH = 3,
/* [a, b, ...].pack(fmt): the format string is one extra stack operand */
VM_OPT_NEWARRAY_SEND_PACK = 4,
/* [a, b, ...].pack(fmt, buffer: buf): format and buffer are two extra stack operands */
VM_OPT_NEWARRAY_SEND_PACK_BUFFER = 5,
};
enum vm_special_object_type { enum vm_special_object_type {
VM_SPECIAL_OBJECT_VMCORE = 1, VM_SPECIAL_OBJECT_VMCORE = 1,
VM_SPECIAL_OBJECT_CBASE, VM_SPECIAL_OBJECT_CBASE,

View File

@ -6203,17 +6203,43 @@ rb_vm_opt_newarray_hash(rb_execution_context_t *ec, rb_num_t num, const VALUE *p
VALUE rb_setup_fake_ary(struct RArray *fake_ary, const VALUE *list, long len, bool freeze); VALUE rb_setup_fake_ary(struct RArray *fake_ary, const VALUE *list, long len, bool freeze);
VALUE rb_ec_pack_ary(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer); VALUE rb_ec_pack_ary(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer);
VALUE static VALUE
rb_vm_opt_newarray_pack(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr, VALUE fmt) vm_opt_newarray_pack_buffer(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr, VALUE fmt, VALUE buffer)
{ {
if (BASIC_OP_UNREDEFINED_P(BOP_PACK, ARRAY_REDEFINED_OP_FLAG)) { if (BASIC_OP_UNREDEFINED_P(BOP_PACK, ARRAY_REDEFINED_OP_FLAG)) {
struct RArray fake_ary; struct RArray fake_ary;
VALUE ary = rb_setup_fake_ary(&fake_ary, ptr, num, true); VALUE ary = rb_setup_fake_ary(&fake_ary, ptr, num, true);
return rb_ec_pack_ary(ec, ary, fmt, Qnil); return rb_ec_pack_ary(ec, ary, fmt, (UNDEF_P(buffer) ? Qnil : buffer));
} }
else { else {
return rb_vm_call_with_refinements(ec, rb_ary_new4(num, ptr), idPack, 1, &fmt, RB_PASS_CALLED_KEYWORDS); // The opt_newarray_send insn drops the keyword args so we need to rebuild them.
// Set up an argument array with room for the keyword hash.
VALUE args[2];
args[0] = fmt;
int kw_splat = RB_NO_KEYWORDS;
int argc = 1;
if (!UNDEF_P(buffer)) {
args[1] = rb_hash_new_with_size(1);
rb_hash_aset(args[1], ID2SYM(idBuffer), buffer);
kw_splat = RB_PASS_KEYWORDS;
argc++;
} }
return rb_vm_call_with_refinements(ec, rb_ary_new4(num, ptr), idPack, argc, args, kw_splat);
}
}
/*
 * Externally visible entry point for the static helper
 * vm_opt_newarray_pack_buffer(); all arguments are forwarded unchanged.
 * `buffer` may be Qundef, which the helper treats as "no buffer: keyword".
 */
VALUE
rb_vm_opt_newarray_pack_buffer(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr, VALUE fmt, VALUE buffer)
{
    const VALUE packed = vm_opt_newarray_pack_buffer(ec, num, ptr, fmt, buffer);
    return packed;
}
/*
 * Entry point for the pack form without a buffer: keyword. Delegates to
 * vm_opt_newarray_pack_buffer() with the buffer argument left undefined.
 */
VALUE
rb_vm_opt_newarray_pack(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr, VALUE fmt)
{
    /* Qundef tells the shared helper that no buffer: keyword was given. */
    const VALUE no_buffer = Qundef;
    return vm_opt_newarray_pack_buffer(ec, num, ptr, fmt, no_buffer);
}
#undef id_cmp #undef id_cmp

View File

@ -300,6 +300,7 @@ fn main() {
.allowlist_type("ruby_tag_type") .allowlist_type("ruby_tag_type")
.allowlist_type("ruby_vm_throw_flags") .allowlist_type("ruby_vm_throw_flags")
.allowlist_type("vm_check_match_type") .allowlist_type("vm_check_match_type")
.allowlist_type("vm_opt_newarray_send_type")
.allowlist_type("rb_iseq_type") .allowlist_type("rb_iseq_type")
// From yjit.c // From yjit.c

View File

@ -4157,47 +4157,56 @@ fn gen_opt_newarray_send(
jit: &mut JITState, jit: &mut JITState,
asm: &mut Assembler, asm: &mut Assembler,
) -> Option<CodegenStatus> { ) -> Option<CodegenStatus> {
let method = jit.get_arg(1).as_u64(); let method = jit.get_arg(1).as_u32();
if method == ID!(min) { if method == VM_OPT_NEWARRAY_SEND_MIN {
gen_opt_newarray_min(jit, asm) gen_opt_newarray_min(jit, asm)
} else if method == ID!(max) { } else if method == VM_OPT_NEWARRAY_SEND_MAX {
gen_opt_newarray_max(jit, asm) gen_opt_newarray_max(jit, asm)
} else if method == ID!(hash) { } else if method == VM_OPT_NEWARRAY_SEND_HASH {
gen_opt_newarray_hash(jit, asm) gen_opt_newarray_hash(jit, asm)
} else if method == ID!(pack) { } else if method == VM_OPT_NEWARRAY_SEND_PACK {
gen_opt_newarray_pack(jit, asm) gen_opt_newarray_pack_buffer(jit, asm, 1, None)
} else if method == VM_OPT_NEWARRAY_SEND_PACK_BUFFER {
gen_opt_newarray_pack_buffer(jit, asm, 2, Some(1))
} else { } else {
None None
} }
} }
fn gen_opt_newarray_pack( fn gen_opt_newarray_pack_buffer(
jit: &mut JITState, jit: &mut JITState,
asm: &mut Assembler, asm: &mut Assembler,
fmt_offset: u32,
buffer: Option<u32>,
) -> Option<CodegenStatus> { ) -> Option<CodegenStatus> {
// num == 4 ( for this code ) asm_comment!(asm, "opt_newarray_send pack");
let num = jit.get_arg(0).as_u32(); let num = jit.get_arg(0).as_u32();
// Save the PC and SP because we may call #pack // Save the PC and SP because we may call #pack
jit_prepare_non_leaf_call(jit, asm); jit_prepare_non_leaf_call(jit, asm);
extern "C" { extern "C" {
fn rb_vm_opt_newarray_pack(ec: EcPtr, num: u32, elts: *const VALUE, fmt: VALUE) -> VALUE; fn rb_vm_opt_newarray_pack_buffer(ec: EcPtr, num: u32, elts: *const VALUE, fmt: VALUE, buffer: VALUE) -> VALUE;
} }
let values_opnd = asm.ctx.sp_opnd(-(num as i32)); let values_opnd = asm.ctx.sp_opnd(-(num as i32));
let values_ptr = asm.lea(values_opnd); let values_ptr = asm.lea(values_opnd);
let fmt_string = asm.ctx.sp_opnd(-1); let fmt_string = asm.ctx.sp_opnd(-(fmt_offset as i32));
let val_opnd = asm.ccall( let val_opnd = asm.ccall(
rb_vm_opt_newarray_pack as *const u8, rb_vm_opt_newarray_pack_buffer as *const u8,
vec![ vec![
EC, EC,
(num - 1).into(), (num - fmt_offset).into(),
values_ptr, values_ptr,
fmt_string fmt_string,
match buffer {
None => Qundef.into(),
Some(i) => asm.ctx.sp_opnd(-(i as i32)),
},
], ],
); );

View File

@ -799,10 +799,6 @@ pub(crate) mod ids {
def_ids! { def_ids! {
name: NULL content: b"" name: NULL content: b""
name: min content: b"min"
name: max content: b"max"
name: hash content: b"hash"
name: pack content: b"pack"
name: respond_to_missing content: b"respond_to_missing?" name: respond_to_missing content: b"respond_to_missing?"
name: to_ary content: b"to_ary" name: to_ary content: b"to_ary"
name: eq content: b"==" name: eq content: b"=="

View File

@ -601,6 +601,12 @@ pub const VM_CHECKMATCH_TYPE_WHEN: vm_check_match_type = 1;
pub const VM_CHECKMATCH_TYPE_CASE: vm_check_match_type = 2; pub const VM_CHECKMATCH_TYPE_CASE: vm_check_match_type = 2;
pub const VM_CHECKMATCH_TYPE_RESCUE: vm_check_match_type = 3; pub const VM_CHECKMATCH_TYPE_RESCUE: vm_check_match_type = 3;
pub type vm_check_match_type = u32; pub type vm_check_match_type = u32;
pub const VM_OPT_NEWARRAY_SEND_MAX: vm_opt_newarray_send_type = 1;
pub const VM_OPT_NEWARRAY_SEND_MIN: vm_opt_newarray_send_type = 2;
pub const VM_OPT_NEWARRAY_SEND_HASH: vm_opt_newarray_send_type = 3;
pub const VM_OPT_NEWARRAY_SEND_PACK: vm_opt_newarray_send_type = 4;
pub const VM_OPT_NEWARRAY_SEND_PACK_BUFFER: vm_opt_newarray_send_type = 5;
pub type vm_opt_newarray_send_type = u32;
pub const VM_SPECIAL_OBJECT_VMCORE: vm_special_object_type = 1; pub const VM_SPECIAL_OBJECT_VMCORE: vm_special_object_type = 1;
pub const VM_SPECIAL_OBJECT_CBASE: vm_special_object_type = 2; pub const VM_SPECIAL_OBJECT_CBASE: vm_special_object_type = 2;
pub const VM_SPECIAL_OBJECT_CONST_BASE: vm_special_object_type = 3; pub const VM_SPECIAL_OBJECT_CONST_BASE: vm_special_object_type = 3;