diff --git a/.github/workflows/bundled_gems.yml b/.github/workflows/bundled_gems.yml index 28e9b9f490..233f624453 100644 --- a/.github/workflows/bundled_gems.yml +++ b/.github/workflows/bundled_gems.yml @@ -104,7 +104,7 @@ jobs: timeout-minutes: 30 env: RUBY_TESTOPTS: '-q --tty=no' - TEST_BUNDLED_GEMS_ALLOW_FAILURES: 'typeprof' + TEST_BUNDLED_GEMS_ALLOW_FAILURES: 'typeprof,rbs,repl_type_completor' if: ${{ steps.diff.outputs.gems }} - name: Commit diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 6220df9f56..2bfb7e037e 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -143,7 +143,7 @@ jobs: timeout-minutes: 60 env: RUBY_TESTOPTS: '-q --tty=no' - TEST_BUNDLED_GEMS_ALLOW_FAILURES: 'typeprof' + TEST_BUNDLED_GEMS_ALLOW_FAILURES: 'typeprof,rbs,repl_type_completor' PRECHECK_BUNDLED_GEMS: 'no' - name: make skipped tests diff --git a/.github/workflows/modgc.yml b/.github/workflows/modgc.yml index e7a1efe9d2..4cf85ef499 100644 --- a/.github/workflows/modgc.yml +++ b/.github/workflows/modgc.yml @@ -154,7 +154,7 @@ jobs: timeout-minutes: ${{ matrix.gc.timeout || 40 }} env: RUBY_TESTOPTS: '-q --tty=no' - TEST_BUNDLED_GEMS_ALLOW_FAILURES: 'typeprof' + TEST_BUNDLED_GEMS_ALLOW_FAILURES: 'typeprof,rbs,repl_type_completor' PRECHECK_BUNDLED_GEMS: 'no' - name: make skipped tests diff --git a/.github/workflows/parsey.yml b/.github/workflows/parsey.yml index eec4c81188..474ec722ff 100644 --- a/.github/workflows/parsey.yml +++ b/.github/workflows/parsey.yml @@ -82,7 +82,7 @@ jobs: EXCLUDES: '../src/test/.excludes-parsey' RUN_OPTS: ${{ matrix.run_opts || '--parser=parse.y' }} SPECOPTS: ${{ matrix.specopts || '-T --parser=parse.y' }} - TEST_BUNDLED_GEMS_ALLOW_FAILURES: 'typeprof' + TEST_BUNDLED_GEMS_ALLOW_FAILURES: 'typeprof,rbs,repl_type_completor' - uses: ./.github/actions/slack with: diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 8c72415cfb..6eb7765856 100644 --- a/.github/workflows/ubuntu.yml +++ 
b/.github/workflows/ubuntu.yml @@ -130,7 +130,7 @@ jobs: timeout-minutes: ${{ matrix.timeout || 40 }} env: RUBY_TESTOPTS: '-q --tty=no' - TEST_BUNDLED_GEMS_ALLOW_FAILURES: 'typeprof' + TEST_BUNDLED_GEMS_ALLOW_FAILURES: 'typeprof,rbs,repl_type_completor' PRECHECK_BUNDLED_GEMS: 'no' - name: make skipped tests diff --git a/.github/workflows/yjit-macos.yml b/.github/workflows/yjit-macos.yml index 10279ebb3d..a7700ffc1c 100644 --- a/.github/workflows/yjit-macos.yml +++ b/.github/workflows/yjit-macos.yml @@ -147,7 +147,7 @@ jobs: timeout-minutes: 60 env: RUBY_TESTOPTS: '-q --tty=no' - TEST_BUNDLED_GEMS_ALLOW_FAILURES: 'typeprof' + TEST_BUNDLED_GEMS_ALLOW_FAILURES: 'typeprof,rbs,repl_type_completor' SYNTAX_SUGGEST_TIMEOUT: '5' PRECHECK_BUNDLED_GEMS: 'no' continue-on-error: ${{ matrix.continue-on-test_task || false }} diff --git a/.github/workflows/yjit-ubuntu.yml b/.github/workflows/yjit-ubuntu.yml index 7e4fd85965..dd1f745944 100644 --- a/.github/workflows/yjit-ubuntu.yml +++ b/.github/workflows/yjit-ubuntu.yml @@ -195,7 +195,7 @@ jobs: timeout-minutes: 90 env: RUBY_TESTOPTS: '-q --tty=no' - TEST_BUNDLED_GEMS_ALLOW_FAILURES: 'typeprof' + TEST_BUNDLED_GEMS_ALLOW_FAILURES: 'typeprof,rbs,repl_type_completor' PRECHECK_BUNDLED_GEMS: 'no' SYNTAX_SUGGEST_TIMEOUT: '5' YJIT_BINDGEN_DIFF_OPTS: '--exit-code' diff --git a/NEWS.md b/NEWS.md index 34efd40d2e..d370606f2b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -25,6 +25,11 @@ Note: We're only listing outstanding class updates. * `IO.select` accepts +Float::INFINITY+ as a timeout argument. [[Feature #20610]] +* Set + + * Set is now a core class, instead of an autoloaded stdlib class. + [[Feature #21216]] + * String * Update Unicode to Version 16.0.0 and Emoji Version 16.0. [[Feature #19908]][[Feature #20724]] @@ -98,3 +103,4 @@ The following bundled gems are updated. 
[Feature #20724]: https://bugs.ruby-lang.org/issues/20724 [Feature #21047]: https://bugs.ruby-lang.org/issues/21047 [Bug #21049]: https://bugs.ruby-lang.org/issues/21049 +[Feature #21216]: https://bugs.ruby-lang.org/issues/21216 diff --git a/benchmark/set.yml b/benchmark/set.yml new file mode 100644 index 0000000000..43217036e2 --- /dev/null +++ b/benchmark/set.yml @@ -0,0 +1,265 @@ +prelude: | + # First 1000 digits of pi + pi = <<~END.gsub(/\D/, '') + 31415926535897932384626433832795028841971693993751058209749445923078164062862089 + 98628034825342117067982148086513282306647093844609550582231725359408128481117450 + 28410270193852110555964462294895493038196442881097566593344612847564823378678316 + 52712019091456485669234603486104543266482133936072602491412737245870066063155881 + 74881520920962829254091715364367892590360011330530548820466521384146951941511609 + 43305727036575959195309218611738193261179310511854807446237996274956735188575272 + 48912279381830119491298336733624406566430860213949463952247371907021798609437027 + 70539217176293176752384674818467669405132000568127145263560827785771342757789609 + 17363717872146844090122495343014654958537105079227968925892354201995611212902196 + 08640344181598136297747713099605187072113499999983729780499510597317328160963185 + 95024459455346908302642522308253344685035261931188171010003137838752886587533208 + 38142061717766914730359825349042875546873115956286388235378759375195778185778053 + 21712268066130019278766111959092164201989380952572010654505906988788448549 + END + array1 = 10.times.flat_map do |i| + pi[i...].chars.each_slice(10).map(&:join) + end + array2 = array1.map(&:reverse) + array1.map!(&:to_i) + array2.map!(&:to_i) + a1 = array1[...10] + a2 = array1[...100] + a3 = array1 + oa1 = array2[...10] + oa2 = array2[...100] + oa3 = array2 + s0 = Set.new + s0 = Set.new + s1 = Set.new(a1) + s2 = Set.new(a2) + s3 = Set.new(a3) + o0 = Set.new + o1 = Set.new(array2[...10]) + o2 = Set.new(array2[...100]) + o3 = Set.new(array2) 
+ d0 = s0.dup + d1 = s1.dup + d2 = s2.dup + d3 = s3.dup + ss1 = s1 - a1[-1..-1] + ss2 = s2 - a2[-1..-1] + ss3 = s3 - a3[-1..-1] + os1 = o1 - oa1[-1..-1] + os2 = o2 - oa2[-1..-1] + os3 = o3 - oa3[-1..-1] + member = a1.first + cbi = s0.dup.compare_by_identity + ns = Set[s3, o3, d3] + set_subclass = Class.new(Set) + +benchmark: + new_0: Set.new + new_10: Set.new(a1) + new_100: Set.new(a2) + new_1000: Set.new(a3) + aref_0: Set[] + aref_10: Set[*a1] + aref_100: Set[*a2] + aref_1000: Set[*a3] + amp_0: s0 & o0 + amp_10: s1 & o1 + amp_100: s2 & o2 + amp_1000: s3 & o3 + amp_same_0: s0 & d0 + amp_same_10: s1 & d1 + amp_same_100: s2 & d2 + amp_same_1000: s3 & d3 + minus_0: s0 - o0 + minus_10: s1 - o1 + minus_100: s2 - o2 + minus_1000: s3 - o3 + minus_same_0: s0 - d0 + minus_same_10: s1 - d1 + minus_same_100: s2 - d2 + minus_same_1000: s3 - d3 + spaceship_0: s0 <=> o0 + spaceship_diff_10: s1 <=> o1 + spaceship_diff_100: s2 <=> o2 + spaceship_diff_1000: s3 <=> o3 + spaceship_sub_10: s1 <=> ss1 + spaceship_sub_100: s2 <=> ss2 + spaceship_sub_1000: s3 <=> ss3 + spaceship_sup_10: ss1 <=> s1 + spaceship_sup_100: ss2 <=> s2 + spaceship_sup_1000: ss3 <=> s3 + eq_0: s0 == o0 + eq_10: s1 == o1 + eq_100: s2 == o2 + eq_1000: s3 == o3 + eq_same_0: s0 == d0 + eq_same_10: s1 == d1 + eq_same_100: s2 == d2 + eq_same_1000: s3 == d3 + xor_0: s0 ^ o0 + xor_10: s1 ^ o1 + xor_100: s2 ^ o2 + xor_1000: s3 ^ o3 + xor_same_0: s0 ^ d0 + xor_same_10: s1 ^ d1 + xor_same_100: s2 ^ d2 + xor_same_1000: s3 ^ d3 + pipe_0: s0 | o0 + pipe_10: s1 | o1 + pipe_100: s2 | o2 + pipe_1000: s3 | o3 + pipe_same_0: s0 | d0 + pipe_same_10: s1 | d1 + pipe_same_100: s2 | d2 + pipe_same_1000: s3 | d3 + add: a3.each { s0.add(it) } + add_exist: a3.each { s3.add(it) } + addq: a3.each { s0.add?(it) } + addq_exist: a3.each { s3.add?(it) } + classify_0: s0.classify { it } + classify_10: s1.classify { it & 2 } + classify_100: s2.classify { it & 8 } + classify_1000: s3.classify { it & 32 } + clear: s0.clear + collect_0: s0.collect! 
{ it } + collect_10: s1.collect! { it } + collect_100: s2.collect! { it } + collect_1000: s3.collect! { it } + compare_by_identity_0: s0.dup.compare_by_identity + compare_by_identity_10: s1.dup.compare_by_identity + compare_by_identity_100: s2.dup.compare_by_identity + compare_by_identity_1000: s3.dup.compare_by_identity + compare_by_identityq_false: s0.compare_by_identity? + compare_by_identityq_true: cbi.compare_by_identity? + clone_0: s0.clone + clone_10: s1.clone + clone_100: s2.clone + clone_1000: s3.clone + delete: a3.each { s3.delete(it) } + delete_not_exist: a3.each { o3.delete(it) } + deleteq: a3.each { s3.delete?(it) } + deleteq_not_exist: a3.each { o3.delete?(it) } + delete_if_0: s0.delete_if { it } + delete_if_10: s1.delete_if { it & 2 == 0 } + delete_if_100: s2.delete_if { it & 2 == 0 } + delete_if_1000: s3.delete_if { it & 2 == 0 } + disjoint_0: s0.disjoint? o0 + disjoint_10: s1.disjoint? o1 + disjoint_100: s2.disjoint? o2 + disjoint_1000: s3.disjoint? o3 + disjoint_same_0: s0.disjoint? d0 + disjoint_same_10: s1.disjoint? d1 + disjoint_same_100: s2.disjoint? d2 + disjoint_same_1000: s3.disjoint? d3 + divide_1arity_0: s0.divide { true } + divide_1arity_10: s1.divide { it & 2 } + divide_1arity_100: s2.divide { it & 8 } + divide_1arity_1000: s3.divide { it & 32 } + divide_2arity_0: s0.divide { _1 == _2 } + divide_2arity_10: s1.divide { (_1 & 2) == (_2 & 2) } + divide_2arity_100: s2.divide { (_1 & 8) == (_2 & 8) } + divide_2arity_1000: s3.divide { (_1 & 32) == (_2 & 32) } + dup_0: s0.dup + dup_10: s1.dup + dup_100: s2.dup + dup_1000: s3.dup + each_0: s0.each { it } + each_10: s1.each { it } + each_100: s2.each { it } + each_1000: s3.each { it } + empty_true: s0.empty? + empty_false: s3.empty? + flatten: ns.flatten + flattenb: ns.flatten! + include_true_0: s0.include? member + include_true_10: s1.include? member + include_true_100: s2.include? member + include_true_1000: s3.include? 
member + include_false_0: s0.include?(-1) + include_false_10: s1.include?(-1) + include_false_100: s2.include?(-1) + include_false_1000: s3.include?(-1) + intersect_0: s0.intersect? o0 + intersect_10: s1.intersect? o1 + intersect_100: s2.intersect? o2 + intersect_1000: s3.intersect? o3 + intersect_same_0: s0.intersect? d0 + intersect_same_10: s1.intersect? d1 + intersect_same_100: s2.intersect? d2 + intersect_same_1000: s3.intersect? d3 + join_0: s0.join + join_10: s1.join + join_100: s2.join + join_1000: s3.join + join_arg_0: s0.join "" + join_arg_10: s1.join "" + join_arg_100: s2.join "" + join_arg_1000: s3.join "" + keep_if_0: s0.keep_if { it } + keep_if_10: s1.keep_if { it & 2 == 0 } + keep_if_100: s2.keep_if { it & 2 == 0 } + keep_if_1000: s3.keep_if { it & 2 == 0 } + merge_set: s0.dup.merge(s3, o3) + merge_enum: s0.dup.merge(array1, array2) + proper_subset_0: s0.proper_subset? s0 + proper_subset_10: s1.proper_subset? ss1 + proper_subset_100: s2.proper_subset? ss2 + proper_subset_1000: s3.proper_subset? ss3 + proper_subset_false_10: s1.proper_subset? os1 + proper_subset_false_100: s2.proper_subset? os2 + proper_subset_false_1000: s3.proper_subset? os3 + proper_superset_0: s0.proper_superset? s0 + proper_superset_10: ss1.proper_superset? s1 + proper_superset_100: ss2.proper_superset? s2 + proper_superset_1000: ss3.proper_superset? s3 + proper_superset_false_10: os1.proper_superset? s1 + proper_superset_false_100: os2.proper_superset? s2 + proper_superset_false_1000: os3.proper_superset? s3 + reject_0: s0.reject! { it } + reject_10: s1.reject! { it & 2 == 0 } + reject_100: s2.reject! { it & 2 == 0 } + reject_1000: s3.reject! 
{ it & 2 == 0 } + replace_0: s = Set.new; array1.each { s.replace(s0) } + replace_10: s = Set.new; array1.each { s.replace(s1) } + replace_100: s = Set.new; array1.each { s.replace(s2) } + replace_1000: s = Set.new; array1.each { s.replace(s3) } + reset_0: s0.reset + reset_10: s1.reset + reset_100: s2.reset + reset_1000: s3.reset + select_0: s0.select! { it } + select_10: s1.select! { it & 2 == 0 } + select_100: s2.select! { it & 2 == 0 } + select_1000: s3.select! { it & 2 == 0 } + size_0: s0.size + size_10: s1.size + size_100: s2.size + size_1000: s3.size + subtract_set: s3.dup.subtract(os3) + subtract_enum: s3.dup.subtract(oa3) + subtract_same_set: s3.dup.subtract(s3) + subtract_same_enum: s3.dup.subtract(a3) + subset_0: s0.subset? s0 + subset_10: s1.subset? ss1 + subset_100: s2.subset? ss2 + subset_1000: s3.subset? ss3 + subset_false_10: s1.subset? os1 + subset_false_100: s2.subset? os2 + subset_false_1000: s3.subset? os3 + superset_0: s0.superset? s0 + superset_10: ss1.superset? s1 + superset_100: ss2.superset? s2 + superset_1000: ss3.superset? s3 + superset_false_10: os1.superset? s1 + superset_false_100: os2.superset? s2 + superset_false_1000: os3.superset? 
s3 + to_a_0: s0.to_a + to_a_10: s1.to_a + to_a_100: s2.to_a + to_a_1000: s3.to_a + to_set_0: s0.to_set + to_set_10: s1.to_set + to_set_100: s2.to_set + to_set_1000: s3.to_set + to_set_arg_0: s0.to_set set_subclass + to_set_arg_10: s1.to_set set_subclass + to_set_arg_100: s2.to_set set_subclass + to_set_arg_1000: s3.to_set set_subclass diff --git a/common.mk b/common.mk index c6a7f9a58e..d8d8a1655b 100644 --- a/common.mk +++ b/common.mk @@ -167,6 +167,7 @@ COMMONOBJS = array.$(OBJEXT) \ signal.$(OBJEXT) \ sprintf.$(OBJEXT) \ st.$(OBJEXT) \ + set.$(OBJEXT) \ strftime.$(OBJEXT) \ string.$(OBJEXT) \ struct.$(OBJEXT) \ @@ -16237,6 +16238,212 @@ scheduler.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h scheduler.$(OBJEXT): {$(VPATH)}thread_native.h scheduler.$(OBJEXT): {$(VPATH)}vm_core.h scheduler.$(OBJEXT): {$(VPATH)}vm_opts.h +set.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h +set.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h +set.$(OBJEXT): $(CCAN_DIR)/list/list.h +set.$(OBJEXT): $(CCAN_DIR)/str/str.h +set.$(OBJEXT): $(hdrdir)/ruby/ruby.h +set.$(OBJEXT): $(top_srcdir)/internal/array.h +set.$(OBJEXT): $(top_srcdir)/internal/basic_operators.h +set.$(OBJEXT): $(top_srcdir)/internal/bits.h +set.$(OBJEXT): $(top_srcdir)/internal/compilers.h +set.$(OBJEXT): $(top_srcdir)/internal/gc.h +set.$(OBJEXT): $(top_srcdir)/internal/hash.h +set.$(OBJEXT): $(top_srcdir)/internal/imemo.h +set.$(OBJEXT): $(top_srcdir)/internal/proc.h +set.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h +set.$(OBJEXT): $(top_srcdir)/internal/serial.h +set.$(OBJEXT): $(top_srcdir)/internal/set_table.h +set.$(OBJEXT): $(top_srcdir)/internal/static_assert.h +set.$(OBJEXT): $(top_srcdir)/internal/symbol.h +set.$(OBJEXT): $(top_srcdir)/internal/variable.h +set.$(OBJEXT): $(top_srcdir)/internal/vm.h +set.$(OBJEXT): $(top_srcdir)/internal/warnings.h +set.$(OBJEXT): {$(VPATH)}assert.h +set.$(OBJEXT): {$(VPATH)}atomic.h +set.$(OBJEXT): {$(VPATH)}backward/2/assume.h +set.$(OBJEXT): 
{$(VPATH)}backward/2/attributes.h +set.$(OBJEXT): {$(VPATH)}backward/2/bool.h +set.$(OBJEXT): {$(VPATH)}backward/2/gcc_version_since.h +set.$(OBJEXT): {$(VPATH)}backward/2/inttypes.h +set.$(OBJEXT): {$(VPATH)}backward/2/limits.h +set.$(OBJEXT): {$(VPATH)}backward/2/long_long.h +set.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h +set.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h +set.$(OBJEXT): {$(VPATH)}config.h +set.$(OBJEXT): {$(VPATH)}constant.h +set.$(OBJEXT): {$(VPATH)}defines.h +set.$(OBJEXT): {$(VPATH)}encoding.h +set.$(OBJEXT): {$(VPATH)}id.h +set.$(OBJEXT): {$(VPATH)}id_table.h +set.$(OBJEXT): {$(VPATH)}intern.h +set.$(OBJEXT): {$(VPATH)}internal.h +set.$(OBJEXT): {$(VPATH)}internal/abi.h +set.$(OBJEXT): {$(VPATH)}internal/anyargs.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/char.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/double.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/fixnum.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/gid_t.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/int.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/intptr_t.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/long.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/long_long.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/mode_t.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/off_t.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/pid_t.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/short.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/size_t.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/st_data_t.h +set.$(OBJEXT): {$(VPATH)}internal/arithmetic/uid_t.h +set.$(OBJEXT): {$(VPATH)}internal/assume.h +set.$(OBJEXT): {$(VPATH)}internal/attr/alloc_size.h +set.$(OBJEXT): {$(VPATH)}internal/attr/artificial.h +set.$(OBJEXT): {$(VPATH)}internal/attr/cold.h +set.$(OBJEXT): {$(VPATH)}internal/attr/const.h +set.$(OBJEXT): {$(VPATH)}internal/attr/constexpr.h +set.$(OBJEXT): {$(VPATH)}internal/attr/deprecated.h +set.$(OBJEXT): 
{$(VPATH)}internal/attr/diagnose_if.h +set.$(OBJEXT): {$(VPATH)}internal/attr/enum_extensibility.h +set.$(OBJEXT): {$(VPATH)}internal/attr/error.h +set.$(OBJEXT): {$(VPATH)}internal/attr/flag_enum.h +set.$(OBJEXT): {$(VPATH)}internal/attr/forceinline.h +set.$(OBJEXT): {$(VPATH)}internal/attr/format.h +set.$(OBJEXT): {$(VPATH)}internal/attr/maybe_unused.h +set.$(OBJEXT): {$(VPATH)}internal/attr/noalias.h +set.$(OBJEXT): {$(VPATH)}internal/attr/nodiscard.h +set.$(OBJEXT): {$(VPATH)}internal/attr/noexcept.h +set.$(OBJEXT): {$(VPATH)}internal/attr/noinline.h +set.$(OBJEXT): {$(VPATH)}internal/attr/nonnull.h +set.$(OBJEXT): {$(VPATH)}internal/attr/noreturn.h +set.$(OBJEXT): {$(VPATH)}internal/attr/packed_struct.h +set.$(OBJEXT): {$(VPATH)}internal/attr/pure.h +set.$(OBJEXT): {$(VPATH)}internal/attr/restrict.h +set.$(OBJEXT): {$(VPATH)}internal/attr/returns_nonnull.h +set.$(OBJEXT): {$(VPATH)}internal/attr/warning.h +set.$(OBJEXT): {$(VPATH)}internal/attr/weakref.h +set.$(OBJEXT): {$(VPATH)}internal/cast.h +set.$(OBJEXT): {$(VPATH)}internal/compiler_is.h +set.$(OBJEXT): {$(VPATH)}internal/compiler_is/apple.h +set.$(OBJEXT): {$(VPATH)}internal/compiler_is/clang.h +set.$(OBJEXT): {$(VPATH)}internal/compiler_is/gcc.h +set.$(OBJEXT): {$(VPATH)}internal/compiler_is/intel.h +set.$(OBJEXT): {$(VPATH)}internal/compiler_is/msvc.h +set.$(OBJEXT): {$(VPATH)}internal/compiler_is/sunpro.h +set.$(OBJEXT): {$(VPATH)}internal/compiler_since.h +set.$(OBJEXT): {$(VPATH)}internal/config.h +set.$(OBJEXT): {$(VPATH)}internal/constant_p.h +set.$(OBJEXT): {$(VPATH)}internal/core.h +set.$(OBJEXT): {$(VPATH)}internal/core/rarray.h +set.$(OBJEXT): {$(VPATH)}internal/core/rbasic.h +set.$(OBJEXT): {$(VPATH)}internal/core/rbignum.h +set.$(OBJEXT): {$(VPATH)}internal/core/rclass.h +set.$(OBJEXT): {$(VPATH)}internal/core/rdata.h +set.$(OBJEXT): {$(VPATH)}internal/core/rfile.h +set.$(OBJEXT): {$(VPATH)}internal/core/rhash.h +set.$(OBJEXT): {$(VPATH)}internal/core/robject.h +set.$(OBJEXT): 
{$(VPATH)}internal/core/rregexp.h +set.$(OBJEXT): {$(VPATH)}internal/core/rstring.h +set.$(OBJEXT): {$(VPATH)}internal/core/rstruct.h +set.$(OBJEXT): {$(VPATH)}internal/core/rtypeddata.h +set.$(OBJEXT): {$(VPATH)}internal/ctype.h +set.$(OBJEXT): {$(VPATH)}internal/dllexport.h +set.$(OBJEXT): {$(VPATH)}internal/dosish.h +set.$(OBJEXT): {$(VPATH)}internal/encoding/coderange.h +set.$(OBJEXT): {$(VPATH)}internal/encoding/ctype.h +set.$(OBJEXT): {$(VPATH)}internal/encoding/encoding.h +set.$(OBJEXT): {$(VPATH)}internal/encoding/pathname.h +set.$(OBJEXT): {$(VPATH)}internal/encoding/re.h +set.$(OBJEXT): {$(VPATH)}internal/encoding/sprintf.h +set.$(OBJEXT): {$(VPATH)}internal/encoding/string.h +set.$(OBJEXT): {$(VPATH)}internal/encoding/symbol.h +set.$(OBJEXT): {$(VPATH)}internal/encoding/transcode.h +set.$(OBJEXT): {$(VPATH)}internal/error.h +set.$(OBJEXT): {$(VPATH)}internal/eval.h +set.$(OBJEXT): {$(VPATH)}internal/event.h +set.$(OBJEXT): {$(VPATH)}internal/fl_type.h +set.$(OBJEXT): {$(VPATH)}internal/gc.h +set.$(OBJEXT): {$(VPATH)}internal/glob.h +set.$(OBJEXT): {$(VPATH)}internal/globals.h +set.$(OBJEXT): {$(VPATH)}internal/has/attribute.h +set.$(OBJEXT): {$(VPATH)}internal/has/builtin.h +set.$(OBJEXT): {$(VPATH)}internal/has/c_attribute.h +set.$(OBJEXT): {$(VPATH)}internal/has/cpp_attribute.h +set.$(OBJEXT): {$(VPATH)}internal/has/declspec_attribute.h +set.$(OBJEXT): {$(VPATH)}internal/has/extension.h +set.$(OBJEXT): {$(VPATH)}internal/has/feature.h +set.$(OBJEXT): {$(VPATH)}internal/has/warning.h +set.$(OBJEXT): {$(VPATH)}internal/intern/array.h +set.$(OBJEXT): {$(VPATH)}internal/intern/bignum.h +set.$(OBJEXT): {$(VPATH)}internal/intern/class.h +set.$(OBJEXT): {$(VPATH)}internal/intern/compar.h +set.$(OBJEXT): {$(VPATH)}internal/intern/complex.h +set.$(OBJEXT): {$(VPATH)}internal/intern/cont.h +set.$(OBJEXT): {$(VPATH)}internal/intern/dir.h +set.$(OBJEXT): {$(VPATH)}internal/intern/enum.h +set.$(OBJEXT): {$(VPATH)}internal/intern/enumerator.h +set.$(OBJEXT): 
{$(VPATH)}internal/intern/error.h +set.$(OBJEXT): {$(VPATH)}internal/intern/eval.h +set.$(OBJEXT): {$(VPATH)}internal/intern/file.h +set.$(OBJEXT): {$(VPATH)}internal/intern/hash.h +set.$(OBJEXT): {$(VPATH)}internal/intern/io.h +set.$(OBJEXT): {$(VPATH)}internal/intern/load.h +set.$(OBJEXT): {$(VPATH)}internal/intern/marshal.h +set.$(OBJEXT): {$(VPATH)}internal/intern/numeric.h +set.$(OBJEXT): {$(VPATH)}internal/intern/object.h +set.$(OBJEXT): {$(VPATH)}internal/intern/parse.h +set.$(OBJEXT): {$(VPATH)}internal/intern/proc.h +set.$(OBJEXT): {$(VPATH)}internal/intern/process.h +set.$(OBJEXT): {$(VPATH)}internal/intern/random.h +set.$(OBJEXT): {$(VPATH)}internal/intern/range.h +set.$(OBJEXT): {$(VPATH)}internal/intern/rational.h +set.$(OBJEXT): {$(VPATH)}internal/intern/re.h +set.$(OBJEXT): {$(VPATH)}internal/intern/ruby.h +set.$(OBJEXT): {$(VPATH)}internal/intern/select.h +set.$(OBJEXT): {$(VPATH)}internal/intern/select/largesize.h +set.$(OBJEXT): {$(VPATH)}internal/intern/signal.h +set.$(OBJEXT): {$(VPATH)}internal/intern/sprintf.h +set.$(OBJEXT): {$(VPATH)}internal/intern/string.h +set.$(OBJEXT): {$(VPATH)}internal/intern/struct.h +set.$(OBJEXT): {$(VPATH)}internal/intern/thread.h +set.$(OBJEXT): {$(VPATH)}internal/intern/time.h +set.$(OBJEXT): {$(VPATH)}internal/intern/variable.h +set.$(OBJEXT): {$(VPATH)}internal/intern/vm.h +set.$(OBJEXT): {$(VPATH)}internal/interpreter.h +set.$(OBJEXT): {$(VPATH)}internal/iterator.h +set.$(OBJEXT): {$(VPATH)}internal/memory.h +set.$(OBJEXT): {$(VPATH)}internal/method.h +set.$(OBJEXT): {$(VPATH)}internal/module.h +set.$(OBJEXT): {$(VPATH)}internal/newobj.h +set.$(OBJEXT): {$(VPATH)}internal/scan_args.h +set.$(OBJEXT): {$(VPATH)}internal/set_table.h +set.$(OBJEXT): {$(VPATH)}internal/special_consts.h +set.$(OBJEXT): {$(VPATH)}internal/static_assert.h +set.$(OBJEXT): {$(VPATH)}internal/stdalign.h +set.$(OBJEXT): {$(VPATH)}internal/stdbool.h +set.$(OBJEXT): {$(VPATH)}internal/stdckdint.h +set.$(OBJEXT): {$(VPATH)}internal/symbol.h 
+set.$(OBJEXT): {$(VPATH)}internal/value.h +set.$(OBJEXT): {$(VPATH)}internal/value_type.h +set.$(OBJEXT): {$(VPATH)}internal/variable.h +set.$(OBJEXT): {$(VPATH)}internal/warning_push.h +set.$(OBJEXT): {$(VPATH)}internal/xmalloc.h +set.$(OBJEXT): {$(VPATH)}method.h +set.$(OBJEXT): {$(VPATH)}missing.h +set.$(OBJEXT): {$(VPATH)}node.h +set.$(OBJEXT): {$(VPATH)}onigmo.h +set.$(OBJEXT): {$(VPATH)}oniguruma.h +set.$(OBJEXT): {$(VPATH)}ruby_assert.h +set.$(OBJEXT): {$(VPATH)}ruby_atomic.h +set.$(OBJEXT): {$(VPATH)}rubyparser.h +set.$(OBJEXT): {$(VPATH)}set.c +set.$(OBJEXT): {$(VPATH)}shape.h +set.$(OBJEXT): {$(VPATH)}st.h +set.$(OBJEXT): {$(VPATH)}subst.h +set.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h +set.$(OBJEXT): {$(VPATH)}thread_native.h +set.$(OBJEXT): {$(VPATH)}vm_core.h +set.$(OBJEXT): {$(VPATH)}vm_opts.h setproctitle.$(OBJEXT): $(hdrdir)/ruby.h setproctitle.$(OBJEXT): $(hdrdir)/ruby/ruby.h setproctitle.$(OBJEXT): {$(VPATH)}assert.h @@ -17022,6 +17229,7 @@ st.$(OBJEXT): $(top_srcdir)/internal/bits.h st.$(OBJEXT): $(top_srcdir)/internal/compilers.h st.$(OBJEXT): $(top_srcdir)/internal/hash.h st.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h +st.$(OBJEXT): $(top_srcdir)/internal/set_table.h st.$(OBJEXT): $(top_srcdir)/internal/st.h st.$(OBJEXT): $(top_srcdir)/internal/static_assert.h st.$(OBJEXT): $(top_srcdir)/internal/warnings.h diff --git a/inits.c b/inits.c index 79683185df..ee8b204760 100644 --- a/inits.c +++ b/inits.c @@ -76,6 +76,7 @@ rb_call_inits(void) CALL(shape); CALL(Prism); CALL(unicode_version); + CALL(Set); // enable builtin loading CALL(builtin); diff --git a/internal/set_table.h b/internal/set_table.h new file mode 100644 index 0000000000..80bea99006 --- /dev/null +++ b/internal/set_table.h @@ -0,0 +1,63 @@ +#ifndef INTERNAL_SET_TABLE_H +#define INTERNAL_SET_TABLE_H + +#include "include/ruby/st.h" + +struct set_table_entry; + +typedef struct set_table_entry set_table_entry; + +struct set_table { + /* Cached features of the table -- see st.c 
for more details. */ + unsigned char entry_power, bin_power, size_ind; + /* How many times the table was rebuilt. */ + unsigned int rebuilds_num; + const struct st_hash_type *type; + /* Number of entries currently in the table. */ + st_index_t num_entries; + /* Array of bins used for access by keys. */ + st_index_t *bins; + /* Start and bound index of entries in array entries. + entries_starts and entries_bound are in interval + [0,allocated_entries]. */ + st_index_t entries_start, entries_bound; + /* Array of size 2^entry_power. */ + set_table_entry *entries; +}; + +typedef struct set_table set_table; + +typedef int set_foreach_callback_func(st_data_t, st_data_t); +typedef int set_foreach_check_callback_func(st_data_t, st_data_t, int); +typedef int set_update_callback_func(st_data_t *key, st_data_t arg, int existing); + +#define set_table_size rb_set_table_size +size_t rb_set_table_size(const struct set_table *tbl); +#define set_init_table_with_size rb_set_init_table_with_size +set_table *rb_set_init_table_with_size(set_table *tab, const struct st_hash_type *, st_index_t); +#define set_delete rb_set_delete +int rb_set_delete(set_table *, st_data_t *); /* returns 0:notfound 1:deleted */ +#define set_insert rb_set_insert +int rb_set_insert(set_table *, st_data_t); +#define set_lookup rb_set_lookup +int rb_set_lookup(set_table *, st_data_t); +#define set_foreach_with_replace rb_set_foreach_with_replace +int rb_set_foreach_with_replace(set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg); +#define set_foreach rb_set_foreach +int rb_set_foreach(set_table *, set_foreach_callback_func *, st_data_t); +#define set_foreach_check rb_set_foreach_check +int rb_set_foreach_check(set_table *, set_foreach_check_callback_func *, st_data_t, st_data_t); +#define set_keys rb_set_keys +st_index_t rb_set_keys(set_table *table, st_data_t *keys, st_index_t size); +#define set_free_table rb_set_free_table +void 
rb_set_free_table(set_table *); +#define set_clear rb_set_clear +void rb_set_clear(set_table *); +#define set_copy rb_set_copy +set_table *rb_set_copy(set_table *new_table, set_table *old_table); +#define set_memsize rb_set_memsize +PUREFUNC(size_t rb_set_memsize(const set_table *)); +#define set_compact_table rb_set_compact_table +void set_compact_table(set_table *tab); + +#endif diff --git a/lib/pp.rb b/lib/pp.rb index 1c24ca0d23..5318395631 100644 --- a/lib/pp.rb +++ b/lib/pp.rb @@ -442,6 +442,23 @@ class Hash # :nodoc: end end +class Set # :nodoc: + def pretty_print(pp) # :nodoc: + pp.group(1, '#<Set:', '>') { + pp.breakable + pp.group(1, '{', '}') { + pp.seplist(self) { |o| + pp.pp o + } + } + } + end + + def pretty_print_cycle(pp) # :nodoc: + pp.text sprintf('#<Set: {%s}>', empty? ? '' : '...') + end +end + class << ENV # :nodoc: def pretty_print(q) # :nodoc: h = {} diff --git a/lib/set.rb b/lib/set.rb deleted file mode 100644 index 26311af6cc..0000000000 --- a/lib/set.rb +++ /dev/null @@ -1,855 +0,0 @@ -# frozen_string_literal: true -# :markup: markdown -# -# set.rb - defines the Set class -# -# Copyright (c) 2002-2024 Akinori MUSHA -# -# Documentation by Akinori MUSHA and Gavin Sinclair. -# -# All rights reserved. You can redistribute and/or modify it under the same -# terms as Ruby. - - -## -# This library provides the Set class, which implements a collection -# of unordered values with no duplicates. It is a hybrid of Array's -# intuitive inter-operation facilities and Hash's fast lookup. -# -# The method `to_set` is added to Enumerable for convenience. -# -# Set is easy to use with Enumerable objects (implementing `each`). -# Most of the initializer methods and binary operators accept generic -# Enumerable objects besides sets and arrays. An Enumerable object -# can be converted to Set using the `to_set` method. -# -# Set uses Hash as storage, so you must note the following points: -# -# * Equality of elements is determined according to Object#eql? and -# Object#hash. 
Use Set#compare_by_identity to make a set compare -# its elements by their identity. -# * Set assumes that the identity of each element does not change -# while it is stored. Modifying an element of a set will render the -# set to an unreliable state. -# * When a string is to be stored, a frozen copy of the string is -# stored instead unless the original string is already frozen. -# -# ## Comparison -# -# The comparison operators `<`, `>`, `<=`, and `>=` are implemented as -# shorthand for the {proper_,}{subset?,superset?} methods. The `<=>` -# operator reflects this order, or return `nil` for sets that both -# have distinct elements (`{x, y}` vs. `{x, z}` for example). -# -# ## Example -# -# ```ruby -# require 'set' -# s1 = Set[1, 2] #=> # -# s2 = [1, 2].to_set #=> # -# s1 == s2 #=> true -# s1.add("foo") #=> # -# s1.merge([2, 6]) #=> # -# s1.subset?(s2) #=> false -# s2.subset?(s1) #=> true -# ``` -# -# ## Contact -# -# - Akinori MUSHA <> (current maintainer) -# -# ## What's Here -# -# First, what's elsewhere. \Class \Set: -# -# - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. -# - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], -# which provides dozens of additional methods. -# -# In particular, class \Set does not have many methods of its own -# for fetching or for iterating. -# Instead, it relies on those in \Enumerable. 
-# -# Here, class \Set provides methods that are useful for: -# -# - [Creating a Set](#class-Set-label-Methods+for+Creating+a+Set) -# - [Set Operations](#class-Set-label-Methods+for+Set+Operations) -# - [Comparing](#class-Set-label-Methods+for+Comparing) -# - [Querying](#class-Set-label-Methods+for+Querying) -# - [Assigning](#class-Set-label-Methods+for+Assigning) -# - [Deleting](#class-Set-label-Methods+for+Deleting) -# - [Converting](#class-Set-label-Methods+for+Converting) -# - [Iterating](#class-Set-label-Methods+for+Iterating) -# - [And more....](#class-Set-label-Other+Methods) -# -# ### Methods for Creating a \Set -# -# - ::[]: -# Returns a new set containing the given objects. -# - ::new: -# Returns a new set containing either the given objects -# (if no block given) or the return values from the called block -# (if a block given). -# -# ### Methods for \Set Operations -# -# - [|](#method-i-7C) (aliased as #union and #+): -# Returns a new set containing all elements from +self+ -# and all elements from a given enumerable (no duplicates). -# - [&](#method-i-26) (aliased as #intersection): -# Returns a new set containing all elements common to +self+ -# and a given enumerable. -# - [-](#method-i-2D) (aliased as #difference): -# Returns a copy of +self+ with all elements -# in a given enumerable removed. -# - [\^](#method-i-5E): -# Returns a new set containing all elements from +self+ -# and a given enumerable except those common to both. -# -# ### Methods for Comparing -# -# - [<=>](#method-i-3C-3D-3E): -# Returns -1, 0, or 1 as +self+ is less than, equal to, -# or greater than a given object. -# - [==](#method-i-3D-3D): -# Returns whether +self+ and a given enumerable are equal, -# as determined by Object#eql?. -# - \#compare_by_identity?: -# Returns whether the set considers only identity -# when comparing elements. -# -# ### Methods for Querying -# -# - \#length (aliased as #size): -# Returns the count of elements. 
-# - \#empty?: -# Returns whether the set has no elements. -# - \#include? (aliased as #member? and #===): -# Returns whether a given object is an element in the set. -# - \#subset? (aliased as [<=](#method-i-3C-3D)): -# Returns whether a given object is a subset of the set. -# - \#proper_subset? (aliased as [<](#method-i-3C)): -# Returns whether a given enumerable is a proper subset of the set. -# - \#superset? (aliased as [>=](#method-i-3E-3D])): -# Returns whether a given enumerable is a superset of the set. -# - \#proper_superset? (aliased as [>](#method-i-3E)): -# Returns whether a given enumerable is a proper superset of the set. -# - \#disjoint?: -# Returns +true+ if the set and a given enumerable -# have no common elements, +false+ otherwise. -# - \#intersect?: -# Returns +true+ if the set and a given enumerable: -# have any common elements, +false+ otherwise. -# - \#compare_by_identity?: -# Returns whether the set considers only identity -# when comparing elements. -# -# ### Methods for Assigning -# -# - \#add (aliased as #<<): -# Adds a given object to the set; returns +self+. -# - \#add?: -# If the given object is not an element in the set, -# adds it and returns +self+; otherwise, returns +nil+. -# - \#merge: -# Merges the elements of each given enumerable object to the set; returns +self+. -# - \#replace: -# Replaces the contents of the set with the contents -# of a given enumerable. -# -# ### Methods for Deleting -# -# - \#clear: -# Removes all elements in the set; returns +self+. -# - \#delete: -# Removes a given object from the set; returns +self+. -# - \#delete?: -# If the given object is an element in the set, -# removes it and returns +self+; otherwise, returns +nil+. -# - \#subtract: -# Removes each given object from the set; returns +self+. -# - \#delete_if - Removes elements specified by a given block. -# - \#select! (aliased as #filter!): -# Removes elements not specified by a given block. 
-# - \#keep_if: -# Removes elements not specified by a given block. -# - \#reject! -# Removes elements specified by a given block. -# -# ### Methods for Converting -# -# - \#classify: -# Returns a hash that classifies the elements, -# as determined by the given block. -# - \#collect! (aliased as #map!): -# Replaces each element with a block return-value. -# - \#divide: -# Returns a hash that classifies the elements, -# as determined by the given block; -# differs from #classify in that the block may accept -# either one or two arguments. -# - \#flatten: -# Returns a new set that is a recursive flattening of +self+. -# \#flatten!: -# Replaces each nested set in +self+ with the elements from that set. -# - \#inspect (aliased as #to_s): -# Returns a string displaying the elements. -# - \#join: -# Returns a string containing all elements, converted to strings -# as needed, and joined by the given record separator. -# - \#to_a: -# Returns an array containing all set elements. -# - \#to_set: -# Returns +self+ if given no arguments and no block; -# with a block given, returns a new set consisting of block -# return values. -# -# ### Methods for Iterating -# -# - \#each: -# Calls the block with each successive element; returns +self+. -# -# ### Other Methods -# -# - \#reset: -# Resets the internal state; useful if an object -# has been modified while an element in the set. -# -class Set - VERSION = "1.1.1" - - include Enumerable - - # Creates a new set containing the given objects. - # - # Set[1, 2] # => # - # Set[1, 2, 1] # => # - # Set[1, 'c', :s] # => # - def self.[](*ary) - new(ary) - end - - # Creates a new set containing the elements of the given enumerable - # object. - # - # If a block is given, the elements of enum are preprocessed by the - # given block. 
- # - # Set.new([1, 2]) #=> # - # Set.new([1, 2, 1]) #=> # - # Set.new([1, 'c', :s]) #=> # - # Set.new(1..5) #=> # - # Set.new([1, 2, 3]) { |x| x * x } #=> # - def initialize(enum = nil, &block) # :yields: o - @hash ||= Hash.new(false) - - enum.nil? and return - - if block - do_with_enum(enum) { |o| add(block[o]) } - else - merge(enum) - end - end - - # Makes the set compare its elements by their identity and returns - # self. This method may not be supported by all subclasses of Set. - def compare_by_identity - if @hash.respond_to?(:compare_by_identity) - @hash.compare_by_identity - self - else - raise NotImplementedError, "#{self.class.name}\##{__method__} is not implemented" - end - end - - # Returns true if the set will compare its elements by their - # identity. Also see Set#compare_by_identity. - def compare_by_identity? - @hash.respond_to?(:compare_by_identity?) && @hash.compare_by_identity? - end - - def do_with_enum(enum, &block) # :nodoc: - if enum.respond_to?(:each_entry) - enum.each_entry(&block) if block - elsif enum.respond_to?(:each) - enum.each(&block) if block - else - raise ArgumentError, "value must be enumerable" - end - end - private :do_with_enum - - # Dup internal hash. - def initialize_dup(orig) - super - @hash = orig.instance_variable_get(:@hash).dup - end - - # Clone internal hash. - def initialize_clone(orig, **options) - super - @hash = orig.instance_variable_get(:@hash).clone(**options) - end - - def freeze # :nodoc: - @hash.freeze - super - end - - # Returns the number of elements. - def size - @hash.size - end - alias length size - - # Returns true if the set contains no elements. - def empty? - @hash.empty? - end - - # Removes all elements and returns self. - # - # set = Set[1, 'c', :s] #=> # - # set.clear #=> # - # set #=> # - def clear - @hash.clear - self - end - - # Replaces the contents of the set with the contents of the given - # enumerable object and returns self. 
- # - # set = Set[1, 'c', :s] #=> # - # set.replace([1, 2]) #=> # - # set #=> # - def replace(enum) - if enum.instance_of?(self.class) - @hash.replace(enum.instance_variable_get(:@hash)) - self - else - do_with_enum(enum) # make sure enum is enumerable before calling clear - clear - merge(enum) - end - end - - # Returns an array containing all elements in the set. - # - # Set[1, 2].to_a #=> [1, 2] - # Set[1, 'c', :s].to_a #=> [1, "c", :s] - def to_a - @hash.keys - end - - # Returns self if no arguments are given. Otherwise, converts the - # set to another with `klass.new(self, *args, &block)`. - # - # In subclasses, returns `klass.new(self, *args, &block)` unless - # overridden. - def to_set(klass = Set, *args, &block) - return self if instance_of?(Set) && klass == Set && block.nil? && args.empty? - klass.new(self, *args, &block) - end - - def flatten_merge(set, seen = {}) # :nodoc: - set.each { |e| - if e.is_a?(Set) - case seen[e_id = e.object_id] - when true - raise ArgumentError, "tried to flatten recursive Set" - when false - next - end - - seen[e_id] = true - flatten_merge(e, seen) - seen[e_id] = false - else - add(e) - end - } - - self - end - protected :flatten_merge - - # Returns a new set that is a copy of the set, flattening each - # containing set recursively. - def flatten - self.class.new.flatten_merge(self) - end - - # Equivalent to Set#flatten, but replaces the receiver with the - # result in place. Returns nil if no modifications were made. - def flatten! - replace(flatten()) if any?(Set) - end - - # Returns true if the set contains the given object. - # - # Note that include? and member? do not test member - # equality using == as do other Enumerables. - # - # See also Enumerable#include? - def include?(o) - @hash[o] - end - alias member? include? - - # Returns true if the set is a superset of the given set. 
- def superset?(set) - case - when set.instance_of?(self.class) && @hash.respond_to?(:>=) - @hash >= set.instance_variable_get(:@hash) - when set.is_a?(Set) - size >= set.size && set.all?(self) - else - raise ArgumentError, "value must be a set" - end - end - alias >= superset? - - # Returns true if the set is a proper superset of the given set. - def proper_superset?(set) - case - when set.instance_of?(self.class) && @hash.respond_to?(:>) - @hash > set.instance_variable_get(:@hash) - when set.is_a?(Set) - size > set.size && set.all?(self) - else - raise ArgumentError, "value must be a set" - end - end - alias > proper_superset? - - # Returns true if the set is a subset of the given set. - def subset?(set) - case - when set.instance_of?(self.class) && @hash.respond_to?(:<=) - @hash <= set.instance_variable_get(:@hash) - when set.is_a?(Set) - size <= set.size && all?(set) - else - raise ArgumentError, "value must be a set" - end - end - alias <= subset? - - # Returns true if the set is a proper subset of the given set. - def proper_subset?(set) - case - when set.instance_of?(self.class) && @hash.respond_to?(:<) - @hash < set.instance_variable_get(:@hash) - when set.is_a?(Set) - size < set.size && all?(set) - else - raise ArgumentError, "value must be a set" - end - end - alias < proper_subset? - - # Returns 0 if the set are equal, - # -1 / +1 if the set is a proper subset / superset of the given set, - # or nil if they both have unique elements. - def <=>(set) - return unless set.is_a?(Set) - - case size <=> set.size - when -1 then -1 if proper_subset?(set) - when +1 then +1 if proper_superset?(set) - else 0 if self.==(set) - end - end - - # Returns true if the set and the given enumerable have at least one - # element in common. - # - # Set[1, 2, 3].intersect? Set[4, 5] #=> false - # Set[1, 2, 3].intersect? Set[3, 4] #=> true - # Set[1, 2, 3].intersect? 4..5 #=> false - # Set[1, 2, 3].intersect? 
[3, 4] #=> true - def intersect?(set) - case set - when Set - if size < set.size - any?(set) - else - set.any?(self) - end - when Enumerable - set.any?(self) - else - raise ArgumentError, "value must be enumerable" - end - end - - # Returns true if the set and the given enumerable have - # no element in common. This method is the opposite of `intersect?`. - # - # Set[1, 2, 3].disjoint? Set[3, 4] #=> false - # Set[1, 2, 3].disjoint? Set[4, 5] #=> true - # Set[1, 2, 3].disjoint? [3, 4] #=> false - # Set[1, 2, 3].disjoint? 4..5 #=> true - def disjoint?(set) - !intersect?(set) - end - - # Calls the given block once for each element in the set, passing - # the element as parameter. Returns an enumerator if no block is - # given. - def each(&block) - block_given? or return enum_for(__method__) { size } - @hash.each_key(&block) - self - end - - # Adds the given object to the set and returns self. Use `merge` to - # add many elements at once. - # - # Set[1, 2].add(3) #=> # - # Set[1, 2].add([3, 4]) #=> # - # Set[1, 2].add(2) #=> # - def add(o) - @hash[o] = true - self - end - alias << add - - # Adds the given object to the set and returns self. If the - # object is already in the set, returns nil. - # - # Set[1, 2].add?(3) #=> # - # Set[1, 2].add?([3, 4]) #=> # - # Set[1, 2].add?(2) #=> nil - def add?(o) - add(o) unless include?(o) - end - - # Deletes the given object from the set and returns self. Use - # `subtract` to delete many items at once. - def delete(o) - @hash.delete(o) - self - end - - # Deletes the given object from the set and returns self. If the - # object is not in the set, returns nil. - def delete?(o) - delete(o) if include?(o) - end - - # Deletes every element of the set for which block evaluates to - # true, and returns self. Returns an enumerator if no block is - # given. - def delete_if(&block) - block_given? 
or return enum_for(__method__) { size } - # Instead of directly using @hash.delete_if, perform enumeration - # using self.each that subclasses may override. - select(&block).each { |o| @hash.delete(o) } - self - end - - # Deletes every element of the set for which block evaluates to - # false, and returns self. Returns an enumerator if no block is - # given. - def keep_if(&block) - block_given? or return enum_for(__method__) { size } - # Instead of directly using @hash.keep_if, perform enumeration - # using self.each that subclasses may override. - reject(&block).each { |o| @hash.delete(o) } - self - end - - # Replaces the elements with ones returned by `collect()`. - # Returns an enumerator if no block is given. - def collect! - block_given? or return enum_for(__method__) { size } - set = self.class.new - each { |o| set << yield(o) } - replace(set) - end - alias map! collect! - - # Equivalent to Set#delete_if, but returns nil if no changes were - # made. Returns an enumerator if no block is given. - def reject!(&block) - block_given? or return enum_for(__method__) { size } - n = size - delete_if(&block) - self if size != n - end - - # Equivalent to Set#keep_if, but returns nil if no changes were - # made. Returns an enumerator if no block is given. - def select!(&block) - block_given? or return enum_for(__method__) { size } - n = size - keep_if(&block) - self if size != n - end - - # Equivalent to Set#select! - alias filter! select! - - # Merges the elements of the given enumerable objects to the set and - # returns self. - def merge(*enums, **nil) - enums.each do |enum| - if enum.instance_of?(self.class) - @hash.update(enum.instance_variable_get(:@hash)) - else - do_with_enum(enum) { |o| add(o) } - end - end - - self - end - - # Deletes every element that appears in the given enumerable object - # and returns self. 
- def subtract(enum) - do_with_enum(enum) { |o| delete(o) } - self - end - - # Returns a new set built by merging the set and the elements of the - # given enumerable object. - # - # Set[1, 2, 3] | Set[2, 4, 5] #=> # - # Set[1, 5, 'z'] | (1..6) #=> # - def |(enum) - dup.merge(enum) - end - alias + | - alias union | - - # Returns a new set built by duplicating the set, removing every - # element that appears in the given enumerable object. - # - # Set[1, 3, 5] - Set[1, 5] #=> # - # Set['a', 'b', 'z'] - ['a', 'c'] #=> # - def -(enum) - dup.subtract(enum) - end - alias difference - - - # Returns a new set containing elements common to the set and the - # given enumerable object. - # - # Set[1, 3, 5] & Set[3, 2, 1] #=> # - # Set['a', 'b', 'z'] & ['a', 'b', 'c'] #=> # - def &(enum) - n = self.class.new - if enum.is_a?(Set) - if enum.size > size - each { |o| n.add(o) if enum.include?(o) } - else - enum.each { |o| n.add(o) if include?(o) } - end - else - do_with_enum(enum) { |o| n.add(o) if include?(o) } - end - n - end - alias intersection & - - # Returns a new set containing elements exclusive between the set - # and the given enumerable object. `(set ^ enum)` is equivalent to - # `((set | enum) - (set & enum))`. - # - # Set[1, 2] ^ Set[2, 3] #=> # - # Set[1, 'b', 'c'] ^ ['b', 'd'] #=> # - def ^(enum) - n = self.class.new(enum) - each { |o| n.add(o) unless n.delete?(o) } - n - end - - # Returns true if two sets are equal. The equality of each couple - # of elements is defined according to Object#eql?. - # - # Set[1, 2] == Set[2, 1] #=> true - # Set[1, 3, 5] == Set[1, 5] #=> false - # Set['a', 'b', 'c'] == Set['a', 'c', 'b'] #=> true - # Set['a', 'b', 'c'] == ['a', 'c', 'b'] #=> false - def ==(other) - if self.equal?(other) - true - elsif other.instance_of?(self.class) - @hash == other.instance_variable_get(:@hash) - elsif other.is_a?(Set) && self.size == other.size - other.all? 
{ |o| @hash.include?(o) } - else - false - end - end - - def hash # :nodoc: - @hash.hash - end - - def eql?(o) # :nodoc: - return false unless o.is_a?(Set) - @hash.eql?(o.instance_variable_get(:@hash)) - end - - # Resets the internal state after modification to existing elements - # and returns self. - # - # Elements will be reindexed and deduplicated. - def reset - if @hash.respond_to?(:rehash) - @hash.rehash # This should perform frozenness check. - else - raise FrozenError, "can't modify frozen #{self.class.name}" if frozen? - end - self - end - - # Returns true if the given object is a member of the set, - # and false otherwise. - # - # Used in case statements: - # - # require 'set' - # - # case :apple - # when Set[:potato, :carrot] - # "vegetable" - # when Set[:apple, :banana] - # "fruit" - # end - # # => "fruit" - # - # Or by itself: - # - # Set[1, 2, 3] === 2 #=> true - # Set[1, 2, 3] === 4 #=> false - # - alias === include? - - # Classifies the set by the return value of the given block and - # returns a hash of {value => set of elements} pairs. The block is - # called once for each element of the set, passing the element as - # parameter. - # - # require 'set' - # files = Set.new(Dir.glob("*.rb")) - # hash = files.classify { |f| File.mtime(f).year } - # hash #=> {2000=>#, - # # 2001=>#, - # # 2002=>#} - # - # Returns an enumerator if no block is given. - def classify # :yields: o - block_given? or return enum_for(__method__) { size } - - h = {} - - each { |i| - (h[yield(i)] ||= self.class.new).add(i) - } - - h - end - - # Divides the set into a set of subsets according to the commonality - # defined by the given block. - # - # If the arity of the block is 2, elements o1 and o2 are in common - # if block.call(o1, o2) is true. Otherwise, elements o1 and o2 are - # in common if block.call(o1) == block.call(o2). 
- # - # require 'set' - # numbers = Set[1, 3, 4, 6, 9, 10, 11] - # set = numbers.divide { |i,j| (i - j).abs == 1 } - # set #=> #, - # # #, - # # #, - # # #}> - # - # Returns an enumerator if no block is given. - def divide(&func) - func or return enum_for(__method__) { size } - - if func.arity == 2 - require 'tsort' - - class << dig = {} # :nodoc: - include TSort - - alias tsort_each_node each_key - def tsort_each_child(node, &block) - fetch(node).each(&block) - end - end - - each { |u| - dig[u] = a = [] - each{ |v| func.call(u, v) and a << v } - } - - set = Set.new() - dig.each_strongly_connected_component { |css| - set.add(self.class.new(css)) - } - set - else - Set.new(classify(&func).values) - end - end - - # Returns a string created by converting each element of the set to a string - # See also: Array#join - def join(separator=nil) - to_a.join(separator) - end - - InspectKey = :__inspect_key__ # :nodoc: - - # Returns a string containing a human-readable representation of the - # set ("#"). - def inspect - ids = (Thread.current[InspectKey] ||= []) - - if ids.include?(object_id) - return sprintf('#<%s: {...}>', self.class.name) - end - - ids << object_id - begin - return sprintf('#<%s: {%s}>', self.class, to_a.inspect[1..-2]) - ensure - ids.pop - end - end - - alias to_s inspect - - def pretty_print(pp) # :nodoc: - pp.group(1, sprintf('#<%s:', self.class.name), '>') { - pp.breakable - pp.group(1, '{', '}') { - pp.seplist(self) { |o| - pp.pp o - } - } - } - end - - def pretty_print_cycle(pp) # :nodoc: - pp.text sprintf('#<%s: {%s}>', self.class.name, empty? ? '' : '...') - end -end - -module Enumerable - # Makes a set from the enumerable object with given arguments. - # Needs to `require "set"` to use this method. 
- def to_set(klass = Set, *args, &block) - klass.new(self, *args, &block) - end unless method_defined?(:to_set) -end - -autoload :SortedSet, "#{__dir__}/set/sorted_set" diff --git a/lib/set/set.gemspec b/lib/set/set.gemspec deleted file mode 100644 index 2ebef6985d..0000000000 --- a/lib/set/set.gemspec +++ /dev/null @@ -1,30 +0,0 @@ -name = File.basename(__FILE__, ".gemspec") -version = ["lib", Array.new(name.count("-")+1, "..").join("/")].find do |dir| - break File.foreach(File.join(__dir__, dir, "#{name.tr('-', '/')}.rb")) do |line| - /^\s*VERSION\s*=\s*"(.*)"/ =~ line and break $1 - end rescue nil -end - -Gem::Specification.new do |spec| - spec.name = name - spec.version = version - spec.authors = ["Akinori MUSHA"] - spec.email = ["knu@idaemons.org"] - - spec.summary = %q{Provides a class to deal with collections of unordered, unique values} - spec.description = %q{Provides a class to deal with collections of unordered, unique values} - spec.homepage = "https://github.com/ruby/set" - spec.licenses = ["Ruby", "BSD-2-Clause"] - spec.required_ruby_version = Gem::Requirement.new(">= 3.0.0") - - spec.metadata["homepage_uri"] = spec.homepage - spec.metadata["source_code_uri"] = spec.homepage - spec.metadata["changelog_uri"] = "https://github.com/ruby/set/blob/v#{spec.version}/CHANGELOG.md" - - # Specify which files should be added to the gem when it is released. - # The `git ls-files -z` loads the files in the RubyGem that have been added into git. - spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do - `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } - end - spec.require_paths = ["lib"] -end diff --git a/prelude.rb b/prelude.rb index 757ae52a7e..a381db8cce 100644 --- a/prelude.rb +++ b/prelude.rb @@ -26,7 +26,7 @@ module Kernel private :pp end -autoload :Set, 'set' +autoload :SortedSet, 'set/sorted_set' module Enumerable # Makes a set from the enumerable object with given arguments. 
diff --git a/set.c b/set.c new file mode 100644 index 0000000000..8250a972ad --- /dev/null +++ b/set.c @@ -0,0 +1,2138 @@ +/* This implements sets using the same hash table implementation as in + st.c, but without a value for each hash entry. This results in the + same basic performance characteristics as when using an st table, + but uses 1/3 less memory. + */ + +#include "id.h" +#include "internal.h" +#include "internal/bits.h" +#include "internal/hash.h" +#include "internal/proc.h" +#include "internal/sanitizers.h" +#include "internal/set_table.h" +#include "internal/symbol.h" +#include "internal/variable.h" +#include "ruby_assert.h" + +#include +#ifdef HAVE_STDLIB_H +#include +#endif +#include + +#ifndef SET_DEBUG +#define SET_DEBUG 0 +#endif + +#if SET_DEBUG +#include "internal/gc.h" +#endif + +static st_index_t +dbl_to_index(double d) +{ + union {double d; st_index_t i;} u; + u.d = d; + return u.i; +} + +static const uint64_t prime1 = ((uint64_t)0x2e0bb864 << 32) | 0xe9ea7df5; +static const uint32_t prime2 = 0x830fcab9; + +static inline uint64_t +mult_and_mix(uint64_t m1, uint64_t m2) +{ +#if defined HAVE_UINT128_T + uint128_t r = (uint128_t) m1 * (uint128_t) m2; + return (uint64_t) (r >> 64) ^ (uint64_t) r; +#else + uint64_t hm1 = m1 >> 32, hm2 = m2 >> 32; + uint64_t lm1 = m1, lm2 = m2; + uint64_t v64_128 = hm1 * hm2; + uint64_t v32_96 = hm1 * lm2 + lm1 * hm2; + uint64_t v1_32 = lm1 * lm2; + + return (v64_128 + (v32_96 >> 32)) ^ ((v32_96 << 32) + v1_32); +#endif +} + +static inline uint64_t +key64_hash(uint64_t key, uint32_t seed) +{ + return mult_and_mix(key + seed, prime1); +} + +/* Should cast down the result for each purpose */ +#define set_index_hash(index) key64_hash(rb_hash_start(index), prime2) + +static st_index_t +set_ident_hash(st_data_t n) +{ +#ifdef USE_FLONUM /* RUBY */ + /* + * - flonum (on 64-bit) is pathologically bad, mix the actual + * float value in, but do not use the float value as-is since + * many integers get interpreted as 2.0 or 
-2.0 [Bug #10761] + */ + if (FLONUM_P(n)) { + n ^= dbl_to_index(rb_float_value(n)); + } +#endif + + return (st_index_t)set_index_hash((st_index_t)n); +} + +static const struct st_hash_type identhash = { + rb_st_numcmp, + set_ident_hash, +}; + +static const struct st_hash_type objhash = { + rb_any_cmp, + rb_any_hash, +}; + +VALUE rb_cSet; + +#define id_each idEach +static ID id_each_entry; +static ID id_any_p; +static ID id_new; +static ID id_set_iter_lev; + +#define RSET_INITIALIZED FL_USER1 +#define RSET_LEV_MASK (FL_USER13 | FL_USER14 | FL_USER15 | /* FL 13..19 */ \ + FL_USER16 | FL_USER17 | FL_USER18 | FL_USER19) +#define RSET_LEV_SHIFT (FL_USHIFT + 13) +#define RSET_LEV_MAX 127 /* 7 bits */ + +#define SET_ASSERT(expr) RUBY_ASSERT_MESG_WHEN(SET_DEBUG, expr, #expr) + +#define RSET_SIZE(set) set_table_size(RSET_TABLE(set)) +#define RSET_EMPTY(set) (RSET_SIZE(set) == 0) +#define RSET_SIZE_NUM(set) SIZET2NUM(RSET_SIZE(set)) +#define RSET_IS_MEMBER(sobj, item) set_lookup(RSET_TABLE(set), (st_data_t)(item)) +#define RSET_COMPARE_BY_IDENTITY(set) (RSET_TABLE(set)->type == &identhash) + +struct set_object { + set_table table; +}; + +static int +mark_key(st_data_t key, st_data_t data) +{ + rb_gc_mark_movable((VALUE)key); + + return ST_CONTINUE; +} + +static void +set_mark(void *ptr) +{ + struct set_object *sobj = ptr; + if (sobj->table.entries) set_foreach(&sobj->table, mark_key, 0); +} + +static void +set_free_embedded(struct set_object *sobj) +{ + free((&sobj->table)->bins); + free((&sobj->table)->entries); +} + +static void +set_free(void *ptr) +{ + struct set_object *sobj = ptr; + set_free_embedded(sobj); + memset(&sobj->table, 0, sizeof(sobj->table)); +} + +static size_t +set_size(const void *ptr) +{ + const struct set_object *sobj = ptr; + /* Do not count the table size twice, as it is embedded */ + return (unsigned long)set_memsize(&sobj->table) - sizeof(sobj->table); +} + +static int +set_foreach_replace(st_data_t key, st_data_t argp, int error) +{ + if 
(rb_gc_location((VALUE)key) != (VALUE)key) { + return ST_REPLACE; + } + + return ST_CONTINUE; +} + +static int +set_replace_ref(st_data_t *key, st_data_t argp, int existing) +{ + if (rb_gc_location((VALUE)*key) != (VALUE)*key) { + *key = rb_gc_location((VALUE)*key); + } + + return ST_CONTINUE; +} + +static void +set_compact(void *ptr) +{ + struct set_object *sobj = ptr; + set_compact_table(&sobj->table); + set_foreach_with_replace(&sobj->table, set_foreach_replace, set_replace_ref, 0); +} + +static const rb_data_type_t set_data_type = { + .wrap_struct_name = "set", + .function = { + .dmark = set_mark, + .dfree = set_free, + .dsize = set_size, + .dcompact = set_compact, + }, + .flags = RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE +}; + +static inline set_table * +RSET_TABLE(VALUE set) +{ + struct set_object *sobj; + TypedData_Get_Struct(set, struct set_object, &set_data_type, sobj); + return &sobj->table; +} + +static unsigned long +iter_lev_in_ivar(VALUE set) +{ + VALUE levval = rb_ivar_get(set, id_set_iter_lev); + SET_ASSERT(FIXNUM_P(levval)); + long lev = FIX2LONG(levval); + SET_ASSERT(lev >= 0); + return (unsigned long)lev; +} + +void rb_ivar_set_internal(VALUE obj, ID id, VALUE val); + +static void +iter_lev_in_ivar_set(VALUE set, unsigned long lev) +{ + SET_ASSERT(lev >= RSET_LEV_MAX); + SET_ASSERT(POSFIXABLE(lev)); /* POSFIXABLE means fitting to long */ + rb_ivar_set_internal(set, id_set_iter_lev, LONG2FIX((long)lev)); +} + +static inline unsigned long +iter_lev_in_flags(VALUE set) +{ + return (unsigned long)((RBASIC(set)->flags >> RSET_LEV_SHIFT) & RSET_LEV_MAX); +} + +static inline void +iter_lev_in_flags_set(VALUE set, unsigned long lev) +{ + SET_ASSERT(lev <= RSET_LEV_MAX); + RBASIC(set)->flags = ((RBASIC(set)->flags & ~RSET_LEV_MASK) | ((VALUE)lev << RSET_LEV_SHIFT)); +} + +static inline bool +set_iterating_p(VALUE set) +{ + return iter_lev_in_flags(set) > 0; +} + +static void 
+set_iter_lev_inc(VALUE set) +{ + unsigned long lev = iter_lev_in_flags(set); + if (lev == RSET_LEV_MAX) { + lev = iter_lev_in_ivar(set) + 1; + if (!POSFIXABLE(lev)) { /* paranoiac check */ + rb_raise(rb_eRuntimeError, "too much nested iterations"); + } + } + else { + lev += 1; + iter_lev_in_flags_set(set, lev); + if (lev < RSET_LEV_MAX) return; + } + iter_lev_in_ivar_set(set, lev); +} + +static void +set_iter_lev_dec(VALUE set) +{ + unsigned long lev = iter_lev_in_flags(set); + if (lev == RSET_LEV_MAX) { + lev = iter_lev_in_ivar(set); + if (lev > RSET_LEV_MAX) { + iter_lev_in_ivar_set(set, lev-1); + return; + } + rb_attr_delete(set, id_set_iter_lev); + } + else if (lev == 0) { + rb_raise(rb_eRuntimeError, "iteration level underflow"); + } + iter_lev_in_flags_set(set, lev - 1); +} + +static VALUE +set_foreach_ensure(VALUE set) +{ + set_iter_lev_dec(set); + return 0; +} + +typedef int set_foreach_func(VALUE, VALUE); + +struct set_foreach_arg { + VALUE set; + set_foreach_func *func; + VALUE arg; +}; + +static int +set_iter_status_check(int status) +{ + if (status == ST_CONTINUE) { + return ST_CHECK; + } + + return status; +} + +static int +set_foreach_iter(st_data_t key, st_data_t argp, int error) +{ + struct set_foreach_arg *arg = (struct set_foreach_arg *)argp; + + if (error) return ST_STOP; + + set_table *tbl = RSET_TABLE(arg->set); + int status = (*arg->func)((VALUE)key, arg->arg); + + if (RSET_TABLE(arg->set) != tbl) { + rb_raise(rb_eRuntimeError, "reset occurred during iteration"); + } + + return set_iter_status_check(status); +} + +static VALUE +set_foreach_call(VALUE arg) +{ + VALUE set = ((struct set_foreach_arg *)arg)->set; + int ret = 0; + ret = set_foreach_check(RSET_TABLE(set), set_foreach_iter, + (st_data_t)arg, (st_data_t)Qundef); + if (ret) { + rb_raise(rb_eRuntimeError, "ret: %d, set modified during iteration", ret); + } + return Qnil; +} + +static void +set_iter(VALUE set, set_foreach_func *func, VALUE farg) +{ + struct set_foreach_arg arg; + + if 
(RSET_EMPTY(set)) + return; + arg.set = set; + arg.func = func; + arg.arg = farg; + if (RB_OBJ_FROZEN(set)) { + set_foreach_call((VALUE)&arg); + } + else { + set_iter_lev_inc(set); + rb_ensure(set_foreach_call, (VALUE)&arg, set_foreach_ensure, set); + } +} + +NORETURN(static void no_new_item(void)); +static void +no_new_item(void) +{ + rb_raise(rb_eRuntimeError, "can't add a new item into set during iteration"); +} + +static void +set_compact_after_delete(VALUE set) +{ + if (!set_iterating_p(set)) { + set_compact_table(RSET_TABLE(set)); + } +} + +static int +set_table_insert_wb(set_table *tab, VALUE set, VALUE key, VALUE *key_addr) +{ + if (tab->type != &identhash && rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) { + key = rb_hash_key_str(key); + if (key_addr) *key_addr = key; + } + int ret = set_insert(tab, (st_data_t)key); + if (ret == 0) RB_OBJ_WRITTEN(set, Qundef, key); + return ret; +} + +static int +set_insert_wb(VALUE set, VALUE key, VALUE *key_addr) +{ + return set_table_insert_wb(RSET_TABLE(set), set, key, key_addr); +} + +static VALUE +set_alloc_with_size(VALUE klass, st_index_t size) +{ + VALUE set; + struct set_object *sobj; + + set = TypedData_Make_Struct(klass, struct set_object, &set_data_type, sobj); + set_init_table_with_size(&sobj->table, &objhash, size); + + return set; +} + + +static VALUE +set_s_alloc(VALUE klass) +{ + return set_alloc_with_size(klass, 0); +} + +static VALUE +set_s_create(int argc, VALUE *argv, VALUE klass) +{ + VALUE set = set_alloc_with_size(klass, argc); + set_table *table = RSET_TABLE(set); + int i; + + for (i=0; i < argc; i++) { + set_table_insert_wb(table, set, argv[i], NULL); + } + + return set; +} + +static void +check_set(VALUE arg) +{ + if (!rb_obj_is_kind_of(arg, rb_cSet)) { + rb_raise(rb_eArgError, "value must be a set"); + } +} + +static ID +enum_method_id(VALUE other) +{ + if (rb_respond_to(other, id_each_entry)) { + return id_each_entry; + } + else if (rb_respond_to(other, id_each)) { + return id_each; + 
} + else { + rb_raise(rb_eArgError, "value must be enumerable"); + } +} + +static VALUE +set_enum_size(VALUE set, VALUE args, VALUE eobj) +{ + return RSET_SIZE_NUM(set); +} + +static VALUE +set_initialize_without_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, set)) +{ + VALUE element = i; + set_insert_wb(set, element, &element); + return element; +} + +static VALUE +set_initialize_with_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, set)) +{ + VALUE element = rb_yield(i); + set_insert_wb(set, element, &element); + return element; +} + +/* + * call-seq: + * Set.new -> new_set + * Set.new(enum) -> new_set + * Set.new(enum) { |elem| ... } -> new_set + * + * Creates a new set containing the elements of the given enumerable + * object. + * + * If a block is given, the elements of enum are preprocessed by the + * given block. + * + * Set.new([1, 2]) #=> # + * Set.new([1, 2, 1]) #=> # + * Set.new([1, 'c', :s]) #=> # + * Set.new(1..5) #=> # + * Set.new([1, 2, 3]) { |x| x * x } #=> # + */ +static VALUE +set_i_initialize(int argc, VALUE *argv, VALUE set) +{ + if (RBASIC(set)->flags & RSET_INITIALIZED) { + rb_raise(rb_eRuntimeError, "cannot reinitialize set"); + } + RBASIC(set)->flags |= RSET_INITIALIZED; + + VALUE other; + rb_check_arity(argc, 0, 1); + + if (argc > 0 && (other = argv[0]) != Qnil) { + if (RB_TYPE_P(other, T_ARRAY)) { + long len = RARRAY_LEN(other); + if (RARRAY_LEN(other) != 0) { + set_table *into = RSET_TABLE(set); + VALUE key; + int block_given = rb_block_given_p(); + RARRAY_PTR_USE(other, ptr, { + for(; len > 0; len--, ptr++) { + key = *ptr; + if (block_given) key = rb_yield(key); + set_table_insert_wb(into, set, key, NULL); + } + }); + } + } + else { + rb_block_call(other, enum_method_id(other), 0, 0, + rb_block_given_p() ? 
set_initialize_with_block : set_initialize_without_block, + set); + } + } + + return set; +} + +static VALUE +set_i_initialize_copy(VALUE set, VALUE other) +{ + if (set == other) return set; + + if (set_iterating_p(set)) { + rb_raise(rb_eRuntimeError, "cannot replace set during iteration"); + } + + struct set_object *sobj; + TypedData_Get_Struct(set, struct set_object, &set_data_type, sobj); + + set_free_embedded(sobj); + set_copy(&sobj->table, RSET_TABLE(other)); + + return set; +} + +static int +set_inspect_i(st_data_t key, st_data_t arg) +{ + VALUE str = (VALUE)arg; + if (RSTRING_LEN(str) > 8) { + rb_str_buf_cat_ascii(str, ", "); + } + rb_str_buf_append(str, rb_inspect((VALUE)key)); + + return ST_CONTINUE; +} + +static VALUE +set_inspect(VALUE set, VALUE dummy, int recur) +{ + VALUE str; + + if (recur) return rb_usascii_str_new2("#"); + str = rb_str_buf_new2("#"); + + return str; +} + +/* + * call-seq: + * inspect -> new_string + * + * Returns a new string containing the set entries: + * + * s = Set.new + * s.inspect # => "#" + * s.add(1) + * s.inspect # => "#" + * s.add(2) + * s.inspect # => "#" + * + * Related: see {Methods for Converting}[rdoc-ref:Set@Methods+for+Converting]. + */ +static VALUE +set_i_inspect(VALUE set) +{ + return rb_exec_recursive(set_inspect, set, 0); +} + +static int +set_to_a_i(st_data_t key, st_data_t arg) +{ + rb_ary_push((VALUE)arg, (VALUE)key); + return ST_CONTINUE; +} + +/* + * call-seq: + * to_a -> array + * + * Returns an array containing all elements in the set. 
+ * + * Set[1, 2].to_a #=> [1, 2] + * Set[1, 'c', :s].to_a #=> [1, "c", :s] + */ +static VALUE +set_i_to_a(VALUE set) +{ + st_index_t size = RSET_SIZE(set); + VALUE ary = rb_ary_new_capa(size); + + if (size == 0) return ary; + + if (ST_DATA_COMPATIBLE_P(VALUE)) { + RARRAY_PTR_USE(ary, ptr, { + size = set_keys(RSET_TABLE(set), ptr, size); + }); + rb_gc_writebarrier_remember(ary); + rb_ary_set_len(ary, size); + } + else { + set_iter(set, set_to_a_i, (st_data_t)ary); + } + return ary; +} + +/* + * call-seq: + * to_set(klass = Set, *args, &block) -> self or new_set + * + * Returns self if receiver is an instance of +Set+ and no arguments or + * block are given. Otherwise, converts the set to another with + * klass.new(self, *args, &block). + * + * In subclasses, returns `klass.new(self, *args, &block)` unless overridden. + */ +static VALUE +set_i_to_set(int argc, VALUE *argv, VALUE set) +{ + VALUE klass; + + if (argc == 0) { + klass = rb_cSet; + argv = &set; + argc = 1; + } + else { + klass = argv[0]; + argv[0] = set; + } + + if (klass == rb_cSet && rb_obj_is_instance_of(set, rb_cSet) && + argc == 1 && !rb_block_given_p()) { + return set; + } + + return rb_funcall_passing_block(klass, id_new, argc, argv); +} + +/* + * call-seq: + * join(separator=nil)-> new_string + * + * Returns a string created by converting each element of the set to a string. + */ +static VALUE +set_i_join(int argc, VALUE *argv, VALUE set) +{ + rb_check_arity(argc, 0, 1); + return rb_ary_join(set_i_to_a(set), argc == 0 ? Qnil : argv[0]); +} + +/* + * call-seq: + * add(obj) -> self + * + * Adds the given object to the set and returns self. Use `merge` to + * add many elements at once. 
+ * + * Set[1, 2].add(3) #=> # + * Set[1, 2].add([3, 4]) #=> # + * Set[1, 2].add(2) #=> # + */ +static VALUE +set_i_add(VALUE set, VALUE item) +{ + rb_check_frozen(set); + if (set_iterating_p(set)) { + if (!set_lookup(RSET_TABLE(set), (st_data_t)item)) { + no_new_item(); + } + } + else { + set_insert_wb(set, item, NULL); + } + return set; +} + +/* + * call-seq: + * add?(obj) -> self or nil + * + * Adds the given object to the set and returns self. If the object is + * already in the set, returns nil. + * + * Set[1, 2].add?(3) #=> # + * Set[1, 2].add?([3, 4]) #=> # + * Set[1, 2].add?(2) #=> nil + */ +static VALUE +set_i_add_p(VALUE set, VALUE item) +{ + rb_check_frozen(set); + if (set_iterating_p(set)) { + if (!set_lookup(RSET_TABLE(set), (st_data_t)item)) { + no_new_item(); + } + return Qnil; + } + else { + return set_insert_wb(set, item, NULL) ? Qnil : set; + } +} + +/* + * call-seq: + * delete(obj) -> self + * + * Deletes the given object from the set and returns self. Use subtract + * to delete many items at once. + */ +static VALUE +set_i_delete(VALUE set, VALUE item) +{ + rb_check_frozen(set); + if (set_delete(RSET_TABLE(set), (st_data_t *)&item)) { + set_compact_after_delete(set); + } + return set; +} + +/* + * call-seq: + * delete?(obj) -> self or nil + * + * Deletes the given object from the set and returns self. If the + * object is not in the set, returns nil. + */ +static VALUE +set_i_delete_p(VALUE set, VALUE item) +{ + rb_check_frozen(set); + if (set_delete(RSET_TABLE(set), (st_data_t *)&item)) { + set_compact_after_delete(set); + return set; + } + return Qnil; +} + +static int +set_delete_if_i(st_data_t key, st_data_t dummy) +{ + return RTEST(rb_yield((VALUE)key)) ? ST_DELETE : ST_CONTINUE; +} + +/* + * call-seq: + * delete_if { |o| ... } -> self + * delete_if -> enumerator + * + * Deletes every element of the set for which block evaluates to + * true, and returns self. Returns an enumerator if no block is given. 
+ */ +static VALUE +set_i_delete_if(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + rb_check_frozen(set); + set_iter(set, set_delete_if_i, 0); + set_compact_after_delete(set); + return set; +} + +/* + * call-seq: + * reject! { |o| ... } -> self + * reject! -> enumerator + * + * Equivalent to Set#delete_if, but returns nil if no changes were made. + * Returns an enumerator if no block is given. + */ +static VALUE +set_i_reject(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + rb_check_frozen(set); + + set_table *table = RSET_TABLE(set); + size_t n = set_table_size(table); + set_iter(set, set_delete_if_i, 0); + + if (n == set_table_size(table)) return Qnil; + + set_compact_after_delete(set); + return set; +} + +static int +set_classify_i(st_data_t key, st_data_t tmp) +{ + VALUE* args = (VALUE*)tmp; + VALUE hash = args[0]; + VALUE hash_key = rb_yield(key); + VALUE set = rb_hash_lookup2(hash, hash_key, Qundef); + if (set == Qundef) { + set = set_s_alloc(args[1]); + rb_hash_aset(hash, hash_key, set); + } + set_i_add(set, key); + + return ST_CONTINUE; +} + +/* + * call-seq: + * classify { |o| ... } -> hash + * classify -> enumerator + * + * Classifies the set by the return value of the given block and + * returns a hash of {value => set of elements} pairs. The block is + * called once for each element of the set, passing the element as + * parameter. + * + * files = Set.new(Dir.glob("*.rb")) + * hash = files.classify { |f| File.mtime(f).year } + * hash #=> {2000 => #, + * # 2001 => #, + * # 2002 => #} + * + * Returns an enumerator if no block is given. 
+ */ +static VALUE +set_i_classify(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + VALUE args[2]; + args[0] = rb_hash_new(); + args[1] = rb_obj_class(set); + set_iter(set, set_classify_i, (st_data_t)args); + return args[0]; +} + +struct set_divide_args { + VALUE self; + VALUE set_class; + VALUE final_set; + VALUE hash; + VALUE current_set; + VALUE current_item; + unsigned long ni; + unsigned long nj; +}; + +static VALUE +set_divide_block0(RB_BLOCK_CALL_FUNC_ARGLIST(j, arg)) +{ + struct set_divide_args *args = (struct set_divide_args *)arg; + if (args->nj > args->ni) { + VALUE i = args->current_item; + if (RTEST(rb_yield_values(2, i, j)) && RTEST(rb_yield_values(2, j, i))) { + VALUE hash = args->hash; + if (args->current_set == Qnil) { + VALUE set = rb_hash_aref(hash, j); + if (set == Qnil) { + VALUE both[2] = {i, j}; + set = set_s_create(2, both, args->set_class); + rb_hash_aset(hash, i, set); + rb_hash_aset(hash, j, set); + set_i_add(args->final_set, set); + } + else { + set_i_add(set, i); + rb_hash_aset(hash, i, set); + } + args->current_set = set; + } + else { + set_i_add(args->current_set, j); + rb_hash_aset(hash, j, args->current_set); + } + } + } + args->nj++; + return j; +} + +static VALUE +set_divide_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, arg)) +{ + struct set_divide_args *args = (struct set_divide_args *)arg; + VALUE hash = args->hash; + args->current_set = rb_hash_aref(hash, i); + args->current_item = i; + args->nj = 0; + rb_block_call(args->self, id_each, 0, 0, set_divide_block0, arg); + if (args->current_set == Qnil) { + VALUE set = set_s_create(1, &i, args->set_class); + rb_hash_aset(hash, i, set); + set_i_add(args->final_set, set); + } + args->ni++; + return i; +} + +static void set_merge_enum_into(VALUE set, VALUE arg); + +/* + * call-seq: + * divide { |o1, o2| ... } -> set + * divide { |o| ... } -> set + * divide -> enumerator + * + * Divides the set into a set of subsets according to the commonality + * defined by the given block. 
+ * + * If the arity of the block is 2, elements o1 and o2 are in common + * if both block.call(o1, o2) and block.call(o2, o1) are true. + * Otherwise, elements o1 and o2 are in common if + * block.call(o1) == block.call(o2). + * + * numbers = Set[1, 3, 4, 6, 9, 10, 11] + * set = numbers.divide { |i,j| (i - j).abs == 1 } + * set #=> #, + * # #, + * # #}> + * # #, + * + * Returns an enumerator if no block is given. + */ +static VALUE +set_i_divide(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + + if (rb_block_arity() == 2) { + VALUE final_set = set_s_create(0, 0, rb_cSet); + struct set_divide_args args = { + .self = set, + .set_class = rb_obj_class(set), + .final_set = final_set, + .hash = rb_hash_new(), + .current_set = 0, + .current_item = 0, + .ni = 0, + .nj = 0 + }; + rb_block_call(set, id_each, 0, 0, set_divide_block, (VALUE)&args); + return final_set; + } + + VALUE values = rb_hash_values(set_i_classify(set)); + set = set_alloc_with_size(rb_cSet, RARRAY_LEN(values)); + set_merge_enum_into(set, values); + return set; +} + +static int +set_clear_i(st_data_t key, st_data_t dummy) +{ + return ST_DELETE; +} + +/* + * call-seq: + * clear -> self + * + * Removes all elements and returns self. 
+ * + * set = Set[1, 'c', :s] #=> # + * set.clear #=> # + * set #=> # + */ +static VALUE +set_i_clear(VALUE set) +{ + rb_check_frozen(set); + if (RSET_SIZE(set) == 0) return set; + if (set_iterating_p(set)) { + set_iter(set, set_clear_i, 0); + } + else { + set_clear(RSET_TABLE(set)); + set_compact_after_delete(set); + } + return set; +} + +struct set_intersection_data { + VALUE set; + set_table *into; + set_table *other; +}; + +static int +set_intersection_i(st_data_t key, st_data_t tmp) +{ + struct set_intersection_data *data = (struct set_intersection_data *)tmp; + if (set_lookup(data->other, key)) { + set_table_insert_wb(data->into, data->set, key, NULL); + } + + return ST_CONTINUE; +} + +static VALUE +set_intersection_block(RB_BLOCK_CALL_FUNC_ARGLIST(i, data)) +{ + set_intersection_i((st_data_t)i, (st_data_t)data); + return i; +} + +/* + * call-seq: + * set & enum -> new_set + * + * Returns a new set containing elements common to the set and the given + * enumerable object. + * + * Set[1, 3, 5] & Set[3, 2, 1] #=> # + * Set['a', 'b', 'z'] & ['a', 'b', 'c'] #=> # + */ +static VALUE +set_i_intersection(VALUE set, VALUE other) +{ + VALUE new_set = set_s_alloc(rb_obj_class(set)); + set_table *stable = RSET_TABLE(set); + set_table *ntable = RSET_TABLE(new_set); + + if (rb_obj_is_kind_of(other, rb_cSet)) { + set_table *otable = RSET_TABLE(other); + if (set_table_size(stable) >= set_table_size(otable)) { + /* Swap so we iterate over the smaller set */ + otable = stable; + set = other; + } + + struct set_intersection_data data = { + .set = new_set, + .into = ntable, + .other = otable + }; + set_iter(set, set_intersection_i, (st_data_t)&data); + } + else { + struct set_intersection_data data = { + .set = new_set, + .into = ntable, + .other = stable + }; + rb_block_call(other, enum_method_id(other), 0, 0, set_intersection_block, (VALUE)&data); + } + + return new_set; +} + +/* + * call-seq: + * include?(item) -> true or false + * + * Returns true if the set contains the 
given object: + * + * Set[1, 2, 3].include? 2 #=> true + * Set[1, 2, 3].include? 4 #=> false + * + * Note that include? and member? do not test member + * equality using == as do other Enumerables. + * + * This is aliased to #===, so it is usable in +case+ expressions: + * + * case :apple + * when Set[:potato, :carrot] + * "vegetable" + * when Set[:apple, :banana] + * "fruit" + * end + * # => "fruit" + * + * See also Enumerable#include? + */ +static VALUE +set_i_include(VALUE set, VALUE item) +{ + return RBOOL(RSET_IS_MEMBER(set, item)); +} + +struct set_merge_args { + VALUE set; + set_table *into; +}; + +static int +set_merge_i(st_data_t key, st_data_t data) +{ + struct set_merge_args *args = (struct set_merge_args *)data; + set_table_insert_wb(args->into, args->set, key, NULL); + return ST_CONTINUE; +} + +static VALUE +set_merge_block(RB_BLOCK_CALL_FUNC_ARGLIST(key, set)) +{ + VALUE element = key; + set_insert_wb(set, element, &element); + return element; +} + +static void +set_merge_enum_into(VALUE set, VALUE arg) +{ + if (rb_obj_is_kind_of(arg, rb_cSet)) { + struct set_merge_args args = { + .set = set, + .into = RSET_TABLE(set) + }; + set_iter(arg, set_merge_i, (st_data_t)&args); + } + else if (RB_TYPE_P(arg, T_ARRAY)) { + long len = RARRAY_LEN(arg); + if (RARRAY_LEN(arg) != 0) { + set_table *into = RSET_TABLE(set); + RARRAY_PTR_USE(arg, ptr, { + for(; len > 0; len--, ptr++) { + set_table_insert_wb(into, set, *ptr, NULL); + } + }); + } + } + else { + rb_block_call(arg, enum_method_id(arg), 0, 0, set_merge_block, (VALUE)set); + } +} + +/* + * call-seq: + * merge(*enums, **nil) -> self + * + * Merges the elements of the given enumerable objects to the set and + * returns self. 
+ */ +static VALUE +set_i_merge(int argc, VALUE *argv, VALUE set) +{ + if (rb_keyword_given_p()) { + rb_raise(rb_eArgError, "no keywords accepted"); + } + rb_check_frozen(set); + + int i; + + for (i=0; i < argc; i++) { + set_merge_enum_into(set, argv[i]); + } + + return set; +} + +static VALUE +set_reset_table_with_type(VALUE set, const struct st_hash_type *type) +{ + rb_check_frozen(set); + + struct set_object *sobj; + TypedData_Get_Struct(set, struct set_object, &set_data_type, sobj); + set_table *old = &sobj->table; + + size_t size = set_table_size(old); + if (size > 0) { + set_table *new = set_init_table_with_size(NULL, type, size); + struct set_merge_args args = { + .set = set, + .into = new + }; + set_iter(set, set_merge_i, (st_data_t)&args); + set_free_embedded(sobj); + memcpy(&sobj->table, new, sizeof(*new)); + free(new); + } + else { + sobj->table.type = type; + } + + return set; +} + +/* + * call-seq: + * compare_by_identity -> self + * + * Makes the set compare its elements by their identity and returns self. + */ +static VALUE +set_i_compare_by_identity(VALUE set) +{ + if (RSET_COMPARE_BY_IDENTITY(set)) return set; + + if (set_iterating_p(set)) { + rb_raise(rb_eRuntimeError, "compare_by_identity during iteration"); + } + + return set_reset_table_with_type(set, &identhash); +} + +/* + * call-seq: + * compare_by_identity? -> true or false + * + * Returns true if the set will compare its elements by their + * identity. Also see Set#compare_by_identity. + */ +static VALUE +set_i_compare_by_identity_p(VALUE set) +{ + return RBOOL(RSET_COMPARE_BY_IDENTITY(set)); +} + +/* + * call-seq: + * size -> integer + * + * Returns the number of elements. + */ +static VALUE +set_i_size(VALUE set) +{ + return RSET_SIZE_NUM(set); +} + +/* + * call-seq: + * empty? -> true or false + * + * Returns true if the set contains no elements. 
+ */ +static VALUE +set_i_empty(VALUE set) +{ + return RBOOL(RSET_EMPTY(set)); +} + +static int +set_xor_i(st_data_t key, st_data_t data) +{ + VALUE element = (VALUE)key; + VALUE set = (VALUE)data; + set_table *table = RSET_TABLE(set); + if (set_table_insert_wb(table, set, element, &element)) { + set_delete(table, &element); + } + return ST_CONTINUE; +} + +/* + * call-seq: + * set ^ enum -> new_set + * + * Returns a new set containing elements exclusive between the set and the + * given enumerable object. (set ^ enum) is equivalent to + * ((set | enum) - (set & enum)). + * + * Set[1, 2] ^ Set[2, 3] #=> # + * Set[1, 'b', 'c'] ^ ['b', 'd'] #=> # + */ +static VALUE +set_i_xor(VALUE set, VALUE other) +{ + VALUE new_set; + if (rb_obj_is_kind_of(other, rb_cSet)) { + new_set = other; + } + else { + new_set = set_s_alloc(rb_obj_class(set)); + set_merge_enum_into(new_set, other); + } + set_iter(set, set_xor_i, (st_data_t)new_set); + return new_set; +} + +/* + * call-seq: + * set | enum -> new_set + * + * Returns a new set built by merging the set and the elements of the + * given enumerable object. 
+ * + * Set[1, 2, 3] | Set[2, 4, 5] #=> # + * Set[1, 5, 'z'] | (1..6) #=> # + */ +static VALUE +set_i_union(VALUE set, VALUE other) +{ + set = rb_obj_dup(set); + set_merge_enum_into(set, other); + return set; +} + +static int +set_remove_i(st_data_t key, st_data_t from) +{ + set_delete((struct set_table *)from, (st_data_t *)&key); + return ST_CONTINUE; +} + +static VALUE +set_remove_block(RB_BLOCK_CALL_FUNC_ARGLIST(key, set)) +{ + rb_check_frozen(set); + set_delete(RSET_TABLE(set), (st_data_t *)&key); + return key; +} + +static void +set_remove_enum_from(VALUE set, VALUE arg) +{ + if (rb_obj_is_kind_of(arg, rb_cSet)) { + set_iter(arg, set_remove_i, (st_data_t)RSET_TABLE(set)); + } + else { + rb_block_call(arg, enum_method_id(arg), 0, 0, set_remove_block, (VALUE)set); + } +} + +/* + * call-seq: + * subtract(enum) -> self + * + * Deletes every element that appears in the given enumerable object + * and returns self. + */ +static VALUE +set_i_subtract(VALUE set, VALUE other) +{ + rb_check_frozen(set); + set_remove_enum_from(set, other); + return set; +} + +/* + * call-seq: + * set - enum -> new_set + * + * Returns a new set built by duplicating the set, removing every + * element that appears in the given enumerable object. + * + * Set[1, 3, 5] - Set[1, 5] #=> # + * Set['a', 'b', 'z'] - ['a', 'c'] #=> # + */ +static VALUE +set_i_difference(VALUE set, VALUE other) +{ + return set_i_subtract(rb_obj_dup(set), other); +} + +static int +set_each_i(st_data_t key, st_data_t dummy) +{ + rb_yield(key); + return ST_CONTINUE; +} + +/* + * call-seq: + * each { |o| ... } -> self + * each -> enumerator + * + * Calls the given block once for each element in the set, passing + * the element as parameter. Returns an enumerator if no block is + * given. 
+ */ +static VALUE +set_i_each(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + set_iter(set, set_each_i, 0); + return set; +} + +static int +set_collect_i(st_data_t key, st_data_t data) +{ + set_insert_wb((VALUE)data, rb_yield((VALUE)key), NULL); + return ST_CONTINUE; +} + +/* + * call-seq: + * collect! { |o| ... } -> self + * collect! -> enumerator + * + * Replaces the elements with ones returned by +collect+. + * Returns an enumerator if no block is given. + */ +static VALUE +set_i_collect(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + rb_check_frozen(set); + + VALUE new_set = set_s_alloc(rb_obj_class(set)); + set_iter(set, set_collect_i, (st_data_t)new_set); + set_i_initialize_copy(set, new_set); + + return set; +} + +static int +set_keep_if_i(st_data_t key, st_data_t into) +{ + if (!RTEST(rb_yield((VALUE)key))) { + set_delete((set_table *)into, &key); + } + return ST_CONTINUE; +} + +/* + * call-seq: + * keep_if { |o| ... } -> self + * keep_if -> enumerator + * + * Deletes every element of the set for which block evaluates to false, and + * returns self. Returns an enumerator if no block is given. + */ +static VALUE +set_i_keep_if(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + rb_check_frozen(set); + + set_iter(set, set_keep_if_i, (st_data_t)RSET_TABLE(set)); + + return set; +} + +/* + * call-seq: + * select! { |o| ... } -> self + * select! -> enumerator + * + * Equivalent to Set#keep_if, but returns nil if no changes were made. + * Returns an enumerator if no block is given. + */ +static VALUE +set_i_select(VALUE set) +{ + RETURN_SIZED_ENUMERATOR(set, 0, 0, set_enum_size); + rb_check_frozen(set); + + set_table *table = RSET_TABLE(set); + size_t n = set_table_size(table); + set_iter(set, set_keep_if_i, (st_data_t)table); + + return (n == set_table_size(table)) ? 
Qnil : set; +} + +/* + * call-seq: + * replace(enum) -> self + * + * Replaces the contents of the set with the contents of the given + * enumerable object and returns self. + * + * set = Set[1, 'c', :s] #=> # + * set.replace([1, 2]) #=> # + * set #=> # + */ +static VALUE +set_i_replace(VALUE set, VALUE other) +{ + rb_check_frozen(set); + + if (rb_obj_is_kind_of(other, rb_cSet)) { + set_i_initialize_copy(set, other); + } + else { + if (set_iterating_p(set)) { + rb_raise(rb_eRuntimeError, "cannot replace set during iteration"); + } + + // make sure enum is enumerable before calling clear + enum_method_id(other); + + set_clear(RSET_TABLE(set)); + set_merge_enum_into(set, other); + } + + return set; +} + +/* + * call-seq: + * reset -> self + * + * Resets the internal state after modification to existing elements + * and returns self. Elements will be reindexed and deduplicated. + */ +static VALUE +set_i_reset(VALUE set) +{ + if (set_iterating_p(set)) { + rb_raise(rb_eRuntimeError, "reset during iteration"); + } + + return set_reset_table_with_type(set, RSET_TABLE(set)->type); +} + +static void set_flatten_merge(VALUE set, VALUE from, VALUE seen); + +static int +set_flatten_merge_i(st_data_t item, st_data_t arg) +{ + VALUE *args = (VALUE *)arg; + VALUE set = args[0]; + if (rb_obj_is_kind_of(item, rb_cSet)) { + VALUE e_id = rb_obj_id(item); + VALUE hash = args[2]; + switch(rb_hash_aref(hash, e_id)) { + case Qfalse: + return ST_CONTINUE; + case Qtrue: + rb_raise(rb_eArgError, "tried to flatten recursive Set"); + default: + break; + } + + rb_hash_aset(hash, e_id, Qtrue); + set_flatten_merge(set, item, hash); + rb_hash_aset(hash, e_id, Qfalse); + } + else { + set_i_add(set, item); + } + return ST_CONTINUE; +} + +static void +set_flatten_merge(VALUE set, VALUE from, VALUE hash) +{ + VALUE args[3] = {set, from, hash}; + set_iter(from, set_flatten_merge_i, (st_data_t)args); +} + +/* + * call-seq: + * flatten -> set + * + * Returns a new set that is a copy of the set, 
flattening each + * containing set recursively. + */ +static VALUE +set_i_flatten(VALUE set) +{ + VALUE new_set = set_s_alloc(rb_obj_class(set)); + set_flatten_merge(new_set, set, rb_hash_new()); + return new_set; +} + +static int +set_contains_set_i(st_data_t item, st_data_t arg) +{ + if (rb_obj_is_kind_of(item, rb_cSet)) { + *(bool *)arg = true; + return ST_STOP; + } + return ST_CONTINUE; +} + +/* + * call-seq: + * flatten! -> self + * + * Equivalent to Set#flatten, but replaces the receiver with the + * result in place. Returns nil if no modifications were made. + */ +static VALUE +set_i_flatten_bang(VALUE set) +{ + bool contains_set = false; + set_iter(set, set_contains_set_i, (st_data_t)&contains_set); + if (!contains_set) return Qnil; + rb_check_frozen(set); + return set_i_replace(set, set_i_flatten(set)); +} + +struct set_subset_data { + set_table *table; + VALUE result; +}; + +static int +set_le_i(st_data_t key, st_data_t arg) +{ + struct set_subset_data *data = (struct set_subset_data *)arg; + if (set_lookup(data->table, key)) return ST_CONTINUE; + data->result = Qfalse; + return ST_STOP; +} + +static VALUE +set_le(VALUE set, VALUE other) +{ + struct set_subset_data data = { + .table = RSET_TABLE(other), + .result = Qtrue + }; + set_iter(set, set_le_i, (st_data_t)&data); + return data.result; +} + +/* + * call-seq: + * proper_subset?(set) -> true or false + * + * Returns true if the set is a proper subset of the given set. + */ +static VALUE +set_i_proper_subset(VALUE set, VALUE other) +{ + check_set(other); + if (RSET_SIZE(set) >= RSET_SIZE(other)) return Qfalse; + return set_le(set, other); +} + +/* + * call-seq: + * subset?(set) -> true or false + * + * Returns true if the set is a subset of the given set. 
+ */ +static VALUE +set_i_subset(VALUE set, VALUE other) +{ + check_set(other); + if (RSET_SIZE(set) > RSET_SIZE(other)) return Qfalse; + return set_le(set, other); +} + +/* + * call-seq: + * proper_superset?(set) -> true or false + * + * Returns true if the set is a proper superset of the given set. + */ +static VALUE +set_i_proper_superset(VALUE set, VALUE other) +{ + check_set(other); + if (RSET_SIZE(set) <= RSET_SIZE(other)) return Qfalse; + return set_le(other, set); +} + +/* + * call-seq: + * superset?(set) -> true or false + * + * Returns true if the set is a superset of the given set. + */ +static VALUE +set_i_superset(VALUE set, VALUE other) +{ + check_set(other); + if (RSET_SIZE(set) < RSET_SIZE(other)) return Qfalse; + return set_le(other, set); +} + +static int +set_intersect_i(st_data_t key, st_data_t arg) +{ + VALUE *args = (VALUE *)arg; + if (set_lookup((set_table *)args[0], key)) { + args[1] = Qtrue; + return ST_STOP; + } + return ST_CONTINUE; +} + +/* + * call-seq: + * intersect?(set) -> true or false + * + * Returns true if the set and the given enumerable have at least one + * element in common. + * + * Set[1, 2, 3].intersect? Set[4, 5] #=> false + * Set[1, 2, 3].intersect? Set[3, 4] #=> true + * Set[1, 2, 3].intersect? 4..5 #=> false + * Set[1, 2, 3].intersect? 
[3, 4] #=> true + */ +static VALUE +set_i_intersect(VALUE set, VALUE other) +{ + if (rb_obj_is_kind_of(other, rb_cSet)) { + size_t set_size = RSET_SIZE(set); + size_t other_size = RSET_SIZE(other); + VALUE args[2]; + args[1] = Qfalse; + VALUE iter_arg; + + if (set_size < other_size) { + iter_arg = set; + args[0] = (VALUE)RSET_TABLE(other); + } + else { + iter_arg = other; + args[0] = (VALUE)RSET_TABLE(set); + } + set_iter(iter_arg, set_intersect_i, (st_data_t)args); + return args[1]; + } + else if (rb_obj_is_kind_of(other, rb_mEnumerable)) { + return rb_funcall(other, id_any_p, 1, set); + } + else { + rb_raise(rb_eArgError, "value must be enumerable"); + } +} + +/* + * call-seq: + * disjoint?(set) -> true or false + * + * Returns true if the set and the given enumerable have no + * element in common. This method is the opposite of +intersect?+. + * + * Set[1, 2, 3].disjoint? Set[3, 4] #=> false + * Set[1, 2, 3].disjoint? Set[4, 5] #=> true + * Set[1, 2, 3].disjoint? [3, 4] #=> false + * Set[1, 2, 3].disjoint? 4..5 #=> true + */ +static VALUE +set_i_disjoint(VALUE set, VALUE other) +{ + return RBOOL(!RTEST(set_i_intersect(set, other))); +} + +/* + * call-seq: + * set <=> other -> -1, 0, 1, or nil + * + * Returns 0 if the set are equal, -1 / 1 if the set is a + * proper subset / superset of the given set, or or nil if + * they both have unique elements. 
+ */ +static VALUE +set_i_compare(VALUE set, VALUE other) +{ + if (rb_obj_is_kind_of(other, rb_cSet)) { + size_t set_size = RSET_SIZE(set); + size_t other_size = RSET_SIZE(other); + + if (set_size < other_size) { + if (set_le(set, other) == Qtrue) { + return INT2NUM(-1); + } + } + else if (set_size > other_size) { + if (set_le(other, set) == Qtrue) { + return INT2NUM(1); + } + } + else if (set_le(set, other) == Qtrue) { + return INT2NUM(0); + } + } + + return Qnil; +} + +struct set_equal_data { + VALUE result; + VALUE set; +}; + +static int +set_eql_i(st_data_t item, st_data_t arg) +{ + struct set_equal_data *data = (struct set_equal_data *)arg; + + if (!set_lookup(RSET_TABLE(data->set), item)) { + data->result = Qfalse; + return ST_STOP; + } + return ST_CONTINUE; +} + +static VALUE +set_recursive_eql(VALUE set, VALUE dt, int recur) +{ + if (recur) return Qtrue; + struct set_equal_data *data = (struct set_equal_data*)dt; + data->result = Qtrue; + set_iter(set, set_eql_i, dt); + return data->result; +} + +/* + * call-seq: + * set == other -> true or false + * + * Returns true if two sets are equal. + */ +static VALUE +set_i_eq(VALUE set, VALUE other) +{ + if (!rb_obj_is_kind_of(other, rb_cSet)) return Qfalse; + if (set == other) return Qtrue; + + set_table *stable = RSET_TABLE(set); + set_table *otable = RSET_TABLE(other); + size_t ssize = set_table_size(stable); + size_t osize = set_table_size(otable); + + if (ssize != osize) return Qfalse; + if (ssize == 0 && osize == 0) return Qtrue; + if (stable->type != otable->type) return Qfalse; + + struct set_equal_data data; + data.set = other; + return rb_exec_recursive_paired(set_recursive_eql, set, other, (VALUE)&data); +} + +static int +set_hash_i(st_data_t item, st_data_t(arg)) +{ + st_index_t *hval = (st_index_t *)arg; + st_index_t ival = rb_hash(item); + *hval ^= rb_st_hash(&ival, sizeof(st_index_t), 0); + return ST_CONTINUE; +} + +/* + * call-seq: + * hash -> integer + * + * Returns hash code for set. 
+ */ +static VALUE +set_i_hash(VALUE set) +{ + st_index_t size = RSET_SIZE(set); + st_index_t hval = rb_st_hash_start(size); + hval = rb_hash_uint(hval, (st_index_t)set_i_hash); + if (size) { + set_iter(set, set_hash_i, (VALUE)&hval); + } + hval = rb_st_hash_end(hval); + return ST2FIX(hval); +} + +/* + * Document-class: Set + * + * Copyright (c) 2002-2024 Akinori MUSHA + * + * Documentation by Akinori MUSHA and Gavin Sinclair. + * + * All rights reserved. You can redistribute and/or modify it under the same + * terms as Ruby. + * + * The Set class implements a collection of unordered values with no + * duplicates. It is a hybrid of Array's intuitive inter-operation + * facilities and Hash's fast lookup. + * + * Set is easy to use with Enumerable objects (implementing `each`). + * Most of the initializer methods and binary operators accept generic + * Enumerable objects besides sets and arrays. An Enumerable object + * can be converted to Set using the `to_set` method. + * + * Set uses a data structure similar to Hash for storage, except that + * it only has keys and no values. + * + * * Equality of elements is determined according to Object#eql? and + * Object#hash. Use Set#compare_by_identity to make a set compare + * its elements by their identity. + * * Set assumes that the identity of each element does not change + * while it is stored. Modifying an element of a set will render the + * set to an unreliable state. + * * When a string is to be stored, a frozen copy of the string is + * stored instead unless the original string is already frozen. + * + * == Comparison + * + * The comparison operators <, >, <=, and + * >= are implemented as shorthand for the + * {proper_,}{subset?,superset?} methods. The <=> + * operator reflects this order, or returns +nil+ for sets that both + * have distinct elements ({x, y} vs. {x, z} for example). 
+ * + * == Example + * + * s1 = Set[1, 2] #=> #<Set: {1, 2}> + * s2 = [1, 2].to_set #=> #<Set: {1, 2}> + * s1 == s2 #=> true + * s1.add("foo") #=> #<Set: {1, 2, "foo"}> + * s1.merge([2, 6]) #=> #<Set: {1, 2, "foo", 6}> + * s1.subset?(s2) #=> false + * s2.subset?(s1) #=> true + * + * == Contact + * + * - Akinori MUSHA (current maintainer) + * + * == What's Here + * + * First, what's elsewhere. \Class \Set: + * + * - Inherits from {class Object}[rdoc-ref:Object@What-27s+Here]. + * - Includes {module Enumerable}[rdoc-ref:Enumerable@What-27s+Here], + * which provides dozens of additional methods. + * + * In particular, class \Set does not have many methods of its own + * for fetching or for iterating. + * Instead, it relies on those in \Enumerable. + * + * Here, class \Set provides methods that are useful for: + * + * - {Creating an Array}[rdoc-ref:Array@Methods+for+Creating+an+Array] + * - {Creating a Set}[rdoc-ref:Set@Methods+for+Creating+a+Set] + * - {Set Operations}[rdoc-ref:Set@Methods+for+Set+Operations] + * - {Comparing}[rdoc-ref:Set@Methods+for+Comparing] + * - {Querying}[rdoc-ref:Set@Methods+for+Querying] + * - {Assigning}[rdoc-ref:Set@Methods+for+Assigning] + * - {Deleting}[rdoc-ref:Set@Methods+for+Deleting] + * - {Converting}[rdoc-ref:Set@Methods+for+Converting] + * - {Iterating}[rdoc-ref:Set@Methods+for+Iterating] + * - {And more....}[rdoc-ref:Set@Other+Methods] + * + * === Methods for Creating a \Set + * + * - ::[]: + * Returns a new set containing the given objects. + * - ::new: + * Returns a new set containing either the given objects + * (if no block given) or the return values from the called block + * (if a block given). + * + * === Methods for \Set Operations + * + * - #| (aliased as #union and #+): + * Returns a new set containing all elements from +self+ + * and all elements from a given enumerable (no duplicates). + * - #& (aliased as #intersection): + * Returns a new set containing all elements common to +self+ + * and a given enumerable.
+ * - #- (aliased as #difference): + * Returns a copy of +self+ with all elements + * in a given enumerable removed. + * - #^: Returns a new set containing all elements from +self+ + * and a given enumerable except those common to both. + * + * === Methods for Comparing + * + * - #<=>: Returns -1, 0, or 1 as +self+ is a proper subset of, equal to, + * or a proper superset of a given set; returns +nil+ otherwise. + * - #==: Returns whether +self+ and a given object are equal sets, + * as determined by Object#eql?. + * - #compare_by_identity?: + * Returns whether the set considers only identity + * when comparing elements. + * + * === Methods for Querying + * + * - #size (aliased as #length): + * Returns the count of elements. + * - #empty?: + * Returns whether the set has no elements. + * - #include? (aliased as #member? and #===): + * Returns whether a given object is an element in the set. + * - #subset? (aliased as #<=): + * Returns whether the set is a subset of a given set. + * - #proper_subset? (aliased as #<): + * Returns whether the set is a proper subset of a given set. + * - #superset? (aliased as #>=): + * Returns whether the set is a superset of a given set. + * - #proper_superset? (aliased as #>): + * Returns whether the set is a proper superset of a given set. + * - #disjoint?: + * Returns +true+ if the set and a given enumerable + * have no common elements, +false+ otherwise. + * - #intersect?: + * Returns +true+ if the set and a given enumerable + * have any common elements, +false+ otherwise. + * - #compare_by_identity?: + * Returns whether the set considers only identity + * when comparing elements. + * + * === Methods for Assigning + * + * - #add (aliased as #<<): + * Adds a given object to the set; returns +self+. + * - #add?: + * If the given object is not an element in the set, + * adds it and returns +self+; otherwise, returns +nil+. + * - #merge: + * Merges the elements of each given enumerable object to the set; returns +self+.
+ * - #replace: + * Replaces the contents of the set with the contents + * of a given enumerable. + * + * === Methods for Deleting + * + * - #clear: + * Removes all elements in the set; returns +self+. + * - #delete: + * Removes a given object from the set; returns +self+. + * - #delete?: + * If the given object is an element in the set, + * removes it and returns +self+; otherwise, returns +nil+. + * - #subtract: + * Removes each element of a given enumerable from the set; returns +self+. + * - #delete_if: + * Removes elements specified by a given block. + * - #select! (aliased as #filter!): + * Removes elements not specified by a given block. + * - #keep_if: + * Removes elements not specified by a given block. + * - #reject!: + * Removes elements specified by a given block. + * + * === Methods for Converting + * + * - #classify: + * Returns a hash that classifies the elements, + * as determined by the given block. + * - #collect! (aliased as #map!): + * Replaces each element with a block return-value. + * - #divide: + * Returns a set of subsets that classifies the elements, + * as determined by the given block; + * differs from #classify in that the block may accept + * either one or two arguments. + * - #flatten: + * Returns a new set that is a recursive flattening of +self+. + * - #flatten!: + * Replaces each nested set in +self+ with the elements from that set. + * - #inspect (aliased as #to_s): + * Returns a string displaying the elements. + * - #join: + * Returns a string containing all elements, converted to strings + * as needed, and joined by the given record separator. + * - #to_a: + * Returns an array containing all set elements. + * - #to_set: + * Returns +self+ if given no arguments and no block; + * otherwise, returns a new object created by calling +new+ on + * the given class with +self+, the remaining arguments and the block. + * + * === Methods for Iterating + * + * - #each: + * Calls the block with each successive element; returns +self+.
+ * + * === Other Methods + * + * - #reset: + * Resets the internal state; useful if an object + * has been modified while an element in the set. + * + */ +void +Init_Set(void) +{ + rb_cSet = rb_define_class("Set", rb_cObject); + rb_include_module(rb_cSet, rb_mEnumerable); + + id_each_entry = rb_intern_const("each_entry"); + id_any_p = rb_intern_const("any?"); + id_new = rb_intern_const("new"); + id_set_iter_lev = rb_make_internal_id(); + + rb_define_alloc_func(rb_cSet, set_s_alloc); + rb_define_singleton_method(rb_cSet, "[]", set_s_create, -1); + + rb_define_method(rb_cSet, "initialize", set_i_initialize, -1); + rb_define_method(rb_cSet, "initialize_copy", set_i_initialize_copy, 1); + + rb_define_method(rb_cSet, "&", set_i_intersection, 1); + rb_define_alias(rb_cSet, "intersection", "&"); + rb_define_method(rb_cSet, "-", set_i_difference, 1); + rb_define_alias(rb_cSet, "difference", "-"); + rb_define_method(rb_cSet, "^", set_i_xor, 1); + rb_define_method(rb_cSet, "|", set_i_union, 1); + rb_define_alias(rb_cSet, "+", "|"); + rb_define_alias(rb_cSet, "union", "|"); + rb_define_method(rb_cSet, "<=>", set_i_compare, 1); + rb_define_method(rb_cSet, "==", set_i_eq, 1); + rb_define_alias(rb_cSet, "eql?", "=="); + rb_define_method(rb_cSet, "add", set_i_add, 1); + rb_define_alias(rb_cSet, "<<", "add"); + rb_define_method(rb_cSet, "add?", set_i_add_p, 1); + rb_define_method(rb_cSet, "classify", set_i_classify, 0); + rb_define_method(rb_cSet, "clear", set_i_clear, 0); + rb_define_method(rb_cSet, "collect!", set_i_collect, 0); + rb_define_alias(rb_cSet, "map!", "collect!"); + rb_define_method(rb_cSet, "compare_by_identity", set_i_compare_by_identity, 0); + rb_define_method(rb_cSet, "compare_by_identity?", set_i_compare_by_identity_p, 0); + rb_define_method(rb_cSet, "delete", set_i_delete, 1); + rb_define_method(rb_cSet, "delete?", set_i_delete_p, 1); + rb_define_method(rb_cSet, "delete_if", set_i_delete_if, 0); + rb_define_method(rb_cSet, "disjoint?", set_i_disjoint, 1); + 
rb_define_method(rb_cSet, "divide", set_i_divide, 0); + rb_define_method(rb_cSet, "each", set_i_each, 0); + rb_define_method(rb_cSet, "empty?", set_i_empty, 0); + rb_define_method(rb_cSet, "flatten", set_i_flatten, 0); + rb_define_method(rb_cSet, "flatten!", set_i_flatten_bang, 0); + rb_define_method(rb_cSet, "hash", set_i_hash, 0); + rb_define_method(rb_cSet, "include?", set_i_include, 1); + rb_define_alias(rb_cSet, "member?", "include?"); + rb_define_alias(rb_cSet, "===", "include?"); + rb_define_method(rb_cSet, "inspect", set_i_inspect, 0); + rb_define_alias(rb_cSet, "to_s", "inspect"); + rb_define_method(rb_cSet, "intersect?", set_i_intersect, 1); + rb_define_method(rb_cSet, "join", set_i_join, -1); + rb_define_method(rb_cSet, "keep_if", set_i_keep_if, 0); + rb_define_method(rb_cSet, "merge", set_i_merge, -1); + rb_define_method(rb_cSet, "proper_subset?", set_i_proper_subset, 1); + rb_define_alias(rb_cSet, "<", "proper_subset?"); + rb_define_method(rb_cSet, "proper_superset?", set_i_proper_superset, 1); + rb_define_alias(rb_cSet, ">", "proper_superset?"); + rb_define_method(rb_cSet, "reject!", set_i_reject, 0); + rb_define_method(rb_cSet, "replace", set_i_replace, 1); + rb_define_method(rb_cSet, "reset", set_i_reset, 0); + rb_define_method(rb_cSet, "size", set_i_size, 0); + rb_define_alias(rb_cSet, "length", "size"); + rb_define_method(rb_cSet, "select!", set_i_select, 0); + rb_define_alias(rb_cSet, "filter!", "select!"); + rb_define_method(rb_cSet, "subset?", set_i_subset, 1); + rb_define_alias(rb_cSet, "<=", "subset?"); + rb_define_method(rb_cSet, "subtract", set_i_subtract, 1); + rb_define_method(rb_cSet, "superset?", set_i_superset, 1); + rb_define_alias(rb_cSet, ">=", "superset?"); + rb_define_method(rb_cSet, "to_a", set_i_to_a, 0); + rb_define_method(rb_cSet, "to_set", set_i_to_set, -1); + + rb_provide("set.rb"); +} diff --git a/spec/ruby/core/kernel/require_spec.rb b/spec/ruby/core/kernel/require_spec.rb index e78e7176ec..945f68aba9 100644 --- 
a/spec/ruby/core/kernel/require_spec.rb +++ b/spec/ruby/core/kernel/require_spec.rb @@ -17,6 +17,9 @@ describe "Kernel#require" do end provided = %w[complex enumerator fiber rational thread ruby2_keywords] + ruby_version_is "3.5" do + provided << "set" + end it "#{provided.join(', ')} are already required" do out = ruby_exe("puts $LOADED_FEATURES", options: '--disable-gems --disable-did-you-mean') diff --git a/spec/ruby/language/predefined_spec.rb b/spec/ruby/language/predefined_spec.rb index f8645493b8..5acfa6d0ab 100644 --- a/spec/ruby/language/predefined_spec.rb +++ b/spec/ruby/language/predefined_spec.rb @@ -1382,9 +1382,9 @@ end describe "$LOAD_PATH.resolve_feature_path" do it "returns what will be loaded without actual loading, .rb file" do - extension, path = $LOAD_PATH.resolve_feature_path('set') + extension, path = $LOAD_PATH.resolve_feature_path('pp') extension.should == :rb - path.should.end_with?('/set.rb') + path.should.end_with?('/pp.rb') end it "returns what will be loaded without actual loading, .so file" do diff --git a/spec/ruby/library/set/compare_by_identity_spec.rb b/spec/ruby/library/set/compare_by_identity_spec.rb index 602d1e758e..ad90cd8a8e 100644 --- a/spec/ruby/library/set/compare_by_identity_spec.rb +++ b/spec/ruby/library/set/compare_by_identity_spec.rb @@ -91,11 +91,22 @@ describe "Set#compare_by_identity" do set.to_a.sort.should == [a1, a2].sort end - it "raises a FrozenError on frozen sets" do - set = Set.new.freeze - -> { - set.compare_by_identity - }.should raise_error(FrozenError, /frozen Hash/) + ruby_version_is "3.5" do + it "raises a FrozenError on frozen sets" do + set = Set.new.freeze + -> { + set.compare_by_identity + }.should raise_error(FrozenError, "can't modify frozen Set: #<Set: {}>") + end + end + + ruby_version_is ""..."3.5" do + it "raises a FrozenError on frozen sets" do + set = Set.new.freeze + -> { + set.compare_by_identity + }.should raise_error(FrozenError, /frozen Hash/) + end end it "persists over #dups" do diff --git
a/spec/ruby/library/set/divide_spec.rb b/spec/ruby/library/set/divide_spec.rb index 998a1b292c..314d9942eb 100644 --- a/spec/ruby/library/set/divide_spec.rb +++ b/spec/ruby/library/set/divide_spec.rb @@ -26,10 +26,20 @@ describe "Set#divide when passed a block with an arity of 2" do set.map{ |x| x.to_a.sort }.sort.should == [[1], [3, 4], [6], [9, 10, 11]] end - it "yields each two Object to the block" do - ret = [] - Set[1, 2].divide { |x, y| ret << [x, y] } - ret.sort.should == [[1, 1], [1, 2], [2, 1], [2, 2]] + ruby_version_is "3.5" do + it "yields each two Object to the block" do + ret = [] + Set[1, 2].divide { |x, y| ret << [x, y] } + ret.sort.should == [[1, 2], [2, 1]] + end + end + + ruby_version_is ""..."3.5" do + it "yields each two Object to the block" do + ret = [] + Set[1, 2].divide { |x, y| ret << [x, y] } + ret.sort.should == [[1, 1], [1, 2], [2, 1], [2, 2]] + end end it "returns an enumerator when not passed a block" do diff --git a/spec/ruby/library/set/equal_value_spec.rb b/spec/ruby/library/set/equal_value_spec.rb index f5b5f790c0..cac4a99fd5 100644 --- a/spec/ruby/library/set/equal_value_spec.rb +++ b/spec/ruby/library/set/equal_value_spec.rb @@ -25,9 +25,11 @@ describe "Set#==" do set1.should == set2 end - context "when comparing to a Set-like object" do - it "returns true when a Set and a Set-like object contain the same elements" do - Set[1, 2, 3].should == SetSpecs::SetLike.new([1, 2, 3]) + ruby_version_is ""..."3.5" do + context "when comparing to a Set-like object" do + it "returns true when a Set and a Set-like object contain the same elements" do + Set[1, 2, 3].should == SetSpecs::SetLike.new([1, 2, 3]) + end end end end diff --git a/spec/ruby/library/set/flatten_merge_spec.rb b/spec/ruby/library/set/flatten_merge_spec.rb index f2c99a9481..a0883ebc9d 100644 --- a/spec/ruby/library/set/flatten_merge_spec.rb +++ b/spec/ruby/library/set/flatten_merge_spec.rb @@ -2,22 +2,24 @@ require_relative '../../spec_helper' require 'set' describe 
"Set#flatten_merge" do - it "is protected" do - Set.should have_protected_instance_method("flatten_merge") - end + ruby_version_is ""..."3.5" do + it "is protected" do + Set.should have_protected_instance_method("flatten_merge") + end - it "flattens the passed Set and merges it into self" do - set1 = Set[1, 2] - set2 = Set[3, 4, Set[5, 6]] + it "flattens the passed Set and merges it into self" do + set1 = Set[1, 2] + set2 = Set[3, 4, Set[5, 6]] - set1.send(:flatten_merge, set2).should == Set[1, 2, 3, 4, 5, 6] - end + set1.send(:flatten_merge, set2).should == Set[1, 2, 3, 4, 5, 6] + end - it "raises an ArgumentError when trying to flatten a recursive Set" do - set1 = Set[1, 2, 3] - set2 = Set[5, 6, 7] - set2 << set2 + it "raises an ArgumentError when trying to flatten a recursive Set" do + set1 = Set[1, 2, 3] + set2 = Set[5, 6, 7] + set2 << set2 - -> { set1.send(:flatten_merge, set2) }.should raise_error(ArgumentError) + -> { set1.send(:flatten_merge, set2) }.should raise_error(ArgumentError) + end end end diff --git a/spec/ruby/library/set/flatten_spec.rb b/spec/ruby/library/set/flatten_spec.rb index 51b58d6439..c075225181 100644 --- a/spec/ruby/library/set/flatten_spec.rb +++ b/spec/ruby/library/set/flatten_spec.rb @@ -17,9 +17,11 @@ describe "Set#flatten" do -> { set.flatten }.should raise_error(ArgumentError) end - context "when Set contains a Set-like object" do - it "returns a copy of self with each included Set-like object flattened" do - Set[SetSpecs::SetLike.new([1])].flatten.should == Set[1] + ruby_version_is ""..."3.5" do + context "when Set contains a Set-like object" do + it "returns a copy of self with each included Set-like object flattened" do + Set[SetSpecs::SetLike.new([1])].flatten.should == Set[1] + end end end end @@ -47,9 +49,11 @@ describe "Set#flatten!" 
do end version_is(set_version, ""..."1.1.0") do #ruby_version_is ""..."3.3" do - context "when Set contains a Set-like object" do - it "flattens self, including Set-like objects" do - Set[SetSpecs::SetLike.new([1])].flatten!.should == Set[1] + ruby_version_is ""..."3.5" do + context "when Set contains a Set-like object" do + it "flattens self, including Set-like objects" do + Set[SetSpecs::SetLike.new([1])].flatten!.should == Set[1] + end end end end diff --git a/spec/ruby/library/set/hash_spec.rb b/spec/ruby/library/set/hash_spec.rb index c5bab73931..9be487a2b1 100644 --- a/spec/ruby/library/set/hash_spec.rb +++ b/spec/ruby/library/set/hash_spec.rb @@ -11,8 +11,10 @@ describe "Set#hash" do Set[1, 2, 3].hash.should_not == Set[:a, "b", ?c].hash end - # see https://github.com/jruby/jruby/issues/8393 - it "is equal to nil.hash for an uninitialized Set" do - Set.allocate.hash.should == nil.hash + ruby_version_is ""..."3.5" do + # see https://github.com/jruby/jruby/issues/8393 + it "is equal to nil.hash for an uninitialized Set" do + Set.allocate.hash.should == nil.hash + end end end diff --git a/spec/ruby/library/set/join_spec.rb b/spec/ruby/library/set/join_spec.rb index 3f511a84e4..a37f35947a 100644 --- a/spec/ruby/library/set/join_spec.rb +++ b/spec/ruby/library/set/join_spec.rb @@ -21,9 +21,11 @@ describe "Set#join" do set.join(' | ').should == "a | b | c" end - it "calls #to_a to convert the Set in to an Array" do - set = Set[:a, :b, :c] - set.should_receive(:to_a).and_return([:a, :b, :c]) - set.join.should == "abc" + ruby_version_is ""..."3.5" do + it "calls #to_a to convert the Set in to an Array" do + set = Set[:a, :b, :c] + set.should_receive(:to_a).and_return([:a, :b, :c]) + set.join.should == "abc" + end end end diff --git a/spec/ruby/library/set/proper_subset_spec.rb b/spec/ruby/library/set/proper_subset_spec.rb index 6b51dedc9f..e58b23f5ff 100644 --- a/spec/ruby/library/set/proper_subset_spec.rb +++ b/spec/ruby/library/set/proper_subset_spec.rb @@ -35,9 
+35,11 @@ describe "Set#proper_subset?" do end version_is(set_version, ""..."1.1.0") do #ruby_version_is ""..."3.3" do - context "when comparing to a Set-like object" do - it "returns true if passed a Set-like object that self is a proper subset of" do - Set[1, 2, 3].proper_subset?(SetSpecs::SetLike.new([1, 2, 3, 4])).should be_true + ruby_version_is ""..."3.5" do + context "when comparing to a Set-like object" do + it "returns true if passed a Set-like object that self is a proper subset of" do + Set[1, 2, 3].proper_subset?(SetSpecs::SetLike.new([1, 2, 3, 4])).should be_true + end end end end diff --git a/spec/ruby/library/set/proper_superset_spec.rb b/spec/ruby/library/set/proper_superset_spec.rb index a386c8c097..fbd94fb75d 100644 --- a/spec/ruby/library/set/proper_superset_spec.rb +++ b/spec/ruby/library/set/proper_superset_spec.rb @@ -33,9 +33,11 @@ describe "Set#proper_superset?" do -> { Set[].proper_superset?(Object.new) }.should raise_error(ArgumentError) end - context "when comparing to a Set-like object" do - it "returns true if passed a Set-like object that self is a proper superset of" do - Set[1, 2, 3, 4].proper_superset?(SetSpecs::SetLike.new([1, 2, 3])).should be_true + ruby_version_is ""..."3.5" do + context "when comparing to a Set-like object" do + it "returns true if passed a Set-like object that self is a proper superset of" do + Set[1, 2, 3, 4].proper_superset?(SetSpecs::SetLike.new([1, 2, 3])).should be_true + end end end end diff --git a/spec/ruby/library/set/subset_spec.rb b/spec/ruby/library/set/subset_spec.rb index 85666d633f..0494aa25cb 100644 --- a/spec/ruby/library/set/subset_spec.rb +++ b/spec/ruby/library/set/subset_spec.rb @@ -35,9 +35,11 @@ describe "Set#subset?" 
do end version_is(set_version, ""..."1.1.0") do #ruby_version_is ""..."3.3" do - context "when comparing to a Set-like object" do - it "returns true if passed a Set-like object that self is a subset of" do - Set[1, 2, 3].subset?(SetSpecs::SetLike.new([1, 2, 3, 4])).should be_true + ruby_version_is ""..."3.5" do + context "when comparing to a Set-like object" do + it "returns true if passed a Set-like object that self is a subset of" do + Set[1, 2, 3].subset?(SetSpecs::SetLike.new([1, 2, 3, 4])).should be_true + end end end end diff --git a/spec/ruby/library/set/superset_spec.rb b/spec/ruby/library/set/superset_spec.rb index bd9d2f3eee..46fdc358e4 100644 --- a/spec/ruby/library/set/superset_spec.rb +++ b/spec/ruby/library/set/superset_spec.rb @@ -33,9 +33,11 @@ describe "Set#superset?" do -> { Set[].superset?(Object.new) }.should raise_error(ArgumentError) end - context "when comparing to a Set-like object" do - it "returns true if passed a Set-like object that self is a superset of" do - Set[1, 2, 3, 4].superset?(SetSpecs::SetLike.new([1, 2, 3])).should be_true + ruby_version_is ""..."3.5" do + context "when comparing to a Set-like object" do + it "returns true if passed a Set-like object that self is a superset of" do + Set[1, 2, 3, 4].superset?(SetSpecs::SetLike.new([1, 2, 3])).should be_true + end end end end diff --git a/st.c b/st.c index f41120ecaf..3672fa9a80 100644 --- a/st.c +++ b/st.c @@ -109,6 +109,7 @@ #include "internal/bits.h" #include "internal/hash.h" #include "internal/sanitizers.h" +#include "internal/set_table.h" #include "internal/st.h" #include "ruby_assert.h" #endif @@ -2332,4 +2333,880 @@ rb_st_compact_table(st_table *tab) } } +/* + * set_table related code + */ + +struct set_table_entry { + st_hash_t hash; + st_data_t key; +}; + +/* Return hash value of KEY for table TAB. 
*/ +static inline st_hash_t +set_do_hash(st_data_t key, set_table *tab) +{ + st_hash_t hash = (st_hash_t)(tab->type->hash)(key); + return normalize_hash_value(hash); +} + +/* Return bin size index of table TAB. */ +static inline unsigned int +set_get_size_ind(const set_table *tab) +{ + return tab->size_ind; +} + +/* Return the number of allocated bins of table TAB. */ +static inline st_index_t +set_get_bins_num(const set_table *tab) +{ + return ((st_index_t) 1)<bin_power; +} + +/* Return mask for a bin index in table TAB. */ +static inline st_index_t +set_bins_mask(const set_table *tab) +{ + return set_get_bins_num(tab) - 1; +} + +/* Return the index of table TAB bin corresponding to + HASH_VALUE. */ +static inline st_index_t +set_hash_bin(st_hash_t hash_value, set_table *tab) +{ + return hash_value & set_bins_mask(tab); +} + +/* Return the number of allocated entries of table TAB. */ +static inline st_index_t +set_get_allocated_entries(const set_table *tab) +{ + return ((st_index_t) 1)<entry_power; +} + +/* Return size of the allocated bins of table TAB. */ +static inline st_index_t +set_bins_size(const set_table *tab) +{ + return features[tab->entry_power].bins_words * sizeof (st_index_t); +} + +/* Mark all bins of table TAB as empty. */ +static void +set_initialize_bins(set_table *tab) +{ + memset(tab->bins, 0, set_bins_size(tab)); +} + +/* Make table TAB empty. 
*/ +static void +set_make_tab_empty(set_table *tab) +{ + tab->num_entries = 0; + tab->entries_start = tab->entries_bound = 0; + if (tab->bins != NULL) + set_initialize_bins(tab); +} + +static set_table * +set_init_existing_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size) +{ + int n; + +#ifdef HASH_LOG +#if HASH_LOG+0 < 0 + { + const char *e = getenv("ST_HASH_LOG"); + if (!e || !*e) init_st = 1; + } +#endif + if (init_st == 0) { + init_st = 1; + atexit(stat_col); + } +#endif + + n = get_power2(size); + + tab->type = type; + tab->entry_power = n; + tab->bin_power = features[n].bin_power; + tab->size_ind = features[n].size_ind; + if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS) + tab->bins = NULL; + else { + tab->bins = (st_index_t *) malloc(set_bins_size(tab)); + } + tab->entries = (set_table_entry *) malloc(set_get_allocated_entries(tab) + * sizeof(set_table_entry)); + set_make_tab_empty(tab); + tab->rebuilds_num = 0; + return tab; +} + +/* Create and return table with TYPE which can hold at least SIZE + entries. The real number of entries which the table can hold is + the nearest power of two for SIZE. */ +set_table * +set_init_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size) +{ + if (tab == NULL) tab = malloc(sizeof(set_table)); + + set_init_existing_table_with_size(tab, type, size); + + return tab; +} + +size_t +set_table_size(const struct set_table *tbl) +{ + return tbl->num_entries; +} + +/* Make table TAB empty. */ +void +set_clear(set_table *tab) +{ + set_make_tab_empty(tab); + tab->rebuilds_num++; +} + +/* Free table TAB space. This should only be used if you passed NULL to + set_init_table_with_size/set_copy when creating the table. */ +void +set_free_table(set_table *tab) +{ + free(tab->bins); + free(tab->entries); + free(tab); +} + +/* Return byte size of memory allocated for table TAB. */ +size_t +set_memsize(const set_table *tab) +{ + return(sizeof(set_table) + + (tab->bins == NULL ? 
0 : set_bins_size(tab)) + + set_get_allocated_entries(tab) * sizeof(set_table_entry)); +} + +static st_index_t +set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key); + +static st_index_t +set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key); + +static st_index_t +set_find_table_bin_ind_direct(set_table *table, st_hash_t hash_value, st_data_t key); + +static st_index_t +set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value, + st_data_t key, st_index_t *bin_ind); + +static void set_rebuild_table_with(set_table *const new_tab, set_table *const tab); +static void set_rebuild_move_table(set_table *const new_tab, set_table *const tab); +static void set_rebuild_cleanup(set_table *const tab); + +/* Rebuild table TAB. Rebuilding removes all deleted bins and entries + and can change size of the table entries and bins arrays. + Rebuilding is implemented by creation of a new table or by + compaction of the existing one. */ +static void +set_rebuild_table(set_table *tab) +{ + if ((2 * tab->num_entries <= set_get_allocated_entries(tab) + && REBUILD_THRESHOLD * tab->num_entries > set_get_allocated_entries(tab)) + || tab->num_entries < (1 << MINIMAL_POWER2)) { + /* Compaction: */ + tab->num_entries = 0; + if (tab->bins != NULL) + set_initialize_bins(tab); + set_rebuild_table_with(tab, tab); + } + else { + set_table *new_tab; + /* This allocation could trigger GC and compaction. If tab is the + * gen_iv_tbl, then tab could have changed in size due to objects being + * freed and/or moved. Do not store attributes of tab before this line. 
*/ + new_tab = set_init_table_with_size(NULL, tab->type, + 2 * tab->num_entries - 1); + set_rebuild_table_with(new_tab, tab); + set_rebuild_move_table(new_tab, tab); + } + set_rebuild_cleanup(tab); +} + +static void +set_rebuild_table_with(set_table *const new_tab, set_table *const tab) +{ + st_index_t i, ni; + unsigned int size_ind; + set_table_entry *new_entries; + set_table_entry *curr_entry_ptr; + st_index_t *bins; + st_index_t bin_ind; + + new_entries = new_tab->entries; + + ni = 0; + bins = new_tab->bins; + size_ind = set_get_size_ind(new_tab); + st_index_t bound = tab->entries_bound; + set_table_entry *entries = tab->entries; + + for (i = tab->entries_start; i < bound; i++) { + curr_entry_ptr = &entries[i]; + PREFETCH(entries + i + 1, 0); + if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0)) + continue; + if (&new_entries[ni] != curr_entry_ptr) + new_entries[ni] = *curr_entry_ptr; + if (EXPECT(bins != NULL, 1)) { + bin_ind = set_find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash, + curr_entry_ptr->key); + set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE); + } + new_tab->num_entries++; + ni++; + } + + assert(new_tab->num_entries == tab->num_entries); +} + +static void +set_rebuild_move_table(set_table *const new_tab, set_table *const tab) +{ + tab->entry_power = new_tab->entry_power; + tab->bin_power = new_tab->bin_power; + tab->size_ind = new_tab->size_ind; + free(tab->bins); + tab->bins = new_tab->bins; + free(tab->entries); + tab->entries = new_tab->entries; + free(new_tab); +} + +static void +set_rebuild_cleanup(set_table *const tab) +{ + tab->entries_start = 0; + tab->entries_bound = tab->num_entries; + tab->rebuilds_num++; +} + +/* Return the next secondary hash index for table TAB using previous + index IND and PERTURB. Finally modulo of the function becomes a + full *cycle linear congruential generator*, in other words it + guarantees traversing all table bins in extreme case. 
+ + According the Hull-Dobell theorem a generator + "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if + o m and c are relatively prime + o a-1 is divisible by all prime factors of m + o a-1 is divisible by 4 if m is divisible by 4. + + For our case a is 5, c is 1, and m is a power of two. */ +static inline st_index_t +set_secondary_hash(st_index_t ind, set_table *tab, st_index_t *perturb) +{ + *perturb >>= 11; + ind = (ind << 2) + ind + *perturb + 1; + return set_hash_bin(ind, tab); +} + +/* Find an entry with HASH_VALUE and KEY in TABLE using a linear + search. Return the index of the found entry in array `entries`. + If it is not found, return UNDEFINED_ENTRY_IND. If the table was + rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */ +static inline st_index_t +set_find_entry(set_table *tab, st_hash_t hash_value, st_data_t key) +{ + int eq_p, rebuilt_p; + st_index_t i, bound; + set_table_entry *entries; + + bound = tab->entries_bound; + entries = tab->entries; + for (i = tab->entries_start; i < bound; i++) { + DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p); + if (EXPECT(rebuilt_p, 0)) + return REBUILT_TABLE_ENTRY_IND; + if (eq_p) + return i; + } + return UNDEFINED_ENTRY_IND; +} + +/* Use the quadratic probing. The method has a better data locality + but more collisions than the current approach. In average it + results in a bit slower search. */ +/*#define QUADRATIC_PROBE*/ + +/* Return index of entry with HASH_VALUE and KEY in table TAB. If + there is no such entry, return UNDEFINED_ENTRY_IND. If the table + was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. 
*/ +static st_index_t +set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key) +{ + int eq_p, rebuilt_p; + st_index_t ind; +#ifdef QUADRATIC_PROBE + st_index_t d; +#else + st_index_t perturb; +#endif + st_index_t bin; + set_table_entry *entries = tab->entries; + + ind = set_hash_bin(hash_value, tab); +#ifdef QUADRATIC_PROBE + d = 1; +#else + perturb = hash_value; +#endif + for (;;) { + bin = get_bin(tab->bins, set_get_size_ind(tab), ind); + if (! EMPTY_OR_DELETED_BIN_P(bin)) { + DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); + if (EXPECT(rebuilt_p, 0)) + return REBUILT_TABLE_ENTRY_IND; + if (eq_p) + break; + } + else if (EMPTY_BIN_P(bin)) + return UNDEFINED_ENTRY_IND; +#ifdef QUADRATIC_PROBE + ind = set_hash_bin(ind + d, tab); + d++; +#else + ind = set_secondary_hash(ind, tab, &perturb); +#endif + } + return bin; +} + +/* Find and return index of table TAB bin corresponding to an entry + with HASH_VALUE and KEY. If there is no such bin, return + UNDEFINED_BIN_IND. If the table was rebuilt during the search, + return REBUILT_TABLE_BIN_IND. */ +static st_index_t +set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key) +{ + int eq_p, rebuilt_p; + st_index_t ind; +#ifdef QUADRATIC_PROBE + st_index_t d; +#else + st_index_t perturb; +#endif + st_index_t bin; + set_table_entry *entries = tab->entries; + + ind = set_hash_bin(hash_value, tab); +#ifdef QUADRATIC_PROBE + d = 1; +#else + perturb = hash_value; +#endif + for (;;) { + bin = get_bin(tab->bins, set_get_size_ind(tab), ind); + if (! 
EMPTY_OR_DELETED_BIN_P(bin)) { + DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); + if (EXPECT(rebuilt_p, 0)) + return REBUILT_TABLE_BIN_IND; + if (eq_p) + break; + } + else if (EMPTY_BIN_P(bin)) + return UNDEFINED_BIN_IND; +#ifdef QUADRATIC_PROBE + ind = set_hash_bin(ind + d, tab); + d++; +#else + ind = set_secondary_hash(ind, tab, &perturb); +#endif + } + return ind; +} + +/* Find and return index of table TAB bin corresponding to an entry + with HASH_VALUE and KEY. The entry should be in the table + already. */ +static st_index_t +set_find_table_bin_ind_direct(set_table *tab, st_hash_t hash_value, st_data_t key) +{ + st_index_t ind; +#ifdef QUADRATIC_PROBE + st_index_t d; +#else + st_index_t perturb; +#endif + st_index_t bin; + + ind = set_hash_bin(hash_value, tab); +#ifdef QUADRATIC_PROBE + d = 1; +#else + perturb = hash_value; +#endif + for (;;) { + bin = get_bin(tab->bins, set_get_size_ind(tab), ind); + if (EMPTY_OR_DELETED_BIN_P(bin)) + return ind; +#ifdef QUADRATIC_PROBE + ind = set_hash_bin(ind + d, tab); + d++; +#else + ind = set_secondary_hash(ind, tab, &perturb); +#endif + } +} + +/* Mark I-th bin of table TAB as empty, in other words not + corresponding to any entry. */ +#define MARK_SET_BIN_EMPTY(tab, i) (set_bin((tab)->bins, set_get_size_ind(tab), i, EMPTY_BIN)) + +/* Return index of table TAB bin for HASH_VALUE and KEY through + BIN_IND and the pointed value as the function result. Reserve the + bin for inclusion of the corresponding entry into the table if it + is not there yet. We always find such bin as bins array length is + bigger entries array. Although we can reuse a deleted bin, the + result bin value is always empty if the table has no entry with + KEY. Return the entries array index of the found entry or + UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt + during the search, return REBUILT_TABLE_ENTRY_IND. 
*/ +static st_index_t +set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value, + st_data_t key, st_index_t *bin_ind) +{ + int eq_p, rebuilt_p; + st_index_t ind; + st_hash_t curr_hash_value = *hash_value; +#ifdef QUADRATIC_PROBE + st_index_t d; +#else + st_index_t perturb; +#endif + st_index_t entry_index; + st_index_t firset_deleted_bin_ind; + set_table_entry *entries; + + ind = set_hash_bin(curr_hash_value, tab); +#ifdef QUADRATIC_PROBE + d = 1; +#else + perturb = curr_hash_value; +#endif + firset_deleted_bin_ind = UNDEFINED_BIN_IND; + entries = tab->entries; + for (;;) { + entry_index = get_bin(tab->bins, set_get_size_ind(tab), ind); + if (EMPTY_BIN_P(entry_index)) { + tab->num_entries++; + entry_index = UNDEFINED_ENTRY_IND; + if (firset_deleted_bin_ind != UNDEFINED_BIN_IND) { + /* We can reuse bin of a deleted entry. */ + ind = firset_deleted_bin_ind; + MARK_SET_BIN_EMPTY(tab, ind); + } + break; + } + else if (! DELETED_BIN_P(entry_index)) { + DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p); + if (EXPECT(rebuilt_p, 0)) + return REBUILT_TABLE_ENTRY_IND; + if (eq_p) + break; + } + else if (firset_deleted_bin_ind == UNDEFINED_BIN_IND) + firset_deleted_bin_ind = ind; +#ifdef QUADRATIC_PROBE + ind = set_hash_bin(ind + d, tab); + d++; +#else + ind = set_secondary_hash(ind, tab, &perturb); +#endif + } + *bin_ind = ind; + return entry_index; +} + +/* Find an entry with KEY in table TAB. Return non-zero if we found + it. 
*/ +int +set_lookup(set_table *tab, st_data_t key) +{ + st_index_t bin; + st_hash_t hash = set_do_hash(key, tab); + + retry: + if (tab->bins == NULL) { + bin = set_find_entry(tab, hash, key); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; + if (bin == UNDEFINED_ENTRY_IND) + return 0; + } + else { + bin = set_find_table_entry_ind(tab, hash, key); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; + if (bin == UNDEFINED_ENTRY_IND) + return 0; + bin -= ENTRY_BASE; + } + return 1; +} + +/* Check the table and rebuild it if it is necessary. */ +static inline void +set_rebuild_table_if_necessary (set_table *tab) +{ + st_index_t bound = tab->entries_bound; + + if (bound == set_get_allocated_entries(tab)) + set_rebuild_table(tab); +} + +/* Insert KEY into table TAB and return zero. If there is + already entry with KEY in the table, return nonzero and update + the value of the found entry. */ +int +set_insert(set_table *tab, st_data_t key) +{ + set_table_entry *entry; + st_index_t bin; + st_index_t ind; + st_hash_t hash_value; + st_index_t bin_ind; + int new_p; + + hash_value = set_do_hash(key, tab); + retry: + set_rebuild_table_if_necessary(tab); + if (tab->bins == NULL) { + bin = set_find_entry(tab, hash_value, key); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; + new_p = bin == UNDEFINED_ENTRY_IND; + if (new_p) + tab->num_entries++; + bin_ind = UNDEFINED_BIN_IND; + } + else { + bin = set_find_table_bin_ptr_and_reserve(tab, &hash_value, + key, &bin_ind); + if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) + goto retry; + new_p = bin == UNDEFINED_ENTRY_IND; + bin -= ENTRY_BASE; + } + if (new_p) { + ind = tab->entries_bound++; + entry = &tab->entries[ind]; + entry->hash = hash_value; + entry->key = key; + if (bin_ind != UNDEFINED_BIN_IND) + set_bin(tab->bins, set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE); + return 0; + } + return 1; +} + +/* Insert (KEY, HASH) into table TAB. 
The table should not have + entry with KEY before the insertion. */ +static inline void +set_add_direct_with_hash(set_table *tab, + st_data_t key, st_hash_t hash) +{ + set_table_entry *entry; + st_index_t ind; + st_index_t bin_ind; + + assert(hash != RESERVED_HASH_VAL); + + set_rebuild_table_if_necessary(tab); + ind = tab->entries_bound++; + entry = &tab->entries[ind]; + entry->hash = hash; + entry->key = key; + tab->num_entries++; + if (tab->bins != NULL) { + bin_ind = set_find_table_bin_ind_direct(tab, hash, key); + set_bin(tab->bins, set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE); + } +} + +/* Create a copy of old_tab into new_tab. */ +static set_table * +set_replace(set_table *new_tab, set_table *old_tab) +{ + *new_tab = *old_tab; + if (old_tab->bins == NULL) + new_tab->bins = NULL; + else { + new_tab->bins = (st_index_t *) malloc(set_bins_size(old_tab)); + } + new_tab->entries = (set_table_entry *) malloc(set_get_allocated_entries(old_tab) + * sizeof(set_table_entry)); + MEMCPY(new_tab->entries, old_tab->entries, set_table_entry, + set_get_allocated_entries(old_tab)); + if (old_tab->bins != NULL) + MEMCPY(new_tab->bins, old_tab->bins, char, set_bins_size(old_tab)); + + return new_tab; +} + +/* Create and return a copy of table OLD_TAB. */ +set_table * +set_copy(set_table *new_tab, set_table *old_tab) +{ + if (new_tab == NULL) new_tab = (set_table *) malloc(sizeof(set_table)); + + if (set_replace(new_tab, old_tab) == NULL) { + set_free_table(new_tab); + return NULL; + } + + return new_tab; +} + +/* Update the entries start of table TAB after removing an entry + with index N in the array entries. */ +static inline void +set_update_range_for_deleted(set_table *tab, st_index_t n) +{ + /* Do not update entries_bound here. Otherwise, we can fill all + bins by deleted entry value before rebuilding the table. 
*/
+    if (tab->entries_start == n) { /* only removing the first entry moves the start */
+        st_index_t start = n + 1;
+        st_index_t bound = tab->entries_bound;
+        set_table_entry *entries = tab->entries;
+        while (start < bound && DELETED_ENTRY_P(&entries[start])) start++; /* skip following deleted entries */
+        tab->entries_start = start;
+    }
+}
+
+/* Mark I-th bin of table TAB as corresponding to a deleted table
+   entry.  The macro only rewrites the bin through set_bin; the entry
+   itself and num_entries are updated separately by the code that
+   uses the macro (see set_delete).  */
+#define MARK_SET_BIN_DELETED(tab, i) \
+    do { \
+        set_bin((tab)->bins, set_get_size_ind(tab), i, DELETED_BIN); \
+    } while (0)
+
+/* Delete entry with KEY from table TAB, and return non-zero.  On
+   success *KEY is overwritten with the key stored in the deleted
+   entry.  If there is no entry with KEY in the table, return zero
+   and leave *KEY as it was.  */
+int
+set_delete(set_table *tab, st_data_t *key)
+{
+    set_table_entry *entry;
+    st_index_t bin;
+    st_index_t bin_ind;
+    st_hash_t hash;
+
+    hash = set_do_hash(*key, tab);
+ retry:
+    if (tab->bins == NULL) { /* no bins array: bin is the entry index itself */
+        bin = set_find_entry(tab, hash, *key);
+        if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) /* presumably rebuilt mid-search: redo */
+            goto retry;
+        if (bin == UNDEFINED_ENTRY_IND) {
+            return 0;
+        }
+    }
+    else {
+        bin_ind = set_find_table_bin_ind(tab, hash, *key);
+        if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
+            goto retry;
+        if (bin_ind == UNDEFINED_BIN_IND) {
+            return 0;
+        }
+        bin = get_bin(tab->bins, set_get_size_ind(tab), bin_ind) - ENTRY_BASE; /* bins hold entry index plus ENTRY_BASE */
+        MARK_SET_BIN_DELETED(tab, bin_ind);
+    }
+    entry = &tab->entries[bin];
+    *key = entry->key; /* hand the stored key back to the caller */
+    MARK_ENTRY_DELETED(entry);
+    tab->num_entries--;
+    set_update_range_for_deleted(tab, bin);
+    return 1;
+}
+
+/* Traverse all entries in table TAB calling FUNC with current entry
+   key and zero.  If the call returns ST_STOP, stop
+   traversing.  If the call returns ST_DELETE, delete the current
+   entry from the table.  In case of ST_CHECK or ST_CONTINUE, continue
+   traversing.  The function returns zero unless an error is found.
+   CHECK_P is flag of set_foreach_check call.  The behavior is a bit
+   different for ST_CHECK and when the current element is removed
+   during traversing.
*/
+static inline int
+set_general_foreach(set_table *tab, set_foreach_check_callback_func *func,
+                    set_update_callback_func *replace, st_data_t arg,
+                    int check_p)
+{
+    st_index_t bin;
+    st_index_t bin_ind;
+    set_table_entry *entries, *curr_entry_ptr;
+    enum st_retval retval;
+    st_index_t i, rebuilds_num;
+    st_hash_t hash;
+    st_data_t key;
+    int error_p, packed_p = tab->bins == NULL; /* packed_p: TAB has no bins array */
+
+    entries = tab->entries;
+    /* The bound can change inside the loop even without rebuilding
+       the table, e.g. by an entry insertion.  That is why the loop
+       condition reads tab->entries_bound on every iteration.  */
+    for (i = tab->entries_start; i < tab->entries_bound; i++) {
+        curr_entry_ptr = &entries[i];
+        if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
+            continue;
+        key = curr_entry_ptr->key;
+        rebuilds_num = tab->rebuilds_num; /* snapshot, compared after the callbacks */
+        hash = curr_entry_ptr->hash;
+        retval = (*func)(key, arg, 0); /* FUNC gets the key, ARG and 0 (no error) */
+
+        if (retval == ST_REPLACE && replace) {
+            retval = (*replace)(&key, arg, TRUE);
+            curr_entry_ptr->key = key; /* store the key left by REPLACE.
+                                          NOTE(review): this store goes through
+                                          a pointer taken before FUNC ran and
+                                          happens before the rebuild check
+                                          below -- confirm REPLACE users never
+                                          cause a rebuild.  */
+        }
+
+        if (rebuilds_num != tab->rebuilds_num) { /* rebuild counter changed during the callbacks */
+        retry:
+            entries = tab->entries; /* reload the array pointer and packed flag from TAB */
+            packed_p = tab->bins == NULL;
+            if (packed_p) {
+                i = set_find_entry(tab, hash, key);
+                if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
+                    goto retry;
+                error_p = i == UNDEFINED_ENTRY_IND;
+            }
+            else {
+                i = set_find_table_entry_ind(tab, hash, key);
+                if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
+                    goto retry;
+                error_p = i == UNDEFINED_ENTRY_IND;
+                i -= ENTRY_BASE;
+            }
+            if (error_p && check_p) {
+                /* call func with error notice */
+                retval = (*func)(0, arg, 1); /* the value assigned here is not read: we return at once */
+                return 1;
+            }
+            curr_entry_ptr = &entries[i]; /* NOTE(review): reached with the
+                                             not-found index when error_p is
+                                             set and CHECK_P is zero --
+                                             confirm that case cannot occur.  */
+        }
+        switch (retval) {
+          case ST_REPLACE:
+            break;
+          case ST_CONTINUE:
+            break;
+          case ST_CHECK:
+            if (check_p)
+                break;
+          case ST_STOP: /* also reached by fall-through from ST_CHECK when CHECK_P is zero */
+            return 0;
+          case ST_DELETE: {
+            st_data_t key = curr_entry_ptr->key; /* shadows the outer key inside this case */
+
+          again:
+            if (packed_p) {
+                bin = set_find_entry(tab, hash, key);
+                if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
+                    goto again;
+                if (bin == UNDEFINED_ENTRY_IND) /* entry is already gone: nothing to delete */
+                    break;
+            }
+            else {
+                bin_ind = set_find_table_bin_ind(tab, hash, key);
+                if (EXPECT(bin_ind ==
REBUILT_TABLE_BIN_IND, 0))
+                    goto again;
+                if (bin_ind == UNDEFINED_BIN_IND)
+                    break;
+                bin = get_bin(tab->bins, set_get_size_ind(tab), bin_ind) - ENTRY_BASE; /* bins hold entry index plus ENTRY_BASE */
+                MARK_SET_BIN_DELETED(tab, bin_ind);
+            }
+            curr_entry_ptr = &entries[bin];
+            MARK_ENTRY_DELETED(curr_entry_ptr);
+            tab->num_entries--;
+            set_update_range_for_deleted(tab, bin);
+            break;
+          }
+        }
+    }
+    return 0;
+}
+/* Traverse table TAB with set_general_foreach, passing FUNC, REPLACE
+   and ARG through and setting CHECK_P to TRUE.  Returns what
+   set_general_foreach returns.  */
+int
+set_foreach_with_replace(set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg)
+{
+    return set_general_foreach(tab, func, replace, arg, TRUE);
+}
+
+struct set_functor { /* callback and argument bundled for set_apply_functor */
+    set_foreach_callback_func *func;
+    st_data_t arg;
+};
+/* Adapter used by set_foreach: D carries a pointer to a struct
+   set_functor; the wrapped callback is called with K and the stored
+   argument.  The third parameter is ignored.  */
+static int
+set_apply_functor(st_data_t k, st_data_t d, int _)
+{
+    const struct set_functor *f = (void *)d;
+    return f->func(k, f->arg);
+}
+/* Traverse table TAB calling FUNC with each key and ARG.  FUNC and ARG
+   are wrapped in a struct set_functor on the stack and handed to
+   set_general_foreach through set_apply_functor, with no REPLACE
+   callback and CHECK_P set to FALSE.  */
+int
+set_foreach(set_table *tab, set_foreach_callback_func *func, st_data_t arg)
+{
+    const struct set_functor f = { func, arg };
+    return set_general_foreach(tab, set_apply_functor, NULL, (st_data_t)&f, FALSE);
+}
+
+/* See comments for function set_delete_safe.
+   NOTE(review): set_delete_safe is not defined in the visible part of
+   this file -- confirm the reference is still valid.
+   Traverse table TAB with set_general_foreach, with no REPLACE
+   callback and CHECK_P set to TRUE.  The NEVER argument is marked
+   ATTRIBUTE_UNUSED and is not read.  */
+int
+set_foreach_check(set_table *tab, set_foreach_check_callback_func *func, st_data_t arg,
+                  st_data_t never ATTRIBUTE_UNUSED)
+{
+    return set_general_foreach(tab, func, NULL, arg, TRUE);
+}
+
+/* Set up array KEYS by at most SIZE keys of head table TAB entries:
+   entries are visited from entries_start up to entries_bound and the
+   keys of those not marked deleted are stored in order.
+   Return the number of keys set up in array KEYS.
+   NOTE(review): the function is declared plain inline, neither static
+   nor extern; under C99 inline rules that does not by itself emit an
+   external definition -- confirm this is intended.  */
+inline st_index_t
+set_keys(set_table *tab, st_data_t *keys, st_index_t size)
+{
+    st_index_t i, bound;
+    st_data_t key, *keys_start, *keys_end;
+    set_table_entry *curr_entry_ptr, *entries = tab->entries;
+
+    bound = tab->entries_bound;
+    keys_start = keys;
+    keys_end = keys + size; /* one past the last slot that may be written */
+    for (i = tab->entries_start; i < bound; i++) {
+        if (keys == keys_end) /* output array is full */
+            break;
+        curr_entry_ptr = &entries[i];
+        key = curr_entry_ptr->key;
+        if (!
DELETED_ENTRY_P(curr_entry_ptr))
+            *keys++ = key;
+    }
+
+    return keys - keys_start; /* number of keys written */
+}
+/* Compact table TAB.  When REBUILD_THRESHOLD times num_entries does
+   not exceed the value returned by set_get_allocated_entries, a new
+   table sized for 2 * num_entries is created with
+   set_init_table_with_size and TAB is rebuilt through it with the
+   set_rebuild_* helpers; otherwise TAB is left as it is.
+   NOTE(review): the helpers are not visible here; presumably TAB ends
+   up owning the new storage and new_tab is released -- confirm.  */
+void
+set_compact_table(set_table *tab)
+{
+    st_index_t num = tab->num_entries;
+    if (REBUILD_THRESHOLD * num <= set_get_allocated_entries(tab)) {
+        /* Compaction: */
+        set_table *new_tab = set_init_table_with_size(NULL, tab->type, 2 * num);
+        set_rebuild_table_with(new_tab, tab);
+        set_rebuild_move_table(new_tab, tab);
+        set_rebuild_cleanup(tab);
+    }
+}
+ #endif diff --git a/test/ruby/test_require_lib.rb b/test/ruby/test_require_lib.rb index 81c2fdf833..44dfbcf9ec 100644 --- a/test/ruby/test_require_lib.rb +++ b/test/ruby/test_require_lib.rb @@ -13,7 +13,7 @@ class TestRequireLib < Test::Unit::TestCase scripts.concat(Dir.glob(dirs.map {|d| d + '/*.rb'}, base: libdir).map {|f| f.chomp('.rb')}) # skip some problems - scripts -= %w[bundler bundled_gems rubygems mkmf] + scripts -= %w[bundler bundled_gems rubygems mkmf set/sorted_set] scripts.each do |lib| define_method "test_thread_size:#{lib}" do diff --git a/test/set/test_set.rb b/test/ruby/test_set.rb similarity index 100% rename from test/set/test_set.rb rename to test/ruby/test_set.rb diff --git a/tool/test-bundled-gems.rb b/tool/test-bundled-gems.rb index a71d7dce7e..b3a220b43b 100644 --- a/tool/test-bundled-gems.rb +++ b/tool/test-bundled-gems.rb @@ -10,7 +10,7 @@ github_actions = ENV["GITHUB_ACTIONS"] == "true" allowed_failures = ENV['TEST_BUNDLED_GEMS_ALLOW_FAILURES'] || '' if RUBY_PLATFORM =~ /mswin|mingw/ - allowed_failures = [allowed_failures, "rbs,debug,irb"].join(',') + allowed_failures = [allowed_failures, "rbs,debug,irb,repl_type_completor"].join(',') end allowed_failures = allowed_failures.split(',').uniq.reject(&:empty?)