From 65a8185eb212639875ae8db14dfffb1fa06b71e9 Mon Sep 17 00:00:00 2001 From: matz Date: Tue, 22 Jan 2008 03:59:53 +0000 Subject: [PATCH] * configure.in (MINIRUBY): remove -I$(EXTOUT)/$(arch) from MINIRUBY since miniruby might not be able to load DLL. * test/ruby/test_m17n.rb: move tests from bootstrap test. * encoding.c (enc_find): should check name if ASCII compatible. * string.c (rb_str_end_with): should check character boundary. * encoding.c (rb_enc_compatible): encoding must be ASCII compatible before checking ENC_CODERANGE_7BIT. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15167 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 14 +++ bootstraptest/test_m17n.rb | 200 ------------------------------------- configure.in | 2 +- encoding.c | 22 ++-- string.c | 11 +- test/ruby/test_m17n.rb | 109 ++++++++++++++++++-- 6 files changed, 137 insertions(+), 221 deletions(-) delete mode 100644 bootstraptest/test_m17n.rb diff --git a/ChangeLog b/ChangeLog index 04b1256042..719fa74b1e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +Tue Jan 22 12:57:07 2008 Yukihiro Matsumoto + + * configure.in (MINIRUBY): remove -I$(EXTOUT)/$(arch) from + MINIRUBY since miniruby might not be able to load DLL. + + * test/ruby/test_m17n.rb: move tests from bootstrap test. + + * encoding.c (enc_find): should check name if ASCII compatible. + + * string.c (rb_str_end_with): should check character boundary. + + * encoding.c (rb_enc_compatible): encoding must be ASCII + compatible before checking ENC_CODERANGE_7BIT. + Tue Jan 22 09:26:47 2008 Nobuyoshi Nakada * string.c (rb_str_each_char): iterates over a shadow. diff --git a/bootstraptest/test_m17n.rb b/bootstraptest/test_m17n.rb deleted file mode 100644 index d69a110f3d..0000000000 --- a/bootstraptest/test_m17n.rb +++ /dev/null @@ -1,200 +0,0 @@ -assert_normal_exit %q{ - "abcdefghij\xf0".force_encoding("utf-8").reverse.inspect -}, '[ruby-dev:32448]' - -assert_equal 'true', %q{ - "abc".sub(/b/, "\xa1\xa1".force_encoding("euc-jp")) == - "a\xa1\xa1c".force_encoding("euc-jp") -} - -assert_equal 'ok', %q{ - begin - if ("\xa1\xa2\xa1\xa3").force_encoding("euc-jp").split(//) == - ["\xa1\xa2".force_encoding("euc-jp"), "\xa1\xa3".force_encoding("euc-jp")] - :ok - else - :ng - end - rescue - :ng - end -}, '[ruby-dev:32452]' - -assert_equal 'ok', %q{ - begin - "\xa1\xa1".force_encoding("euc-jp") + "\xa1".force_encoding("ascii-8bit") - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'ok', %q{ - begin - "%s%s" % ["\xc2\xa1".force_encoding("sjis"), "\xc2\xa1".force_encoding("euc-jp")] - rescue ArgumentError - :ok - end -} - -assert_equal '0', %q{ - "\xa1\xa2".force_encoding("euc-jp").count("z") -} - -assert_equal '1', %q{ - "\xa1\xa2".force_encoding("euc-jp").delete("z").length -} - -assert_equal 'false', %q{ - "\xa1\xa2\xa3\xa4".force_encoding("euc-jp").include?("\xa3".force_encoding("euc-jp")) -} - -assert_equal 'ok', %q{ - "\xa1\xa2\xa3\xa4".force_encoding("euc-jp").index("\xa3".force_encoding("euc-jp")) or :ok -} - -assert_equal 'ok', %q{ - "\xa1\xa2\xa3\xa4".force_encoding("euc-jp").rindex("\xa3".force_encoding("euc-jp")) or :ok -} - -assert_equal 'false', %q{ - s1 = "\xa1\xa1".force_encoding("euc-jp") - s2 = s1.dup - (94*94+94).times { s2.next! } - s1 == s2 -} - -assert_equal 'ok', %q{ - "\xa1\xa2a\xa3\xa4".force_encoding("euc-jp").scan(/a/) - :ok -} - -assert_equal 'ok', %q{ - "\xa1\xa2a\xa3\xa4".force_encoding("euc-jp").split(/a/) - :ok -} - -assert_equal 'ok', %q{ - s1 = "\xa1\xa2".force_encoding("euc-jp") - s2 = "\xa1\xa2".force_encoding("sjis") - begin - s1.upto(s2) {|x| break } - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'true', %q{ - "%s" % "\xa1\xa1".force_encoding("euc-jp") == - "\xa1\xa1".force_encoding("euc-jp") -} - -assert_equal 'a', %q{ - s = "a".dup.force_encoding("EUC-JP") - s.length - s[0,2] -} - -assert_equal 'ok', %q{ - s1 = "\x81\x41".force_encoding("sjis") - s2 = "\x81\x61".force_encoding("sjis") - s1.casecmp(s2) == 0 ? :ng : :ok -} - -assert_equal 'EUC-JP', %q{ ("\xc2\xa1 %s".force_encoding("EUC-JP") % "foo").encoding.name } -assert_equal 'true', %q{ "\xa1\xa2\xa3\xa4".force_encoding("euc-jp")["\xa2\xa3".force_encoding("euc-jp")] == nil } -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - begin - s["\xb0\xa3"] = "foo" - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'EUC-JP', %q{ "\xa3\xb0".force_encoding("EUC-JP").center(10).encoding.name } - -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - begin - s.chomp("\xa3\xb4".force_encoding("shift_jis")) - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - begin - s.count("\xa3\xb0".force_encoding("ascii-8bit")) - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - begin - s.delete("\xa3\xb2".force_encoding("ascii-8bit")) - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - begin - s.each_line("\xa3\xb1".force_encoding("ascii-8bit")) {|l| } - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'true', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - s.gsub(/\xa3\xb1/e, "z") == "\xa3\xb0z\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") -} - -assert_equal 'false', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - s.include?("\xb0\xa3".force_encoding("euc-jp")) -} - -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - s.index("\xb3\xa3".force_encoding("euc-jp")) or :ok -} - -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - s.insert(-1, "a") - :ok -} - -assert_finish 1, %q{ "\xa3\xfe".force_encoding("euc-jp").next } - -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - begin - s.rindex("\xb1\xa3".force_encoding("ascii-8bit")) - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'true', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - s.split("\xa3\xb1".force_encoding("euc-jp")) == [ - "\xa3\xb0".force_encoding("euc-jp"), - "\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - ] -}, '[ruby-dev:32452]' - -assert_normal_exit %q{ // =~ :a } diff --git a/configure.in b/configure.in index e2e549c8bb..c064b378c0 100644 --- a/configure.in +++ b/configure.in @@ -1370,7 +1370,7 @@ if test x"$cross_compiling" = xyes; then PREP=fake.rb RUNRUBY='$(MINIRUBY) -I`cd $(srcdir)/lib; pwd`' else - MINIRUBY='./miniruby$(EXEEXT) -I$(srcdir)/lib -I$(EXTOUT)/$(arch)' + MINIRUBY='./miniruby$(EXEEXT) -I$(srcdir)/lib' PREP='miniruby$(EXEEXT)' RUNRUBY='$(MINIRUBY) $(srcdir)/runruby.rb --extout=$(EXTOUT)' fi diff --git a/encoding.c b/encoding.c index 328f2d4742..f429f2c816 100644 --- a/encoding.c +++ b/encoding.c @@ -640,7 +640,7 @@ rb_encoding* rb_enc_compatible(VALUE str1, VALUE str2) { int idx1, idx2; - rb_encoding *enc; + rb_encoding *enc1, *enc2; idx1 = rb_enc_get_index(str1); idx2 = rb_enc_get_index(str2); @@ -648,6 +648,10 @@ rb_enc_compatible(VALUE str1, VALUE str2) if (idx1 == idx2) { return rb_enc_from_index(idx1); } + enc1 = rb_enc_from_index(idx1); + enc2 = rb_enc_from_index(idx2); + if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) + return 0; if (BUILTIN_TYPE(str1) != T_STRING) { VALUE tmp = str1; int idx0 = idx1; @@ -664,17 +668,15 @@ rb_enc_compatible(VALUE str1, VALUE str2) cr2 = rb_enc_str_coderange(str2); if (cr1 != cr2) { /* may need to handle ENC_CODERANGE_BROKEN */ - if (cr1 == ENC_CODERANGE_7BIT) return rb_enc_from_index(idx2); - if (cr2 == ENC_CODERANGE_7BIT) return rb_enc_from_index(idx1); + if (cr1 == ENC_CODERANGE_7BIT) return enc2; } if (cr2 == ENC_CODERANGE_7BIT) { - if (idx1 == 0) return rb_enc_from_index(idx2); - return rb_enc_from_index(idx1); + if (idx1 == 0) return enc2; + return enc1; } } - if (cr1 == ENC_CODERANGE_7BIT && - rb_enc_asciicompat(enc = rb_enc_from_index(idx2))) - return enc; + if (cr1 == ENC_CODERANGE_7BIT) + return enc2; } return 0; } @@ -908,7 +910,11 @@ static VALUE enc_find(VALUE klass, VALUE enc) { int idx; + if (SYMBOL_P(enc)) enc = rb_id2str(SYM2ID(enc)); + if (!rb_enc_asciicompat(rb_enc_get(enc))) { + rb_raise(rb_eArgError, "invalid name encoding (non ASCII)"); + } idx = rb_enc_find_index(StringValueCStr(enc)); if (idx < 0) { rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc)); diff --git a/string.c b/string.c index c963b7f434..7d716f0523 100644 --- a/string.c +++ b/string.c @@ -5522,14 +5522,19 @@ static VALUE rb_str_end_with(int argc, VALUE *argv, VALUE str) { int i; + char *p, *s; + rb_encoding *enc; for (i=0; i