* enc/unicode.c: Artificial mapping to test buffer expansion code.

* string.c: Fixed buffer expansion logic.
* test/ruby/enc/test_case_mapping.rb: Tests for above.
(with Kimihito Matsui)


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53554 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
duerst 2016-01-16 08:24:58 +00:00
parent d2076446ed
commit c12af76763
4 changed files with 50 additions and 5 deletions

View File

@ -1,3 +1,10 @@
Sat Jan 16 17:24:24 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
* enc/unicode.c: Artificial mapping to test buffer expansion code.
* string.c: Fixed buffer expansion logic.
* test/ruby/enc/test_case_mapping.rb: Tests for above.
(with Kimihito Matsui)
Sat Jan 16 16:47:14 2016 SHIBATA Hiroshi <hsbt@ruby-lang.org> Sat Jan 16 16:47:14 2016 SHIBATA Hiroshi <hsbt@ruby-lang.org>
* ext/openssl/lib/openssl/pkey.rb: Added 2048 bit DH parameter. * ext/openssl/lib/openssl/pkey.rb: Added 2048 bit DH parameter.
@ -24,6 +31,7 @@ Sat Jan 16 10:23:23 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
option to avoid accidental problems in daily use. option to avoid accidental problems in daily use.
* test/ruby/enc/test_case_mapping.rb: Test for above. * test/ruby/enc/test_case_mapping.rb: Test for above.
* string.c: function 'check_case_options': fixed logical errors * string.c: function 'check_case_options': fixed logical errors
(with Kimihito Matsui)
Fri Jan 15 20:20:20 2016 Naohisa Goto <ngotogenome@gmail.com> Fri Jan 15 20:20:20 2016 Naohisa Goto <ngotogenome@gmail.com>

View File

@ -610,13 +610,14 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
be duplicated here (and in string.c), but we'll wait for this because we be duplicated here (and in string.c), but we'll wait for this because we
want this to become a primitive anyway. */ want this to become a primitive anyway. */
extern int extern int
onigenc_unicode_case_map(OnigCaseFoldType* flags, onigenc_unicode_case_map(OnigCaseFoldType* flagP,
const OnigUChar** pp, const OnigUChar* end, const OnigUChar** pp, const OnigUChar* end,
OnigUChar* to, OnigUChar* to_end, OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc) const struct OnigEncodingTypeST* enc)
{ {
OnigCodePoint code; OnigCodePoint code;
OnigUChar *to_start = to; OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
to_end -= CASE_MAPPING_SLACK; to_end -= CASE_MAPPING_SLACK;
/* hopelessly preliminary implementation, just dealing with ASCII, /* hopelessly preliminary implementation, just dealing with ASCII,
@ -624,11 +625,25 @@ onigenc_unicode_case_map(OnigCaseFoldType* flags,
while (*pp<end && to<=to_end) { while (*pp<end && to<=to_end) {
code = ONIGENC_MBC_TO_CODE(enc, *pp, end); code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
*pp += enclen(enc, *pp, end); *pp += enclen(enc, *pp, end);
if (code>='A' && code<='Z') { /* using :turkic to test buffer expansion */
if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code==0x0049) { /* I */
to += ONIGENC_CODE_TO_MBC(enc, 'T', to);
to += ONIGENC_CODE_TO_MBC(enc, 'U', to);
to += ONIGENC_CODE_TO_MBC(enc, 'R', to);
to += ONIGENC_CODE_TO_MBC(enc, 'K', to);
to += ONIGENC_CODE_TO_MBC(enc, 'I', to);
to += ONIGENC_CODE_TO_MBC(enc, 'S', to);
to += ONIGENC_CODE_TO_MBC(enc, 'H', to);
to += ONIGENC_CODE_TO_MBC(enc, '*', to);
code = 0x0131;
flags |= ONIGENC_CASE_MODIFIED;
}
else if (code>='A' && code<='Z') {
code += 'a'-'A'; code += 'a'-'A';
*flags |= ONIGENC_CASE_MODIFIED; flags |= ONIGENC_CASE_MODIFIED;
} }
to += ONIGENC_CODE_TO_MBC(enc, code, to); to += ONIGENC_CODE_TO_MBC(enc, code, to);
} }
*flagP = flags;
return (int)(to-to_start); return (int)(to-to_start);
} }

View File

@ -5673,6 +5673,7 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
while (source_current < source_end) { while (source_current < source_end) {
/* increase multiplier using buffer count to converge quickly */ /* increase multiplier using buffer count to converge quickly */
int capa = (int)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH; int capa = (int)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
/* fprintf(stderr, "Buffer allocation, capa is %d\n", capa); *//* for tuning */
current_buffer->next = (mapping_buffer*)ALLOC_N(char, sizeof(mapping_buffer)+capa); current_buffer->next = (mapping_buffer*)ALLOC_N(char, sizeof(mapping_buffer)+capa);
current_buffer = current_buffer->next; current_buffer = current_buffer->next;
current_buffer->next = NULL; current_buffer->next = NULL;
@ -5684,13 +5685,22 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
current_buffer->space+current_buffer->capa, current_buffer->space+current_buffer->capa,
enc); enc);
} }
/* fprintf(stderr, "Buffer count is %d\n", buffer_count); *//* for tuning */
if (buffer_count==1) if (buffer_count==1)
target = rb_str_new_with_class(source, (const char*)current_buffer->space, target_length); target = rb_str_new_with_class(source, (const char*)current_buffer->space, target_length);
else { else {
char *target_current = RSTRING_PTR(target = rb_str_new_with_class(source, 0, target_length)); char *target_current = RSTRING_PTR(target = rb_str_new_with_class(source, 0, target_length));
for (current_buffer=pre_buffer.next; current_buffer; current_buffer=current_buffer->next) mapping_buffer *previous_buffer;
current_buffer=pre_buffer.next;
while (current_buffer) {
memcpy(target_current, current_buffer->space, current_buffer->used); memcpy(target_current, current_buffer->space, current_buffer->used);
target_current += current_buffer->used;
previous_buffer = current_buffer;
current_buffer=current_buffer->next;
xfree(previous_buffer);
}
} }
/* TODO: check about string terminator character */ /* TODO: check about string terminator character */

View File

@ -6,6 +6,18 @@ require "test/unit"
# to test new implementation strategy # to test new implementation strategy
class TestCaseMappingPreliminary < Test::Unit::TestCase class TestCaseMappingPreliminary < Test::Unit::TestCase
def test_case_mapping_preliminary def test_case_mapping_preliminary
assert_equal "yukihiro matsumoto (matz)", "Yukihiro MATSUMOTO (MATZ)".downcase(:lithuanian) assert_equal 'yukihiro matsumoto (matz)',
'Yukihiro MATSUMOTO (MATZ)'.downcase(:lithuanian)
assert_equal 'matsumoto yukTURKISH*ıhTURKISH*ıro (matz)',
'MATSUMOTO YUKIHIRO (MATZ)'.downcase(:turkic, :lithuanian)
end
def test_buffer_allocations
assert_equal 'TURKISH*ı'*10, ('I'*10).downcase(:turkic, :lithuanian)
assert_equal 'TURKISH*ı'*100, ('I'*100).downcase(:turkic, :lithuanian)
assert_equal 'TURKISH*ı'*1_000, ('I'*1_000).downcase(:turkic, :lithuanian)
assert_equal 'TURKISH*ı'*10_000, ('I'*10_000).downcase(:turkic, :lithuanian)
assert_equal 'TURKISH*ı'*100_000, ('I'*100_000).downcase(:turkic, :lithuanian)
assert_equal 'TURKISH*ı'*1_000_000, ('I'*1_000_000).downcase(:turkic, :lithuanian)
end end
end end