* enc/unicode.c: Artificial mapping to test buffer expansion code.
* string.c: Fixed buffer expansion logic.
* test/ruby/enc/test_case_mapping.rb: Tests for above.
(with Kimihito Matsui)

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53554 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
d2076446ed
commit
c12af76763
@ -1,3 +1,10 @@
|
|||||||
|
Sat Jan 16 17:24:24 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||||
|
|
||||||
|
* enc/unicode.c: Artificial mapping to test buffer expansion code.
|
||||||
|
* string.c: Fixed buffer expansion logic.
|
||||||
|
* test/ruby/enc/test_case_mapping.rb: Tests for above.
|
||||||
|
(with Kimihito Matsui)
|
||||||
|
|
||||||
Sat Jan 16 16:47:14 2016 SHIBATA Hiroshi <hsbt@ruby-lang.org>
|
Sat Jan 16 16:47:14 2016 SHIBATA Hiroshi <hsbt@ruby-lang.org>
|
||||||
|
|
||||||
* ext/openssl/lib/openssl/pkey.rb: Added 2048 bit DH parameter.
|
* ext/openssl/lib/openssl/pkey.rb: Added 2048 bit DH parameter.
|
||||||
@ -24,6 +31,7 @@ Sat Jan 16 10:23:23 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
|
|||||||
option to avoid accidental problems in daily use.
|
option to avoid accidental problems in daily use.
|
||||||
* test/ruby/enc/test_case_mapping.rb: Test for above.
|
* test/ruby/enc/test_case_mapping.rb: Test for above.
|
||||||
* string.c: function 'check_case_options': fixed logical errors
|
* string.c: function 'check_case_options': fixed logical errors
|
||||||
|
(with Kimihito Matsui)
|
||||||
|
|
||||||
Fri Jan 15 20:20:20 2016 Naohisa Goto <ngotogenome@gmail.com>
|
Fri Jan 15 20:20:20 2016 Naohisa Goto <ngotogenome@gmail.com>
|
||||||
|
|
||||||
|
@ -610,13 +610,14 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
|||||||
be duplicated here (and in string.c), but we'll wait for this because we
|
be duplicated here (and in string.c), but we'll wait for this because we
|
||||||
want this to become a primitive anyway. */
|
want this to become a primitive anyway. */
|
||||||
extern int
|
extern int
|
||||||
onigenc_unicode_case_map(OnigCaseFoldType* flags,
|
onigenc_unicode_case_map(OnigCaseFoldType* flagP,
|
||||||
const OnigUChar** pp, const OnigUChar* end,
|
const OnigUChar** pp, const OnigUChar* end,
|
||||||
OnigUChar* to, OnigUChar* to_end,
|
OnigUChar* to, OnigUChar* to_end,
|
||||||
const struct OnigEncodingTypeST* enc)
|
const struct OnigEncodingTypeST* enc)
|
||||||
{
|
{
|
||||||
OnigCodePoint code;
|
OnigCodePoint code;
|
||||||
OnigUChar *to_start = to;
|
OnigUChar *to_start = to;
|
||||||
|
OnigCaseFoldType flags = *flagP;
|
||||||
to_end -= CASE_MAPPING_SLACK;
|
to_end -= CASE_MAPPING_SLACK;
|
||||||
|
|
||||||
/* hopelessly preliminary implementation, just dealing with ASCII,
|
/* hopelessly preliminary implementation, just dealing with ASCII,
|
||||||
@ -624,11 +625,25 @@ onigenc_unicode_case_map(OnigCaseFoldType* flags,
|
|||||||
while (*pp<end && to<=to_end) {
|
while (*pp<end && to<=to_end) {
|
||||||
code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
|
code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
|
||||||
*pp += enclen(enc, *pp, end);
|
*pp += enclen(enc, *pp, end);
|
||||||
if (code>='A' && code<='Z') {
|
/* using :turkic to test buffer expansion */
|
||||||
|
if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code==0x0049) { /* I */
|
||||||
|
to += ONIGENC_CODE_TO_MBC(enc, 'T', to);
|
||||||
|
to += ONIGENC_CODE_TO_MBC(enc, 'U', to);
|
||||||
|
to += ONIGENC_CODE_TO_MBC(enc, 'R', to);
|
||||||
|
to += ONIGENC_CODE_TO_MBC(enc, 'K', to);
|
||||||
|
to += ONIGENC_CODE_TO_MBC(enc, 'I', to);
|
||||||
|
to += ONIGENC_CODE_TO_MBC(enc, 'S', to);
|
||||||
|
to += ONIGENC_CODE_TO_MBC(enc, 'H', to);
|
||||||
|
to += ONIGENC_CODE_TO_MBC(enc, '*', to);
|
||||||
|
code = 0x0131;
|
||||||
|
flags |= ONIGENC_CASE_MODIFIED;
|
||||||
|
}
|
||||||
|
else if (code>='A' && code<='Z') {
|
||||||
code += 'a'-'A';
|
code += 'a'-'A';
|
||||||
*flags |= ONIGENC_CASE_MODIFIED;
|
flags |= ONIGENC_CASE_MODIFIED;
|
||||||
}
|
}
|
||||||
to += ONIGENC_CODE_TO_MBC(enc, code, to);
|
to += ONIGENC_CODE_TO_MBC(enc, code, to);
|
||||||
}
|
}
|
||||||
|
*flagP = flags;
|
||||||
return (int)(to-to_start);
|
return (int)(to-to_start);
|
||||||
}
|
}
|
||||||
|
12
string.c
12
string.c
@ -5673,6 +5673,7 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
|
|||||||
while (source_current < source_end) {
|
while (source_current < source_end) {
|
||||||
/* increase multiplier using buffer count to converge quickly */
|
/* increase multiplier using buffer count to converge quickly */
|
||||||
int capa = (int)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
|
int capa = (int)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
|
||||||
|
/* fprintf(stderr, "Buffer allocation, capa is %d\n", capa); *//* for tuning */
|
||||||
current_buffer->next = (mapping_buffer*)ALLOC_N(char, sizeof(mapping_buffer)+capa);
|
current_buffer->next = (mapping_buffer*)ALLOC_N(char, sizeof(mapping_buffer)+capa);
|
||||||
current_buffer = current_buffer->next;
|
current_buffer = current_buffer->next;
|
||||||
current_buffer->next = NULL;
|
current_buffer->next = NULL;
|
||||||
@ -5684,13 +5685,22 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
|
|||||||
current_buffer->space+current_buffer->capa,
|
current_buffer->space+current_buffer->capa,
|
||||||
enc);
|
enc);
|
||||||
}
|
}
|
||||||
|
/* fprintf(stderr, "Buffer count is %d\n", buffer_count); *//* for tuning */
|
||||||
|
|
||||||
if (buffer_count==1)
|
if (buffer_count==1)
|
||||||
target = rb_str_new_with_class(source, (const char*)current_buffer->space, target_length);
|
target = rb_str_new_with_class(source, (const char*)current_buffer->space, target_length);
|
||||||
else {
|
else {
|
||||||
char *target_current = RSTRING_PTR(target = rb_str_new_with_class(source, 0, target_length));
|
char *target_current = RSTRING_PTR(target = rb_str_new_with_class(source, 0, target_length));
|
||||||
for (current_buffer=pre_buffer.next; current_buffer; current_buffer=current_buffer->next)
|
mapping_buffer *previous_buffer;
|
||||||
|
|
||||||
|
current_buffer=pre_buffer.next;
|
||||||
|
while (current_buffer) {
|
||||||
memcpy(target_current, current_buffer->space, current_buffer->used);
|
memcpy(target_current, current_buffer->space, current_buffer->used);
|
||||||
|
target_current += current_buffer->used;
|
||||||
|
previous_buffer = current_buffer;
|
||||||
|
current_buffer=current_buffer->next;
|
||||||
|
xfree(previous_buffer);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: check about string terminator character */
|
/* TODO: check about string terminator character */
|
||||||
|
@ -6,6 +6,18 @@ require "test/unit"
|
|||||||
# to test new implementation strategy
|
# to test new implementation strategy
|
||||||
class TestCaseMappingPreliminary < Test::Unit::TestCase
|
class TestCaseMappingPreliminary < Test::Unit::TestCase
|
||||||
def test_case_mapping_preliminary
|
def test_case_mapping_preliminary
|
||||||
assert_equal "yukihiro matsumoto (matz)", "Yukihiro MATSUMOTO (MATZ)".downcase(:lithuanian)
|
assert_equal 'yukihiro matsumoto (matz)',
|
||||||
|
'Yukihiro MATSUMOTO (MATZ)'.downcase(:lithuanian)
|
||||||
|
assert_equal 'matsumoto yukTURKISH*ıhTURKISH*ıro (matz)',
|
||||||
|
'MATSUMOTO YUKIHIRO (MATZ)'.downcase(:turkic, :lithuanian)
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_buffer_allocations
|
||||||
|
assert_equal 'TURKISH*ı'*10, ('I'*10).downcase(:turkic, :lithuanian)
|
||||||
|
assert_equal 'TURKISH*ı'*100, ('I'*100).downcase(:turkic, :lithuanian)
|
||||||
|
assert_equal 'TURKISH*ı'*1_000, ('I'*1_000).downcase(:turkic, :lithuanian)
|
||||||
|
assert_equal 'TURKISH*ı'*10_000, ('I'*10_000).downcase(:turkic, :lithuanian)
|
||||||
|
assert_equal 'TURKISH*ı'*100_000, ('I'*100_000).downcase(:turkic, :lithuanian)
|
||||||
|
assert_equal 'TURKISH*ı'*1_000_000, ('I'*1_000_000).downcase(:turkic, :lithuanian)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
Loading…
x
Reference in New Issue
Block a user