prepare Unicode normalization for Unicode 16.0.0

This commit is contained in:
Martin Dürst 2025-04-18 16:03:30 +09:00
parent ab22f3910d
commit 900ece77b2
2 changed files with 12 additions and 0 deletions

View File

@ -114,6 +114,7 @@ module UnicodeNormalize # :nodoc:
last_class = accent_class
end
end
accents = nfc_one(accents) if accents.length>1 # TODO: change from recursion to loop
hangul_comp_one(start+accents)
end

View File

@ -112,6 +112,17 @@ accent_array = combining_class.keys + composition_table.keys.collect {|key| key.
composition_starters = composition_table.keys.map(&:first)
# Special treatment for Unicode 16.0.0:
# also add to the accents every character whose decomposition begins
# (directly or indirectly) with a character that is both a composition
# starter and an accent.
# Only two levels of decomposition are examined here;
# deeper levels may have to be handled in the future.
starter_accents = composition_starters & accent_array
decomposition_table.each do |code, decomposition|
  head = decomposition.first
  # Level 1: the decomposition itself begins with a starter accent.
  accent_array << code if starter_accents.include?(head)
  # Level 2: the decomposition of that first character begins with one.
  # A character that matches on both levels is appended twice.
  accent_array << code if starter_accents.include?(decomposition_table[head]&.first)
end
# Every 28th code point from U+AC00 through U+D7A3 (the Hangul syllable range).
# 28 is the number of trailing-consonant variants per leading/vowel pair, so
# these are the syllables without a trailing consonant.
hangul_no_trailing = 0xAC00.step(0xD7A3, 28).to_a