[Feature #20724] Bump Unicode version to 16.0.0

This commit is contained in:
Mari Imaizumi 2025-04-18 16:16:59 +09:00
parent 900ece77b2
commit 63b07cdcbb
Notes: git 2025-04-18 10:50:38 +00:00
9 changed files with 10087 additions and 7827 deletions

View File

@ -27,7 +27,7 @@ Note: We're only listing outstanding class updates.
* String
* Update Unicode to Version 15.1.0 and Emoji Version 15.1. [[Feature #19908]]
* Update Unicode to Version 16.0.0 and Emoji Version 16.0. [[Feature #19908]][[Feature #20724]]
(also applies to Regexp)
## Stdlib updates
@ -95,5 +95,6 @@ The following bundled gems are updated.
## JIT
[Feature #19908]: https://bugs.ruby-lang.org/issues/19908
[Feature #20724]: https://bugs.ruby-lang.org/issues/20724
[Feature #21047]: https://bugs.ruby-lang.org/issues/21047
[Bug #21049]: https://bugs.ruby-lang.org/issues/21049

View File

@ -20,7 +20,7 @@ mflags = $(MFLAGS)
gnumake_recursive =
enable_shared = $(ENABLE_SHARED:no=)
UNICODE_VERSION = 15.1.0
UNICODE_VERSION = 16.0.0
UNICODE_EMOJI_VERSION_0 = $(UNICODE_VERSION)///
UNICODE_EMOJI_VERSION_1 = $(UNICODE_EMOJI_VERSION_0:.0///=)
UNICODE_EMOJI_VERSION = $(UNICODE_EMOJI_VERSION_1:///=)

View File

@ -85,6 +85,7 @@ Older versions may not support all of these.
- <tt>\p{Ideographic}</tt>, <tt>\p{Ideo}</tt>
- <tt>\p{Join_Control}</tt>, <tt>\p{Join_C}</tt>
- <tt>\p{Logical_Order_Exception}</tt>, <tt>\p{LOE}</tt>
- <tt>\p{Modifier_Combining_Mark}</tt>, <tt>\p{MCM}</tt>
- <tt>\p{Noncharacter_Code_Point}</tt>, <tt>\p{NChar}</tt>
- <tt>\p{Other_Alphabetic}</tt>, <tt>\p{OAlpha}</tt>
- <tt>\p{Other_Default_Ignorable_Code_Point}</tt>, <tt>\p{ODI}</tt>
@ -173,6 +174,7 @@ Older versions may not support all of these.
- <tt>\p{Elbasan}</tt>, <tt>\p{Elba}</tt>
- <tt>\p{Elymaic}</tt>, <tt>\p{Elym}</tt>
- <tt>\p{Ethiopic}</tt>, <tt>\p{Ethi}</tt>
- <tt>\p{Garay}</tt>, <tt>\p{Gara}</tt>
- <tt>\p{Georgian}</tt>, <tt>\p{Geor}</tt>
- <tt>\p{Glagolitic}</tt>, <tt>\p{Glag}</tt>
- <tt>\p{Gothic}</tt>, <tt>\p{Goth}</tt>
@ -181,6 +183,7 @@ Older versions may not support all of these.
- <tt>\p{Gujarati}</tt>, <tt>\p{Gujr}</tt>
- <tt>\p{Gunjala_Gondi}</tt>, <tt>\p{Gong}</tt>
- <tt>\p{Gurmukhi}</tt>, <tt>\p{Guru}</tt>
- <tt>\p{Gurung_Khema}</tt>, <tt>\p{Gukh}</tt>
- <tt>\p{Han}</tt>, <tt>\p{Hani}</tt>
- <tt>\p{Hangul}</tt>, <tt>\p{Hang}</tt>
- <tt>\p{Hanifi_Rohingya}</tt>, <tt>\p{Rohg}</tt>
@ -203,6 +206,7 @@ Older versions may not support all of these.
- <tt>\p{Khmer}</tt>, <tt>\p{Khmr}</tt>
- <tt>\p{Khojki}</tt>, <tt>\p{Khoj}</tt>
- <tt>\p{Khudawadi}</tt>, <tt>\p{Sind}</tt>
- <tt>\p{Kirat_Rai}</tt>, <tt>\p{Krai}</tt>
- <tt>\p{Lao}</tt>, <tt>\p{Laoo}</tt>
- <tt>\p{Latin}</tt>, <tt>\p{Latn}</tt>
- <tt>\p{Lepcha}</tt>, <tt>\p{Lepc}</tt>
@ -240,6 +244,7 @@ Older versions may not support all of these.
- <tt>\p{Nyiakeng_Puachue_Hmong}</tt>, <tt>\p{Hmnp}</tt>
- <tt>\p{Ogham}</tt>, <tt>\p{Ogam}</tt>
- <tt>\p{Ol_Chiki}</tt>, <tt>\p{Olck}</tt>
- <tt>\p{Ol_Onal}</tt>, <tt>\p{Onao}</tt>
- <tt>\p{Old_Hungarian}</tt>, <tt>\p{Hung}</tt>
- <tt>\p{Old_Italic}</tt>, <tt>\p{Ital}</tt>
- <tt>\p{Old_North_Arabian}</tt>, <tt>\p{Narb}</tt>
@ -271,6 +276,7 @@ Older versions may not support all of these.
- <tt>\p{Sora_Sompeng}</tt>, <tt>\p{Sora}</tt>
- <tt>\p{Soyombo}</tt>, <tt>\p{Soyo}</tt>
- <tt>\p{Sundanese}</tt>, <tt>\p{Sund}</tt>
- <tt>\p{Sunuwar}</tt>, <tt>\p{Sunu}</tt>
- <tt>\p{Syloti_Nagri}</tt>, <tt>\p{Sylo}</tt>
- <tt>\p{Syriac}</tt>, <tt>\p{Syrc}</tt>
- <tt>\p{Tagalog}</tt>, <tt>\p{Tglg}</tt>
@ -288,7 +294,9 @@ Older versions may not support all of these.
- <tt>\p{Tibetan}</tt>, <tt>\p{Tibt}</tt>
- <tt>\p{Tifinagh}</tt>, <tt>\p{Tfng}</tt>
- <tt>\p{Tirhuta}</tt>, <tt>\p{Tirh}</tt>
- <tt>\p{Todhri}</tt>, <tt>\p{Todr}</tt>
- <tt>\p{Toto}</tt>
- <tt>\p{Tulu_Tigalari}</tt>, <tt>\p{Tutg}</tt>
- <tt>\p{Ugaritic}</tt>, <tt>\p{Ugar}</tt>
- <tt>\p{Unknown}</tt>, <tt>\p{Zzzz}</tt>
- <tt>\p{Vai}</tt>, <tt>\p{Vaii}</tt>
@ -396,6 +404,7 @@ Older versions may not support all of these.
- <tt>\p{In_Early_Dynastic_Cuneiform}</tt>
- <tt>\p{In_Egyptian_Hieroglyph_Format_Controls}</tt>
- <tt>\p{In_Egyptian_Hieroglyphs}</tt>
- <tt>\p{In_Egyptian_Hieroglyphs_Extended_A}</tt>
- <tt>\p{In_Elbasan}</tt>
- <tt>\p{In_Elymaic}</tt>
- <tt>\p{In_Emoticons}</tt>
@ -408,6 +417,7 @@ Older versions may not support all of these.
- <tt>\p{In_Ethiopic_Extended_A}</tt>
- <tt>\p{In_Ethiopic_Extended_B}</tt>
- <tt>\p{In_Ethiopic_Supplement}</tt>
- <tt>\p{In_Garay}</tt>
- <tt>\p{In_General_Punctuation}</tt>
- <tt>\p{In_Geometric_Shapes}</tt>
- <tt>\p{In_Geometric_Shapes_Extended}</tt>
@ -423,6 +433,7 @@ Older versions may not support all of these.
- <tt>\p{In_Gujarati}</tt>
- <tt>\p{In_Gunjala_Gondi}</tt>
- <tt>\p{In_Gurmukhi}</tt>
- <tt>\p{In_Gurung_Khema}</tt>
- <tt>\p{In_Halfwidth_and_Fullwidth_Forms}</tt>
- <tt>\p{In_Hangul_Compatibility_Jamo}</tt>
- <tt>\p{In_Hangul_Jamo}</tt>
@ -462,6 +473,7 @@ Older versions may not support all of these.
- <tt>\p{In_Khmer_Symbols}</tt>
- <tt>\p{In_Khojki}</tt>
- <tt>\p{In_Khudawadi}</tt>
- <tt>\p{In_Kirat_Rai}</tt>
- <tt>\p{In_Lao}</tt>
- <tt>\p{In_Latin_1_Supplement}</tt>
- <tt>\p{In_Latin_Extended_A}</tt>
@ -517,6 +529,7 @@ Older versions may not support all of these.
- <tt>\p{In_Myanmar}</tt>
- <tt>\p{In_Myanmar_Extended_A}</tt>
- <tt>\p{In_Myanmar_Extended_B}</tt>
- <tt>\p{In_Myanmar_Extended_C}</tt>
- <tt>\p{In_NKo}</tt>
- <tt>\p{In_Nabataean}</tt>
- <tt>\p{In_Nag_Mundari}</tt>
@ -529,6 +542,7 @@ Older versions may not support all of these.
- <tt>\p{In_Nyiakeng_Puachue_Hmong}</tt>
- <tt>\p{In_Ogham}</tt>
- <tt>\p{In_Ol_Chiki}</tt>
- <tt>\p{In_Ol_Onal}</tt>
- <tt>\p{In_Old_Hungarian}</tt>
- <tt>\p{In_Old_Italic}</tt>
- <tt>\p{In_Old_North_Arabian}</tt>
@ -575,6 +589,7 @@ Older versions may not support all of these.
- <tt>\p{In_Specials}</tt>
- <tt>\p{In_Sundanese}</tt>
- <tt>\p{In_Sundanese_Supplement}</tt>
- <tt>\p{In_Sunuwar}</tt>
- <tt>\p{In_Superscripts_and_Subscripts}</tt>
- <tt>\p{In_Supplemental_Arrows_A}</tt>
- <tt>\p{In_Supplemental_Arrows_B}</tt>
@ -588,6 +603,7 @@ Older versions may not support all of these.
- <tt>\p{In_Syloti_Nagri}</tt>
- <tt>\p{In_Symbols_and_Pictographs_Extended_A}</tt>
- <tt>\p{In_Symbols_for_Legacy_Computing}</tt>
- <tt>\p{In_Symbols_for_Legacy_Computing_Supplement}</tt>
- <tt>\p{In_Syriac}</tt>
- <tt>\p{In_Syriac_Supplement}</tt>
- <tt>\p{In_Tagalog}</tt>
@ -610,8 +626,10 @@ Older versions may not support all of these.
- <tt>\p{In_Tibetan}</tt>
- <tt>\p{In_Tifinagh}</tt>
- <tt>\p{In_Tirhuta}</tt>
- <tt>\p{In_Todhri}</tt>
- <tt>\p{In_Toto}</tt>
- <tt>\p{In_Transport_and_Map_Symbols}</tt>
- <tt>\p{In_Tulu_Tigalari}</tt>
- <tt>\p{In_Ugaritic}</tt>
- <tt>\p{In_Unified_Canadian_Aboriginal_Syllabics}</tt>
- <tt>\p{In_Unified_Canadian_Aboriginal_Syllabics_Extended}</tt>
@ -666,6 +684,7 @@ Older versions may not support all of these.
- <tt>\p{Age_14_0}</tt>
- <tt>\p{Age_15_0}</tt>
- <tt>\p{Age_15_1}</tt>
- <tt>\p{Age_16_0}</tt>
- <tt>\p{Age_1_1}</tt>
- <tt>\p{Age_2_0}</tt>
- <tt>\p{Age_2_1}</tt>

File diff suppressed because it is too large Load Diff

View File

@ -29,7 +29,7 @@ module UnicodeNormalize # :nodoc:
"\u0825-\u0827" \
"\u0829-\u082D" \
"\u0859-\u085B" \
"\u0898-\u089F" \
"\u0897-\u089F" \
"\u08CA-\u08E1" \
"\u08E3-\u08FF" \
"\u093C" \
@ -149,6 +149,7 @@ module UnicodeNormalize # :nodoc:
"\u{10A3F}" \
"\u{10AE5}\u{10AE6}" \
"\u{10D24}-\u{10D27}" \
"\u{10D69}-\u{10D6D}" \
"\u{10EAB}\u{10EAC}" \
"\u{10EFD}-\u{10EFF}" \
"\u{10F46}-\u{10F50}" \
@ -171,6 +172,12 @@ module UnicodeNormalize # :nodoc:
"\u{11357}" \
"\u{11366}-\u{1136C}" \
"\u{11370}-\u{11374}" \
"\u{113B8}" \
"\u{113BB}" \
"\u{113C2}" \
"\u{113C5}" \
"\u{113C7}-\u{113C9}" \
"\u{113CE}-\u{113D0}" \
"\u{11442}" \
"\u{11446}" \
"\u{1145E}" \
@ -196,8 +203,11 @@ module UnicodeNormalize # :nodoc:
"\u{11D44}\u{11D45}" \
"\u{11D97}" \
"\u{11F41}\u{11F42}" \
"\u{1611E}-\u{16129}" \
"\u{1612F}" \
"\u{16AF0}-\u{16AF4}" \
"\u{16B30}-\u{16B36}" \
"\u{16D67}\u{16D68}" \
"\u{16FF0}\u{16FF1}" \
"\u{1BC9E}" \
"\u{1D165}-\u{1D169}" \
@ -216,6 +226,7 @@ module UnicodeNormalize # :nodoc:
"\u{1E2AE}" \
"\u{1E2EC}-\u{1E2EF}" \
"\u{1E4EC}-\u{1E4EF}" \
"\u{1E5EE}\u{1E5EF}" \
"\u{1E8D0}-\u{1E8D6}" \
"\u{1E944}-\u{1E94A}" \
"]"
@ -441,15 +452,25 @@ module UnicodeNormalize # :nodoc:
"\uFB40\uFB41" \
"\uFB43\uFB44" \
"\uFB46-\uFB4E" \
"\u{105C9}" \
"\u{105E4}" \
"\u{1109A}" \
"\u{1109C}" \
"\u{110AB}" \
"\u{1112E}\u{1112F}" \
"\u{1134B}\u{1134C}" \
"\u{11383}" \
"\u{11385}" \
"\u{1138E}" \
"\u{11391}" \
"\u{113C5}" \
"\u{113C7}\u{113C8}" \
"\u{114BB}\u{114BC}" \
"\u{114BE}" \
"\u{115BA}\u{115BB}" \
"\u{11938}" \
"\u{16121}-\u{16128}" \
"\u{16D68}-\u{16D6A}" \
"\u{1D15E}-\u{1D164}" \
"\u{1D1BB}-\u{1D1C0}" \
"\u{2F800}-\u{2FA1D}" \
@ -613,14 +634,25 @@ module UnicodeNormalize # :nodoc:
"\u30DB" \
"\u30EF-\u30F2" \
"\u30FD" \
"\u{105D2}" \
"\u{105DA}" \
"\u{11099}" \
"\u{1109B}" \
"\u{110A5}" \
"\u{11131}\u{11132}" \
"\u{11347}" \
"\u{11382}" \
"\u{11384}" \
"\u{1138B}" \
"\u{11390}" \
"\u{113C2}" \
"\u{114B9}" \
"\u{115B8}\u{115B9}" \
"\u{11935}" \
"\u{1611E}" \
"\u{16129}" \
"\u{16D63}" \
"\u{16D67}" \
"]?#{accents}+" \
"|#{'' # precomposed Hangul syllables
}" \
@ -891,6 +923,10 @@ module UnicodeNormalize # :nodoc:
"\u30F4" \
"\u30F7-\u30FA" \
"\u30FD\u30FE" \
"\u{105C9}" \
"\u{105D2}" \
"\u{105DA}" \
"\u{105E4}" \
"\u{11099}-\u{1109C}" \
"\u{110A5}" \
"\u{110AB}" \
@ -898,12 +934,23 @@ module UnicodeNormalize # :nodoc:
"\u{11131}\u{11132}" \
"\u{11347}" \
"\u{1134B}\u{1134C}" \
"\u{11382}-\u{11385}" \
"\u{1138B}" \
"\u{1138E}" \
"\u{11390}\u{11391}" \
"\u{113C2}" \
"\u{113C5}" \
"\u{113C7}\u{113C8}" \
"\u{114B9}" \
"\u{114BB}\u{114BC}" \
"\u{114BE}" \
"\u{115B8}-\u{115BB}" \
"\u{11935}" \
"\u{11938}" \
"\u{1611E}" \
"\u{16121}-\u{16129}" \
"\u{16D63}" \
"\u{16D67}-\u{16D6A}" \
"]?#{accents}+" \
"|#{'' # Hangul syllables with separate trailer
}" \
@ -1440,6 +1487,7 @@ module UnicodeNormalize # :nodoc:
"\u{10781}-\u{10785}" \
"\u{10787}-\u{107B0}" \
"\u{107B2}-\u{107BA}" \
"\u{1CCD6}-\u{1CCF9}" \
"\u{1D400}-\u{1D454}" \
"\u{1D456}-\u{1D49C}" \
"\u{1D49E}\u{1D49F}" \
@ -1789,6 +1837,7 @@ module UnicodeNormalize # :nodoc:
"\u0859"=>220,
"\u085A"=>220,
"\u085B"=>220,
"\u0897"=>230,
"\u0898"=>230,
"\u0899"=>220,
"\u089A"=>220,
@ -2234,6 +2283,11 @@ module UnicodeNormalize # :nodoc:
"\u{10D25}"=>230,
"\u{10D26}"=>230,
"\u{10D27}"=>230,
"\u{10D69}"=>230,
"\u{10D6A}"=>230,
"\u{10D6B}"=>230,
"\u{10D6C}"=>230,
"\u{10D6D}"=>230,
"\u{10EAB}"=>230,
"\u{10EAC}"=>230,
"\u{10EFD}"=>220,
@ -2286,6 +2340,9 @@ module UnicodeNormalize # :nodoc:
"\u{11372}"=>230,
"\u{11373}"=>230,
"\u{11374}"=>230,
"\u{113CE}"=>9,
"\u{113CF}"=>9,
"\u{113D0}"=>9,
"\u{11442}"=>9,
"\u{11446}"=>7,
"\u{1145E}"=>230,
@ -2313,6 +2370,7 @@ module UnicodeNormalize # :nodoc:
"\u{11D97}"=>9,
"\u{11F41}"=>9,
"\u{11F42}"=>9,
"\u{1612F}"=>9,
"\u{16AF0}"=>1,
"\u{16AF1}"=>1,
"\u{16AF2}"=>1,
@ -2416,6 +2474,8 @@ module UnicodeNormalize # :nodoc:
"\u{1E4ED}"=>232,
"\u{1E4EE}"=>220,
"\u{1E4EF}"=>230,
"\u{1E5EE}"=>230,
"\u{1E5EF}"=>220,
"\u{1E8D0}"=>220,
"\u{1E8D1}"=>220,
"\u{1E8D2}"=>220,
@ -3928,6 +3988,8 @@ module UnicodeNormalize # :nodoc:
"\uFB4C"=>"\u05D1\u05BF",
"\uFB4D"=>"\u05DB\u05BF",
"\uFB4E"=>"\u05E4\u05BF",
"\u{105C9}"=>"\u{105D2}\u0307",
"\u{105E4}"=>"\u{105DA}\u0307",
"\u{1109A}"=>"\u{11099}\u{110BA}",
"\u{1109C}"=>"\u{1109B}\u{110BA}",
"\u{110AB}"=>"\u{110A5}\u{110BA}",
@ -3935,12 +3997,30 @@ module UnicodeNormalize # :nodoc:
"\u{1112F}"=>"\u{11132}\u{11127}",
"\u{1134B}"=>"\u{11347}\u{1133E}",
"\u{1134C}"=>"\u{11347}\u{11357}",
"\u{11383}"=>"\u{11382}\u{113C9}",
"\u{11385}"=>"\u{11384}\u{113BB}",
"\u{1138E}"=>"\u{1138B}\u{113C2}",
"\u{11391}"=>"\u{11390}\u{113C9}",
"\u{113C5}"=>"\u{113C2}\u{113C2}",
"\u{113C7}"=>"\u{113C2}\u{113B8}",
"\u{113C8}"=>"\u{113C2}\u{113C9}",
"\u{114BB}"=>"\u{114B9}\u{114BA}",
"\u{114BC}"=>"\u{114B9}\u{114B0}",
"\u{114BE}"=>"\u{114B9}\u{114BD}",
"\u{115BA}"=>"\u{115B8}\u{115AF}",
"\u{115BB}"=>"\u{115B9}\u{115AF}",
"\u{11938}"=>"\u{11935}\u{11930}",
"\u{16121}"=>"\u{1611E}\u{1611E}",
"\u{16122}"=>"\u{1611E}\u{16129}",
"\u{16123}"=>"\u{1611E}\u{1611F}",
"\u{16124}"=>"\u{16129}\u{1611F}",
"\u{16125}"=>"\u{1611E}\u{16120}",
"\u{16126}"=>"\u{1611E}\u{1611E}\u{1611F}",
"\u{16127}"=>"\u{1611E}\u{16129}\u{1611F}",
"\u{16128}"=>"\u{1611E}\u{1611E}\u{16120}",
"\u{16D68}"=>"\u{16D67}\u{16D67}",
"\u{16D69}"=>"\u{16D63}\u{16D67}",
"\u{16D6A}"=>"\u{16D63}\u{16D67}\u{16D67}",
"\u{1D15E}"=>"\u{1D157}\u{1D165}",
"\u{1D15F}"=>"\u{1D158}\u{1D165}",
"\u{1D160}"=>"\u{1D158}\u{1D165}\u{1D16E}",
@ -6950,6 +7030,42 @@ module UnicodeNormalize # :nodoc:
"\u{107B8}"=>"\u01C2",
"\u{107B9}"=>"\u{1DF0A}",
"\u{107BA}"=>"\u{1DF1E}",
"\u{1CCD6}"=>"A",
"\u{1CCD7}"=>"B",
"\u{1CCD8}"=>"C",
"\u{1CCD9}"=>"D",
"\u{1CCDA}"=>"E",
"\u{1CCDB}"=>"F",
"\u{1CCDC}"=>"G",
"\u{1CCDD}"=>"H",
"\u{1CCDE}"=>"I",
"\u{1CCDF}"=>"J",
"\u{1CCE0}"=>"K",
"\u{1CCE1}"=>"L",
"\u{1CCE2}"=>"M",
"\u{1CCE3}"=>"N",
"\u{1CCE4}"=>"O",
"\u{1CCE5}"=>"P",
"\u{1CCE6}"=>"Q",
"\u{1CCE7}"=>"R",
"\u{1CCE8}"=>"S",
"\u{1CCE9}"=>"T",
"\u{1CCEA}"=>"U",
"\u{1CCEB}"=>"V",
"\u{1CCEC}"=>"W",
"\u{1CCED}"=>"X",
"\u{1CCEE}"=>"Y",
"\u{1CCEF}"=>"Z",
"\u{1CCF0}"=>"0",
"\u{1CCF1}"=>"1",
"\u{1CCF2}"=>"2",
"\u{1CCF3}"=>"3",
"\u{1CCF4}"=>"4",
"\u{1CCF5}"=>"5",
"\u{1CCF6}"=>"6",
"\u{1CCF7}"=>"7",
"\u{1CCF8}"=>"8",
"\u{1CCF9}"=>"9",
"\u{1D400}"=>"A",
"\u{1D401}"=>"B",
"\u{1D402}"=>"C",
@ -9242,6 +9358,8 @@ module UnicodeNormalize # :nodoc:
"\u30F1\u3099"=>"\u30F9",
"\u30F2\u3099"=>"\u30FA",
"\u30FD\u3099"=>"\u30FE",
"\u{105D2}\u0307"=>"\u{105C9}",
"\u{105DA}\u0307"=>"\u{105E4}",
"\u{11099}\u{110BA}"=>"\u{1109A}",
"\u{1109B}\u{110BA}"=>"\u{1109C}",
"\u{110A5}\u{110BA}"=>"\u{110AB}",
@ -9249,11 +9367,29 @@ module UnicodeNormalize # :nodoc:
"\u{11132}\u{11127}"=>"\u{1112F}",
"\u{11347}\u{1133E}"=>"\u{1134B}",
"\u{11347}\u{11357}"=>"\u{1134C}",
"\u{11382}\u{113C9}"=>"\u{11383}",
"\u{11384}\u{113BB}"=>"\u{11385}",
"\u{1138B}\u{113C2}"=>"\u{1138E}",
"\u{11390}\u{113C9}"=>"\u{11391}",
"\u{113C2}\u{113C2}"=>"\u{113C5}",
"\u{113C2}\u{113B8}"=>"\u{113C7}",
"\u{113C2}\u{113C9}"=>"\u{113C8}",
"\u{114B9}\u{114BA}"=>"\u{114BB}",
"\u{114B9}\u{114B0}"=>"\u{114BC}",
"\u{114B9}\u{114BD}"=>"\u{114BE}",
"\u{115B8}\u{115AF}"=>"\u{115BA}",
"\u{115B9}\u{115AF}"=>"\u{115BB}",
"\u{11935}\u{11930}"=>"\u{11938}",
"\u{1611E}\u{1611E}"=>"\u{16121}",
"\u{1611E}\u{16129}"=>"\u{16122}",
"\u{1611E}\u{1611F}"=>"\u{16123}",
"\u{16129}\u{1611F}"=>"\u{16124}",
"\u{1611E}\u{16120}"=>"\u{16125}",
"\u{16121}\u{1611F}"=>"\u{16126}",
"\u{16122}\u{1611F}"=>"\u{16127}",
"\u{16121}\u{16120}"=>"\u{16128}",
"\u{16D67}\u{16D67}"=>"\u{16D68}",
"\u{16D63}\u{16D67}"=>"\u{16D69}",
"\u{16D69}\u{16D67}"=>"\u{16D6A}",
}.freeze
end

View File

@ -16,8 +16,8 @@ describe "RbConfig::CONFIG['UNICODE_EMOJI_VERSION']" do
# Caution: ruby_version_is means is_or_later
ruby_version_is "3.5" do
it "is 15.1" do
RbConfig::CONFIG['UNICODE_EMOJI_VERSION'].should == "15.1"
it "is 16.0" do
RbConfig::CONFIG['UNICODE_EMOJI_VERSION'].should == "16.0"
end
end
end

View File

@ -16,8 +16,8 @@ describe "RbConfig::CONFIG['UNICODE_VERSION']" do
# Caution: ruby_version_is means is_or_later
ruby_version_is "3.5" do
it "is 15.1.0" do
RbConfig::CONFIG['UNICODE_VERSION'].should == "15.1.0"
it "is 16.0.0" do
RbConfig::CONFIG['UNICODE_VERSION'].should == "16.0.0"
end
end
end

View File

@ -1524,6 +1524,107 @@ class TestRegexp < Test::Unit::TestCase
"CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D")
end
# Checks that every code point (or code point range) first assigned in
# Unicode 16.0 is matched by \p{age=16.0} and not by \p{age=15.1}.
#
# The table below mirrors, in order, the "16.0" entries of
# https://www.unicode.org/Public/16.0.0/ucd/DerivedAge.txt
# Each row is [code point or range, character name(s) from that file].
def test_unicode_age_16_0
  @matches = %w[16.0]
  @unmatches = %w[15.1]

  newly_assigned = [
    ["\u{0897}",
     "ARABIC PEPET"],
    [("\u{1B4E}".."\u{1B4F}"),
     "BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN"],
    ["\u{1B7F}",
     "BALINESE PANTI BAWAK"],
    [("\u{1C89}".."\u{1C8A}"),
     "CYRILLIC CAPITAL LETTER TJE..CYRILLIC SMALL LETTER TJE"],
    [("\u{2427}".."\u{2429}"),
     "SYMBOL FOR DELETE SQUARE CHECKER BOARD FORM..SYMBOL FOR DELETE MEDIUM SHADE FORM"],
    [("\u{31E4}".."\u{31E5}"),
     "CJK STROKE HXG..CJK STROKE SZP"],
    [("\u{A7CB}".."\u{A7CD}"),
     "LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER S WITH DIAGONAL STROKE"],
    [("\u{A7DA}".."\u{A7DC}"),
     "LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER LAMBDA WITH STROKE"],
    [("\u{105C0}".."\u{105F3}"),
     "TODHRI LETTER A..TODHRI LETTER OO"],
    [("\u{10D40}".."\u{10D65}"),
     "GARAY DIGIT ZERO..GARAY CAPITAL LETTER OLD NA"],
    [("\u{10D69}".."\u{10D85}"),
     "GARAY VOWEL SIGN E..GARAY SMALL LETTER OLD NA"],
    [("\u{10D8E}".."\u{10D8F}"),
     "GARAY PLUS SIGN..GARAY MINUS SIGN"],
    [("\u{10EC2}".."\u{10EC4}"),
     "ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW"],
    ["\u{10EFC}",
     "ARABIC COMBINING ALEF OVERLAY"],
    [("\u{11380}".."\u{11389}"),
     "TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL"],
    ["\u{1138B}",
     "TULU-TIGALARI LETTER EE"],
    ["\u{1138E}",
     "TULU-TIGALARI LETTER AI"],
    [("\u{11390}".."\u{113B5}"),
     "TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA"],
    [("\u{113B7}".."\u{113C0}"),
     "TULU-TIGALARI SIGN AVAGRAHA..TULU-TIGALARI VOWEL SIGN VOCALIC LL"],
    ["\u{113C2}",
     "TULU-TIGALARI VOWEL SIGN EE"],
    ["\u{113C5}",
     "TULU-TIGALARI VOWEL SIGN AI"],
    [("\u{113C7}".."\u{113CA}"),
     "TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA"],
    [("\u{113CC}".."\u{113D5}"),
     "TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI DOUBLE DANDA"],
    [("\u{113D7}".."\u{113D8}"),
     "TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA"],
    [("\u{113E1}".."\u{113E2}"),
     "TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA"],
    [("\u{116D0}".."\u{116E3}"),
     "MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE"],
    [("\u{11BC0}".."\u{11BE1}"),
     "SUNUWAR LETTER DEVI..SUNUWAR SIGN PVO"],
    [("\u{11BF0}".."\u{11BF9}"),
     "SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE"],
    ["\u{11F5A}",
     "KAWI SIGN NUKTA"],
    [("\u{13460}".."\u{143FA}"),
     "EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA"],
    [("\u{16100}".."\u{16139}"),
     "GURUNG KHEMA LETTER A..GURUNG KHEMA DIGIT NINE"],
    [("\u{16D40}".."\u{16D79}"),
     "KIRAT RAI SIGN ANUSVARA..KIRAT RAI DIGIT NINE"],
    ["\u{18CFF}",
     "KHITAN SMALL SCRIPT CHARACTER-18CFF"],
    [("\u{1CC00}".."\u{1CCF9}"),
     "UP-POINTING GO-KART..OUTLINED DIGIT NINE"],
    [("\u{1CD00}".."\u{1CEB3}"),
     "BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET"],
    [("\u{1E5D0}".."\u{1E5FA}"),
     "OL ONAL LETTER O..OL ONAL DIGIT NINE"],
    ["\u{1E5FF}",
     "OL ONAL ABBREVIATION SIGN"],
    [("\u{1F8B2}".."\u{1F8BB}"),
     "RIGHTWARDS ARROW WITH LOWER HOOK..SOUTH WEST ARROW FROM BAR"],
    [("\u{1F8C0}".."\u{1F8C1}"),
     "LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW"],
    ["\u{1FA89}",
     "HARP"],
    ["\u{1FA8F}",
     "SHOVEL"],
    ["\u{1FABE}",
     "LEAFLESS TREE"],
    ["\u{1FAC6}",
     "FINGERPRINT"],
    ["\u{1FADC}",
     "ROOT VEGETABLE"],
    ["\u{1FADF}",
     "SPLATTER"],
    ["\u{1FAE9}",
     "FACE WITH BAGS UNDER EYES"],
    [("\u{1FBCB}".."\u{1FBEF}"),
     "WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE"],
  ]

  # Assert each entry in DerivedAge.txt order, exactly as listed above.
  newly_assigned.each do |code_points, description|
    assert_unicode_age(code_points, description)
  end
end
# Lazily built cache keyed by a Unicode age string (e.g. "16.0").
# Each value is a frozen pair of anchored regexps:
#   [0] matches strings made up only of characters with \p{age=<age>}
#   [1] matches strings made up only of characters with \P{age=<age>}
# The pair is stored on first lookup so each age is compiled only once.
UnicodeAgeRegexps = Hash.new do |cache, age|
  with_age = /\A\p{age=#{age}}+\z/u
  without_age = /\A\P{age=#{age}}+\z/u
  cache[age] = [with_age, without_age].freeze
end