diff --git a/common.mk b/common.mk
index ddceb96fbf..4f153059f4 100644
--- a/common.mk
+++ b/common.mk
@@ -430,7 +430,7 @@ ruby.imp: $(COMMONOBJS)
sort -u -o $@
install: install-$(INSTALLDOC)
-docs: $(DOCTARGETS)
+docs: srcs-doc $(DOCTARGETS) # srcs-doc: regenerate doc/regexp/unicode_properties.rdoc before building docs
pkgconfig-data: $(ruby_pc)
$(ruby_pc): $(srcdir)/template/ruby.pc.in config.status
@@ -624,15 +624,15 @@ do-install-dbg: $(PROGRAM) pre-install-dbg
post-install-dbg::
@$(NULLCMD)
-rdoc: PHONY main
+rdoc: PHONY main srcs-doc # srcs-doc: generated .rdoc sources under $(srcdir) must exist before RDoc scans it
@echo Generating RDoc documentation
$(Q) $(RDOC) --ri --op "$(RDOCOUT)" $(RDOC_GEN_OPTS) $(RDOCFLAGS) "$(srcdir)"
-html: PHONY main
+html: PHONY main srcs-doc # srcs-doc: generated .rdoc sources under $(srcdir) must exist before RDoc scans it
@echo Generating RDoc HTML files
$(Q) $(RDOC) --op "$(HTMLOUT)" $(RDOC_GEN_OPTS) $(RDOCFLAGS) "$(srcdir)"
-rdoc-coverage: PHONY main
+rdoc-coverage: PHONY main srcs-doc # srcs-doc: generated .rdoc sources under $(srcdir) must exist before RDoc scans it
@echo Generating RDoc coverage report
$(Q) $(RDOC) --quiet -C $(RDOCFLAGS) "$(srcdir)"
@@ -1142,7 +1142,7 @@ common-srcs: $(srcs_vpath)parse.c $(srcs_vpath)lex.c $(srcs_vpath)enc/trans/newl
missing-srcs: $(srcdir)/missing/des_tables.c
-srcs: common-srcs missing-srcs srcs-enc
+srcs: common-srcs missing-srcs srcs-enc srcs-doc # srcs-doc: also (re)generate documentation sources such as unicode_properties.rdoc
RIPPER_SRCS = $(srcdir)/ext/ripper/ripper.c \
$(srcdir)/ext/ripper/ripper_init.c \
@@ -1730,6 +1730,12 @@ $(UNICODE_HDR_DIR)/name2ctype.h:
$(UNICODE_SRC_DATA_DIR) $(UNICODE_SRC_EMOJI_DATA_DIR) > $@.new
$(MV) $@.new $@
+srcs-doc: $(srcdir)/doc/regexp/unicode_properties.rdoc # generated documentation sources; the rule below lists the .tmpl as a prerequisite so template edits trigger regeneration
+$(srcdir)/doc/regexp/unicode_properties.rdoc: $(srcdir)/template/unicode_properties.rdoc.tmpl $(UNICODE_HDR_DIR)/name2ctype.h $(UNICODE_PROPERTY_FILES)
+	$(Q) $(BOOTSTRAPRUBY) $(tooldir)/generic_erb.rb -c -o $@ \
+		$(srcdir)/template/unicode_properties.rdoc.tmpl \
+		$(UNICODE_SRC_DATA_DIR) $(UNICODE_HDR_DIR)/name2ctype.h
+
# the next non-comment line was:
# $(UNICODE_HDR_DIR)/casefold.h: $(tooldir)/enc-case-folding.rb \
# but was changed to make sure CI works on systems that don't have gperf
diff --git a/doc/regexp/unicode_properties.rdoc b/doc/regexp/unicode_properties.rdoc
index 354ed3a83c..a1d7ecc380 100644
--- a/doc/regexp/unicode_properties.rdoc
+++ b/doc/regexp/unicode_properties.rdoc
@@ -1,863 +1,678 @@
== \Regexps Based on Unicode Properties
The properties shown here are those currently supported in Ruby.
-Older versions may not support all of these;
-newer versions may support additional properties.
+Older versions may not support all of these.
=== POSIX brackets
-- /\p{Alpha}/
-- /\p{Blank}/
-- /\p{Cntrl}/
-- /\p{Digit}/
-- /\p{Graph}/
-- /\p{Lower}/
-- /\p{Print}/
-- /\p{Punct}/
-- /\p{Space}/
-- /\p{Upper}/
-- /\p{XDigit}/
-- /\p{Word}/
-- /\p{Alnum}/
-- /\p{ASCII}/
-- /\p{XPosixPunct}/
+- \p{ASCII}
+- \p{Alnum}
+- \p{Alphabetic}, \p{Alpha}
+- \p{Blank}
+- \p{Cntrl}
+- \p{Digit}
+- \p{Graph}
+- \p{Lowercase}, \p{Lower}
+- \p{Print}
+- \p{Punct}
+- \p{Space}
+- \p{Uppercase}, \p{Upper}
+- \p{Word}
+- \p{XDigit}
+- \p{XPosixPunct}
=== Special
-- /\p{Any}/
-- /\p{Assigned}/
+- \p{Any}
+- \p{Assigned}
=== Major and General Categories
-- /\p{C}/
-- /\p{Cc}/
-- /\p{Cf}/
-- /\p{Cn}/
-- /\p{Co}/
-- /\p{Cs}/
-- /\p{L}/
-- /\p{LC}/
-- /\p{Ll}/
-- /\p{Lm}/
-- /\p{Lo}/
-- /\p{Lt}/
-- /\p{Lu}/
-- /\p{M}/
-- /\p{Mc}/
-- /\p{Me}/
-- /\p{Mn}/
-- /\p{N}/
-- /\p{Nd}/
-- /\p{Nl}/
-- /\p{No}/
-- /\p{P}/
-- /\p{Pc}/
-- /\p{Pd}/
-- /\p{Pe}/
-- /\p{Pf}/
-- /\p{Pi}/
-- /\p{Po}/
-- /\p{Ps}/
-- /\p{S}/
-- /\p{Sc}/
-- /\p{Sk}/
-- /\p{Sm}/
-- /\p{So}/
-- /\p{Z}/
-- /\p{Zl}/
-- /\p{Zp}/
-- /\p{Zs}/
-
-=== Scripts
-
-- /\p{Adlam}/
-- /\p{Ahom}/
-- /\p{Anatolian_Hieroglyphs}/
-- /\p{Arabic}/
-- /\p{Armenian}/
-- /\p{Avestan}/
-- /\p{Balinese}/
-- /\p{Bamum}/
-- /\p{Bassa_Vah}/
-- /\p{Batak}/
-- /\p{Bengali}/
-- /\p{Bhaiksuki}/
-- /\p{Bopomofo}/
-- /\p{Brahmi}/
-- /\p{Braille}/
-- /\p{Buginese}/
-- /\p{Buhid}/
-- /\p{Canadian_Aboriginal}/
-- /\p{Carian}/
-- /\p{Caucasian_Albanian}/
-- /\p{Chakma}/
-- /\p{Cham}/
-- /\p{Cherokee}/
-- /\p{Common}/
-- /\p{Coptic}/
-- /\p{Cuneiform}/
-- /\p{Cypriot}/
-- /\p{Cyrillic}/
-- /\p{Deseret}/
-- /\p{Devanagari}/
-- /\p{Dogra}/
-- /\p{Duployan}/
-- /\p{Egyptian_Hieroglyphs}/
-- /\p{Elbasan}/
-- /\p{Elymaic}/
-- /\p{Ethiopic}/
-- /\p{Georgian}/
-- /\p{Glagolitic}/
-- /\p{Gothic}/
-- /\p{Grantha}/
-- /\p{Greek}/
-- /\p{Gujarati}/
-- /\p{Gunjala_Gondi}/
-- /\p{Gurmukhi}/
-- /\p{Han}/
-- /\p{Hangul}/
-- /\p{Hanifi_Rohingya}/
-- /\p{Hanunoo}/
-- /\p{Hatran}/
-- /\p{Hebrew}/
-- /\p{Hiragana}/
-- /\p{Imperial_Aramaic}/
-- /\p{Inherited}/
-- /\p{Inscriptional_Pahlavi}/
-- /\p{Inscriptional_Parthian}/
-- /\p{Javanese}/
-- /\p{Kaithi}/
-- /\p{Kannada}/
-- /\p{Katakana}/
-- /\p{Kayah_Li}/
-- /\p{Kharoshthi}/
-- /\p{Khmer}/
-- /\p{Khojki}/
-- /\p{Khudawadi}/
-- /\p{Lao}/
-- /\p{Latin}/
-- /\p{Lepcha}/
-- /\p{Limbu}/
-- /\p{Linear_A}/
-- /\p{Linear_B}/
-- /\p{Lisu}/
-- /\p{Lycian}/
-- /\p{Lydian}/
-- /\p{Mahajani}/
-- /\p{Makasar}/
-- /\p{Malayalam}/
-- /\p{Mandaic}/
-- /\p{Manichaean}/
-- /\p{Marchen}/
-- /\p{Masaram_Gondi}/
-- /\p{Medefaidrin}/
-- /\p{Meetei_Mayek}/
-- /\p{Mende_Kikakui}/
-- /\p{Meroitic_Cursive}/
-- /\p{Meroitic_Hieroglyphs}/
-- /\p{Miao}/
-- /\p{Modi}/
-- /\p{Mongolian}/
-- /\p{Mro}/
-- /\p{Multani}/
-- /\p{Myanmar}/
-- /\p{Nabataean}/
-- /\p{Nandinagari}/
-- /\p{New_Tai_Lue}/
-- /\p{Newa}/
-- /\p{Nko}/
-- /\p{Nushu}/
-- /\p{Nyiakeng_Puachue_Hmong}/
-- /\p{Ogham}/
-- /\p{Ol_Chiki}/
-- /\p{Old_Hungarian}/
-- /\p{Old_Italic}/
-- /\p{Old_North_Arabian}/
-- /\p{Old_Permic}/
-- /\p{Old_Persian}/
-- /\p{Old_Sogdian}/
-- /\p{Old_South_Arabian}/
-- /\p{Old_Turkic}/
-- /\p{Oriya}/
-- /\p{Osage}/
-- /\p{Osmanya}/
-- /\p{Pahawh_Hmong}/
-- /\p{Palmyrene}/
-- /\p{Pau_Cin_Hau}/
-- /\p{Phags_Pa}/
-- /\p{Phoenician}/
-- /\p{Psalter_Pahlavi}/
-- /\p{Rejang}/
-- /\p{Runic}/
-- /\p{Samaritan}/
-- /\p{Saurashtra}/
-- /\p{Sharada}/
-- /\p{Shavian}/
-- /\p{Siddham}/
-- /\p{SignWriting}/
-- /\p{Sinhala}/
-- /\p{Sogdian}/
-- /\p{Sora_Sompeng}/
-- /\p{Soyombo}/
-- /\p{Sundanese}/
-- /\p{Syloti_Nagri}/
-- /\p{Syriac}/
-- /\p{Tagalog}/
-- /\p{Tagbanwa}/
-- /\p{Tai_Le}/
-- /\p{Tai_Tham}/
-- /\p{Tai_Viet}/
-- /\p{Takri}/
-- /\p{Tamil}/
-- /\p{Tangut}/
-- /\p{Telugu}/
-- /\p{Thaana}/
-- /\p{Thai}/
-- /\p{Tibetan}/
-- /\p{Tifinagh}/
-- /\p{Tirhuta}/
-- /\p{Ugaritic}/
-- /\p{Unknown}/
-- /\p{Vai}/
-- /\p{Wancho}/
-- /\p{Warang_Citi}/
-- /\p{Yi}/
-- /\p{Zanabazar_Square}/
-
-=== Derived Core Properties
-
-- /\p{Alphabetic}/
-- /\p{Case_Ignorable}/
-- /\p{Cased}/
-- /\p{Changes_When_Casefolded}/
-- /\p{Changes_When_Casemapped}/
-- /\p{Changes_When_Lowercased}/
-- /\p{Changes_When_Titlecased}/
-- /\p{Changes_When_Uppercased}/
-- /\p{Default_Ignorable_Code_Point}/
-- /\p{Grapheme_Base}/
-- /\p{Grapheme_Extend}/
-- /\p{Grapheme_Link}/
-- /\p{ID_Continue}/
-- /\p{ID_Start}/
-- /\p{Lowercase}/
-- /\p{Math}/
-- /\p{Uppercase}/
-- /\p{XID_Continue}/
-- /\p{XID_Start}/
+- \p{Cased_Letter}, \p{LC}
+- \p{Close_Punctuation}, \p{Pe}
+- \p{Connector_Punctuation}, \p{Pc}
+- \p{Control}, \p{Cc}
+- \p{Currency_Symbol}, \p{Sc}
+- \p{Dash_Punctuation}, \p{Pd}
+- \p{Decimal_Number}, \p{Nd}
+- \p{Enclosing_Mark}, \p{Me}
+- \p{Final_Punctuation}, \p{Pf}
+- \p{Format}, \p{Cf}
+- \p{Initial_Punctuation}, \p{Pi}
+- \p{Letter}, \p{L}
+- \p{Letter_Number}, \p{Nl}
+- \p{Line_Separator}, \p{Zl}
+- \p{Lowercase_Letter}, \p{Ll}
+- \p{Mark}, \p{M}
+- \p{Math_Symbol}, \p{Sm}
+- \p{Modifier_Letter}, \p{Lm}
+- \p{Modifier_Symbol}, \p{Sk}
+- \p{Nonspacing_Mark}, \p{Mn}
+- \p{Number}, \p{N}
+- \p{Open_Punctuation}, \p{Ps}
+- \p{Other}, \p{C}
+- \p{Other_Letter}, \p{Lo}
+- \p{Other_Number}, \p{No}
+- \p{Other_Punctuation}, \p{Po}
+- \p{Other_Symbol}, \p{So}
+- \p{Paragraph_Separator}, \p{Zp}
+- \p{Private_Use}, \p{Co}
+- \p{Punctuation}, \p{P}
+- \p{Separator}, \p{Z}
+- \p{Space_Separator}, \p{Zs}
+- \p{Spacing_Mark}, \p{Mc}
+- \p{Surrogate}, \p{Cs}
+- \p{Symbol}, \p{S}
+- \p{Titlecase_Letter}, \p{Lt}
+- \p{Unassigned}, \p{Cn}
+- \p{Uppercase_Letter}, \p{Lu}
=== Prop List
-- /\p{ASCII_Hex_Digit}/
-- /\p{Bidi_Control}/
-- /\p{Dash}/
-- /\p{Deprecated}/
-- /\p{Diacritic}/
-- /\p{Extender}/
-- /\p{Hex_Digit}/
-- /\p{Hyphen}/
-- /\p{IDS_Binary_Operator}/
-- /\p{IDS_Trinary_Operator}/
-- /\p{Ideographic}/
-- /\p{Join_Control}/
-- /\p{Logical_Order_Exception}/
-- /\p{Noncharacter_Code_Point}/
-- /\p{Other_Alphabetic}/
-- /\p{Other_Default_Ignorable_Code_Point}/
-- /\p{Other_Grapheme_Extend}/
-- /\p{Other_ID_Continue}/
-- /\p{Other_ID_Start}/
-- /\p{Other_Lowercase}/
-- /\p{Other_Math}/
-- /\p{Other_Uppercase}/
-- /\p{Pattern_Syntax}/
-- /\p{Pattern_White_Space}/
-- /\p{Prepended_Concatenation_Mark}/
-- /\p{Quotation_Mark}/
-- /\p{Radical}/
-- /\p{Regional_Indicator}/
-- /\p{Sentence_Terminal}/
-- /\p{Soft_Dotted}/
-- /\p{Terminal_Punctuation}/
-- /\p{Unified_Ideograph}/
-- /\p{Variation_Selector}/
-- /\p{White_Space}/
+- \p{ASCII_Hex_Digit}, \p{AHex}
+- \p{Bidi_Control}, \p{Bidi_C}
+- \p{Dash}
+- \p{Deprecated}, \p{Dep}
+- \p{Diacritic}, \p{Dia}
+- \p{Extender}, \p{Ext}
+- \p{Hex_Digit}, \p{Hex}
+- \p{Hyphen}
+- \p{IDS_Binary_Operator}, \p{IDSB}
+- \p{IDS_Trinary_Operator}, \p{IDST}
+- \p{Ideographic}, \p{Ideo}
+- \p{Join_Control}, \p{Join_C}
+- \p{Logical_Order_Exception}, \p{LOE}
+- \p{Noncharacter_Code_Point}, \p{NChar}
+- \p{Other_Alphabetic}, \p{OAlpha}
+- \p{Other_Default_Ignorable_Code_Point}, \p{ODI}
+- \p{Other_Grapheme_Extend}, \p{OGr_Ext}
+- \p{Other_ID_Continue}, \p{OIDC}
+- \p{Other_ID_Start}, \p{OIDS}
+- \p{Other_Lowercase}, \p{OLower}
+- \p{Other_Math}, \p{OMath}
+- \p{Other_Uppercase}, \p{OUpper}
+- \p{Pattern_Syntax}, \p{Pat_Syn}
+- \p{Pattern_White_Space}, \p{Pat_WS}
+- \p{Prepended_Concatenation_Mark}, \p{PCM}
+- \p{Quotation_Mark}, \p{QMark}
+- \p{Radical}
+- \p{Regional_Indicator}, \p{RI}
+- \p{Sentence_Terminal}, \p{STerm}
+- \p{Soft_Dotted}, \p{SD}
+- \p{Terminal_Punctuation}, \p{Term}
+- \p{Unified_Ideograph}, \p{UIdeo}
+- \p{Variation_Selector}, \p{VS}
+- \p{White_Space}, \p{WSpace}
-=== Emoji
+=== Derived Core Properties
-- /\p{Emoji}/
-- /\p{Emoji_Component}/
-- /\p{Emoji_Modifier}/
-- /\p{Emoji_Modifier_Base}/
-- /\p{Emoji_Presentation}/
+- \p{Alphabetic}, \p{Alpha}
+- \p{Case_Ignorable}, \p{CI}
+- \p{Cased}
+- \p{Changes_When_Casefolded}, \p{CWCF}
+- \p{Changes_When_Casemapped}, \p{CWCM}
+- \p{Changes_When_Lowercased}, \p{CWL}
+- \p{Changes_When_Titlecased}, \p{CWT}
+- \p{Changes_When_Uppercased}, \p{CWU}
+- \p{Default_Ignorable_Code_Point}, \p{DI}
+- \p{Grapheme_Base}, \p{Gr_Base}
+- \p{Grapheme_Extend}, \p{Gr_Ext}
+- \p{Grapheme_Link}, \p{Gr_Link}
+- \p{ID_Continue}, \p{IDC}
+- \p{ID_Start}, \p{IDS}
+- \p{Lowercase}, \p{Lower}
+- \p{Math}
+- \p{Uppercase}, \p{Upper}
+- \p{XID_Continue}, \p{XIDC}
+- \p{XID_Start}, \p{XIDS}
-=== Property Aliases
+=== Scripts
-- /\p{AHex}/
-- /\p{Bidi_C}/
-- /\p{CI}/
-- /\p{CWCF}/
-- /\p{CWCM}/
-- /\p{CWL}/
-- /\p{CWT}/
-- /\p{CWU}/
-- /\p{DI}/
-- /\p{Dep}/
-- /\p{Dia}/
-- /\p{Ext}/
-- /\p{Gr_Base}/
-- /\p{Gr_Ext}/
-- /\p{Gr_Link}/
-- /\p{Hex}/
-- /\p{IDC}/
-- /\p{IDS}/
-- /\p{IDSB}/
-- /\p{IDST}/
-- /\p{Ideo}/
-- /\p{Join_C}/
-- /\p{LOE}/
-- /\p{NChar}/
-- /\p{OAlpha}/
-- /\p{ODI}/
-- /\p{OGr_Ext}/
-- /\p{OIDC}/
-- /\p{OIDS}/
-- /\p{OLower}/
-- /\p{OMath}/
-- /\p{OUpper}/
-- /\p{PCM}/
-- /\p{Pat_Syn}/
-- /\p{Pat_WS}/
-- /\p{QMark}/
-- /\p{RI}/
-- /\p{SD}/
-- /\p{STerm}/
-- /\p{Term}/
-- /\p{UIdeo}/
-- /\p{VS}/
-- /\p{WSpace}/
-- /\p{XIDC}/
-- /\p{XIDS}/
-
-=== Property Value Aliases (General Category)
-
-- /\p{Other}/
-- /\p{Control}/
-- /\p{Format}/
-- /\p{Unassigned}/
-- /\p{Private_Use}/
-- /\p{Surrogate}/
-- /\p{Letter}/
-- /\p{Cased_Letter}/
-- /\p{Lowercase_Letter}/
-- /\p{Modifier_Letter}/
-- /\p{Other_Letter}/
-- /\p{Titlecase_Letter}/
-- /\p{Uppercase_Letter}/
-- /\p{Mark}/
-- /\p{Combining_Mark}/
-- /\p{Spacing_Mark}/
-- /\p{Enclosing_Mark}/
-- /\p{Nonspacing_Mark}/
-- /\p{Number}/
-- /\p{Decimal_Number}/
-- /\p{Letter_Number}/
-- /\p{Other_Number}/
-- /\p{Punctuation}/
-- /\p{Connector_Punctuation}/
-- /\p{Dash_Punctuation}/
-- /\p{Close_Punctuation}/
-- /\p{Final_Punctuation}/
-- /\p{Initial_Punctuation}/
-- /\p{Other_Punctuation}/
-- /\p{Open_Punctuation}/
-- /\p{Symbol}/
-- /\p{Currency_Symbol}/
-- /\p{Modifier_Symbol}/
-- /\p{Math_Symbol}/
-- /\p{Other_Symbol}/
-- /\p{Separator}/
-- /\p{Line_Separator}/
-- /\p{Paragraph_Separator}/
-- /\p{Space_Separator}/
-
-=== Property Value Aliases (Script)
-
-- /\p{Adlm}/
-- /\p{Aghb}/
-- /\p{Arab}/
-- /\p{Armi}/
-- /\p{Armn}/
-- /\p{Avst}/
-- /\p{Bali}/
-- /\p{Bamu}/
-- /\p{Bass}/
-- /\p{Batk}/
-- /\p{Beng}/
-- /\p{Bhks}/
-- /\p{Bopo}/
-- /\p{Brah}/
-- /\p{Brai}/
-- /\p{Bugi}/
-- /\p{Buhd}/
-- /\p{Cakm}/
-- /\p{Cans}/
-- /\p{Cari}/
-- /\p{Cher}/
-- /\p{Copt}/
-- /\p{Qaac}/
-- /\p{Cprt}/
-- /\p{Cyrl}/
-- /\p{Deva}/
-- /\p{Dogr}/
-- /\p{Dsrt}/
-- /\p{Dupl}/
-- /\p{Egyp}/
-- /\p{Elba}/
-- /\p{Elym}/
-- /\p{Ethi}/
-- /\p{Geor}/
-- /\p{Glag}/
-- /\p{Gong}/
-- /\p{Gonm}/
-- /\p{Goth}/
-- /\p{Gran}/
-- /\p{Grek}/
-- /\p{Gujr}/
-- /\p{Guru}/
-- /\p{Hang}/
-- /\p{Hani}/
-- /\p{Hano}/
-- /\p{Hatr}/
-- /\p{Hebr}/
-- /\p{Hira}/
-- /\p{Hluw}/
-- /\p{Hmng}/
-- /\p{Hmnp}/
-- /\p{Hung}/
-- /\p{Ital}/
-- /\p{Java}/
-- /\p{Kali}/
-- /\p{Kana}/
-- /\p{Khar}/
-- /\p{Khmr}/
-- /\p{Khoj}/
-- /\p{Knda}/
-- /\p{Kthi}/
-- /\p{Lana}/
-- /\p{Laoo}/
-- /\p{Latn}/
-- /\p{Lepc}/
-- /\p{Limb}/
-- /\p{Lina}/
-- /\p{Linb}/
-- /\p{Lyci}/
-- /\p{Lydi}/
-- /\p{Mahj}/
-- /\p{Maka}/
-- /\p{Mand}/
-- /\p{Mani}/
-- /\p{Marc}/
-- /\p{Medf}/
-- /\p{Mend}/
-- /\p{Merc}/
-- /\p{Mero}/
-- /\p{Mlym}/
-- /\p{Mong}/
-- /\p{Mroo}/
-- /\p{Mtei}/
-- /\p{Mult}/
-- /\p{Mymr}/
-- /\p{Nand}/
-- /\p{Narb}/
-- /\p{Nbat}/
-- /\p{Nkoo}/
-- /\p{Nshu}/
-- /\p{Ogam}/
-- /\p{Olck}/
-- /\p{Orkh}/
-- /\p{Orya}/
-- /\p{Osge}/
-- /\p{Osma}/
-- /\p{Palm}/
-- /\p{Pauc}/
-- /\p{Perm}/
-- /\p{Phag}/
-- /\p{Phli}/
-- /\p{Phlp}/
-- /\p{Phnx}/
-- /\p{Plrd}/
-- /\p{Prti}/
-- /\p{Rjng}/
-- /\p{Rohg}/
-- /\p{Runr}/
-- /\p{Samr}/
-- /\p{Sarb}/
-- /\p{Saur}/
-- /\p{Sgnw}/
-- /\p{Shaw}/
-- /\p{Shrd}/
-- /\p{Sidd}/
-- /\p{Sind}/
-- /\p{Sinh}/
-- /\p{Sogd}/
-- /\p{Sogo}/
-- /\p{Sora}/
-- /\p{Soyo}/
-- /\p{Sund}/
-- /\p{Sylo}/
-- /\p{Syrc}/
-- /\p{Tagb}/
-- /\p{Takr}/
-- /\p{Tale}/
-- /\p{Talu}/
-- /\p{Taml}/
-- /\p{Tang}/
-- /\p{Tavt}/
-- /\p{Telu}/
-- /\p{Tfng}/
-- /\p{Tglg}/
-- /\p{Thaa}/
-- /\p{Tibt}/
-- /\p{Tirh}/
-- /\p{Ugar}/
-- /\p{Vaii}/
-- /\p{Wara}/
-- /\p{Wcho}/
-- /\p{Xpeo}/
-- /\p{Xsux}/
-- /\p{Yiii}/
-- /\p{Zanb}/
-- /\p{Zinh}/
-- /\p{Qaai}/
-- /\p{Zyyy}/
-- /\p{Zzzz}/
-
-=== Derived Ages
-
-- /\p{Age=1.1}/
-- /\p{Age=10.0}/
-- /\p{Age=11.0}/
-- /\p{Age=12.0}/
-- /\p{Age=12.1}/
-- /\p{Age=2.0}/
-- /\p{Age=2.1}/
-- /\p{Age=3.0}/
-- /\p{Age=3.1}/
-- /\p{Age=3.2}/
-- /\p{Age=4.0}/
-- /\p{Age=4.1}/
-- /\p{Age=5.0}/
-- /\p{Age=5.1}/
-- /\p{Age=5.2}/
-- /\p{Age=6.0}/
-- /\p{Age=6.1}/
-- /\p{Age=6.2}/
-- /\p{Age=6.3}/
-- /\p{Age=7.0}/
-- /\p{Age=8.0}/
-- /\p{Age=9.0}/
+- \p{Adlam}, \p{Adlm}
+- \p{Ahom}
+- \p{Anatolian_Hieroglyphs}, \p{Hluw}
+- \p{Arabic}, \p{Arab}
+- \p{Armenian}, \p{Armn}
+- \p{Avestan}, \p{Avst}
+- \p{Balinese}, \p{Bali}
+- \p{Bamum}, \p{Bamu}
+- \p{Bassa_Vah}, \p{Bass}
+- \p{Batak}, \p{Batk}
+- \p{Bengali}, \p{Beng}
+- \p{Bhaiksuki}, \p{Bhks}
+- \p{Bopomofo}, \p{Bopo}
+- \p{Brahmi}, \p{Brah}
+- \p{Braille}, \p{Brai}
+- \p{Buginese}, \p{Bugi}
+- \p{Buhid}, \p{Buhd}
+- \p{Canadian_Aboriginal}, \p{Cans}
+- \p{Carian}, \p{Cari}
+- \p{Caucasian_Albanian}, \p{Aghb}
+- \p{Chakma}, \p{Cakm}
+- \p{Cham}
+- \p{Cherokee}, \p{Cher}
+- \p{Chorasmian}, \p{Chrs}
+- \p{Common}, \p{Zyyy}
+- \p{Coptic}, \p{Copt}
+- \p{Cuneiform}, \p{Xsux}
+- \p{Cypriot}, \p{Cprt}
+- \p{Cypro_Minoan}, \p{Cpmn}
+- \p{Cyrillic}, \p{Cyrl}
+- \p{Deseret}, \p{Dsrt}
+- \p{Devanagari}, \p{Deva}
+- \p{Dives_Akuru}, \p{Diak}
+- \p{Dogra}, \p{Dogr}
+- \p{Duployan}, \p{Dupl}
+- \p{Egyptian_Hieroglyphs}, \p{Egyp}
+- \p{Elbasan}, \p{Elba}
+- \p{Elymaic}, \p{Elym}
+- \p{Ethiopic}, \p{Ethi}
+- \p{Georgian}, \p{Geor}
+- \p{Glagolitic}, \p{Glag}
+- \p{Gothic}, \p{Goth}
+- \p{Grantha}, \p{Gran}
+- \p{Greek}, \p{Grek}
+- \p{Gujarati}, \p{Gujr}
+- \p{Gunjala_Gondi}, \p{Gong}
+- \p{Gurmukhi}, \p{Guru}
+- \p{Han}, \p{Hani}
+- \p{Hangul}, \p{Hang}
+- \p{Hanifi_Rohingya}, \p{Rohg}
+- \p{Hanunoo}, \p{Hano}
+- \p{Hatran}, \p{Hatr}
+- \p{Hebrew}, \p{Hebr}
+- \p{Hiragana}, \p{Hira}
+- \p{Imperial_Aramaic}, \p{Armi}
+- \p{Inherited}, \p{Zinh}
+- \p{Inscriptional_Pahlavi}, \p{Phli}
+- \p{Inscriptional_Parthian}, \p{Prti}
+- \p{Javanese}, \p{Java}
+- \p{Kaithi}, \p{Kthi}
+- \p{Kannada}, \p{Knda}
+- \p{Katakana}, \p{Kana}
+- \p{Kawi}
+- \p{Kayah_Li}, \p{Kali}
+- \p{Kharoshthi}, \p{Khar}
+- \p{Khitan_Small_Script}, \p{Kits}
+- \p{Khmer}, \p{Khmr}
+- \p{Khojki}, \p{Khoj}
+- \p{Khudawadi}, \p{Sind}
+- \p{Lao}, \p{Laoo}
+- \p{Latin}, \p{Latn}
+- \p{Lepcha}, \p{Lepc}
+- \p{Limbu}, \p{Limb}
+- \p{Linear_A}, \p{Lina}
+- \p{Linear_B}, \p{Linb}
+- \p{Lisu}
+- \p{Lycian}, \p{Lyci}
+- \p{Lydian}, \p{Lydi}
+- \p{Mahajani}, \p{Mahj}
+- \p{Makasar}, \p{Maka}
+- \p{Malayalam}, \p{Mlym}
+- \p{Mandaic}, \p{Mand}
+- \p{Manichaean}, \p{Mani}
+- \p{Marchen}, \p{Marc}
+- \p{Masaram_Gondi}, \p{Gonm}
+- \p{Medefaidrin}, \p{Medf}
+- \p{Meetei_Mayek}, \p{Mtei}
+- \p{Mende_Kikakui}, \p{Mend}
+- \p{Meroitic_Cursive}, \p{Merc}
+- \p{Meroitic_Hieroglyphs}, \p{Mero}
+- \p{Miao}, \p{Plrd}
+- \p{Modi}
+- \p{Mongolian}, \p{Mong}
+- \p{Mro}, \p{Mroo}
+- \p{Multani}, \p{Mult}
+- \p{Myanmar}, \p{Mymr}
+- \p{Nabataean}, \p{Nbat}
+- \p{Nag_Mundari}, \p{Nagm}
+- \p{Nandinagari}, \p{Nand}
+- \p{New_Tai_Lue}, \p{Talu}
+- \p{Newa}
+- \p{Nko}, \p{Nkoo}
+- \p{Nushu}, \p{Nshu}
+- \p{Nyiakeng_Puachue_Hmong}, \p{Hmnp}
+- \p{Ogham}, \p{Ogam}
+- \p{Ol_Chiki}, \p{Olck}
+- \p{Old_Hungarian}, \p{Hung}
+- \p{Old_Italic}, \p{Ital}
+- \p{Old_North_Arabian}, \p{Narb}
+- \p{Old_Permic}, \p{Perm}
+- \p{Old_Persian}, \p{Xpeo}
+- \p{Old_Sogdian}, \p{Sogo}
+- \p{Old_South_Arabian}, \p{Sarb}
+- \p{Old_Turkic}, \p{Orkh}
+- \p{Old_Uyghur}, \p{Ougr}
+- \p{Oriya}, \p{Orya}
+- \p{Osage}, \p{Osge}
+- \p{Osmanya}, \p{Osma}
+- \p{Pahawh_Hmong}, \p{Hmng}
+- \p{Palmyrene}, \p{Palm}
+- \p{Pau_Cin_Hau}, \p{Pauc}
+- \p{Phags_Pa}, \p{Phag}
+- \p{Phoenician}, \p{Phnx}
+- \p{Psalter_Pahlavi}, \p{Phlp}
+- \p{Rejang}, \p{Rjng}
+- \p{Runic}, \p{Runr}
+- \p{Samaritan}, \p{Samr}
+- \p{Saurashtra}, \p{Saur}
+- \p{Sharada}, \p{Shrd}
+- \p{Shavian}, \p{Shaw}
+- \p{Siddham}, \p{Sidd}
+- \p{SignWriting}, \p{Sgnw}
+- \p{Sinhala}, \p{Sinh}
+- \p{Sogdian}, \p{Sogd}
+- \p{Sora_Sompeng}, \p{Sora}
+- \p{Soyombo}, \p{Soyo}
+- \p{Sundanese}, \p{Sund}
+- \p{Syloti_Nagri}, \p{Sylo}
+- \p{Syriac}, \p{Syrc}
+- \p{Tagalog}, \p{Tglg}
+- \p{Tagbanwa}, \p{Tagb}
+- \p{Tai_Le}, \p{Tale}
+- \p{Tai_Tham}, \p{Lana}
+- \p{Tai_Viet}, \p{Tavt}
+- \p{Takri}, \p{Takr}
+- \p{Tamil}, \p{Taml}
+- \p{Tangsa}, \p{Tnsa}
+- \p{Tangut}, \p{Tang}
+- \p{Telugu}, \p{Telu}
+- \p{Thaana}, \p{Thaa}
+- \p{Thai}
+- \p{Tibetan}, \p{Tibt}
+- \p{Tifinagh}, \p{Tfng}
+- \p{Tirhuta}, \p{Tirh}
+- \p{Toto}
+- \p{Ugaritic}, \p{Ugar}
+- \p{Unknown}, \p{Zzzz}
+- \p{Vai}, \p{Vaii}
+- \p{Vithkuqi}, \p{Vith}
+- \p{Wancho}, \p{Wcho}
+- \p{Warang_Citi}, \p{Wara}
+- \p{Yezidi}, \p{Yezi}
+- \p{Yi}, \p{Yiii}
+- \p{Zanabazar_Square}, \p{Zanb}
=== Blocks
-- /\p{In_Basic_Latin}/
-- /\p{In_Latin_1_Supplement}/
-- /\p{In_Latin_Extended_A}/
-- /\p{In_Latin_Extended_B}/
-- /\p{In_IPA_Extensions}/
-- /\p{In_Spacing_Modifier_Letters}/
-- /\p{In_Combining_Diacritical_Marks}/
-- /\p{In_Greek_and_Coptic}/
-- /\p{In_Cyrillic}/
-- /\p{In_Cyrillic_Supplement}/
-- /\p{In_Armenian}/
-- /\p{In_Hebrew}/
-- /\p{In_Arabic}/
-- /\p{In_Syriac}/
-- /\p{In_Arabic_Supplement}/
-- /\p{In_Thaana}/
-- /\p{In_NKo}/
-- /\p{In_Samaritan}/
-- /\p{In_Mandaic}/
-- /\p{In_Syriac_Supplement}/
-- /\p{In_Arabic_Extended_A}/
-- /\p{In_Devanagari}/
-- /\p{In_Bengali}/
-- /\p{In_Gurmukhi}/
-- /\p{In_Gujarati}/
-- /\p{In_Oriya}/
-- /\p{In_Tamil}/
-- /\p{In_Telugu}/
-- /\p{In_Kannada}/
-- /\p{In_Malayalam}/
-- /\p{In_Sinhala}/
-- /\p{In_Thai}/
-- /\p{In_Lao}/
-- /\p{In_Tibetan}/
-- /\p{In_Myanmar}/
-- /\p{In_Georgian}/
-- /\p{In_Hangul_Jamo}/
-- /\p{In_Ethiopic}/
-- /\p{In_Ethiopic_Supplement}/
-- /\p{In_Cherokee}/
-- /\p{In_Unified_Canadian_Aboriginal_Syllabics}/
-- /\p{In_Ogham}/
-- /\p{In_Runic}/
-- /\p{In_Tagalog}/
-- /\p{In_Hanunoo}/
-- /\p{In_Buhid}/
-- /\p{In_Tagbanwa}/
-- /\p{In_Khmer}/
-- /\p{In_Mongolian}/
-- /\p{In_Unified_Canadian_Aboriginal_Syllabics_Extended}/
-- /\p{In_Limbu}/
-- /\p{In_Tai_Le}/
-- /\p{In_New_Tai_Lue}/
-- /\p{In_Khmer_Symbols}/
-- /\p{In_Buginese}/
-- /\p{In_Tai_Tham}/
-- /\p{In_Combining_Diacritical_Marks_Extended}/
-- /\p{In_Balinese}/
-- /\p{In_Sundanese}/
-- /\p{In_Batak}/
-- /\p{In_Lepcha}/
-- /\p{In_Ol_Chiki}/
-- /\p{In_Cyrillic_Extended_C}/
-- /\p{In_Georgian_Extended}/
-- /\p{In_Sundanese_Supplement}/
-- /\p{In_Vedic_Extensions}/
-- /\p{In_Phonetic_Extensions}/
-- /\p{In_Phonetic_Extensions_Supplement}/
-- /\p{In_Combining_Diacritical_Marks_Supplement}/
-- /\p{In_Latin_Extended_Additional}/
-- /\p{In_Greek_Extended}/
-- /\p{In_General_Punctuation}/
-- /\p{In_Superscripts_and_Subscripts}/
-- /\p{In_Currency_Symbols}/
-- /\p{In_Combining_Diacritical_Marks_for_Symbols}/
-- /\p{In_Letterlike_Symbols}/
-- /\p{In_Number_Forms}/
-- /\p{In_Arrows}/
-- /\p{In_Mathematical_Operators}/
-- /\p{In_Miscellaneous_Technical}/
-- /\p{In_Control_Pictures}/
-- /\p{In_Optical_Character_Recognition}/
-- /\p{In_Enclosed_Alphanumerics}/
-- /\p{In_Box_Drawing}/
-- /\p{In_Block_Elements}/
-- /\p{In_Geometric_Shapes}/
-- /\p{In_Miscellaneous_Symbols}/
-- /\p{In_Dingbats}/
-- /\p{In_Miscellaneous_Mathematical_Symbols_A}/
-- /\p{In_Supplemental_Arrows_A}/
-- /\p{In_Braille_Patterns}/
-- /\p{In_Supplemental_Arrows_B}/
-- /\p{In_Miscellaneous_Mathematical_Symbols_B}/
-- /\p{In_Supplemental_Mathematical_Operators}/
-- /\p{In_Miscellaneous_Symbols_and_Arrows}/
-- /\p{In_Glagolitic}/
-- /\p{In_Latin_Extended_C}/
-- /\p{In_Coptic}/
-- /\p{In_Georgian_Supplement}/
-- /\p{In_Tifinagh}/
-- /\p{In_Ethiopic_Extended}/
-- /\p{In_Cyrillic_Extended_A}/
-- /\p{In_Supplemental_Punctuation}/
-- /\p{In_CJK_Radicals_Supplement}/
-- /\p{In_Kangxi_Radicals}/
-- /\p{In_Ideographic_Description_Characters}/
-- /\p{In_CJK_Symbols_and_Punctuation}/
-- /\p{In_Hiragana}/
-- /\p{In_Katakana}/
-- /\p{In_Bopomofo}/
-- /\p{In_Hangul_Compatibility_Jamo}/
-- /\p{In_Kanbun}/
-- /\p{In_Bopomofo_Extended}/
-- /\p{In_CJK_Strokes}/
-- /\p{In_Katakana_Phonetic_Extensions}/
-- /\p{In_Enclosed_CJK_Letters_and_Months}/
-- /\p{In_CJK_Compatibility}/
-- /\p{In_CJK_Unified_Ideographs_Extension_A}/
-- /\p{In_Yijing_Hexagram_Symbols}/
-- /\p{In_CJK_Unified_Ideographs}/
-- /\p{In_Yi_Syllables}/
-- /\p{In_Yi_Radicals}/
-- /\p{In_Lisu}/
-- /\p{In_Vai}/
-- /\p{In_Cyrillic_Extended_B}/
-- /\p{In_Bamum}/
-- /\p{In_Modifier_Tone_Letters}/
-- /\p{In_Latin_Extended_D}/
-- /\p{In_Syloti_Nagri}/
-- /\p{In_Common_Indic_Number_Forms}/
-- /\p{In_Phags_pa}/
-- /\p{In_Saurashtra}/
-- /\p{In_Devanagari_Extended}/
-- /\p{In_Kayah_Li}/
-- /\p{In_Rejang}/
-- /\p{In_Hangul_Jamo_Extended_A}/
-- /\p{In_Javanese}/
-- /\p{In_Myanmar_Extended_B}/
-- /\p{In_Cham}/
-- /\p{In_Myanmar_Extended_A}/
-- /\p{In_Tai_Viet}/
-- /\p{In_Meetei_Mayek_Extensions}/
-- /\p{In_Ethiopic_Extended_A}/
-- /\p{In_Latin_Extended_E}/
-- /\p{In_Cherokee_Supplement}/
-- /\p{In_Meetei_Mayek}/
-- /\p{In_Hangul_Syllables}/
-- /\p{In_Hangul_Jamo_Extended_B}/
-- /\p{In_High_Surrogates}/
-- /\p{In_High_Private_Use_Surrogates}/
-- /\p{In_Low_Surrogates}/
-- /\p{In_Private_Use_Area}/
-- /\p{In_CJK_Compatibility_Ideographs}/
-- /\p{In_Alphabetic_Presentation_Forms}/
-- /\p{In_Arabic_Presentation_Forms_A}/
-- /\p{In_Variation_Selectors}/
-- /\p{In_Vertical_Forms}/
-- /\p{In_Combining_Half_Marks}/
-- /\p{In_CJK_Compatibility_Forms}/
-- /\p{In_Small_Form_Variants}/
-- /\p{In_Arabic_Presentation_Forms_B}/
-- /\p{In_Halfwidth_and_Fullwidth_Forms}/
-- /\p{In_Specials}/
-- /\p{In_Linear_B_Syllabary}/
-- /\p{In_Linear_B_Ideograms}/
-- /\p{In_Aegean_Numbers}/
-- /\p{In_Ancient_Greek_Numbers}/
-- /\p{In_Ancient_Symbols}/
-- /\p{In_Phaistos_Disc}/
-- /\p{In_Lycian}/
-- /\p{In_Carian}/
-- /\p{In_Coptic_Epact_Numbers}/
-- /\p{In_Old_Italic}/
-- /\p{In_Gothic}/
-- /\p{In_Old_Permic}/
-- /\p{In_Ugaritic}/
-- /\p{In_Old_Persian}/
-- /\p{In_Deseret}/
-- /\p{In_Shavian}/
-- /\p{In_Osmanya}/
-- /\p{In_Osage}/
-- /\p{In_Elbasan}/
-- /\p{In_Caucasian_Albanian}/
-- /\p{In_Linear_A}/
-- /\p{In_Cypriot_Syllabary}/
-- /\p{In_Imperial_Aramaic}/
-- /\p{In_Palmyrene}/
-- /\p{In_Nabataean}/
-- /\p{In_Hatran}/
-- /\p{In_Phoenician}/
-- /\p{In_Lydian}/
-- /\p{In_Meroitic_Hieroglyphs}/
-- /\p{In_Meroitic_Cursive}/
-- /\p{In_Kharoshthi}/
-- /\p{In_Old_South_Arabian}/
-- /\p{In_Old_North_Arabian}/
-- /\p{In_Manichaean}/
-- /\p{In_Avestan}/
-- /\p{In_Inscriptional_Parthian}/
-- /\p{In_Inscriptional_Pahlavi}/
-- /\p{In_Psalter_Pahlavi}/
-- /\p{In_Old_Turkic}/
-- /\p{In_Old_Hungarian}/
-- /\p{In_Hanifi_Rohingya}/
-- /\p{In_Rumi_Numeral_Symbols}/
-- /\p{In_Old_Sogdian}/
-- /\p{In_Sogdian}/
-- /\p{In_Elymaic}/
-- /\p{In_Brahmi}/
-- /\p{In_Kaithi}/
-- /\p{In_Sora_Sompeng}/
-- /\p{In_Chakma}/
-- /\p{In_Mahajani}/
-- /\p{In_Sharada}/
-- /\p{In_Sinhala_Archaic_Numbers}/
-- /\p{In_Khojki}/
-- /\p{In_Multani}/
-- /\p{In_Khudawadi}/
-- /\p{In_Grantha}/
-- /\p{In_Newa}/
-- /\p{In_Tirhuta}/
-- /\p{In_Siddham}/
-- /\p{In_Modi}/
-- /\p{In_Mongolian_Supplement}/
-- /\p{In_Takri}/
-- /\p{In_Ahom}/
-- /\p{In_Dogra}/
-- /\p{In_Warang_Citi}/
-- /\p{In_Nandinagari}/
-- /\p{In_Zanabazar_Square}/
-- /\p{In_Soyombo}/
-- /\p{In_Pau_Cin_Hau}/
-- /\p{In_Bhaiksuki}/
-- /\p{In_Marchen}/
-- /\p{In_Masaram_Gondi}/
-- /\p{In_Gunjala_Gondi}/
-- /\p{In_Makasar}/
-- /\p{In_Tamil_Supplement}/
-- /\p{In_Cuneiform}/
-- /\p{In_Cuneiform_Numbers_and_Punctuation}/
-- /\p{In_Early_Dynastic_Cuneiform}/
-- /\p{In_Egyptian_Hieroglyphs}/
-- /\p{In_Egyptian_Hieroglyph_Format_Controls}/
-- /\p{In_Anatolian_Hieroglyphs}/
-- /\p{In_Bamum_Supplement}/
-- /\p{In_Mro}/
-- /\p{In_Bassa_Vah}/
-- /\p{In_Pahawh_Hmong}/
-- /\p{In_Medefaidrin}/
-- /\p{In_Miao}/
-- /\p{In_Ideographic_Symbols_and_Punctuation}/
-- /\p{In_Tangut}/
-- /\p{In_Tangut_Components}/
-- /\p{In_Kana_Supplement}/
-- /\p{In_Kana_Extended_A}/
-- /\p{In_Small_Kana_Extension}/
-- /\p{In_Nushu}/
-- /\p{In_Duployan}/
-- /\p{In_Shorthand_Format_Controls}/
-- /\p{In_Byzantine_Musical_Symbols}/
-- /\p{In_Musical_Symbols}/
-- /\p{In_Ancient_Greek_Musical_Notation}/
-- /\p{In_Mayan_Numerals}/
-- /\p{In_Tai_Xuan_Jing_Symbols}/
-- /\p{In_Counting_Rod_Numerals}/
-- /\p{In_Mathematical_Alphanumeric_Symbols}/
-- /\p{In_Sutton_SignWriting}/
-- /\p{In_Glagolitic_Supplement}/
-- /\p{In_Nyiakeng_Puachue_Hmong}/
-- /\p{In_Wancho}/
-- /\p{In_Mende_Kikakui}/
-- /\p{In_Adlam}/
-- /\p{In_Indic_Siyaq_Numbers}/
-- /\p{In_Ottoman_Siyaq_Numbers}/
-- /\p{In_Arabic_Mathematical_Alphabetic_Symbols}/
-- /\p{In_Mahjong_Tiles}/
-- /\p{In_Domino_Tiles}/
-- /\p{In_Playing_Cards}/
-- /\p{In_Enclosed_Alphanumeric_Supplement}/
-- /\p{In_Enclosed_Ideographic_Supplement}/
-- /\p{In_Miscellaneous_Symbols_and_Pictographs}/
-- /\p{In_Emoticons}/
-- /\p{In_Ornamental_Dingbats}/
-- /\p{In_Transport_and_Map_Symbols}/
-- /\p{In_Alchemical_Symbols}/
-- /\p{In_Geometric_Shapes_Extended}/
-- /\p{In_Supplemental_Arrows_C}/
-- /\p{In_Supplemental_Symbols_and_Pictographs}/
-- /\p{In_Chess_Symbols}/
-- /\p{In_Symbols_and_Pictographs_Extended_A}/
-- /\p{In_CJK_Unified_Ideographs_Extension_B}/
-- /\p{In_CJK_Unified_Ideographs_Extension_C}/
-- /\p{In_CJK_Unified_Ideographs_Extension_D}/
-- /\p{In_CJK_Unified_Ideographs_Extension_E}/
-- /\p{In_CJK_Unified_Ideographs_Extension_F}/
-- /\p{In_CJK_Compatibility_Ideographs_Supplement}/
-- /\p{In_Tags}/
-- /\p{In_Variation_Selectors_Supplement}/
-- /\p{In_Supplementary_Private_Use_Area_A}/
-- /\p{In_Supplementary_Private_Use_Area_B}/
-- /\p{In_No_Block}/
+- \p{In_Adlam}
+- \p{In_Aegean_Numbers}
+- \p{In_Ahom}
+- \p{In_Alchemical_Symbols}
+- \p{In_Alphabetic_Presentation_Forms}
+- \p{In_Anatolian_Hieroglyphs}
+- \p{In_Ancient_Greek_Musical_Notation}
+- \p{In_Ancient_Greek_Numbers}
+- \p{In_Ancient_Symbols}
+- \p{In_Arabic}
+- \p{In_Arabic_Extended_A}
+- \p{In_Arabic_Extended_B}
+- \p{In_Arabic_Extended_C}
+- \p{In_Arabic_Mathematical_Alphabetic_Symbols}
+- \p{In_Arabic_Presentation_Forms_A}
+- \p{In_Arabic_Presentation_Forms_B}
+- \p{In_Arabic_Supplement}
+- \p{In_Armenian}
+- \p{In_Arrows}
+- \p{In_Avestan}
+- \p{In_Balinese}
+- \p{In_Bamum}
+- \p{In_Bamum_Supplement}
+- \p{In_Basic_Latin}
+- \p{In_Bassa_Vah}
+- \p{In_Batak}
+- \p{In_Bengali}
+- \p{In_Bhaiksuki}
+- \p{In_Block_Elements}
+- \p{In_Bopomofo}
+- \p{In_Bopomofo_Extended}
+- \p{In_Box_Drawing}
+- \p{In_Brahmi}
+- \p{In_Braille_Patterns}
+- \p{In_Buginese}
+- \p{In_Buhid}
+- \p{In_Byzantine_Musical_Symbols}
+- \p{In_CJK_Compatibility}
+- \p{In_CJK_Compatibility_Forms}
+- \p{In_CJK_Compatibility_Ideographs}
+- \p{In_CJK_Compatibility_Ideographs_Supplement}
+- \p{In_CJK_Radicals_Supplement}
+- \p{In_CJK_Strokes}
+- \p{In_CJK_Symbols_and_Punctuation}
+- \p{In_CJK_Unified_Ideographs}
+- \p{In_CJK_Unified_Ideographs_Extension_A}
+- \p{In_CJK_Unified_Ideographs_Extension_B}
+- \p{In_CJK_Unified_Ideographs_Extension_C}
+- \p{In_CJK_Unified_Ideographs_Extension_D}
+- \p{In_CJK_Unified_Ideographs_Extension_E}
+- \p{In_CJK_Unified_Ideographs_Extension_F}
+- \p{In_CJK_Unified_Ideographs_Extension_G}
+- \p{In_CJK_Unified_Ideographs_Extension_H}
+- \p{In_Carian}
+- \p{In_Caucasian_Albanian}
+- \p{In_Chakma}
+- \p{In_Cham}
+- \p{In_Cherokee}
+- \p{In_Cherokee_Supplement}
+- \p{In_Chess_Symbols}
+- \p{In_Chorasmian}
+- \p{In_Combining_Diacritical_Marks}
+- \p{In_Combining_Diacritical_Marks_Extended}
+- \p{In_Combining_Diacritical_Marks_Supplement}
+- \p{In_Combining_Diacritical_Marks_for_Symbols}
+- \p{In_Combining_Half_Marks}
+- \p{In_Common_Indic_Number_Forms}
+- \p{In_Control_Pictures}
+- \p{In_Coptic}
+- \p{In_Coptic_Epact_Numbers}
+- \p{In_Counting_Rod_Numerals}
+- \p{In_Cuneiform}
+- \p{In_Cuneiform_Numbers_and_Punctuation}
+- \p{In_Currency_Symbols}
+- \p{In_Cypriot_Syllabary}
+- \p{In_Cypro_Minoan}
+- \p{In_Cyrillic}
+- \p{In_Cyrillic_Extended_A}
+- \p{In_Cyrillic_Extended_B}
+- \p{In_Cyrillic_Extended_C}
+- \p{In_Cyrillic_Extended_D}
+- \p{In_Cyrillic_Supplement}
+- \p{In_Deseret}
+- \p{In_Devanagari}
+- \p{In_Devanagari_Extended}
+- \p{In_Devanagari_Extended_A}
+- \p{In_Dingbats}
+- \p{In_Dives_Akuru}
+- \p{In_Dogra}
+- \p{In_Domino_Tiles}
+- \p{In_Duployan}
+- \p{In_Early_Dynastic_Cuneiform}
+- \p{In_Egyptian_Hieroglyph_Format_Controls}
+- \p{In_Egyptian_Hieroglyphs}
+- \p{In_Elbasan}
+- \p{In_Elymaic}
+- \p{In_Emoticons}
+- \p{In_Enclosed_Alphanumeric_Supplement}
+- \p{In_Enclosed_Alphanumerics}
+- \p{In_Enclosed_CJK_Letters_and_Months}
+- \p{In_Enclosed_Ideographic_Supplement}
+- \p{In_Ethiopic}
+- \p{In_Ethiopic_Extended}
+- \p{In_Ethiopic_Extended_A}
+- \p{In_Ethiopic_Extended_B}
+- \p{In_Ethiopic_Supplement}
+- \p{In_General_Punctuation}
+- \p{In_Geometric_Shapes}
+- \p{In_Geometric_Shapes_Extended}
+- \p{In_Georgian}
+- \p{In_Georgian_Extended}
+- \p{In_Georgian_Supplement}
+- \p{In_Glagolitic}
+- \p{In_Glagolitic_Supplement}
+- \p{In_Gothic}
+- \p{In_Grantha}
+- \p{In_Greek_Extended}
+- \p{In_Greek_and_Coptic}
+- \p{In_Gujarati}
+- \p{In_Gunjala_Gondi}
+- \p{In_Gurmukhi}
+- \p{In_Halfwidth_and_Fullwidth_Forms}
+- \p{In_Hangul_Compatibility_Jamo}
+- \p{In_Hangul_Jamo}
+- \p{In_Hangul_Jamo_Extended_A}
+- \p{In_Hangul_Jamo_Extended_B}
+- \p{In_Hangul_Syllables}
+- \p{In_Hanifi_Rohingya}
+- \p{In_Hanunoo}
+- \p{In_Hatran}
+- \p{In_Hebrew}
+- \p{In_High_Private_Use_Surrogates}
+- \p{In_High_Surrogates}
+- \p{In_Hiragana}
+- \p{In_IPA_Extensions}
+- \p{In_Ideographic_Description_Characters}
+- \p{In_Ideographic_Symbols_and_Punctuation}
+- \p{In_Imperial_Aramaic}
+- \p{In_Indic_Siyaq_Numbers}
+- \p{In_Inscriptional_Pahlavi}
+- \p{In_Inscriptional_Parthian}
+- \p{In_Javanese}
+- \p{In_Kaithi}
+- \p{In_Kaktovik_Numerals}
+- \p{In_Kana_Extended_A}
+- \p{In_Kana_Extended_B}
+- \p{In_Kana_Supplement}
+- \p{In_Kanbun}
+- \p{In_Kangxi_Radicals}
+- \p{In_Kannada}
+- \p{In_Katakana}
+- \p{In_Katakana_Phonetic_Extensions}
+- \p{In_Kawi}
+- \p{In_Kayah_Li}
+- \p{In_Kharoshthi}
+- \p{In_Khitan_Small_Script}
+- \p{In_Khmer}
+- \p{In_Khmer_Symbols}
+- \p{In_Khojki}
+- \p{In_Khudawadi}
+- \p{In_Lao}
+- \p{In_Latin_1_Supplement}
+- \p{In_Latin_Extended_A}
+- \p{In_Latin_Extended_Additional}
+- \p{In_Latin_Extended_B}
+- \p{In_Latin_Extended_C}
+- \p{In_Latin_Extended_D}
+- \p{In_Latin_Extended_E}
+- \p{In_Latin_Extended_F}
+- \p{In_Latin_Extended_G}
+- \p{In_Lepcha}
+- \p{In_Letterlike_Symbols}
+- \p{In_Limbu}
+- \p{In_Linear_A}
+- \p{In_Linear_B_Ideograms}
+- \p{In_Linear_B_Syllabary}
+- \p{In_Lisu}
+- \p{In_Lisu_Supplement}
+- \p{In_Low_Surrogates}
+- \p{In_Lycian}
+- \p{In_Lydian}
+- \p{In_Mahajani}
+- \p{In_Mahjong_Tiles}
+- \p{In_Makasar}
+- \p{In_Malayalam}
+- \p{In_Mandaic}
+- \p{In_Manichaean}
+- \p{In_Marchen}
+- \p{In_Masaram_Gondi}
+- \p{In_Mathematical_Alphanumeric_Symbols}
+- \p{In_Mathematical_Operators}
+- \p{In_Mayan_Numerals}
+- \p{In_Medefaidrin}
+- \p{In_Meetei_Mayek}
+- \p{In_Meetei_Mayek_Extensions}
+- \p{In_Mende_Kikakui}
+- \p{In_Meroitic_Cursive}
+- \p{In_Meroitic_Hieroglyphs}
+- \p{In_Miao}
+- \p{In_Miscellaneous_Mathematical_Symbols_A}
+- \p{In_Miscellaneous_Mathematical_Symbols_B}
+- \p{In_Miscellaneous_Symbols}
+- \p{In_Miscellaneous_Symbols_and_Arrows}
+- \p{In_Miscellaneous_Symbols_and_Pictographs}
+- \p{In_Miscellaneous_Technical}
+- \p{In_Modi}
+- \p{In_Modifier_Tone_Letters}
+- \p{In_Mongolian}
+- \p{In_Mongolian_Supplement}
+- \p{In_Mro}
+- \p{In_Multani}
+- \p{In_Musical_Symbols}
+- \p{In_Myanmar}
+- \p{In_Myanmar_Extended_A}
+- \p{In_Myanmar_Extended_B}
+- \p{In_NKo}
+- \p{In_Nabataean}
+- \p{In_Nag_Mundari}
+- \p{In_Nandinagari}
+- \p{In_New_Tai_Lue}
+- \p{In_Newa}
+- \p{In_No_Block}
+- \p{In_Number_Forms}
+- \p{In_Nushu}
+- \p{In_Nyiakeng_Puachue_Hmong}
+- \p{In_Ogham}
+- \p{In_Ol_Chiki}
+- \p{In_Old_Hungarian}
+- \p{In_Old_Italic}
+- \p{In_Old_North_Arabian}
+- \p{In_Old_Permic}
+- \p{In_Old_Persian}
+- \p{In_Old_Sogdian}
+- \p{In_Old_South_Arabian}
+- \p{In_Old_Turkic}
+- \p{In_Old_Uyghur}
+- \p{In_Optical_Character_Recognition}
+- \p{In_Oriya}
+- \p{In_Ornamental_Dingbats}
+- \p{In_Osage}
+- \p{In_Osmanya}
+- \p{In_Ottoman_Siyaq_Numbers}
+- \p{In_Pahawh_Hmong}
+- \p{In_Palmyrene}
+- \p{In_Pau_Cin_Hau}
+- \p{In_Phags_pa}
+- \p{In_Phaistos_Disc}
+- \p{In_Phoenician}
+- \p{In_Phonetic_Extensions}
+- \p{In_Phonetic_Extensions_Supplement}
+- \p{In_Playing_Cards}
+- \p{In_Private_Use_Area}
+- \p{In_Psalter_Pahlavi}
+- \p{In_Rejang}
+- \p{In_Rumi_Numeral_Symbols}
+- \p{In_Runic}
+- \p{In_Samaritan}
+- \p{In_Saurashtra}
+- \p{In_Sharada}
+- \p{In_Shavian}
+- \p{In_Shorthand_Format_Controls}
+- \p{In_Siddham}
+- \p{In_Sinhala}
+- \p{In_Sinhala_Archaic_Numbers}
+- \p{In_Small_Form_Variants}
+- \p{In_Small_Kana_Extension}
+- \p{In_Sogdian}
+- \p{In_Sora_Sompeng}
+- \p{In_Soyombo}
+- \p{In_Spacing_Modifier_Letters}
+- \p{In_Specials}
+- \p{In_Sundanese}
+- \p{In_Sundanese_Supplement}
+- \p{In_Superscripts_and_Subscripts}
+- \p{In_Supplemental_Arrows_A}
+- \p{In_Supplemental_Arrows_B}
+- \p{In_Supplemental_Arrows_C}
+- \p{In_Supplemental_Mathematical_Operators}
+- \p{In_Supplemental_Punctuation}
+- \p{In_Supplemental_Symbols_and_Pictographs}
+- \p{In_Supplementary_Private_Use_Area_A}
+- \p{In_Supplementary_Private_Use_Area_B}
+- \p{In_Sutton_SignWriting}
+- \p{In_Syloti_Nagri}
+- \p{In_Symbols_and_Pictographs_Extended_A}
+- \p{In_Symbols_for_Legacy_Computing}
+- \p{In_Syriac}
+- \p{In_Syriac_Supplement}
+- \p{In_Tagalog}
+- \p{In_Tagbanwa}
+- \p{In_Tags}
+- \p{In_Tai_Le}
+- \p{In_Tai_Tham}
+- \p{In_Tai_Viet}
+- \p{In_Tai_Xuan_Jing_Symbols}
+- \p{In_Takri}
+- \p{In_Tamil}
+- \p{In_Tamil_Supplement}
+- \p{In_Tangsa}
+- \p{In_Tangut}
+- \p{In_Tangut_Components}
+- \p{In_Tangut_Supplement}
+- \p{In_Telugu}
+- \p{In_Thaana}
+- \p{In_Thai}
+- \p{In_Tibetan}
+- \p{In_Tifinagh}
+- \p{In_Tirhuta}
+- \p{In_Toto}
+- \p{In_Transport_and_Map_Symbols}
+- \p{In_Ugaritic}
+- \p{In_Unified_Canadian_Aboriginal_Syllabics}
+- \p{In_Unified_Canadian_Aboriginal_Syllabics_Extended}
+- \p{In_Unified_Canadian_Aboriginal_Syllabics_Extended_A}
+- \p{In_Vai}
+- \p{In_Variation_Selectors}
+- \p{In_Variation_Selectors_Supplement}
+- \p{In_Vedic_Extensions}
+- \p{In_Vertical_Forms}
+- \p{In_Vithkuqi}
+- \p{In_Wancho}
+- \p{In_Warang_Citi}
+- \p{In_Yezidi}
+- \p{In_Yi_Radicals}
+- \p{In_Yi_Syllables}
+- \p{In_Yijing_Hexagram_Symbols}
+- \p{In_Zanabazar_Square}
+- \p{In_Znamenny_Musical_Notation}
+
+=== Emoji
+
+- \p{Emoji}
+- \p{Emoji_Component}, \p{EComp}
+- \p{Emoji_Modifier}, \p{EMod}
+- \p{Emoji_Modifier_Base}, \p{EBase}
+- \p{Emoji_Presentation}, \p{EPres}
+- \p{Extended_Pictographic}, \p{ExtPict}
+
+=== Graphemes
+
+- \p{Grapheme_Cluster_Break_CR}
+- \p{Grapheme_Cluster_Break_Control}
+- \p{Grapheme_Cluster_Break_Extend}
+- \p{Grapheme_Cluster_Break_L}
+- \p{Grapheme_Cluster_Break_LF}
+- \p{Grapheme_Cluster_Break_LV}
+- \p{Grapheme_Cluster_Break_LVT}
+- \p{Grapheme_Cluster_Break_Prepend}
+- \p{Grapheme_Cluster_Break_Regional_Indicator}
+- \p{Grapheme_Cluster_Break_SpacingMark}
+- \p{Grapheme_Cluster_Break_T}
+- \p{Grapheme_Cluster_Break_V}
+- \p{Grapheme_Cluster_Break_ZWJ}
+
+=== Derived Ages
+
+- \p{Age_10_0}
+- \p{Age_11_0}
+- \p{Age_12_0}
+- \p{Age_12_1}
+- \p{Age_13_0}
+- \p{Age_14_0}
+- \p{Age_15_0}
+- \p{Age_1_1}
+- \p{Age_2_0}
+- \p{Age_2_1}
+- \p{Age_3_0}
+- \p{Age_3_1}
+- \p{Age_3_2}
+- \p{Age_4_0}
+- \p{Age_4_1}
+- \p{Age_5_0}
+- \p{Age_5_1}
+- \p{Age_5_2}
+- \p{Age_6_0}
+- \p{Age_6_1}
+- \p{Age_6_2}
+- \p{Age_6_3}
+- \p{Age_7_0}
+- \p{Age_8_0}
+- \p{Age_9_0}
diff --git a/enc/unicode/15.0.0/name2ctype.h b/enc/unicode/15.0.0/name2ctype.h
index a2c996423d..6bbbb3512f 100644
--- a/enc/unicode/15.0.0/name2ctype.h
+++ b/enc/unicode/15.0.0/name2ctype.h
@@ -5402,7 +5402,7 @@ static const OnigCodePoint CR_ASCII[] = {
0x0000, 0x007f,
}; /* CR_ASCII */
-/* 'Punct' */
+/* 'Punct': [[:Punct:]] */
static const OnigCodePoint CR_Punct[] = {
191,
0x0021, 0x0023,
diff --git a/template/unicode_properties.rdoc.tmpl b/template/unicode_properties.rdoc.tmpl
new file mode 100755
index 0000000000..7bbedc780c
--- /dev/null
+++ b/template/unicode_properties.rdoc.tmpl
@@ -0,0 +1,59 @@
+== \Regexps Based on Unicode Properties
+
+The properties shown here are those currently supported in Ruby.
+Older versions may not support all of these.
+<%
+# Generate a documentation file for the Unicode properties.
+#
+# Usage:
+#
+# Get PropertyAliases.txt, PropertyValueAliases.txt from unicode.org
+# (http://unicode.org/Public/UNIDATA/) and run
+# ```
+# ruby tool/generic_erb.rb template/unicode_properties.rdoc.tmpl data_dir name2ctype.h
+# ```
+
+data_dir = ARGV.shift # directory containing PropertyAliases.txt and PropertyValueAliases.txt
+abort("Usage: #{$0} data_directory [name2ctype.h]") unless data_dir && Dir.exist?(data_dir)
+
+# Map group names, given as last argument to #make_const in enc-unicode.rb,
+# to sections in the doc. The order in this hash controls the order in the doc.
+map = {
+ /\[\[:/ => 'POSIX brackets',
+ '-' => 'Special',
+ /.+ Category/ => 'Major and General Categories',
+ 'Binary Property' => 'Prop List',
+ /Derived Property/ => 'Derived Core Properties',
+ 'Script' => 'Scripts',
+ 'Block' => 'Blocks',
+ 'Emoji' => 'Emoji',
+ /Grapheme/ => 'Graphemes',
+ /Derived Age/ => 'Derived Ages',
+}
+
+# aliases in the form { short => long }, e.g. { 'Hex' => 'Hex_Digit', 'L' => 'Letter' }
+aliases = (
+ File.binread(File.join(data_dir, 'PropertyAliases.txt')).scan(/^(\w+)\s*; (\w+)/) +
+ File.binread(File.join(data_dir, 'PropertyValueAliases.txt')).scan(/^(?:gc|sc)\s*; (\w+)\s*; (\w+)/)
+).to_h
+
+props_by_section = {}
+ARGF.each_line do |line|
+ next unless /'(?[^']+)': (?.+) \*/ =~ line
+ next if prop == 'NEWLINE' # ignore custom internal prop
+
+ section = map.find { |k, v| k === name }&.last || warn("no doc section for #{name}")
+
+ # normalize prop names - the header file uses a mix of short and long names
+ long_prop_name = aliases[prop] || prop
+ (props_by_section[section] ||= []) << long_prop_name
+end
+
+map.each_value do |section| -%>
+
+=== <%=section%>
+
+% props_by_section[section].sort.each do |prop|
+- <%= [prop, aliases.key(prop)].compact.uniq.map { |v| "\\p{#{v}}" }.join(', ') %>
+% end
+% end
diff --git a/tool/enc-unicode.rb b/tool/enc-unicode.rb
index 3fdbe71634..2224ce7149 100755
--- a/tool/enc-unicode.rb
+++ b/tool/enc-unicode.rb
@@ -269,23 +269,12 @@ def parse_block(data)
blocks << constname
end
-# shim for Ruby 1.8
-unless {}.respond_to?(:key)
- class Hash
- alias key index
- end
-end
-
$const_cache = {}
# make_const(property, pairs, name): Prints a 'static const' structure for a
# given property, group of paired codepoints, and a human-friendly name for
# the group
def make_const(prop, data, name)
- if name.empty?
- puts "\n/* '#{prop}' */"
- else
- puts "\n/* '#{prop}': #{name} */"
- end
+ puts "\n/* '#{prop}': #{name} */" # comment used to generate documentation
if origprop = $const_cache.key(data)
puts "#define CR_#{prop} CR_#{origprop}"
else
@@ -437,8 +426,6 @@ define_posix_props(data)
POSIX_NAMES.each do |name|
if name == 'XPosixPunct'
make_const(name, data[name], "[[:Punct:]]")
- elsif name == 'Punct'
- make_const(name, data[name], "")
else
make_const(name, data[name], "[[:#{name}:]]")
end