deps: ICU 60 bump

- Update to released ICU 60.1, including:
  - CLDR 32 (many new languages and data improvements)
  - Unicode 10 (8,518 new characters, including four new scripts,
  7,494 new Han characters, and 56 new emoji characters)
  - UTF-8 malformed bytes now handled according to W3C/WHATWG spec

Fixes: https://github.com/nodejs/node/issues/15540
PR-URL: https://github.com/nodejs/node/pull/16876
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Michael Dawson <michael_dawson@ca.ibm.com>
This commit is contained in:
Steven R. Loomis 2017-09-21 15:31:38 -07:00
parent 3b3ceafaf9
commit 44d3e17985
No known key found for this signature in database
GPG Key ID: 3932080F4FB419E3
254 changed files with 24067 additions and 10556 deletions

View File

@ -230,7 +230,7 @@ The externally maintained libraries used by Node.js are:
# ---------COPYING.libtabe ---- BEGIN-------------------- # ---------COPYING.libtabe ---- BEGIN--------------------
# #
# /* # /*
# * Copyrighy (c) 1999 TaBE Project. # * Copyright (c) 1999 TaBE Project.
# * Copyright (c) 1999 Pai-Hsiang Hsiao. # * Copyright (c) 1999 Pai-Hsiang Hsiao.
# * All rights reserved. # * All rights reserved.
# * # *

4
configure vendored
View File

@ -1092,8 +1092,8 @@ def glob_to_var(dir_base, dir_sub, patch_dir):
def configure_intl(o): def configure_intl(o):
icus = [ icus = [
{ {
'url': 'https://ssl.icu-project.org/files/icu4c/59.1/icu4c-59_1-src.zip', 'url': 'https://ssl.icu-project.org/files/icu4c/60.1/icu4c-60_1-src.zip',
'md5': '29a41f9bb576b06c7eef0487a84a7674', 'md5': 'e6cb990ac2a3161d31a3def8435f80cb',
}, },
] ]
def icu_download(path): def icu_download(path):

View File

@ -131,7 +131,7 @@ property of their respective owners.
# ---------COPYING.libtabe ---- BEGIN-------------------- # ---------COPYING.libtabe ---- BEGIN--------------------
# #
# /* # /*
# * Copyrighy (c) 1999 TaBE Project. # * Copyright (c) 1999 TaBE Project.
# * Copyright (c) 1999 Pai-Hsiang Hsiao. # * Copyright (c) 1999 Pai-Hsiang Hsiao.
# * All rights reserved. # * All rights reserved.
# * # *

View File

@ -1,8 +1,8 @@
Small ICU sources - auto generated by shrink-icu-src.py Small ICU sources - auto generated by shrink-icu-src.py
This directory contains the ICU subset used by --with-intl=small-icu (the default) This directory contains the ICU subset used by --with-intl=small-icu (the default)
It is a strict subset of ICU 59 source files with the following exception(s): It is a strict subset of ICU 60 source files with the following exception(s):
* deps/icu-small/source/data/in/icudt59l.dat : Reduced-size data file * deps/icu-small/source/data/in/icudt60l.dat : Reduced-size data file
To rebuild this directory, see ../../tools/icu/README.md To rebuild this directory, see ../../tools/icu/README.md

View File

@ -28,7 +28,7 @@ U_NAMESPACE_BEGIN
BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) : BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
list(parentList), listLength(parentListLength) { list(parentList), listLength(parentListLength) {
uprv_memset(asciiBytes, 0, sizeof(asciiBytes)); uprv_memset(latin1Contains, 0, sizeof(latin1Contains));
uprv_memset(table7FF, 0, sizeof(table7FF)); uprv_memset(table7FF, 0, sizeof(table7FF));
uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits)); uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));
@ -45,14 +45,16 @@ BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1); list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1);
} }
list4kStarts[0x11]=listLength-1; list4kStarts[0x11]=listLength-1;
containsFFFD=containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10]);
initBits(); initBits();
overrideIllegal(); overrideIllegal();
} }
BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) : BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
containsFFFD(otherBMPSet.containsFFFD),
list(newParentList), listLength(newParentListLength) { list(newParentList), listLength(newParentListLength) {
uprv_memcpy(asciiBytes, otherBMPSet.asciiBytes, sizeof(asciiBytes)); uprv_memcpy(latin1Contains, otherBMPSet.latin1Contains, sizeof(latin1Contains));
uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF)); uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits)); uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts)); uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));
@ -120,7 +122,7 @@ void BMPSet::initBits() {
UChar32 start, limit; UChar32 start, limit;
int32_t listIndex=0; int32_t listIndex=0;
// Set asciiBytes[]. // Set latin1Contains[].
do { do {
start=list[listIndex++]; start=list[listIndex++];
if(listIndex<listLength) { if(listIndex<listLength) {
@ -128,13 +130,30 @@ void BMPSet::initBits() {
} else { } else {
limit=0x110000; limit=0x110000;
} }
if(start>=0x80) { if(start>=0x100) {
break; break;
} }
do { do {
asciiBytes[start++]=1; latin1Contains[start++]=1;
} while(start<limit && start<0x80); } while(start<limit && start<0x100);
} while(limit<=0x80); } while(limit<=0x100);
// Find the first range overlapping with (or after) 80..FF again,
// to include them in table7FF as well.
for(listIndex=0;;) {
start=list[listIndex++];
if(listIndex<listLength) {
limit=list[listIndex++];
} else {
limit=0x110000;
}
if(limit>0x80) {
if(start<0x80) {
start=0x80;
}
break;
}
}
// Set table7FF[]. // Set table7FF[].
while(start<0x800) { while(start<0x800) {
@ -204,19 +223,14 @@ void BMPSet::initBits() {
* for faster validity checking at runtime. * for faster validity checking at runtime.
* No need to set 0 values where they were reset to 0 in the constructor * No need to set 0 values where they were reset to 0 in the constructor
* and not modified by initBits(). * and not modified by initBits().
* (asciiBytes[] trail bytes, table7FF[] 0..7F, bmpBlockBits[] 0..7FF) * (table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
* Need to set 0 values for surrogates D800..DFFF. * Need to set 0 values for surrogates D800..DFFF.
*/ */
void BMPSet::overrideIllegal() { void BMPSet::overrideIllegal() {
uint32_t bits, mask; uint32_t bits, mask;
int32_t i; int32_t i;
if(containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10])) { if(containsFFFD) {
// contains(FFFD)==TRUE
for(i=0x80; i<0xc0; ++i) {
asciiBytes[i]=1;
}
bits=3; // Lead bytes 0xC0 and 0xC1. bits=3; // Lead bytes 0xC0 and 0xC1.
for(i=0; i<64; ++i) { for(i=0; i<64; ++i) {
table7FF[i]|=bits; table7FF[i]|=bits;
@ -233,7 +247,6 @@ void BMPSet::overrideIllegal() {
bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits; bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
} }
} else { } else {
// contains(FFFD)==FALSE
mask=~(0x10001<<0xd); // Lead byte 0xED. mask=~(0x10001<<0xd); // Lead byte 0xED.
for(i=32; i<64; ++i) { // Second half of 4k block. for(i=32; i<64; ++i) { // Second half of 4k block.
bmpBlockBits[i]&=mask; bmpBlockBits[i]&=mask;
@ -277,8 +290,8 @@ int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
UBool UBool
BMPSet::contains(UChar32 c) const { BMPSet::contains(UChar32 c) const {
if((uint32_t)c<=0x7f) { if((uint32_t)c<=0xff) {
return (UBool)asciiBytes[c]; return (UBool)latin1Contains[c];
} else if((uint32_t)c<=0x7ff) { } else if((uint32_t)c<=0x7ff) {
return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0); return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
} else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) { } else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
@ -314,8 +327,8 @@ BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition
// span // span
do { do {
c=*s; c=*s;
if(c<=0x7f) { if(c<=0xff) {
if(!asciiBytes[c]) { if(!latin1Contains[c]) {
break; break;
} }
} else if(c<=0x7ff) { } else if(c<=0x7ff) {
@ -354,8 +367,8 @@ BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition
// span not // span not
do { do {
c=*s; c=*s;
if(c<=0x7f) { if(c<=0xff) {
if(asciiBytes[c]) { if(latin1Contains[c]) {
break; break;
} }
} else if(c<=0x7ff) { } else if(c<=0x7ff) {
@ -403,8 +416,8 @@ BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondi
// span // span
for(;;) { for(;;) {
c=*(--limit); c=*(--limit);
if(c<=0x7f) { if(c<=0xff) {
if(!asciiBytes[c]) { if(!latin1Contains[c]) {
break; break;
} }
} else if(c<=0x7ff) { } else if(c<=0x7ff) {
@ -446,8 +459,8 @@ BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondi
// span not // span not
for(;;) { for(;;) {
c=*(--limit); c=*(--limit);
if(c<=0x7f) { if(c<=0xff) {
if(asciiBytes[c]) { if(latin1Contains[c]) {
break; break;
} }
} else if(c<=0x7ff) { } else if(c<=0x7ff) {
@ -497,22 +510,22 @@ const uint8_t *
BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const { BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
const uint8_t *limit=s+length; const uint8_t *limit=s+length;
uint8_t b=*s; uint8_t b=*s;
if((int8_t)b>=0) { if(U8_IS_SINGLE(b)) {
// Initial all-ASCII span. // Initial all-ASCII span.
if(spanCondition) { if(spanCondition) {
do { do {
if(!asciiBytes[b] || ++s==limit) { if(!latin1Contains[b] || ++s==limit) {
return s; return s;
} }
b=*s; b=*s;
} while((int8_t)b>=0); } while(U8_IS_SINGLE(b));
} else { } else {
do { do {
if(asciiBytes[b] || ++s==limit) { if(latin1Contains[b] || ++s==limit) {
return s; return s;
} }
b=*s; b=*s;
} while((int8_t)b>=0); } while(U8_IS_SINGLE(b));
} }
length=(int32_t)(limit-s); length=(int32_t)(limit-s);
} }
@ -540,20 +553,20 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
// single trail byte, check for preceding 3- or 4-byte lead byte // single trail byte, check for preceding 3- or 4-byte lead byte
if(length>=2 && (b=*(limit-2))>=0xe0) { if(length>=2 && (b=*(limit-2))>=0xe0) {
limit-=2; limit-=2;
if(asciiBytes[0x80]!=spanCondition) { if(containsFFFD!=spanCondition) {
limit0=limit; limit0=limit;
} }
} else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) { } else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
// 4-byte lead byte with only two trail bytes // 4-byte lead byte with only two trail bytes
limit-=3; limit-=3;
if(asciiBytes[0x80]!=spanCondition) { if(containsFFFD!=spanCondition) {
limit0=limit; limit0=limit;
} }
} }
} else { } else {
// lead byte with no trail bytes // lead byte with no trail bytes
--limit; --limit;
if(asciiBytes[0x80]!=spanCondition) { if(containsFFFD!=spanCondition) {
limit0=limit; limit0=limit;
} }
} }
@ -563,26 +576,26 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
while(s<limit) { while(s<limit) {
b=*s; b=*s;
if(b<0xc0) { if(U8_IS_SINGLE(b)) {
// ASCII; or trail bytes with the result of contains(FFFD). // ASCII
if(spanCondition) { if(spanCondition) {
do { do {
if(!asciiBytes[b]) { if(!latin1Contains[b]) {
return s; return s;
} else if(++s==limit) { } else if(++s==limit) {
return limit0; return limit0;
} }
b=*s; b=*s;
} while(b<0xc0); } while(U8_IS_SINGLE(b));
} else { } else {
do { do {
if(asciiBytes[b]) { if(latin1Contains[b]) {
return s; return s;
} else if(++s==limit) { } else if(++s==limit) {
return limit0; return limit0;
} }
b=*s; b=*s;
} while(b<0xc0); } while(U8_IS_SINGLE(b));
} }
} }
++s; // Advance past the lead byte. ++s; // Advance past the lead byte.
@ -619,7 +632,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3; UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
if( ( (0x10000<=c && c<=0x10ffff) ? if( ( (0x10000<=c && c<=0x10ffff) ?
containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) : containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
asciiBytes[0x80] containsFFFD
) != spanCondition ) != spanCondition
) { ) {
return s-1; return s-1;
@ -627,8 +640,9 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
s+=3; s+=3;
continue; continue;
} }
} else /* 0xc0<=b<0xe0 */ { } else {
if( /* handle U+0000..U+07FF inline */ if( /* handle U+0000..U+07FF inline */
b>=0xc0 &&
(t1=(uint8_t)(*s-0x80)) <= 0x3f (t1=(uint8_t)(*s-0x80)) <= 0x3f
) { ) {
if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) { if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
@ -642,7 +656,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
// Give an illegal sequence the same value as the result of contains(FFFD). // Give an illegal sequence the same value as the result of contains(FFFD).
// Handle each byte of an illegal sequence separately to simplify the code; // Handle each byte of an illegal sequence separately to simplify the code;
// no need to optimize error handling. // no need to optimize error handling.
if(asciiBytes[0x80]!=spanCondition) { if(containsFFFD!=spanCondition) {
return s-1; return s-1;
} }
} }
@ -667,26 +681,26 @@ BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCon
do { do {
b=s[--length]; b=s[--length];
if((int8_t)b>=0) { if(U8_IS_SINGLE(b)) {
// ASCII sub-span // ASCII sub-span
if(spanCondition) { if(spanCondition) {
do { do {
if(!asciiBytes[b]) { if(!latin1Contains[b]) {
return length+1; return length+1;
} else if(length==0) { } else if(length==0) {
return 0; return 0;
} }
b=s[--length]; b=s[--length];
} while((int8_t)b>=0); } while(U8_IS_SINGLE(b));
} else { } else {
do { do {
if(asciiBytes[b]) { if(latin1Contains[b]) {
return length+1; return length+1;
} else if(length==0) { } else if(length==0) {
return 0; return 0;
} }
b=s[--length]; b=s[--length];
} while((int8_t)b>=0); } while(U8_IS_SINGLE(b));
} }
} }

View File

@ -28,11 +28,12 @@ U_NAMESPACE_BEGIN
* Helper class for frozen UnicodeSets, implements contains() and span() * Helper class for frozen UnicodeSets, implements contains() and span()
* optimized for BMP code points. Structured to be UTF-8-friendly. * optimized for BMP code points. Structured to be UTF-8-friendly.
* *
* ASCII: Look up bytes. * Latin-1: Look up bytes.
* 2-byte characters: Bits organized vertically. * 2-byte characters: Bits organized vertically.
* 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF, * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
* with mixed for illegal ranges. * with mixed for illegal ranges.
* Supplementary characters: Call contains() on the parent set. * Supplementary characters: Binary search over
* the supplementary part of the parent set's inversion list.
*/ */
class BMPSet : public UMemory { class BMPSet : public UMemory {
public: public:
@ -96,12 +97,12 @@ private:
inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const; inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;
/* /*
* One byte per ASCII character, or trail byte in lead position. * One byte 0 or 1 per Latin-1 character.
* 0 or 1 for ASCII characters.
* The value for trail bytes is the result of contains(FFFD)
* for faster validity checking at runtime.
*/ */
UBool asciiBytes[0xc0]; UBool latin1Contains[0x100];
/* TRUE if contains(U+FFFD). */
UBool containsFFFD;
/* /*
* One bit per code point from U+0000..U+07FF. * One bit per code point from U+0000..U+07FF.

View File

@ -11,9 +11,6 @@
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
#include "brkeng.h"
#include "cmemory.h"
#include "dictbe.h"
#include "unicode/uchar.h" #include "unicode/uchar.h"
#include "unicode/uniset.h" #include "unicode/uniset.h"
#include "unicode/chariter.h" #include "unicode/chariter.h"
@ -24,6 +21,10 @@
#include "unicode/uscript.h" #include "unicode/uscript.h"
#include "unicode/ucharstrie.h" #include "unicode/ucharstrie.h"
#include "unicode/bytestrie.h" #include "unicode/bytestrie.h"
#include "brkeng.h"
#include "cmemory.h"
#include "dictbe.h"
#include "charstr.h" #include "charstr.h"
#include "dictionarydata.h" #include "dictionarydata.h"
#include "mutex.h" #include "mutex.h"
@ -80,25 +81,17 @@ UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
int32_t int32_t
UnhandledEngine::findBreaks( UText *text, UnhandledEngine::findBreaks( UText *text,
int32_t startPos, int32_t /* startPos */,
int32_t endPos, int32_t endPos,
UBool reverse,
int32_t breakType, int32_t breakType,
UStack &/*foundBreaks*/ ) const { UVector32 &/*foundBreaks*/ ) const {
if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) { if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) {
UChar32 c = utext_current32(text); UChar32 c = utext_current32(text);
if (reverse) {
while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) {
c = utext_previous32(text);
}
}
else {
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) { while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations. utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_current32(text); c = utext_current32(text);
} }
} }
}
return 0; return 0;
} }

View File

@ -19,6 +19,7 @@ U_NAMESPACE_BEGIN
class UnicodeSet; class UnicodeSet;
class UStack; class UStack;
class UVector32;
class DictionaryMatcher; class DictionaryMatcher;
/******************************************************************* /*******************************************************************
@ -67,18 +68,15 @@ class LanguageBreakEngine : public UMemory {
* is capable of handling. * is capable of handling.
* @param startPos The start of the run within the supplied text. * @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text. * @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1. * @param breakType The type of break desired, or -1.
* @param foundBreaks An allocated C array of the breaks found, if any * @param foundBreaks A Vector of int32_t to receive the breaks.
* @return The number of breaks found. * @return The number of breaks found.
*/ */
virtual int32_t findBreaks( UText *text, virtual int32_t findBreaks( UText *text,
int32_t startPos, int32_t startPos,
int32_t endPos, int32_t endPos,
UBool reverse,
int32_t breakType, int32_t breakType,
UStack &foundBreaks ) const = 0; UVector32 &foundBreaks ) const = 0;
}; };
@ -192,8 +190,6 @@ class UnhandledEngine : public LanguageBreakEngine {
* is capable of handling. * is capable of handling.
* @param startPos The start of the run within the supplied text. * @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text. * @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1. * @param breakType The type of break desired, or -1.
* @param foundBreaks An allocated C array of the breaks found, if any * @param foundBreaks An allocated C array of the breaks found, if any
* @return The number of breaks found. * @return The number of breaks found.
@ -201,9 +197,8 @@ class UnhandledEngine : public LanguageBreakEngine {
virtual int32_t findBreaks( UText *text, virtual int32_t findBreaks( UText *text,
int32_t startPos, int32_t startPos,
int32_t endPos, int32_t endPos,
UBool reverse,
int32_t breakType, int32_t breakType,
UStack &foundBreaks ) const; UVector32 &foundBreaks ) const;
/** /**
* <p>Tell the engine to handle a particular character and break type.</p> * <p>Tell the engine to handle a particular character and break type.</p>

View File

@ -195,7 +195,7 @@ BreakIterator::getAvailableLocales(int32_t& count)
// ------------------------------------------ // ------------------------------------------
// //
// Default constructor and destructor // Constructors, destructor and assignment operator
// //
//------------------------------------------- //-------------------------------------------
@ -204,6 +204,19 @@ BreakIterator::BreakIterator()
*validLocale = *actualLocale = 0; *validLocale = *actualLocale = 0;
} }
// Copy constructor: copy-constructs the UObject base, then duplicates both
// locale ID buffers from the source iterator.
BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
    // Each copy is bounded by the size of the destination member buffer.
    uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
    uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
}
// Copy assignment: duplicates both locale ID buffers from the source iterator.
// Self-assignment is a no-op.
BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
    if (this == &other) {
        return *this;
    }
    // Each copy is bounded by the size of the destination member buffer.
    uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
    uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
    return *this;
}
BreakIterator::~BreakIterator() BreakIterator::~BreakIterator()
{ {
} }
@ -265,7 +278,7 @@ ICUBreakIteratorService::~ICUBreakIteratorService() {}
// defined in ucln_cmn.h // defined in ucln_cmn.h
U_NAMESPACE_END U_NAMESPACE_END
static icu::UInitOnce gInitOnce; static icu::UInitOnce gInitOnceBrkiter;
static icu::ICULocaleService* gService = NULL; static icu::ICULocaleService* gService = NULL;
@ -280,7 +293,7 @@ static UBool U_CALLCONV breakiterator_cleanup(void) {
delete gService; delete gService;
gService = NULL; gService = NULL;
} }
gInitOnce.reset(); gInitOnceBrkiter.reset();
#endif #endif
return TRUE; return TRUE;
} }
@ -296,7 +309,7 @@ initService(void) {
static ICULocaleService* static ICULocaleService*
getService(void) getService(void)
{ {
umtx_initOnce(gInitOnce, &initService); umtx_initOnce(gInitOnceBrkiter, &initService);
return gService; return gService;
} }
@ -306,7 +319,7 @@ getService(void)
static inline UBool static inline UBool
hasService(void) hasService(void)
{ {
return !gInitOnce.isReset() && getService() != NULL; return !gInitOnceBrkiter.isReset() && getService() != NULL;
} }
// ------------------------------------- // -------------------------------------

View File

@ -0,0 +1,123 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// bytesinkutil.cpp
// created: 2017sep14 Markus W. Scherer
#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "unicode/edits.h"
#include "unicode/stringoptions.h"
#include "unicode/utf8.h"
#include "unicode/utf16.h"
#include "bytesinkutil.h"
#include "cmemory.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
/**
 * Converts the UTF-16 text (s16, s16Length) to UTF-8 and appends it to the sink,
 * one sink-provided buffer at a time. If edits is not nullptr, records that
 * (length) source bytes were replaced by the total number of UTF-8 bytes written.
 *
 * The conversion uses the _UNSAFE macros, so s16 is expected to be valid UTF-16.
 *
 * @param length     number of source bytes that were mapped to (s16, s16Length)
 * @param s16        UTF-16 replacement text
 * @param s16Length  number of UTF-16 code units in s16
 * @param sink       receives the UTF-8 output
 * @param edits      optional change log; may be nullptr
 * @param errorCode  in/out error code; set to U_INDEX_OUTOFBOUNDS_ERROR if the
 *                   accumulated UTF-8 length would exceed INT32_MAX
 * @return TRUE on success; FALSE if errorCode indicated a failure on entry
 *         or if the output length overflowed
 */
UBool
ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return FALSE; }
    char stackBuffer[200];
    int32_t totalBytes = 0;
    int32_t srcIndex = 0;
    while (srcIndex < s16Length) {
        // Ask for enough room for the rest of the text, clamped so that
        // the multiplication cannot overflow int32_t.
        const int32_t remaining = s16Length - srcIndex;
        int32_t wanted;
        if (remaining >= (INT32_MAX / 2)) {
            wanted = INT32_MAX;
        } else if (remaining >= (INT32_MAX / 3)) {
            wanted = remaining * 2;
        } else {
            wanted = remaining * 3;  // max 3 UTF-8 bytes per UTF-16 code unit
        }
        int32_t available;
        char *dest = sink.GetAppendBuffer(U8_MAX_LENGTH, wanted,
                                          stackBuffer, UPRV_LENGTHOF(stackBuffer), &available);
        // Stop early enough that one more code point (up to U8_MAX_LENGTH bytes)
        // always fits whenever the loop condition below still holds.
        const int32_t writeLimit = available - (U8_MAX_LENGTH - 1);
        int32_t written = 0;
        while (srcIndex < s16Length && written < writeLimit) {
            UChar32 c;
            U16_NEXT_UNSAFE(s16, srcIndex, c);
            U8_APPEND_UNSAFE(dest, written, c);
        }
        // Guard the running total against int32_t overflow before adding to it.
        if (written > (INT32_MAX - totalBytes)) {
            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return FALSE;
        }
        sink.Append(dest, written);
        totalBytes += written;
    }
    if (edits != nullptr) {
        edits->addReplace(length, totalBytes);
    }
    return TRUE;
}
/**
 * Pointer-range overload: the source bytes at [s, limit[ were mapped to
 * (s16, s16Length). Computes the source length and forwards to the
 * length-based appendChange().
 *
 * @return FALSE if errorCode indicated a failure on entry, or if the range is
 *         longer than INT32_MAX (errorCode is then set to
 *         U_INDEX_OUTOFBOUNDS_ERROR); otherwise the result of the forwarded call
 */
UBool
ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
                           const char16_t *s16, int32_t s16Length,
                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return FALSE; }
    const auto sourceLength = limit - s;
    if (sourceLength > INT32_MAX) {
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return FALSE;
    }
    return appendChange(static_cast<int32_t>(sourceLength),
                        s16, s16Length, sink, edits, errorCode);
}
/**
 * Encodes the code point c as UTF-8 and appends it to the sink.
 * If edits is not nullptr, first records that (length) source bytes
 * were replaced by the encoded bytes.
 *
 * Uses U8_APPEND_UNSAFE, so c is expected to be a valid code point.
 */
void
ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) {
    char utf8[U8_MAX_LENGTH];
    int32_t utf8Length = 0;
    U8_APPEND_UNSAFE(utf8, utf8Length, c);
    if (edits) {
        edits->addReplace(length, utf8Length);
    }
    sink.Append(utf8, utf8Length);
}
namespace {

// See unicode/utf8.h U8_APPEND_UNSAFE().

// Lead byte of a 2-byte sequence: 0xc0 combined with the bits of c above the low six.
inline uint8_t getTwoByteLead(UChar32 c) {
    return static_cast<uint8_t>(0xc0 | (c >> 6));
}

// Trail byte of a 2-byte sequence: 0x80 combined with the low six bits of c.
inline uint8_t getTwoByteTrail(UChar32 c) {
    return static_cast<uint8_t>(0x80 | (c & 0x3f));
}

}  // namespace
/**
 * Appends the two-byte UTF-8 encoding of c to the sink.
 * c must be in U+0080..U+07FF (asserted); no Edits are recorded.
 */
void
ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
    U_ASSERT(0x80 <= c && c <= 0x7ff);  // 2-byte UTF-8
    const char lead = static_cast<char>(getTwoByteLead(c));
    const char trail = static_cast<char>(getTwoByteTrail(c));
    const char utf8[2] = { lead, trail };
    sink.Append(utf8, 2);
}
/**
 * Handles a run of (length) source bytes at s that were not changed.
 * If edits is not nullptr, records the run as unchanged; the bytes are copied
 * to the sink unless options contains U_OMIT_UNCHANGED_TEXT.
 * A length of zero or less is a no-op.
 *
 * @return FALSE if errorCode indicated a failure on entry, otherwise TRUE
 */
UBool
ByteSinkUtil::appendUnchanged(const uint8_t *s, int32_t length,
                              ByteSink &sink, uint32_t options, Edits *edits,
                              UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return FALSE; }
    if (length <= 0) {
        return TRUE;  // nothing to record or copy
    }
    if (edits != nullptr) {
        edits->addUnchanged(length);
    }
    const bool omitUnchanged = (options & U_OMIT_UNCHANGED_TEXT) != 0;
    if (!omitUnchanged) {
        sink.Append(reinterpret_cast<const char *>(s), length);
    }
    return TRUE;
}
/**
 * Pointer-range overload: handles the unchanged source bytes at [s, limit[.
 * Computes the run length and forwards to the length-based appendUnchanged().
 *
 * @return FALSE if errorCode indicated a failure on entry, or if the range is
 *         longer than INT32_MAX (errorCode is then set to
 *         U_INDEX_OUTOFBOUNDS_ERROR); otherwise the result of the forwarded call
 */
UBool
ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
                              ByteSink &sink, uint32_t options, Edits *edits,
                              UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return FALSE; }
    const auto runLength = limit - s;
    if (runLength > INT32_MAX) {
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return FALSE;
    }
    return appendUnchanged(s, static_cast<int32_t>(runLength),
                           sink, options, edits, errorCode);
}
U_NAMESPACE_END

View File

@ -0,0 +1,53 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// bytesinkutil.h
// created: 2017sep14 Markus W. Scherer
#ifndef BYTESINKUTIL_H
#define BYTESINKUTIL_H

#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "unicode/edits.h"
#include "cmemory.h"
#include "uassert.h"

U_NAMESPACE_BEGIN

class ByteSink;
class Edits;

/**
 * Static helpers for writing UTF-8 output to a ByteSink while optionally
 * recording the corresponding changes in an Edits object.
 * The include guard above keeps this class from being redefined when the
 * header is included more than once in a translation unit.
 */
class U_COMMON_API ByteSinkUtil {
public:
    ByteSinkUtil() = delete;  // all static

    /**
     * (length) bytes were mapped to valid (s16, s16Length).
     * Appends the UTF-8 form of s16 to the sink and, if edits is not nullptr,
     * records a replacement of (length) bytes by the number of bytes written.
     * @return FALSE if errorCode is a failure on entry or the output length
     *         overflows int32_t (U_INDEX_OUTOFBOUNDS_ERROR); otherwise TRUE
     */
    static UBool appendChange(int32_t length,
                              const char16_t *s16, int32_t s16Length,
                              ByteSink &sink, Edits *edits, UErrorCode &errorCode);

    /**
     * The bytes at [s, limit[ were mapped to valid (s16, s16Length).
     * Same as the length-based overload; additionally fails with
     * U_INDEX_OUTOFBOUNDS_ERROR if the range is longer than INT32_MAX.
     */
    static UBool appendChange(const uint8_t *s, const uint8_t *limit,
                              const char16_t *s16, int32_t s16Length,
                              ByteSink &sink, Edits *edits, UErrorCode &errorCode);

    /** (length) bytes were mapped/changed to valid code point c. */
    static void appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits = nullptr);

    /** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. */
    static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
                                       ByteSink &sink, Edits *edits = nullptr) {
        appendCodePoint((int32_t)(nextSrc - src), c, sink, edits);
    }

    /** Append the two-byte character (U+0080..U+07FF). */
    static void appendTwoBytes(UChar32 c, ByteSink &sink);

    /**
     * (length) bytes at s were not changed.
     * If edits is not nullptr, records them as unchanged; appends them to the
     * sink unless options contains U_OMIT_UNCHANGED_TEXT.
     * Does nothing when length is zero or negative.
     * @return FALSE if errorCode is a failure on entry; otherwise TRUE
     */
    static UBool appendUnchanged(const uint8_t *s, int32_t length,
                                 ByteSink &sink, uint32_t options, Edits *edits,
                                 UErrorCode &errorCode);

    /**
     * The bytes at [s, limit[ were not changed.
     * Same as the length-based overload; additionally fails with
     * U_INDEX_OUTOFBOUNDS_ERROR if the range is longer than INT32_MAX.
     */
    static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
                                 ByteSink &sink, uint32_t options, Edits *edits,
                                 UErrorCode &errorCode);
};

U_NAMESPACE_END

#endif  // BYTESINKUTIL_H

View File

@ -45,6 +45,12 @@ void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
if (n <= 0) { if (n <= 0) {
return; return;
} }
if (n > (INT32_MAX - appended_)) {
// TODO: Report as integer overflow, not merely buffer overflow.
appended_ = INT32_MAX;
overflowed_ = TRUE;
return;
}
appended_ += n; appended_ += n;
int32_t available = capacity_ - size_; int32_t available = capacity_ - size_;
if (n > available) { if (n > available) {

View File

@ -405,7 +405,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
//String[] finalResult = new String[result.size()]; //String[] finalResult = new String[result.size()];
UnicodeString *finalResult = NULL; UnicodeString *finalResult = NULL;
int32_t resultCount; int32_t resultCount;
if((resultCount = result.count())) { if((resultCount = result.count()) != 0) {
finalResult = new UnicodeString[resultCount]; finalResult = new UnicodeString[resultCount];
if (finalResult == 0) { if (finalResult == 0) {
status = U_MEMORY_ALLOCATION_ERROR; status = U_MEMORY_ALLOCATION_ERROR;

View File

@ -162,7 +162,6 @@ public:
* @param p simple pointer to an array of T items that is adopted * @param p simple pointer to an array of T items that is adopted
*/ */
explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {} explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {}
#if U_HAVE_RVALUE_REFERENCES
/** /**
* Move constructor, leaves src with isNull(). * Move constructor, leaves src with isNull().
* @param src source smart pointer * @param src source smart pointer
@ -170,14 +169,12 @@ public:
LocalMemory(LocalMemory<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) { LocalMemory(LocalMemory<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
src.ptr=NULL; src.ptr=NULL;
} }
#endif
/** /**
* Destructor deletes the memory it owns. * Destructor deletes the memory it owns.
*/ */
~LocalMemory() { ~LocalMemory() {
uprv_free(LocalPointerBase<T>::ptr); uprv_free(LocalPointerBase<T>::ptr);
} }
#if U_HAVE_RVALUE_REFERENCES
/** /**
* Move assignment operator, leaves src with isNull(). * Move assignment operator, leaves src with isNull().
* The behavior is undefined if *this and src are the same object. * The behavior is undefined if *this and src are the same object.
@ -187,7 +184,6 @@ public:
LocalMemory<T> &operator=(LocalMemory<T> &&src) U_NOEXCEPT { LocalMemory<T> &operator=(LocalMemory<T> &&src) U_NOEXCEPT {
return moveFrom(src); return moveFrom(src);
} }
#endif
/** /**
* Move assignment, leaves src with isNull(). * Move assignment, leaves src with isNull().
* The behavior is undefined if *this and src are the same object. * The behavior is undefined if *this and src are the same object.
@ -312,6 +308,14 @@ public:
* Default constructor initializes with internal T[stackCapacity] buffer. * Default constructor initializes with internal T[stackCapacity] buffer.
*/ */
MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {} MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {}
/**
 * Automatically allocates the heap array if the argument is larger than the stack capacity.
 * Intended for use when an approximate capacity is known at compile time but the true
 * capacity is not known until runtime.
 *
 * NOTE(review): the result of resize() is not checked here; presumably the
 * array keeps its stack capacity if the heap allocation fails, so callers
 * that need newCapacity elements should verify the capacity — confirm.
 *
 * @param newCapacity requested minimum number of elements
 */
MaybeStackArray(int32_t newCapacity) : MaybeStackArray() {
    if (capacity < newCapacity) { resize(newCapacity); }
}
/** /**
* Destructor deletes the array (if owned). * Destructor deletes the array (if owned).
*/ */

View File

@ -46,9 +46,9 @@ int32_t
DictionaryBreakEngine::findBreaks( UText *text, DictionaryBreakEngine::findBreaks( UText *text,
int32_t startPos, int32_t startPos,
int32_t endPos, int32_t endPos,
UBool reverse,
int32_t breakType, int32_t breakType,
UStack &foundBreaks ) const { UVector32 &foundBreaks ) const {
(void)startPos; // TODO: remove this param?
int32_t result = 0; int32_t result = 0;
// Find the span of characters included in the set. // Find the span of characters included in the set.
@ -60,34 +60,12 @@ DictionaryBreakEngine::findBreaks( UText *text,
int32_t rangeStart; int32_t rangeStart;
int32_t rangeEnd; int32_t rangeEnd;
UChar32 c = utext_current32(text); UChar32 c = utext_current32(text);
if (reverse) {
UBool isDict = fSet.contains(c);
while((current = (int32_t)utext_getNativeIndex(text)) > startPos && isDict) {
c = utext_previous32(text);
isDict = fSet.contains(c);
}
if (current < startPos) {
rangeStart = startPos;
} else {
rangeStart = current;
if (!isDict) {
utext_next32(text);
rangeStart = (int32_t)utext_getNativeIndex(text);
}
}
// rangeEnd = start + 1;
utext_setNativeIndex(text, start);
utext_next32(text);
rangeEnd = (int32_t)utext_getNativeIndex(text);
}
else {
while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) { while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
utext_next32(text); // TODO: recast loop for postincrement utext_next32(text); // TODO: recast loop for postincrement
c = utext_current32(text); c = utext_current32(text);
} }
rangeStart = start; rangeStart = start;
rangeEnd = current; rangeEnd = current;
}
if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) { if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) {
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
utext_setNativeIndex(text, current); utext_setNativeIndex(text, current);
@ -248,7 +226,7 @@ int32_t
ThaiBreakEngine::divideUpDictionaryRange( UText *text, ThaiBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const { UVector32 &foundBreaks ) const {
utext_setNativeIndex(text, rangeStart); utext_setNativeIndex(text, rangeStart);
utext_moveIndex32(text, THAI_MIN_WORD_SPAN); utext_moveIndex32(text, THAI_MIN_WORD_SPAN);
if (utext_getNativeIndex(text) >= rangeEnd) { if (utext_getNativeIndex(text) >= rangeEnd) {
@ -487,7 +465,7 @@ int32_t
LaoBreakEngine::divideUpDictionaryRange( UText *text, LaoBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const { UVector32 &foundBreaks ) const {
if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) { if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words return 0; // Not enough characters for two words
} }
@ -680,7 +658,7 @@ int32_t
BurmeseBreakEngine::divideUpDictionaryRange( UText *text, BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const { UVector32 &foundBreaks ) const {
if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) { if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words return 0; // Not enough characters for two words
} }
@ -885,7 +863,7 @@ int32_t
KhmerBreakEngine::divideUpDictionaryRange( UText *text, KhmerBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const { UVector32 &foundBreaks ) const {
if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) { if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words return 0; // Not enough characters for two words
} }
@ -1110,9 +1088,9 @@ static inline uint32_t getKatakanaCost(int32_t wordLength){
return (wordLength > kMaxKatakanaLength) ? 8192 : katakanaCost[wordLength]; return (wordLength > kMaxKatakanaLength) ? 8192 : katakanaCost[wordLength];
} }
static inline bool isKatakana(uint16_t value) { static inline bool isKatakana(UChar32 value) {
return (value >= 0x30A1u && value <= 0x30FEu && value != 0x30FBu) || return (value >= 0x30A1 && value <= 0x30FE && value != 0x30FB) ||
(value >= 0xFF66u && value <= 0xFF9fu); (value >= 0xFF66 && value <= 0xFF9f);
} }
@ -1128,14 +1106,14 @@ static inline int32_t utext_i32_flag(int32_t bitIndex) {
* @param text A UText representing the text * @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters * @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters * @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0 * @param foundBreaks vector<int32> to receive the break positions
* @return The number of breaks found * @return The number of breaks found
*/ */
int32_t int32_t
CjkBreakEngine::divideUpDictionaryRange( UText *inText, CjkBreakEngine::divideUpDictionaryRange( UText *inText,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const { UVector32 &foundBreaks ) const {
if (rangeStart >= rangeEnd) { if (rangeStart >= rangeEnd) {
return 0; return 0;
} }
@ -1405,6 +1383,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
prevCPPos = cpPos; prevCPPos = cpPos;
prevUTextPos = utextPos; prevUTextPos = utextPos;
} }
(void)prevCPPos; // suppress compiler warnings about unused variable
// inString goes out of scope // inString goes out of scope
// inputMap goes out of scope // inputMap goes out of scope

View File

@ -15,6 +15,7 @@
#include "unicode/utext.h" #include "unicode/utext.h"
#include "brkeng.h" #include "brkeng.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
@ -84,21 +85,18 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
* *
* @param text A UText representing the text. The iterator is left at * @param text A UText representing the text. The iterator is left at
* the end of the run of characters which the engine is capable of handling * the end of the run of characters which the engine is capable of handling
* that starts from the first (or last) character in the range. * that starts from the first character in the range.
* @param startPos The start of the run within the supplied text. * @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text. * @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1. * @param breakType The type of break desired, or -1.
* @param foundBreaks An allocated C array of the breaks found, if any * @param foundBreaks vector of int32_t to receive the break positions
* @return The number of breaks found. * @return The number of breaks found.
*/ */
virtual int32_t findBreaks( UText *text, virtual int32_t findBreaks( UText *text,
int32_t startPos, int32_t startPos,
int32_t endPos, int32_t endPos,
UBool reverse,
int32_t breakType, int32_t breakType,
UStack &foundBreaks ) const; UVector32 &foundBreaks ) const;
protected: protected:
@ -128,7 +126,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text, virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const = 0; UVector32 &foundBreaks ) const = 0;
}; };
@ -185,7 +183,7 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text, virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const; UVector32 &foundBreaks ) const;
}; };
@ -241,7 +239,7 @@ class LaoBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text, virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const; UVector32 &foundBreaks ) const;
}; };
@ -297,7 +295,7 @@ class BurmeseBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text, virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const; UVector32 &foundBreaks ) const;
}; };
@ -353,7 +351,7 @@ class KhmerBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text, virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const; UVector32 &foundBreaks ) const;
}; };
@ -417,7 +415,7 @@ class CjkBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text, virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const; UVector32 &foundBreaks ) const;
}; };

View File

@ -17,10 +17,10 @@ namespace {
const int32_t MAX_UNCHANGED_LENGTH = 0x1000; const int32_t MAX_UNCHANGED_LENGTH = 0x1000;
const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1; const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
// 0wwwcccccccccccc with w=1..6 records ccc+1 replacements of w:w text units. // 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units.
// No length change. const int32_t MAX_SHORT_CHANGE_OLD_LENGTH = 6;
const int32_t MAX_SHORT_WIDTH = 6; const int32_t MAX_SHORT_CHANGE_NEW_LENGTH = 7;
const int32_t MAX_SHORT_CHANGE_LENGTH = 0xfff; const int32_t SHORT_CHANGE_NUM_MASK = 0x1ff;
const int32_t MAX_SHORT_CHANGE = 0x6fff; const int32_t MAX_SHORT_CHANGE = 0x6fff;
// 0111mmmmmmnnnnnn records a replacement of m text units with n. // 0111mmmmmmnnnnnn records a replacement of m text units with n.
@ -33,20 +33,85 @@ const int32_t LENGTH_IN_2TRAIL = 62;
} // namespace } // namespace
Edits::~Edits() { void Edits::releaseArray() U_NOEXCEPT {
if (array != stackArray) { if (array != stackArray) {
uprv_free(array); uprv_free(array);
} }
} }
void Edits::reset() { Edits &Edits::copyArray(const Edits &other) {
length = delta = 0; if (U_FAILURE(errorCode_)) {
length = delta = numChanges = 0;
return *this;
}
if (length > capacity) {
uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)length * 2);
if (newArray == nullptr) {
length = delta = numChanges = 0;
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
return *this;
}
releaseArray();
array = newArray;
capacity = length;
}
if (length > 0) {
uprv_memcpy(array, other.array, (size_t)length * 2);
}
return *this;
}
/**
 * Transfers the array contents of src into this object.
 * Works from this object's own length and errorCode_, so those are expected
 * to have been set before the call (as operator=(Edits &&) does).
 * - On a failure code: clears length, delta and numChanges; src is not touched.
 * - If at most STACK_CAPACITY units are in use: copies them into stackArray.
 * - Otherwise: takes over src's array pointer and capacity, points src back
 *   at its own stackArray and reset()s it.
 * @param src the object to move from
 * @return *this
 */
Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
    if (U_FAILURE(errorCode_)) {
        length = delta = numChanges = 0;
        return *this;
    }
    // Whatever we owned so far is no longer needed.
    releaseArray();
    if (length <= STACK_CAPACITY) {
        // Short enough for the built-in buffer: copy instead of adopting.
        array = stackArray;
        capacity = STACK_CAPACITY;
        if (length > 0) {
            uprv_memcpy(array, src.array, (size_t)length * 2);
        }
        return *this;
    }
    // Adopt the array of src and leave src empty on its own stack buffer.
    array = src.array;
    capacity = src.capacity;
    src.array = src.stackArray;
    src.capacity = STACK_CAPACITY;
    src.reset();
    return *this;
}
/**
 * Copy assignment: copies length, delta, numChanges and the error code of
 * other, then copies its array via copyArray().
 * @param other the object to copy from
 * @return *this
 */
Edits &Edits::operator=(const Edits &other) {
    // Self-assignment is a no-op. Without this guard copyArray() would call
    // uprv_memcpy() with the same array as both source and destination.
    if (this == &other) { return *this; }
    length = other.length;
    delta = other.delta;
    numChanges = other.numChanges;
    errorCode_ = other.errorCode_;
    // copyArray() works from the length and errorCode_ that were just set.
    return copyArray(other);
}
/**
 * Move assignment: copies length, delta, numChanges and the error code of
 * src, then transfers its array via moveArray().
 * @param src the object to move from
 * @return *this
 */
Edits &Edits::operator=(Edits &&src) U_NOEXCEPT {
    // Self-move is a no-op. Without this guard moveArray() would release this
    // object's own heap array and then reset() it, silently dropping all edits.
    if (this == &src) { return *this; }
    length = src.length;
    delta = src.delta;
    numChanges = src.numChanges;
    errorCode_ = src.errorCode_;
    // moveArray() works from the length and errorCode_ that were just set.
    return moveArray(src);
}
// Destructor: hands cleanup to releaseArray(), which frees the array
// only when it is not the built-in stackArray.
Edits::~Edits() {
releaseArray();
}
void Edits::reset() U_NOEXCEPT {
length = delta = numChanges = 0;
errorCode_ = U_ZERO_ERROR;
} }
void Edits::addUnchanged(int32_t unchangedLength) { void Edits::addUnchanged(int32_t unchangedLength) {
if(U_FAILURE(errorCode) || unchangedLength == 0) { return; } if(U_FAILURE(errorCode_) || unchangedLength == 0) { return; }
if(unchangedLength < 0) { if(unchangedLength < 0) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR; errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
return; return;
} }
// Merge into previous unchanged-text record, if any. // Merge into previous unchanged-text record, if any.
@ -72,38 +137,41 @@ void Edits::addUnchanged(int32_t unchangedLength) {
} }
void Edits::addReplace(int32_t oldLength, int32_t newLength) { void Edits::addReplace(int32_t oldLength, int32_t newLength) {
if(U_FAILURE(errorCode)) { return; } if(U_FAILURE(errorCode_)) { return; }
if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
// Replacement of short oldLength text units by same-length new text.
// Merge into previous short-replacement record, if any.
int32_t last = lastUnit();
if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
(last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
setLastUnit(last + 1);
return;
}
append(oldLength << 12);
return;
}
if(oldLength < 0 || newLength < 0) { if(oldLength < 0 || newLength < 0) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR; errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
return; return;
} }
if (oldLength == 0 && newLength == 0) { if (oldLength == 0 && newLength == 0) {
return; return;
} }
++numChanges;
int32_t newDelta = newLength - oldLength; int32_t newDelta = newLength - oldLength;
if (newDelta != 0) { if (newDelta != 0) {
if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) || if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
(newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) { (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
// Integer overflow or underflow. // Integer overflow or underflow.
errorCode = U_INDEX_OUTOFBOUNDS_ERROR; errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return; return;
} }
delta += newDelta; delta += newDelta;
} }
if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH &&
newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) {
// Merge into previous same-lengths short-replacement record, if any.
int32_t u = (oldLength << 12) | (newLength << 9);
int32_t last = lastUnit();
if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
(last & ~SHORT_CHANGE_NUM_MASK) == u &&
(last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) {
setLastUnit(last + 1);
return;
}
append(u);
return;
}
int32_t head = 0x7000; int32_t head = 0x7000;
if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) { if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
head |= oldLength << 6; head |= oldLength << 6;
@ -149,7 +217,7 @@ UBool Edits::growArray() {
} else if (capacity == INT32_MAX) { } else if (capacity == INT32_MAX) {
// Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API // Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
// with a result-string-buffer overflow. // with a result-string-buffer overflow.
errorCode = U_INDEX_OUTOFBOUNDS_ERROR; errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE; return FALSE;
} else if (capacity >= (INT32_MAX / 2)) { } else if (capacity >= (INT32_MAX / 2)) {
newCapacity = INT32_MAX; newCapacity = INT32_MAX;
@ -158,18 +226,16 @@ UBool Edits::growArray() {
} }
// Grow by at least 5 units so that a maximal change record will fit. // Grow by at least 5 units so that a maximal change record will fit.
if ((newCapacity - capacity) < 5) { if ((newCapacity - capacity) < 5) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR; errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE; return FALSE;
} }
uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2); uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
if (newArray == NULL) { if (newArray == NULL) {
errorCode = U_MEMORY_ALLOCATION_ERROR; errorCode_ = U_MEMORY_ALLOCATION_ERROR;
return FALSE; return FALSE;
} }
uprv_memcpy(newArray, array, (size_t)length * 2); uprv_memcpy(newArray, array, (size_t)length * 2);
if (array != stackArray) { releaseArray();
uprv_free(array);
}
array = newArray; array = newArray;
capacity = newCapacity; capacity = newCapacity;
return TRUE; return TRUE;
@ -177,27 +243,161 @@ UBool Edits::growArray() {
UBool Edits::copyErrorTo(UErrorCode &outErrorCode) { UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
if (U_FAILURE(outErrorCode)) { return TRUE; } if (U_FAILURE(outErrorCode)) { return TRUE; }
if (U_SUCCESS(errorCode)) { return FALSE; } if (U_SUCCESS(errorCode_)) { return FALSE; }
outErrorCode = errorCode; outErrorCode = errorCode_;
return TRUE; return TRUE;
} }
UBool Edits::hasChanges() const { Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
if (delta != 0) { if (copyErrorTo(errorCode)) { return *this; }
return TRUE; // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
// Parallel iteration over both Edits.
Iterator abIter = ab.getFineIterator();
Iterator bcIter = bc.getFineIterator();
UBool abHasNext = TRUE, bcHasNext = TRUE;
// Copy iterator state into local variables, so that we can modify and subdivide spans.
// ab old & new length, bc old & new length
int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
// When we have different-intermediate-length changes, we accumulate a larger change.
int32_t pending_aLength = 0, pending_cLength = 0;
for (;;) {
// At this point, for each of the two iterators:
// Either we are done with the locally cached current edit,
// and its intermediate-string length has been reset,
// or we will continue to work with a truncated remainder of this edit.
//
// If the current edit is done, and the iterator has not yet reached the end,
// then we fetch the next edit. This is true for at least one of the iterators.
//
// Normally it does not matter whether we fetch from ab and then bc or vice versa.
// However, the result is observably different when
// ab deletions meet bc insertions at the same intermediate-string index.
// Some users expect the bc insertions to come first, so we fetch from bc first.
if (bc_bLength == 0) {
if (bcHasNext && (bcHasNext = bcIter.next(errorCode))) {
bc_bLength = bcIter.oldLength();
cLength = bcIter.newLength();
if (bc_bLength == 0) {
// insertion
if (ab_bLength == 0 || !abIter.hasChange()) {
addReplace(pending_aLength, pending_cLength + cLength);
pending_aLength = pending_cLength = 0;
} else {
pending_cLength += cLength;
} }
for (int32_t i = 0; i < length; ++i) { continue;
if (array[i] > MAX_UNCHANGED) {
return TRUE;
} }
} }
return FALSE; // else see if the other iterator is done, too.
}
if (ab_bLength == 0) {
if (abHasNext && (abHasNext = abIter.next(errorCode))) {
aLength = abIter.oldLength();
ab_bLength = abIter.newLength();
if (ab_bLength == 0) {
// deletion
if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
addReplace(pending_aLength + aLength, pending_cLength);
pending_aLength = pending_cLength = 0;
} else {
pending_aLength += aLength;
}
continue;
}
} else if (bc_bLength == 0) {
// Both iterators are done at the same time:
// The intermediate-string lengths match.
break;
} else {
// The ab output string is shorter than the bc input string.
if (!copyErrorTo(errorCode)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
return *this;
}
}
if (bc_bLength == 0) {
// The bc input string is shorter than the ab output string.
if (!copyErrorTo(errorCode)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
return *this;
}
// Done fetching: ab_bLength > 0 && bc_bLength > 0
// The current state has two parts:
// - Past: We accumulate a longer ac edit in the "pending" variables.
// - Current: We have copies of the current ab/bc edits in local variables.
// At least one side is newly fetched.
// One side might be a truncated remainder of an edit we fetched earlier.
if (!abIter.hasChange() && !bcIter.hasChange()) {
// An unchanged span all the way from string a to string c.
if (pending_aLength != 0 || pending_cLength != 0) {
addReplace(pending_aLength, pending_cLength);
pending_aLength = pending_cLength = 0;
}
int32_t unchangedLength = aLength <= cLength ? aLength : cLength;
addUnchanged(unchangedLength);
ab_bLength = aLength -= unchangedLength;
bc_bLength = cLength -= unchangedLength;
// At least one of the unchanged spans is now empty.
continue;
}
if (!abIter.hasChange() && bcIter.hasChange()) {
// Unchanged a->b but changed b->c.
if (ab_bLength >= bc_bLength) {
// Split the longer unchanged span into change + remainder.
addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
pending_aLength = pending_cLength = 0;
aLength = ab_bLength -= bc_bLength;
bc_bLength = 0;
continue;
}
// Handle the shorter unchanged span below like a change.
} else if (abIter.hasChange() && !bcIter.hasChange()) {
// Changed a->b and then unchanged b->c.
if (ab_bLength <= bc_bLength) {
// Split the longer unchanged span into change + remainder.
addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
pending_aLength = pending_cLength = 0;
cLength = bc_bLength -= ab_bLength;
ab_bLength = 0;
continue;
}
// Handle the shorter unchanged span below like a change.
} else { // both abIter.hasChange() && bcIter.hasChange()
if (ab_bLength == bc_bLength) {
// Changes on both sides up to the same position. Emit & reset.
addReplace(pending_aLength + aLength, pending_cLength + cLength);
pending_aLength = pending_cLength = 0;
ab_bLength = bc_bLength = 0;
continue;
}
}
// Accumulate the a->c change, reset the shorter side,
// keep a remainder of the longer one.
pending_aLength += aLength;
pending_cLength += cLength;
if (ab_bLength < bc_bLength) {
bc_bLength -= ab_bLength;
cLength = ab_bLength = 0;
} else { // ab_bLength > bc_bLength
ab_bLength -= bc_bLength;
aLength = bc_bLength = 0;
}
}
if (pending_aLength != 0 || pending_cLength != 0) {
addReplace(pending_aLength, pending_cLength);
}
copyErrorTo(errorCode);
return *this;
} }
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) : Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
array(a), index(0), length(len), remaining(0), array(a), index(0), length(len), remaining(0),
onlyChanges_(oc), coarse(crs), onlyChanges_(oc), coarse(crs),
changed(FALSE), oldLength_(0), newLength_(0), dir(0), changed(FALSE), oldLength_(0), newLength_(0),
srcIndex(0), replIndex(0), destIndex(0) {} srcIndex(0), replIndex(0), destIndex(0) {}
int32_t Edits::Iterator::readLength(int32_t head) { int32_t Edits::Iterator::readLength(int32_t head) {
@ -219,7 +419,7 @@ int32_t Edits::Iterator::readLength(int32_t head) {
} }
} }
void Edits::Iterator::updateIndexes() { void Edits::Iterator::updateNextIndexes() {
srcIndex += oldLength_; srcIndex += oldLength_;
if (changed) { if (changed) {
replIndex += newLength_; replIndex += newLength_;
@ -227,23 +427,53 @@ void Edits::Iterator::updateIndexes() {
destIndex += newLength_; destIndex += newLength_;
} }
// Backward counterpart of updateNextIndexes():
// moves the string indexes back by the lengths of the current span.
// The replacement index only moves for change spans.
void Edits::Iterator::updatePreviousIndexes() {
    destIndex -= newLength_;
    srcIndex -= oldLength_;
    if (changed) { replIndex -= newLength_; }
}
UBool Edits::Iterator::noNext() { UBool Edits::Iterator::noNext() {
// No change beyond the string. // No change before or beyond the string.
dir = 0;
changed = FALSE; changed = FALSE;
oldLength_ = newLength_ = 0; oldLength_ = newLength_ = 0;
return FALSE; return FALSE;
} }
UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) { UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
// Forward iteration: Update the string indexes to the limit of the current span,
// and post-increment-read array units to assemble a new span.
// Leaves the array index one after the last unit of that span.
if (U_FAILURE(errorCode)) { return FALSE; } if (U_FAILURE(errorCode)) { return FALSE; }
// We have an errorCode in case we need to start guarding against integer overflows. // We have an errorCode in case we need to start guarding against integer overflows.
// It is also convenient for caller loops if we bail out when an error was set elsewhere. // It is also convenient for caller loops if we bail out when an error was set elsewhere.
updateIndexes(); if (dir > 0) {
updateNextIndexes();
} else {
if (dir < 0) {
// Turn around from previous() to next().
// Post-increment-read the same span again.
if (remaining > 0) { if (remaining > 0) {
// Fine-grained iterator: Continue a sequence of equal-length changes. // Fine-grained iterator:
// Stay on the current one of a sequence of compressed changes.
++index; // next() rests on the index after the sequence unit.
dir = 1;
return TRUE;
}
}
dir = 1;
}
if (remaining >= 1) {
// Fine-grained iterator: Continue a sequence of compressed changes.
if (remaining > 1) {
--remaining; --remaining;
return TRUE; return TRUE;
} }
remaining = 0;
}
if (index >= length) { if (index >= length) {
return noNext(); return noNext();
} }
@ -258,7 +488,7 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
} }
newLength_ = oldLength_; newLength_ = oldLength_;
if (onlyChanges) { if (onlyChanges) {
updateIndexes(); updateNextIndexes();
if (index >= length) { if (index >= length) {
return noNext(); return noNext();
} }
@ -270,14 +500,19 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
} }
changed = TRUE; changed = TRUE;
if (u <= MAX_SHORT_CHANGE) { if (u <= MAX_SHORT_CHANGE) {
int32_t oldLen = u >> 12;
int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
if (coarse) { if (coarse) {
int32_t w = u >> 12; oldLength_ = num * oldLen;
int32_t len = (u & 0xfff) + 1; newLength_ = num * newLen;
oldLength_ = newLength_ = len * w;
} else { } else {
// Split a sequence of equal-length changes that was compressed into one unit. // Split a sequence of changes that was compressed into one unit.
oldLength_ = newLength_ = u >> 12; oldLength_ = oldLen;
remaining = u & 0xfff; newLength_ = newLen;
if (num > 1) {
remaining = num; // This is the first of two or more changes.
}
return TRUE; return TRUE;
} }
} else { } else {
@ -292,55 +527,250 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
while (index < length && (u = array[index]) > MAX_UNCHANGED) { while (index < length && (u = array[index]) > MAX_UNCHANGED) {
++index; ++index;
if (u <= MAX_SHORT_CHANGE) { if (u <= MAX_SHORT_CHANGE) {
int32_t w = u >> 12; int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
int32_t len = (u & 0xfff) + 1; oldLength_ += (u >> 12) * num;
len = len * w; newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
oldLength_ += len;
newLength_ += len;
} else { } else {
U_ASSERT(u <= 0x7fff); U_ASSERT(u <= 0x7fff);
int32_t oldLen = readLength((u >> 6) & 0x3f); oldLength_ += readLength((u >> 6) & 0x3f);
int32_t newLen = readLength(u & 0x3f); newLength_ += readLength(u & 0x3f);
oldLength_ += oldLen;
newLength_ += newLen;
} }
} }
return TRUE; return TRUE;
} }
UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) { UBool Edits::Iterator::previous(UErrorCode &errorCode) {
if (U_FAILURE(errorCode) || i < 0) { return FALSE; } // Backward iteration: Pre-decrement-read array units to assemble a new span,
if (i < srcIndex) { // then update the string indexes to the start of that span.
// Reset the iterator to the start. // Leaves the array index on the head unit of that span.
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0; if (U_FAILURE(errorCode)) { return FALSE; }
} else if (i < (srcIndex + oldLength_)) { // We have an errorCode in case we need to start guarding against integer overflows.
// The index is in the current span. // It is also convenient for caller loops if we bail out when an error was set elsewhere.
if (dir >= 0) {
if (dir > 0) {
// Turn around from next() to previous().
// Set the string indexes to the span limit and
// pre-decrement-read the same span again.
if (remaining > 0) {
// Fine-grained iterator:
// Stay on the current one of a sequence of compressed changes.
--index; // previous() rests on the sequence unit.
dir = -1;
return TRUE; return TRUE;
} }
while (next(FALSE, errorCode)) { updateNextIndexes();
if (i < (srcIndex + oldLength_)) { }
// The index is in the current span. dir = -1;
}
if (remaining > 0) {
// Fine-grained iterator: Continue a sequence of compressed changes.
int32_t u = array[index];
U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
++remaining;
updatePreviousIndexes();
return TRUE; return TRUE;
} }
remaining = 0;
}
if (index <= 0) {
return noNext();
}
int32_t u = array[--index];
if (u <= MAX_UNCHANGED) {
// Combine adjacent unchanged ranges.
changed = FALSE;
oldLength_ = u + 1;
while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
--index;
oldLength_ += u + 1;
}
newLength_ = oldLength_;
// No need to handle onlyChanges as long as previous() is called only from findIndex().
updatePreviousIndexes();
return TRUE;
}
changed = TRUE;
if (u <= MAX_SHORT_CHANGE) {
int32_t oldLen = u >> 12;
int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
if (coarse) {
oldLength_ = num * oldLen;
newLength_ = num * newLen;
} else {
// Split a sequence of changes that was compressed into one unit.
oldLength_ = oldLen;
newLength_ = newLen;
if (num > 1) {
remaining = 1; // This is the last of two or more changes.
}
updatePreviousIndexes();
return TRUE;
}
} else {
if (u <= 0x7fff) {
// The change is encoded in u alone.
oldLength_ = readLength((u >> 6) & 0x3f);
newLength_ = readLength(u & 0x3f);
} else {
// Back up to the head of the change, read the lengths,
// and reset the index to the head again.
U_ASSERT(index > 0);
while ((u = array[--index]) > 0x7fff) {}
U_ASSERT(u > MAX_SHORT_CHANGE);
int32_t headIndex = index++;
oldLength_ = readLength((u >> 6) & 0x3f);
newLength_ = readLength(u & 0x3f);
index = headIndex;
}
if (!coarse) {
updatePreviousIndexes();
return TRUE;
}
}
// Combine adjacent changes.
while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) {
--index;
if (u <= MAX_SHORT_CHANGE) {
int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
oldLength_ += (u >> 12) * num;
newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
} else if (u <= 0x7fff) {
// Read the lengths, and reset the index to the head again.
int32_t headIndex = index++;
oldLength_ += readLength((u >> 6) & 0x3f);
newLength_ += readLength(u & 0x3f);
index = headIndex;
}
}
updatePreviousIndexes();
return TRUE;
}
int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {
if (U_FAILURE(errorCode) || i < 0) { return -1; }
int32_t spanStart, spanLength;
if (findSource) { // find source index
spanStart = srcIndex;
spanLength = oldLength_;
} else { // find destination index
spanStart = destIndex;
spanLength = newLength_;
}
if (i < spanStart) {
if (i >= (spanStart / 2)) {
// Search backwards.
for (;;) {
UBool hasPrevious = previous(errorCode);
U_ASSERT(hasPrevious); // because i>=0 and the first span starts at 0
(void)hasPrevious; // avoid unused-variable warning
spanStart = findSource ? srcIndex : destIndex;
if (i >= spanStart) {
// The index is in the current span.
return 0;
}
if (remaining > 0) { if (remaining > 0) {
// Is the index in one of the remaining compressed edits? // Is the index in one of the remaining compressed edits?
// srcIndex is the start of the current span, before the remaining ones. // spanStart is the start of the current span, first of the remaining ones.
int32_t len = (remaining + 1) * oldLength_; spanLength = findSource ? oldLength_ : newLength_;
if (i < (srcIndex + len)) { int32_t u = array[index];
int32_t n = (i - srcIndex) / oldLength_; // 1 <= n <= remaining U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
len = n * oldLength_; int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining;
srcIndex += len; int32_t len = num * spanLength;
replIndex += len; if (i >= (spanStart - len)) {
destIndex += len; int32_t n = ((spanStart - i - 1) / spanLength) + 1;
remaining -= n; // 1 <= n <= num
return TRUE; srcIndex -= n * oldLength_;
replIndex -= n * newLength_;
destIndex -= n * newLength_;
remaining += n;
return 0;
} }
// Make next() skip all of these edits at once. // Skip all of these edits at once.
oldLength_ = newLength_ = len; srcIndex -= num * oldLength_;
replIndex -= num * newLength_;
destIndex -= num * newLength_;
remaining = 0; remaining = 0;
} }
} }
return FALSE; }
// Reset the iterator to the start.
dir = 0;
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
} else if (i < (spanStart + spanLength)) {
// The index is in the current span.
return 0;
}
while (next(FALSE, errorCode)) {
if (findSource) {
spanStart = srcIndex;
spanLength = oldLength_;
} else {
spanStart = destIndex;
spanLength = newLength_;
}
if (i < (spanStart + spanLength)) {
// The index is in the current span.
return 0;
}
if (remaining > 1) {
// Is the index in one of the remaining compressed edits?
// spanStart is the start of the current span, first of the remaining ones.
int32_t len = remaining * spanLength;
if (i < (spanStart + len)) {
int32_t n = (i - spanStart) / spanLength; // 1 <= n <= remaining - 1
srcIndex += n * oldLength_;
replIndex += n * newLength_;
destIndex += n * newLength_;
remaining -= n;
return 0;
}
// Make next() skip all of these edits at once.
oldLength_ *= remaining;
newLength_ *= remaining;
remaining = 0;
}
}
return 1;
}
// Translates source-string index i into a destination-string index.
// findIndex() is asked to search by source index (findSource == TRUE); its
// result is negative on error or when i lies before the string, positive when
// i is at or past the end of the string, and 0 when i falls inside the span
// the iterator is now positioned on.
int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) {
    const int32_t position = findIndex(i, TRUE, errorCode);
    if (position < 0) {
        // Error, or the index precedes the string.
        return 0;
    }
    if (position == 0 && i != srcIndex) {
        // Strictly inside the found span:
        // a change span maps to its end, an unchanged span maps 1:1.
        return changed ? destIndex + newLength_
                       : destIndex + (i - srcIndex);
    }
    // At or beyond the string length, or exactly at the start of the span.
    return destIndex;
}
// Translates destination-string index i into a source-string index.
// findIndex() is asked to search by destination index (findSource == FALSE);
// its result is negative on error or when i lies before the string, positive
// when i is at or past the end of the string, and 0 when i falls inside the
// span the iterator is now positioned on.
int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) {
    const int32_t position = findIndex(i, FALSE, errorCode);
    if (position < 0) {
        // Error, or the index precedes the string.
        return 0;
    }
    if (position == 0 && i != destIndex) {
        // Strictly inside the found span:
        // a change span maps to its end, an unchanged span keeps the offset.
        return changed ? srcIndex + oldLength_
                       : srcIndex + (i - destIndex);
    }
    // At or beyond the string length, or exactly at the start of the span.
    return srcIndex;
}
U_NAMESPACE_END U_NAMESPACE_END

View File

@ -694,7 +694,7 @@ FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st
} }
FilteredBreakIteratorBuilder * FilteredBreakIteratorBuilder *
FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) { FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) {
if(U_FAILURE(status)) return NULL; if(U_FAILURE(status)) return NULL;
LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status); LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
return (U_SUCCESS(status))? ret.orphan(): NULL; return (U_SUCCESS(status))? ret.orphan(): NULL;

View File

@ -20,7 +20,9 @@
#if !UCONFIG_NO_NORMALIZATION #if !UCONFIG_NO_NORMALIZATION
#include "unicode/edits.h"
#include "unicode/normalizer2.h" #include "unicode/normalizer2.h"
#include "unicode/stringoptions.h"
#include "unicode/uniset.h" #include "unicode/uniset.h"
#include "unicode/unistr.h" #include "unicode/unistr.h"
#include "unicode/unorm.h" #include "unicode/unorm.h"
@ -85,6 +87,52 @@ FilteredNormalizer2::normalize(const UnicodeString &src,
return dest; return dest;
} }
// Public UTF-8 entry point: normalizes src into sink, optionally recording
// the changes in edits. The Edits object is reset here at most once; the
// span-by-span worker is then always called with U_EDITS_NO_RESET so that
// edits accumulated for earlier spans are kept.
void
FilteredNormalizer2::normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
                                   Edits *edits, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return;
    }
    const UBool callerKeepsEdits = (options & U_EDITS_NO_RESET) != 0;
    if (!callerKeepsEdits && edits != nullptr) {
        edits->reset();
    }
    // Start with the span of text that is inside the filter set.
    normalizeUTF8(options | U_EDITS_NO_RESET, src.data(), src.length(),
                  sink, edits, USET_SPAN_SIMPLE, errorCode);
}
// Span-by-span worker: walks src, alternating between runs measured with
// USET_SPAN_NOT_CONTAINED and runs measured with USET_SPAN_SIMPLE.
// NOT_CONTAINED runs are passed through untouched (recorded as unchanged in
// edits, and appended to sink unless U_OMIT_UNCHANGED_TEXT is set); the other
// runs are handed to the wrapped normalizer. Stops at the first failure
// reported by the wrapped normalizer.
void
FilteredNormalizer2::normalizeUTF8(uint32_t options, const char *src, int32_t length,
                                   ByteSink &sink, Edits *edits,
                                   USetSpanCondition spanCondition,
                                   UErrorCode &errorCode) const {
    while (length > 0) {
        const int32_t runLength = set.spanUTF8(src, length, spanCondition);
        const UBool outsideFilter = (spanCondition == USET_SPAN_NOT_CONTAINED);
        if (runLength != 0) {
            if (outsideFilter) {
                if (edits != nullptr) {
                    edits->addUnchanged(runLength);
                }
                if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
                    sink.Append(src, runLength);
                }
            } else {
                // Not norm2.normalizeSecondAndAppend() because we do not want
                // to modify the non-filter part of dest.
                norm2.normalizeUTF8(options, StringPiece(src, runLength), sink, edits, errorCode);
                if (U_FAILURE(errorCode)) {
                    return;
                }
            }
        }
        // Flip the span condition for the next run and advance past this one.
        spanCondition = outsideFilter ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED;
        src += runLength;
        length -= runLength;
    }
}
UnicodeString & UnicodeString &
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first, FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second, const UnicodeString &second,
@ -196,6 +244,31 @@ FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode)
return TRUE; return TRUE;
} }
// Reports whether the UTF-8 text in sp is normalized under this filter.
// The text is split into alternating runs, starting with USET_SPAN_SIMPLE;
// only those runs are checked with the wrapped normalizer, while the
// USET_SPAN_NOT_CONTAINED runs are skipped. Returns FALSE on any error.
UBool
FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    const char *cursor = sp.data();
    USetSpanCondition mode = USET_SPAN_SIMPLE;
    for (int32_t remaining = sp.length(); remaining > 0; ) {
        const int32_t runLength = set.spanUTF8(cursor, remaining, mode);
        if (mode != USET_SPAN_NOT_CONTAINED) {
            const UBool runIsNormalized =
                norm2.isNormalizedUTF8(StringPiece(cursor, runLength), errorCode);
            if (!runIsNormalized || U_FAILURE(errorCode)) {
                return FALSE;
            }
            mode = USET_SPAN_NOT_CONTAINED;
        } else {
            // Nothing to check for this run; switch back for the next one.
            mode = USET_SPAN_SIMPLE;
        }
        cursor += runLength;
        remaining -= runLength;
    }
    return TRUE;
}
UNormalizationCheckResult UNormalizationCheckResult
FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
uprv_checkCanGetBuffer(s, errorCode); uprv_checkCanGetBuffer(s, errorCode);

View File

@ -33,6 +33,8 @@ class U_COMMON_API Hashtable : public UMemory {
inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status); inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
inline void initSize(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t size, UErrorCode& status);
public: public:
/** /**
* Construct a hashtable * Construct a hashtable
@ -41,6 +43,14 @@ public:
*/ */
Hashtable(UBool ignoreKeyCase, UErrorCode& status); Hashtable(UBool ignoreKeyCase, UErrorCode& status);
/**
* Construct a hashtable
* @param ignoreKeyCase If true, keys are case insensitive.
* @param size initial size allocation
* @param status Error code
*/
Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status);
/** /**
* Construct a hashtable * Construct a hashtable
* @param keyComp Comparator for comparing the keys * @param keyComp Comparator for comparing the keys
@ -119,10 +129,23 @@ inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
} }
} }
// Initializes the embedded hash object with the given hash/compare functions
// and the requested size. Does nothing if status already indicates failure.
// Only on success is `hash` pointed at the embedded object and
// uprv_deleteUObject installed as its key deleter.
inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp,
                                UValueComparator *valueComp, int32_t size, UErrorCode& status) {
    if (U_SUCCESS(status)) {
        uhash_initSize(&hashObj, keyHash, keyComp, valueComp, size, &status);
    }
    if (U_FAILURE(status)) {
        // Either failed on entry or uhash_initSize() reported an error.
        return;
    }
    hash = &hashObj;
    uhash_setKeyDeleter(hash, uprv_deleteUObject);
}
inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
UErrorCode& status) : hash(0) { UErrorCode& status) : hash(0) {
init( uhash_hashUnicodeString, keyComp, valueComp, status); init( uhash_hashUnicodeString, keyComp, valueComp, status);
} }
inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status) inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
: hash(0) : hash(0)
{ {
@ -134,6 +157,17 @@ inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
status); status);
} }
// Constructs a hashtable with an initial size. The caseless hash and compare
// functions are used when ignoreKeyCase is true, the plain UnicodeString ones
// otherwise. No value comparator is supplied.
inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status)
 : hash(0)
{
    if (ignoreKeyCase) {
        initSize(uhash_hashCaselessUnicodeString,
                 uhash_compareCaselessUnicodeString,
                 NULL, size, status);
    } else {
        initSize(uhash_hashUnicodeString,
                 uhash_compareUnicodeString,
                 NULL, size, status);
    }
}
inline Hashtable::Hashtable(UErrorCode& status) inline Hashtable::Hashtable(UErrorCode& status)
: hash(0) : hash(0)
{ {

View File

@ -63,7 +63,7 @@ ListFormatInternal(const ListFormatInternal &other) :
static Hashtable* listPatternHash = NULL; static Hashtable* listPatternHash = NULL;
static UMutex listFormatterMutex = U_MUTEX_INITIALIZER; static UMutex listFormatterMutex = U_MUTEX_INITIALIZER;
static const char *STANDARD_STYLE = "standard"; static const char STANDARD_STYLE[] = "standard";
U_CDECL_BEGIN U_CDECL_BEGIN
static UBool U_CALLCONV uprv_listformatter_cleanup() { static UBool U_CALLCONV uprv_listformatter_cleanup() {

View File

@ -62,7 +62,7 @@ LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
pInfo->dataFormat[1]==0x72 && pInfo->dataFormat[1]==0x72 &&
pInfo->dataFormat[2]==0x6d && pInfo->dataFormat[2]==0x6d &&
pInfo->dataFormat[3]==0x32 && pInfo->dataFormat[3]==0x32 &&
pInfo->formatVersion[0]==2 pInfo->formatVersion[0]==3
) { ) {
// Normalizer2Impl *me=(Normalizer2Impl *)context; // Normalizer2Impl *me=(Normalizer2Impl *)context;
// uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4); // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
@ -84,7 +84,7 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod
const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory); const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
const int32_t *inIndexes=(const int32_t *)inBytes; const int32_t *inIndexes=(const int32_t *)inBytes;
int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4; int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
if(indexesLength<=IX_MIN_MAYBE_YES) { if(indexesLength<=IX_MIN_LCCC_CP) {
errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes. errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
return; return;
} }

View File

@ -35,7 +35,7 @@ U_NAMESPACE_BEGIN
static icu::Locale* availableLocaleList = NULL; static icu::Locale* availableLocaleList = NULL;
static int32_t availableLocaleListCount; static int32_t availableLocaleListCount;
static icu::UInitOnce gInitOnce = U_INITONCE_INITIALIZER; static icu::UInitOnce gInitOnceLocale = U_INITONCE_INITIALIZER;
U_NAMESPACE_END U_NAMESPACE_END
@ -50,7 +50,7 @@ static UBool U_CALLCONV locale_available_cleanup(void)
availableLocaleList = NULL; availableLocaleList = NULL;
} }
availableLocaleListCount = 0; availableLocaleListCount = 0;
gInitOnce.reset(); gInitOnceLocale.reset();
return TRUE; return TRUE;
} }
@ -81,7 +81,7 @@ void U_CALLCONV locale_available_init() {
const Locale* U_EXPORT2 const Locale* U_EXPORT2
Locale::getAvailableLocales(int32_t& count) Locale::getAvailableLocales(int32_t& count)
{ {
umtx_initOnce(gInitOnce, &locale_available_init); umtx_initOnce(gInitOnceLocale, &locale_available_init);
count = availableLocaleListCount; count = availableLocaleListCount;
return availableLocaleList; return availableLocaleList;
} }

View File

@ -542,7 +542,7 @@ uloc_getDisplayName(const char *locale,
return 0; return 0;
} }
separator = (const UChar *)p0 + subLen; separator = (const UChar *)p0 + subLen;
sepLen = p1 - separator; sepLen = static_cast<int32_t>(p1 - separator);
} }
if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) { if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
@ -558,8 +558,8 @@ uloc_getDisplayName(const char *locale,
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0; return 0;
} }
sub0Pos=p0-pattern; sub0Pos = static_cast<int32_t>(p0-pattern);
sub1Pos=p1-pattern; sub1Pos = static_cast<int32_t>(p1-pattern);
if (sub1Pos < sub0Pos) { /* a very odd pattern */ if (sub1Pos < sub0Pos) { /* a very odd pattern */
int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t; int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
langi=1; langi=1;
@ -821,6 +821,8 @@ uloc_getDisplayKeywordValue( const char* locale,
/* get the keyword value */ /* get the keyword value */
keywordValue[0]=0; keywordValue[0]=0;
keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status); keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);
if (*status == U_STRING_NOT_TERMINATED_WARNING)
*status = U_BUFFER_OVERFLOW_ERROR;
/* /*
* if the keyword is equal to currency .. then to get the display name * if the keyword is equal to currency .. then to get the display name

View File

@ -54,7 +54,7 @@ static int32_t ncat(char *buffer, uint32_t buflen, ...) {
*p = 0; *p = 0;
va_end(args); va_end(args);
return p - buffer; return static_cast<int32_t>(p - buffer);
} }
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
@ -636,8 +636,9 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale,
char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY
const char* key; const char* key;
while ((key = e->next((int32_t *)0, status)) != NULL) { while ((key = e->next((int32_t *)0, status)) != NULL) {
value[0] = 0;
locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status); locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
if (U_FAILURE(status)) { if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
return result; return result;
} }
keyDisplayName(key, temp, TRUE); keyDisplayName(key, temp, TRUE);

View File

@ -511,7 +511,7 @@ parseTagString(
unknownLanguage); unknownLanguage);
*langLength = (int32_t)uprv_strlen(lang); *langLength = (int32_t)uprv_strlen(lang);
} }
else if (_isIDSeparator(*position)) { if (_isIDSeparator(*position)) {
++position; ++position;
} }
@ -1281,7 +1281,7 @@ uloc_minimizeSubtags(const char* localeID,
// Pairs of (language subtag, + or -) for finding out fast if common languages // Pairs of (language subtag, + or -) for finding out fast if common languages
// are LTR (minus) or RTL (plus). // are LTR (minus) or RTL (plus).
static const char* LANG_DIR_STRING = static const char LANG_DIR_STRING[] =
"root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-"; "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
// Implemented here because this calls uloc_addLikelySubtags(). // Implemented here because this calls uloc_addLikelySubtags().
@ -1383,4 +1383,3 @@ ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
uprv_strncpy(region, rgBuf, regionCapacity); uprv_strncpy(region, rgBuf, regionCapacity);
return u_terminateChars(region, regionCapacity, rgLen, status); return u_terminateChars(region, regionCapacity, rgLen, status);
} }

View File

@ -190,7 +190,10 @@ ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG) ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
ILCID_POSIX_ELEMENT_ARRAY(0x0466, bin, bin_NG) ILCID_POSIX_SUBTABLE(bin) {
{0x66, "bin"},
{0x0466, "bin_NG"}
};
ILCID_POSIX_SUBTABLE(bn) { ILCID_POSIX_SUBTABLE(bn) {
{0x45, "bn"}, {0x45, "bn"},
@ -214,7 +217,13 @@ ILCID_POSIX_SUBTABLE(ca) {
}; };
ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR) ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)
// Host LCID / POSIX locale name pairs for the "chr" locale family.
// Entry order is kept as-is: 0x045c is listed for both "chr_Cher_US" and
// "chr_US".
// NOTE(review): presumably the first entry with a given LCID is the one
// returned for LCID -> name lookups; confirm against the lookup code.
ILCID_POSIX_SUBTABLE(chr) {
{0x05c, "chr"},
{0x7c5c, "chr_Cher"},
{0x045c, "chr_Cher_US"},
{0x045c, "chr_US"}
};
// ICU has chosen different names for these. // ICU has chosen different names for these.
ILCID_POSIX_SUBTABLE(ckb) { ILCID_POSIX_SUBTABLE(ckb) {
@ -263,10 +272,10 @@ ILCID_POSIX_SUBTABLE(en) {
{0x2C09, "en_TT"}, {0x2C09, "en_TT"},
{0x0409, "en_US"}, {0x0409, "en_US"},
{0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */ {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
{0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */ {0x2409, "en_029"},
{0x1c09, "en_ZA"}, {0x1c09, "en_ZA"},
{0x3009, "en_ZW"}, {0x3009, "en_ZW"},
{0x2409, "en_029"}, {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
{0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ {0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
{0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ {0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
{0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ {0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
@ -419,7 +428,12 @@ ILCID_POSIX_SUBTABLE(hsb) {
ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU) ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM) ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
ILCID_POSIX_ELEMENT_ARRAY(0x0469, ibb, ibb_NG)
// Host LCID / POSIX locale name pairs for the "ibb" locale family:
// one entry for the bare language and one for the "ibb_NG" locale.
ILCID_POSIX_SUBTABLE(ibb) {
{0x69, "ibb"},
{0x0469, "ibb_NG"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID) ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG) ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN) ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
@ -458,13 +472,18 @@ ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */ ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */
{0x60, "ks"}, {0x60, "ks"},
{0x0860, "ks_IN"}, /* Documentation doesn't mention script */
{0x0460, "ks_Arab_IN"}, {0x0460, "ks_Arab_IN"},
{0x0860, "ks_Deva_IN"} {0x0860, "ks_Deva_IN"}
}; };
ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */ ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT) /* TODO: Verify the country */
// Host LCID / POSIX locale name pairs for the "la" locale family.
// 0x0476 is listed twice: "la_001" first, then the older "la_IT" name, which
// is retained for compatibility.
ILCID_POSIX_SUBTABLE(la) {
{0x76, "la"},
{0x0476, "la_001"},
{0x0476, "la_IT"} /*Left in for compatibility*/
};
ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU) ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA) ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT) ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
@ -535,15 +554,19 @@ ILCID_POSIX_SUBTABLE(or_IN) {
{0x0448, "or_IN"}, {0x0448, "or_IN"},
}; };
ILCID_POSIX_SUBTABLE(pa) { ILCID_POSIX_SUBTABLE(pa) {
{0x46, "pa"}, {0x46, "pa"},
{0x0446, "pa_IN"}, {0x0446, "pa_IN"},
{0x0846, "pa_PK"}, {0x0846, "pa_Arab_PK"},
{0x0846, "pa_Arab_PK"} {0x0846, "pa_PK"}
};
ILCID_POSIX_SUBTABLE(pap) {
{0x79, "pap"},
{0x0479, "pap_029"},
{0x0479, "pap_AN"} /*Left in for compatibility*/
}; };
ILCID_POSIX_ELEMENT_ARRAY(0x0479, pap, pap_AN)
ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL) ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF) ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
@ -619,9 +642,11 @@ ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
ILCID_POSIX_SUBTABLE(sd) { ILCID_POSIX_SUBTABLE(sd) {
{0x59, "sd"}, {0x59, "sd"},
{0x0459, "sd_IN"},
{0x0459, "sd_Deva_IN"}, {0x0459, "sd_Deva_IN"},
{0x0859, "sd_PK"} {0x0459, "sd_IN"},
{0x0859, "sd_Arab_PK"},
{0x0859, "sd_PK"},
{0x7c59, "sd_Arab"}
}; };
ILCID_POSIX_SUBTABLE(se) { ILCID_POSIX_SUBTABLE(se) {
@ -645,9 +670,8 @@ ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK) ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI) ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
ILCID_POSIX_SUBTABLE(so) { /* TODO: Verify the country */ ILCID_POSIX_SUBTABLE(so) {
{0x77, "so"}, {0x77, "so"},
{0x0477, "so_ET"},
{0x0477, "so_SO"} {0x0477, "so_SO"}
}; };
@ -739,7 +763,12 @@ ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN) ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN) ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA) ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
ILCID_POSIX_ELEMENT_ARRAY(0x043d, yi, yi)
// Host LCID / POSIX locale name pairs for the "yi" locale family:
// one entry for the bare language and one for the "yi_001" locale.
ILCID_POSIX_SUBTABLE(yi) {
{0x003d, "yi"},
{0x043d, "yi_001"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG) ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
// Windows & ICU tend to different names for some of these // Windows & ICU tend to different names for some of these
@ -1033,6 +1062,8 @@ uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UEr
const char *pPosixID = NULL; const char *pPosixID = NULL;
#ifdef USE_WINDOWS_LCID_MAPPING_API #ifdef USE_WINDOWS_LCID_MAPPING_API
char locName[LOCALE_NAME_MAX_LENGTH] = {}; // ICU name can't be longer than Windows name
// Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
// GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
// Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
@ -1040,7 +1071,6 @@ uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UEr
if ((hostid & 0x3FF) != 0x92) { if ((hostid & 0x3FF) != 0x92) {
int32_t tmpLen = 0; int32_t tmpLen = 0;
UChar windowsLocaleName[LOCALE_NAME_MAX_LENGTH]; // ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH UChar windowsLocaleName[LOCALE_NAME_MAX_LENGTH]; // ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH
char locName[LOCALE_NAME_MAX_LENGTH]; // ICU name can't be longer than Windows name
// Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names. // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES); tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
@ -1102,7 +1132,7 @@ uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UEr
} }
if (pPosixID) { if (pPosixID) {
int32_t resLen = uprv_strlen(pPosixID); int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity; int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
uprv_memcpy(posixID, pPosixID, copyLen); uprv_memcpy(posixID, pPosixID, copyLen);
if (resLen < posixIDCapacity) { if (resLen < posixIDCapacity) {
@ -1176,7 +1206,7 @@ uprv_convertToLCIDPlatform(const char* localeID)
char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {}; char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
// this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
int32_t bcp47Len = uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus); (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus);
if (U_SUCCESS(myStatus)) if (U_SUCCESS(myStatus))
{ {
@ -1213,6 +1243,8 @@ uprv_convertToLCIDPlatform(const char* localeID)
} }
} }
} }
#else
(void)localeID; // Suppress unused variable warning.
#endif /* USE_WINDOWS_LCID_MAPPING_API */ #endif /* USE_WINDOWS_LCID_MAPPING_API */
// Not found, or not implemented on platforms without native name->lcid conversion // Not found, or not implemented on platforms without native name->lcid conversion

File diff suppressed because it is too large Load Diff

View File

@ -5,7 +5,7 @@
* Copyright (C) 2014, International Business Machines * Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
******************************************************************************* *******************************************************************************
* loadednormalizer2impl.h * norm2allmodes.h
* *
* created on: 2014sep07 * created on: 2014sep07
* created by: Markus W. Scherer * created by: Markus W. Scherer
@ -18,7 +18,9 @@
#if !UCONFIG_NO_NORMALIZATION #if !UCONFIG_NO_NORMALIZATION
#include "unicode/edits.h"
#include "unicode/normalizer2.h" #include "unicode/normalizer2.h"
#include "unicode/stringoptions.h"
#include "unicode/unistr.h" #include "unicode/unistr.h"
#include "cpputils.h" #include "cpputils.h"
#include "normalizer2impl.h" #include "normalizer2impl.h"
@ -210,8 +212,8 @@ private:
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
} }
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); }
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); }
virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
}; };
@ -224,19 +226,35 @@ public:
private: private:
virtual void virtual void
normalize(const UChar *src, const UChar *limit, normalize(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const { ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
} }
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
// UTF-8 normalization for the composing normalizer: forwards the byte range
// of src to impl.composeUTF8(), writing to sink and recording changes in
// edits. edits is reset first unless U_EDITS_NO_RESET is set in options.
// The sink is flushed afterwards.
void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
              Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
    if (U_FAILURE(errorCode)) {
        return;
    }
    const UBool resetRequested = (options & U_EDITS_NO_RESET) == 0;
    if (resetRequested && edits != nullptr) {
        edits->reset();
    }
    const uint8_t *const begin = reinterpret_cast<const uint8_t *>(src.data());
    const uint8_t *const end = begin + src.length();
    impl.composeUTF8(options, onlyContiguous, begin, end, &sink, edits, errorCode);
    sink.Flush();
}
virtual void virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle, UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const { ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
} }
virtual UBool virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
if(U_FAILURE(errorCode)) { if(U_FAILURE(errorCode)) {
return FALSE; return FALSE;
} }
@ -252,8 +270,16 @@ private:
} }
return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
} }
// UTF-8 normalization test for the composing normalizer: calls
// impl.composeUTF8() over the byte range of sp with options 0 and with
// neither a sink nor an Edits object, and returns its result.
// Returns FALSE if errorCode already indicates failure.
virtual UBool
isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    const uint8_t *const begin = reinterpret_cast<const uint8_t *>(sp.data());
    const uint8_t *const end = begin + sp.length();
    return impl.composeUTF8(0, onlyContiguous, begin, end, nullptr, nullptr, errorCode);
}
virtual UNormalizationCheckResult virtual UNormalizationCheckResult
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
if(U_FAILURE(errorCode)) { if(U_FAILURE(errorCode)) {
return UNORM_MAYBE; return UNORM_MAYBE;
} }
@ -267,21 +293,21 @@ private:
return qcResult; return qcResult;
} }
virtual const UChar * virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE {
return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
} }
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
return impl.getCompQuickCheck(impl.getNorm16(c)); return impl.getCompQuickCheck(impl.getNorm16(c));
} }
virtual UBool hasBoundaryBefore(UChar32 c) const { virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
return impl.hasCompBoundaryBefore(c); return impl.hasCompBoundaryBefore(c);
} }
virtual UBool hasBoundaryAfter(UChar32 c) const { virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); return impl.hasCompBoundaryAfter(c, onlyContiguous);
} }
virtual UBool isInert(UChar32 c) const { virtual UBool isInert(UChar32 c) const U_OVERRIDE {
return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); return impl.isCompInert(c, onlyContiguous);
} }
const UBool onlyContiguous; const UBool onlyContiguous;

View File

@ -20,7 +20,9 @@
#if !UCONFIG_NO_NORMALIZATION #if !UCONFIG_NO_NORMALIZATION
#include "unicode/edits.h"
#include "unicode/normalizer2.h" #include "unicode/normalizer2.h"
#include "unicode/stringoptions.h"
#include "unicode/unistr.h" #include "unicode/unistr.h"
#include "unicode/unorm.h" #include "unicode/unorm.h"
#include "cstring.h" #include "cstring.h"
@ -42,6 +44,20 @@ U_NAMESPACE_BEGIN
Normalizer2::~Normalizer2() {} Normalizer2::~Normalizer2() {}
// Default UTF-8 normalization: converts src to a UnicodeString, normalizes it
// with normalize(), and writes the result to sink as UTF-8. The options
// argument is ignored. Recording edits is not supported here: a non-null
// edits pointer sets U_UNSUPPORTED_ERROR and nothing is written.
void
Normalizer2::normalizeUTF8(uint32_t /*options*/, StringPiece src, ByteSink &sink,
                           Edits *edits, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return;
    }
    if (edits == nullptr) {
        const UnicodeString utf16Input = UnicodeString::fromUTF8(src);
        normalize(utf16Input, errorCode).toUTF8(sink);
    } else {
        errorCode = U_UNSUPPORTED_ERROR;
    }
}
UBool UBool
Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const { Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
return FALSE; return FALSE;
@ -57,6 +73,11 @@ Normalizer2::getCombiningClass(UChar32 /*c*/) const {
return 0; return 0;
} }
// Default UTF-8 normalization test: converts s to a UnicodeString and
// delegates to isNormalized(). Returns FALSE without converting anything if
// errorCode already indicates failure.
UBool
Normalizer2::isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    return isNormalized(UnicodeString::fromUTF8(s), errorCode) ? TRUE : FALSE;
}
// Normalizer2 implementation for the old UNORM_NONE. // Normalizer2 implementation for the old UNORM_NONE.
class NoopNormalizer2 : public Normalizer2 { class NoopNormalizer2 : public Normalizer2 {
virtual ~NoopNormalizer2(); virtual ~NoopNormalizer2();
@ -64,7 +85,7 @@ class NoopNormalizer2 : public Normalizer2 {
virtual UnicodeString & virtual UnicodeString &
normalize(const UnicodeString &src, normalize(const UnicodeString &src,
UnicodeString &dest, UnicodeString &dest,
UErrorCode &errorCode) const { UErrorCode &errorCode) const U_OVERRIDE {
if(U_SUCCESS(errorCode)) { if(U_SUCCESS(errorCode)) {
if(&dest!=&src) { if(&dest!=&src) {
dest=src; dest=src;
@ -74,10 +95,27 @@ class NoopNormalizer2 : public Normalizer2 {
} }
return dest; return dest;
} }
// No-op UTF-8 normalization: the whole of src is treated as unchanged.
// If edits is given it is reset (unless U_EDITS_NO_RESET is set) and the
// full length of src is recorded as unchanged. src is copied to sink unless
// U_OMIT_UNCHANGED_TEXT is set, and the sink is flushed either way.
// Does nothing if errorCode already indicates failure.
virtual void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
              Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
    if(U_FAILURE(errorCode)) {
        return;
    }
    if (edits != nullptr) {
        const UBool resetRequested = (options & U_EDITS_NO_RESET) == 0;
        if (resetRequested) {
            edits->reset();
        }
        edits->addUnchanged(src.length());
    }
    const UBool omitUnchanged = (options & U_OMIT_UNCHANGED_TEXT) != 0;
    if (!omitUnchanged) {
        sink.Append(src.data(), src.length());
    }
    sink.Flush();
}
virtual UnicodeString & virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString &first, normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second, const UnicodeString &second,
UErrorCode &errorCode) const { UErrorCode &errorCode) const U_OVERRIDE {
if(U_SUCCESS(errorCode)) { if(U_SUCCESS(errorCode)) {
if(&first!=&second) { if(&first!=&second) {
first.append(second); first.append(second);
@ -90,7 +128,7 @@ class NoopNormalizer2 : public Normalizer2 {
virtual UnicodeString & virtual UnicodeString &
append(UnicodeString &first, append(UnicodeString &first,
const UnicodeString &second, const UnicodeString &second,
UErrorCode &errorCode) const { UErrorCode &errorCode) const U_OVERRIDE {
if(U_SUCCESS(errorCode)) { if(U_SUCCESS(errorCode)) {
if(&first!=&second) { if(&first!=&second) {
first.append(second); first.append(second);
@ -101,25 +139,29 @@ class NoopNormalizer2 : public Normalizer2 {
return first; return first;
} }
virtual UBool virtual UBool
getDecomposition(UChar32, UnicodeString &) const { getDecomposition(UChar32, UnicodeString &) const U_OVERRIDE {
return FALSE; return FALSE;
} }
// No need to override the default getRawDecomposition(). // No need to U_OVERRIDE the default getRawDecomposition().
virtual UBool virtual UBool
isNormalized(const UnicodeString &, UErrorCode &) const { isNormalized(const UnicodeString &, UErrorCode &errorCode) const U_OVERRIDE {
return TRUE; return U_SUCCESS(errorCode);
}
virtual UBool
isNormalizedUTF8(StringPiece, UErrorCode &errorCode) const U_OVERRIDE {
return U_SUCCESS(errorCode);
} }
virtual UNormalizationCheckResult virtual UNormalizationCheckResult
quickCheck(const UnicodeString &, UErrorCode &) const { quickCheck(const UnicodeString &, UErrorCode &) const U_OVERRIDE {
return UNORM_YES; return UNORM_YES;
} }
virtual int32_t virtual int32_t
spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const U_OVERRIDE {
return s.length(); return s.length();
} }
virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } virtual UBool hasBoundaryBefore(UChar32) const U_OVERRIDE { return TRUE; }
virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } virtual UBool hasBoundaryAfter(UChar32) const U_OVERRIDE { return TRUE; }
virtual UBool isInert(UChar32) const { return TRUE; } virtual UBool isInert(UChar32) const U_OVERRIDE { return TRUE; }
}; };
NoopNormalizer2::~NoopNormalizer2() {} NoopNormalizer2::~NoopNormalizer2() {}

File diff suppressed because it is too large Load Diff

View File

@ -35,6 +35,11 @@ U_NAMESPACE_BEGIN
struct CanonIterData; struct CanonIterData;
class ByteSink;
class Edits;
class InitCanonIterData;
class LcccContext;
class U_COMMON_API Hangul { class U_COMMON_API Hangul {
public: public:
/* Korean Hangul and Jamo constants */ /* Korean Hangul and Jamo constants */
@ -63,9 +68,9 @@ public:
return HANGUL_BASE<=c && c<HANGUL_LIMIT; return HANGUL_BASE<=c && c<HANGUL_LIMIT;
} }
static inline UBool static inline UBool
isHangulWithoutJamoT(UChar c) { isHangulLV(UChar32 c) {
c-=HANGUL_BASE; c-=HANGUL_BASE;
return c<HANGUL_COUNT && c%JAMO_T_COUNT==0; return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
} }
static inline UBool isJamoL(UChar32 c) { static inline UBool isJamoL(UChar32 c) {
return (uint32_t)(c-JAMO_L_BASE)<JAMO_L_COUNT; return (uint32_t)(c-JAMO_L_BASE)<JAMO_L_COUNT;
@ -73,6 +78,14 @@ public:
static inline UBool isJamoV(UChar32 c) { static inline UBool isJamoV(UChar32 c) {
return (uint32_t)(c-JAMO_V_BASE)<JAMO_V_COUNT; return (uint32_t)(c-JAMO_V_BASE)<JAMO_V_COUNT;
} }
static inline UBool isJamoT(UChar32 c) {
int32_t t=c-JAMO_T_BASE;
return 0<t && t<JAMO_T_COUNT; // not JAMO_T_BASE itself
}
static UBool isJamo(UChar32 c) {
return JAMO_L_BASE<=c && c<=JAMO_T_END &&
(c<=JAMO_L_END || (JAMO_V_BASE<=c && c<=JAMO_V_END) || JAMO_T_BASE<c);
}
/** /**
* Decomposes c, which must be a Hangul syllable, into buffer * Decomposes c, which must be a Hangul syllable, into buffer
@ -117,10 +130,13 @@ class Normalizer2Impl;
class U_COMMON_API ReorderingBuffer : public UMemory { class U_COMMON_API ReorderingBuffer : public UMemory {
public: public:
/** Constructs only; init() should be called. */
ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) : ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) :
impl(ni), str(dest), impl(ni), str(dest),
start(NULL), reorderStart(NULL), limit(NULL), start(NULL), reorderStart(NULL), limit(NULL),
remainingCapacity(0), lastCC(0) {} remainingCapacity(0), lastCC(0) {}
/** Constructs, removes the string contents, and initializes for a small initial capacity. */
ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest, UErrorCode &errorCode);
~ReorderingBuffer() { ~ReorderingBuffer() {
if(start!=NULL) { if(start!=NULL) {
str.releaseBuffer((int32_t)(limit-start)); str.releaseBuffer((int32_t)(limit-start));
@ -135,11 +151,7 @@ public:
uint8_t getLastCC() const { return lastCC; } uint8_t getLastCC() const { return lastCC; }
UBool equals(const UChar *start, const UChar *limit) const; UBool equals(const UChar *start, const UChar *limit) const;
UBool equals(const uint8_t *otherStart, const uint8_t *otherLimit) const;
// For Hangul composition, replacing the Leading consonant Jamo with the syllable.
void setLastChar(UChar c) {
*(limit-1)=c;
}
UBool append(UChar32 c, uint8_t cc, UErrorCode &errorCode) { UBool append(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
return (c<=0xffff) ? return (c<=0xffff) ?
@ -218,6 +230,12 @@ private:
UChar *codePointStart, *codePointLimit; UChar *codePointStart, *codePointLimit;
}; };
/**
* Low-level implementation of the Unicode Normalization Algorithm.
* For the data structure and details see the documentation at the end of
* this normalizer2impl.h and in the design doc at
* http://site.icu-project.org/design/normalization/custom
*/
class U_COMMON_API Normalizer2Impl : public UObject { class U_COMMON_API Normalizer2Impl : public UObject {
public: public:
Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) { Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) {
@ -234,8 +252,6 @@ public:
// low-level properties ------------------------------------------------ *** // low-level properties ------------------------------------------------ ***
const UTrie2 *getNormTrie() const { return normTrie; }
UBool ensureCanonIterData(UErrorCode &errorCode) const; UBool ensureCanonIterData(UErrorCode &errorCode) const;
uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); } uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); }
@ -255,15 +271,22 @@ public:
uint8_t getCC(uint16_t norm16) const { uint8_t getCC(uint16_t norm16) const {
if(norm16>=MIN_NORMAL_MAYBE_YES) { if(norm16>=MIN_NORMAL_MAYBE_YES) {
return (uint8_t)norm16; return getCCFromNormalYesOrMaybe(norm16);
} }
if(norm16<minNoNo || limitNoNo<=norm16) { if(norm16<minNoNo || limitNoNo<=norm16) {
return 0; return 0;
} }
return getCCFromNoNo(norm16); return getCCFromNoNo(norm16);
} }
static uint8_t getCCFromNormalYesOrMaybe(uint16_t norm16) {
return (uint8_t)(norm16 >> OFFSET_SHIFT);
}
static uint8_t getCCFromYesOrMaybe(uint16_t norm16) { static uint8_t getCCFromYesOrMaybe(uint16_t norm16) {
return norm16>=MIN_NORMAL_MAYBE_YES ? (uint8_t)norm16 : 0; return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0;
}
uint8_t getCCFromYesOrMaybeCP(UChar32 c) const {
if (c < minCompNoMaybeCP) { return 0; }
return getCCFromYesOrMaybe(getNorm16(c));
} }
/** /**
@ -272,10 +295,8 @@ public:
* @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0. * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
*/ */
uint16_t getFCD16(UChar32 c) const { uint16_t getFCD16(UChar32 c) const {
if(c<0) { if(c<minDecompNoCP) {
return 0; return 0;
} else if(c<0x180) {
return tccc180[c];
} else if(c<=0xffff) { } else if(c<=0xffff) {
if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; } if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; }
} }
@ -291,9 +312,7 @@ public:
*/ */
uint16_t nextFCD16(const UChar *&s, const UChar *limit) const { uint16_t nextFCD16(const UChar *&s, const UChar *limit) const {
UChar32 c=*s++; UChar32 c=*s++;
if(c<0x180) { if(c<minDecompNoCP || !singleLeadMightHaveNonZeroFCD16(c)) {
return tccc180[c];
} else if(!singleLeadMightHaveNonZeroFCD16(c)) {
return 0; return 0;
} }
UChar c2; UChar c2;
@ -311,8 +330,8 @@ public:
*/ */
uint16_t previousFCD16(const UChar *start, const UChar *&s) const { uint16_t previousFCD16(const UChar *start, const UChar *&s) const {
UChar32 c=*--s; UChar32 c=*--s;
if(c<0x180) { if(c<minDecompNoCP) {
return tccc180[c]; return 0;
} }
if(!U16_IS_TRAIL(c)) { if(!U16_IS_TRAIL(c)) {
if(!singleLeadMightHaveNonZeroFCD16(c)) { if(!singleLeadMightHaveNonZeroFCD16(c)) {
@ -328,8 +347,6 @@ public:
return getFCD16FromNormData(c); return getFCD16FromNormData(c);
} }
/** Returns the FCD data for U+0000<=c<U+0180. */
uint16_t getFCD16FromBelow180(UChar32 c) const { return tccc180[c]; }
/** Returns TRUE if the single-or-lead code unit c might have non-zero FCD data. */ /** Returns TRUE if the single-or-lead code unit c might have non-zero FCD data. */
UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const { UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const {
// 0<=lead<=0xffff // 0<=lead<=0xffff
@ -340,9 +357,6 @@ public:
/** Returns the FCD value from the regular normalization data. */ /** Returns the FCD value from the regular normalization data. */
uint16_t getFCD16FromNormData(UChar32 c) const; uint16_t getFCD16FromNormData(UChar32 c) const;
void makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
CanonIterData &newData, UErrorCode &errorCode) const;
/** /**
* Gets the decomposition for one code point. * Gets the decomposition for one code point.
* @param c code point * @param c code point
@ -367,14 +381,25 @@ public:
UBool getCanonStartSet(UChar32 c, UnicodeSet &set) const; UBool getCanonStartSet(UChar32 c, UnicodeSet &set) const;
enum { enum {
MIN_CCC_LCCC_CP=0x300 // Fixed norm16 values.
}; MIN_YES_YES_WITH_CC=0xfe02,
JAMO_VT=0xfe00,
MIN_NORMAL_MAYBE_YES=0xfc00,
JAMO_L=2, // offset=1 hasCompBoundaryAfter=FALSE
INERT=1, // offset=0 hasCompBoundaryAfter=TRUE
// norm16 bit 0 is comp-boundary-after.
HAS_COMP_BOUNDARY_AFTER=1,
OFFSET_SHIFT=1,
// For algorithmic one-way mappings, norm16 bits 2..1 indicate the
// tccc (0, 1, >1) for quick FCC boundary-after tests.
DELTA_TCCC_0=0,
DELTA_TCCC_1=2,
DELTA_TCCC_GT_1=4,
DELTA_TCCC_MASK=6,
DELTA_SHIFT=3,
enum {
MIN_YES_YES_WITH_CC=0xff01,
JAMO_VT=0xff00,
MIN_NORMAL_MAYBE_YES=0xfe00,
JAMO_L=1,
MAX_DELTA=0x40 MAX_DELTA=0x40
}; };
@ -394,21 +419,32 @@ public:
IX_MIN_COMP_NO_MAYBE_CP, IX_MIN_COMP_NO_MAYBE_CP,
// Norm16 value thresholds for quick check combinations and types of extra data. // Norm16 value thresholds for quick check combinations and types of extra data.
IX_MIN_YES_NO, // Mappings & compositions in [minYesNo..minYesNoMappingsOnly[.
/** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */
IX_MIN_YES_NO,
/** Mappings are comp-normalized. */
IX_MIN_NO_NO, IX_MIN_NO_NO,
IX_LIMIT_NO_NO, IX_LIMIT_NO_NO,
IX_MIN_MAYBE_YES, IX_MIN_MAYBE_YES,
IX_MIN_YES_NO_MAPPINGS_ONLY, // Mappings only in [minYesNoMappingsOnly..minNoNo[. /** Mappings only in [minYesNoMappingsOnly..minNoNo[. */
IX_MIN_YES_NO_MAPPINGS_ONLY,
/** Mappings are not comp-normalized but have a comp boundary before. */
IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE,
/** Mappings do not have a comp boundary before. */
IX_MIN_NO_NO_COMP_NO_MAYBE_CC,
/** Mappings to the empty string. */
IX_MIN_NO_NO_EMPTY,
IX_RESERVED15, IX_MIN_LCCC_CP,
IX_RESERVED19,
IX_COUNT IX_COUNT
}; };
enum { enum {
MAPPING_HAS_CCC_LCCC_WORD=0x80, MAPPING_HAS_CCC_LCCC_WORD=0x80,
MAPPING_HAS_RAW_MAPPING=0x40, MAPPING_HAS_RAW_MAPPING=0x40,
MAPPING_NO_COMP_BOUNDARY_AFTER=0x20, // unused bit 0x20,
MAPPING_LENGTH_MASK=0x1f MAPPING_LENGTH_MASK=0x1f
}; };
@ -457,6 +493,12 @@ public:
UnicodeString &safeMiddle, UnicodeString &safeMiddle,
ReorderingBuffer &buffer, ReorderingBuffer &buffer,
UErrorCode &errorCode) const; UErrorCode &errorCode) const;
/** sink==nullptr: isNormalized() */
UBool composeUTF8(uint32_t options, UBool onlyContiguous,
const uint8_t *src, const uint8_t *limit,
ByteSink *sink, icu::Edits *edits, UErrorCode &errorCode) const;
const UChar *makeFCD(const UChar *src, const UChar *limit, const UChar *makeFCD(const UChar *src, const UChar *limit,
ReorderingBuffer *buffer, UErrorCode &errorCode) const; ReorderingBuffer *buffer, UErrorCode &errorCode) const;
void makeFCDAndAppend(const UChar *src, const UChar *limit, void makeFCDAndAppend(const UChar *src, const UChar *limit,
@ -465,27 +507,42 @@ public:
ReorderingBuffer &buffer, ReorderingBuffer &buffer,
UErrorCode &errorCode) const; UErrorCode &errorCode) const;
UBool hasDecompBoundary(UChar32 c, UBool before) const; UBool hasDecompBoundaryBefore(UChar32 c) const;
UBool norm16HasDecompBoundaryBefore(uint16_t norm16) const;
UBool hasDecompBoundaryAfter(UChar32 c) const;
UBool norm16HasDecompBoundaryAfter(uint16_t norm16) const;
UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); } UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); }
UBool hasCompBoundaryBefore(UChar32 c) const { UBool hasCompBoundaryBefore(UChar32 c) const {
return c<minCompNoMaybeCP || hasCompBoundaryBefore(c, getNorm16(c)); return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(getNorm16(c));
}
UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous) const {
return norm16HasCompBoundaryAfter(getNorm16(c), onlyContiguous);
}
UBool isCompInert(UChar32 c, UBool onlyContiguous) const {
uint16_t norm16=getNorm16(c);
return isCompYesAndZeroCC(norm16) &&
(norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
(!onlyContiguous || isInert(norm16) || *getMapping(norm16) <= 0x1ff);
} }
UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const;
UBool hasFCDBoundaryBefore(UChar32 c) const { return c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff; } UBool hasFCDBoundaryBefore(UChar32 c) const { return hasDecompBoundaryBefore(c); }
UBool hasFCDBoundaryAfter(UChar32 c) const { UBool hasFCDBoundaryAfter(UChar32 c) const { return hasDecompBoundaryAfter(c); }
uint16_t fcd16=getFCD16(c);
return fcd16<=1 || (fcd16&0xff)==0;
}
UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; } UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; }
private: private:
friend class InitCanonIterData;
friend class LcccContext;
UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; } UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; } UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; }
static UBool isInert(uint16_t norm16) { return norm16==0; } static UBool isInert(uint16_t norm16) { return norm16==INERT; }
static UBool isJamoL(uint16_t norm16) { return norm16==1; } static UBool isJamoL(uint16_t norm16) { return norm16==JAMO_L; }
static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; } static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; }
UBool isHangul(uint16_t norm16) const { return norm16==minYesNo; } uint16_t hangulLVT() const { return minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER; }
UBool isHangulLV(uint16_t norm16) const { return norm16==minYesNo; }
UBool isHangulLVT(uint16_t norm16) const {
return norm16==hangulLVT();
}
UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16<minNoNo; } UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16<minNoNo; }
// UBool isCompYes(uint16_t norm16) const { // UBool isCompYes(uint16_t norm16) const {
// return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo; // return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
@ -504,7 +561,7 @@ private:
/** /**
* A little faster and simpler than isDecompYesAndZeroCC() but does not include * A little faster and simpler than isDecompYesAndZeroCC() but does not include
* the MaybeYes which combine-forward and have ccc=0. * the MaybeYes which combine-forward and have ccc=0.
* (Standard Unicode 5.2 normalization does not have such characters.) * (Standard Unicode 10 normalization does not have such characters.)
*/ */
UBool isMostDecompYesAndZeroCC(uint16_t norm16) const { UBool isMostDecompYesAndZeroCC(uint16_t norm16) const {
return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT; return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
@ -514,7 +571,7 @@ private:
// For use with isCompYes(). // For use with isCompYes().
// Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC. // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
// static uint8_t getCCFromYes(uint16_t norm16) { // static uint8_t getCCFromYes(uint16_t norm16) {
// return norm16>=MIN_YES_YES_WITH_CC ? (uint8_t)norm16 : 0; // return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0;
// } // }
uint8_t getCCFromNoNo(uint16_t norm16) const { uint8_t getCCFromNoNo(uint16_t norm16) const {
const uint16_t *mapping=getMapping(norm16); const uint16_t *mapping=getMapping(norm16);
@ -525,30 +582,47 @@ private:
} }
} }
// requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC() // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC()
uint8_t getTrailCCFromCompYesAndZeroCC(const UChar *cpStart, const UChar *cpLimit) const; uint8_t getTrailCCFromCompYesAndZeroCC(uint16_t norm16) const {
if(norm16<=minYesNo) {
return 0; // yesYes and Hangul LV have ccc=tccc=0
} else {
// For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here.
return (uint8_t)(*getMapping(norm16)>>8); // tccc from yesNo
}
}
uint8_t getPreviousTrailCC(const UChar *start, const UChar *p) const;
uint8_t getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const;
// Requires algorithmic-NoNo. // Requires algorithmic-NoNo.
UChar32 mapAlgorithmic(UChar32 c, uint16_t norm16) const { UChar32 mapAlgorithmic(UChar32 c, uint16_t norm16) const {
return c+norm16-(minMaybeYes-MAX_DELTA-1); return c+(norm16>>DELTA_SHIFT)-centerNoNoDelta;
}
UChar32 getAlgorithmicDelta(uint16_t norm16) const {
return (norm16>>DELTA_SHIFT)-centerNoNoDelta;
} }
// Requires minYesNo<norm16<limitNoNo. // Requires minYesNo<norm16<limitNoNo.
const uint16_t *getMapping(uint16_t norm16) const { return extraData+norm16; } const uint16_t *getMapping(uint16_t norm16) const { return extraData+(norm16>>OFFSET_SHIFT); }
const uint16_t *getCompositionsListForDecompYes(uint16_t norm16) const { const uint16_t *getCompositionsListForDecompYes(uint16_t norm16) const {
if(norm16==0 || MIN_NORMAL_MAYBE_YES<=norm16) { if(norm16<JAMO_L || MIN_NORMAL_MAYBE_YES<=norm16) {
return NULL; return NULL;
} else if(norm16<minMaybeYes) { } else if(norm16<minMaybeYes) {
return extraData+norm16; // for yesYes; if Jamo L: harmless empty list return getMapping(norm16); // for yesYes; if Jamo L: harmless empty list
} else { } else {
return maybeYesCompositions+norm16-minMaybeYes; return maybeYesCompositions+norm16-minMaybeYes;
} }
} }
const uint16_t *getCompositionsListForComposite(uint16_t norm16) const { const uint16_t *getCompositionsListForComposite(uint16_t norm16) const {
const uint16_t *list=extraData+norm16; // composite has both mapping & compositions list // A composite has both mapping & compositions list.
const uint16_t *list=getMapping(norm16);
return list+ // mapping pointer return list+ // mapping pointer
1+ // +1 to skip the first unit with the mapping lenth 1+ // +1 to skip the first unit with the mapping length
(*list&MAPPING_LENGTH_MASK); // + mapping length (*list&MAPPING_LENGTH_MASK); // + mapping length
} }
const uint16_t *getCompositionsListForMaybe(uint16_t norm16) const {
// minMaybeYes<=norm16<MIN_NORMAL_MAYBE_YES
return maybeYesCompositions+((norm16-minMaybeYes)>>OFFSET_SHIFT);
}
/** /**
* @param c code point must have compositions * @param c code point must have compositions
* @return compositions list pointer * @return compositions list pointer
@ -563,46 +637,78 @@ private:
UChar32 minNeedDataCP, UChar32 minNeedDataCP,
ReorderingBuffer *buffer, ReorderingBuffer *buffer,
UErrorCode &errorCode) const; UErrorCode &errorCode) const;
UBool decomposeShort(const UChar *src, const UChar *limit, const UChar *decomposeShort(const UChar *src, const UChar *limit,
UBool stopAtCompBoundary, UBool onlyContiguous,
ReorderingBuffer &buffer, UErrorCode &errorCode) const; ReorderingBuffer &buffer, UErrorCode &errorCode) const;
UBool decompose(UChar32 c, uint16_t norm16, UBool decompose(UChar32 c, uint16_t norm16,
ReorderingBuffer &buffer, UErrorCode &errorCode) const; ReorderingBuffer &buffer, UErrorCode &errorCode) const;
const uint8_t *decomposeShort(const uint8_t *src, const uint8_t *limit,
UBool stopAtCompBoundary, UBool onlyContiguous,
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
static int32_t combine(const uint16_t *list, UChar32 trail); static int32_t combine(const uint16_t *list, UChar32 trail);
void addComposites(const uint16_t *list, UnicodeSet &set) const; void addComposites(const uint16_t *list, UnicodeSet &set) const;
void recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex, void recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
UBool onlyContiguous) const; UBool onlyContiguous) const;
UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const; UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const {
const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p) const; return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(norm16);
const UChar *findNextCompBoundary(const UChar *p, const UChar *limit) const; }
UBool norm16HasCompBoundaryBefore(uint16_t norm16) const {
return norm16 < minNoNoCompNoMaybeCC || isAlgorithmicNoNo(norm16);
}
UBool hasCompBoundaryBefore(const UChar *src, const UChar *limit) const;
UBool hasCompBoundaryBefore(const uint8_t *src, const uint8_t *limit) const;
UBool hasCompBoundaryAfter(const UChar *start, const UChar *p,
UBool onlyContiguous) const;
UBool hasCompBoundaryAfter(const uint8_t *start, const uint8_t *p,
UBool onlyContiguous) const;
UBool norm16HasCompBoundaryAfter(uint16_t norm16, UBool onlyContiguous) const {
return (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
(!onlyContiguous || isTrailCC01ForCompBoundaryAfter(norm16));
}
/** For FCC: Given norm16 HAS_COMP_BOUNDARY_AFTER, does it have tccc<=1? */
UBool isTrailCC01ForCompBoundaryAfter(uint16_t norm16) const {
return isInert(norm16) || (isDecompNoAlgorithmic(norm16) ?
(norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1 : *getMapping(norm16) <= 0x1ff);
}
const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p, UBool onlyContiguous) const;
const UChar *findNextCompBoundary(const UChar *p, const UChar *limit, UBool onlyContiguous) const;
const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const; const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const;
const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const; const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const;
void makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16,
CanonIterData &newData, UErrorCode &errorCode) const;
int32_t getCanonValue(UChar32 c) const; int32_t getCanonValue(UChar32 c) const;
const UnicodeSet &getCanonStartSet(int32_t n) const; const UnicodeSet &getCanonStartSet(int32_t n) const;
// UVersionInfo dataVersion; // UVersionInfo dataVersion;
// Code point thresholds for quick check codes. // BMP code point thresholds for quick check loops looking at single UTF-16 code units.
UChar32 minDecompNoCP; UChar minDecompNoCP;
UChar32 minCompNoMaybeCP; UChar minCompNoMaybeCP;
UChar minLcccCP;
// Norm16 value thresholds for quick check combinations and types of extra data. // Norm16 value thresholds for quick check combinations and types of extra data.
uint16_t minYesNo; uint16_t minYesNo;
uint16_t minYesNoMappingsOnly; uint16_t minYesNoMappingsOnly;
uint16_t minNoNo; uint16_t minNoNo;
uint16_t minNoNoCompBoundaryBefore;
uint16_t minNoNoCompNoMaybeCC;
uint16_t minNoNoEmpty;
uint16_t limitNoNo; uint16_t limitNoNo;
uint16_t centerNoNoDelta;
uint16_t minMaybeYes; uint16_t minMaybeYes;
const UTrie2 *normTrie; const UTrie2 *normTrie;
const uint16_t *maybeYesCompositions; const uint16_t *maybeYesCompositions;
const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0 const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
uint8_t tccc180[0x180]; // tccc values for U+0000..U+017F
public: // CanonIterData is public to allow access from C callback functions.
UInitOnce fCanonIterDataInitOnce; UInitOnce fCanonIterDataInitOnce;
CanonIterData *fCanonIterData; CanonIterData *fCanonIterData;
}; };
@ -658,13 +764,14 @@ unorm_getFCD16(UChar32 c);
/** /**
* Format of Normalizer2 .nrm data files. * Format of Normalizer2 .nrm data files.
* Format version 2.0. * Format version 3.0.
* *
* Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms. * Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms.
* ICU ships with data files for standard Unicode Normalization Forms * ICU ships with data files for standard Unicode Normalization Forms
* NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm) and NFKC_Casefold (nfkc_cf.nrm). * NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm) and NFKC_Casefold (nfkc_cf.nrm).
* Custom (application-specific) data can be built into additional .nrm files * Custom (application-specific) data can be built into additional .nrm files
* with the gennorm2 build tool. * with the gennorm2 build tool.
* ICU ships with one such file, uts46.nrm, for the implementation of UTS #46.
* *
* Normalizer2.getInstance() causes a .nrm file to be loaded, unless it has been * Normalizer2.getInstance() causes a .nrm file to be loaded, unless it has been
* cached already. Internally, Normalizer2Impl.load() reads the .nrm file. * cached already. Internally, Normalizer2Impl.load() reads the .nrm file.
@ -695,14 +802,20 @@ unorm_getFCD16(UChar32 c);
* with a decomposition mapping, that is, with NF*D_QC=No. * with a decomposition mapping, that is, with NF*D_QC=No.
* minCompNoMaybeCP=indexes[IX_MIN_COMP_NO_MAYBE_CP] is the lowest code point * minCompNoMaybeCP=indexes[IX_MIN_COMP_NO_MAYBE_CP] is the lowest code point
* with NF*C_QC=No (has a one-way mapping) or Maybe (combines backward). * with NF*C_QC=No (has a one-way mapping) or Maybe (combines backward).
* minLcccCP=indexes[IX_MIN_LCCC_CP] (index 18, new in formatVersion 3)
* is the lowest code point with lccc!=0.
* *
* The next five indexes are thresholds of 16-bit trie values for ranges of * The next eight indexes are thresholds of 16-bit trie values for ranges of
* values indicating multiple normalization properties. * values indicating multiple normalization properties.
* They are listed here in threshold order, not in the order they are stored in the indexes.
* minYesNo=indexes[IX_MIN_YES_NO]; * minYesNo=indexes[IX_MIN_YES_NO];
* minYesNoMappingsOnly=indexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
* minNoNo=indexes[IX_MIN_NO_NO]; * minNoNo=indexes[IX_MIN_NO_NO];
* minNoNoCompBoundaryBefore=indexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE];
* minNoNoCompNoMaybeCC=indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
* minNoNoEmpty=indexes[IX_MIN_NO_NO_EMPTY];
* limitNoNo=indexes[IX_LIMIT_NO_NO]; * limitNoNo=indexes[IX_LIMIT_NO_NO];
* minMaybeYes=indexes[IX_MIN_MAYBE_YES]; * minMaybeYes=indexes[IX_MIN_MAYBE_YES];
* minYesNoMappingsOnly=indexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
* See the normTrie description below and the design doc for details. * See the normTrie description below and the design doc for details.
* *
* UTrie2 normTrie; -- see utrie2_impl.h and utrie2.h * UTrie2 normTrie; -- see utrie2_impl.h and utrie2.h
@ -710,12 +823,14 @@ unorm_getFCD16(UChar32 c);
* The trie holds the main normalization data. Each code point is mapped to a 16-bit value. * The trie holds the main normalization data. Each code point is mapped to a 16-bit value.
* Rather than using independent bits in the value (which would require more than 16 bits), * Rather than using independent bits in the value (which would require more than 16 bits),
* information is extracted primarily via range checks. * information is extracted primarily via range checks.
* Except, format version 3 uses bit 0 for hasCompBoundaryAfter().
* For example, a 16-bit value norm16 in the range minYesNo<=norm16<minNoNo * For example, a 16-bit value norm16 in the range minYesNo<=norm16<minNoNo
* means that the character has NF*C_QC=Yes and NF*D_QC=No properties, * means that the character has NF*C_QC=Yes and NF*D_QC=No properties,
* which means it has a two-way (round-trip) decomposition mapping. * which means it has a two-way (round-trip) decomposition mapping.
* Values in the range 2<=norm16<limitNoNo are also directly indexes into the extraData * Values in the range 2<=norm16<limitNoNo are also directly indexes into the extraData
* pointing to mappings, compositions lists, or both. * pointing to mappings, compositions lists, or both.
* Value norm16==0 means that the character is normalization-inert, that is, * Value norm16==INERT (0 in versions 1 & 2, 1 in version 3)
* means that the character is normalization-inert, that is,
* it does not have a mapping, does not participate in composition, has a zero * it does not have a mapping, does not participate in composition, has a zero
* canonical combining class, and forms a boundary where text before it and after it * canonical combining class, and forms a boundary where text before it and after it
* can be normalized independently. * can be normalized independently.
@ -729,7 +844,7 @@ unorm_getFCD16(UChar32 c);
* The trie has a value for each lead surrogate code unit representing the "worst case" * The trie has a value for each lead surrogate code unit representing the "worst case"
* properties of the 1024 supplementary characters whose UTF-16 form starts with * properties of the 1024 supplementary characters whose UTF-16 form starts with
* the lead surrogate. If all of the 1024 supplementary characters are normalization-inert, * the lead surrogate. If all of the 1024 supplementary characters are normalization-inert,
* then their lead surrogate code unit has the trie value 0. * then their lead surrogate code unit has the trie value INERT.
* When the lead surrogate unit's value exceeds the quick check minimum during processing, * When the lead surrogate unit's value exceeds the quick check minimum during processing,
* the properties for the full supplementary code point need to be looked up. * the properties for the full supplementary code point need to be looked up.
* *
@ -738,6 +853,7 @@ unorm_getFCD16(UChar32 c);
* *
* There is only one byte offset for the end of these two arrays. * There is only one byte offset for the end of these two arrays.
* The split between them is given by the constant and variable mentioned above. * The split between them is given by the constant and variable mentioned above.
* In version 3, the difference must be shifted right by OFFSET_SHIFT.
* *
* The maybeYesCompositions array contains compositions lists for characters that * The maybeYesCompositions array contains compositions lists for characters that
* combine both forward (as starters in composition pairs) * combine both forward (as starters in composition pairs)
@ -754,6 +870,8 @@ unorm_getFCD16(UChar32 c);
* followed by only mappings for "NoNo" characters. * followed by only mappings for "NoNo" characters.
* (Referring to pairs of NFC/NFD quick check values.) * (Referring to pairs of NFC/NFD quick check values.)
* The norm16 values of those characters are directly indexes into the extraData array. * The norm16 values of those characters are directly indexes into the extraData array.
* In version 3, the norm16 values must be shifted right by OFFSET_SHIFT
* for accessing extraData.
* *
* The data structures for compositions lists and mappings are described in the design doc. * The data structures for compositions lists and mappings are described in the design doc.
* *
@ -784,6 +902,50 @@ unorm_getFCD16(UChar32 c);
* This is fully equivalent with formatVersion 1's MAPPING_PLUS_COMPOSITION_LIST flag. * This is fully equivalent with formatVersion 1's MAPPING_PLUS_COMPOSITION_LIST flag.
* It is needed for the new (in ICU 49) composePair(), not for other normalization. * It is needed for the new (in ICU 49) composePair(), not for other normalization.
* - Addition of the smallFCD[] bit set. * - Addition of the smallFCD[] bit set.
*
* Changes from format version 2 to format version 3 (ICU 60) ------------------
*
* - norm16 bit 0 indicates hasCompBoundaryAfter(),
* except that for contiguous composition (FCC) the tccc must be checked as well.
* Data indexes and ccc values are shifted left by one (OFFSET_SHIFT).
* Thresholds like minNoNo are tested before shifting.
*
* - Algorithmic mapping deltas are shifted left by two more bits (total DELTA_SHIFT),
* to make room for two bits (three values) indicating whether the tccc is 0, 1, or greater.
* See DELTA_TCCC_MASK etc.
* This helps with fetching tccc/FCD values and FCC hasCompBoundaryAfter().
* minMaybeYes is 8-aligned so that the DELTA_TCCC_MASK bits can be tested directly.
*
* - Algorithmic mappings are only used for mapping to "comp yes and ccc=0" characters,
* and ASCII characters are mapped algorithmically only to other ASCII characters.
* This helps with hasCompBoundaryBefore() and compose() fast paths.
* It is never necessary any more to loop for algorithmic mappings.
*
* - Addition of indexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE],
* indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC], and indexes[IX_MIN_NO_NO_EMPTY],
* and separation of the noNo extraData into distinct ranges.
* With this, the noNo norm16 value indicates whether the mapping is
* compose-normalized, not normalized but hasCompBoundaryBefore(),
* not even that, or maps to an empty string.
* hasCompBoundaryBefore() can be determined solely from the norm16 value.
*
* - The norm16 value for Hangul LVT is now different from that for Hangul LV,
* so that hasCompBoundaryAfter() need not check for the syllable type.
* For Hangul LV, minYesNo continues to be used (no comp-boundary-after).
* For Hangul LVT, minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER is used.
* The extraData units at these indexes are set to firstUnit=2 and firstUnit=3, respectively,
* to simplify some code.
*
* - The extraData firstUnit bit 5 is no longer necessary
* (norm16 bit 0 used instead of firstUnit MAPPING_NO_COMP_BOUNDARY_AFTER),
* is reserved again, and always set to 0.
*
* - Addition of indexes[IX_MIN_LCCC_CP], the first code point where lccc!=0.
* This used to be hardcoded to U+0300, but in data like NFKC_Casefold it is lower:
* U+00AD Soft Hyphen maps to an empty string,
* which is artificially assigned "worst case" values lccc=1 and tccc=255.
*
* - A mapping to an empty string has explicit lccc=1 and tccc=255 values.
*/ */
#endif /* !UCONFIG_NO_NORMALIZATION */ #endif /* !UCONFIG_NO_NORMALIZATION */

File diff suppressed because it is too large Load Diff

View File

@ -101,15 +101,15 @@
# define NOIME # define NOIME
# define NOMCX # define NOMCX
# include <windows.h> # include <windows.h>
# include "unicode\uloc.h" # include "unicode/uloc.h"
#if U_PLATFORM_HAS_WINUWP_API == 0 #if U_PLATFORM_HAS_WINUWP_API == 0
# include "wintz.h" # include "wintz.h"
#else // U_PLATFORM_HAS_WINUWP_API #else // U_PLATFORM_HAS_WINUWP_API
typedef PVOID LPMSG; // TODO: figure out how to get rid of this typedef typedef PVOID LPMSG; // TODO: figure out how to get rid of this typedef
#include <Windows.Globalization.h> #include <Windows.Globalization.h>
#include <windows.system.userprofile.h> #include <windows.system.userprofile.h>
#include <wrl\wrappers\corewrappers.h> #include <wrl/wrappers/corewrappers.h>
#include <wrl\client.h> #include <wrl/client.h>
using namespace ABI::Windows::Foundation; using namespace ABI::Windows::Foundation;
using namespace Microsoft::WRL; using namespace Microsoft::WRL;
@ -675,6 +675,16 @@ extern U_IMPORT char *U_TZNAME[];
#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS) #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
/* These platforms are likely to use Olson timezone IDs. */ /* These platforms are likely to use Olson timezone IDs. */
/* common targets of the symbolic link at TZDEFAULT are:
* "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
* "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
* "/usr/share/lib/zoneinfo/<olsonID>" Solaris
* "../usr/share/lib/zoneinfo/<olsonID>" Solaris
* "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
* To avoid checking lots of paths, just check that the target path
* before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
*/
#define CHECK_LOCALTIME_LINK 1 #define CHECK_LOCALTIME_LINK 1
#if U_PLATFORM_IS_DARWIN_BASED #if U_PLATFORM_IS_DARWIN_BASED
#include <tzfile.h> #include <tzfile.h>
@ -682,12 +692,12 @@ extern U_IMPORT char *U_TZNAME[];
#elif U_PLATFORM == U_PF_SOLARIS #elif U_PLATFORM == U_PF_SOLARIS
#define TZDEFAULT "/etc/localtime" #define TZDEFAULT "/etc/localtime"
#define TZZONEINFO "/usr/share/lib/zoneinfo/" #define TZZONEINFO "/usr/share/lib/zoneinfo/"
#define TZZONEINFO2 "../usr/share/lib/zoneinfo/"
#define TZ_ENV_CHECK "localtime" #define TZ_ENV_CHECK "localtime"
#else #else
#define TZDEFAULT "/etc/localtime" #define TZDEFAULT "/etc/localtime"
#define TZZONEINFO "/usr/share/zoneinfo/" #define TZZONEINFO "/usr/share/zoneinfo/"
#endif #endif
#define TZZONEINFOTAIL "/zoneinfo/"
#if U_HAVE_DIRENT_H #if U_HAVE_DIRENT_H
#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */ #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
@ -939,30 +949,30 @@ static CharString *gSearchTZFileResult = NULL;
* This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results. * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
*/ */
static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
DIR* dirp = opendir(path); DIR* dirp = NULL;
DIR* subDirp = NULL;
struct dirent* dirEntry = NULL; struct dirent* dirEntry = NULL;
char* result = NULL; char* result = NULL;
UErrorCode status = U_ZERO_ERROR;
/* Save the current path */
CharString curpath(path, -1, status);
if (U_FAILURE(status)) {
goto cleanupAndReturn;
}
dirp = opendir(path);
if (dirp == NULL) { if (dirp == NULL) {
return result; goto cleanupAndReturn;
} }
if (gSearchTZFileResult == NULL) { if (gSearchTZFileResult == NULL) {
gSearchTZFileResult = new CharString; gSearchTZFileResult = new CharString;
if (gSearchTZFileResult == NULL) { if (gSearchTZFileResult == NULL) {
return NULL; goto cleanupAndReturn;
} }
ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
} }
/* Save the current path */
UErrorCode status = U_ZERO_ERROR;
CharString curpath(path, -1, status);
if (U_FAILURE(status)) {
return NULL;
}
/* Check each entry in the directory. */ /* Check each entry in the directory. */
while((dirEntry = readdir(dirp)) != NULL) { while((dirEntry = readdir(dirp)) != NULL) {
const char* dirName = dirEntry->d_name; const char* dirName = dirEntry->d_name;
@ -971,15 +981,16 @@ static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
CharString newpath(curpath, status); CharString newpath(curpath, status);
newpath.append(dirName, -1, status); newpath.append(dirName, -1, status);
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
return NULL; break;
} }
DIR* subDirp = NULL;
if ((subDirp = opendir(newpath.data())) != NULL) { if ((subDirp = opendir(newpath.data())) != NULL) {
/* If this new path is a directory, make a recursive call with the newpath. */ /* If this new path is a directory, make a recursive call with the newpath. */
closedir(subDirp); closedir(subDirp);
newpath.append('/', status); newpath.append('/', status);
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
return NULL; break;
} }
result = searchForTZFile(newpath.data(), tzInfo); result = searchForTZFile(newpath.data(), tzInfo);
/* /*
@ -1003,7 +1014,7 @@ static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
gSearchTZFileResult->clear(); gSearchTZFileResult->clear();
gSearchTZFileResult->append(zoneid, -1, status); gSearchTZFileResult->append(zoneid, -1, status);
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
return NULL; break;
} }
result = gSearchTZFileResult->data(); result = gSearchTZFileResult->data();
/* Get out after the first one found. */ /* Get out after the first one found. */
@ -1012,7 +1023,11 @@ static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
} }
} }
} }
cleanupAndReturn:
if (dirp) {
closedir(dirp); closedir(dirp);
}
return result; return result;
} }
#endif #endif
@ -1045,7 +1060,7 @@ uprv_getWindowsTimeZone()
hr = timezone->GetTimeZone(timezoneString.GetAddressOf()); hr = timezone->GetTimeZone(timezoneString.GetAddressOf());
if (SUCCEEDED(hr)) if (SUCCEEDED(hr))
{ {
int32_t length = wcslen(timezoneString.GetRawBuffer(NULL)); int32_t length = static_cast<int32_t>(wcslen(timezoneString.GetRawBuffer(NULL)));
char* asciiId = (char*)uprv_calloc(length + 1, sizeof(char)); char* asciiId = (char*)uprv_calloc(length + 1, sizeof(char));
if (asciiId != nullptr) if (asciiId != nullptr)
{ {
@ -1064,6 +1079,7 @@ uprv_getWindowsTimeZone()
U_CAPI const char* U_EXPORT2 U_CAPI const char* U_EXPORT2
uprv_tzname(int n) uprv_tzname(int n)
{ {
(void)n; // Avoid unreferenced parameter warning.
const char *tzid = NULL; const char *tzid = NULL;
#if U_PLATFORM_USES_ONLY_WIN32_API #if U_PLATFORM_USES_ONLY_WIN32_API
#if U_PLATFORM_HAS_WINUWP_API > 0 #if U_PLATFORM_HAS_WINUWP_API > 0
@ -1125,24 +1141,15 @@ uprv_tzname(int n)
*/ */
int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1); int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
if (0 < ret) { if (0 < ret) {
int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO); int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
gTimeZoneBuffer[ret] = 0; gTimeZoneBuffer[ret] = 0;
if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0 char * tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
&& isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
if (tzZoneInfoTailPtr != NULL
&& isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen))
{ {
return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen); return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen);
} }
#if U_PLATFORM == U_PF_SOLARIS
else
{
tzZoneInfoLen = uprv_strlen(TZZONEINFO2);
if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO2, tzZoneInfoLen) == 0
&& isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
{
return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
}
}
#endif
} else { } else {
#if defined(SEARCH_TZFILE) #if defined(SEARCH_TZFILE)
DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo)); DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
@ -1228,7 +1235,7 @@ UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
static CharString *gTimeZoneFilesDirectory = NULL; static CharString *gTimeZoneFilesDirectory = NULL;
#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
static char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */ static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
static bool gCorrectedPOSIXLocaleHeapAllocated = false; static bool gCorrectedPOSIXLocaleHeapAllocated = false;
#endif #endif
@ -1251,7 +1258,7 @@ static UBool U_CALLCONV putil_cleanup(void)
#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) { if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) {
uprv_free(gCorrectedPOSIXLocale); uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
gCorrectedPOSIXLocale = NULL; gCorrectedPOSIXLocale = NULL;
gCorrectedPOSIXLocaleHeapAllocated = false; gCorrectedPOSIXLocaleHeapAllocated = false;
} }
@ -1287,7 +1294,7 @@ u_setDataDirectory(const char *directory) {
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
{ {
char *p; char *p;
while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) { while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) {
*p = U_FILE_SEP_CHAR; *p = U_FILE_SEP_CHAR;
} }
} }
@ -1445,7 +1452,7 @@ static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
gTimeZoneFilesDirectory->append(path, status); gTimeZoneFilesDirectory->append(path, status);
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
char *p = gTimeZoneFilesDirectory->data(); char *p = gTimeZoneFilesDirectory->data();
while (p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) { while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) {
*p = U_FILE_SEP_CHAR; *p = U_FILE_SEP_CHAR;
} }
#endif #endif
@ -1809,6 +1816,8 @@ The leftmost codepage (.xxx) wins.
} }
// Now normalize the resulting name // Now normalize the resulting name
correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
/* TODO: Should we just exit on memory allocation failure? */
if (correctedPOSIXLocale) if (correctedPOSIXLocale)
{ {
int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
@ -2326,19 +2335,16 @@ u_getVersion(UVersionInfo versionArray) {
* icucfg.h dependent code * icucfg.h dependent code
*/ */
#if U_ENABLE_DYLOAD #if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
#if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
#if HAVE_DLFCN_H #if HAVE_DLFCN_H
#ifdef __MVS__ #ifdef __MVS__
#ifndef __SUSV3 #ifndef __SUSV3
#define __SUSV3 1 #define __SUSV3 1
#endif #endif
#endif #endif
#include <dlfcn.h> #include <dlfcn.h>
#endif #endif /* HAVE_DLFCN_H */
U_INTERNAL void * U_EXPORT2 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) { uprv_dl_open(const char *libName, UErrorCode *status) {
@ -2378,38 +2384,10 @@ uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
return uret.fp; return uret.fp;
} }
#else #elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
/* null (nonexistent) implementation. */ /* Windows API implementation. */
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation. */
U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) {
if(U_FAILURE(*status)) return NULL;
*status = U_UNSUPPORTED_ERROR;
return NULL;
}
U_INTERNAL void U_EXPORT2
uprv_dl_close(void *lib, UErrorCode *status) {
if(U_FAILURE(*status)) return;
*status = U_UNSUPPORTED_ERROR;
return;
}
U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
if(U_SUCCESS(*status)) {
*status = U_UNSUPPORTED_ERROR;
}
return (UVoidFunction*)NULL;
}
#endif
#elif U_PLATFORM_USES_ONLY_WIN32_API
U_INTERNAL void * U_EXPORT2 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) { uprv_dl_open(const char *libName, UErrorCode *status) {
@ -2436,7 +2414,6 @@ uprv_dl_close(void *lib, UErrorCode *status) {
return; return;
} }
U_INTERNAL UVoidFunction* U_EXPORT2 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
HMODULE handle = (HMODULE)lib; HMODULE handle = (HMODULE)lib;
@ -2458,10 +2435,9 @@ uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
return addr; return addr;
} }
#else #else
/* No dynamic loading set. */ /* No dynamic loading, null (nonexistent) implementation. */
U_INTERNAL void * U_EXPORT2 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) { uprv_dl_open(const char *libName, UErrorCode *status) {
@ -2479,7 +2455,6 @@ uprv_dl_close(void *lib, UErrorCode *status) {
return; return;
} }
U_INTERNAL UVoidFunction* U_EXPORT2 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
(void)lib; (void)lib;
@ -2490,7 +2465,7 @@ uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
return (UVoidFunction*)NULL; return (UVoidFunction*)NULL;
} }
#endif /* U_ENABLE_DYLOAD */ #endif
/* /*
* Hey, Emacs, please set the following: * Hey, Emacs, please set the following:

View File

@ -72,15 +72,6 @@
typedef size_t uintptr_t; typedef size_t uintptr_t;
#endif #endif
/**
* \def U_HAVE_MSVC_2003_OR_EARLIER
* Flag for workaround of MSVC 2003 optimization bugs
* @internal
*/
#if !defined(U_HAVE_MSVC_2003_OR_EARLIER) && defined(_MSC_VER) && (_MSC_VER < 1400)
#define U_HAVE_MSVC_2003_OR_EARLIER
#endif
/*===========================================================================*/ /*===========================================================================*/
/** @{ Information about POSIX support */ /** @{ Information about POSIX support */
/*===========================================================================*/ /*===========================================================================*/
@ -120,15 +111,15 @@ typedef size_t uintptr_t;
/* Use the predefined value. */ /* Use the predefined value. */
#elif U_PLATFORM == U_PF_ANDROID #elif U_PLATFORM == U_PF_ANDROID
# define U_TIMEZONE timezone # define U_TIMEZONE timezone
#elif U_PLATFORM_IS_LINUX_BASED #elif defined(__UCLIBC__)
# if defined(__UCLIBC__) // uClibc does not have __timezone or _timezone.
/* uClibc does not have __timezone or _timezone. */
#elif defined(_NEWLIB_VERSION) #elif defined(_NEWLIB_VERSION)
# define U_TIMEZONE _timezone # define U_TIMEZONE _timezone
#elif defined(__GLIBC__) #elif defined(__GLIBC__)
/* glibc */ // glibc
# define U_TIMEZONE __timezone # define U_TIMEZONE __timezone
# endif #elif U_PLATFORM_IS_LINUX_BASED
// not defined
#elif U_PLATFORM_USES_ONLY_WIN32_API #elif U_PLATFORM_USES_ONLY_WIN32_API
# define U_TIMEZONE _timezone # define U_TIMEZONE _timezone
#elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__) #elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__)
@ -214,7 +205,7 @@ typedef size_t uintptr_t;
/** /**
* \def U_HAVE_STD_ATOMICS * \def U_HAVE_STD_ATOMICS
* Defines whether the standard C++11 <atomic> is available. * Defines whether the standard C++11 <atomic> is available.
* ICU will use this when avialable, * ICU will use this when available,
* otherwise will fall back to compiler or platform specific alternatives. * otherwise will fall back to compiler or platform specific alternatives.
* @internal * @internal
*/ */
@ -239,7 +230,7 @@ typedef size_t uintptr_t;
/** /**
* \def U_HAVE_CLANG_ATOMICS * \def U_HAVE_CLANG_ATOMICS
* Defines whether Clang c11 style built-in atomics are avaialable. * Defines whether Clang c11 style built-in atomics are available.
* These are used in preference to gcc atomics when both are available. * These are used in preference to gcc atomics when both are available.
*/ */
#ifdef U_HAVE_CLANG_ATOMICS #ifdef U_HAVE_CLANG_ATOMICS
@ -277,7 +268,7 @@ typedef size_t uintptr_t;
/** /**
* Platform utilities isolates the platform dependencies of the * Platform utilities isolates the platform dependencies of the
* libarary. For each platform which this code is ported to, these * library. For each platform which this code is ported to, these
* functions may have to be re-implemented. * functions may have to be re-implemented.
*/ */
@ -425,7 +416,7 @@ U_INTERNAL const char* U_EXPORT2 uprv_getDefaultCodepage(void);
/** /**
* Please use uloc_getDefault() instead. * Please use uloc_getDefault() instead.
* Return the default locale ID string by querying ths system, or * Return the default locale ID string by querying the system, or
* zero if one cannot be found. * zero if one cannot be found.
* This function can call setlocale() on Unix platforms. Please read the * This function can call setlocale() on Unix platforms. Please read the
* platform documentation on setlocale() before calling this function. * platform documentation on setlocale() before calling this function.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,630 @@
// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// file: rbbi_cache.cpp
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/ubrk.h"
#include "unicode/rbbi.h"
#include "rbbi_cache.h"
#include "brkeng.h"
#include "cmemory.h"
#include "rbbidata.h"
#include "rbbirb.h"
#include "uassert.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
/*
* DictionaryCache implementation
*/
/**
 * Construct an empty dictionary cache bound to a break iterator.
 *
 * @param bi      The break iterator this cache serves. The pointer is stored;
 *                the destructor of this class does not delete it.
 * @param status  In/out error code. Receives any error reported while
 *                constructing the vector of break positions, and is set to
 *                U_MEMORY_ALLOCATION_ERROR if that vector could not be allocated.
 */
RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
        fBI(bi), fBreaks(NULL), fPositionInCache(-1),
        fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
    fBreaks = new UVector32(status);
    // NOTE(review): ICU's operator new presumably returns NULL rather than throwing
    // on allocation failure - confirm against UMemory. The check is harmless either way.
    if (fBreaks == NULL && U_SUCCESS(status)) {
        // Report the failure so the caller does not go on to use a cache whose
        // fBreaks would be dereferenced as NULL by reset(), following(), etc.
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
/**
 * Destructor. Frees the vector of cached break positions, which this object owns.
 */
RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
    delete fBreaks;
    // Clear the pointer so a stale address is never left behind.
    fBreaks = NULL;
}
/**
 * Empty the cache: discard all stored break positions and return every
 * bookkeeping field to its initial value.
 */
void RuleBasedBreakIterator::DictionaryCache::reset() {
    // Drop the cached boundaries themselves.
    fBreaks->removeAllElements();

    // No current iteration position, and an empty [fStart, fLimit) range.
    fPositionInCache = -1;
    fLimit = 0;
    fStart = 0;

    // Rule status indexes revert to zero.
    fOtherRuleStatusIndex = 0;
    fFirstRuleStatusIndex = 0;
}
/**
 * Look up the cached boundary that follows a text position.
 *
 * @param fromPos      The position to search from.
 * @param result       Output: the first cached boundary greater than fromPos.
 *                     Written only when TRUE is returned.
 * @param statusIndex  Output: the rule status index for that boundary
 *                     (always fOtherRuleStatusIndex). Written only when TRUE is returned.
 * @return TRUE if a following boundary was found in the cache; FALSE if fromPos
 *         lies outside [fStart, fLimit) or the cache holds nothing beyond it.
 *         On FALSE the cache position is set to -1.
 */
UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
    // Positions outside the cached range cannot be answered from here.
    if (fromPos < fStart || fromPos >= fLimit) {
        fPositionInCache = -1;
        return FALSE;
    }

    int32_t boundary = 0;

    // Fast path for sequential iteration: the cache is already positioned on fromPos,
    // so the answer is simply the next entry.
    if (fPositionInCache >= 0
            && fPositionInCache < fBreaks->size()
            && fBreaks->elementAti(fPositionInCache) == fromPos) {
        ++fPositionInCache;
        if (fPositionInCache < fBreaks->size()) {
            boundary = fBreaks->elementAti(fPositionInCache);
            U_ASSERT(boundary > fromPos);
            *result = boundary;
            *statusIndex = fOtherRuleStatusIndex;
            return TRUE;
        }
        // Stepped past the last cached entry.
        fPositionInCache = -1;
        return FALSE;
    }

    // Random access: scan from the front for the first boundary beyond fromPos.
    fPositionInCache = 0;
    while (fPositionInCache < fBreaks->size()) {
        boundary = fBreaks->elementAti(fPositionInCache);
        if (boundary > fromPos) {
            *result = boundary;
            *statusIndex = fOtherRuleStatusIndex;
            return TRUE;
        }
        ++fPositionInCache;
    }

    // Not expected to be reached: the range check above should guarantee a hit.
    U_ASSERT(FALSE);
    fPositionInCache = -1;
    return FALSE;
}
/**
 * Look up the cached boundary that precedes a text position.
 *
 * @param fromPos      The position to search from.
 * @param result       Output: the last cached boundary less than fromPos.
 *                     Written only when TRUE is returned.
 * @param statusIndex  Output: fFirstRuleStatusIndex if the boundary found equals fStart,
 *                     otherwise fOtherRuleStatusIndex. Written only when TRUE is returned.
 * @return TRUE if a preceding boundary was found in the cache; FALSE if fromPos
 *         lies outside (fStart, fLimit] or no earlier cached boundary exists.
 *         On FALSE the cache position is set to -1.
 */
UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
    // Positions outside the cached range cannot be answered from here.
    if (fromPos > fLimit || fromPos <= fStart) {
        fPositionInCache = -1;
        return FALSE;
    }

    // Starting from the limit: position directly on the last cached entry.
    if (fromPos == fLimit) {
        fPositionInCache = fBreaks->size() - 1;
        if (fPositionInCache >= 0) {
            U_ASSERT(fBreaks->elementAti(fPositionInCache) == fromPos);
        }
    }

    int32_t boundary;

    // Fast path for sequential iteration: the cache is already positioned on fromPos
    // and there is an earlier entry to step back to.
    if (fPositionInCache > 0
            && fPositionInCache < fBreaks->size()
            && fBreaks->elementAti(fPositionInCache) == fromPos) {
        --fPositionInCache;
        boundary = fBreaks->elementAti(fPositionInCache);
        U_ASSERT(boundary < fromPos);
        *result = boundary;
        if (boundary == fStart) {
            *statusIndex = fFirstRuleStatusIndex;
        } else {
            *statusIndex = fOtherRuleStatusIndex;
        }
        return TRUE;
    }

    // Positioned on the first entry: nothing earlier is cached.
    if (fPositionInCache == 0) {
        fPositionInCache = -1;
        return FALSE;
    }

    // Random access: scan from the back for the last boundary before fromPos.
    fPositionInCache = fBreaks->size() - 1;
    while (fPositionInCache >= 0) {
        boundary = fBreaks->elementAti(fPositionInCache);
        if (boundary < fromPos) {
            *result = boundary;
            if (boundary == fStart) {
                *statusIndex = fFirstRuleStatusIndex;
            } else {
                *statusIndex = fOtherRuleStatusIndex;
            }
            return TRUE;
        }
        --fPositionInCache;
    }

    // Not expected to be reached: the range check above should guarantee a hit.
    U_ASSERT(FALSE);
    fPositionInCache = -1;
    return FALSE;
}
/**
 * Fill the dictionary cache with the boundaries that the language break engines
 * find inside one rule-based text segment.
 *
 * Segments of length 0 or 1 are ignored and leave the cache untouched. Otherwise the
 * cache is reset first; if no engine reports any break it is left empty, so later
 * lookups fail and callers fall back to the rule-based boundaries.
 *
 * @param startPos         Start of the rule-based segment.
 * @param endPos           End of the rule-based segment.
 * @param firstRuleStatus  Rule status index recorded for the boundary at the segment start.
 * @param otherRuleStatus  Rule status index recorded for all other boundaries.
 */
void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
                                       int32_t firstRuleStatus, int32_t otherRuleStatus) {
    // Nothing to subdivide in a segment of at most one unit.
    if ((endPos - startPos) <= 1) {
        return;
    }

    reset();
    fFirstRuleStatusIndex = firstRuleStatus;
    fOtherRuleStatusIndex = otherRuleStatus;

    const int32_t rangeStart = startPos;
    const int32_t rangeEnd = endPos;
    UErrorCode status = U_ZERO_ERROR;
    int32_t breaksFound = 0;
    int32_t textIndex;
    UText *text = fBI->fText;

    // Begin at the segment start and classify the character found there.
    utext_setNativeIndex(text, rangeStart);
    UChar32 c = utext_current32(text);
    uint16_t category = UTRIE2_GET16(fBI->fData->fTrie, c);

    // Walk the segment. For each run of dictionary characters, hand the text to the
    // matching break engine and let it record the breaks it finds.
    while (U_SUCCESS(status)) {
        // Advance over characters whose category lacks the dictionary bit (0x4000).
        for (;;) {
            textIndex = (int32_t)UTEXT_GETNATIVEINDEX(text);
            if (textIndex >= rangeEnd || (category & 0x4000) != 0) {
                break;
            }
            utext_next32(text);
            c = utext_current32(text);
            category = UTRIE2_GET16(fBI->fData->fTrie, c);
        }
        if (textIndex >= rangeEnd) {
            break;
        }

        // c is a dictionary character: find the engine that handles it.
        const LanguageBreakEngine *engine = fBI->getLanguageBreakEngine(c);

        // The engine appends its breaks to fBreaks and leaves the text positioned
        // beyond the range it handled, ready for the search to continue.
        if (engine != NULL) {
            breaksFound += engine->findBreaks(text, rangeStart, rangeEnd, fBI->fBreakType, *fBreaks);
        }

        // Re-read the current character for the next pass.
        c = utext_current32(text);
        category = UTRIE2_GET16(fBI->fData->fTrie, c);
    }

    if (breaksFound <= 0) {
        // No language-based breaks were produced even though the segment contained
        // dictionary characters. The cache stays empty; subsequent lookups for this
        // range fail and the caller uses the rule-based boundaries instead.
        return;
    }

    U_ASSERT(breaksFound == fBreaks->size());

    // Make sure the cached list is bracketed by the original segment boundaries.
    if (startPos < fBreaks->elementAti(0)) {
        // The engines did not place a boundary at the segment start. Uncommon, but
        // rule/engine interactions could cause it, so add the boundary here.
        fBreaks->insertElementAt(startPos, 0, status);
    }
    if (endPos > fBreaks->peeki()) {
        fBreaks->push(endPos, status);
    }

    // Start iteration at the first entry. Dictionary matching may have extended
    // beyond the original limit, so take the range from the list itself.
    fPositionInCache = 0;
    fStart = fBreaks->elementAti(0);
    fLimit = fBreaks->peeki();
}
/*
* BreakCache implementation
*/
/**
 * Construct a break cache bound to a break iterator.
 *
 * @param bi      The break iterator this cache serves; the pointer is stored.
 * @param status  In/out error code, passed to the constructor of fSideBuffer.
 */
RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
        fBI(bi),
        fSideBuffer(status) {
    // Start from the initial cache state, using reset()'s default arguments.
    reset();
}
/**
 * Destructor. The body is empty: nothing is released explicitly here.
 */
RuleBasedBreakIterator::BreakCache::~BreakCache() {
}
/**
 * Reset the cache so that it holds exactly one boundary.
 *
 * @param pos         The text position of the single boundary to keep.
 * @param ruleStatus  The rule status index for that boundary; stored as uint16_t.
 */
void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
    // Slot 0 holds the one remaining boundary and its status.
    fBoundaries[0] = pos;
    fStatuses[0] = static_cast<uint16_t>(ruleStatus);

    // Start, end and current buffer indexes all refer to slot 0.
    fStartBufIdx = 0;
    fEndBufIdx = 0;
    fBufIdx = 0;

    // The current text position is that boundary.
    fTextIdx = pos;
}
/**
 * Publish the cache's current position to the owning break iterator.
 *
 * Copies the current text index and rule status index into the iterator
 * and clears its "done" flag.
 *
 * @return the current text index of the cache.
 */
int32_t RuleBasedBreakIterator::BreakCache::current() {
    fBI->fDone = FALSE;
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
    fBI->fPosition = fTextIdx;
    return fTextIdx;
}
/**
 * Move to the boundary following a text position.
 *
 * Positions the cache at or before startPos (from the current position, by seek(),
 * or by populateNear(), tried in that order) and then advances with next().
 * Does nothing if status is already a failure or if the cache cannot be positioned.
 *
 * @param startPos  The text position to start from.
 * @param status    In/out error code.
 */
void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }

    // Give up unless one of the three ways of reaching startPos succeeds.
    if (startPos != fTextIdx && !seek(startPos) && !populateNear(startPos, status)) {
        return;
    }

    // startPos is now covered by the cache; step forward from it.
    // TODO: the interactions with fBI->fDone are awkward.
    //   seek() does not clear it, and cannot because of how populateNear() uses it.
    //   next() does not clear it on its fast path either, though perhaps it should.
    //   So clear it here, for the case where seek() succeeded on an iterator
    //   that had previously run off the end.
    fBI->fDone = false;
    next();
}
/**
 * Move to the boundary preceding a text position.
 *
 * Positions the cache at or before startPos (from the current position, by seek(),
 * or by populateNear(), tried in that order). If the cache then sits exactly on
 * startPos it steps back with previous(); otherwise it is already on the preceding
 * boundary, which is published with current().
 * Does nothing if status is already a failure or if the cache cannot be positioned.
 *
 * @param startPos  The text position to start from.
 * @param status    In/out error code.
 */
void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }

    // Give up unless one of the three ways of reaching startPos succeeds.
    if (startPos != fTextIdx && !seek(startPos) && !populateNear(startPos, status)) {
        return;
    }

    if (startPos == fTextIdx) {
        // Sitting exactly on startPos: the answer is one boundary back.
        previous(status);
        return;
    }

    // seek() leaves the cache on the preceding boundary when the requested position
    // falls between two boundaries; current() pushes that position out to the
    // break iterator itself.
    U_ASSERT(startPos > fTextIdx);
    current();
}
/*
* Out-of-line code for BreakCache::next().
* Cache does not already contain the boundary
*/
void RuleBasedBreakIterator::BreakCache::nextOL() {
fBI->fDone = !populateFollowing();
fBI->fPosition = fTextIdx;
fBI->fRuleStatusIndex = fStatuses[fBufIdx];
return;
}
/**
 * Step the cache back by one boundary and publish the result to the break iterator.
 *
 * If the cache is at its first entry it is extended backward with populatePreceding();
 * otherwise the buffer index simply moves to the previous slot. The iterator's
 * "done" flag is set when the buffer index did not change.
 * Returns immediately, changing nothing, if status is already a failure.
 *
 * @param status  In/out error code.
 */
void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }

    const int32_t bufIdxBefore = fBufIdx;
    if (fBufIdx != fStartBufIdx) {
        // The preceding boundary is already cached: move to the previous slot.
        fBufIdx = modChunkSize(fBufIdx - 1);
        fTextIdx = fBoundaries[fBufIdx];
    } else {
        // At the start of the cache: prepend to it.
        populatePreceding(status);
    }

    // No movement means there was nothing before the current boundary.
    fBI->fDone = (fBufIdx == bufIdxBefore);
    fBI->fPosition = fTextIdx;
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
}
/**
 * Position the cache at a text position that lies within the cached range.
 *
 * If pos is itself a cached boundary the cache is placed on it; if pos falls between
 * two cached boundaries the cache is placed on the one preceding pos.
 *
 * @param pos  The text position to seek to.
 * @return TRUE if pos is within the cached range and the cache was repositioned;
 *         FALSE, with nothing changed, if pos is outside the range.
 */
UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
    // Outside the cached range: cannot be satisfied from the cache.
    if (pos > fBoundaries[fEndBufIdx] || pos < fBoundaries[fStartBufIdx]) {
        return FALSE;
    }

    // Common case: seek(0), from BreakIterator::first().
    if (pos == fBoundaries[fStartBufIdx]) {
        fBufIdx = fStartBufIdx;
        fTextIdx = fBoundaries[fBufIdx];
        return TRUE;
    }

    // Exactly on the last cached boundary.
    if (pos == fBoundaries[fEndBufIdx]) {
        fBufIdx = fEndBufIdx;
        fTextIdx = fBoundaries[fBufIdx];
        return TRUE;
    }

    // Binary search over the buffer for the first boundary greater than pos.
    // When the low index is numerically above the high one, CACHE_SIZE is added
    // before halving and the midpoint is reduced again with modChunkSize().
    int32_t lo = fStartBufIdx;
    int32_t hi = fEndBufIdx;
    while (lo != hi) {
        const int32_t wrapAdjust = (lo > hi) ? CACHE_SIZE : 0;
        const int32_t mid = modChunkSize((lo + hi + wrapAdjust) / 2);
        if (fBoundaries[mid] > pos) {
            hi = mid;
        } else {
            lo = modChunkSize(mid + 1);
        }
    }
    U_ASSERT(fBoundaries[hi] > pos);

    // The slot just before it holds the boundary at or preceding pos.
    fBufIdx = modChunkSize(hi - 1);
    fTextIdx = fBoundaries[fBufIdx];
    U_ASSERT(fTextIdx <= pos);
    return TRUE;
}
/**
 * Extend (or rebuild) the cache so that it covers a text position that is currently
 * outside the cached range, and leave the cache positioned at the boundary at or
 * preceding that position.
 *
 * If the position is more than 15 beyond either end of the cached range, the existing
 * contents are discarded and the cache is restarted from a rule-based boundary found
 * near the position (or from 0 when the position is 20 or less). The gap between the
 * cache and the requested position is then filled in.
 *
 * @param position  The requested text position; expected to lie outside the cached range.
 * @param status    In/out error code.
 * @return TRUE if the cache now covers position; FALSE if status was or became a
 *         failure, or if the cache could not be extended forward far enough.
 */
UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return FALSE;
    }
    U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);

    // Find a boundary somewhere in the vicinity of the requested position.
    // Depending on the safe rules and the text data, it could be either before, at,
    // or after the requested position.

    // If the requested position is not near already cached positions, clear the
    // existing cache, find a near-by boundary and begin new cache contents there.
    if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
        int32_t aBoundary = 0;
        int32_t ruleStatusIndex = 0;
        // TODO: check for position == length of text. Although may still need to
        //       back up to get rule status.
        if (position > 20) {
            int32_t backupPos = fBI->handlePrevious(position);
            fBI->fPosition = backupPos;
            aBoundary = fBI->handleNext();   // Ignore dictionary, just finding a rule based boundary.
            ruleStatusIndex = fBI->fRuleStatusIndex;
        }
        reset(aBoundary, ruleStatusIndex);   // Cache now holds aBoundary as a single starting point.
    }

    // Fill in boundaries between existing cache content and the new requested position.

    if (fBoundaries[fEndBufIdx] < position) {
        // The last position in the cache precedes the requested position.
        // Add following position(s) to the cache.
        while (fBoundaries[fEndBufIdx] < position) {
            if (!populateFollowing()) {
                U_ASSERT(FALSE);
                return FALSE;
            }
        }
        fBufIdx = fEndBufIdx;                // Set iterator position to the end of the buffer.
        fTextIdx = fBoundaries[fBufIdx];     // Required because populateFollowing may add extra boundaries.
        while (fTextIdx > position) {        // Move backwards to a position at or preceding the requested pos.
            previous(status);
            // previous() returns without moving once status is a failure, so the
            // loop condition could never change again: stop instead of spinning.
            if (U_FAILURE(status)) {
                return FALSE;
            }
        }
        return TRUE;
    }

    if (fBoundaries[fStartBufIdx] > position) {
        // The first position in the cache is beyond the requested position.
        // Back up more until we get a boundary <= the requested position.
        while (fBoundaries[fStartBufIdx] > position) {
            populatePreceding(status);
            // NOTE(review): populatePreceding() presumably makes no progress once
            // status is a failure (as previous() does) - confirm. Bail out so a
            // failed status cannot leave this loop running forever.
            if (U_FAILURE(status)) {
                return FALSE;
            }
        }
        fBufIdx = fStartBufIdx;              // Set iterator position to the start of the buffer.
        fTextIdx = fBoundaries[fBufIdx];     // Required because populatePreceding may add extra boundaries.
        while (fTextIdx < position) {        // Move forwards to a position at or following the requested pos.
            next();
        }
        if (fTextIdx > position) {
            // If position is not itself a boundary, the next() loop above will overshoot.
            // Back up one, leaving cache position at the boundary preceding the requested position.
            previous(status);
        }
        return TRUE;
    }

    U_ASSERT(fTextIdx == position);
    return TRUE;
}
/**
 * Extend the cache with the boundary (or boundaries) following the last cached one.
 *
 * The dictionary cache is consulted first; otherwise the rule engine produces the
 * next boundary, and a rule segment containing dictionary characters is subdivided
 * through the dictionary cache. The iteration position is moved to the first newly
 * added boundary.
 *
 * @return FALSE if the rules report UBRK_DONE (no further boundary), TRUE otherwise.
 */
UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
    const int32_t lastBoundary  = fBoundaries[fEndBufIdx];
    const int32_t lastStatusIdx = fStatuses[fEndBufIdx];
    int32_t boundary  = 0;
    int32_t statusIdx = 0;

    // First choice: a boundary already sitting in the dictionary cache.
    UBool fromDictionary = fBI->fDictionaryCache->following(lastBoundary, &boundary, &statusIdx);

    if (!fromDictionary) {
        // Otherwise run the rules forward from the last cached boundary.
        fBI->fPosition = lastBoundary;
        boundary = fBI->handleNext();
        if (boundary == UBRK_DONE) {
            return FALSE;
        }
        statusIdx = fBI->fRuleStatusIndex;

        if (fBI->fDictionaryCharCount > 0) {
            // The text segment obtained from the rules includes dictionary characters.
            // Subdivide it, with subdivided results going into the dictionary cache,
            // then retry the dictionary cache lookup.
            // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
            //       But be careful with interactions with populateNear().
            fBI->fDictionaryCache->populateDictionary(lastBoundary, boundary, lastStatusIdx, statusIdx);
            fromDictionary = fBI->fDictionaryCache->following(lastBoundary, &boundary, &statusIdx);
        }
    }

    // Whichever source supplied it, this boundary becomes the new iteration position.
    addFollowing(boundary, statusIdx, UpdateCachePosition);
    if (fromDictionary) {
        return TRUE;
    }

    // The boundary just added came straight from the rules: either the segment had no
    // dictionary characters, or the dictionary segmenter did not handle them.
    // Read ahead a few more non-dictionary boundaries so that subsequent calls to
    // BreakIterator::next() can take the fast path and use cached results.
    int32_t extraAdded = 0;
    while (extraAdded < 6) {
        boundary = fBI->handleNext();
        if (boundary == UBRK_DONE) {
            break;
        }
        if (fBI->fDictionaryCharCount > 0) {
            break;
        }
        addFollowing(boundary, fBI->fRuleStatusIndex, RetainCachePosition);
        ++extraAdded;
    }
    return TRUE;
}
// Add one or more boundaries to the cache in front of the first currently cached boundary.
//
// On success the iteration position is moved to the newly added boundary that immediately
// precedes the old first boundary; any further boundaries found are added in front of it
// without moving the iteration position.
//
// Returns FALSE, leaving the cache untouched, if status already holds a failure or if the
// first cached boundary is text position 0. Otherwise returns TRUE when at least one
// boundary was added.
UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
if (U_FAILURE(status)) {
return FALSE;
}
int32_t fromPosition = fBoundaries[fStartBufIdx];
if (fromPosition == 0) {
// Already at the start of the text; nothing can precede it.
return FALSE;
}
int32_t position = 0;
int32_t positionStatusIdx = 0;
// First choice: take the preceding boundary from the dictionary cache, if it has one.
if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
addPreceding(position, positionStatusIdx, UpdateCachePosition);
return TRUE;
}
int32_t backupPosition = fromPosition;
// Find a boundary somewhere preceding the first already-cached boundary
// Each pass backs up a further 30 text units, pinned at 0, and repeats until the boundary
// obtained by running the rules forward from there lies strictly before fromPosition.
do {
backupPosition = backupPosition - 30;
if (backupPosition <= 0) {
backupPosition = 0;
} else {
backupPosition = fBI->handlePrevious(backupPosition);
}
if (backupPosition == UBRK_DONE || backupPosition == 0) {
// Backed up to the start of the text: position 0 with status index 0 is the starting boundary.
position = 0;
positionStatusIdx = 0;
} else {
fBI->fPosition = backupPosition; // TODO: pass starting position in a clearer way.
position = fBI->handleNext();
positionStatusIdx = fBI->fRuleStatusIndex;
}
} while (position >= fromPosition);
// Find boundaries between the one we just located and the first already-cached boundary
// Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer.
// Each boundary is stored as a pair: position first, then its rule status index.
fSideBuffer.removeAllElements();
fSideBuffer.addElement(position, status);
fSideBuffer.addElement(positionStatusIdx, status);
do {
int32_t prevPosition = fBI->fPosition = position;
int32_t prevStatusIdx = positionStatusIdx;
position = fBI->handleNext();
positionStatusIdx = fBI->fRuleStatusIndex;
if (position == UBRK_DONE) {
break;
}
UBool segmentHandledByDictionary = FALSE;
if (fBI->fDictionaryCharCount != 0) {
// Segment from the rules includes dictionary characters.
// Subdivide it, with subdivided results going into the dictionary cache.
int32_t dictSegEndPosition = position;
fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
// Walk the dictionary boundaries of this segment, collecting those before fromPosition.
while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
segmentHandledByDictionary = true;
U_ASSERT(position > prevPosition);
if (position >= fromPosition) {
break;
}
U_ASSERT(position <= dictSegEndPosition);
fSideBuffer.addElement(position, status);
fSideBuffer.addElement(positionStatusIdx, status);
prevPosition = position;
}
U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
}
// A plain rule-based boundary is recorded only if it still precedes the cached range.
if (!segmentHandledByDictionary && position < fromPosition) {
fSideBuffer.addElement(position, status);
fSideBuffer.addElement(positionStatusIdx, status);
}
} while (position < fromPosition);
// Move boundaries from the side buffer to the main circular buffer.
// Entries are popped in reverse order of insertion (status index, then position), so the
// first pair popped is the boundary closest to fromPosition; it becomes the new iteration
// position.
UBool success = FALSE;
if (!fSideBuffer.isEmpty()) {
positionStatusIdx = fSideBuffer.popi();
position = fSideBuffer.popi();
addPreceding(position, positionStatusIdx, UpdateCachePosition);
success = TRUE;
}
// The remaining, earlier boundaries are added without moving the iteration position.
while (!fSideBuffer.isEmpty()) {
positionStatusIdx = fSideBuffer.popi();
position = fSideBuffer.popi();
if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
// No space in circular buffer to hold a new preceding result while
// also retaining the current cache (iteration) position.
// Bailing out is safe; the cache will refill again if needed.
break;
}
}
return success;
}
/**
 * Append a boundary after the current last entry of the circular buffer.
 *
 * @param position       Text index of the boundary; must be beyond the last cached boundary.
 * @param ruleStatusIdx  Rule status index for the boundary; must fit in 16 bits.
 * @param update         UpdateCachePosition moves the iteration position to the new
 *                       boundary; RetainCachePosition leaves it where it was.
 */
void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
    U_ASSERT(position > fBoundaries[fEndBufIdx]);
    U_ASSERT(ruleStatusIdx <= UINT16_MAX);

    const int32_t slot = modChunkSize(fEndBufIdx + 1);
    if (slot == fStartBufIdx) {
        // Buffer is full: drop the oldest entries from the front to make room.
        fStartBufIdx = modChunkSize(fStartBufIdx + 6);    // TODO: experiment. Probably revert to 1.
    }

    fEndBufIdx = slot;
    fBoundaries[slot] = position;
    fStatuses[slot] = ruleStatusIdx;

    if (update != UpdateCachePosition) {
        // Retaining the original cache position.
        // Check if the added boundary wraps around the buffer, and would over-write the original position.
        // It's the responsibility of callers of this function to not add too many.
        U_ASSERT(slot != fBufIdx);
        return;
    }

    // Set current position to the newly added boundary.
    fBufIdx = slot;
    fTextIdx = position;
}
/**
 * Prepend a boundary in front of the current first entry of the circular buffer.
 *
 * @param position       Text index of the boundary; must precede the first cached boundary.
 * @param ruleStatusIdx  Rule status index for the boundary; must fit in 16 bits.
 * @param update         UpdateCachePosition moves the iteration position to the new
 *                       boundary; RetainCachePosition leaves it where it was.
 * @return false if the buffer is full and the only slot available is the one holding
 *         the iteration position that the caller asked to retain; true otherwise.
 */
bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
    U_ASSERT(position < fBoundaries[fStartBufIdx]);
    U_ASSERT(ruleStatusIdx <= UINT16_MAX);

    const int32_t slot = modChunkSize(fStartBufIdx - 1);
    const bool bufferFull = (slot == fEndBufIdx);
    if (bufferFull) {
        const bool iterationAtEnd = (fBufIdx == fEndBufIdx);
        if (iterationAtEnd && update == RetainCachePosition) {
            // Failure. The insertion of the new boundary would claim the buffer position that is the
            // current iteration position. And we also want to retain the current iteration position.
            // (The buffer is already completely full of entries that precede the iteration position.)
            return false;
        }
        // Give up the last entry so that its slot can hold the new first entry.
        fEndBufIdx = modChunkSize(fEndBufIdx - 1);
    }

    fStartBufIdx = slot;
    fBoundaries[slot] = position;
    fStatuses[slot] = ruleStatusIdx;

    if (update == UpdateCachePosition) {
        fBufIdx = slot;
        fTextIdx = position;
    }
    return true;
}
/**
 * Debug aid: print the iteration position followed by every cached
 * (buffer index, boundary) pair, from the first entry through the last.
 * Compiles to an empty function unless RBBI_DEBUG is defined.
 */
void RuleBasedBreakIterator::BreakCache::dumpCache() {
#ifdef RBBI_DEBUG
    RBBIDebugPrintf("fTextIdx:%d fBufIdx:%d\n", fTextIdx, fBufIdx);
    int32_t idx = fStartBufIdx;
    for (;;) {
        RBBIDebugPrintf("%d %d\n", idx, fBoundaries[idx]);
        if (idx == fEndBufIdx) {
            break;
        }
        idx = modChunkSize(idx + 1);    // advance with wrap-around
    }
#endif
}
U_NAMESPACE_END
#endif // #if !UCONFIG_NO_BREAK_ITERATION

View File

@ -0,0 +1,203 @@
// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// file: rbbi_cache.h
//
#ifndef RBBI_CACHE_H
#define RBBI_CACHE_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/rbbi.h"
#include "unicode/uobject.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
/* DictionaryCache stores the boundaries obtained from a run of dictionary characters.
* Dictionary boundaries are moved first to this cache, then from here
* to the main BreakCache, where they may inter-leave with non-dictionary
* boundaries. The public BreakIterator API always fetches directly
* from the main BreakCache, not from here.
*
* In common situations, the number of boundaries in a single dictionary run
* should be quite small, it will be terminated by punctuation, spaces,
* or any other non-dictionary characters. The main BreakCache may end
* up with boundaries from multiple dictionary based runs.
*
* The boundaries are stored in a simple ArrayList (vector), with the
* assumption that they will be accessed sequentially.
*/
class RuleBasedBreakIterator::DictionaryCache: public UMemory {
public:
DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status);
~DictionaryCache();
void reset();
// Look up the cached dictionary boundary following (respectively preceding) fromPos.
// As used by BreakCache, a TRUE return means *pos and *statusIndex have been set to that
// boundary and its rule status index; FALSE means the cache could not supply one.
// NOTE(review): whether the outputs are left untouched on FALSE is not visible in this
// header - confirm against the implementation.
UBool following(int32_t fromPos, int32_t *pos, int32_t *statusIndex);
UBool preceding(int32_t fromPos, int32_t *pos, int32_t *statusIndex);
/**
* Populate the cache with the dictionary based boundaries within a region of text.
* @param startPos The start position of a range of text
* @param endPos The end position of a range of text
* @param firstRuleStatus The rule status index that applies to the break at startPos
* @param otherRuleStatus The rule status index that applies to boundaries other than startPos
* @internal
*/
void populateDictionary(int32_t startPos, int32_t endPos,
int32_t firstRuleStatus, int32_t otherRuleStatus);
RuleBasedBreakIterator *fBI; // The break iterator this cache belongs to (presumably not owned; confirm).
UVector32 *fBreaks; // A vector containing the boundaries.
int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following()
// or preceding(). Optimizes sequential access.
int32_t fStart; // Text position of first boundary in cache.
int32_t fLimit; // Last boundary in cache. Which is the limit of the
// text segment being handled by the dictionary.
int32_t fFirstRuleStatusIndex; // Rule status info for first boundary.
int32_t fOtherRuleStatusIndex; // Rule status info for 2nd through last boundaries.
};
/*
* class BreakCache
*
* Cache of break boundary positions and rule status values.
* Break iterator API functions, next(), previous(), etc., will use cached results
* when possible, and otherwise cache new results as they are obtained.
*
* Uniformly caches both dictionary and rule based (non-dictionary) boundaries.
*
* The cache is implemented as a single circular buffer.
*/
/*
* The size of the circular cache buffer is BreakCache::CACHE_SIZE, declared inside the class below.
*/
class RuleBasedBreakIterator::BreakCache: public UMemory {
public:
BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status);
virtual ~BreakCache();
// Reset the cache to hold the given boundary and rule status as its single starting point.
void reset(int32_t pos = 0, int32_t ruleStatus = 0);
// Advance the iteration position to the next boundary.
// Fast path: when not at the last cached entry, step to the next buffer slot and copy its
// boundary and rule status index into the break iterator (fBI).
// At the end of the cache, defer to the out-of-line nextOL().
void next() { if (fBufIdx == fEndBufIdx) {
nextOL();
} else {
fBufIdx = modChunkSize(fBufIdx + 1);
fTextIdx = fBI->fPosition = fBoundaries[fBufIdx];
fBI->fRuleStatusIndex = fStatuses[fBufIdx];
}
};
// Out-of-line part of next(), used when the iteration position is at the end of the cache.
void nextOL();
// Move the iteration position to the previous boundary.
void previous(UErrorCode &status);
// Move the iteration state to the position following the startPosition.
// Input position must be pinned to the input length.
void following(int32_t startPosition, UErrorCode &status);
// Move the iteration state to the position preceding the startPosition.
void preceding(int32_t startPosition, UErrorCode &status);
/*
* Update the state of the public BreakIterator (fBI) to reflect the
* current state of the break iterator cache (this).
*/
int32_t current();
/**
* Add boundaries to the cache near the specified position.
* The given position need not be a boundary itself.
* The input position must be within the range of the text, and
* on a code point boundary.
* If the requested position is a break boundary, leave the iteration
* position on it.
* If the requested position is not a boundary, leave the iteration
* position on the preceding boundary and include both the
* preceding and following boundaries in the cache.
* Additional boundaries, either preceding or following, may be added
* to the cache as a side effect.
*
* Return FALSE if the operation failed.
*/
UBool populateNear(int32_t position, UErrorCode &status);
/**
* Add boundary(s) to the cache following the current last boundary.
* Return FALSE if at the end of the text, and no more boundaries can be added.
* Leave iteration position at the first newly added boundary, or unchanged if no boundary was added.
*/
UBool populateFollowing();
/**
* Add one or more boundaries to the cache preceding the first currently cached boundary.
* Leave the iteration position on the first added boundary.
* Return false if no boundaries could be added (if at the start of the text.)
*/
UBool populatePreceding(UErrorCode &status);
/*
* Selects whether addFollowing() / addPreceding() move the iteration position
* to the newly added boundary or leave it where it was.
*/
enum UpdatePositionValues {
RetainCachePosition = 0,
UpdateCachePosition = 1
};
/*
* Add the boundary following the current position.
* The current position can be left as it was, or changed to the newly added boundary,
* as specified by the update parameter.
*/
void addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
/*
* Add the boundary preceding the current position.
* The current position can be left as it was, or changed to the newly added boundary,
* as specified by the update parameter.
*/
bool addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
/**
* Set the cache position to the specified position, or, if the position
* falls between two cached boundaries, to the preceding boundary.
* Fails if the requested position is outside of the range of boundaries currently held by the cache.
* The startPosition must be on a code point boundary.
*
* Return TRUE if successful, FALSE if the specified position is after
* the last cached boundary or before the first.
*/
UBool seek(int32_t startPosition);
// Debug aid: print the cache contents.
void dumpCache();
private:
// Wrap a buffer index into the range [0, CACHE_SIZE); relies on CACHE_SIZE being a power of two.
static inline int32_t modChunkSize(int index) { return index & (CACHE_SIZE - 1); };
// Number of entries in the circular buffer.
static constexpr int32_t CACHE_SIZE = 128;
static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two.");
RuleBasedBreakIterator *fBI; // The break iterator whose position and rule status this cache updates.
int32_t fStartBufIdx; // Buffer index of the first (earliest) cached boundary.
int32_t fEndBufIdx; // inclusive
int32_t fTextIdx; // Text position of the current iteration position.
int32_t fBufIdx; // Buffer index of the current iteration position.
int32_t fBoundaries[CACHE_SIZE]; // Cached boundary text positions (circular).
uint16_t fStatuses[CACHE_SIZE]; // Rule status index for each cached boundary (parallel to fBoundaries).
UVector32 fSideBuffer; // Scratch list of (position, status index) pairs used by populatePreceding().
};
U_NAMESPACE_END
#endif // #if !UCONFIG_NO_BREAK_ITERATION
#endif // RBBI_CACHE_H

0
deps/icu-small/source/common/rbbicst.pl vendored Executable file → Normal file
View File

View File

@ -14,7 +14,7 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "rbbidata.h" #include "rbbidata.h"
#include "rbbirb.h" #include "rbbirb.h"
#include "utrie.h" #include "utrie2.h"
#include "udatamem.h" #include "udatamem.h"
#include "cmemory.h" #include "cmemory.h"
#include "cstring.h" #include "cstring.h"
@ -23,23 +23,6 @@
#include "uassert.h" #include "uassert.h"
//-----------------------------------------------------------------------------------
//
// Trie access folding function. Copied as-is from properties code in uchar.c
//
//-----------------------------------------------------------------------------------
U_CDECL_BEGIN
static int32_t U_CALLCONV
getFoldingOffset(uint32_t data) {
/* if bit 15 is set, then the folding offset is in bits 14..0 of the 16-bit trie result */
if(data&0x8000) {
return (int32_t)(data&0x7fff);
} else {
return 0;
}
}
U_CDECL_END
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
@ -71,9 +54,8 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk " dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk "
dh->info.dataFormat[1] == 0x72 && dh->info.dataFormat[1] == 0x72 &&
dh->info.dataFormat[2] == 0x6b && dh->info.dataFormat[2] == 0x6b &&
dh->info.dataFormat[3] == 0x20) dh->info.dataFormat[3] == 0x20 &&
// Note: info.fFormatVersion is duplicated in the RBBIDataHeader, and is isDataVersionAcceptable(dh->info.formatVersion))
// validated when checking that.
) { ) {
status = U_INVALID_FORMAT_ERROR; status = U_INVALID_FORMAT_ERROR;
return; return;
@ -84,6 +66,11 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
fUDataMem = udm; fUDataMem = udm;
} }
UBool RBBIDataWrapper::isDataVersionAcceptable(const UVersionInfo version) {
return RBBI_DATA_FORMAT_VERSION[0] == version[0];
}
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// //
// init(). Does most of the work of construction, shared between the // init(). Does most of the work of construction, shared between the
@ -98,6 +85,7 @@ void RBBIDataWrapper::init0() {
fSafeRevTable = NULL; fSafeRevTable = NULL;
fRuleSource = NULL; fRuleSource = NULL;
fRuleStatusTable = NULL; fRuleStatusTable = NULL;
fTrie = NULL;
fUDataMem = NULL; fUDataMem = NULL;
fRefCount = 0; fRefCount = 0;
fDontFreeData = TRUE; fDontFreeData = TRUE;
@ -108,8 +96,7 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
return; return;
} }
fHeader = data; fHeader = data;
if (fHeader->fMagic != 0xb1a0 || fHeader->fFormatVersion[0] != 3) if (fHeader->fMagic != 0xb1a0 || !isDataVersionAcceptable(fHeader->fFormatVersion)) {
{
status = U_INVALID_FORMAT_ERROR; status = U_INVALID_FORMAT_ERROR;
return; return;
} }
@ -131,16 +118,23 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable); fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable);
} }
// Rule Compatibility Hacks
// If a rule set includes reverse rules but does not explicitly include safe reverse rules,
// the reverse rules are to be treated as safe reverse rules.
utrie_unserialize(&fTrie, if (fSafeRevTable == NULL && fReverseTable != NULL) {
fSafeRevTable = fReverseTable;
fReverseTable = NULL;
}
fTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
(uint8_t *)data + fHeader->fTrie, (uint8_t *)data + fHeader->fTrie,
fHeader->fTrieLen, fHeader->fTrieLen,
NULL, // *actual length
&status); &status);
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
return; return;
} }
fTrie.getFoldingOffset=getFoldingOffset;
fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource); fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource);
fRuleString.setTo(TRUE, fRuleSource, -1); fRuleString.setTo(TRUE, fRuleSource, -1);
@ -165,6 +159,8 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
RBBIDataWrapper::~RBBIDataWrapper() { RBBIDataWrapper::~RBBIDataWrapper() {
U_ASSERT(fRefCount == 0); U_ASSERT(fRefCount == 0);
utrie2_close(fTrie);
fTrie = NULL;
if (fUDataMem) { if (fUDataMem) {
udata_close(fUDataMem); udata_close(fUDataMem);
} else if (!fDontFreeData) { } else if (!fDontFreeData) {
@ -323,7 +319,7 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
pInfo->dataFormat[1]==0x72 && pInfo->dataFormat[1]==0x72 &&
pInfo->dataFormat[2]==0x6b && pInfo->dataFormat[2]==0x6b &&
pInfo->dataFormat[3]==0x20 && pInfo->dataFormat[3]==0x20 &&
pInfo->formatVersion[0]==3 )) { RBBIDataWrapper::isDataVersionAcceptable(pInfo->formatVersion) )) {
udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n", udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[0], pInfo->dataFormat[1],
pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->dataFormat[2], pInfo->dataFormat[3],
@ -344,17 +340,11 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
// //
// Get the RRBI Data Header, and check that it appears to be OK. // Get the RRBI Data Header, and check that it appears to be OK.
// //
// Note: ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually
// an int32_t with a value of 1. Starting with ICU 3.4,
// RBBI's fDataFormat matches the dataFormat field from the
// UDataInfo header, four int8_t bytes. The value is {3,1,0,0}
//
const uint8_t *inBytes =(const uint8_t *)inData+headerSize; const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes; RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 || if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
rbbiDH->fFormatVersion[0] != 3 || !RBBIDataWrapper::isDataVersionAcceptable(rbbiDH->fFormatVersion) ||
ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) {
{
udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n"); udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
*status=U_UNSUPPORTED_ERROR; *status=U_UNSUPPORTED_ERROR;
return 0; return 0;
@ -451,7 +441,7 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
} }
// Trie table for character categories // Trie table for character categories
utrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen), utrie2_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
outBytes+ds->readUInt32(rbbiDH->fTrie), status); outBytes+ds->readUInt32(rbbiDH->fTrie), status);
// Source Rules Text. It's UChar data // Source Rules Text. It's UChar data

View File

@ -51,22 +51,23 @@ ubrk_swap(const UDataSwapper *ds,
#include "unicode/uobject.h" #include "unicode/uobject.h"
#include "unicode/unistr.h" #include "unicode/unistr.h"
#include "unicode/uversion.h"
#include "umutex.h" #include "umutex.h"
#include "utrie.h" #include "utrie2.h"
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
// The current RBBI data format version.
static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {4, 0, 0, 0};
/* /*
* The following structs map exactly onto the raw data from ICU common data file. * The following structs map exactly onto the raw data from ICU common data file.
*/ */
struct RBBIDataHeader { struct RBBIDataHeader {
uint32_t fMagic; /* == 0xbla0 */ uint32_t fMagic; /* == 0xbla0 */
uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */ UVersionInfo fFormatVersion; /* Data Format. Same as the value in struct UDataInfo */
/* if there is one associated with this data. */ /* if there is one associated with this data. */
/* (version originates in rbbi, is copied to UDataInfo) */ /* (version originates in rbbi, is copied to UDataInfo) */
/* For ICU 3.2 and earlier, this field was */
/* uint32_t fVersion */
/* with a value of 1. */
uint32_t fLength; /* Total length in bytes of this RBBI Data, */ uint32_t fLength; /* Total length in bytes of this RBBI Data, */
/* including all sections, not just the header. */ /* including all sections, not just the header. */
uint32_t fCatCount; /* Number of character categories. */ uint32_t fCatCount; /* Number of character categories. */
@ -152,6 +153,8 @@ public:
RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
~RBBIDataWrapper(); ~RBBIDataWrapper();
static UBool isDataVersionAcceptable(const UVersionInfo version);
void init0(); void init0();
void init(const RBBIDataHeader *data, UErrorCode &status); void init(const RBBIDataHeader *data, UErrorCode &status);
RBBIDataWrapper *addReference(); RBBIDataWrapper *addReference();
@ -181,7 +184,7 @@ public:
/* number of int32_t values in the rule status table. Used to sanity check indexing */ /* number of int32_t values in the rule status table. Used to sanity check indexing */
int32_t fStatusMaxIdx; int32_t fStatusMaxIdx;
UTrie fTrie; UTrie2 *fTrie;
private: private:
u_atomic_int32_t fRefCount; u_atomic_int32_t fRefCount;

View File

@ -24,16 +24,16 @@
#include "unicode/uchriter.h" #include "unicode/uchriter.h"
#include "unicode/parsepos.h" #include "unicode/parsepos.h"
#include "unicode/parseerr.h" #include "unicode/parseerr.h"
#include "cmemory.h" #include "cmemory.h"
#include "cstring.h" #include "cstring.h"
#include "rbbirb.h" #include "rbbirb.h"
#include "rbbinode.h" #include "rbbinode.h"
#include "rbbiscan.h" #include "rbbiscan.h"
#include "rbbisetb.h" #include "rbbisetb.h"
#include "rbbitblb.h" #include "rbbitblb.h"
#include "rbbidata.h" #include "rbbidata.h"
#include "uassert.h"
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
@ -164,8 +164,13 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t)); int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar)); int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar));
int32_t totalSize = headerSize + forwardTableSize + reverseTableSize (void)safeFwdTableSize;
+ safeFwdTableSize + safeRevTableSize
int32_t totalSize = headerSize
+ forwardTableSize
+ /* reverseTableSize */ 0
+ /* safeFwdTableSize */ 0
+ (safeRevTableSize ? safeRevTableSize : reverseTableSize)
+ statusTableSize + trieSize + rulesSize; + statusTableSize + trieSize + rulesSize;
RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize); RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize);
@ -177,23 +182,45 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
data->fMagic = 0xb1a0; data->fMagic = 0xb1a0;
data->fFormatVersion[0] = 3; data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0];
data->fFormatVersion[1] = 1; data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1];
data->fFormatVersion[2] = 0; data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2];
data->fFormatVersion[3] = 0; data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3];
data->fLength = totalSize; data->fLength = totalSize;
data->fCatCount = fSetBuilder->getNumCharCategories(); data->fCatCount = fSetBuilder->getNumCharCategories();
// Only save the forward table and the safe reverse table,
// because these are the only ones used at run-time.
//
// For the moment, we still build the other tables if they are present in the rule source files,
// for backwards compatibility. Old rule files need to work, and this is the simplest approach.
//
// Additional backwards compatibility consideration: if no safe rules are provided, consider the
// reverse rules to actually be the safe reverse rules.
data->fFTable = headerSize; data->fFTable = headerSize;
data->fFTableLen = forwardTableSize; data->fFTableLen = forwardTableSize;
data->fRTable = data->fFTable + forwardTableSize;
data->fRTableLen = reverseTableSize;
data->fSFTable = data->fRTable + reverseTableSize;
data->fSFTableLen = safeFwdTableSize;
data->fSRTable = data->fSFTable + safeFwdTableSize;
data->fSRTableLen = safeRevTableSize;
data->fTrie = data->fSRTable + safeRevTableSize; // Do not save Reverse Table.
data->fRTable = data->fFTable + forwardTableSize;
data->fRTableLen = 0;
// Do not save the Safe Forward table.
data->fSFTable = data->fRTable + 0;
data->fSFTableLen = 0;
data->fSRTable = data->fSFTable + 0;
if (safeRevTableSize > 0) {
data->fSRTableLen = safeRevTableSize;
} else if (reverseTableSize > 0) {
data->fSRTableLen = reverseTableSize;
} else {
U_ASSERT(FALSE); // Rule build should have failed for lack of a reverse table
// before reaching this point.
}
data->fTrie = data->fSRTable + data->fSRTableLen;
data->fTrieLen = fSetBuilder->getTrieSize(); data->fTrieLen = fSetBuilder->getTrieSize();
data->fStatusTable = data->fTrie + trieSize; data->fStatusTable = data->fTrie + trieSize;
data->fStatusTableLen= statusTableSize; data->fStatusTableLen= statusTableSize;
@ -203,9 +230,14 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
uprv_memset(data->fReserved, 0, sizeof(data->fReserved)); uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
fForwardTables->exportTable((uint8_t *)data + data->fFTable); fForwardTables->exportTable((uint8_t *)data + data->fFTable);
fReverseTables->exportTable((uint8_t *)data + data->fRTable); // fReverseTables->exportTable((uint8_t *)data + data->fRTable);
fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable); // fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
if (safeRevTableSize > 0) {
fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable); fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
} else {
fReverseTables->exportTable((uint8_t *)data + data->fSRTable);
}
fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie); fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);
int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable); int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);

View File

@ -15,6 +15,9 @@
#define RBBIRB_H #define RBBIRB_H
#include "unicode/utypes.h" #include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/uobject.h" #include "unicode/uobject.h"
#include "unicode/rbbi.h" #include "unicode/rbbi.h"
#include "unicode/uniset.h" #include "unicode/uniset.h"
@ -207,4 +210,7 @@ struct RBBISetTableEl {
#endif #endif
U_NAMESPACE_END U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif #endif

View File

@ -47,6 +47,7 @@
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
static const UChar gRuleSet_rule_char_pattern[] = { static const UChar gRuleSet_rule_char_pattern[] = {
// Characters that may appear as literals in patterns without escaping or quoting.
// [ ^ [ \ p { Z } \ u 0 0 2 0 // [ ^ [ \ p { Z } \ u 0 0 2 0
0x5b, 0x5e, 0x5b, 0x5c, 0x70, 0x7b, 0x5a, 0x7d, 0x5c, 0x75, 0x30, 0x30, 0x32, 0x30, 0x5b, 0x5e, 0x5b, 0x5c, 0x70, 0x7b, 0x5a, 0x7d, 0x5c, 0x75, 0x30, 0x30, 0x32, 0x30,
// - \ u 0 0 7 f ] - [ \ p // - \ u 0 0 7 f ] - [ \ p
@ -558,6 +559,10 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
fRB->fDefaultTree = &fRB->fSafeRevTree; fRB->fDefaultTree = &fRB->fSafeRevTree;
} else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) { } else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) {
fRB->fLookAheadHardBreak = TRUE; fRB->fLookAheadHardBreak = TRUE;
} else if (opt == UNICODE_STRING("quoted_literals_only", 20)) {
fRuleSets[kRuleSet_rule_char-128].clear();
} else if (opt == UNICODE_STRING("unquoted_literals", 17)) {
fRuleSets[kRuleSet_rule_char-128].applyPattern(UnicodeString(gRuleSet_rule_char_pattern), *fRB->fStatus);
} else { } else {
error(U_BRK_UNRECOGNIZED_OPTION); error(U_BRK_UNRECOGNIZED_OPTION);
} }

View File

@ -35,7 +35,7 @@
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/uniset.h" #include "unicode/uniset.h"
#include "utrie.h" #include "utrie2.h"
#include "uvector.h" #include "uvector.h"
#include "uassert.h" #include "uassert.h"
#include "cmemory.h" #include "cmemory.h"
@ -44,43 +44,6 @@
#include "rbbisetb.h" #include "rbbisetb.h"
#include "rbbinode.h" #include "rbbinode.h"
//------------------------------------------------------------------------
//
// getFoldedRBBIValue Call-back function used during building of Trie table.
// Folding value: just store the offset (16 bits)
// if there is any non-0 entry.
// (It'd really be nice if the Trie builder would provide a
// simple default, so this function could go away from here.)
//
//------------------------------------------------------------------------
/* folding value: just store the offset (16 bits) if there is any non-0 entry */
U_CDECL_BEGIN
static uint32_t U_CALLCONV
getFoldedRBBIValue(UNewTrie *trie, UChar32 start, int32_t offset) {
uint32_t value;
UChar32 limit;
UBool inBlockZero;
limit=start+0x400;
while(start<limit) {
value=utrie_get32(trie, start, &inBlockZero);
if(inBlockZero) {
start+=UTRIE_DATA_BLOCK_LENGTH;
} else if(value!=0) {
return (uint32_t)(offset|0x8000);
} else {
++start;
}
}
return 0;
}
U_CDECL_END
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
//------------------------------------------------------------------------ //------------------------------------------------------------------------
@ -116,7 +79,7 @@ RBBISetBuilder::~RBBISetBuilder()
delete r; delete r;
} }
utrie_close(fTrie); utrie2_close(fTrie);
} }
@ -287,33 +250,38 @@ void RBBISetBuilder::build() {
// Build the Trie table for mapping UChar32 values to the corresponding // Build the Trie table for mapping UChar32 values to the corresponding
// range group number // range group number
// //
fTrie = utrie_open(NULL, // Pre-existing trie to be filled in fTrie = utrie2_open(0, // Initial value for all code points.
NULL, // Data array (utrie will allocate one) 0, // Error value for out-of-range input.
100000, // Max Data Length fStatus);
0, // Initial value for all code points
0, // Lead surrogate unit value
TRUE); // Keep Latin 1 in separately
for (rlRange = fRangeList; rlRange!=0 && U_SUCCESS(*fStatus); rlRange=rlRange->fNext) {
for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) { utrie2_setRange32(fTrie,
utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE); rlRange->fStartChar, // Range start
rlRange->fEndChar, // Range end (inclusive)
rlRange->fNum, // value for range
TRUE, // Overwrite previously written values
fStatus);
} }
} }
//----------------------------------------------------------------------------------- //-----------------------------------------------------------------------------------
// //
// getTrieSize() Return the size that will be required to serialize the Trie. // getTrieSize() Return the size that will be required to serialize the Trie.
// //
//----------------------------------------------------------------------------------- //-----------------------------------------------------------------------------------
int32_t RBBISetBuilder::getTrieSize() /*const*/ { int32_t RBBISetBuilder::getTrieSize() {
fTrieSize = utrie_serialize(fTrie, if (U_FAILURE(*fStatus)) {
return 0;
}
utrie2_freeze(fTrie, UTRIE2_16_VALUE_BITS, fStatus);
fTrieSize = utrie2_serialize(fTrie,
NULL, // Buffer NULL, // Buffer
0, // Capacity 0, // Capacity
getFoldedRBBIValue,
TRUE, // Reduce to 16 bits
fStatus); fStatus);
if (*fStatus == U_BUFFER_OVERFLOW_ERROR) {
*fStatus = U_ZERO_ERROR;
}
// RBBIDebugPrintf("Trie table size is %d\n", trieSize); // RBBIDebugPrintf("Trie table size is %d\n", trieSize);
return fTrieSize; return fTrieSize;
} }
@ -327,11 +295,9 @@ int32_t RBBISetBuilder::getTrieSize() /*const*/ {
// //
//----------------------------------------------------------------------------------- //-----------------------------------------------------------------------------------
void RBBISetBuilder::serializeTrie(uint8_t *where) { void RBBISetBuilder::serializeTrie(uint8_t *where) {
utrie_serialize(fTrie, utrie2_serialize(fTrie,
where, // Buffer where, // Buffer
fTrieSize, // Capacity fTrieSize, // Capacity
getFoldedRBBIValue,
TRUE, // Reduce to 16 bits
fStatus); fStatus);
} }

View File

@ -13,12 +13,14 @@
#define RBBISETB_H #define RBBISETB_H
#include "unicode/utypes.h" #include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/uobject.h" #include "unicode/uobject.h"
#include "rbbirb.h" #include "rbbirb.h"
#include "utrie2.h"
#include "uvector.h" #include "uvector.h"
struct UNewTrie;
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
// //
@ -109,7 +111,7 @@ private:
RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors
UNewTrie *fTrie; // The mapping TRIE that is the end result of processing UTrie2 *fTrie; // The mapping TRIE that is the end result of processing
uint32_t fTrieSize; // the Unicode Sets. uint32_t fTrieSize; // the Unicode Sets.
// Groups correspond to character categories - // Groups correspond to character categories -
@ -129,4 +131,7 @@ private:
U_NAMESPACE_END U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif #endif

View File

@ -11,37 +11,37 @@
#ifdef INCLUDED_FROM_UBIDI_PROPS_C #ifdef INCLUDED_FROM_UBIDI_PROPS_C
static const UVersionInfo ubidi_props_dataVersion={9,0,0,0}; static const UVersionInfo ubidi_props_dataVersion={0xa,0,0,0};
static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x6060,0x5ce8,0x1a,0x620,0x8c0,0x10ac0,0x10af0,0,0,0,0,0,0,0,0x5802b6}; static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x6028,0x5cb0,0x1a,0x620,0x8c0,0x10ac0,0x10af0,0,0,0,0,0,0,0,0x6302b6};
static const uint16_t ubidi_props_trieIndex[11884]={ static const uint16_t ubidi_props_trieIndex[11856]={
0x36a,0x372,0x37a,0x382,0x39a,0x3a2,0x3aa,0x3b2,0x38a,0x392,0x38a,0x392,0x38a,0x392,0x38a,0x392, 0x36a,0x372,0x37a,0x382,0x39a,0x3a2,0x3aa,0x3b2,0x38a,0x392,0x38a,0x392,0x38a,0x392,0x38a,0x392,
0x38a,0x392,0x38a,0x392,0x3b8,0x3c0,0x3c8,0x3d0,0x3d8,0x3e0,0x3dc,0x3e4,0x3ec,0x3f4,0x3ef,0x3f7, 0x38a,0x392,0x38a,0x392,0x3b8,0x3c0,0x3c8,0x3d0,0x3d8,0x3e0,0x3dc,0x3e4,0x3ec,0x3f4,0x3ef,0x3f7,
0x38a,0x392,0x38a,0x392,0x3ff,0x407,0x38a,0x392,0x38a,0x392,0x38a,0x392,0x40d,0x415,0x41d,0x425, 0x38a,0x392,0x38a,0x392,0x3ff,0x407,0x38a,0x392,0x38a,0x392,0x38a,0x392,0x40d,0x415,0x41d,0x425,
0x42d,0x435,0x43d,0x445,0x44b,0x453,0x45b,0x463,0x46b,0x473,0x479,0x481,0x489,0x491,0x499,0x4a1, 0x42d,0x435,0x43d,0x445,0x44b,0x453,0x45b,0x463,0x46b,0x473,0x479,0x481,0x489,0x491,0x499,0x4a1,
0x4ad,0x4a9,0x4b5,0x4bd,0x41f,0x4cd,0x4d5,0x4c5,0x4dd,0x4df,0x4e7,0x4ef,0x4f7,0x4f8,0x500,0x508, 0x4ad,0x4a9,0x4b5,0x4bd,0x41f,0x4cd,0x4d5,0x4c5,0x4dd,0x4df,0x4e7,0x4ef,0x4f7,0x4f8,0x500,0x508,
0x510,0x4f8,0x518,0x51d,0x510,0x4f8,0x525,0x52d,0x4f7,0x535,0x53d,0x4ef,0x542,0x38a,0x54a,0x54e, 0x510,0x4f8,0x518,0x51d,0x510,0x4f8,0x525,0x52d,0x4f7,0x535,0x53d,0x4ef,0x542,0x38a,0x54a,0x54e,
0x556,0x557,0x55f,0x567,0x4f7,0x56f,0x577,0x4ef,0x57f,0x581,0x500,0x4ef,0x38a,0x38a,0x589,0x38a, 0x556,0x557,0x55f,0x567,0x4f7,0x56f,0x577,0x4ef,0x401,0x57b,0x500,0x4ef,0x38a,0x38a,0x583,0x38a,
0x38a,0x58f,0x597,0x38a,0x38a,0x59b,0x5a3,0x38a,0x5a7,0x5ae,0x38a,0x5b6,0x5be,0x5c5,0x541,0x38a, 0x38a,0x589,0x591,0x38a,0x38a,0x595,0x59d,0x38a,0x5a1,0x5a8,0x38a,0x5b0,0x5b8,0x5bf,0x541,0x38a,
0x38a,0x5cd,0x5d5,0x5dd,0x5e5,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x5c7,0x5cf,0x5d7,0x5df,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x5ed,0x38a,0x5f5,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x5e7,0x38a,0x5ef,0x38a,0x38a,0x38a,
0x5fd,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x5f7,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x605,0x38a,0x38a,0x38a,0x60d,0x60d,0x504,0x504,0x38a,0x613,0x61b,0x5f5, 0x38a,0x38a,0x38a,0x38a,0x5ff,0x38a,0x38a,0x38a,0x607,0x607,0x504,0x504,0x38a,0x60d,0x615,0x5ef,
0x631,0x623,0x623,0x639,0x640,0x629,0x38a,0x38a,0x38a,0x648,0x650,0x38a,0x38a,0x38a,0x652,0x65a, 0x62b,0x61d,0x61d,0x633,0x63a,0x623,0x38a,0x38a,0x38a,0x642,0x64a,0x38a,0x38a,0x38a,0x64c,0x654,
0x662,0x38a,0x669,0x671,0x38a,0x679,0x38a,0x38a,0x681,0x684,0x542,0x68c,0x401,0x694,0x38a,0x69b, 0x65c,0x38a,0x663,0x66b,0x38a,0x673,0x38a,0x38a,0x534,0x67b,0x542,0x683,0x401,0x68b,0x38a,0x692,
0x38a,0x6a0,0x38a,0x38a,0x38a,0x38a,0x6a6,0x6ae,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0x6b6, 0x38a,0x697,0x38a,0x38a,0x38a,0x38a,0x69d,0x6a5,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0x6ad,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x6be,0x6c6,0x6ca, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x6b5,0x6bd,0x6c1,
0x6e2,0x6e8,0x6d2,0x6da,0x6f0,0x6f8,0x6fc,0x5c8,0x704,0x70c,0x714,0x38a,0x71c,0x65a,0x65a,0x65a, 0x6d9,0x6df,0x6c9,0x6d1,0x6e7,0x6ef,0x6f3,0x5c2,0x6fb,0x703,0x70b,0x38a,0x713,0x654,0x654,0x654,
0x72c,0x734,0x73c,0x744,0x749,0x751,0x759,0x724,0x761,0x769,0x38a,0x76f,0x776,0x65a,0x65a,0x65a, 0x723,0x72b,0x733,0x73b,0x740,0x748,0x750,0x71b,0x758,0x760,0x38a,0x766,0x76d,0x654,0x654,0x654,
0x65a,0x56d,0x77c,0x65a,0x784,0x38a,0x38a,0x657,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a, 0x654,0x56d,0x773,0x654,0x77b,0x38a,0x38a,0x651,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,
0x65a,0x65a,0x65a,0x65a,0x65a,0x78c,0x65a,0x65a,0x65a,0x65a,0x65a,0x792,0x65a,0x65a,0x79a,0x7a2, 0x654,0x654,0x654,0x654,0x654,0x783,0x654,0x654,0x654,0x654,0x654,0x789,0x654,0x654,0x791,0x799,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a,0x65a,0x65a,0x65a,0x7b2,0x7b9,0x7c1,0x7aa, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x654,0x654,0x654,0x654,0x7a9,0x7b0,0x7b8,0x7a1,
0x7d1,0x7d9,0x7e1,0x7e8,0x7f0,0x7f8,0x7ff,0x7c9,0x65a,0x65a,0x65a,0x807,0x80d,0x813,0x81b,0x820, 0x7c8,0x7d0,0x7d8,0x7df,0x7e7,0x7ef,0x7f6,0x7c0,0x654,0x654,0x654,0x7fe,0x804,0x80a,0x812,0x817,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x827,0x38a,0x38a,0x38a,0x82f,0x38a,0x38a,0x38a,0x3d8, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x81e,0x38a,0x38a,0x38a,0x826,0x38a,0x38a,0x38a,0x3d8,
0x837,0x83f,0x76c,0x38a,0x842,0x65a,0x65a,0x65d,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x849,0x84f, 0x82e,0x836,0x763,0x38a,0x839,0x654,0x654,0x657,0x654,0x654,0x654,0x654,0x654,0x654,0x840,0x846,
0x85f,0x857,0x38a,0x38a,0x867,0x5fd,0x38a,0x3b1,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a,0x82e, 0x856,0x84e,0x38a,0x38a,0x85e,0x5f7,0x38a,0x3b1,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x654,0x825,
0x3bf,0x38a,0x86f,0x877,0x38a,0x87f,0x820,0x38a,0x38a,0x38a,0x38a,0x887,0x38a,0x38a,0x652,0x3b0, 0x3bf,0x38a,0x866,0x86e,0x38a,0x876,0x817,0x38a,0x38a,0x38a,0x38a,0x87e,0x38a,0x38a,0x64c,0x3b0,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
@ -54,7 +54,7 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a,0x65a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x654,0x654,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
@ -98,10 +98,10 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x86f,0x65a,0x56d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x866,0x654,0x56d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x88e,0x38a,0x38a,0x893,0x557,0x38a,0x38a,0x5a9,0x65a,0x651,0x38a,0x38a,0x89b,0x38a,0x38a,0x38a, 0x885,0x38a,0x38a,0x88a,0x557,0x38a,0x38a,0x5a3,0x654,0x64b,0x38a,0x38a,0x892,0x38a,0x38a,0x38a,
0x8a3,0x8aa,0x623,0x8b2,0x38a,0x38a,0x8b9,0x8c1,0x38a,0x8c8,0x8cf,0x38a,0x4dd,0x8d4,0x38a,0x4f6, 0x89a,0x8a1,0x61d,0x8a9,0x38a,0x38a,0x579,0x8b1,0x38a,0x8b8,0x8bf,0x38a,0x4dd,0x8c4,0x38a,0x4f6,
0x38a,0x8dc,0x8e4,0x4f8,0x38a,0x8e8,0x4f7,0x8f0,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x8f7, 0x38a,0x8cc,0x8d4,0x4f8,0x38a,0x8d8,0x4f7,0x8e0,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x8e7,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
@ -141,9 +141,9 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x90b,0x8ff,0x903,0x489,0x489,0x489,0x489,0x489, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x8fb,0x8ef,0x8f3,0x489,0x489,0x489,0x489,0x489,
0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x913,0x489,0x489,0x489,0x489,0x91b,0x91f, 0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x903,0x489,0x489,0x489,0x489,0x90b,0x90f,
0x927,0x92f,0x933,0x93b,0x489,0x489,0x489,0x93f,0x947,0x37a,0x94f,0x957,0x38a,0x38a,0x38a,0x95f, 0x917,0x91f,0x923,0x92b,0x489,0x489,0x489,0x92f,0x937,0x37a,0x93f,0x947,0x38a,0x38a,0x38a,0x94f,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0xe28,0xe28,0xe68,0xea8,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xee0,0xf20,0xf60,0xf70,0xfb0,0xfbc, 0xe28,0xe28,0xe68,0xea8,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xee0,0xf20,0xf60,0xf70,0xfb0,0xfbc,
@ -180,61 +180,61 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17,
0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x967,0x38a,0x65a,0x65a,0x96f,0x5fd,0x38a,0x4f0, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x957,0x38a,0x654,0x654,0x95f,0x5f7,0x38a,0x4f0,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x977,0x38a,0x38a,0x38a,0x97e,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x967,0x38a,0x38a,0x38a,0x96e,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x986,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, 0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x976,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x98e,0x992,0x41f,0x41f,0x41f,0x41f,0x9a2,0x99a,0x41f,0x9aa,0x41f,0x41f,0x9b2,0x9b8,0x41f,0x41f, 0x97e,0x982,0x41f,0x41f,0x41f,0x41f,0x992,0x98a,0x41f,0x99a,0x41f,0x41f,0x9a2,0x9a8,0x41f,0x41f,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, 0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x41f,0x41f,0x41f,0x9c0,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, 0x41f,0x41f,0x41f,0x9b0,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x4f7,0x8bb,0x9c8,0x9cf,0x401,0x9d2,0x38a,0x38a,0x4dd,0x9da,0x38a,0x9e0,0x401,0x9e5,0x60f,0x38a, 0x4f7,0x9b8,0x9bf,0x9c6,0x401,0x9c9,0x38a,0x38a,0x4dd,0x9d1,0x38a,0x9d7,0x401,0x9dc,0x609,0x38a,
0x38a,0x9ed,0x38a,0x38a,0x38a,0x38a,0x82f,0x9f5,0x401,0x4f8,0x556,0x9fc,0x38a,0x38a,0x38a,0x38a, 0x38a,0x9e4,0x38a,0x38a,0x38a,0x38a,0x826,0x9ec,0x401,0x4f8,0x556,0x9f3,0x38a,0x38a,0x38a,0x38a,
0x38a,0x8bb,0xa04,0x38a,0x38a,0xa08,0xa10,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa14,0xa1c,0x38a, 0x38a,0x9b8,0x9fb,0x38a,0x38a,0x9ff,0xa07,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa0b,0xa13,0x38a,
0x38a,0xa24,0x556,0xa2c,0x38a,0xa32,0x38a,0x38a,0x5ed,0xa3a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0xa1b,0x556,0xa23,0x38a,0xa29,0x38a,0x38a,0x5e7,0xa31,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa42,0xa46,0xa4e,0x38a,0xa55,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa39,0xa3d,0xa45,0x38a,0xa4c,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa5c,0x38a,0x38a,0xa64,0xa6a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa53,0x38a,0x38a,0xa61,0xa5b,
0x38a,0x38a,0x38a,0xa70,0xa78,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0xa69,0xa71,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa7c,0x38a,0xa82,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa75,0x38a,0xa7b,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0xa88,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0xa81,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x511,0xa90,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x511,0xa89,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0xa97,0xa9f,0xaa5,0x38a,0x38a,0x65a,0x65a,0xaad,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a, 0x38a,0x38a,0xa90,0xa98,0xa9e,0x38a,0x38a,0x654,0x654,0xaa6,0x38a,0x38a,0x38a,0x38a,0x38a,0x654,
0x65a,0xab5,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x654,0xaae,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xabb,0x38a,0xac2, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xab4,0x38a,0xabb,
0x38a,0xabe,0x38a,0xac5,0x38a,0xacd,0xad1,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0xab7,0x38a,0xabe,0x38a,0xac6,0xaca,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0xad9,0x3d8,0xae0,0xae7,0xaef,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0xad2,0x3d8,0xad9,0xae0,0xae8,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xaf7,0xaff,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xaf0,0xaf8,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0xb07,0x41f,0xb0f, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0xb00,0x41f,0xb08,
0xb0f,0xb16,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, 0xb08,0xb0f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, 0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0xb1e,0x41f, 0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0xb17,0x41f,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x65a,0xb26,0x65a,0x65a,0x65d,0xb2b,0xb2f,0x849,0xb37, 0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x654,0xb1f,0x654,0x654,0x657,0xb24,0xb28,0x840,0xb30,
0x38a,0x38a,0xb3d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x76d,0x38a,0x38a,0x38a,0x38a,0x65a, 0x38a,0x38a,0xb36,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x764,0x38a,0x38a,0x38a,0x38a,0x654,
0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a, 0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,
0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0xb45,0xb4d,0x65a, 0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0xb3e,0xb46,0x654,
0x65a,0x65a,0x65d,0x65a,0x65a,0xb45,0x38a,0xb26,0x65a,0xb55,0x65a,0xb5d,0x84b,0x38a,0x38a,0xb26, 0x654,0x654,0x657,0x654,0x654,0xb3e,0x38a,0xb1f,0x654,0xb4e,0x654,0xb56,0x842,0x38a,0x38a,0xb1f,
0xb61,0xb69,0x65f,0x65c,0x38a,0xb71,0x56d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0xb5a,0xb62,0x659,0x656,0x38a,0xb6a,0x56d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xb79,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xb72,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xb79,0xb89,0xb81,0xb81,0xb81,0xb8a,0xb8a,0xb8a,0xb8a,0x3d8, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xb72,0xb82,0xb7a,0xb7a,0xb7a,0xb83,0xb83,0xb83,0xb83,0x3d8,
0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0xb92,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a, 0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0xb8b,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,
0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a, 0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,
0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a, 0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,
0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a, 0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,
0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0x369,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0x369,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
0x12,8,7,8,9,7,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,8,7,8,9,7,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
0x12,0x12,0x12,0x12,7,7,7,8,9,0xa,0xa,4,4,4,0xa,0xa, 0x12,0x12,0x12,0x12,7,7,7,8,9,0xa,0xa,4,4,4,0xa,0xa,
0x310a,0xf20a,0xa,3,6,3,6,6,2,2,2,2,2,2,2,2, 0x310a,0xf20a,0xa,3,6,3,6,6,2,2,2,2,2,2,2,2,
@ -319,7 +319,7 @@ static const uint16_t ubidi_props_trieIndex[11884]={
1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1,1,0xb1, 1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1,1,0xb1,
0xb1,0xb1,0xb1,0xb1,0x81,0x41,0x41,0x41,0x41,0x41,0x81,0x81,0x41,0x81,0x41,0x41, 0xb1,0xb1,0xb1,0xb1,0x81,0x41,0x41,0x41,0x41,0x41,0x81,0x81,0x41,0x81,0x41,0x41,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x81,0x41,1,1,1,0xb1,0xb1,0xb1, 0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x81,0x41,1,1,1,0xb1,0xb1,0xb1,
1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, 1,1,1,1,0x4d,0xd,0x4d,0x4d,0x4d,0x4d,0xd,0x8d,0x4d,0x8d,0x8d,0xd,
0xd,0xd,0xd,0xd,1,1,1,1,1,1,1,1,1,1,1,1, 0xd,0xd,0xd,0xd,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,0xb1,0xb1,5,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 1,1,1,1,0xb1,0xb1,5,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
@ -348,8 +348,8 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0, 0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,
0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,
0,0,0,0,0,4,0,0,0,0,0,0,0,0,0x11,0x11, 0,0,0,0,0,4,0,0,0,0,0,0,0,0,0xb1,0xb1,
0x11,0x11,0x11,0x11,0,0,0,0,0,0,0,0,0,0,0,0, 0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0, 0xb1,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0, 0,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,
@ -367,215 +367,211 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xb1,0,0,0xa0,0,0,0,0, 0,0,0,0,0,0,0,0,0xb1,0,0,0xa0,0,0,0,0,
0,0,0xa0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0, 0,0,0xa0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x11,0xb1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x11, 0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,
0x11,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0, 0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0xb1,0xb1,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0, 0xb1,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0, 0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,4,0,0,0,0, 0,0,0,4,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0, 0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0, 0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0, 0xb1,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0, 0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,
0,0xb1,0x310a,0xf20a,0x310a,0xf20a,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,
0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,
0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,
0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0xb1,0,0xb1,0,0xb1,0x310a,0xf20a,0x310a,0xf20a,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,
0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0, 0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x310a, 0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0xb1,0xb1,0xb1,
0xf20a,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,4,0,0xb1,0,0,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xb1,0x40,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x4a,0xa,0xa,0x2a,0xb1,
0xb1,0xb1,0x12,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0xb1,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,
0xa,0,0,0,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0xb1,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0xb1,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0,0xb1,0xb1,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0xb1,0,0,0,
0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x11,0x11,
0x11,0x11,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0,
0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xa,0xa,0,0xa,0xa,0xa,0xa,6,0x310a,0xf20a,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0x814,0x815,
0x813,0x816,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,2,0,0,0,2,2,2,2,
2,2,3,3,0xa,0x310a,0xf20a,0,9,9,9,9,9,9,9,9,
9,9,9,0xb2,0x412,0x432,0x8a0,0x8a1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,7,0x8ab,0x8ae,0x8b0,0x8ac,0x8af,6,
4,4,4,4,4,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,
2,2,2,2,2,2,2,2,2,2,3,3,0xa,0x310a,0xf20a,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xa,0xa,0,0xa,0xa,0xa,0xa,0,0xa,0xa,0,0,0,0,0,0, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa, 0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,
0xa,0xa,0xa,0xa,0,0xa,0,0xa,0,0xa,0,0,0,0,4,0, 0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0xa,0xa,0,0,0,0,
0x100a,0xa,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,
0x300a,0xf00a,0x900a,0x900a,0x900a,0x100a,0x900a,0x900a,0x100a,0x100a,0x900a,0x900a,0x900a,0x900a,0x900a,0x100a,
0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0x700a,0x700a,0x700a,0xb00a,0xb00a,0xb00a,0xa,0xa,
0xa,0x100a,3,4,0xa,0x900a,0x100a,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,
0x100a,0x100a,0x100a,0xa,0x100a,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x300a,0xf00a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x900a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,
0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,
0x100a,0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x900a,0x100a,0x900a,0x900a,0x100a,0x900a,
0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0xa,
0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,
0xf00a,0x900a,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 0,0,0,0,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa, 0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0, 0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,
0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2,2,2,2,2, 0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,
2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0xa,
0x300a,0xf00a,0xa,0x500a,0x100a,0xd00a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x300a,0xf00a,0xa,
0xa,0xa,0xa,0xa,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x310a,0xf20a,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x100a,0x100a,0xa,0xa,
0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x100a,0x100a,
0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,
0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x710a,0x320a,0xf10a,0xb20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,
0xf20a,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0x100a,0xa,0xa,
0xa,0xa,0x100a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x900a,0x900a,0x100a,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x100a,
0xa,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0x100a,0xa,0x100a,0xa,0xa,0x100a,0xa,0x300a,
0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,
0x100a,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0x300a,
0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a,
0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,
0x100a,0x300a,0xf00a,0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,
0xf00a,0x100a,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, 0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0xa,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xb1,0xb1,0xb1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,
0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xa,0xa,0x300a,0xf00a, 0,0,0,0,0,0,0,0x310a,0xf20a,0,0,0,0,0,0,0,
0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a, 0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 0,0,0,0,0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,4,
0,0xb1,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0xb1,0x40,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0x4a,0xa,0xa,0x2a,0xb1,0xb1,0xb1,0x12,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xb1,0xb1,0xb1,0,0,0,0,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,
0,0xb1,0xb1,0xb1,0,0,0,0,0xa,0,0,0,0xa,0xa,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xa,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xa,0,0,0,
0,0,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,9,0xa,0xa,0xa,
0xa,0,0,0,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0xb1,0xb1,0,0,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0, 0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa, 0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0xb1,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0, 0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xa, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xa,0xa,0,0,0,0, 0xb1,0xb1,0,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,
0,0,0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0,0, 0xb1,0xb1,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0, 0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
4,4,0,0,0,0,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, 0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x60,0,0xa,0xa,0xa,0xa, 0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0, 0,0xb1,0,0,0,0,0,0,0xb1,0,0,0,0xb1,0xb1,0,0,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0,0xa,0xa,0xa,0xa,6,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,9,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0x814,0x815,0x813,0x816,0xb2,0xb2,
0xb2,0xb2,0xb2,0xb2,2,0,0,0,2,2,2,2,2,2,3,3,
0xa,0x310a,0xf20a,0,9,9,9,9,9,9,9,9,9,9,9,0xb2,
0x412,0x432,0x8a0,0x8a1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,9,7,0x8ab,0x8ae,0x8b0,0x8ac,0x8af,6,4,4,4,4,
4,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,2,2,2,2,
2,2,2,2,2,2,3,3,0xa,0x310a,0xf20a,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xa,0xa,0,0xa,
0xa,0xa,0xa,0,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0xa,0,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,
0,0xa,0,0xa,0,0xa,0,0,0,0,4,0,0,0,0,0,
0,0,0,0,0,0,0xa,0xa,0,0,0,0,0x100a,0xa,0xa,0xa,
0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x900a,0x900a,
0x900a,0x100a,0x900a,0x900a,0x100a,0x100a,0x900a,0x900a,0x900a,0x900a,0x900a,0x100a,0xa,0x100a,0x100a,0x100a,
0x100a,0xa,0xa,0xa,0x700a,0x700a,0x700a,0xb00a,0xb00a,0xb00a,0xa,0xa,0xa,0x100a,3,4,
0xa,0x900a,0x100a,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,
0x100a,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x900a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,
0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0x100a,0xa,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0x300a,
0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0x300a,0xf00a,0xa,0xa,0x900a,0x100a,0x900a,0x900a,0x100a,0x900a,0x100a,0x100a,0x100a,0x100a,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x900a,0xa,0xa,
0x300a,0xf00a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0xa,0x300a,0xf00a,0xa,0x500a,
0x100a,0xd00a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,
0x100a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,
0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x100a,0x100a,0xa,0xa,0x100a,0x100a,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,
0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,
0xf20a,0x710a,0x320a,0xf10a,0xb20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x300a,
0xf00a,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x100a,0xa,0xa,0xa,
0xa,0xa,0x100a,0x900a,0x900a,0x900a,0x100a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0xa,
0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0x100a,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,
0x100a,0x100a,0xa,0xa,0x100a,0xa,0x100a,0xa,0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,
0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,0xa,0xa,0x100a,
0x100a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,
0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x100a,
0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,
0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,
0xa,0xa,0xa,0xa,0x100a,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,
0xa,0xa,0xa,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,
0xa,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,
0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0,0,0,0,0xa,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xa,0,0,0,0,0,0xa,0xa,
0,0,0,0,0,0xa,0xa,0xa,9,0xa,0xa,0xa,0xa,0,0,0,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,
0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xa,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,0,
0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0xb1,0,
0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,0,
0,0,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x60,0,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0, 0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,
@ -642,123 +638,125 @@ static const uint16_t ubidi_props_trieIndex[11884]={
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,1, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,
0,0xb1,0xb1,0,0,0xa0,0,0,0,0,0,0,0,0,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,
0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0, 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,
0,0,0,0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,0xb1,0,0,0,0,
0,0,0xb1,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0, 0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0, 0,0xa0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0,0, 0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0xb1,
0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0x11,0x11,0x11,0x11,0x11,0x11,0,
0,0x11,0x11,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x11,0x11,0x11,0x11,0x11,
0x11,0,0,0x11,0x11,0x11,0x11,0,0,0,0,0,0,0,0,0x11,
0,0,0,0,0,0,0,0,0,0x11,0x11,0x11,0x11,0x11,0x11,0,
0,0x11,0x11,0x11,0,0,0,0,0,0,0,0,0,0,0x11,0x11,
0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0,0x11,0x11,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xa0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x11,0x11,0x11,0x11,0x11,0x11,0,0,0,0x11,0,0x11,0x11,0,0x11,
0x11,0x11,0x11,0x11,0x11,0x11,0,0x11,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb2,0xb2,0xb2,0xb2,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0xb2, 0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, 0xb1,0xb1,0,0,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
0xb1,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,
0,0,0,0,0xa,0xa,0xb1,0xb1,0xb1,0xa,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0x100a,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0x100a,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x100a,0,0,0,0,0,0,0,0,
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,0x41,0x41,0x41,0x41,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xd,0xd,0xd,0xd,0xd,0xd,
0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2,
2,2,2,2,2,2,2,0xa,0xa,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0, 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0, 0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, 0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0x12,0x12,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2, 0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,
0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xa0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,
0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0xb1,0,
0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb2,0xb2,0xb2,0xb2,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xb1,0xb1,0xb1,0xa,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x100a,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x100a,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x100a,0,0,0,0,
0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,
0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,0xd,0xd,0xd,0xd,
0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xd,0xd,
0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,
0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
2,2,2,2,2,2,2,2,2,2,2,0xa,0xa,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x12,0x12,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,
0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2, 0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,
0xb2,0xb2,0xb2,0xb2,0x12,0xb2,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0xb2,0x12,0x12,0x12,0x12,0x12,0x12,
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x12,0x12,0x12,0x12,
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0,0,0,0 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0,0,0,0
}; };
static const uint32_t ubidi_props_mirrors[26]={ static const uint32_t ubidi_props_mirrors[26]={
@ -803,7 +801,7 @@ static const uint8_t ubidi_props_jgArray[672]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0x5d,0x5a,0x60,0x63,0x5e,0x5f,0x59,0x61,0x5b,0x5c,0x62,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -828,13 +826,13 @@ static const UBiDiProps ubidi_props_singleton={
ubidi_props_trieIndex+3496, ubidi_props_trieIndex+3496,
NULL, NULL,
3496, 3496,
8388, 8360,
0x1a0, 0x1a0,
0xe28, 0xe28,
0x0, 0x0,
0x0, 0x0,
0x110000, 0x110000,
0x2e68, 0x2e4c,
NULL, 0, FALSE, FALSE, 0, NULL NULL, 0, FALSE, FALSE, 0, NULL
}, },
{ 2,2,0,0 } { 2,2,0,0 }

View File

@ -961,6 +961,7 @@ ucase_toFullLower(UChar32 c,
0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
*/ */
*pString=nullptr;
return 0; /* remove the dot (continue without output) */ return 0; /* remove the dot (continue without output) */
} else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) { } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
/* /*
@ -1059,6 +1060,7 @@ toUpperOrTitle(UChar32 c,
0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
*/ */
*pString=nullptr;
return 0; /* remove the dot (continue without output) */ return 0; /* remove the dot (continue without output) */
} else { } else {
/* no known conditional special case mapping, use a normal mapping */ /* no known conditional special case mapping, use a normal mapping */

View File

@ -61,7 +61,7 @@ enum {
/** /**
* Bit mask for getting just the options from a string compare options word * Bit mask for getting just the options from a string compare options word
* that are relevant for case-insensitive string comparison. * that are relevant for case-insensitive string comparison.
* See uchar.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER. * See stringoptions.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
* @internal * @internal
*/ */
#define _STRCASECMP_OPTIONS_MASK 0xffff #define _STRCASECMP_OPTIONS_MASK 0xffff
@ -69,10 +69,16 @@ enum {
/** /**
* Bit mask for getting just the options from a string compare options word * Bit mask for getting just the options from a string compare options word
* that are relevant for case folding (of a single string or code point). * that are relevant for case folding (of a single string or code point).
* See uchar.h. *
* Currently only bit 0 for U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* It is conceivable that at some point we might use one more bit for using uppercase sharp s.
* It is conceivable that at some point we might want the option to use only simple case foldings
* when operating on strings.
*
* See stringoptions.h.
* @internal * @internal
*/ */
#define _FOLD_CASE_OPTIONS_MASK 0xff #define _FOLD_CASE_OPTIONS_MASK 7
/* single-code point functions */ /* single-code point functions */

View File

@ -11,36 +11,36 @@
#ifdef INCLUDED_FROM_UCASE_CPP #ifdef INCLUDED_FROM_UCASE_CPP
static const UVersionInfo ucase_props_dataVersion={9,0,0,0}; static const UVersionInfo ucase_props_dataVersion={0xa,0,0,0};
static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x6c6c,0x5a10,0x79c,0x172,0,0,0,0,0,0,0,0,0,0,3}; static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x6dfc,0x5ba0,0x79c,0x172,0,0,0,0,0,0,0,0,0,0,3};
static const uint16_t ucase_props_trieIndex[11520]={ static const uint16_t ucase_props_trieIndex[11720]={
0x327,0x32f,0x337,0x33f,0x34d,0x355,0x35d,0x365,0x36d,0x375,0x37c,0x384,0x38c,0x394,0x39c,0x3a4, 0x327,0x32f,0x337,0x33f,0x34d,0x355,0x35d,0x365,0x36d,0x375,0x37c,0x384,0x38c,0x394,0x39c,0x3a4,
0x3aa,0x3b2,0x3ba,0x3c2,0x3ca,0x3d2,0x3da,0x3e2,0x3ea,0x3f2,0x3fa,0x402,0x40a,0x412,0x41a,0x422, 0x3aa,0x3b2,0x3ba,0x3c2,0x3ca,0x3d2,0x3da,0x3e2,0x3ea,0x3f2,0x3fa,0x402,0x40a,0x412,0x41a,0x422,
0x42a,0x432,0x43a,0x442,0x44a,0x452,0x45a,0x462,0x45e,0x466,0x46b,0x473,0x47a,0x482,0x48a,0x492, 0x42a,0x432,0x43a,0x442,0x44a,0x452,0x45a,0x462,0x45e,0x466,0x46b,0x473,0x47a,0x482,0x48a,0x492,
0x49a,0x4a2,0x4aa,0x4b2,0x346,0x34e,0x4b7,0x4bf,0x4c4,0x4cc,0x4d4,0x4dc,0x4db,0x4e3,0x4e8,0x4f0, 0x49a,0x4a2,0x4aa,0x4b2,0x346,0x34e,0x4b7,0x4bf,0x4c4,0x4cc,0x4d4,0x4dc,0x4db,0x4e3,0x4e8,0x4f0,
0x4f7,0x4fe,0x502,0x346,0x346,0x327,0x512,0x50a,0x51a,0x51c,0x524,0x52c,0x530,0x531,0x539,0x541, 0x4f7,0x4fe,0x502,0x346,0x346,0x327,0x512,0x50a,0x51a,0x51c,0x524,0x52c,0x530,0x531,0x539,0x541,
0x549,0x531,0x551,0x556,0x549,0x531,0x55e,0x541,0x530,0x562,0x56a,0x541,0x56f,0x346,0x577,0x346, 0x549,0x531,0x551,0x556,0x549,0x531,0x55e,0x566,0x530,0x56e,0x576,0x541,0x57b,0x346,0x583,0x346,
0x4a1,0x4dd,0x57f,0x541,0x530,0x562,0x586,0x541,0x530,0x346,0x539,0x541,0x346,0x346,0x58c,0x346, 0x4a1,0x4dd,0x58b,0x541,0x530,0x56e,0x592,0x541,0x59a,0x59c,0x539,0x541,0x346,0x346,0x5a4,0x346,
0x346,0x592,0x599,0x346,0x346,0x59d,0x5a5,0x346,0x5a9,0x5b0,0x346,0x5b7,0x5bf,0x5c6,0x5ce,0x346, 0x346,0x5aa,0x5b1,0x346,0x346,0x5b5,0x5bd,0x346,0x5c1,0x5c8,0x346,0x5cf,0x5d7,0x5de,0x5e6,0x346,
0x346,0x5d3,0x5db,0x5e3,0x5eb,0x5f3,0x5fb,0x490,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x5eb,0x5f3,0x5fb,0x603,0x60b,0x613,0x490,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x5ff,0x346,0x346,0x60f,0x617,0x607, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x617,0x346,0x346,0x627,0x62f,0x61f,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x61f,0x61f,0x53d,0x53d,0x346,0x625,0x62d,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x637,0x637,0x53d,0x53d,0x346,0x63d,0x645,0x346,
0x635,0x346,0x63d,0x346,0x548,0x643,0x346,0x346,0x346,0x64b,0x346,0x346,0x346,0x346,0x346,0x346, 0x64d,0x346,0x655,0x346,0x548,0x65b,0x346,0x346,0x346,0x663,0x346,0x346,0x346,0x346,0x346,0x346,
0x652,0x346,0x659,0x661,0x346,0x669,0x346,0x346,0x671,0x674,0x67c,0x682,0x68a,0x692,0x346,0x699, 0x66a,0x346,0x671,0x679,0x346,0x681,0x346,0x346,0x56d,0x689,0x691,0x697,0x59a,0x69f,0x346,0x6a6,
0x346,0x69e,0x346,0x6a4,0x6ac,0x346,0x6b0,0x6b8,0x6c0,0x6c5,0x6c8,0x6d0,0x6e0,0x6d8,0x6f0,0x6e8, 0x346,0x6ab,0x346,0x6b1,0x6b9,0x346,0x6bd,0x6c5,0x6cd,0x6d2,0x6d5,0x6dd,0x6ed,0x6e5,0x6fd,0x6f5,
0x36d,0x6f8,0x36d,0x700,0x703,0x36d,0x70b,0x36d,0x713,0x71b,0x723,0x72b,0x733,0x73b,0x743,0x74b, 0x36d,0x705,0x36d,0x70d,0x710,0x36d,0x718,0x36d,0x720,0x728,0x730,0x738,0x740,0x748,0x750,0x758,
0x753,0x75a,0x346,0x762,0x76a,0x346,0x772,0x77a,0x782,0x78a,0x792,0x79a,0x7a2,0x346,0x346,0x346, 0x760,0x767,0x346,0x76f,0x777,0x346,0x77f,0x787,0x78f,0x797,0x79f,0x7a7,0x7af,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x7a5,0x7ab,0x7b1,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x7b2,0x7b8,0x7be,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x7b9,0x7be,0x7c2,0x7ca,0x36d,0x36d,0x36d,0x7d2,0x7da,0x7e2,0x346,0x7e7,0x346,0x346,0x346,0x7ef, 0x7c6,0x7cb,0x7cf,0x7d7,0x36d,0x36d,0x36d,0x7df,0x7e7,0x7ef,0x346,0x7f4,0x346,0x346,0x346,0x7fc,
0x346,0x63a,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x652,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x52f,0x7f7,0x346,0x346,0x7fe,0x346,0x346,0x806,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x52f,0x804,0x346,0x346,0x80b,0x346,0x346,0x813,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
@ -96,12 +96,12 @@ static const uint16_t ucase_props_trieIndex[11520]={
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x80e,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x81b,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x6a4,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x6b1,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x814,0x346,0x81c,0x821,0x829,0x346,0x346,0x831,0x839,0x841,0x36d,0x846,0x84e,0x854,0x346,0x85a, 0x821,0x346,0x829,0x82e,0x836,0x346,0x346,0x83e,0x846,0x84e,0x36d,0x853,0x85b,0x861,0x346,0x867,
0x862,0x548,0x346,0x346,0x346,0x346,0x869,0x871,0x346,0x878,0x87f,0x346,0x51a,0x884,0x88c,0x548, 0x86f,0x548,0x346,0x346,0x346,0x346,0x876,0x87e,0x346,0x885,0x88c,0x346,0x51a,0x891,0x899,0x548,
0x346,0x892,0x89a,0x89e,0x346,0x8a6,0x8ae,0x8b6,0x346,0x8bc,0x8c0,0x8c8,0x8d8,0x8d0,0x346,0x8e0, 0x346,0x89f,0x8a7,0x8ab,0x346,0x8b3,0x8bb,0x8c3,0x346,0x8c9,0x8cd,0x8d5,0x8e5,0x8dd,0x346,0x8ed,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
@ -141,15 +141,15 @@ static const uint16_t ucase_props_trieIndex[11520]={
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x8e8,0x346,0x346,0x346,0x346,0x8f0,0x68a,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x8f5,0x346,0x346,0x346,0x346,0x8fd,0x59a,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x8f5,0x8fd,0x901,0x346,0x346,0x346,0x346,0x329,0x32f,0x909,0x911,0x918,0x4dd,0x346,0x346,0x920, 0x902,0x90a,0x90e,0x346,0x346,0x346,0x346,0x329,0x32f,0x916,0x91e,0x925,0x4dd,0x346,0x346,0x92d,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0xd1c,0xd1c,0xd34,0xd74,0xdb4,0xdf0,0xe30,0xe70,0xea8,0xee8,0xf28,0xf68,0xfa8,0xfe8,0x1028,0x1068, 0xd1c,0xd1c,0xd34,0xd74,0xdb4,0xdf0,0xe30,0xe70,0xea8,0xee8,0xf28,0xf68,0xfa8,0xfe8,0x1028,0x1068,
0x10a8,0x10e8,0x1128,0x1168,0x1178,0x11ac,0x11e8,0x1228,0x1268,0x12a8,0xd18,0x12dc,0x1310,0x1350,0x136c,0x13a0, 0x10a8,0x10e8,0x1128,0x1168,0x1178,0x11ac,0x11e8,0x1228,0x1268,0x12a8,0xd18,0x12dc,0x1310,0x1350,0x136c,0x13a0,
0x9e1,0xa11,0xa51,0xa8c,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xab5,0x188,0x188, 0x9e1,0xa11,0xa51,0xa8c,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xab7,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xaf5,0x188,0x188,0xb2a,0xb69,0xba9,0xbe3,0xc1a,0x188, 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xaf7,0x188,0x188,0xb2c,0xb6b,0xbab,0xbe5,0xc1c,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
@ -174,50 +174,50 @@ static const uint16_t ucase_props_trieIndex[11520]={
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0xc5a,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0xc5c,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x63e,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x928,0x346,0x346,0x346,0x92b,0x346,0x346,0x346, 0x656,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x935,0x346,0x346,0x346,0x938,0x346,0x346,0x346,
0x346,0x933,0x939,0x93d,0x346,0x346,0x941,0x945,0x94b,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x940,0x946,0x94a,0x346,0x346,0x94e,0x952,0x958,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x953,0x957,0x346,0x346,0x346,0x346,0x346,0x95f,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x960,0x964,0x346,0x346,0x346,0x346,0x346,0x96c,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x967,0x96b,0x973,0x977,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x974,0x978,0x980,0x984,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x530,0x97c,0x983,0x985,0x68a,0x98d,0x346,0x346,0x995,0x99c,0x346,0x988,0x68a,0x9a2,0x9aa, 0x346,0x530,0x989,0x990,0x59b,0x59a,0x994,0x346,0x346,0x99c,0x9a3,0x346,0x9a9,0x59a,0x9ae,0x9b6,
0x346,0x346,0x9af,0x346,0x346,0x346,0x346,0x329,0x9b7,0x68a,0x531,0x9bf,0x9c6,0x346,0x346,0x346, 0x346,0x346,0x9bb,0x346,0x346,0x346,0x346,0x329,0x9c3,0x59a,0x531,0x9cb,0x9d2,0x346,0x346,0x346,
0x346,0x346,0x97c,0x9ce,0x346,0x346,0x9d2,0x9da,0x346,0x346,0x346,0x346,0x346,0x346,0x9de,0x9e6, 0x346,0x346,0x989,0x9da,0x346,0x346,0x9de,0x9e6,0x346,0x346,0x346,0x346,0x346,0x346,0x9ea,0x9f2,
0x346,0x346,0x9ee,0x4a1,0x346,0x346,0x9f6,0x346,0x346,0x9fc,0xa04,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x9fa,0x4a1,0x346,0x346,0xa02,0x346,0x346,0xa08,0xa10,0x346,0x346,0x346,0x346,0x346,
0x346,0xa0c,0xa14,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0xa18,0xa20,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa28,0xa2c,0xa34,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa1c,0x346,0x346, 0xa3b,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa42,0x346,0x346,
0x8f0,0xa24,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x8fd,0xa4a,0x346,0x346,0x346,0xa50,0xa58,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa2a,0x346,0xa30,0x671, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa5c,0x346,
0xa62,0x56d,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0xa68,0x346,0x346,0x59a,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0xa36,0x346,0x346,0x4a1,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa70,0x56d,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa3e,0x671,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa78,0xa80,0xa86,0x346,0x346,0x346,0x346,0xa8e,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa96,0xa9e,0xaa3,0xaa9,
0xab1,0xab9,0xac1,0xa9a,0xac9,0xad1,0xad9,0xae0,0xa9b,0xa96,0xa9e,0xa99,0xaa9,0xa9c,0xa97,0xae8,
0xa9a,0xaf0,0xaf8,0xb00,0xb07,0xaf3,0xafb,0xb03,0xb0a,0xaf6,0xb12,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x846,0xb1a,0x846,0xb21,0xb28,
0xb30,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0xa46,0xa4e,0xa54,0x346,0x346,0x346,0x346,0xa5c,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xb38,0xb40,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa64,0xa6c,0xa71,0xa77,0xa7f,0xa87,
0xa8f,0xa68,0xa97,0xa9f,0xaa7,0xaae,0xa69,0xa64,0xa6c,0xa67,0xa77,0xa6a,0xa65,0xab6,0xa68,0xabe,
0xac6,0xace,0xad5,0xac1,0xac9,0xad1,0xad8,0xac4,0xae0,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x839,0xae8,0x839,0xaef,0xaf6,0xafe,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xb06,0xb0e,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xb12,0x346,0xb1a,0xb22,0xb29,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xb44,0x346,0xb4c,0xb54,0xb5b,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0xa60,0xb31,0xb31,0xb37,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0xa92,0xb63,0xb63,0xb69,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x997,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x99e,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x530,0x839,0x839,0x839,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x530,0x846,0x846,0x846,
0x346,0x346,0x839,0x839,0x839,0x839,0x839,0x839,0x839,0xa3a,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0xa6c,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x326,0x326,0,0,0,0, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,0,0,0,0,0,0,4,0,0,0,0,0, 0,0,0,4,0,0,0,0,0,0,4,0,0,0,0,0,
@ -361,6 +361,9 @@ static const uint16_t ucase_props_trieIndex[11520]={
0,0,0,0,0,0,0,0,4,4,0,0,0,4,0,0, 0,0,0,0,0,0,0,0,4,4,0,0,0,4,0,0,
0,0,0,0,0,0,0,0,0,4,4,4,4,4,0,4, 0,0,0,0,0,0,0,0,0,4,4,4,4,4,0,4,
4,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0, 4,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,4,4,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,4,4,4,4,4,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x64,0,0,4,0,4,4,4,4,0,0,0, 0,0,0,0,0x64,0,0,4,0,4,4,4,4,0,0,0,
0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,4,0, 0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,4,0,
@ -371,6 +374,9 @@ static const uint16_t ucase_props_trieIndex[11520]={
0,0,4,4,4,0,4,4,4,0x64,0,0,0,0,0,0, 0,0,4,4,4,0,4,4,4,0x64,0,0,0,0,0,0,
0,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,4,0, 0,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,4,0,
0,0,0,0,4,0x64,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,0x64,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x64,0x64,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x64,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0x64,0,0,0,0,0,
0,0,4,4,4,0,4,0,0,0,0,0,0,0,0,0, 0,0,4,4,4,0,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,4,0,0,4,4,4,4, 0,0,0,0,0,0,0,0,0,4,0,0,4,4,4,4,
@ -428,211 +434,208 @@ static const uint16_t ucase_props_trieIndex[11520]={
4,0,0,0,0,0,0,4,4,0x44,0x44,0x44,0x44,0x44,0x44,0x44, 4,0,0,0,0,0,0,4,4,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0,0,0x64,0,0,0,0,0,0,0,4,0,0,0,0, 0x44,0,0,0x64,0,0,0,0,0,0,0,4,0,0,0,0,
0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44, 0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,
0x44,0x64,4,0,4,4,4,4,0,0,0,0,0,0,0,0, 0x44,0x64,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0x64,0,4,4,4,4,4,0,
0,0,0,0,0x64,0,4,4,4,4,4,0,4,0,0,0, 4,0,0,0,0,0,4,0,0x60,0,0,0,0,0,0,0,
0,0,4,0,0x60,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,4,4,4,4,0,0,
4,4,0x60,0x64,4,4,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x64,0,4,4,0,0,
0,4,0,4,4,4,0x60,0x60,0,0,0,0,0,0,0,0,
0,0,0,0,4,4,4,4,4,4,4,4,0,0,4,0x64,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,0,0,
0x5cd9,0x5d39,0x5d99,0x5df9,0x5e59,0x5ef9,0x5f99,0x5ff9,0x6059,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x44,0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x64,0x64,0x64,0x64,
0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,0,0x64,0,0,
0,0,0,0,0x44,0,0,0,0x44,0x44,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,0x25,5,5,5,5,5,5,5,5,1,1,1,1,1,
1,1,1,1,1,1,1,1,5,0x60b9,1,1,1,0x60f9,1,1,
5,5,5,5,0x25,5,5,5,0x25,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,0x21,1,1,1,1,5,5,5,5,5,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0x44,0x64,0x64,0x44,0x64,
0x44,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x44,0x44,0x64,0x64,0x64,
0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xffa9,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x613a,0x61b9,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x6239,0x6339,0x6439,0x6539,0x6639,0x6739,1,1,0x679a,1,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xffa9,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x409,0x409,0x409,0x409,
0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0x409,0x409,0x409,0x409,
0x409,0x409,0,0,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0,0,0x409,0x409,0x409,0x409,
0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0x409,0x409,0x409,0x409,
0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0x409,0x409,0x409,0x409,
0x409,0x409,0,0,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0,0,0x6839,0x409,0x6939,0x409,
0x6a99,0x409,0x6bf9,0x409,0,0xfc0a,0,0xfc0a,0,0xfc0a,0,0xfc0a,0x409,0x409,0x409,0x409,
0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0x2509,0x2509,0x2b09,0x2b09,
0x2b09,0x2b09,0x3209,0x3209,0x4009,0x4009,0x3809,0x3809,0x3f09,0x3f09,0,0,0x6d59,0x6e39,0x6f19,0x6ff9,
0x70d9,0x71b9,0x7299,0x7379,0x745b,0x753b,0x761b,0x76fb,0x77db,0x78bb,0x799b,0x7a7b,0x7b59,0x7c39,0x7d19,0x7df9,
0x7ed9,0x7fb9,0x8099,0x8179,0x825b,0x833b,0x841b,0x84fb,0x85db,0x86bb,0x879b,0x887b,0x8959,0x8a39,0x8b19,0x8bf9,
0x8cd9,0x8db9,0x8e99,0x8f79,0x905b,0x913b,0x921b,0x92fb,0x93db,0x94bb,0x959b,0x967b,0x409,0x409,0x9759,0x9859,
0x9939,0,0x9a39,0x9b39,0xfc0a,0xfc0a,0xdb0a,0xdb0a,0x9c9b,4,0x9d79,4,4,4,0x9e19,0x9f19,
0x9ff9,0,0xa0f9,0xa1f9,0xd50a,0xd50a,0xd50a,0xd50a,0xa35b,4,4,4,0x409,0x409,0xa439,0xa599,
0,0,0xa739,0xa839,0xfc0a,0xfc0a,0xce0a,0xce0a,0,4,4,4,0x409,0x409,0xa999,0xaaf9,
0xac99,0x389,0xad99,0xae99,0xfc0a,0xfc0a,0xc80a,0xc80a,0xfc8a,4,4,4,0,0,0xaff9,0xb0f9,
0xb1d9,0,0xb2d9,0xb3d9,0xc00a,0xc00a,0xc10a,0xc10a,0xb53b,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,4,4,4,4,4,0,0,0,0,
0,0,0,0,4,4,0,0,0,0,0,0,4,0,0,4,
0,0,4,4,4,4,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,0,4,4,
4,4,4,4,4,4,4,4,0,0x25,0,0,0,0,0,0,
0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
5,5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0x64,0x64,0x44,0x44,0x44,0x44,
0x64,0x64,0x64,0x44,0x44,4,4,4,4,0x44,4,4,4,0x64,0x64,0x44,
0x64,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,
0,0,1,2,2,2,1,1,2,2,2,1,0,2,0,0,
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0xb61a,0,
2,0,0xb69a,0xb71a,2,2,0,1,2,2,0xe0a,2,1,0,0,0,
0,1,0,0,1,1,2,2,0,0,0,0,0,2,1,1,
0x21,0x21,0,0,0,0,0xf209,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,
0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,
0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0,0,0,0x8a,0xff89,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,
0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xf309,0xf309,0xf309,0xf309,
0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,
0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,
0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,
0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,
0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0,0x8a,0xff89,0xb79a,0xb7da,0xb81a,0xb859,0xb899,0x8a,
0xff89,0x8a,0xff89,0x8a,0xff89,0xb8da,0xb91a,0xb95a,0xb99a,1,0x8a,0xff89,1,0x8a,0xff89,1,
1,1,1,1,0x25,5,0xb9da,0xba1a,0x8a,0xff89,0x8a,0xff89,1,0,0,0,
0,0,0,0x8a,0xff89,0x8a,0xff89,0x44,0x44,0x44,0x8a,0xff89,0,0,0,0,
0,0,0,0,0,0,0,0,0xba59,0xba99,0xbad9,0xbb19,0xbb59,0xbb99,0xbbd9,0xbc19,
0xbc59,0xbc99,0xbcd9,0xbd19,0xbd59,0xbd99,0xbdd9,0xbe19,0xbe59,0xbe99,0xbed9,0xbf19,0xbf59,0xbf99,0xbfd9,0xc019,
0xc059,0xc099,0xc0d9,0xc119,0xc159,0xc199,0xc1d9,0xc219,0xc259,0xc299,0xc2d9,0xc319,0xc359,0xc399,0,0xc3d9,
0,0,0,0,0,0xc419,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,
0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x60,0x60,0,4,4,4,
4,4,0,0,0,0,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x64,0x64,4,4,4,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,4,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xc45a,0xc4d9,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0,0x44,4,4,4,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0,4,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
5,5,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x44,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,1,1,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,5,1,1,1,1,1,1,1,
1,0x8a,0xff89,0x8a,0xff89,0xc55a,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
4,4,4,0x8a,0xff89,0xc59a,1,0,0x8a,0xff89,0x8a,0xff89,1,1,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xc5da,0xc61a,0xc65a,0xc69a,0xc6da,0,
0xc71a,0xc75a,0xc79a,0xc7da,0x8a,0xff89,0x8a,0xff89,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
5,5,1,0,0,0,0,0,0,0,4,0,0,0,0x64,0,
0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x64,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,4,4,4,4,4,0x64,0x64,0x64,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,4,4,4,4,4,4,4,4,4,4,0,0x60,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x64,0,0,4,4,4,4,0,0,4,0,0,0,
0x60,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,4,4,4,4,4,4,0,0,4,4,0,0,4,4,0,
0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0,0x44,0x44,0x64,0,0,0x44,
0x44,0,0,0,0,0,0x44,0x44,0,0x44,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,4,0,0,0,0,0,4,4,0,0x64,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,0xc819,1,1,1,1,1,1,1,4,5,5,5,5,
1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
0xc859,0xc8b9,0xc919,0xc979,0xc9d9,0xca39,0xca99,0xcaf9,0xcb59,0xcbb9,0xcc19,0xcc79,0xccd9,0xcd39,0xcd99,0xcdf9,
0xda59,0xdab9,0xdb19,0xdb79,0xdbd9,0xdc39,0xdc99,0xdcf9,0xdd59,0xddb9,0xde19,0xde79,0xded9,0xdf39,0xdf99,0xdff9,
0xe059,0xe0b9,0xe119,0xe179,0xe1d9,0xe239,0xe299,0xe2f9,0xe359,0xe3b9,0xe419,0xe479,0xe4d9,0xe539,0xe599,0xe5f9,
0xce59,0xceb9,0xcf19,0xcf79,0xcfd9,0xd039,0xd099,0xd0f9,0xd159,0xd1b9,0xd219,0xd279,0xd2d9,0xd339,0xd399,0xd3f9,
0xd459,0xd4b9,0xd519,0xd579,0xd5d9,0xd639,0xd699,0xd6f9,0xd759,0xd7b9,0xd819,0xd879,0xd8d9,0xd939,0xd999,0xd9f9,
0,0,0,0,0,4,0,0,4,0,0,0,0,0x64,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xe659,0xe759,0xe859,0xe959,0xeab9,0xec19,0xed59,0,0,0,0,0,0,0,0,0,
0,0,0,0xee99,0xef99,0xf099,0xf199,0xf299,0,0,0,0,0,0,0x64,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,0,0,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,
0x64,0x64,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,4,0,0,4,0,0,0,0,0,0,
0,0,0,0,0,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0,
0,0,4,0,4,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,
0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0,0,0,0,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0,0,0,0,0,4,4,4,
0,4,4,0,0,0,0,0,4,0x64,4,0x44,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x64,0x64,0,0,0,0,0x64,0,0,0,0,
0,0x44,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x200a,0x200a,0x200a,0x200a,
0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,
0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xe009,0xe009,0xe009,0xe009,
0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,
0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,
4,4,0,0,4,4,0x60,0x64,4,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x64,0,
4,4,0,0,0,4,0,4,4,4,0x60,0x60,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4, 0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
4,4,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,4,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
4,4,0,0,0x5cd9,0x5d39,0x5d99,0x5df9,0x5e59,0x5ef9,0x5f99,0x5ff9,0x6059,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0x44,0x44,0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,
0,0,0,0,0,0,0,4,4,4,4,0,0,0x64,0x64,0, 0x64,0x64,0x64,0x64,0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,
0,4,0,0,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0, 0,0x64,0,0,0,0,0,0,0x44,0,0,0,0x44,0x44,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,0x25,5,5,5,5,5,5,5,5,1,
1,1,1,1,1,1,1,1,1,1,1,1,5,0x60b9,1,1,
1,0x60f9,1,1,5,5,5,5,0x25,5,5,5,0x25,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,0x21,1,1,1,1,5,
5,5,5,5,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0,0x44,
0x64,0x64,0x44,0x64,0x44,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x44,
0x44,0x64,0x64,0x64,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xffa9,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x613a,0x61b9,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x6239,0x6339,0x6439,0x6539,0x6639,0x6739,1,1,0x679a,1,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xffa9,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x409,0x409,0x409,0x409,0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,
0x409,0x409,0x409,0x409,0x409,0x409,0,0,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0,0,
0x409,0x409,0x409,0x409,0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,
0x409,0x409,0x409,0x409,0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,
0x409,0x409,0x409,0x409,0x409,0x409,0,0,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0,0,
0x6839,0x409,0x6939,0x409,0x6a99,0x409,0x6bf9,0x409,0,0xfc0a,0,0xfc0a,0,0xfc0a,0,0xfc0a,
0x409,0x409,0x409,0x409,0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,
0x2509,0x2509,0x2b09,0x2b09,0x2b09,0x2b09,0x3209,0x3209,0x4009,0x4009,0x3809,0x3809,0x3f09,0x3f09,0,0,
0x6d59,0x6e39,0x6f19,0x6ff9,0x70d9,0x71b9,0x7299,0x7379,0x745b,0x753b,0x761b,0x76fb,0x77db,0x78bb,0x799b,0x7a7b,
0x7b59,0x7c39,0x7d19,0x7df9,0x7ed9,0x7fb9,0x8099,0x8179,0x825b,0x833b,0x841b,0x84fb,0x85db,0x86bb,0x879b,0x887b,
0x8959,0x8a39,0x8b19,0x8bf9,0x8cd9,0x8db9,0x8e99,0x8f79,0x905b,0x913b,0x921b,0x92fb,0x93db,0x94bb,0x959b,0x967b,
0x409,0x409,0x9759,0x9859,0x9939,0,0x9a39,0x9b39,0xfc0a,0xfc0a,0xdb0a,0xdb0a,0x9c9b,4,0x9d79,4,
4,4,0x9e19,0x9f19,0x9ff9,0,0xa0f9,0xa1f9,0xd50a,0xd50a,0xd50a,0xd50a,0xa35b,4,4,4,
0x409,0x409,0xa439,0xa599,0,0,0xa739,0xa839,0xfc0a,0xfc0a,0xce0a,0xce0a,0,4,4,4,
0x409,0x409,0xa999,0xaaf9,0xac99,0x389,0xad99,0xae99,0xfc0a,0xfc0a,0xc80a,0xc80a,0xfc8a,4,4,4,
0,0,0xaff9,0xb0f9,0xb1d9,0,0xb2d9,0xb3d9,0xc00a,0xc00a,0xc10a,0xc10a,0xb53b,4,4,0,
0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,
0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,
4,0,0,4,0,0,4,4,4,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
4,0,4,4,4,4,4,4,4,4,4,4,0,0x25,0,0,
0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x64,0x64,
0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x44,0x44,4,4,4,4,0x44,4,4,
4,0x64,0x64,0x44,0x64,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
0,0,0,2,0,0,1,2,2,2,1,1,2,2,2,1,
0,2,0,0,0,2,2,2,2,2,0,0,0,0,0,0,
2,0,0xb61a,0,2,0,0xb69a,0xb71a,2,2,0,1,2,2,0xe0a,2,
1,0,0,0,0,1,0,0,1,1,2,2,0,0,0,0,
0,2,1,1,0x21,0x21,0,0,0,0,0xf209,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x80a,0x80a,0x80a,0x80a,
0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0xf809,0xf809,0xf809,0xf809,
0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0,0,0,0x8a,
0xff89,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xd0a,0xd0a,
0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,
0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,
0xf309,0xf309,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,
0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,
0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0,0xe809,0xe809,0xe809,0xe809,
0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,
0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0,0x8a,0xff89,0xb79a,0xb7da,
0xb81a,0xb859,0xb899,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xb8da,0xb91a,0xb95a,0xb99a,1,0x8a,0xff89,
1,0x8a,0xff89,1,1,1,1,1,0x25,5,0xb9da,0xba1a,0x8a,0xff89,0x8a,0xff89,
1,0,0,0,0,0,0,0x8a,0xff89,0x8a,0xff89,0x44,0x44,0x44,0x8a,0xff89,
0,0,0,0,0,0,0,0,0,0,0,0,0xba59,0xba99,0xbad9,0xbb19,
0xbb59,0xbb99,0xbbd9,0xbc19,0xbc59,0xbc99,0xbcd9,0xbd19,0xbd59,0xbd99,0xbdd9,0xbe19,0xbe59,0xbe99,0xbed9,0xbf19,
0xbf59,0xbf99,0xbfd9,0xc019,0xc059,0xc099,0xc0d9,0xc119,0xc159,0xc199,0xc1d9,0xc219,0xc259,0xc299,0xc2d9,0xc319,
0xc359,0xc399,0,0xc3d9,0,0,0,0,0,0xc419,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x64,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0,0,0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x60,0x60,
0,4,4,4,4,4,0,0,0,0,0,4,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,4,4,4,4,4,0,4,4,4, 0,0,0,0,0,0x64,0x64,4,4,4,4,0,0,0,0,0,
4,4,4,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xc45a,0xc4d9,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0,0x44,4,4,4,0,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0,4,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,5,5,0x44,0x44,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,1,1,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,5,1,1,1,
1,1,1,1,1,0x8a,0xff89,0x8a,0xff89,0xc55a,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,4,4,4,0x8a,0xff89,0xc59a,1,0,0x8a,0xff89,0x8a,0xff89,
1,1,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xc5da,0xc61a,
0xc65a,0xc69a,0xc6da,0,0xc71a,0xc75a,0xc79a,0xc7da,0x8a,0xff89,0x8a,0xff89,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,5,5,1,0,0,0,0,0,0,0,4,0,
0,0,0x64,0,0,0,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x64,4,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,0x64,
0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,
4,4,0,0x60,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x64,0,0,4,4,4,4,0,0,
4,0,0,0,0x60,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,4,4,4,4,4,4,0,0,4,4,0,
0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,4,
0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,
0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x44,0,0x44,0x44,
0x64,0,0,0x44,0x44,0,0,0,0,0,0x44,0x44,0,0x44,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,4,
4,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,0xc819,1,1,1,1,1,1,1,4,
5,5,5,5,1,1,1,1,1,1,0,0,0,0,0,0,
0,0,0,0,0xc859,0xc8b9,0xc919,0xc979,0xc9d9,0xca39,0xca99,0xcaf9,0xcb59,0xcbb9,0xcc19,0xcc79,
0xccd9,0xcd39,0xcd99,0xcdf9,0xda59,0xdab9,0xdb19,0xdb79,0xdbd9,0xdc39,0xdc99,0xdcf9,0xdd59,0xddb9,0xde19,0xde79,
0xded9,0xdf39,0xdf99,0xdff9,0xe059,0xe0b9,0xe119,0xe179,0xe1d9,0xe239,0xe299,0xe2f9,0xe359,0xe3b9,0xe419,0xe479,
0xe4d9,0xe539,0xe599,0xe5f9,0xce59,0xceb9,0xcf19,0xcf79,0xcfd9,0xd039,0xd099,0xd0f9,0xd159,0xd1b9,0xd219,0xd279,
0xd2d9,0xd339,0xd399,0xd3f9,0xd459,0xd4b9,0xd519,0xd579,0xd5d9,0xd639,0xd699,0xd6f9,0xd759,0xd7b9,0xd819,0xd879,
0xd8d9,0xd939,0xd999,0xd9f9,0,0,0,0,0,4,0,0,4,0,0,0,
0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xe659,0xe759,0xe859,0xe959,0xeab9,0xec19,0xed59,0,0,0,0,0,
0,0,0,0,0,0,0,0xee99,0xef99,0xf099,0xf199,0xf299,0,0,0,0,
0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,0,0,0,4,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,
0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,4,0,0,4,0,0,
0,0,0,0,0,0,0,0,0,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0,0,0,4,0,4,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0xf009,0xf009,0xf009,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,
0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0,0,0,0,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0,0,0,0,
0,4,4,4,0,4,4,0,0,0,0,0,4,0x64,4,0x44,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x64,0x64,0,0,0,0,0x64,
0,0,0,0,0,0x44,0x64,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,
0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,
0x200a,0x200a,0x200a,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,
0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,
0xe009,0xe009,0xe009,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
4,4,4,4,4,4,0x64,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,4,4,4,0,0,0x64,0x64,0,0,4,0,0,
0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,4,4,4,4,0,4,4,4,4,4,4,0x64,
0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,
4,4,4,4,4,4,4,0,0x60,0,0,0,0,0,0,0, 4,4,4,4,4,4,4,0,0x60,0,0,0,0,0,0,0,
0,0,0x64,4,4,0,0,0,0,0,0,0,0,0,0,0, 0,0,0x64,4,4,0,0,0,0,0,0,0,0,0,0,0,
@ -663,79 +666,89 @@ static const uint16_t ucase_props_trieIndex[11520]={
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a, 0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009, 0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009, 0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0,4,4,4,4,4,4,0,0,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,4,4,4,4,4,4,0,4,4,4,4,4,4,0,0x64, 0,0,0,4,0x64,4,4,4,4,0,0,4,4,4,4,0,
4,4,4,4,4,4,4,4,0,0,4,4,4,4,4,4, 0,0,0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,
4,0,4,4,0,4,4,0,0,0,0,0,0,0,0,0, 0,4,4,4,4,4,4,0,0,4,4,4,0,0,0,0,
0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x64,0,0,0, 0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,4,
4,4,4,0,4,0x64,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,0,
4,4,4,4,4,4,0,0x64,4,4,4,4,4,4,4,4,
0,0,4,4,4,4,4,4,4,0,4,4,0,4,4,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0, 0,4,4,4,4,4,4,0,0,0,4,0,4,4,0,4,
0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,4, 4,4,0x64,4,0x64,0x64,0,4,0,0,0,0,0,0,0,0,
4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,4,0x64,0,0,0,0,0,0,0x60,0x60,0x64, 0x64,0x64,0x64,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,
0x64,0x64,0,0,0,0x60,0x60,0x60,0x60,0x60,0x60,4,4,4,4,4,
4,4,4,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0x44,0x44,0x44,
0x44,0x44,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
1,1,0x21,0x21,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,1,1,1,1,1,1,1,0,0x21,0x21,1,1,1,1,
1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,
1,1,1,1,1,1,0x21,0x21,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,2,0,2,2,0,0,2,0,
0,2,2,0,0,2,2,2,2,0,2,2,2,2,2,2,
2,2,1,1,1,1,0,1,0,1,0x21,0x21,1,1,1,1,
0,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
2,2,0,2,2,2,2,0,0,2,2,2,2,2,2,2,
2,0,2,2,2,2,2,2,2,0,1,1,1,1,1,1,
1,1,0x21,0x21,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,2,2,0,2,2,2,2,0,2,2,2,2,
2,0,2,0,0,0,2,2,2,2,2,2,2,0,1,1,
1,1,1,1,1,1,0x21,0x21,1,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,1,1,1,1,1,1,0,0,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,0,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,0,1,1,1,1,1,1,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,0,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,0,1,1,1,1,1,1,2,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,0,0,0,0,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,
0,4,0,0,0,0,0,0,0,0,0,0,4,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,4,4,4,4,0,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0, 0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
0x44,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0x44,0x44,0,0x44,0x44, 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,
0,0,0,0,0,0,0,0,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,
0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,
0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,
0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,
0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,2,2,2,2,2,2,2,2,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,4,0x64,0,
0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2, 0,0,0,0,0,0x60,0x60,0x64,0x64,0x64,0,0,0,0x60,0x60,0x60,
2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0, 0x60,0x60,0x60,4,4,4,4,4,4,4,4,0x64,0x64,0x64,0x64,0x64,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 0x64,0x64,0x64,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
1,0,0x21,0x21,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,1,1,1,1,1,1,1,1,0x21,0x21,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,0,2,2,0,0,2,0,0,2,2,0,0,2,2,2,
2,0,2,2,2,2,2,2,2,2,1,1,1,1,0,1,
0,1,0x21,0x21,1,1,1,1,0,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,1,1,1,1,2,2,0,2,2,2,2,0,
0,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,
2,0,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,2,2,0,2,
2,2,2,0,2,2,2,2,2,0,2,0,0,0,2,2,
2,2,2,2,2,0,1,1,1,1,1,1,1,1,0x21,0x21,
1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,1,1,1,0,0,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,
1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,0,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,1,1,1,0,1,1,1,1,
1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,
0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,
0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,0,0,0,0,0,0,
0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,
0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,
0x110a,0x110a,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,
0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,
0x44,0x44,0x44,0x44,0x44,0x44,0x64,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,
2,2,2,2,2,2,0,0,0,0,0,0,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0
}; };
static const uint16_t ucase_props_exceptions[1948]={ static const uint16_t ucase_props_exceptions[1948]={
@ -900,13 +913,13 @@ static const UCaseProps ucase_props_singleton={
ucase_props_trieIndex+3228, ucase_props_trieIndex+3228,
NULL, NULL,
3228, 3228,
8292, 8492,
0x188, 0x188,
0xd18, 0xd18,
0x0, 0x0,
0x0, 0x0,
0xe0800, 0xe0800,
0x2cfc, 0x2dc4,
NULL, 0, FALSE, FALSE, 0, NULL NULL, 0, FALSE, FALSE, 0, NULL
}, },
{ 3,0,0,0 } { 3,0,0,0 }

View File

@ -20,8 +20,11 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/brkiter.h" #include "unicode/brkiter.h"
#include "unicode/bytestream.h"
#include "unicode/casemap.h" #include "unicode/casemap.h"
#include "unicode/edits.h" #include "unicode/edits.h"
#include "unicode/stringoptions.h"
#include "unicode/stringpiece.h"
#include "unicode/ubrk.h" #include "unicode/ubrk.h"
#include "unicode/uloc.h" #include "unicode/uloc.h"
#include "unicode/ustring.h" #include "unicode/ustring.h"
@ -32,6 +35,7 @@
#include "unicode/utf.h" #include "unicode/utf.h"
#include "unicode/utf8.h" #include "unicode/utf8.h"
#include "unicode/utf16.h" #include "unicode/utf16.h"
#include "bytesinkutil.h"
#include "cmemory.h" #include "cmemory.h"
#include "cstring.h" #include "cstring.h"
#include "uassert.h" #include "uassert.h"
@ -39,27 +43,6 @@
#include "ucasemap_imp.h" #include "ucasemap_imp.h"
#include "ustr_imp.h" #include "ustr_imp.h"
U_NAMESPACE_BEGIN
namespace {
// TODO: share with UTF-16? inline in ucasemap_imp.h?
int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
Edits *edits, UErrorCode &errorCode) {
if (U_SUCCESS(errorCode)) {
if (destIndex > destCapacity) {
errorCode = U_BUFFER_OVERFLOW_ERROR;
} else if (edits != NULL) {
edits->copyErrorTo(errorCode);
}
}
return destIndex;
}
} // namespace
U_NAMESPACE_END
U_NAMESPACE_USE U_NAMESPACE_USE
/* UCaseMap service object -------------------------------------------------- */ /* UCaseMap service object -------------------------------------------------- */
@ -150,148 +133,39 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {
/* TODO(markus): Move to a new, separate utf8case.cpp file. */ /* TODO(markus): Move to a new, separate utf8case.cpp file. */
namespace {
/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */ /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
static inline int32_t inline UBool
appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity, appendResult(int32_t cpLength, int32_t result, const UChar *s,
int32_t result, const UChar *s, ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
int32_t cpLength, uint32_t options, icu::Edits *edits) { U_ASSERT(U_SUCCESS(errorCode));
UChar32 c;
int32_t length;
UErrorCode errorCode;
/* decode the result */ /* decode the result */
if(result<0) { if(result<0) {
/* (not) original code point */ /* (not) original code point */
if(edits!=NULL) { if(edits!=NULL) {
edits->addUnchanged(cpLength); edits->addUnchanged(cpLength);
if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
return destIndex;
} }
if((options & U_OMIT_UNCHANGED_TEXT) == 0) {
ByteSinkUtil::appendCodePoint(cpLength, ~result, sink);
} }
c=~result;
if(destIndex<destCapacity && c<=0x7f) { // ASCII slightly-fastpath
dest[destIndex++]=(uint8_t)c;
return destIndex;
}
length=cpLength;
} else { } else {
if(result<=UCASE_MAX_STRING_LENGTH) { if(result<=UCASE_MAX_STRING_LENGTH) {
// string: "result" is the UTF-16 length // string: "result" is the UTF-16 length
errorCode=U_ZERO_ERROR; return ByteSinkUtil::appendChange(cpLength, s, result, sink, edits, errorCode);
if(destIndex<destCapacity) {
u_strToUTF8((char *)(dest+destIndex), destCapacity-destIndex, &length,
s, result, &errorCode);
} else { } else {
u_strToUTF8(NULL, 0, &length, s, result, &errorCode); ByteSinkUtil::appendCodePoint(cpLength, result, sink, edits);
}
if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
return -1;
}
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
if(edits!=NULL) {
edits->addReplace(cpLength, length);
}
// We might have an overflow, but we know the actual length.
return destIndex+length;
} else if(destIndex<destCapacity && result<=0x7f) { // ASCII slightly-fastpath
dest[destIndex++]=(uint8_t)result;
if(edits!=NULL) {
edits->addReplace(cpLength, 1);
}
return destIndex;
} else {
c=result;
length=U8_LENGTH(c);
if(edits!=NULL) {
edits->addReplace(cpLength, length);
} }
} }
} return TRUE;
// c>=0 single code point
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
if(destIndex<destCapacity) {
/* append the result */
UBool isError=FALSE;
U8_APPEND(dest, destIndex, destCapacity, c, isError);
if(isError) {
/* overflow, nothing written */
destIndex+=length;
}
} else {
/* preflight */
destIndex+=length;
}
return destIndex;
}
static inline int32_t
appendASCII(uint8_t *dest, int32_t destIndex, int32_t destCapacity, uint8_t c) {
if(destIndex<destCapacity) {
dest[destIndex]=c;
} else if(destIndex==INT32_MAX) {
return -1; // integer overflow
}
return destIndex+1;
} }
// See unicode/utf8.h U8_APPEND_UNSAFE(). // See unicode/utf8.h U8_APPEND_UNSAFE().
static inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); } inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
static inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); } inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
static inline int32_t } // namespace
appendTwoBytes(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar32 c) {
U_ASSERT(0x370 <= c && c <= 0x3ff); // 2-byte UTF-8, main Greek block
if(2>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
int32_t limit=destIndex+2;
if(limit<=destCapacity) {
dest+=destIndex;
dest[0]=getTwoByteLead(c);
dest[1]=getTwoByteTrail(c);
}
return limit;
}
static inline int32_t
appendTwoBytes(uint8_t *dest, int32_t destIndex, int32_t destCapacity, const char *s) {
if(2>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
int32_t limit=destIndex+2;
if(limit<=destCapacity) {
dest+=destIndex;
dest[0]=(uint8_t)s[0];
dest[1]=(uint8_t)s[1];
}
return limit;
}
static inline int32_t
appendUnchanged(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
const uint8_t *s, int32_t length, uint32_t options, icu::Edits *edits) {
if(length>0) {
if(edits!=NULL) {
edits->addUnchanged(length);
if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
}
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
if((destIndex+length)<=destCapacity) {
uprv_memcpy(dest+destIndex, s, length);
}
destIndex+=length;
}
return destIndex;
}
static UChar32 U_CALLCONV static UChar32 U_CALLCONV
utf8_caseContextIterator(void *context, int8_t dir) { utf8_caseContextIterator(void *context, int8_t dir) {
@ -329,17 +203,15 @@ utf8_caseContextIterator(void *context, int8_t dir) {
* Case-maps [srcStart..srcLimit[ but takes * Case-maps [srcStart..srcLimit[ but takes
* context [0..srcLength[ into account. * context [0..srcLength[ into account.
*/ */
static int32_t static void
_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map, _caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, UCaseContext *csc, const uint8_t *src, UCaseContext *csc,
int32_t srcStart, int32_t srcLimit, int32_t srcStart, int32_t srcLimit,
icu::Edits *edits, icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) { UErrorCode &errorCode) {
/* case mapping loop */ /* case mapping loop */
int32_t srcIndex=srcStart; int32_t srcIndex=srcStart;
int32_t destIndex=0; while (U_SUCCESS(errorCode) && srcIndex<srcLimit) {
while(srcIndex<srcLimit) {
int32_t cpStart; int32_t cpStart;
csc->cpStart=cpStart=srcIndex; csc->cpStart=cpStart=srcIndex;
UChar32 c; UChar32 c;
@ -347,45 +219,32 @@ _caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
csc->cpLimit=srcIndex; csc->cpLimit=srcIndex;
if(c<0) { if(c<0) {
// Malformed UTF-8. // Malformed UTF-8.
destIndex=appendUnchanged(dest, destIndex, destCapacity, ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
src+cpStart, srcIndex-cpStart, options, edits); sink, options, edits, errorCode);
if(destIndex<0) { } else {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
continue;
}
const UChar *s; const UChar *s;
c=map(c, utf8_caseContextIterator, csc, &s, caseLocale); c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
destIndex = appendResult(dest, destIndex, destCapacity, c, s, appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
srcIndex - cpStart, options, edits);
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
} }
return destIndex;
} }
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC int32_t U_CALLCONV U_CFUNC void U_CALLCONV
ucasemap_internalUTF8ToTitle( ucasemap_internalUTF8ToTitle(
int32_t caseLocale, uint32_t options, BreakIterator *iter, int32_t caseLocale, uint32_t options, BreakIterator *iter,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
icu::Edits *edits, ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) { UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) {
return 0; return;
} }
/* set up local variables */ /* set up local variables */
UCaseContext csc=UCASECONTEXT_INITIALIZER; UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src; csc.p=(void *)src;
csc.limit=srcLength; csc.limit=srcLength;
int32_t destIndex=0;
int32_t prev=0; int32_t prev=0;
UBool isFirstIndex=TRUE; UBool isFirstIndex=TRUE;
@ -404,45 +263,36 @@ ucasemap_internalUTF8ToTitle(
} }
/* /*
* Unicode 4 & 5 section 3.13 Default Case Operations: * Segment [prev..index[ into 3 parts:
* * a) skipped characters (copy as-is) [prev..titleStart[
* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex * b) first letter (titlecase) [titleStart..titleLimit[
* #29, "Text Boundaries." Between each pair of word boundaries, find the first
* cased character F. If F exists, map F to default_title(F); then map each
* subsequent character C to default_lower(C).
*
* In this implementation, segment [prev..index[ into 3 parts:
* a) uncased characters (copy as-is) [prev..titleStart[
* b) first case letter (titlecase) [titleStart..titleLimit[
* c) subsequent characters (lowercase) [titleLimit..index[ * c) subsequent characters (lowercase) [titleLimit..index[
*/ */
if(prev<index) { if(prev<index) {
/* find and copy uncased characters [prev..titleStart[ */ /* find and copy skipped characters [prev..titleStart[ */
int32_t titleStart=prev; int32_t titleStart=prev;
int32_t titleLimit=prev; int32_t titleLimit=prev;
UChar32 c; UChar32 c;
U8_NEXT(src, titleLimit, index, c); U8_NEXT(src, titleLimit, index, c);
if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(c)) { if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
/* Adjust the titlecasing index (titleStart) to the next cased character. */ // Adjust the titlecasing index to the next cased character,
for(;;) { // or to the next letter/number/symbol/private use.
// Stop with titleStart<titleLimit<=index
// if there is a character to be titlecased,
// or else stop with titleStart==titleLimit==index.
UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
titleStart=titleLimit; titleStart=titleLimit;
if(titleLimit==index) { if(titleLimit==index) {
/*
* only uncased characters in [prev..index[
* stop with titleStart==titleLimit==index
*/
break; break;
} }
U8_NEXT(src, titleLimit, index, c); U8_NEXT(src, titleLimit, index, c);
if(UCASE_NONE!=ucase_getType(c)) {
break; /* cased letter at [titleStart..titleLimit[ */
} }
if (prev < titleStart) {
if (!ByteSinkUtil::appendUnchanged(src+prev, titleStart-prev,
sink, options, edits, errorCode)) {
return;
} }
destIndex=appendUnchanged(dest, destIndex, destCapacity,
src+prev, titleStart-prev, options, edits);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
} }
@ -453,16 +303,15 @@ ucasemap_internalUTF8ToTitle(
csc.cpLimit=titleLimit; csc.cpLimit=titleLimit;
const UChar *s; const UChar *s;
c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale); c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale);
destIndex=appendResult(dest, destIndex, destCapacity, c, s, if (!appendResult(titleLimit-titleStart, c, s, sink, options, edits, errorCode)) {
titleLimit-titleStart, options, edits); return;
}
} else { } else {
// Malformed UTF-8. // Malformed UTF-8.
destIndex=appendUnchanged(dest, destIndex, destCapacity, if (!ByteSinkUtil::appendUnchanged(src+titleStart, titleLimit-titleStart,
src+titleStart, titleLimit-titleStart, options, edits); sink, options, edits, errorCode)) {
return;
} }
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
/* Special case Dutch IJ titlecasing */ /* Special case Dutch IJ titlecasing */
@ -470,22 +319,13 @@ ucasemap_internalUTF8ToTitle(
caseLocale == UCASE_LOC_DUTCH && caseLocale == UCASE_LOC_DUTCH &&
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) { (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
if (src[titleStart+1] == 0x006A) { if (src[titleStart+1] == 0x006A) {
destIndex=appendASCII(dest, destIndex, destCapacity, 0x004A); ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
if(edits!=NULL) {
edits->addReplace(1, 1);
}
titleLimit++; titleLimit++;
} else if (src[titleStart+1] == 0x004A) { } else if (src[titleStart+1] == 0x004A) {
// Keep the capital J from getting lowercased. // Keep the capital J from getting lowercased.
destIndex=appendUnchanged(dest, destIndex, destCapacity, if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1,
src+titleStart+1, 1, options, edits); sink, options, edits, errorCode)) {
if(destIndex<0) { return;
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
titleLimit++; titleLimit++;
} }
@ -495,26 +335,18 @@ ucasemap_internalUTF8ToTitle(
if(titleLimit<index) { if(titleLimit<index) {
if((options&U_TITLECASE_NO_LOWERCASE)==0) { if((options&U_TITLECASE_NO_LOWERCASE)==0) {
/* Normal operation: Lowercase the rest of the word. */ /* Normal operation: Lowercase the rest of the word. */
destIndex+= _caseMap(caseLocale, options, ucase_toFullLower,
_caseMap(
caseLocale, options, ucase_toFullLower,
dest+destIndex, destCapacity-destIndex,
src, &csc, src, &csc,
titleLimit, index, titleLimit, index,
edits, errorCode); sink, edits, errorCode);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR;
}
if(U_FAILURE(errorCode)) { if(U_FAILURE(errorCode)) {
return destIndex; return;
} }
} else { } else {
/* Optionally just copy the rest of the word unchanged. */ /* Optionally just copy the rest of the word unchanged. */
destIndex=appendUnchanged(dest, destIndex, destCapacity, if (!ByteSinkUtil::appendUnchanged(src+titleLimit, index-titleLimit,
src+titleLimit, index-titleLimit, options, edits); sink, options, edits, errorCode)) {
if(destIndex<0) { return;
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
} }
} }
@ -523,8 +355,6 @@ ucasemap_internalUTF8ToTitle(
prev=index; prev=index;
} }
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
} }
#endif #endif
@ -549,12 +379,10 @@ UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) {
} }
// Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java. // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
int32_t toUpper(uint32_t options, void toUpper(uint32_t options,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
Edits *edits, ByteSink &sink, Edits *edits,
UErrorCode &errorCode) { UErrorCode &errorCode) {
int32_t destIndex=0;
uint32_t state = 0; uint32_t state = 0;
for (int32_t i = 0; i < srcLength;) { for (int32_t i = 0; i < srcLength;) {
int32_t nextIndex = i; int32_t nextIndex = i;
@ -630,8 +458,10 @@ int32_t toUpper(uint32_t options,
} }
} }
UBool change = TRUE; UBool change;
if (edits != NULL) { if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
change = TRUE; // common, simple usage
} else {
// Find out first whether we are changing the text. // Find out first whether we are changing the text.
U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block
change = (i + 2) > nextIndex || change = (i + 2) > nextIndex ||
@ -662,148 +492,146 @@ int32_t toUpper(uint32_t options,
edits->addUnchanged(oldLength); edits->addUnchanged(oldLength);
} }
// Write unchanged text? // Write unchanged text?
change = (options & UCASEMAP_OMIT_UNCHANGED_TEXT) == 0; change = (options & U_OMIT_UNCHANGED_TEXT) == 0;
} }
} }
if (change) { if (change) {
destIndex=appendTwoBytes(dest, destIndex, destCapacity, upper); ByteSinkUtil::appendTwoBytes(upper, sink);
if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) { if ((data & HAS_EITHER_DIALYTIKA) != 0) {
destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0308"); // restore or add a dialytika sink.Append(u8"\u0308", 2); // restore or add a dialytika
} }
if (destIndex >= 0 && addTonos) { if (addTonos) {
destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0301"); sink.Append(u8"\u0301", 2);
} }
while (destIndex >= 0 && numYpogegrammeni > 0) { while (numYpogegrammeni > 0) {
destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0399"); sink.Append(u8"\u0399", 2);
--numYpogegrammeni; --numYpogegrammeni;
} }
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
} }
} else if(c>=0) { } else if(c>=0) {
const UChar *s; const UChar *s;
c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK); c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
destIndex = appendResult(dest, destIndex, destCapacity, c, s, if (!appendResult(nextIndex - i, c, s, sink, options, edits, errorCode)) {
nextIndex - i, options, edits); return;
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
} else { } else {
// Malformed UTF-8. // Malformed UTF-8.
destIndex=appendUnchanged(dest, destIndex, destCapacity, if (!ByteSinkUtil::appendUnchanged(src+i, nextIndex-i,
src+i, nextIndex-i, options, edits); sink, options, edits, errorCode)) {
if(destIndex<0) { return;
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
} }
i = nextIndex; i = nextIndex;
state = nextState; state = nextState;
} }
return destIndex;
} }
} // namespace GreekUpper } // namespace GreekUpper
U_NAMESPACE_END U_NAMESPACE_END
static int32_t U_CALLCONV static void U_CALLCONV
ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
icu::Edits *edits, icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) { UErrorCode &errorCode) {
UCaseContext csc=UCASECONTEXT_INITIALIZER; UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src; csc.p=(void *)src;
csc.limit=srcLength; csc.limit=srcLength;
int32_t destIndex = _caseMap( _caseMap(
caseLocale, options, ucase_toFullLower, caseLocale, options, ucase_toFullLower,
dest, destCapacity,
src, &csc, 0, srcLength, src, &csc, 0, srcLength,
edits, errorCode); sink, edits, errorCode);
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
} }
static int32_t U_CALLCONV static void U_CALLCONV
ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
icu::Edits *edits, icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) { UErrorCode &errorCode) {
int32_t destIndex;
if (caseLocale == UCASE_LOC_GREEK) { if (caseLocale == UCASE_LOC_GREEK) {
destIndex = GreekUpper::toUpper(options, dest, destCapacity, GreekUpper::toUpper(options, src, srcLength, sink, edits, errorCode);
src, srcLength, edits, errorCode);
} else { } else {
UCaseContext csc=UCASECONTEXT_INITIALIZER; UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src; csc.p=(void *)src;
csc.limit=srcLength; csc.limit=srcLength;
destIndex = _caseMap( _caseMap(
caseLocale, options, ucase_toFullUpper, caseLocale, options, ucase_toFullUpper,
dest, destCapacity,
src, &csc, 0, srcLength, src, &csc, 0, srcLength,
edits, errorCode); sink, edits, errorCode);
} }
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
} }
static int32_t U_CALLCONV static void U_CALLCONV
ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
icu::Edits *edits, icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) { UErrorCode &errorCode) {
/* case mapping loop */ /* case mapping loop */
int32_t srcIndex = 0; int32_t srcIndex = 0;
int32_t destIndex = 0; while (U_SUCCESS(errorCode) && srcIndex < srcLength) {
while (srcIndex < srcLength) {
int32_t cpStart = srcIndex; int32_t cpStart = srcIndex;
UChar32 c; UChar32 c;
U8_NEXT(src, srcIndex, srcLength, c); U8_NEXT(src, srcIndex, srcLength, c);
if(c<0) { if(c<0) {
// Malformed UTF-8. // Malformed UTF-8.
destIndex=appendUnchanged(dest, destIndex, destCapacity, ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
src+cpStart, srcIndex-cpStart, options, edits); sink, options, edits, errorCode);
if(destIndex<0) { } else {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
continue;
}
const UChar *s; const UChar *s;
c = ucase_toFullFolding(c, &s, options); c = ucase_toFullFolding(c, &s, options);
destIndex = appendResult(dest, destIndex, destCapacity, c, s, appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
srcIndex - cpStart, options, edits); }
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
} }
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); void
}
U_CFUNC int32_t
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
uint8_t *dest, int32_t destCapacity, const char *src, int32_t srcLength,
const uint8_t *src, int32_t srcLength, UTF8CaseMapper *stringCaseMapper,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) {
/* check argument values */
if (U_FAILURE(errorCode)) {
return;
}
if ((src == nullptr && srcLength != 0) || srcLength < -1) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
// Get the string length.
if (srcLength == -1) {
srcLength = (int32_t)uprv_strlen((const char *)src);
}
if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
edits->reset();
}
stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
(const uint8_t *)src, srcLength, sink, edits, errorCode);
sink.Flush();
if (U_SUCCESS(errorCode)) {
if (edits != nullptr) {
edits->copyErrorTo(errorCode);
}
}
}
int32_t
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
char *dest, int32_t destCapacity,
const char *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper, UTF8CaseMapper *stringCaseMapper,
icu::Edits *edits, icu::Edits *edits,
UErrorCode &errorCode) { UErrorCode &errorCode) {
int32_t destLength;
/* check argument values */ /* check argument values */
if(U_FAILURE(errorCode)) { if(U_FAILURE(errorCode)) {
return 0; return 0;
} }
if( destCapacity<0 || if( destCapacity<0 ||
(dest==NULL && destCapacity>0) || (dest==NULL && destCapacity>0) ||
src==NULL || (src==NULL && srcLength!=0) || srcLength<-1
srcLength<-1
) { ) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR; errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0; return 0;
@ -823,12 +651,21 @@ ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_P
return 0; return 0;
} }
if(edits!=NULL) { CheckedArrayByteSink sink(dest, destCapacity);
if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
edits->reset(); edits->reset();
} }
destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
dest, destCapacity, src, srcLength, edits, errorCode); (const uint8_t *)src, srcLength, sink, edits, errorCode);
return u_terminateChars((char *)dest, destCapacity, destLength, &errorCode); sink.Flush();
if (U_SUCCESS(errorCode)) {
if (sink.Overflowed()) {
errorCode = U_BUFFER_OVERFLOW_ERROR;
} else if (edits != nullptr) {
edits->copyErrorTo(errorCode);
}
}
return u_terminateChars(dest, destCapacity, sink.NumberOfBytesAppended(), &errorCode);
} }
/* public API functions */ /* public API functions */
@ -840,8 +677,8 @@ ucasemap_utf8ToLower(const UCaseMap *csm,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
return ucasemap_mapUTF8( return ucasemap_mapUTF8(
csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8ToLower, NULL, *pErrorCode); ucasemap_internalUTF8ToLower, NULL, *pErrorCode);
} }
@ -852,8 +689,8 @@ ucasemap_utf8ToUpper(const UCaseMap *csm,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
return ucasemap_mapUTF8( return ucasemap_mapUTF8(
csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8ToUpper, NULL, *pErrorCode); ucasemap_internalUTF8ToUpper, NULL, *pErrorCode);
} }
@ -864,13 +701,43 @@ ucasemap_utf8FoldCase(const UCaseMap *csm,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
return ucasemap_mapUTF8( return ucasemap_mapUTF8(
UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8Fold, NULL, *pErrorCode); ucasemap_internalUTF8Fold, NULL, *pErrorCode);
} }
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
// ByteSink-based lowercasing entry point.
// Resolves `locale` to a case-locale value with ustrcase_getCaseLocale() and
// forwards the bytes of `src` (src.data(), src.length()) to ucasemap_mapUTF8(),
// selecting ucasemap_internalUTF8ToLower as the per-string mapper.
// Returns nothing: `sink`, `edits` and `errorCode` are handed through to
// ucasemap_mapUTF8() unchanged.
void CaseMap::utf8ToLower(
const char *locale, uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode) {
// NOTE(review): no comma follows UCASEMAP_BREAK_ITERATOR_NULL below -- presumably the
// macro supplies its own separator or expands to nothing; confirm in its definition.
ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
src.data(), src.length(),
ucasemap_internalUTF8ToLower, sink, edits, errorCode);
}
// ByteSink-based uppercasing entry point.
// Resolves `locale` to a case-locale value with ustrcase_getCaseLocale() and
// forwards the bytes of `src` (src.data(), src.length()) to ucasemap_mapUTF8(),
// selecting ucasemap_internalUTF8ToUpper as the per-string mapper.
// Returns nothing: `sink`, `edits` and `errorCode` are handed through to
// ucasemap_mapUTF8() unchanged.
void CaseMap::utf8ToUpper(
const char *locale, uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode) {
// NOTE(review): no comma follows UCASEMAP_BREAK_ITERATOR_NULL below -- presumably the
// macro supplies its own separator or expands to nothing; confirm in its definition.
ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
src.data(), src.length(),
ucasemap_internalUTF8ToUpper, sink, edits, errorCode);
}
// ByteSink-based case-folding entry point.
// Takes no locale parameter: UCASE_LOC_ROOT is always passed as the case locale.
// Forwards the bytes of `src` (src.data(), src.length()) to ucasemap_mapUTF8(),
// selecting ucasemap_internalUTF8Fold as the per-string mapper.
// Returns nothing: `sink`, `edits` and `errorCode` are handed through to
// ucasemap_mapUTF8() unchanged.
void CaseMap::utf8Fold(
uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode) {
// NOTE(review): no comma follows UCASEMAP_BREAK_ITERATOR_NULL below -- presumably the
// macro supplies its own separator or expands to nothing; confirm in its definition.
ucasemap_mapUTF8(
UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
src.data(), src.length(),
ucasemap_internalUTF8Fold, sink, edits, errorCode);
}
int32_t CaseMap::utf8ToLower( int32_t CaseMap::utf8ToLower(
const char *locale, uint32_t options, const char *locale, uint32_t options,
const char *src, int32_t srcLength, const char *src, int32_t srcLength,
@ -878,8 +745,8 @@ int32_t CaseMap::utf8ToLower(
UErrorCode &errorCode) { UErrorCode &errorCode) {
return ucasemap_mapUTF8( return ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8ToLower, edits, errorCode); ucasemap_internalUTF8ToLower, edits, errorCode);
} }
@ -890,8 +757,8 @@ int32_t CaseMap::utf8ToUpper(
UErrorCode &errorCode) { UErrorCode &errorCode) {
return ucasemap_mapUTF8( return ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8ToUpper, edits, errorCode); ucasemap_internalUTF8ToUpper, edits, errorCode);
} }
@ -902,8 +769,8 @@ int32_t CaseMap::utf8Fold(
UErrorCode &errorCode) { UErrorCode &errorCode) {
return ucasemap_mapUTF8( return ucasemap_mapUTF8(
UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8Fold, edits, errorCode); ucasemap_internalUTF8Fold, edits, errorCode);
} }

View File

@ -9,16 +9,26 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/ucasemap.h" #include "unicode/ucasemap.h"
#include "unicode/uchar.h"
#include "ucase.h" #include "ucase.h"
#ifndef U_COMPARE_IGNORE_CASE
/* see also unorm.h */
/** /**
* Option bit for unorm_compare: * Bit mask for the titlecasing iterator options bit field.
* Perform case-insensitive comparison. * Currently only 3 out of 8 values are used:
* 0 (words), U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* See stringoptions.h.
* @internal
*/ */
#define U_COMPARE_IGNORE_CASE 0x10000 #define U_TITLECASE_ITERATOR_MASK 0xe0
#endif
/**
* Bit mask for the titlecasing index adjustment options bit set.
* Currently two bits are defined:
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED.
* See stringoptions.h.
* @internal
*/
#define U_TITLECASE_ADJUSTMENT_MASK 0x600
/** /**
* Internal API, used by u_strcasecmp() etc. * Internal API, used by u_strcasecmp() etc.
@ -32,7 +42,7 @@ u_strcmpFold(const UChar *s1, int32_t length1,
UErrorCode *pErrorCode); UErrorCode *pErrorCode);
/** /**
* Interanl API, used for detecting length of * Internal API, used for detecting length of
* shared prefix case-insensitively. * shared prefix case-insensitively.
* @param s1 input string 1 * @param s1 input string 1
* @param length1 length of string 1, or -1 (NULL terminated) * @param length1 length of string 1, or -1 (NULL terminated)
@ -61,6 +71,44 @@ uprv_haveProperties(UErrorCode *pErrorCode);
#ifdef __cplusplus #ifdef __cplusplus
U_NAMESPACE_BEGIN
class BreakIterator; // unicode/brkiter.h
class ByteSink;
class Locale; // unicode/locid.h
/**
 * Validates the titlecasing index-adjustment bits in options.
 * Returns FALSE without touching errorCode if it already holds a failure.
 * Returns FALSE and sets U_ILLEGAL_ARGUMENT_ERROR if every bit of
 * U_TITLECASE_ADJUSTMENT_MASK is set in options.
 * Returns TRUE otherwise.
 */
inline UBool ustrcase_checkTitleAdjustmentOptions(uint32_t options, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    const uint32_t adjustmentBits = options & U_TITLECASE_ADJUSTMENT_MASK;
    if (adjustmentBits != U_TITLECASE_ADJUSTMENT_MASK) {
        return TRUE;
    }
    // All adjustment bits requested at once: reject the combination.
    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return FALSE;
}
/**
 * Tests whether c is a letter, number, symbol, or private use code point.
 * Private use code points are included because they are typically used as
 * letters or numbers. Modifier letters are accepted only if they are cased,
 * i.e. ucase_getType(c) is not UCASE_NONE.
 */
inline UBool ustrcase_isLNS(UChar32 c) {
    // L, N, S and Co categories, with Lm taken out so it can be handled separately below.
    const uint32_t lnsMask = (U_GC_L_MASK|U_GC_N_MASK|U_GC_S_MASK|U_GC_CO_MASK) & ~U_GC_LM_MASK;
    int category = u_charType(c);
    if ((U_MASK(category) & lnsMask) != 0) {
        return TRUE;
    }
    return category == U_MODIFIER_LETTER && ucase_getType(c) != UCASE_NONE;
}
#if !UCONFIG_NO_BREAK_ITERATION
/** Returns nullptr if error. Pass in either locale or locID, not both. */
U_CFUNC
BreakIterator *ustrcase_getTitleBreakIterator(
const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter,
LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode);
#endif
U_NAMESPACE_END
#include "unicode/unistr.h" // for UStringCaseMapper #include "unicode/unistr.h" // for UStringCaseMapper
/* /*
@ -163,39 +211,43 @@ ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITE
* UTF-8 version of UStringCaseMapper. * UTF-8 version of UStringCaseMapper.
* All error checking must be done. * All error checking must be done.
* The UCaseMap must be fully initialized, with locale and/or iter set as needed. * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
* src and dest must not overlap.
*/ */
typedef int32_t U_CALLCONV typedef void U_CALLCONV
UTF8CaseMapper(int32_t caseLocale, uint32_t options, UTF8CaseMapper(int32_t caseLocale, uint32_t options,
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
icu::BreakIterator *iter, icu::BreakIterator *iter,
#endif #endif
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
icu::Edits *edits, icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode); UErrorCode &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
/** Implements UTF8CaseMapper. */ /** Implements UTF8CaseMapper. */
U_CFUNC int32_t U_CALLCONV U_CFUNC void U_CALLCONV
ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options, ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
icu::BreakIterator *iter, icu::BreakIterator *iter,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
icu::Edits *edits, icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode); UErrorCode &errorCode);
#endif #endif
void
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
const char *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode);
/** /**
* Implements argument checking and buffer handling * Implements argument checking and buffer handling
* for UTF-8 string case mapping as a common function. * for UTF-8 string case mapping as a common function.
*/ */
U_CFUNC int32_t int32_t
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
uint8_t *dest, int32_t destCapacity, char *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const char *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper, UTF8CaseMapper *stringCaseMapper,
icu::Edits *edits, icu::Edits *edits,
UErrorCode &errorCode); UErrorCode &errorCode);

View File

@ -31,6 +31,29 @@
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
void CaseMap::utf8ToTitle(
const char *locale, uint32_t options, BreakIterator *iter,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) {
return;
}
UText utext = UTEXT_INITIALIZER;
utext_openUTF8(&utext, src.data(), src.length(), &errorCode);
LocalPointer<BreakIterator> ownedIter;
iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode);
if (iter == nullptr) {
utext_close(&utext);
return;
}
iter->setText(&utext, errorCode);
ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, iter,
src.data(), src.length(),
ucasemap_internalUTF8ToTitle, sink, edits, errorCode);
utext_close(&utext);
}
int32_t CaseMap::utf8ToTitle( int32_t CaseMap::utf8ToTitle(
const char *locale, uint32_t options, BreakIterator *iter, const char *locale, uint32_t options, BreakIterator *iter,
const char *src, int32_t srcLength, const char *src, int32_t srcLength,
@ -42,19 +65,16 @@ int32_t CaseMap::utf8ToTitle(
UText utext=UTEXT_INITIALIZER; UText utext=UTEXT_INITIALIZER;
utext_openUTF8(&utext, src, srcLength, &errorCode); utext_openUTF8(&utext, src, srcLength, &errorCode);
LocalPointer<BreakIterator> ownedIter; LocalPointer<BreakIterator> ownedIter;
iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode);
if(iter==NULL) { if(iter==NULL) {
iter=BreakIterator::createWordInstance(Locale(locale), errorCode);
ownedIter.adoptInstead(iter);
}
if(U_FAILURE(errorCode)) {
utext_close(&utext); utext_close(&utext);
return 0; return 0;
} }
iter->setText(&utext, errorCode); iter->setText(&utext, errorCode);
int32_t length=ucasemap_mapUTF8( int32_t length=ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, iter, ustrcase_getCaseLocale(locale), options, iter,
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8ToTitle, edits, errorCode); ucasemap_internalUTF8ToTitle, edits, errorCode);
utext_close(&utext); utext_close(&utext);
return length; return length;
@ -88,17 +108,24 @@ ucasemap_utf8ToTitle(UCaseMap *csm,
} }
UText utext=UTEXT_INITIALIZER; UText utext=UTEXT_INITIALIZER;
utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode); utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
if(csm->iter==NULL) {
csm->iter=BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode);
}
if (U_FAILURE(*pErrorCode)) { if (U_FAILURE(*pErrorCode)) {
return 0; return 0;
} }
if(csm->iter==NULL) {
LocalPointer<BreakIterator> ownedIter;
BreakIterator *iter = ustrcase_getTitleBreakIterator(
nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode);
if (iter == nullptr) {
utext_close(&utext);
return 0;
}
csm->iter = ownedIter.orphan();
}
csm->iter->setText(&utext, *pErrorCode); csm->iter->setText(&utext, *pErrorCode);
int32_t length=ucasemap_mapUTF8( int32_t length=ucasemap_mapUTF8(
csm->caseLocale, csm->options, csm->iter, csm->caseLocale, csm->options, csm->iter,
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8ToTitle, NULL, *pErrorCode); ucasemap_internalUTF8ToTitle, NULL, *pErrorCode);
utext_close(&utext); utext_close(&utext);
return length; return length;

View File

@ -729,8 +729,5 @@ upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
} }
/* add the start code point of each same-value range of the properties vectors trie */ /* add the start code point of each same-value range of the properties vectors trie */
if(propsVectorsColumns>0) {
/* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */
utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa); utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
} }
}

File diff suppressed because it is too large Load Diff

View File

@ -287,7 +287,7 @@ UCharsTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UCha
UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode) UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
: LinearMatchNode(len, nextNode), s(units) { : LinearMatchNode(len, nextNode), s(units) {
hash=hash*37+ustr_hashUCharsN(units, len); hash=hash*37u+ustr_hashUCharsN(units, len);
} }
UBool UBool

View File

@ -35,7 +35,7 @@ typedef enum ECleanupCommonType {
UCLN_COMMON_START = -1, UCLN_COMMON_START = -1,
UCLN_COMMON_USPREP, UCLN_COMMON_USPREP,
UCLN_COMMON_BREAKITERATOR, UCLN_COMMON_BREAKITERATOR,
UCLN_COMMON_BREAKITERATOR_DICT, UCLN_COMMON_RBBI,
UCLN_COMMON_SERVICE, UCLN_COMMON_SERVICE,
UCLN_COMMON_LOCALE_KEY_TYPE, UCLN_COMMON_LOCALE_KEY_TYPE,
UCLN_COMMON_LOCALE, UCLN_COMMON_LOCALE,

View File

@ -315,6 +315,7 @@ _CompoundTextClose(UConverter *converter) {
} }
uprv_free(converter->extraInfo); uprv_free(converter->extraInfo);
converter->extraInfo = NULL;
} }
} }
@ -519,7 +520,7 @@ UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs *args,
currentState = tmpState; currentState = tmpState;
} }
sourceOffset = uprv_strlen((char*)escSeqCompoundText[currentState]) - args->converter->toULength; sourceOffset = static_cast<int32_t>(uprv_strlen((char*)escSeqCompoundText[currentState]) - args->converter->toULength);
mySource += sourceOffset; mySource += sourceOffset;

View File

@ -966,26 +966,26 @@ _LMBCSFromUnicode(UConverterFromUnicodeArgs* args,
if(extraInfo->localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START) if(extraInfo->localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START)
{ {
bytes_written = LMBCSConversionWorker (extraInfo, bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
ULMBCS_GRP_L1, pLMBCS, &uniChar, ULMBCS_GRP_L1, pLMBCS, &uniChar,
&lastConverterIndex, groups_tried); &lastConverterIndex, groups_tried);
if(!bytes_written) if(!bytes_written)
{ {
bytes_written = LMBCSConversionWorker (extraInfo, bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar, ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar,
&lastConverterIndex, groups_tried); &lastConverterIndex, groups_tried);
} }
if(!bytes_written) if(!bytes_written)
{ {
bytes_written = LMBCSConversionWorker (extraInfo, bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
extraInfo->localeConverterIndex, pLMBCS, &uniChar, extraInfo->localeConverterIndex, pLMBCS, &uniChar,
&lastConverterIndex, groups_tried); &lastConverterIndex, groups_tried);
} }
} }
else else
{ {
bytes_written = LMBCSConversionWorker (extraInfo, bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
extraInfo->localeConverterIndex, pLMBCS, &uniChar, extraInfo->localeConverterIndex, pLMBCS, &uniChar,
&lastConverterIndex, groups_tried); &lastConverterIndex, groups_tried);
} }

View File

@ -1323,9 +1323,17 @@ _UTF16GetName(const UConverter *cnv) {
U_CDECL_END U_CDECL_END
extern const UConverterSharedData _UTF16Data; extern const UConverterSharedData _UTF16Data;
#define IS_UTF16BE(cnv) ((cnv)->sharedData==&_UTF16BEData) static inline bool IS_UTF16BE(const UConverter *cnv) {
#define IS_UTF16LE(cnv) ((cnv)->sharedData==&_UTF16LEData) return ((cnv)->sharedData == &_UTF16BEData);
#define IS_UTF16(cnv) ((cnv)->sharedData==&_UTF16Data || (cnv)->sharedData==&_UTF16v2Data) }
static inline bool IS_UTF16LE(const UConverter *cnv) {
return ((cnv)->sharedData == &_UTF16LEData);
}
static inline bool IS_UTF16(const UConverter *cnv) {
return ((cnv)->sharedData==&_UTF16Data) || ((cnv)->sharedData == &_UTF16v2Data);
}
U_CDECL_BEGIN U_CDECL_BEGIN
static void U_CALLCONV static void U_CALLCONV

View File

@ -31,6 +31,7 @@
#include "ucnv_bld.h" #include "ucnv_bld.h"
#include "ucnv_cnv.h" #include "ucnv_cnv.h"
#include "cmemory.h" #include "cmemory.h"
#include "ustr_imp.h"
/* Prototypes --------------------------------------------------------------- */ /* Prototypes --------------------------------------------------------------- */
@ -44,51 +45,13 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args
/* UTF-8 -------------------------------------------------------------------- */ /* UTF-8 -------------------------------------------------------------------- */
/* UTF-8 Conversion DATA
* for more information see Unicode Standard 2.0, Transformation Formats Appendix A-9
*/
/*static const uint32_t REPLACEMENT_CHARACTER = 0x0000FFFD;*/
#define MAXIMUM_UCS2 0x0000FFFF #define MAXIMUM_UCS2 0x0000FFFF
#define MAXIMUM_UTF 0x0010FFFF
#define MAXIMUM_UCS4 0x7FFFFFFF
#define HALF_SHIFT 10
#define HALF_BASE 0x0010000
#define HALF_MASK 0x3FF
#define SURROGATE_HIGH_START 0xD800
#define SURROGATE_HIGH_END 0xDBFF
#define SURROGATE_LOW_START 0xDC00
#define SURROGATE_LOW_END 0xDFFF
/* -SURROGATE_LOW_START + HALF_BASE */ static const uint32_t offsetsFromUTF8[5] = {0,
#define SURROGATE_LOW_BASE 9216
static const uint32_t offsetsFromUTF8[7] = {0,
(uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080, (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080,
(uint32_t) 0x03C82080, (uint32_t) 0xFA082080, (uint32_t) 0x82082080 (uint32_t) 0x03C82080
}; };
/* END OF UTF-8 Conversion DATA */
static const int8_t bytesFromUTF8[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
};
/*
* Starting with Unicode 3.0.1:
* UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N];
* byte sequences with more than 4 bytes are illegal in UTF-8,
* which is tested with impossible values for them
*/
static const uint32_t
utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
static UBool hasCESU8Data(const UConverter *cnv) static UBool hasCESU8Data(const UConverter *cnv)
{ {
#if UCONFIG_ONLY_HTML_CONVERSION #if UCONFIG_ONLY_HTML_CONVERSION
@ -127,7 +90,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
while (mySource < sourceLimit && myTarget < targetLimit) while (mySource < sourceLimit && myTarget < targetLimit)
{ {
ch = *(mySource++); ch = *(mySource++);
if (ch < 0x80) /* Simple case */ if (U8_IS_SINGLE(ch)) /* Simple case */
{ {
*(myTarget++) = (UChar) ch; *(myTarget++) = (UChar) ch;
} }
@ -135,7 +98,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
{ {
/* store the first char */ /* store the first char */
toUBytes[0] = (char)ch; toUBytes[0] = (char)ch;
inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */ inBytes = U8_COUNT_BYTES_NON_ASCII(ch); /* lookup current sequence length */
i = 1; i = 1;
morebytes: morebytes:
@ -144,7 +107,8 @@ morebytes:
if (mySource < sourceLimit) if (mySource < sourceLimit)
{ {
toUBytes[i] = (char) (ch2 = *mySource); toUBytes[i] = (char) (ch2 = *mySource);
if (!U8_IS_TRAIL(ch2)) if (!icu::UTF8::isValidTrail(ch, ch2, i, inBytes) &&
!(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
{ {
break; /* i < inBytes */ break; /* i < inBytes */
} }
@ -162,24 +126,12 @@ morebytes:
} }
} }
// In CESU-8, only surrogates, not supplementary code points, are encoded directly.
if (i == inBytes && (!isCESU8 || i <= 3))
{
/* Remove the accumulated high bits */ /* Remove the accumulated high bits */
ch -= offsetsFromUTF8[inBytes]; ch -= offsetsFromUTF8[inBytes];
/*
* Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
* - use only trail bytes after a lead byte (checked above)
* - use the right number of trail bytes for a given lead byte
* - encode a code point <= U+10ffff
* - use the fewest possible number of bytes for their code points
* - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
*
* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
* There are no irregular sequences any more.
* In CESU-8, only surrogates, not supplementary code points, are encoded directly.
*/
if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
(isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
{
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
if (ch <= MAXIMUM_UCS2) if (ch <= MAXIMUM_UCS2)
{ {
@ -189,9 +141,8 @@ morebytes:
else else
{ {
/* write out the surrogates */ /* write out the surrogates */
ch -= HALF_BASE; *(myTarget++) = U16_LEAD(ch);
*(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START); ch = U16_TRAIL(ch);
ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
if (myTarget < targetLimit) if (myTarget < targetLimit)
{ {
*(myTarget++) = (UChar)ch; *(myTarget++) = (UChar)ch;
@ -256,7 +207,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr
while (mySource < sourceLimit && myTarget < targetLimit) while (mySource < sourceLimit && myTarget < targetLimit)
{ {
ch = *(mySource++); ch = *(mySource++);
if (ch < 0x80) /* Simple case */ if (U8_IS_SINGLE(ch)) /* Simple case */
{ {
*(myTarget++) = (UChar) ch; *(myTarget++) = (UChar) ch;
*(myOffsets++) = offsetNum++; *(myOffsets++) = offsetNum++;
@ -264,7 +215,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr
else else
{ {
toUBytes[0] = (char)ch; toUBytes[0] = (char)ch;
inBytes = bytesFromUTF8[ch]; inBytes = U8_COUNT_BYTES_NON_ASCII(ch);
i = 1; i = 1;
morebytes: morebytes:
@ -273,7 +224,8 @@ morebytes:
if (mySource < sourceLimit) if (mySource < sourceLimit)
{ {
toUBytes[i] = (char) (ch2 = *mySource); toUBytes[i] = (char) (ch2 = *mySource);
if (!U8_IS_TRAIL(ch2)) if (!icu::UTF8::isValidTrail(ch, ch2, i, inBytes) &&
!(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
{ {
break; /* i < inBytes */ break; /* i < inBytes */
} }
@ -290,24 +242,12 @@ morebytes:
} }
} }
// In CESU-8, only surrogates, not supplementary code points, are encoded directly.
if (i == inBytes && (!isCESU8 || i <= 3))
{
/* Remove the accumulated high bits */ /* Remove the accumulated high bits */
ch -= offsetsFromUTF8[inBytes]; ch -= offsetsFromUTF8[inBytes];
/*
* Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
* - use only trail bytes after a lead byte (checked above)
* - use the right number of trail bytes for a given lead byte
* - encode a code point <= U+10ffff
* - use the fewest possible number of bytes for their code points
* - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
*
* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
* There are no irregular sequences any more.
* In CESU-8, only surrogates, not supplementary code points, are encoded directly.
*/
if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
(isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
{
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
if (ch <= MAXIMUM_UCS2) if (ch <= MAXIMUM_UCS2)
{ {
@ -318,10 +258,9 @@ morebytes:
else else
{ {
/* write out the surrogates */ /* write out the surrogates */
ch -= HALF_BASE; *(myTarget++) = U16_LEAD(ch);
*(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
*(myOffsets++) = offsetNum; *(myOffsets++) = offsetNum;
ch = (ch & HALF_MASK) + SURROGATE_LOW_START; ch = U16_TRAIL(ch);
if (myTarget < targetLimit) if (myTarget < targetLimit)
{ {
*(myTarget++) = (UChar)ch; *(myTarget++) = (UChar)ch;
@ -616,10 +555,9 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
UConverter *cnv; UConverter *cnv;
const uint8_t *sourceInitial; const uint8_t *sourceInitial;
const uint8_t *source; const uint8_t *source;
uint16_t extraBytesToWrite;
uint8_t myByte; uint8_t myByte;
UChar32 ch; UChar32 ch;
int8_t i, isLegalSequence; int8_t i;
/* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */ /* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */
@ -633,14 +571,14 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
} }
myByte = (uint8_t)*(source++); myByte = (uint8_t)*(source++);
if (myByte < 0x80) if (U8_IS_SINGLE(myByte))
{ {
args->source = (const char *)source; args->source = (const char *)source;
return (UChar32)myByte; return (UChar32)myByte;
} }
extraBytesToWrite = (uint16_t)bytesFromUTF8[myByte]; uint16_t countTrailBytes = U8_COUNT_TRAIL_BYTES(myByte);
if (extraBytesToWrite == 0) { if (countTrailBytes == 0) {
cnv->toUBytes[0] = myByte; cnv->toUBytes[0] = myByte;
cnv->toULength = 1; cnv->toULength = 1;
*err = U_ILLEGAL_CHAR_FOUND; *err = U_ILLEGAL_CHAR_FOUND;
@ -649,15 +587,17 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
} }
/*The byte sequence is longer than the buffer area passed*/ /*The byte sequence is longer than the buffer area passed*/
if (((const char *)source + extraBytesToWrite - 1) > args->sourceLimit) if (((const char *)source + countTrailBytes) > args->sourceLimit)
{ {
/* check if all of the remaining bytes are trail bytes */ /* check if all of the remaining bytes are trail bytes */
uint16_t extraBytesToWrite = countTrailBytes + 1;
cnv->toUBytes[0] = myByte; cnv->toUBytes[0] = myByte;
i = 1; i = 1;
*err = U_TRUNCATED_CHAR_FOUND; *err = U_TRUNCATED_CHAR_FOUND;
while(source < (const uint8_t *)args->sourceLimit) { while(source < (const uint8_t *)args->sourceLimit) {
if(U8_IS_TRAIL(myByte = *source)) { uint8_t b = *source;
cnv->toUBytes[i++] = myByte; if(icu::UTF8::isValidTrail(myByte, b, i, extraBytesToWrite)) {
cnv->toUBytes[i++] = b;
++source; ++source;
} else { } else {
/* error even before we run out of input */ /* error even before we run out of input */
@ -670,82 +610,29 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
return 0xffff; return 0xffff;
} }
isLegalSequence = 1;
ch = myByte << 6; ch = myByte << 6;
switch(extraBytesToWrite) if(countTrailBytes == 2) {
{ uint8_t t1 = *source, t2;
/* note: code falls through cases! (sic)*/ if(U8_IS_VALID_LEAD3_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source)) {
case 6: args->source = (const char *)(source + 1);
ch += (myByte = *source); return (((ch + t1) << 6) + t2) - offsetsFromUTF8[3];
ch <<= 6;
if (!U8_IS_TRAIL(myByte))
{
isLegalSequence = 0;
break;
} }
++source; } else if(countTrailBytes == 1) {
U_FALLTHROUGH; uint8_t t1 = *source;
case 5: if(U8_IS_TRAIL(t1)) {
ch += (myByte = *source); args->source = (const char *)(source + 1);
ch <<= 6; return (ch + t1) - offsetsFromUTF8[2];
if (!U8_IS_TRAIL(myByte))
{
isLegalSequence = 0;
break;
} }
++source; } else { // countTrailBytes == 3
U_FALLTHROUGH; uint8_t t1 = *source, t2, t3;
case 4: if(U8_IS_VALID_LEAD4_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source) &&
ch += (myByte = *source); U8_IS_TRAIL(t3 = *++source)) {
ch <<= 6; args->source = (const char *)(source + 1);
if (!U8_IS_TRAIL(myByte)) return (((((ch + t1) << 6) + t2) << 6) + t3) - offsetsFromUTF8[4];
{
isLegalSequence = 0;
break;
} }
++source;
U_FALLTHROUGH;
case 3:
ch += (myByte = *source);
ch <<= 6;
if (!U8_IS_TRAIL(myByte))
{
isLegalSequence = 0;
break;
} }
++source;
U_FALLTHROUGH;
case 2:
ch += (myByte = *source);
if (!U8_IS_TRAIL(myByte))
{
isLegalSequence = 0;
break;
}
++source;
};
ch -= offsetsFromUTF8[extraBytesToWrite];
args->source = (const char *)source; args->source = (const char *)source;
/*
* Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
* - use only trail bytes after a lead byte (checked above)
* - use the right number of trail bytes for a given lead byte
* - encode a code point <= U+10ffff
* - use the fewest possible number of bytes for their code points
* - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
*
* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
* There are no irregular sequences any more.
*/
if (isLegalSequence &&
(uint32_t)ch <= MAXIMUM_UTF &&
(uint32_t)ch >= utf8_minChar32[extraBytesToWrite] &&
!U_IS_SURROGATE(ch)
) {
return ch; /* return the code point */
}
for(i = 0; sourceInitial < source; ++i) { for(i = 0; sourceInitial < source; ++i) {
cnv->toUBytes[i] = *sourceInitial++; cnv->toUBytes[i] = *sourceInitial++;
} }
@ -757,14 +644,6 @@ U_CDECL_END
/* UTF-8-from-UTF-8 conversion functions ------------------------------------ */ /* UTF-8-from-UTF-8 conversion functions ------------------------------------ */
/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */
static const UChar32
utf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 };
/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
static const UChar32
utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
U_CDECL_BEGIN U_CDECL_BEGIN
/* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */ /* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */
static void U_CALLCONV static void U_CALLCONV
@ -812,39 +691,35 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
*pErrorCode=U_USING_DEFAULT_WARNING; *pErrorCode=U_USING_DEFAULT_WARNING;
return; return;
} else { } else {
/* // Use a single counter for source and target, counting the minimum of
* Use a single counter for source and target, counting the minimum of // the source length and the target capacity.
* the source length and the target capacity. // Let the standard converter handle edge cases.
* As a result, the source length is checked only once per multi-byte
* character instead of twice.
*
* Make sure that the last byte sequence is complete, or else
* stop just before it.
* (The longest legal byte sequence has 3 trail bytes.)
* Count oldToULength (number of source bytes from a previous buffer)
* into the source length but reduce the source index by toULimit
* while going back over trail bytes in order to not go back into
* the bytes that will be read for finishing a partial
* sequence from the previous buffer.
* Let the standard converter handle edge cases.
*/
int32_t i;
if(count>targetCapacity) { if(count>targetCapacity) {
count=targetCapacity; count=targetCapacity;
} }
i=0; // The conversion loop checks count>0 only once per 1/2/3-byte character.
while(i<3 && i<(count-toULimit)) { // If the buffer ends with a truncated 2- or 3-byte sequence,
b=source[count-oldToULength-i-1]; // then we reduce the count to stop before that,
if(U8_IS_TRAIL(b)) { // and collect the remaining bytes after the conversion loop.
++i; {
} else { // Do not go back into the bytes that will be read for finishing a partial
if(i<U8_COUNT_TRAIL_BYTES(b)) { // sequence from the previous buffer.
/* stop converting before the lead byte if there are not enough trail bytes for it */ int32_t length=count-toULimit;
count-=i+1; if(length>0) {
uint8_t b1=*(sourceLimit-1);
if(U8_IS_SINGLE(b1)) {
// common ASCII character
} else if(U8_IS_TRAIL(b1) && length>=2) {
uint8_t b2=*(sourceLimit-2);
if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
// truncated 3-byte sequence
count-=2;
}
} else if(0xc2<=b1 && b1<0xf0) {
// truncated 2- or 3-byte sequence
--count;
} }
break;
} }
} }
} }
@ -859,17 +734,17 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
/* conversion loop */ /* conversion loop */
while(count>0) { while(count>0) {
b=*source++; b=*source++;
if((int8_t)b>=0) { if(U8_IS_SINGLE(b)) {
/* convert ASCII */ /* convert ASCII */
*target++=b; *target++=b;
--count; --count;
continue; continue;
} else { } else {
if(b>0xe0) { if(b>=0xe0) {
if( /* handle U+1000..U+D7FF inline */ if( /* handle U+0800..U+FFFF inline */
(t1=source[0]) >= 0x80 && ((b<0xed && (t1 <= 0xbf)) || b<0xf0 &&
(b==0xed && (t1 <= 0x9f))) && U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) &&
(t2=source[1]) >= 0x80 && t2 <= 0xbf U8_IS_TRAIL(t2=source[1])
) { ) {
source+=2; source+=2;
*target++=b; *target++=b;
@ -878,10 +753,10 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
count-=3; count-=3;
continue; continue;
} }
} else if(b<0xe0) { } else {
if( /* handle U+0080..U+07FF inline */ if( /* handle U+0080..U+07FF inline */
b>=0xc2 && b>=0xc2 &&
(t1=*source) >= 0x80 && t1 <= 0xbf U8_IS_TRAIL(t1=*source)
) { ) {
++source; ++source;
*target++=b; *target++=b;
@ -889,30 +764,18 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
count-=2; count-=2;
continue; continue;
} }
} else if(b==0xe0) {
if( /* handle U+0800..U+0FFF inline */
(t1=source[0]) >= 0xa0 && t1 <= 0xbf &&
(t2=source[1]) >= 0x80 && t2 <= 0xbf
) {
source+=2;
*target++=b;
*target++=t1;
*target++=t2;
count-=3;
continue;
}
} }
/* handle "complicated" and error cases, and continuing partial characters */ /* handle "complicated" and error cases, and continuing partial characters */
oldToULength=0; oldToULength=0;
toULength=1; toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1; toULimit=U8_COUNT_BYTES_NON_ASCII(b);
c=b; c=b;
moreBytes: moreBytes:
while(toULength<toULimit) { while(toULength<toULimit) {
if(source<sourceLimit) { if(source<sourceLimit) {
b=*source; b=*source;
if(U8_IS_TRAIL(b)) { if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
++source; ++source;
++toULength; ++toULength;
c=(c<<6)+b; c=(c<<6)+b;
@ -934,18 +797,7 @@ moreBytes:
} }
} }
if( toULength==toULimit && /* consumed all trail bytes */ if(toULength!=toULimit) {
(toULength==3 || toULength==2) && /* BMP */
(c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
(c<=0xd7ff || 0xe000<=c) /* not a surrogate */
) {
/* legal byte sequence for BMP code point */
} else if(
toULength==toULimit && toULength==4 &&
(0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
) {
/* legal byte sequence for supplementary code point */
} else {
/* error handling: illegal UTF-8 byte sequence */ /* error handling: illegal UTF-8 byte sequence */
source-=(toULength-oldToULength); source-=(toULength-oldToULength);
while(oldToULength<toULength) { while(oldToULength<toULength) {
@ -979,7 +831,7 @@ moreBytes:
*pErrorCode=U_BUFFER_OVERFLOW_ERROR; *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
} else { } else {
b=*source; b=*source;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1; toULimit=U8_COUNT_BYTES(b);
if(toULimit>(sourceLimit-source)) { if(toULimit>(sourceLimit-source)) {
/* collect a truncated byte sequence */ /* collect a truncated byte sequence */
toULength=0; toULength=0;

View File

@ -23,6 +23,7 @@
#include "unicode/utf8.h" #include "unicode/utf8.h"
#include "ucnv_bld.h" #include "ucnv_bld.h"
#include "ucnv_cnv.h" #include "ucnv_cnv.h"
#include "ustr_imp.h"
/* control optimizations according to the platform */ /* control optimizations according to the platform */
#define LATIN1_UNROLL_FROM_UNICODE 1 #define LATIN1_UNROLL_FROM_UNICODE 1
@ -374,7 +375,7 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
while(source<sourceLimit) { while(source<sourceLimit) {
if(targetCapacity>0) { if(targetCapacity>0) {
b=*source++; b=*source++;
if((int8_t)b>=0) { if(U8_IS_SINGLE(b)) {
/* convert ASCII */ /* convert ASCII */
*target++=(uint8_t)b; *target++=(uint8_t)b;
--targetCapacity; --targetCapacity;
@ -409,7 +410,7 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++; utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
utf8->toULength=1; utf8->toULength=1;
utf8->mode=U8_COUNT_TRAIL_BYTES(b)+1; utf8->mode=U8_COUNT_BYTES(b);
} }
/* write back the updated pointers */ /* write back the updated pointers */

View File

@ -59,6 +59,7 @@
#include "cmemory.h" #include "cmemory.h"
#include "cstring.h" #include "cstring.h"
#include "umutex.h" #include "umutex.h"
#include "ustr_imp.h"
/* control optimizations according to the platform */ /* control optimizations according to the platform */
#define MBCS_UNROLL_SINGLE_TO_BMP 1 #define MBCS_UNROLL_SINGLE_TO_BMP 1
@ -5011,13 +5012,9 @@ ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData,
/* MBCS-from-UTF-8 conversion functions ------------------------------------- */ /* MBCS-from-UTF-8 conversion functions ------------------------------------- */
/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */
static const UChar32
utf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 };
/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */ /* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
static const UChar32 static const UChar32
utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 }; utf8_offsets[5]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
static void U_CALLCONV static void U_CALLCONV
ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
@ -5037,7 +5034,7 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
uint8_t b, t1, t2; uint8_t b, t1, t2;
uint32_t asciiRoundtrips; uint32_t asciiRoundtrips;
uint16_t value, minValue; uint16_t value, minValue = 0;
UBool hasSupplementary; UBool hasSupplementary;
/* set up the local pointers */ /* set up the local pointers */
@ -5075,28 +5072,27 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
toULength=oldToULength=toULimit=0; toULength=oldToULength=toULimit=0;
} }
/* // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
* Make sure that the last byte sequence before sourceLimit is complete // If the buffer ends with a truncated 2- or 3-byte sequence,
* or runs into a lead byte. // then we reduce the sourceLimit to before that,
* Do not go back into the bytes that will be read for finishing a partial // and collect the remaining bytes after the conversion loop.
* sequence from the previous buffer.
* In the conversion loop compare source with sourceLimit only once
* per multi-byte character.
*/
{ {
int32_t i, length; // Do not go back into the bytes that will be read for finishing a partial
// sequence from the previous buffer.
length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength); int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
for(i=0; i<3 && i<length;) { if(length>0) {
b=*(sourceLimit-i-1); uint8_t b1=*(sourceLimit-1);
if(U8_IS_TRAIL(b)) { if(U8_IS_SINGLE(b1)) {
++i; // common ASCII character
} else { } else if(U8_IS_TRAIL(b1) && length>=2) {
if(i<U8_COUNT_TRAIL_BYTES(b)) { uint8_t b2=*(sourceLimit-2);
/* exit the conversion loop before the lead byte if there are not enough trail bytes for it */ if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
sourceLimit-=i+1; // truncated 3-byte sequence
sourceLimit-=2;
} }
break; } else if(0xc2<=b1 && b1<0xf0) {
// truncated 2- or 3-byte sequence
--sourceLimit;
} }
} }
} }
@ -5130,7 +5126,7 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
while(source<sourceLimit) { while(source<sourceLimit) {
if(targetCapacity>0) { if(targetCapacity>0) {
b=*source++; b=*source++;
if((int8_t)b>=0) { if(U8_IS_SINGLE(b)) {
/* convert ASCII */ /* convert ASCII */
if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) { if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
*target++=(uint8_t)b; *target++=(uint8_t)b;
@ -5185,7 +5181,7 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
/* handle "complicated" and error cases, and continuing partial characters */ /* handle "complicated" and error cases, and continuing partial characters */
oldToULength=0; oldToULength=0;
toULength=1; toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1; toULimit=U8_COUNT_BYTES_NON_ASCII(b);
c=b; c=b;
moreBytes: moreBytes:
while(toULength<toULimit) { while(toULength<toULimit) {
@ -5198,7 +5194,7 @@ moreBytes:
*/ */
if(source<(uint8_t *)pToUArgs->sourceLimit) { if(source<(uint8_t *)pToUArgs->sourceLimit) {
b=*source; b=*source;
if(U8_IS_TRAIL(b)) { if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
++source; ++source;
++toULength; ++toULength;
c=(c<<6)+b; c=(c<<6)+b;
@ -5220,16 +5216,11 @@ moreBytes:
} }
} }
if( toULength==toULimit && /* consumed all trail bytes */ if(toULength==toULimit) {
(toULength==3 || toULength==2) && /* BMP */ c-=utf8_offsets[toULength];
(c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] && if(toULength<=3) { /* BMP */
(c<=0xd7ff || 0xe000<=c) /* not a surrogate */
) {
value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
} else if( } else {
toULength==toULimit && toULength==4 &&
(0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
) {
/* supplementary code point */ /* supplementary code point */
if(!hasSupplementary) { if(!hasSupplementary) {
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
@ -5237,6 +5228,7 @@ moreBytes:
} else { } else {
value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
} }
}
} else { } else {
/* error handling: illegal UTF-8 byte sequence */ /* error handling: illegal UTF-8 byte sequence */
source-=(toULength-oldToULength); source-=(toULength-oldToULength);
@ -5310,7 +5302,7 @@ moreBytes:
source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
c=utf8->toUBytes[0]=b=*source++; c=utf8->toUBytes[0]=b=*source++;
toULength=1; toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1; toULimit=U8_COUNT_BYTES(b);
while(source<sourceLimit) { while(source<sourceLimit) {
utf8->toUBytes[toULength++]=b=*source++; utf8->toUBytes[toULength++]=b=*source++;
c=(c<<6)+b; c=(c<<6)+b;
@ -5344,7 +5336,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
uint32_t stage2Entry; uint32_t stage2Entry;
uint32_t asciiRoundtrips; uint32_t asciiRoundtrips;
uint16_t value; uint16_t value = 0;
UBool hasSupplementary; UBool hasSupplementary;
/* set up the local pointers */ /* set up the local pointers */
@ -5375,28 +5367,27 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
toULength=oldToULength=toULimit=0; toULength=oldToULength=toULimit=0;
} }
/* // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
* Make sure that the last byte sequence before sourceLimit is complete // If the buffer ends with a truncated 2- or 3-byte sequence,
* or runs into a lead byte. // then we reduce the sourceLimit to before that,
* Do not go back into the bytes that will be read for finishing a partial // and collect the remaining bytes after the conversion loop.
* sequence from the previous buffer.
* In the conversion loop compare source with sourceLimit only once
* per multi-byte character.
*/
{ {
int32_t i, length; // Do not go back into the bytes that will be read for finishing a partial
// sequence from the previous buffer.
length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength); int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
for(i=0; i<3 && i<length;) { if(length>0) {
b=*(sourceLimit-i-1); uint8_t b1=*(sourceLimit-1);
if(U8_IS_TRAIL(b)) { if(U8_IS_SINGLE(b1)) {
++i; // common ASCII character
} else { } else if(U8_IS_TRAIL(b1) && length>=2) {
if(i<U8_COUNT_TRAIL_BYTES(b)) { uint8_t b2=*(sourceLimit-2);
/* exit the conversion loop before the lead byte if there are not enough trail bytes for it */ if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
sourceLimit-=i+1; // truncated 3-byte sequence
sourceLimit-=2;
} }
break; } else if(0xc2<=b1 && b1<0xf0) {
// truncated 2- or 3-byte sequence
--sourceLimit;
} }
} }
} }
@ -5412,7 +5403,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
while(source<sourceLimit) { while(source<sourceLimit) {
if(targetCapacity>0) { if(targetCapacity>0) {
b=*source++; b=*source++;
if((int8_t)b>=0) { if(U8_IS_SINGLE(b)) {
/* convert ASCII */ /* convert ASCII */
if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) { if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
*target++=b; *target++=b;
@ -5426,13 +5417,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
} }
} }
} else { } else {
if(b>0xe0) { if(b>=0xe0) {
if( /* handle U+1000..U+D7FF inline */ if( /* handle U+0800..U+D7FF inline */
(((t1=(uint8_t)(source[0]-0x80), b<0xed) && (t1 <= 0x3f)) || b<=0xed && // do not assume maxFastUChar>0xd7ff
(b==0xed && (t1 <= 0x1f))) && U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) &&
(t2=(uint8_t)(source[1]-0x80)) <= 0x3f (t2=(uint8_t)(source[1]-0x80)) <= 0x3f
) { ) {
c=((b&0xf)<<6)|t1; c=((b&0xf)<<6)|(t1&0x3f);
source+=2; source+=2;
value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2); value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2);
if(value==0) { if(value==0) {
@ -5442,7 +5433,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
} else { } else {
c=-1; c=-1;
} }
} else if(b<0xe0) { } else {
if( /* handle U+0080..U+07FF inline */ if( /* handle U+0080..U+07FF inline */
b>=0xc2 && b>=0xc2 &&
(t1=(uint8_t)(*source-0x80)) <= 0x3f (t1=(uint8_t)(*source-0x80)) <= 0x3f
@ -5457,15 +5448,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
} else { } else {
c=-1; c=-1;
} }
} else {
c=-1;
} }
if(c<0) { if(c<0) {
/* handle "complicated" and error cases, and continuing partial characters */ /* handle "complicated" and error cases, and continuing partial characters */
oldToULength=0; oldToULength=0;
toULength=1; toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1; toULimit=U8_COUNT_BYTES_NON_ASCII(b);
c=b; c=b;
moreBytes: moreBytes:
while(toULength<toULimit) { while(toULength<toULimit) {
@ -5478,7 +5467,7 @@ moreBytes:
*/ */
if(source<(uint8_t *)pToUArgs->sourceLimit) { if(source<(uint8_t *)pToUArgs->sourceLimit) {
b=*source; b=*source;
if(U8_IS_TRAIL(b)) { if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
++source; ++source;
++toULength; ++toULength;
c=(c<<6)+b; c=(c<<6)+b;
@ -5500,16 +5489,11 @@ moreBytes:
} }
} }
if( toULength==toULimit && /* consumed all trail bytes */ if(toULength==toULimit) {
(toULength==3 || toULength==2) && /* BMP */ c-=utf8_offsets[toULength];
(c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] && if(toULength<=3) { /* BMP */
(c<=0xd7ff || 0xe000<=c) /* not a surrogate */
) {
stage2Entry=MBCS_STAGE_2_FROM_U(table, c); stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
} else if( } else {
toULength==toULimit && toULength==4 &&
(0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
) {
/* supplementary code point */ /* supplementary code point */
if(!hasSupplementary) { if(!hasSupplementary) {
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
@ -5517,6 +5501,7 @@ moreBytes:
} else { } else {
stage2Entry=MBCS_STAGE_2_FROM_U(table, c); stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
} }
}
} else { } else {
/* error handling: illegal UTF-8 byte sequence */ /* error handling: illegal UTF-8 byte sequence */
source-=(toULength-oldToULength); source-=(toULength-oldToULength);
@ -5620,7 +5605,7 @@ unassigned:
source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
c=utf8->toUBytes[0]=b=*source++; c=utf8->toUBytes[0]=b=*source++;
toULength=1; toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1; toULimit=U8_COUNT_BYTES(b);
while(source<sourceLimit) { while(source<sourceLimit) {
utf8->toUBytes[toULength++]=b=*source++; utf8->toUBytes[toULength++]=b=*source++;
c=(c<<6)+b; c=(c<<6)+b;

View File

@ -25,6 +25,7 @@
#include "uenumimp.h" #include "uenumimp.h"
#include "uhash.h" #include "uhash.h"
#include "hash.h" #include "hash.h"
#include "uinvchar.h"
#include "uresimp.h" #include "uresimp.h"
#include "ulist.h" #include "ulist.h"
#include "ureslocs.h" #include "ureslocs.h"
@ -545,25 +546,28 @@ U_CAPI int32_t U_EXPORT2
ucurr_forLocale(const char* locale, ucurr_forLocale(const char* locale,
UChar* buff, UChar* buff,
int32_t buffCapacity, int32_t buffCapacity,
UErrorCode* ec) UErrorCode* ec) {
{ if (U_FAILURE(*ec)) { return 0; }
int32_t resLen = 0; if (buffCapacity < 0 || (buff == nullptr && buffCapacity > 0)) {
const UChar* s = NULL; *ec = U_ILLEGAL_ARGUMENT_ERROR;
if (ec != NULL && U_SUCCESS(*ec)) { return 0;
if ((buff && buffCapacity) || !buffCapacity) {
UErrorCode localStatus = U_ZERO_ERROR;
char id[ULOC_FULLNAME_CAPACITY];
if ((resLen = uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus))) {
// there is a currency keyword. Try to see if it's valid
if(buffCapacity > resLen) {
/* Normalize the currency keyword value to upper case. */
T_CString_toUpperCase(id);
u_charsToUChars(id, buff, resLen);
} }
} else {
// get country or country_variant in `id'
uint32_t variantType = idForLocale(locale, id, sizeof(id), ec);
char currency[4]; // ISO currency codes are alpha3 codes.
UErrorCode localStatus = U_ZERO_ERROR;
int32_t resLen = uloc_getKeywordValue(locale, "currency",
currency, UPRV_LENGTHOF(currency), &localStatus);
if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency, resLen)) {
if (resLen < buffCapacity) {
T_CString_toUpperCase(currency);
u_charsToUChars(currency, buff, resLen);
}
return u_terminateUChars(buff, buffCapacity, resLen, ec);
}
// get country or country_variant in `id'
char id[ULOC_FULLNAME_CAPACITY];
uint32_t variantType = idForLocale(locale, id, UPRV_LENGTHOF(id), ec);
if (U_FAILURE(*ec)) { if (U_FAILURE(*ec)) {
return 0; return 0;
} }
@ -574,49 +578,56 @@ ucurr_forLocale(const char* locale,
if(buffCapacity > u_strlen(result)) { if(buffCapacity > u_strlen(result)) {
u_strcpy(buff, result); u_strcpy(buff, result);
} }
return u_strlen(result); resLen = u_strlen(result);
return u_terminateUChars(buff, buffCapacity, resLen, ec);
} }
#endif #endif
// Remove variants, which is only needed for registration. // Remove variants, which is only needed for registration.
char *idDelim = strchr(id, VAR_DELIM); char *idDelim = uprv_strchr(id, VAR_DELIM);
if (idDelim) { if (idDelim) {
idDelim[0] = 0; idDelim[0] = 0;
} }
const UChar* s = NULL; // Currency code from data file.
if (id[0] == 0) {
// No point looking in the data for an empty string.
// This is what we would get.
localStatus = U_MISSING_RESOURCE_ERROR;
} else {
// Look up the CurrencyMap element in the root bundle. // Look up the CurrencyMap element in the root bundle.
localStatus = U_ZERO_ERROR;
UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus); UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus);
UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus); UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus); UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
UResourceBundle *currencyReq = ures_getByIndex(countryArray, 0, NULL, &localStatus); UResourceBundle *currencyReq = ures_getByIndex(countryArray, 0, NULL, &localStatus);
s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus); s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus);
/* // Get the second item when PREEURO is requested, and this is a known Euro country.
Get the second item when PREEURO is requested, and this is a known Euro country. // If the requested variant is PREEURO, and this isn't a Euro country,
If the requested variant is PREEURO, and this isn't a Euro country, assume // assume that the country changed over to the Euro in the future.
that the country changed over to the Euro in the future. This is probably // This is probably an old version of ICU that hasn't been updated yet.
an old version of ICU that hasn't been updated yet. The latest currency is // The latest currency is probably correct.
probably correct.
*/
if (U_SUCCESS(localStatus)) { if (U_SUCCESS(localStatus)) {
if ((variantType & VARIANT_IS_PREEURO) && u_strcmp(s, EUR_STR) == 0) { if ((variantType & VARIANT_IS_PREEURO) && u_strcmp(s, EUR_STR) == 0) {
currencyReq = ures_getByIndex(countryArray, 1, currencyReq, &localStatus); currencyReq = ures_getByIndex(countryArray, 1, currencyReq, &localStatus);
s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus); s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus);
} } else if ((variantType & VARIANT_IS_EURO)) {
else if ((variantType & VARIANT_IS_EURO)) {
s = EUR_STR; s = EUR_STR;
} }
} }
ures_close(countryArray);
ures_close(currencyReq); ures_close(currencyReq);
ures_close(countryArray);
}
if ((U_FAILURE(localStatus)) && strchr(id, '_') != 0) if ((U_FAILURE(localStatus)) && strchr(id, '_') != 0) {
{
// We don't know about it. Check to see if we support the variant. // We don't know about it. Check to see if we support the variant.
uloc_getParent(locale, id, sizeof(id), ec); uloc_getParent(locale, id, UPRV_LENGTHOF(id), ec);
*ec = U_USING_FALLBACK_WARNING; *ec = U_USING_FALLBACK_WARNING;
// TODO: Loop over the shortened id rather than recursing and
// looking again for a currency keyword.
return ucurr_forLocale(id, buff, buffCapacity, ec); return ucurr_forLocale(id, buff, buffCapacity, ec);
} }
else if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) { if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) {
// There is nothing to fallback to. Report the failure/warning if possible. // There is nothing to fallback to. Report the failure/warning if possible.
*ec = localStatus; *ec = localStatus;
} }
@ -625,13 +636,7 @@ ucurr_forLocale(const char* locale,
u_strcpy(buff, s); u_strcpy(buff, s);
} }
} }
}
return u_terminateUChars(buff, buffCapacity, resLen, ec); return u_terminateUChars(buff, buffCapacity, resLen, ec);
} else {
*ec = U_ILLEGAL_ARGUMENT_ERROR;
}
}
return resLen;
} }
// end registration // end registration
@ -648,7 +653,16 @@ static UBool fallback(char *loc) {
return FALSE; return FALSE;
} }
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
if (uprv_strcmp(loc, "en_GB") == 0) {
// HACK: See #13368. We need "en_GB" to fall back to "en_001" instead of "en"
// in order to consume the correct data strings. This hack will be removed
// when proper data sink loading is implemented here.
// NOTE: "001" adds 1 char over "GB". However, both call sites allocate
// arrays with length ULOC_FULLNAME_CAPACITY (plenty of room for en_001).
uprv_strcpy(loc + 3, "001");
} else {
uloc_getParent(loc, loc, (int32_t)uprv_strlen(loc), &status); uloc_getParent(loc, loc, (int32_t)uprv_strlen(loc), &status);
}
/* /*
char *i = uprv_strrchr(loc, '_'); char *i = uprv_strrchr(loc, '_');
if (i == NULL) { if (i == NULL) {
@ -2216,6 +2230,7 @@ ucurr_countCurrencies(const char* locale,
UErrorCode localStatus = U_ZERO_ERROR; UErrorCode localStatus = U_ZERO_ERROR;
char id[ULOC_FULLNAME_CAPACITY]; char id[ULOC_FULLNAME_CAPACITY];
uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus); uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus);
// get country or country_variant in `id' // get country or country_variant in `id'
/*uint32_t variantType =*/ idForLocale(locale, id, sizeof(id), ec); /*uint32_t variantType =*/ idForLocale(locale, id, sizeof(id), ec);

View File

@ -206,6 +206,8 @@ setCommonICUData(UDataMemory *pData, /* The new common data. Belongs to ca
return didUpdate; return didUpdate;
} }
#if U_PLATFORM_HAS_WINUWP_API == 0
static UBool static UBool
setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) { setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) {
UDataMemory tData; UDataMemory tData;
@ -215,6 +217,8 @@ setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCod
return setCommonICUData(&tData, FALSE, pErrorCode); return setCommonICUData(&tData, FALSE, pErrorCode);
} }
#endif
static const char * static const char *
findBasename(const char *path) { findBasename(const char *path) {
const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR); const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR);
@ -982,7 +986,7 @@ static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
/* init path iterator for individual files */ /* init path iterator for individual files */
UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode); UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode);
while((pathBuffer = iter.next(pErrorCode))) while((pathBuffer = iter.next(pErrorCode)) != NULL)
{ {
#ifdef UDATA_DEBUG #ifdef UDATA_DEBUG
fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer); fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer);
@ -1165,7 +1169,7 @@ doOpenChoice(const char *path, const char *type, const char *name,
if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != NULL) { if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != NULL) {
altSepPath.append(path, *pErrorCode); altSepPath.append(path, *pErrorCode);
char *p; char *p;
while((p=uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR))) { while ((p = uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR)) != NULL) {
*p = U_FILE_SEP_CHAR; *p = U_FILE_SEP_CHAR;
} }
#if defined (UDATA_DEBUG) #if defined (UDATA_DEBUG)

View File

@ -79,14 +79,14 @@
* prime number while being less than a power of two. * prime number while being less than a power of two.
*/ */
static const int32_t PRIMES[] = { static const int32_t PRIMES[] = {
13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749, 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593, 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
16777213, 33554393, 67108859, 134217689, 268435399, 536870909, 16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
1073741789, 2147483647 /*, 4294967291 */ 1073741789, 2147483647 /*, 4294967291 */
}; };
#define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES) #define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES)
#define DEFAULT_PRIME_INDEX 3 #define DEFAULT_PRIME_INDEX 4
/* These ratios are tuned to the PRIMES array such that a resize /* These ratios are tuned to the PRIMES array such that a resize
* places the table back into the zone of non-resizing. That is, * places the table back into the zone of non-resizing. That is,
@ -570,6 +570,22 @@ uhash_init(UHashtable *fillinResult,
return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status); return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
} }
U_CAPI UHashtable* U_EXPORT2
uhash_initSize(UHashtable *fillinResult,
UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
int32_t size,
UErrorCode *status) {
// Find the smallest index i for which PRIMES[i] >= size.
int32_t i = 0;
while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
++i;
}
return _uhash_init(fillinResult, keyHash, keyComp, valueComp, i, status);
}
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uhash_close(UHashtable *hash) { uhash_close(UHashtable *hash) {
if (hash == NULL) { if (hash == NULL) {
@ -844,7 +860,7 @@ uhash_hashUChars(const UHashTok key) {
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_hashChars(const UHashTok key) { uhash_hashChars(const UHashTok key) {
const char *s = (const char *)key.pointer; const char *s = (const char *)key.pointer;
return s == NULL ? 0 : ustr_hashCharsN(s, uprv_strlen(s)); return s == NULL ? 0 : static_cast<int32_t>(ustr_hashCharsN(s, uprv_strlen(s)));
} }
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2

View File

@ -231,6 +231,25 @@ uhash_init(UHashtable *hash,
UValueComparator *valueComp, UValueComparator *valueComp,
UErrorCode *status); UErrorCode *status);
/**
* Initialize an existing UHashtable.
* @param keyHash A pointer to the key hashing function. Must not be
* NULL.
* @param keyComp A pointer to the function that compares keys. Must
* not be NULL.
* @param size The initial capacity of this hash table.
* @param status A pointer to an UErrorCode to receive any errors.
* @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_openSize
*/
U_CAPI UHashtable* U_EXPORT2
uhash_initSize(UHashtable *hash,
UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
int32_t size,
UErrorCode *status);
/** /**
* Close a UHashtable, releasing the memory used. * Close a UHashtable, releasing the memory used.
* @param hash The UHashtable to close. If hash is NULL no operation is performed. * @param hash The UHashtable to close. If hash is NULL no operation is performed.

View File

@ -573,7 +573,7 @@ uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
uint8_t *orig_dst = dst; uint8_t *orig_dst = dst;
if(n==-1) { if(n==-1) {
n = uprv_strlen((const char*)src)+1; /* copy NUL */ n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
} }
/* copy non-null */ /* copy non-null */
while(*src && n>0) { while(*src && n>0) {
@ -594,7 +594,7 @@ uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
uint8_t *orig_dst = dst; uint8_t *orig_dst = dst;
if(n==-1) { if(n==-1) {
n = uprv_strlen((const char*)src)+1; /* copy NUL */ n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
} }
/* copy non-null */ /* copy non-null */
while(*src && n>0) { while(*src && n>0) {

View File

@ -252,7 +252,7 @@ U_CAPI const char * U_EXPORT2 ulist_next_keyword_value(UEnumeration *en, int32_t
s = (const char *)ulist_getNext((UList *)(en->context)); s = (const char *)ulist_getNext((UList *)(en->context));
if (s != NULL && resultLength != NULL) { if (s != NULL && resultLength != NULL) {
*resultLength = uprv_strlen(s); *resultLength = static_cast<int32_t>(uprv_strlen(s));
} }
return s; return s;
} }

View File

@ -98,6 +98,7 @@ locale_getKeywords(const char *localeID,
*/ */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */ /* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES[] = { static const char * const LANGUAGES[] = {
"aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb", "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
"af", "afh", "agq", "ain", "ak", "akk", "akz", "ale", "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
@ -109,7 +110,7 @@ static const char * const LANGUAGES[] = {
"bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla", "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
"bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh", "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
"brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv", "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
"ca", "cad", "car", "cay", "cch", "ce", "ceb", "cgg", "ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg",
"ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp", "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
"chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh", "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
"cs", "csb", "cu", "cv", "cy", "cs", "csb", "cu", "cv", "cy",
@ -213,6 +214,7 @@ static const char* const REPLACEMENT_LANGUAGES[]={
*/ */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */ /* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES_3[] = { static const char * const LANGUAGES_3[] = {
"aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb", "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
"afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale", "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
@ -224,7 +226,7 @@ static const char * const LANGUAGES_3[] = {
"bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla", "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
"bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh", "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
"brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv", "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
"cat", "cad", "car", "cay", "cch", "che", "ceb", "cgg", "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
"cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp", "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
"chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh", "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
"ces", "csb", "chu", "chv", "cym", "ces", "csb", "chu", "chv", "cym",
@ -529,14 +531,16 @@ static const VariantMap VARIANT_MAP[] = {
#define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1) #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */ /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
#define _ConvertBCP47(finalID, id, buffer, length,err) \ #define _ConvertBCP47(finalID, id, buffer, length,err) \
if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \ if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \
U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
finalID=id; \ finalID=id; \
if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
} else { \ } else { \
finalID=buffer; \ finalID=buffer; \
} }
/* Gets the size of the shortest subtag in the given localeID. */ /* Gets the size of the shortest subtag in the given localeID. */
static int32_t getShortestSubtagLength(const char *localeID) { static int32_t getShortestSubtagLength(const char *localeID) {
int32_t localeIDLength = uprv_strlen(localeID); int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
int32_t length = localeIDLength; int32_t length = localeIDLength;
int32_t tmpLength = 0; int32_t tmpLength = 0;
int32_t i; int32_t i;
@ -2486,7 +2490,7 @@ uloc_acceptLanguage(char *result, int32_t resultAvailable,
#if defined(ULOC_DEBUG) #if defined(ULOC_DEBUG)
fprintf(stderr,"%02d: %s\n", i, acceptList[i]); fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
#endif #endif
while((l=uenum_next(availableLocales, NULL, status))) { while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
#if defined(ULOC_DEBUG) #if defined(ULOC_DEBUG)
fprintf(stderr," %s\n", l); fprintf(stderr," %s\n", l);
#endif #endif
@ -2526,7 +2530,7 @@ uloc_acceptLanguage(char *result, int32_t resultAvailable,
#if defined(ULOC_DEBUG) #if defined(ULOC_DEBUG)
fprintf(stderr,"Try: [%s]", fallbackList[i]); fprintf(stderr,"Try: [%s]", fallbackList[i]);
#endif #endif
while((l=uenum_next(availableLocales, NULL, status))) { while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
#if defined(ULOC_DEBUG) #if defined(ULOC_DEBUG)
fprintf(stderr," %s\n", l); fprintf(stderr," %s\n", l);
#endif #endif

View File

@ -1022,7 +1022,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
no known mapping. This implementation normalizes the no known mapping. This implementation normalizes the
the value to lower case the value to lower case
*/ */
int32_t bcpValueLen = uprv_strlen(bcpValue); int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
if (bcpValueLen < extBufCapacity) { if (bcpValueLen < extBufCapacity) {
uprv_strcpy(pExtBuf, bcpValue); uprv_strcpy(pExtBuf, bcpValue);
T_CString_toLowerCase(pExtBuf); T_CString_toLowerCase(pExtBuf);
@ -1288,7 +1288,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
bufIdx++; bufIdx++;
} }
len = uprv_strlen(attr->attribute); len = static_cast<int32_t>(uprv_strlen(attr->attribute));
uprv_memcpy(buf + bufIdx, attr->attribute, len); uprv_memcpy(buf + bufIdx, attr->attribute, len);
bufIdx += len; bufIdx += len;
@ -1841,7 +1841,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
int32_t newTagLength; int32_t newTagLength;
grandfatheredLen = tagLen; /* back up for output parsedLen */ grandfatheredLen = tagLen; /* back up for output parsedLen */
newTagLength = uprv_strlen(GRANDFATHERED[i+1]); newTagLength = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
if (tagLen < newTagLength) { if (tagLen < newTagLength) {
uprv_free(tagBuf); uprv_free(tagBuf);
tagBuf = (char*)uprv_malloc(newTagLength + 1); tagBuf = (char*)uprv_malloc(newTagLength + 1);

View File

@ -102,9 +102,6 @@
{ {
HANDLE map; HANDLE map;
HANDLE file; HANDLE file;
SECURITY_ATTRIBUTES mappingAttributes;
SECURITY_ATTRIBUTES *mappingAttributesPtr = NULL;
SECURITY_DESCRIPTOR securityDesc;
UDataMemory_init(pData); /* Clear the output struct. */ UDataMemory_init(pData); /* Clear the output struct. */
@ -143,6 +140,11 @@
This is required for multiuser systems on Windows 2000 SP4 and beyond */ This is required for multiuser systems on Windows 2000 SP4 and beyond */
// TODO: UWP does not have this function and I do not think it is required? // TODO: UWP does not have this function and I do not think it is required?
#if U_PLATFORM_HAS_WINUWP_API == 0 #if U_PLATFORM_HAS_WINUWP_API == 0
SECURITY_ATTRIBUTES mappingAttributes;
SECURITY_ATTRIBUTES *mappingAttributesPtr = NULL;
SECURITY_DESCRIPTOR securityDesc;
if (InitializeSecurityDescriptor(&securityDesc, SECURITY_DESCRIPTOR_REVISION)) { if (InitializeSecurityDescriptor(&securityDesc, SECURITY_DESCRIPTOR_REVISION)) {
/* give the security descriptor a Null Dacl done using the "TRUE, (PACL)NULL" here */ /* give the security descriptor a Null Dacl done using the "TRUE, (PACL)NULL" here */
if (SetSecurityDescriptorDacl(&securityDesc, TRUE, (PACL)NULL, FALSE)) { if (SetSecurityDescriptorDacl(&securityDesc, TRUE, (PACL)NULL, FALSE)) {

View File

@ -132,7 +132,7 @@ umtx_condBroadcast(UConditionVar *condition) {
} }
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
umtx_condSignal(UConditionVar *condition) { umtx_condSignal(UConditionVar * /* condition */) {
// Function not implemented. There is no immediate requirement from ICU to have it. // Function not implemented. There is no immediate requirement from ICU to have it.
// Once ICU drops support for Windows XP and Server 2003, ICU Condition Variables will be // Once ICU drops support for Windows XP and Server 2003, ICU Condition Variables will be
// changed to be thin wrappers on native Windows CONDITION_VARIABLEs, and this function // changed to be thin wrappers on native Windows CONDITION_VARIABLEs, and this function

View File

@ -250,7 +250,7 @@ public:
virtual int32_t next(void) = 0; virtual int32_t next(void) = 0;
/** /**
* Return character index of the current interator position within the text. * Return character index of the current iterator position within the text.
* @return The boundary most recently returned. * @return The boundary most recently returned.
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
@ -277,7 +277,7 @@ public:
virtual int32_t preceding(int32_t offset) = 0; virtual int32_t preceding(int32_t offset) = 0;
/** /**
* Return true if the specfied position is a boundary position. * Return true if the specified position is a boundary position.
* As a side effect, the current position of the iterator is set * As a side effect, the current position of the iterator is set
* to the first boundary position at or following the specified offset. * to the first boundary position at or following the specified offset.
* @param offset the offset to check. * @param offset the offset to check.
@ -331,7 +331,7 @@ public:
* @param fillInVec an array to be filled in with the status values. * @param fillInVec an array to be filled in with the status values.
* @param capacity the length of the supplied vector. A length of zero causes * @param capacity the length of the supplied vector. A length of zero causes
* the function to return the number of status values, in the * the function to return the number of status values, in the
* normal way, without attemtping to store any values. * normal way, without attempting to store any values.
* @param status receives error codes. * @param status receives error codes.
* @return The number of rule status values from rules that determined * @return The number of rule status values from rules that determined
* the most recent boundary returned by the break iterator. * the most recent boundary returned by the break iterator.
@ -469,7 +469,7 @@ public:
static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
/** /**
* Get name of the object for the desired Locale, in the desired langauge. * Get name of the object for the desired Locale, in the desired language.
* @param objectLocale must be from getAvailableLocales. * @param objectLocale must be from getAvailableLocales.
* @param displayLocale specifies the desired locale for output. * @param displayLocale specifies the desired locale for output.
* @param name the fill-in parameter of the return value * @param name the fill-in parameter of the return value
@ -482,7 +482,7 @@ public:
UnicodeString& name); UnicodeString& name);
/** /**
* Get name of the object for the desired Locale, in the langauge of the * Get name of the object for the desired Locale, in the language of the
* default locale. * default locale.
* @param objectLocale must be from getMatchingLocales * @param objectLocale must be from getMatchingLocales
* @param name the fill-in parameter of the return value * @param name the fill-in parameter of the return value
@ -629,10 +629,12 @@ protected:
/** @internal */ /** @internal */
BreakIterator(); BreakIterator();
/** @internal */ /** @internal */
BreakIterator (const BreakIterator &other) : UObject(other) {} BreakIterator (const BreakIterator &other);
#ifndef U_HIDE_INTERNAL_API #ifndef U_HIDE_INTERNAL_API
/** @internal */ /** @internal */
BreakIterator (const Locale& valid, const Locale &actual); BreakIterator (const Locale& valid, const Locale &actual);
/** @internal. Assignment Operator, used by RuleBasedBreakIterator. */
BreakIterator &operator = (const BreakIterator &other);
#endif /* U_HIDE_INTERNAL_API */ #endif /* U_HIDE_INTERNAL_API */
private: private:
@ -640,12 +642,6 @@ private:
/** @internal */ /** @internal */
char actualLocale[ULOC_FULLNAME_CAPACITY]; char actualLocale[ULOC_FULLNAME_CAPACITY];
char validLocale[ULOC_FULLNAME_CAPACITY]; char validLocale[ULOC_FULLNAME_CAPACITY];
/**
* The assignment operator has no real implementation.
* It's provided to make the compiler happy. Do not call.
*/
BreakIterator& operator=(const BreakIterator&);
}; };
#ifndef U_HIDE_DEPRECATED_API #ifndef U_HIDE_DEPRECATED_API
@ -661,5 +657,5 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif // _BRKITER #endif // BRKITER_H
//eof //eof

View File

@ -126,8 +126,8 @@ public:
virtual void Flush(); virtual void Flush();
private: private:
ByteSink(const ByteSink &); // copy constructor not implemented ByteSink(const ByteSink &) = delete;
ByteSink &operator=(const ByteSink &); // assignment operator not implemented ByteSink &operator=(const ByteSink &) = delete;
}; };
// ------------------------------------------------------------- // -------------------------------------------------------------
@ -217,9 +217,10 @@ private:
int32_t size_; int32_t size_;
int32_t appended_; int32_t appended_;
UBool overflowed_; UBool overflowed_;
CheckedArrayByteSink(); ///< default constructor not implemented
CheckedArrayByteSink(const CheckedArrayByteSink &); ///< copy constructor not implemented CheckedArrayByteSink() = delete;
CheckedArrayByteSink &operator=(const CheckedArrayByteSink &); ///< assignment operator not implemented CheckedArrayByteSink(const CheckedArrayByteSink &) = delete;
CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
}; };
/** /**
@ -236,6 +237,21 @@ class StringByteSink : public ByteSink {
* @stable ICU 4.2 * @stable ICU 4.2
*/ */
StringByteSink(StringClass* dest) : dest_(dest) { } StringByteSink(StringClass* dest) : dest_(dest) { }
#ifndef U_HIDE_DRAFT_API
/**
* Constructs a ByteSink that reserves append capacity and will append bytes to the dest string.
*
* @param dest pointer to string object to append to
* @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
* @draft ICU 60
*/
StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
if (initialAppendCapacity > 0 &&
(uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) {
dest->reserve(dest->length() + initialAppendCapacity);
}
}
#endif // U_HIDE_DRAFT_API
/** /**
* Append "bytes[0,n-1]" to this. * Append "bytes[0,n-1]" to this.
* @param data the pointer to the bytes * @param data the pointer to the bytes
@ -245,9 +261,10 @@ class StringByteSink : public ByteSink {
virtual void Append(const char* data, int32_t n) { dest_->append(data, n); } virtual void Append(const char* data, int32_t n) { dest_->append(data, n); }
private: private:
StringClass* dest_; StringClass* dest_;
StringByteSink(); ///< default constructor not implemented
StringByteSink(const StringByteSink &); ///< copy constructor not implemented StringByteSink() = delete;
StringByteSink &operator=(const StringByteSink &); ///< assignment operator not implemented StringByteSink(const StringByteSink &) = delete;
StringByteSink &operator=(const StringByteSink &) = delete;
}; };
U_NAMESPACE_END U_NAMESPACE_END

View File

@ -8,6 +8,7 @@
#define __CASEMAP_H__ #define __CASEMAP_H__
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/stringpiece.h"
#include "unicode/uobject.h" #include "unicode/uobject.h"
/** /**
@ -20,6 +21,7 @@ U_NAMESPACE_BEGIN
#ifndef U_HIDE_DRAFT_API #ifndef U_HIDE_DRAFT_API
class BreakIterator; class BreakIterator;
class ByteSink;
class Edits; class Edits;
/** /**
@ -36,7 +38,7 @@ public:
* The source string and the destination buffer must not overlap. * The source string and the destination buffer must not overlap.
* *
* @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string. * @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if * @param dest A buffer for the result string. The result will be NUL-terminated if
@ -48,7 +50,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.
@ -71,7 +74,7 @@ public:
* The source string and the destination buffer must not overlap. * The source string and the destination buffer must not overlap.
* *
* @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string. * @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if * @param dest A buffer for the result string. The result will be NUL-terminated if
@ -83,7 +86,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.
@ -112,8 +116,10 @@ public:
* all others. (This can be modified with options bits.) * all others. (This can be modified with options bits.)
* *
* @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT. * U_TITLECASE_NO_LOWERCASE,
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* @param iter A break iterator to find the first characters of words that are to be titlecased. * @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setText()) * It is set to the source string (setText())
* and used one or more times for iteration (first() and next()). * and used one or more times for iteration (first() and next()).
@ -130,7 +136,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.
@ -159,7 +166,7 @@ public:
* The result may be longer or shorter than the original. * The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap. * The source string and the destination buffer must not overlap.
* *
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* @param src The original string. * @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
@ -172,7 +179,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.
@ -188,6 +196,129 @@ public:
char16_t *dest, int32_t destCapacity, Edits *edits, char16_t *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode); UErrorCode &errorCode);
/**
* Lowercases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param sink A ByteSink to which the result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8ToLower
* @draft ICU 60
*/
static void utf8ToLower(
const char *locale, uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
/**
* Uppercases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param sink A ByteSink to which the result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8ToUpper
* @draft ICU 60
*/
static void utf8ToUpper(
const char *locale, uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/**
* Titlecases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
*
* Titlecasing uses a break iterator to find the first characters of words
* that are to be titlecased. It titlecases those characters and lowercases
* all others. (This can be modified with options bits.)
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_TITLECASE_NO_LOWERCASE,
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setUText())
* and used one or more times for iteration (first() and next()).
* If NULL, then a word break iterator for the locale is used
* (or something equivalent).
* @param src The original string.
* @param sink A ByteSink to which the result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8ToTitle
* @draft ICU 60
*/
static void utf8ToTitle(
const char *locale, uint32_t options, BreakIterator *iter,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
#endif // UCONFIG_NO_BREAK_ITERATION
/**
* Case-folds a UTF-8 string and optionally records edits.
*
* Case folding is locale-independent and not context-sensitive,
* but there is an option for whether to include or exclude mappings for dotted I
* and dotless i that are marked with 'T' in CaseFolding.txt.
*
* The result may be longer or shorter than the original.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param sink A ByteSink to which the result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8FoldCase
* @draft ICU 60
*/
static void utf8Fold(
uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
/** /**
* Lowercases a UTF-8 string and optionally records edits. * Lowercases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive. * Casing is locale-dependent and context-sensitive.
@ -195,7 +326,7 @@ public:
* The source string and the destination buffer must not overlap. * The source string and the destination buffer must not overlap.
* *
* @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string. * @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if * @param dest A buffer for the result string. The result will be NUL-terminated if
@ -207,7 +338,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.
@ -230,7 +362,7 @@ public:
* The source string and the destination buffer must not overlap. * The source string and the destination buffer must not overlap.
* *
* @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string. * @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if * @param dest A buffer for the result string. The result will be NUL-terminated if
@ -242,7 +374,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.
@ -271,10 +404,12 @@ public:
* all others. (This can be modified with options bits.) * all others. (This can be modified with options bits.)
* *
* @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT. * U_TITLECASE_NO_LOWERCASE,
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* @param iter A break iterator to find the first characters of words that are to be titlecased. * @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setText()) * It is set to the source string (setUText())
* and used one or more times for iteration (first() and next()). * and used one or more times for iteration (first() and next()).
* If NULL, then a word break iterator for the locale is used * If NULL, then a word break iterator for the locale is used
* (or something equivalent). * (or something equivalent).
@ -289,7 +424,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.
@ -317,7 +453,7 @@ public:
* The result may be longer or shorter than the original. * The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap. * The source string and the destination buffer must not overlap.
* *
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* @param src The original string. * @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
@ -330,7 +466,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.

View File

@ -95,45 +95,45 @@ private:
return reinterpret_cast<char16_t *>(t); return reinterpret_cast<char16_t *>(t);
} }
char16_t *p; char16_t *p_;
#else #else
union { union {
char16_t *cp; char16_t *cp;
uint16_t *up; uint16_t *up;
wchar_t *wp; wchar_t *wp;
} u; } u_;
#endif #endif
}; };
#ifdef U_ALIASING_BARRIER #ifdef U_ALIASING_BARRIER
Char16Ptr::Char16Ptr(char16_t *p) : p(p) {} Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {}
#if !U_CHAR16_IS_TYPEDEF #if !U_CHAR16_IS_TYPEDEF
Char16Ptr::Char16Ptr(uint16_t *p) : p(cast(p)) {} Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {}
#endif #endif
#if U_SIZEOF_WCHAR_T==2 #if U_SIZEOF_WCHAR_T==2
Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {} Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {}
#endif #endif
Char16Ptr::Char16Ptr(std::nullptr_t p) : p(p) {} Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {}
Char16Ptr::~Char16Ptr() { Char16Ptr::~Char16Ptr() {
U_ALIASING_BARRIER(p); U_ALIASING_BARRIER(p_);
} }
char16_t *Char16Ptr::get() const { return p; } char16_t *Char16Ptr::get() const { return p_; }
#else #else
Char16Ptr::Char16Ptr(char16_t *p) { u.cp = p; } Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; }
#if !U_CHAR16_IS_TYPEDEF #if !U_CHAR16_IS_TYPEDEF
Char16Ptr::Char16Ptr(uint16_t *p) { u.up = p; } Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; }
#endif #endif
#if U_SIZEOF_WCHAR_T==2 #if U_SIZEOF_WCHAR_T==2
Char16Ptr::Char16Ptr(wchar_t *p) { u.wp = p; } Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; }
#endif #endif
Char16Ptr::Char16Ptr(std::nullptr_t p) { u.cp = p; } Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; }
Char16Ptr::~Char16Ptr() {} Char16Ptr::~Char16Ptr() {}
char16_t *Char16Ptr::get() const { return u.cp; } char16_t *Char16Ptr::get() const { return u_.cp; }
#endif #endif
@ -203,45 +203,45 @@ private:
return reinterpret_cast<const char16_t *>(t); return reinterpret_cast<const char16_t *>(t);
} }
const char16_t *p; const char16_t *p_;
#else #else
union { union {
const char16_t *cp; const char16_t *cp;
const uint16_t *up; const uint16_t *up;
const wchar_t *wp; const wchar_t *wp;
} u; } u_;
#endif #endif
}; };
#ifdef U_ALIASING_BARRIER #ifdef U_ALIASING_BARRIER
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p(p) {} ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {}
#if !U_CHAR16_IS_TYPEDEF #if !U_CHAR16_IS_TYPEDEF
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p(cast(p)) {} ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {}
#endif #endif
#if U_SIZEOF_WCHAR_T==2 #if U_SIZEOF_WCHAR_T==2
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {} ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {}
#endif #endif
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p(p) {} ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {}
ConstChar16Ptr::~ConstChar16Ptr() { ConstChar16Ptr::~ConstChar16Ptr() {
U_ALIASING_BARRIER(p); U_ALIASING_BARRIER(p_);
} }
const char16_t *ConstChar16Ptr::get() const { return p; } const char16_t *ConstChar16Ptr::get() const { return p_; }
#else #else
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u.cp = p; } ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; }
#if !U_CHAR16_IS_TYPEDEF #if !U_CHAR16_IS_TYPEDEF
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u.up = p; } ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; }
#endif #endif
#if U_SIZEOF_WCHAR_T==2 #if U_SIZEOF_WCHAR_T==2
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u.wp = p; } ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; }
#endif #endif
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u.cp = p; } ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; }
ConstChar16Ptr::~ConstChar16Ptr() {} ConstChar16Ptr::~ConstChar16Ptr() {}
const char16_t *ConstChar16Ptr::get() const { return u.cp; } const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
#endif #endif

View File

@ -140,7 +140,7 @@
* <tr> * <tr>
* <td>Number Formatting</td> * <td>Number Formatting</td>
* <td>unum.h</td> * <td>unum.h</td>
* <td>icu::NumberFormat</td> * <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)</td>
* </tr> * </tr>
* <tr> * <tr>
* <td>Number Spellout<br/>(Rule Based Number Formatting)</td> * <td>Number Spellout<br/>(Rule Based Number Formatting)</td>

View File

@ -36,19 +36,61 @@ public:
* @draft ICU 59 * @draft ICU 59
*/ */
Edits() : Edits() :
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
errorCode(U_ZERO_ERROR) {} errorCode_(U_ZERO_ERROR) {}
/**
* Copy constructor.
* @param other source edits
* @draft ICU 60
*/
Edits(const Edits &other) :
array(stackArray), capacity(STACK_CAPACITY), length(other.length),
delta(other.delta), numChanges(other.numChanges),
errorCode_(other.errorCode_) {
copyArray(other);
}
/**
* Move constructor, might leave src empty.
* This object will have the same contents that the source object had.
* @param src source edits
* @draft ICU 60
*/
Edits(Edits &&src) U_NOEXCEPT :
array(stackArray), capacity(STACK_CAPACITY), length(src.length),
delta(src.delta), numChanges(src.numChanges),
errorCode_(src.errorCode_) {
moveArray(src);
}
/** /**
* Destructor. * Destructor.
* @draft ICU 59 * @draft ICU 59
*/ */
~Edits(); ~Edits();
/**
* Assignment operator.
* @param other source edits
* @return *this
* @draft ICU 60
*/
Edits &operator=(const Edits &other);
/**
* Move assignment operator, might leave src empty.
* This object will have the same contents that the source object had.
* The behavior is undefined if *this and src are the same object.
* @param src source edits
* @return *this
* @draft ICU 60
*/
Edits &operator=(Edits &&src) U_NOEXCEPT;
/** /**
* Resets the data but may not release memory. * Resets the data but may not release memory.
* @draft ICU 59 * @draft ICU 59
*/ */
void reset(); void reset() U_NOEXCEPT;
/** /**
* Adds a record for an unchanged segment of text. * Adds a record for an unchanged segment of text.
@ -66,6 +108,9 @@ public:
* Sets the UErrorCode if an error occurred while recording edits. * Sets the UErrorCode if an error occurred while recording edits.
* Preserves older error codes in the outErrorCode. * Preserves older error codes in the outErrorCode.
* Normally called from inside ICU string transformation functions, not user code. * Normally called from inside ICU string transformation functions, not user code.
* @param outErrorCode Set to an error code if it does not contain one already
* and an error occurred while recording edits.
* Otherwise unchanged.
* @return TRUE if U_FAILURE(outErrorCode) * @return TRUE if U_FAILURE(outErrorCode)
* @draft ICU 59 * @draft ICU 59
*/ */
@ -81,7 +126,13 @@ public:
* @return TRUE if there are any change edits * @return TRUE if there are any change edits
* @draft ICU 59 * @draft ICU 59
*/ */
UBool hasChanges() const; UBool hasChanges() const { return numChanges != 0; }
/**
* @return the number of change edits
* @draft ICU 60
*/
int32_t numberOfChanges() const { return numChanges; }
/** /**
* Access to the list of edits. * Access to the list of edits.
@ -90,6 +141,15 @@ public:
* @draft ICU 59 * @draft ICU 59
*/ */
struct U_COMMON_API Iterator U_FINAL : public UMemory { struct U_COMMON_API Iterator U_FINAL : public UMemory {
/**
* Default constructor, empty iterator.
* @draft ICU 60
*/
Iterator() :
array(nullptr), index(0), length(0),
remaining(0), onlyChanges_(FALSE), coarse(FALSE),
dir(0), changed(FALSE), oldLength_(0), newLength_(0),
srcIndex(0), replIndex(0), destIndex(0) {}
/** /**
* Copy constructor. * Copy constructor.
* @draft ICU 59 * @draft ICU 59
@ -103,6 +163,9 @@ public:
/** /**
* Advances to the next edit. * Advances to the next edit.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return TRUE if there is another edit * @return TRUE if there is another edit
* @draft ICU 59 * @draft ICU 59
*/ */
@ -121,10 +184,86 @@ public:
* if the source index is out of bounds for the source string. * if the source index is out of bounds for the source string.
* *
* @param i source index * @param i source index
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return TRUE if the edit for the source index was found * @return TRUE if the edit for the source index was found
* @draft ICU 59 * @draft ICU 59
*/ */
UBool findSourceIndex(int32_t i, UErrorCode &errorCode); UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
return findIndex(i, TRUE, errorCode) == 0;
}
/**
* Finds the edit that contains the destination index.
* The destination index may be found in a non-change
* even if normal iteration would skip non-changes.
* Normal iteration can continue from a found edit.
*
* The iterator state before this search logically does not matter.
* (It may affect the performance of the search.)
*
* The iterator state after this search is undefined
* if the destination index is out of bounds for the destination string.
*
* @param i destination index
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return TRUE if the edit for the destination index was found
* @draft ICU 60
*/
UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
return findIndex(i, FALSE, errorCode) == 0;
}
/**
* Returns the destination index corresponding to the given source index.
* If the source index is inside a change edit (not at its start),
* then the destination index at the end of that edit is returned,
* since there is no information about index mapping inside a change edit.
*
* (This means that indexes to the start and middle of an edit,
* for example around a grapheme cluster, are mapped to indexes
* encompassing the entire edit.
* The alternative, mapping an interior index to the start,
* would map such an interval to an empty one.)
*
* This operation will usually but not always modify this object.
* The iterator state after this search is undefined.
*
* @param i source index
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return destination index; undefined if i is not 0..string length
* @draft ICU 60
*/
int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
/**
* Returns the source index corresponding to the given destination index.
* If the destination index is inside a change edit (not at its start),
* then the source index at the end of that edit is returned,
* since there is no information about index mapping inside a change edit.
*
* (This means that indexes to the start and middle of an edit,
* for example around a grapheme cluster, are mapped to indexes
* encompassing the entire edit.
* The alternative, mapping an interior index to the start,
* would map such an interval to an empty one.)
*
* This operation will usually but not always modify this object.
* The iterator state after this search is undefined.
*
* @param i destination index
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return source index; undefined if i is not 0..string length
* @draft ICU 60
*/
int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
/** /**
* @return TRUE if this edit replaces oldLength() units with newLength() different ones. * @return TRUE if this edit replaces oldLength() units with newLength() different ones.
@ -167,15 +306,22 @@ public:
Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs); Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
int32_t readLength(int32_t head); int32_t readLength(int32_t head);
void updateIndexes(); void updateNextIndexes();
void updatePreviousIndexes();
UBool noNext(); UBool noNext();
UBool next(UBool onlyChanges, UErrorCode &errorCode); UBool next(UBool onlyChanges, UErrorCode &errorCode);
UBool previous(UErrorCode &errorCode);
/** @return -1: error or i<0; 0: found; 1: i>=string length */
int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode);
const uint16_t *array; const uint16_t *array;
int32_t index, length; int32_t index, length;
// 0 if we are not within compressed equal-length changes.
// Otherwise the number of remaining changes, including the current one.
int32_t remaining; int32_t remaining;
UBool onlyChanges_, coarse; UBool onlyChanges_, coarse;
int8_t dir; // iteration direction: back(<0), initial(0), forward(>0)
UBool changed; UBool changed;
int32_t oldLength_, newLength_; int32_t oldLength_, newLength_;
int32_t srcIndex, replIndex, destIndex; int32_t srcIndex, replIndex, destIndex;
@ -219,9 +365,39 @@ public:
return Iterator(array, length, FALSE, FALSE); return Iterator(array, length, FALSE, FALSE);
} }
/**
* Merges the two input Edits and appends the result to this object.
*
* Consider two string transformations (for example, normalization and case mapping)
* where each records Edits in addition to writing an output string.<br>
* Edits ab reflect how substrings of input string a
* map to substrings of intermediate string b.<br>
* Edits bc reflect how substrings of intermediate string b
* map to substrings of output string c.<br>
* This function merges ab and bc such that the additional edits
* recorded in this object reflect how substrings of input string a
* map to substrings of output string c.
*
* If unrelated Edits are passed in where the output string of the first
* has a different length than the input string of the second,
* then a U_ILLEGAL_ARGUMENT_ERROR is reported.
*
* @param ab reflects how substrings of input string a
* map to substrings of intermediate string b.
* @param bc reflects how substrings of intermediate string b
* map to substrings of output string c.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return *this, with the merged edits appended
* @draft ICU 60
*/
Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
private: private:
Edits(const Edits &) = delete; void releaseArray() U_NOEXCEPT;
Edits &operator=(const Edits &) = delete; Edits &copyArray(const Edits &other);
Edits &moveArray(Edits &src) U_NOEXCEPT;
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; } void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; } int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
@ -234,7 +410,8 @@ private:
int32_t capacity; int32_t capacity;
int32_t length; int32_t length;
int32_t delta; int32_t delta;
UErrorCode errorCode; int32_t numChanges;
UErrorCode errorCode_;
uint16_t stackArray[STACK_CAPACITY]; uint16_t stackArray[STACK_CAPACITY];
}; };

View File

@ -55,14 +55,30 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
*/ */
static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status); static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status);
#ifndef U_HIDE_DEPRECATED_API
/**
* This function has been deprecated in favor of createEmptyInstance, which has
* identical behavior.
* @param status The error code.
* @return the new builder
* @deprecated ICU 60 use createEmptyInstance instead
* @see createEmptyInstance()
*/
static inline FilteredBreakIteratorBuilder *createInstance(UErrorCode &status) {
return createEmptyInstance(status);
}
#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DRAFT_API
/** /**
* Construct an empty FilteredBreakIteratorBuilder. * Construct an empty FilteredBreakIteratorBuilder.
* In this state, it will not suppress any segment boundaries. * In this state, it will not suppress any segment boundaries.
* @param status The error code. * @param status The error code.
* @return the new builder * @return the new builder
* @stable ICU 56 * @draft ICU 60
*/ */
static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status);
#endif /* U_HIDE_DRAFT_API */
/** /**
* Suppress a certain string from being the end of a segment. * Suppress a certain string from being the end of a segment.
@ -89,6 +105,20 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
*/ */
virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
#ifndef U_HIDE_DEPRECATED_API
/**
* This function has been deprecated in favor of wrapIteratorWithFilter().
* The behavior is identical.
* @param adoptBreakIterator the break iterator to adopt
* @param status error code
* @return the new BreakIterator, owned by the caller.
* @deprecated ICU 60 use wrapIteratorWithFilter() instead
* @see wrapIteratorWithFilter()
*/
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DRAFT_API
/** /**
* Wrap (adopt) an existing break iterator in a new filtered instance. * Wrap (adopt) an existing break iterator in a new filtered instance.
* The resulting BreakIterator is owned by the caller. * The resulting BreakIterator is owned by the caller.
@ -96,12 +126,16 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
* Note that the adoptBreakIterator is adopted by the new BreakIterator * Note that the adoptBreakIterator is adopted by the new BreakIterator
* and should no longer be used by the caller. * and should no longer be used by the caller.
* The FilteredBreakIteratorBuilder may be reused. * The FilteredBreakIteratorBuilder may be reused.
* This function is an alias for build()
* @param adoptBreakIterator the break iterator to adopt * @param adoptBreakIterator the break iterator to adopt
* @param status error code * @param status error code
* @return the new BreakIterator, owned by the caller. * @return the new BreakIterator, owned by the caller.
* @stable ICU 56 * @draft ICU 60
*/ */
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) {
return build(adoptBreakIterator, status);
}
#endif /* U_HIDE_DRAFT_API */
protected: protected:
/** /**

View File

@ -213,7 +213,6 @@ public:
errorCode=U_MEMORY_ALLOCATION_ERROR; errorCode=U_MEMORY_ALLOCATION_ERROR;
} }
} }
#if U_HAVE_RVALUE_REFERENCES
/** /**
* Move constructor, leaves src with isNull(). * Move constructor, leaves src with isNull().
* @param src source smart pointer * @param src source smart pointer
@ -222,7 +221,6 @@ public:
LocalPointer(LocalPointer<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) { LocalPointer(LocalPointer<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
src.ptr=NULL; src.ptr=NULL;
} }
#endif
/** /**
* Destructor deletes the object it owns. * Destructor deletes the object it owns.
* @stable ICU 4.4 * @stable ICU 4.4
@ -230,7 +228,6 @@ public:
~LocalPointer() { ~LocalPointer() {
delete LocalPointerBase<T>::ptr; delete LocalPointerBase<T>::ptr;
} }
#if U_HAVE_RVALUE_REFERENCES
/** /**
* Move assignment operator, leaves src with isNull(). * Move assignment operator, leaves src with isNull().
* The behavior is undefined if *this and src are the same object. * The behavior is undefined if *this and src are the same object.
@ -241,7 +238,6 @@ public:
LocalPointer<T> &operator=(LocalPointer<T> &&src) U_NOEXCEPT { LocalPointer<T> &operator=(LocalPointer<T> &&src) U_NOEXCEPT {
return moveFrom(src); return moveFrom(src);
} }
#endif
// do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
/** /**
* Move assignment, leaves src with isNull(). * Move assignment, leaves src with isNull().
@ -362,7 +358,6 @@ public:
errorCode=U_MEMORY_ALLOCATION_ERROR; errorCode=U_MEMORY_ALLOCATION_ERROR;
} }
} }
#if U_HAVE_RVALUE_REFERENCES
/** /**
* Move constructor, leaves src with isNull(). * Move constructor, leaves src with isNull().
* @param src source smart pointer * @param src source smart pointer
@ -371,7 +366,6 @@ public:
LocalArray(LocalArray<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) { LocalArray(LocalArray<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
src.ptr=NULL; src.ptr=NULL;
} }
#endif
/** /**
* Destructor deletes the array it owns. * Destructor deletes the array it owns.
* @stable ICU 4.4 * @stable ICU 4.4
@ -379,7 +373,6 @@ public:
~LocalArray() { ~LocalArray() {
delete[] LocalPointerBase<T>::ptr; delete[] LocalPointerBase<T>::ptr;
} }
#if U_HAVE_RVALUE_REFERENCES
/** /**
* Move assignment operator, leaves src with isNull(). * Move assignment operator, leaves src with isNull().
* The behavior is undefined if *this and src are the same object. * The behavior is undefined if *this and src are the same object.
@ -390,7 +383,6 @@ public:
LocalArray<T> &operator=(LocalArray<T> &&src) U_NOEXCEPT { LocalArray<T> &operator=(LocalArray<T> &&src) U_NOEXCEPT {
return moveFrom(src); return moveFrom(src);
} }
#endif
// do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
/** /**
* Move assignment, leaves src with isNull(). * Move assignment, leaves src with isNull().
@ -492,7 +484,6 @@ public:
* @see LocalPointer * @see LocalPointer
* @stable ICU 4.4 * @stable ICU 4.4
*/ */
#if U_HAVE_RVALUE_REFERENCES
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \ #define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
class LocalPointerClassName : public LocalPointerBase<Type> { \ class LocalPointerClassName : public LocalPointerBase<Type> { \
public: \ public: \
@ -526,34 +517,6 @@ public:
ptr=p; \ ptr=p; \
} \ } \
} }
#else
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
class LocalPointerClassName : public LocalPointerBase<Type> { \
public: \
using LocalPointerBase<Type>::operator*; \
using LocalPointerBase<Type>::operator->; \
explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \
~LocalPointerClassName() { closeFunction(ptr); } \
LocalPointerClassName &moveFrom(LocalPointerClassName &src) U_NOEXCEPT { \
if (ptr != NULL) { closeFunction(ptr); } \
LocalPointerBase<Type>::ptr=src.ptr; \
src.ptr=NULL; \
return *this; \
} \
void swap(LocalPointerClassName &other) U_NOEXCEPT { \
Type *temp=LocalPointerBase<Type>::ptr; \
LocalPointerBase<Type>::ptr=other.ptr; \
other.ptr=temp; \
} \
friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) U_NOEXCEPT { \
p1.swap(p2); \
} \
void adoptInstead(Type *p) { \
if (ptr != NULL) { closeFunction(ptr); } \
ptr=p; \
} \
}
#endif
U_NAMESPACE_END U_NAMESPACE_END

View File

@ -88,7 +88,7 @@ class UnicodeString;
* <P> * <P>
* The third constructor requires a third argument--the <STRONG>Variant.</STRONG> * The third constructor requires a third argument--the <STRONG>Variant.</STRONG>
* The Variant codes are vendor and browser-specific. * The Variant codes are vendor and browser-specific.
* For example, use REVISED for a langauge's revised script orthography, and POSIX for POSIX. * For example, use REVISED for a language's revised script orthography, and POSIX for POSIX.
* Where there are two variants, separate them with an underscore, and * Where there are two variants, separate them with an underscore, and
* put the most important one first. For * put the most important one first. For
* example, a Traditional Spanish collation might be referenced, with * example, a Traditional Spanish collation might be referenced, with

View File

@ -28,12 +28,15 @@
#if !UCONFIG_NO_NORMALIZATION #if !UCONFIG_NO_NORMALIZATION
#include "unicode/stringpiece.h"
#include "unicode/uniset.h" #include "unicode/uniset.h"
#include "unicode/unistr.h" #include "unicode/unistr.h"
#include "unicode/unorm2.h" #include "unicode/unorm2.h"
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
class ByteSink;
/** /**
* Unicode normalization functionality for standard Unicode normalization or * Unicode normalization functionality for standard Unicode normalization or
* for using custom mapping tables. * for using custom mapping tables.
@ -215,6 +218,35 @@ public:
normalize(const UnicodeString &src, normalize(const UnicodeString &src,
UnicodeString &dest, UnicodeString &dest,
UErrorCode &errorCode) const = 0; UErrorCode &errorCode) const = 0;
/**
* Normalizes a UTF-8 string and optionally records how source substrings
* relate to changed and unchanged result substrings.
*
* Currently implemented completely only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* Otherwise currently converts to & from UTF-16 and does not support edits.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src Source UTF-8 string.
* @param sink A ByteSink to which the normalized UTF-8 result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The contents of the Edits object are undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be nullptr.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @draft ICU 60
*/
virtual void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
Edits *edits, UErrorCode &errorCode) const;
/** /**
* Appends the normalized form of the second string to the first string * Appends the normalized form of the second string to the first string
* (merging them at the boundary) and returns the first string. * (merging them at the boundary) and returns the first string.
@ -340,6 +372,30 @@ public:
*/ */
virtual UBool virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
/**
* Tests if the UTF-8 string is normalized.
* Internally, in cases where the quickCheck() method would return "maybe"
* (which is only possible for the two COMPOSE modes) this method
* resolves to "yes" or "no" to provide a definitive result,
* at the cost of doing more work in those cases.
*
* This works for all normalization modes,
* but it is currently optimized for UTF-8 only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* For other modes it currently converts to UTF-16 and calls isNormalized().
*
* @param s UTF-8 input string
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return TRUE if s is normalized
* @draft ICU 60
*/
virtual UBool
isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
/** /**
* Tests if the string is normalized. * Tests if the string is normalized.
@ -479,7 +535,36 @@ public:
virtual UnicodeString & virtual UnicodeString &
normalize(const UnicodeString &src, normalize(const UnicodeString &src,
UnicodeString &dest, UnicodeString &dest,
UErrorCode &errorCode) const; UErrorCode &errorCode) const U_OVERRIDE;
/**
* Normalizes a UTF-8 string and optionally records how source substrings
* relate to changed and unchanged result substrings.
*
* Currently implemented completely only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* Otherwise currently converts to & from UTF-16 and does not support edits.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src Source UTF-8 string.
* @param sink A ByteSink to which the normalized UTF-8 result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The contents of the Edits object are undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be nullptr.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @draft ICU 60
*/
virtual void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
Edits *edits, UErrorCode &errorCode) const U_OVERRIDE;
/** /**
* Appends the normalized form of the second string to the first string * Appends the normalized form of the second string to the first string
* (merging them at the boundary) and returns the first string. * (merging them at the boundary) and returns the first string.
@ -497,7 +582,7 @@ public:
virtual UnicodeString & virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString &first, normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second, const UnicodeString &second,
UErrorCode &errorCode) const; UErrorCode &errorCode) const U_OVERRIDE;
/** /**
* Appends the second string to the first string * Appends the second string to the first string
* (merging them at the boundary) and returns the first string. * (merging them at the boundary) and returns the first string.
@ -515,7 +600,7 @@ public:
virtual UnicodeString & virtual UnicodeString &
append(UnicodeString &first, append(UnicodeString &first,
const UnicodeString &second, const UnicodeString &second,
UErrorCode &errorCode) const; UErrorCode &errorCode) const U_OVERRIDE;
/** /**
* Gets the decomposition mapping of c. * Gets the decomposition mapping of c.
@ -529,7 +614,7 @@ public:
* @stable ICU 4.6 * @stable ICU 4.6
*/ */
virtual UBool virtual UBool
getDecomposition(UChar32 c, UnicodeString &decomposition) const; getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
/** /**
* Gets the raw decomposition mapping of c. * Gets the raw decomposition mapping of c.
@ -543,7 +628,7 @@ public:
* @stable ICU 49 * @stable ICU 49
*/ */
virtual UBool virtual UBool
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
/** /**
* Performs pairwise composition of a & b and returns the composite if there is one. * Performs pairwise composition of a & b and returns the composite if there is one.
@ -556,7 +641,7 @@ public:
* @stable ICU 49 * @stable ICU 49
*/ */
virtual UChar32 virtual UChar32
composePair(UChar32 a, UChar32 b) const; composePair(UChar32 a, UChar32 b) const U_OVERRIDE;
/** /**
* Gets the combining class of c. * Gets the combining class of c.
@ -567,7 +652,7 @@ public:
* @stable ICU 49 * @stable ICU 49
*/ */
virtual uint8_t virtual uint8_t
getCombiningClass(UChar32 c) const; getCombiningClass(UChar32 c) const U_OVERRIDE;
/** /**
* Tests if the string is normalized. * Tests if the string is normalized.
@ -581,7 +666,30 @@ public:
* @stable ICU 4.4 * @stable ICU 4.4
*/ */
virtual UBool virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const; isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
/**
* Tests if the UTF-8 string is normalized.
* Internally, in cases where the quickCheck() method would return "maybe"
* (which is only possible for the two COMPOSE modes) this method
* resolves to "yes" or "no" to provide a definitive result,
* at the cost of doing more work in those cases.
*
* This works for all normalization modes,
* but it is currently optimized for UTF-8 only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* For other modes it currently converts to UTF-16 and calls isNormalized().
*
* @param s UTF-8 input string
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return TRUE if s is normalized
* @draft ICU 60
*/
virtual UBool
isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE;
/** /**
* Tests if the string is normalized. * Tests if the string is normalized.
* For details see the Normalizer2 base class documentation. * For details see the Normalizer2 base class documentation.
@ -594,7 +702,7 @@ public:
* @stable ICU 4.4 * @stable ICU 4.4
*/ */
virtual UNormalizationCheckResult virtual UNormalizationCheckResult
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const; quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
/** /**
* Returns the end of the normalized substring of the input string. * Returns the end of the normalized substring of the input string.
* For details see the Normalizer2 base class documentation. * For details see the Normalizer2 base class documentation.
@ -607,7 +715,7 @@ public:
* @stable ICU 4.4 * @stable ICU 4.4
*/ */
virtual int32_t virtual int32_t
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const; spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
/** /**
* Tests if the character always has a normalization boundary before it, * Tests if the character always has a normalization boundary before it,
@ -617,7 +725,7 @@ public:
* @return TRUE if c has a normalization boundary before it * @return TRUE if c has a normalization boundary before it
* @stable ICU 4.4 * @stable ICU 4.4
*/ */
virtual UBool hasBoundaryBefore(UChar32 c) const; virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE;
/** /**
* Tests if the character always has a normalization boundary after it, * Tests if the character always has a normalization boundary after it,
@ -627,7 +735,7 @@ public:
* @return TRUE if c has a normalization boundary after it * @return TRUE if c has a normalization boundary after it
* @stable ICU 4.4 * @stable ICU 4.4
*/ */
virtual UBool hasBoundaryAfter(UChar32 c) const; virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE;
/** /**
* Tests if the character is normalization-inert. * Tests if the character is normalization-inert.
@ -636,7 +744,7 @@ public:
* @return TRUE if c is normalization-inert * @return TRUE if c is normalization-inert
* @stable ICU 4.4 * @stable ICU 4.4
*/ */
virtual UBool isInert(UChar32 c) const; virtual UBool isInert(UChar32 c) const U_OVERRIDE;
private: private:
UnicodeString & UnicodeString &
normalize(const UnicodeString &src, normalize(const UnicodeString &src,
@ -644,6 +752,12 @@ private:
USetSpanCondition spanCondition, USetSpanCondition spanCondition,
UErrorCode &errorCode) const; UErrorCode &errorCode) const;
void
normalizeUTF8(uint32_t options, const char *src, int32_t length,
ByteSink &sink, Edits *edits,
USetSpanCondition spanCondition,
UErrorCode &errorCode) const;
UnicodeString & UnicodeString &
normalizeSecondAndAppend(UnicodeString &first, normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second, const UnicodeString &second,

View File

@ -132,6 +132,8 @@
#define U_PF_BROWSER_NATIVE_CLIENT 4020 #define U_PF_BROWSER_NATIVE_CLIENT 4020
/** Android is based on Linux. @internal */ /** Android is based on Linux. @internal */
#define U_PF_ANDROID 4050 #define U_PF_ANDROID 4050
/** Fuchsia is a POSIX-ish platform. @internal */
#define U_PF_FUCHSIA 4100
/* Maximum value for Linux-based platform is 4499 */ /* Maximum value for Linux-based platform is 4499 */
/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */ /** z/OS is the successor to OS/390 which was the successor to MVS. @internal */
#define U_PF_OS390 9000 #define U_PF_OS390 9000
@ -152,6 +154,8 @@
# include <android/api-level.h> # include <android/api-level.h>
#elif defined(__pnacl__) || defined(__native_client__) #elif defined(__pnacl__) || defined(__native_client__)
# define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT # define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT
#elif defined(__Fuchsia__)
# define U_PLATFORM U_PF_FUCHSIA
#elif defined(linux) || defined(__linux__) || defined(__linux) #elif defined(linux) || defined(__linux__) || defined(__linux)
# define U_PLATFORM U_PF_LINUX # define U_PLATFORM U_PF_LINUX
#elif defined(__APPLE__) && defined(__MACH__) #elif defined(__APPLE__) && defined(__MACH__)
@ -192,6 +196,20 @@
# define U_PLATFORM U_PF_UNKNOWN # define U_PLATFORM U_PF_UNKNOWN
#endif #endif
/**
* \def UPRV_INCOMPLETE_CPP11_SUPPORT
* This switch turns off ICU 60 NumberFormatter code.
* By default, this switch is enabled on AIX, z/OS, and Solaris,
* which have poor C++11 support.
*
* NOTE: This switch is intended to be temporary; see #13393.
*
* @internal
*/
#ifndef UPRV_INCOMPLETE_CPP11_SUPPORT
# define UPRV_INCOMPLETE_CPP11_SUPPORT (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_SOLARIS )
#endif
/** /**
* \def CYGWINMSVC * \def CYGWINMSVC
* Defined if this is Windows with Cygwin, but using MSVC rather than gcc. * Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
@ -330,31 +348,6 @@
# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H # define U_HAVE_INTTYPES_H U_HAVE_STDINT_H
#endif #endif
/**
* \def U_IOSTREAM_SOURCE
* Defines what support for C++ streams is available.
*
* If U_IOSTREAM_SOURCE is set to 199711, then &lt;iostream&gt; is available
* (the ISO/IEC C++ FDIS was published in November 1997), and then
* one should qualify streams using the std namespace in ICU header
* files.
* Starting with ICU 49, this is the only supported version.
*
* If U_IOSTREAM_SOURCE is set to 198506, then &lt;iostream.h&gt; is
* available instead (in June 1985 Stroustrup published
* "An Extensible I/O Facility for C++" at the summer USENIX conference).
* Starting with ICU 49, this version is not supported any more.
*
* If U_IOSTREAM_SOURCE is 0 (or any value less than 199711),
* then C++ streams are not available and
* support for them will be silently suppressed in ICU.
*
* @internal
*/
#ifndef U_IOSTREAM_SOURCE
#define U_IOSTREAM_SOURCE 199711
#endif
/*===========================================================================*/ /*===========================================================================*/
/** @{ Compiler and environment features */ /** @{ Compiler and environment features */
/*===========================================================================*/ /*===========================================================================*/
@ -505,22 +498,6 @@ namespace std {
}; };
#endif #endif
/**
* \def U_HAVE_RVALUE_REFERENCES
* Set to 1 if the compiler supports rvalue references.
* C++11 feature, necessary for move constructor & move assignment.
* @internal
*/
#ifdef U_HAVE_RVALUE_REFERENCES
/* Use the predefined value. */
#elif U_CPLUSPLUS_VERSION >= 11 || __has_feature(cxx_rvalue_references) \
|| defined(__GXX_EXPERIMENTAL_CXX0X__) \
|| (defined(_MSC_VER) && _MSC_VER >= 1600) /* Visual Studio 2010 */
# define U_HAVE_RVALUE_REFERENCES 1
#else
# define U_HAVE_RVALUE_REFERENCES 0
#endif
/** /**
* \def U_NOEXCEPT * \def U_NOEXCEPT
* "noexcept" if supported, otherwise empty. * "noexcept" if supported, otherwise empty.
@ -871,6 +848,16 @@ namespace std {
# define U_CALLCONV U_EXPORT2 # define U_CALLCONV U_EXPORT2
#endif #endif
/**
* \def U_CALLCONV_FPTR
* Similar to U_CALLCONV, but only used on function pointers.
* @internal
*/
#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
# define U_CALLCONV_FPTR U_CALLCONV
#else
# define U_CALLCONV_FPTR
#endif
/* @} */ /* @} */
#endif #endif

View File

@ -31,23 +31,14 @@
#include "unicode/schriter.h" #include "unicode/schriter.h"
#include "unicode/uchriter.h" #include "unicode/uchriter.h"
struct UTrie;
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
/** @internal */ /** @internal */
struct RBBIDataHeader;
class RuleBasedBreakIteratorTables;
class BreakIterator;
class RBBIDataWrapper;
class UStack;
class LanguageBreakEngine; class LanguageBreakEngine;
struct RBBIDataHeader;
class RBBIDataWrapper;
class UnhandledEngine; class UnhandledEngine;
struct RBBIStateTable; class UStack;
/** /**
* *
@ -96,19 +87,36 @@ private:
*/ */
RBBIDataWrapper *fData; RBBIDataWrapper *fData;
/** Index of the Rule {tag} values for the most recent match. /**
* The iteration state - current position, rule status for the current position,
* and whether the iterator ran off the end, yielding UBRK_DONE.
* Current position is pinned to be 0 <= position <= text.length.
* Current position is always set to a boundary.
* @internal * @internal
*/ */
int32_t fLastRuleStatusIndex; /**
* The current position of the iterator. Pinned, 0 <= fPosition <= text.length.
* Never has the value UBRK_DONE (-1).
*/
int32_t fPosition;
/** /**
* Rule tag value valid flag. * TODO:
* Some iterator operations don't intrinsically set the correct tag value.
* This flag lets us lazily compute the value if we are ever asked for it.
* @internal
*/ */
UBool fLastStatusIndexValid; int32_t fRuleStatusIndex;
/**
* True when iteration has run off the end, and iterator functions should return UBRK_DONE.
*/
UBool fDone;
/**
* Cache of previously determined boundary positions.
*/
public: // TODO: debug, return to private.
class BreakCache;
BreakCache *fBreakCache;
private:
/** /**
* Counter for the number of characters encountered with the "dictionary" * Counter for the number of characters encountered with the "dictionary"
* flag set. * flag set.
@ -117,26 +125,11 @@ private:
uint32_t fDictionaryCharCount; uint32_t fDictionaryCharCount;
/** /**
* When a range of characters is divided up using the dictionary, the break * Cache of boundary positions within a region of text that has been
* positions that are discovered are stored here, preventing us from having * sub-divided by dictionary based breaking.
* to use either the dictionary or the state table again until the iterator
* leaves this range of text. Has the most impact for line breaking.
* @internal
*/ */
int32_t* fCachedBreakPositions; class DictionaryCache;
DictionaryCache *fDictionaryCache;
/**
* The number of elements in fCachedBreakPositions
* @internal
*/
int32_t fNumCachedBreakPositions;
/**
* if fCachedBreakPositions is not null, this indicates which item in the
* cache the current iteration position refers to
* @internal
*/
int32_t fPositionInCache;
/** /**
* *
@ -179,13 +172,11 @@ private:
*/ */
RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
/** @internal */
friend class RBBIRuleBuilder; friend class RBBIRuleBuilder;
/** @internal */ /** @internal */
friend class BreakIterator; friend class BreakIterator;
public: public:
/** Default constructor. Creates an empty shell of an iterator, with no /** Default constructor. Creates an empty shell of an iterator, with no
@ -469,7 +460,10 @@ public:
virtual UBool isBoundary(int32_t offset); virtual UBool isBoundary(int32_t offset);
/** /**
* Returns the current iteration position. * Returns the current iteration position. Note that UBRK_DONE is never
* returned from this function; if iteration has run to the end of a
* string, current() will return the length of the string while
* next() will return UBRK_DONE.
* @return The current iteration position. * @return The current iteration position.
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
@ -501,6 +495,7 @@ public:
* Note: this function is not thread safe. It should not have been * Note: this function is not thread safe. It should not have been
* declared const, and the const remains only for compatibility * declared const, and the const remains only for compatibility
* reasons. (The function is logically const, but not bit-wise const). * reasons. (The function is logically const, but not bit-wise const).
* TODO: check this. Probably thread safe now.
* <p> * <p>
* @return the status from the break rule that determined the most recently * @return the status from the break rule that determined the most recently
* returned break position. * returned break position.
@ -660,46 +655,31 @@ private:
* Common initialization function, used by constructors and bufferClone. * Common initialization function, used by constructors and bufferClone.
* @internal * @internal
*/ */
void init(); void init(UErrorCode &status);
/** /**
* This method backs the iterator back up to a "safe position" in the text. * Iterate backwards from an arbitrary position in the input text using the Safe Reverse rules.
* This is a position that we know, without any context, must be a break position. * This locates a "Safe Position" from which the forward break rules
* The various calling methods then iterate forward from this safe position to * will operate correctly. A Safe Position is not necessarily a boundary itself.
* the appropriate position to return. (For more information, see the description *
* of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.) * @param fromPosition the position in the input text to begin the iteration.
* @param statetable state table used of moving backwards
* @internal * @internal
*/ */
int32_t handlePrevious(const RBBIStateTable *statetable); int32_t handlePrevious(int32_t fromPosition);
/** /**
* This method is the actual implementation of the next() method. All iteration * Find a rule-based boundary by running the state machine.
* vectors through here. This method initializes the state machine to state 1 * Input
* and advances through the text character by character until we reach the end * fPosition, the position in the text to begin from.
* of the text or the state machine transitions to state 0. We update our return * Output
* value every time the state machine passes through a possible end state. * fPosition: the boundary following the starting position.
* @param statetable state table used of moving forwards * fDictionaryCharCount the number of dictionary characters encountered.
* If > 0, the segment will be further subdivided
* fRuleStatusIndex Info from the state table indicating which rules caused the boundary.
*
* @internal * @internal
*/ */
int32_t handleNext(const RBBIStateTable *statetable); int32_t handleNext();
/**
* This is the function that actually implements dictionary-based
* breaking. Covering at least the range from startPos to endPos,
* it checks for dictionary characters, and if it finds them determines
* the appropriate object to deal with them. It may cache found breaks in
* fCachedBreakPositions as it goes. It may well also look at text outside
* the range startPos to endPos.
* If going forward, endPos is the normal Unicode break result, and
* if goind in reverse, startPos is the normal Unicode break result
* @param startPos The start position of a range of text
* @param endPos The end position of a range of text
* @param reverse The call is for the reverse direction
* @internal
*/
int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
/** /**
@ -710,11 +690,14 @@ private:
*/ */
const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c); const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
public:
#ifndef U_HIDE_INTERNAL_API
/** /**
* Debugging function only.
* @internal * @internal
*/ */
void makeRuleStatusValid(); void dumpCache();
#endif /* U_HIDE_INTERNAL_API */
}; };
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------

View File

@ -21,6 +21,13 @@
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
// Forward declaration:
namespace number {
namespace impl {
class SimpleModifier;
}
}
/** /**
* Formats simple patterns like "{1} was born in {0}". * Formats simple patterns like "{1} was born in {0}".
* Minimal subset of MessageFormat; fast, simple, minimal dependencies. * Minimal subset of MessageFormat; fast, simple, minimal dependencies.
@ -286,6 +293,9 @@ private:
UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue, UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
int32_t *offsets, int32_t offsetsLength, int32_t *offsets, int32_t offsetsLength,
UErrorCode &errorCode); UErrorCode &errorCode);
// Give access to internals to SimpleModifier for number formatting
friend class number::impl::SimpleModifier;
}; };
U_NAMESPACE_END U_NAMESPACE_END

View File

@ -0,0 +1,198 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// stringoptions.h
// created: 2017jun08 Markus W. Scherer
#ifndef __STRINGOPTIONS_H__
#define __STRINGOPTIONS_H__
#include "unicode/utypes.h"
/**
* \file
* \brief C API: Bit set option bit constants for various string and character processing functions.
*/
/**
* Option value for case folding: Use default mappings defined in CaseFolding.txt.
*
* @stable ICU 2.0
*/
#define U_FOLD_CASE_DEFAULT 0
/**
* Option value for case folding:
*
* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
* and dotless i appropriately for Turkic languages (tr, az).
*
* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
* are to be included for default mappings and
* excluded for the Turkic-specific mappings.
*
* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
* are to be excluded for default mappings and
* included for the Turkic-specific mappings.
*
* @stable ICU 2.0
*/
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
#ifndef U_HIDE_DRAFT_API
/**
* Titlecase the string as a whole rather than each word.
* (Titlecase only the character at index 0, possibly adjusted.)
* Option bits value for titlecasing APIs that take an options bit set.
*
* It is an error to specify multiple titlecasing iterator options together,
* including both an options bit and an explicit BreakIterator.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @draft ICU 60
*/
#define U_TITLECASE_WHOLE_STRING 0x20
/**
* Titlecase sentences rather than words.
* (Titlecase only the first character of each sentence, possibly adjusted.)
* Option bits value for titlecasing APIs that take an options bit set.
*
* It is an error to specify multiple titlecasing iterator options together,
* including both an options bit and an explicit BreakIterator.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @draft ICU 60
*/
#define U_TITLECASE_SENTENCES 0x40
#endif // U_HIDE_DRAFT_API
/**
* Do not lowercase non-initial parts of words when titlecasing.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will titlecase the character at each
* (possibly adjusted) BreakIterator index and
* lowercase all other characters up to the next iterator index.
* With this option, the other characters will not be modified.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @see UnicodeString::toTitle
* @see CaseMap::toTitle
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_LOWERCASE 0x100
/**
* Do not adjust the titlecasing BreakIterator indexes;
* titlecase exactly the characters at breaks from the iterator.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will take each break iterator index,
* adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
* and titlecase that one.
*
* Other characters are lowercased.
*
* It is an error to specify multiple titlecasing adjustment options together.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @see U_TITLECASE_NO_LOWERCASE
* @see UnicodeString::toTitle
* @see CaseMap::toTitle
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
#ifndef U_HIDE_DRAFT_API
/**
* Adjust each titlecasing BreakIterator index to the next cased character.
* (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
* Option bit for titlecasing APIs that take an options bit set.
*
* This used to be the default index adjustment in ICU.
* Since ICU 60, the default index adjustment is to the next character that is
* a letter, number, symbol, or private use code point.
* (Uncased modifier letters are skipped.)
* The difference in behavior is small for word titlecasing,
* but the new adjustment is much better for whole-string and sentence titlecasing:
* It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
*
* It is an error to specify multiple titlecasing adjustment options together.
*
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
* @draft ICU 60
*/
#define U_TITLECASE_ADJUST_TO_CASED 0x400
/**
* Option for string transformation functions to not first reset the Edits object.
* Used for example in some case-mapping and normalization functions.
*
* @see CaseMap
* @see Edits
* @see Normalizer2
* @draft ICU 60
*/
#define U_EDITS_NO_RESET 0x2000
/**
* Omit unchanged text when recording how source substrings
* relate to changed and unchanged result substrings.
* Used for example in some case-mapping and normalization functions.
*
* @see CaseMap
* @see Edits
* @see Normalizer2
* @draft ICU 60
*/
#define U_OMIT_UNCHANGED_TEXT 0x4000
#endif // U_HIDE_DRAFT_API
/**
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
* Compare strings in code point order instead of code unit order.
* @stable ICU 2.2
*/
#define U_COMPARE_CODE_POINT_ORDER 0x8000
/**
* Option bit for unorm_compare:
* Perform case-insensitive comparison.
* @stable ICU 2.2
*/
#define U_COMPARE_IGNORE_CASE 0x10000
/**
* Option bit for unorm_compare:
* Both input strings are assumed to fulfill FCD conditions.
* @stable ICU 2.2
*/
#define UNORM_INPUT_IS_FCD 0x20000
// Related definitions elsewhere.
// Options that are not meaningful in the same functions
// can share the same bits.
//
// Public:
// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
//
// Internal: (may change or be removed)
// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
#endif // __STRINGOPTIONS_H__

View File

@ -256,7 +256,7 @@ protected:
/** @internal */ /** @internal */
class FinalValueNode : public Node { class FinalValueNode : public Node {
public: public:
FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {} FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {}
virtual UBool operator==(const Node &other) const; virtual UBool operator==(const Node &other) const;
virtual void write(StringTrieBuilder &builder); virtual void write(StringTrieBuilder &builder);
protected: protected:
@ -276,7 +276,7 @@ protected:
void setValue(int32_t v) { void setValue(int32_t v) {
hasValue=TRUE; hasValue=TRUE;
value=v; value=v;
hash=hash*37+v; hash=hash*37u+v;
} }
protected: protected:
UBool hasValue; UBool hasValue;
@ -290,7 +290,7 @@ protected:
class IntermediateValueNode : public ValueNode { class IntermediateValueNode : public ValueNode {
public: public:
IntermediateValueNode(int32_t v, Node *nextNode) IntermediateValueNode(int32_t v, Node *nextNode)
: ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); } : ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); }
virtual UBool operator==(const Node &other) const; virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber); virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
virtual void write(StringTrieBuilder &builder); virtual void write(StringTrieBuilder &builder);
@ -307,7 +307,7 @@ protected:
class LinearMatchNode : public ValueNode { class LinearMatchNode : public ValueNode {
public: public:
LinearMatchNode(int32_t len, Node *nextNode) LinearMatchNode(int32_t len, Node *nextNode)
: ValueNode((0x333333*37+len)*37+hashCode(nextNode)), : ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),
length(len), next(nextNode) {} length(len), next(nextNode) {}
virtual UBool operator==(const Node &other) const; virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber); virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
@ -342,7 +342,7 @@ protected:
equal[length]=NULL; equal[length]=NULL;
values[length]=value; values[length]=value;
++length; ++length;
hash=(hash*37+c)*37+value; hash=(hash*37u+c)*37u+value;
} }
// Adds a unit which leads to another match node. // Adds a unit which leads to another match node.
void add(int32_t c, Node *node) { void add(int32_t c, Node *node) {
@ -350,7 +350,7 @@ protected:
equal[length]=node; equal[length]=node;
values[length]=0; values[length]=0;
++length; ++length;
hash=(hash*37+c)*37+hashCode(node); hash=(hash*37u+c)*37u+hashCode(node);
} }
protected: protected:
Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value". Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value".
@ -365,8 +365,8 @@ protected:
class SplitBranchNode : public BranchNode { class SplitBranchNode : public BranchNode {
public: public:
SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode) SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
: BranchNode(((0x555555*37+middleUnit)*37+ : BranchNode(((0x555555u*37u+middleUnit)*37u+
hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)), hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)),
unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {} unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
virtual UBool operator==(const Node &other) const; virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber); virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
@ -382,7 +382,7 @@ protected:
class BranchHeadNode : public ValueNode { class BranchHeadNode : public ValueNode {
public: public:
BranchHeadNode(int32_t len, Node *subNode) BranchHeadNode(int32_t len, Node *subNode)
: ValueNode((0x666666*37+len)*37+hashCode(subNode)), : ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)),
length(len), next(subNode) {} length(len), next(subNode) {}
virtual UBool operator==(const Node &other) const; virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber); virtual int32_t markRightEdgesFirst(int32_t edgeNumber);

View File

@ -23,8 +23,6 @@
#include "unicode/uchar.h" #include "unicode/uchar.h"
#include "unicode/localpointer.h" #include "unicode/localpointer.h"
#ifndef U_HIDE_DRAFT_API
/** /**
* \file * \file
* \brief Bidi Transformations * \brief Bidi Transformations
@ -60,17 +58,17 @@
* @see UBIDI_REORDER_DEFAULT * @see UBIDI_REORDER_DEFAULT
* @see UBIDI_REORDER_INVERSE_LIKE_DIRECT * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
* @see UBIDI_REORDER_RUNS_ONLY * @see UBIDI_REORDER_RUNS_ONLY
* @draft ICU 58 * @stable ICU 58
*/ */
typedef enum { typedef enum {
/** 0: Constant indicating a logical order. /** 0: Constant indicating a logical order.
* This is the default for input text. * This is the default for input text.
* @draft ICU 58 * @stable ICU 58
*/ */
UBIDI_LOGICAL = 0, UBIDI_LOGICAL = 0,
/** 1: Constant indicating a visual order. /** 1: Constant indicating a visual order.
* This is a default for output text. * This is a default for output text.
* @draft ICU 58 * @stable ICU 58
*/ */
UBIDI_VISUAL UBIDI_VISUAL
} UBiDiOrder; } UBiDiOrder;
@ -83,20 +81,20 @@ typedef enum {
* @see ubidi_setReorderingOptions * @see ubidi_setReorderingOptions
* @see ubidi_writeReordered * @see ubidi_writeReordered
* @see ubidi_writeReverse * @see ubidi_writeReverse
* @draft ICU 58 * @stable ICU 58
*/ */
typedef enum { typedef enum {
/** 0: Constant indicating that character mirroring should not be /** 0: Constant indicating that character mirroring should not be
* performed. * performed.
* This is the default. * This is the default.
* @draft ICU 58 * @stable ICU 58
*/ */
UBIDI_MIRRORING_OFF = 0, UBIDI_MIRRORING_OFF = 0,
/** 1: Constant indicating that character mirroring should be performed. /** 1: Constant indicating that character mirroring should be performed.
* This corresponds to calling <code>ubidi_writeReordered</code> or * This corresponds to calling <code>ubidi_writeReordered</code> or
* <code>ubidi_writeReverse</code> with the * <code>ubidi_writeReverse</code> with the
* <code>UBIDI_DO_MIRRORING</code> option bit set. * <code>UBIDI_DO_MIRRORING</code> option bit set.
* @draft ICU 58 * @stable ICU 58
*/ */
UBIDI_MIRRORING_ON UBIDI_MIRRORING_ON
} UBiDiMirroring; } UBiDiMirroring;
@ -104,7 +102,7 @@ typedef enum {
/** /**
* Forward declaration of the <code>UBiDiTransform</code> structure that stores * Forward declaration of the <code>UBiDiTransform</code> structure that stores
* information used by the layout transformation engine. * information used by the layout transformation engine.
* @draft ICU 58 * @stable ICU 58
*/ */
typedef struct UBiDiTransform UBiDiTransform; typedef struct UBiDiTransform UBiDiTransform;
@ -240,9 +238,9 @@ typedef struct UBiDiTransform UBiDiTransform;
* @see UBiDiMirroring * @see UBiDiMirroring
* @see ubidi_setPara * @see ubidi_setPara
* @see u_shapeArabic * @see u_shapeArabic
* @draft ICU 58 * @stable ICU 58
*/ */
U_DRAFT uint32_t U_EXPORT2 U_STABLE uint32_t U_EXPORT2
ubiditransform_transform(UBiDiTransform *pBiDiTransform, ubiditransform_transform(UBiDiTransform *pBiDiTransform,
const UChar *src, int32_t srcLength, const UChar *src, int32_t srcLength,
UChar *dest, int32_t destSize, UChar *dest, int32_t destSize,
@ -286,16 +284,16 @@ ubiditransform_transform(UBiDiTransform *pBiDiTransform,
* <code>ubiditransform_close()</code>. * <code>ubiditransform_close()</code>.
* *
* @return An empty <code>UBiDiTransform</code> object. * @return An empty <code>UBiDiTransform</code> object.
* @draft ICU 58 * @stable ICU 58
*/ */
U_DRAFT UBiDiTransform* U_EXPORT2 U_STABLE UBiDiTransform* U_EXPORT2
ubiditransform_open(UErrorCode *pErrorCode); ubiditransform_open(UErrorCode *pErrorCode);
/** /**
* Deallocates the given <code>UBiDiTransform</code> object. * Deallocates the given <code>UBiDiTransform</code> object.
* @draft ICU 58 * @stable ICU 58
*/ */
U_DRAFT void U_EXPORT2 U_STABLE void U_EXPORT2
ubiditransform_close(UBiDiTransform *pBidiTransform); ubiditransform_close(UBiDiTransform *pBidiTransform);
#if U_SHOW_CPLUSPLUS_API #if U_SHOW_CPLUSPLUS_API
@ -309,7 +307,7 @@ U_NAMESPACE_BEGIN
* *
* @see LocalPointerBase * @see LocalPointerBase
* @see LocalPointer * @see LocalPointer
* @draft ICU 58 * @stable ICU 58
*/ */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close); U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);
@ -317,5 +315,4 @@ U_NAMESPACE_END
#endif #endif
#endif /* U_HIDE_DRAFT_API */
#endif #endif

View File

@ -230,7 +230,8 @@ typedef enum USentenceBreakTag {
* @param locale The locale specifying the text-breaking conventions. Note that * @param locale The locale specifying the text-breaking conventions. Note that
* locale keys such as "lb" and "ss" may be used to modify text break behavior, * locale keys such as "lb" and "ss" may be used to modify text break behavior,
* see general discussion of BreakIterator C API. * see general discussion of BreakIterator C API.
* @param text The text to be iterated over. * @param text The text to be iterated over. May be null, in which case ubrk_setText() is
* used to specify the text to be iterated.
* @param textLength The number of characters in text, or -1 if null-terminated. * @param textLength The number of characters in text, or -1 if null-terminated.
* @param status A UErrorCode to receive any errors. * @param status A UErrorCode to receive any errors.
* @return A UBreakIterator for the specified locale. * @return A UBreakIterator for the specified locale.

View File

@ -23,6 +23,7 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/localpointer.h" #include "unicode/localpointer.h"
#include "unicode/stringoptions.h"
#include "unicode/ustring.h" #include "unicode/ustring.h"
/** /**
@ -144,56 +145,6 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
U_STABLE void U_EXPORT2 U_STABLE void U_EXPORT2
ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode); ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
/**
* Do not lowercase non-initial parts of words when titlecasing.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will titlecase the first cased character
* of a word and lowercase all other characters.
* With this option, the other characters will not be modified.
*
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @see UnicodeString::toTitle
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_LOWERCASE 0x100
/**
* Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
* titlecase exactly the characters at breaks from the iterator.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will take each break iterator index,
* adjust it by looking for the next cased character, and titlecase that one.
* Other characters are lowercased.
*
* This follows Unicode 4 & 5 section 3.13 Default Case Operations:
*
* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
* #29, "Text Boundaries." Between each pair of word boundaries, find the first
* cased character F. If F exists, map F to default_title(F); then map each
* subsequent character C to default_lower(C).
*
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @see UnicodeString::toTitle
* @see U_TITLECASE_NO_LOWERCASE
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
/**
* Omit unchanged text when case-mapping with Edits.
*
* @see CaseMap
* @see Edits
* @draft ICU 59
*/
#define UCASEMAP_OMIT_UNCHANGED_TEXT 0x4000
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
/** /**
@ -251,7 +202,7 @@ ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode
* The standard titlecase iterator for the root locale implements the * The standard titlecase iterator for the root locale implements the
* algorithm of Unicode TR 21. * algorithm of Unicode TR 21.
* *
* This function uses only the setUText(), first(), next() and close() methods of the * This function uses only the setText(), first() and next() methods of the
* provided break iterator. * provided break iterator.
* *
* The result may be longer or shorter than the original. * The result may be longer or shorter than the original.

View File

@ -26,6 +26,7 @@
#define UCHAR_H #define UCHAR_H
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/stringoptions.h"
U_CDECL_BEGIN U_CDECL_BEGIN
@ -41,7 +42,7 @@ U_CDECL_BEGIN
* @see u_getUnicodeVersion * @see u_getUnicodeVersion
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
#define U_UNICODE_VERSION "9.0" #define U_UNICODE_VERSION "10.0"
/** /**
* \file * \file
@ -148,8 +149,9 @@ U_CDECL_BEGIN
* *
* The properties APIs are intended to reflect Unicode properties as defined * The properties APIs are intended to reflect Unicode properties as defined
* in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
* For details about the properties see http://www.unicode.org/ucd/ . *
* For names of Unicode properties see the UCD file PropertyAliases.txt. * For details about the properties see
* UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).
* *
* Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2, * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
* then properties marked with "new in Unicode 3.2" are not or not fully available. * then properties marked with "new in Unicode 3.2" are not or not fully available.
@ -427,12 +429,29 @@ typedef enum UProperty {
* @stable ICU 57 * @stable ICU 57
*/ */
UCHAR_EMOJI_MODIFIER_BASE=60, UCHAR_EMOJI_MODIFIER_BASE=60,
/**
* Binary property Emoji_Component.
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
*
* @stable ICU 60
*/
UCHAR_EMOJI_COMPONENT=61,
/**
* Binary property Regional_Indicator.
* @stable ICU 60
*/
UCHAR_REGIONAL_INDICATOR=62,
/**
* Binary property Prepended_Concatenation_Mark.
* @stable ICU 60
*/
UCHAR_PREPENDED_CONCATENATION_MARK=63,
#ifndef U_HIDE_DEPRECATED_API #ifndef U_HIDE_DEPRECATED_API
/** /**
* One more than the last constant for binary Unicode properties. * One more than the last constant for binary Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/ */
UCHAR_BINARY_LIMIT=61, UCHAR_BINARY_LIMIT,
#endif // U_HIDE_DEPRECATED_API #endif // U_HIDE_DEPRECATED_API
/** Enumerated property Bidi_Class. /** Enumerated property Bidi_Class.
@ -1647,6 +1666,23 @@ enum UBlockCode {
/** @stable ICU 58 */ /** @stable ICU 58 */
UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/ UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/
// New blocks in Unicode 10.0
/** @stable ICU 60 */
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274, /*[2CEB0]*/
/** @stable ICU 60 */
UBLOCK_KANA_EXTENDED_A = 275, /*[1B100]*/
/** @stable ICU 60 */
UBLOCK_MASARAM_GONDI = 276, /*[11D00]*/
/** @stable ICU 60 */
UBLOCK_NUSHU = 277, /*[1B170]*/
/** @stable ICU 60 */
UBLOCK_SOYOMBO = 278, /*[11A50]*/
/** @stable ICU 60 */
UBLOCK_SYRIAC_SUPPLEMENT = 279, /*[0860]*/
/** @stable ICU 60 */
UBLOCK_ZANABAZAR_SQUARE = 280, /*[11A00]*/
#ifndef U_HIDE_DEPRECATED_API #ifndef U_HIDE_DEPRECATED_API
/** /**
* One more than the highest normal UBlockCode value. * One more than the highest normal UBlockCode value.
@ -1654,7 +1690,7 @@ enum UBlockCode {
* *
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/ */
UBLOCK_COUNT = 274, UBLOCK_COUNT = 281,
#endif // U_HIDE_DEPRECATED_API #endif // U_HIDE_DEPRECATED_API
/** @stable ICU 2.0 */ /** @stable ICU 2.0 */
@ -1930,6 +1966,19 @@ typedef enum UJoiningGroup {
U_JG_AFRICAN_FEH, /**< @stable ICU 58 */ U_JG_AFRICAN_FEH, /**< @stable ICU 58 */
U_JG_AFRICAN_NOON, /**< @stable ICU 58 */ U_JG_AFRICAN_NOON, /**< @stable ICU 58 */
U_JG_AFRICAN_QAF, /**< @stable ICU 58 */ U_JG_AFRICAN_QAF, /**< @stable ICU 58 */
U_JG_MALAYALAM_BHA, /**< @stable ICU 60 */
U_JG_MALAYALAM_JA, /**< @stable ICU 60 */
U_JG_MALAYALAM_LLA, /**< @stable ICU 60 */
U_JG_MALAYALAM_LLLA, /**< @stable ICU 60 */
U_JG_MALAYALAM_NGA, /**< @stable ICU 60 */
U_JG_MALAYALAM_NNA, /**< @stable ICU 60 */
U_JG_MALAYALAM_NNNA, /**< @stable ICU 60 */
U_JG_MALAYALAM_NYA, /**< @stable ICU 60 */
U_JG_MALAYALAM_RA, /**< @stable ICU 60 */
U_JG_MALAYALAM_SSA, /**< @stable ICU 60 */
U_JG_MALAYALAM_TTA, /**< @stable ICU 60 */
#ifndef U_HIDE_DEPRECATED_API #ifndef U_HIDE_DEPRECATED_API
/** /**
* One more than the highest normal UJoiningGroup value. * One more than the highest normal UJoiningGroup value.
@ -3521,27 +3570,6 @@ u_toupper(UChar32 c);
U_STABLE UChar32 U_EXPORT2 U_STABLE UChar32 U_EXPORT2
u_totitle(UChar32 c); u_totitle(UChar32 c);
/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */
#define U_FOLD_CASE_DEFAULT 0
/**
* Option value for case folding:
*
* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
* and dotless i appropriately for Turkic languages (tr, az).
*
* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
* are to be included for default mappings and
* excluded for the Turkic-specific mappings.
*
* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
* are to be excluded for default mappings and
* included for the Turkic-specific mappings.
*
* @stable ICU 2.0
*/
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
/** /**
* The given character is mapped to its case folding equivalent according to * The given character is mapped to its case folding equivalent according to
* UnicodeData.txt and CaseFolding.txt; * UnicodeData.txt and CaseFolding.txt;

View File

@ -149,7 +149,7 @@ typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem);
* @system * @system
*/ */
U_STABLE void U_EXPORT2 U_STABLE void U_EXPORT2
u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV a, UMemReallocFn * U_CALLCONV r, UMemFreeFn * U_CALLCONV f, u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV_FPTR a, UMemReallocFn * U_CALLCONV_FPTR r, UMemFreeFn * U_CALLCONV_FPTR f,
UErrorCode *status); UErrorCode *status);
U_CDECL_END U_CDECL_END

View File

@ -76,7 +76,7 @@
#endif #endif
/** /**
* Determines wheter to enable auto cleanup of libraries. * Determines whether to enable auto cleanup of libraries.
* @internal * @internal
*/ */
#ifndef UCLN_NO_AUTO_CLEANUP #ifndef UCLN_NO_AUTO_CLEANUP
@ -262,7 +262,8 @@
/** /**
* \def UCONFIG_NO_CONVERSION * \def UCONFIG_NO_CONVERSION
* ICU will not completely build with this switch turned on. * ICU will not completely build (compiling the tools fails) with this
* switch turned on.
* This switch turns off all converters. * This switch turns off all converters.
* *
* You may want to use this together with U_CHARSET_IS_UTF8 defined to 1 * You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
@ -320,7 +321,9 @@
*/ */
#ifndef UCONFIG_NO_NORMALIZATION #ifndef UCONFIG_NO_NORMALIZATION
# define UCONFIG_NO_NORMALIZATION 0 # define UCONFIG_NO_NORMALIZATION 0
#elif UCONFIG_NO_NORMALIZATION #endif
#if UCONFIG_NO_NORMALIZATION
/* common library */ /* common library */
/* ICU 50 CJK dictionary BreakIterator uses normalization */ /* ICU 50 CJK dictionary BreakIterator uses normalization */
# define UCONFIG_NO_BREAK_ITERATION 1 # define UCONFIG_NO_BREAK_ITERATION 1

View File

@ -44,14 +44,12 @@ enum UDisplayContextType {
* @stable ICU 54 * @stable ICU 54
*/ */
UDISPCTX_TYPE_DISPLAY_LENGTH = 2, UDISPCTX_TYPE_DISPLAY_LENGTH = 2,
#ifndef U_HIDE_DRAFT_API
/** /**
* Type to retrieve the substitute handling setting, e.g. * Type to retrieve the substitute handling setting, e.g.
* UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE. * UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE.
* @draft ICU 58 * @stable ICU 58
*/ */
UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3 UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3
#endif /* U_HIDE_DRAFT_API */
}; };
/** /**
* @stable ICU 51 * @stable ICU 51
@ -143,7 +141,6 @@ enum UDisplayContext {
* @stable ICU 54 * @stable ICU 54
*/ */
UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1, UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1,
#ifndef U_HIDE_DRAFT_API
/** /**
* ================================ * ================================
* SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or * SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or
@ -154,16 +151,15 @@ enum UDisplayContext {
* A possible setting for SUBSTITUTE_HANDLING: * A possible setting for SUBSTITUTE_HANDLING:
* Returns a fallback value (e.g., the input code) when no data is available. * Returns a fallback value (e.g., the input code) when no data is available.
* This is the default value. * This is the default value.
* @draft ICU 58 * @stable ICU 58
*/ */
UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0, UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0,
/** /**
* A possible setting for SUBSTITUTE_HANDLING: * A possible setting for SUBSTITUTE_HANDLING:
* Returns a null value when no data is available. * Returns a null value when no data is available.
* @draft ICU 58 * @stable ICU 58
*/ */
UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1 UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1
#endif /* U_HIDE_DRAFT_API */
}; };
/** /**

View File

@ -38,16 +38,6 @@
struct UConverter; // unicode/ucnv.h struct UConverter; // unicode/ucnv.h
#ifndef U_COMPARE_CODE_POINT_ORDER
/* see also ustring.h and unorm.h */
/**
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
* Compare strings in code point order instead of code unit order.
* @stable ICU 2.2
*/
#define U_COMPARE_CODE_POINT_ORDER 0x8000
#endif
#ifndef USTRING_H #ifndef USTRING_H
/** /**
* \ingroup ustring_ustrlen * \ingroup ustring_ustrlen
@ -1730,7 +1720,7 @@ public:
*/ */
template<typename StringClass> template<typename StringClass>
StringClass &toUTF8String(StringClass &result) const { StringClass &toUTF8String(StringClass &result) const {
StringByteSink<StringClass> sbs(&result); StringByteSink<StringClass> sbs(&result, length());
toUTF8(sbs); toUTF8(sbs);
return result; return result;
} }
@ -1901,7 +1891,6 @@ public:
*/ */
UnicodeString &fastCopyFrom(const UnicodeString &src); UnicodeString &fastCopyFrom(const UnicodeString &src);
#if U_HAVE_RVALUE_REFERENCES
/** /**
* Move assignment operator, might leave src in bogus state. * Move assignment operator, might leave src in bogus state.
* This string will have the same contents and state that the source string had. * This string will have the same contents and state that the source string had.
@ -1913,7 +1902,7 @@ public:
UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT { UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT {
return moveFrom(src); return moveFrom(src);
} }
#endif
// do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
/** /**
* Move assignment, might leave src in bogus state. * Move assignment, might leave src in bogus state.
@ -2786,11 +2775,11 @@ public:
* break iterator is opened. * break iterator is opened.
* Otherwise the provided iterator is set to the string's text. * Otherwise the provided iterator is set to the string's text.
* @param locale The locale to consider. * @param locale The locale to consider.
* @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* @param options Options bit set, see ucasemap_open(). * @param options Options bit set, see ucasemap_open().
* @return A reference to this. * @return A reference to this.
* @see U_TITLECASE_NO_LOWERCASE
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
* @see ucasemap_open
* @stable ICU 3.8 * @stable ICU 3.8
*/ */
UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
@ -3360,7 +3349,6 @@ public:
*/ */
UnicodeString(const UnicodeString& that); UnicodeString(const UnicodeString& that);
#if U_HAVE_RVALUE_REFERENCES
/** /**
* Move constructor, might leave src in bogus state. * Move constructor, might leave src in bogus state.
* This string will have the same contents and state that the source string had. * This string will have the same contents and state that the source string had.
@ -3368,7 +3356,6 @@ public:
* @stable ICU 56 * @stable ICU 56
*/ */
UnicodeString(UnicodeString &&src) U_NOEXCEPT; UnicodeString(UnicodeString &&src) U_NOEXCEPT;
#endif
/** /**
* 'Substring' constructor from tail of source string. * 'Substring' constructor from tail of source string.

View File

@ -210,7 +210,7 @@ enum {
* the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR. * the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR.
* @deprecated ICU 56 Use unorm2.h instead. * @deprecated ICU 56 Use unorm2.h instead.
*/ */
U_STABLE int32_t U_EXPORT2 U_DEPRECATED int32_t U_EXPORT2
unorm_normalize(const UChar *source, int32_t sourceLength, unorm_normalize(const UChar *source, int32_t sourceLength,
UNormalizationMode mode, int32_t options, UNormalizationMode mode, int32_t options,
UChar *result, int32_t resultLength, UChar *result, int32_t resultLength,
@ -236,7 +236,7 @@ unorm_normalize(const UChar *source, int32_t sourceLength,
* @see unorm_isNormalized * @see unorm_isNormalized
* @deprecated ICU 56 Use unorm2.h instead. * @deprecated ICU 56 Use unorm2.h instead.
*/ */
U_STABLE UNormalizationCheckResult U_EXPORT2 U_DEPRECATED UNormalizationCheckResult U_EXPORT2
unorm_quickCheck(const UChar *source, int32_t sourcelength, unorm_quickCheck(const UChar *source, int32_t sourcelength,
UNormalizationMode mode, UNormalizationMode mode,
UErrorCode *status); UErrorCode *status);
@ -257,7 +257,7 @@ unorm_quickCheck(const UChar *source, int32_t sourcelength,
* @see unorm_isNormalized * @see unorm_isNormalized
* @deprecated ICU 56 Use unorm2.h instead. * @deprecated ICU 56 Use unorm2.h instead.
*/ */
U_STABLE UNormalizationCheckResult U_EXPORT2 U_DEPRECATED UNormalizationCheckResult U_EXPORT2
unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
UNormalizationMode mode, int32_t options, UNormalizationMode mode, int32_t options,
UErrorCode *pErrorCode); UErrorCode *pErrorCode);
@ -283,7 +283,7 @@ unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
* @see unorm_quickCheck * @see unorm_quickCheck
* @deprecated ICU 56 Use unorm2.h instead. * @deprecated ICU 56 Use unorm2.h instead.
*/ */
U_STABLE UBool U_EXPORT2 U_DEPRECATED UBool U_EXPORT2
unorm_isNormalized(const UChar *src, int32_t srcLength, unorm_isNormalized(const UChar *src, int32_t srcLength,
UNormalizationMode mode, UNormalizationMode mode,
UErrorCode *pErrorCode); UErrorCode *pErrorCode);
@ -305,7 +305,7 @@ unorm_isNormalized(const UChar *src, int32_t srcLength,
* @see unorm_isNormalized * @see unorm_isNormalized
* @deprecated ICU 56 Use unorm2.h instead. * @deprecated ICU 56 Use unorm2.h instead.
*/ */
U_STABLE UBool U_EXPORT2 U_DEPRECATED UBool U_EXPORT2
unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength, unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
UNormalizationMode mode, int32_t options, UNormalizationMode mode, int32_t options,
UErrorCode *pErrorCode); UErrorCode *pErrorCode);
@ -383,7 +383,7 @@ unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
* *
* @deprecated ICU 56 Use unorm2.h instead. * @deprecated ICU 56 Use unorm2.h instead.
*/ */
U_STABLE int32_t U_EXPORT2 U_DEPRECATED int32_t U_EXPORT2
unorm_next(UCharIterator *src, unorm_next(UCharIterator *src,
UChar *dest, int32_t destCapacity, UChar *dest, int32_t destCapacity,
UNormalizationMode mode, int32_t options, UNormalizationMode mode, int32_t options,
@ -416,7 +416,7 @@ unorm_next(UCharIterator *src,
* *
* @deprecated ICU 56 Use unorm2.h instead. * @deprecated ICU 56 Use unorm2.h instead.
*/ */
U_STABLE int32_t U_EXPORT2 U_DEPRECATED int32_t U_EXPORT2
unorm_previous(UCharIterator *src, unorm_previous(UCharIterator *src,
UChar *dest, int32_t destCapacity, UChar *dest, int32_t destCapacity,
UNormalizationMode mode, int32_t options, UNormalizationMode mode, int32_t options,
@ -460,7 +460,7 @@ unorm_previous(UCharIterator *src,
* *
* @deprecated ICU 56 Use unorm2.h instead. * @deprecated ICU 56 Use unorm2.h instead.
*/ */
U_STABLE int32_t U_EXPORT2 U_DEPRECATED int32_t U_EXPORT2
unorm_concatenate(const UChar *left, int32_t leftLength, unorm_concatenate(const UChar *left, int32_t leftLength,
const UChar *right, int32_t rightLength, const UChar *right, int32_t rightLength,
UChar *dest, int32_t destCapacity, UChar *dest, int32_t destCapacity,

View File

@ -32,6 +32,7 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/localpointer.h" #include "unicode/localpointer.h"
#include "unicode/stringoptions.h"
#include "unicode/uset.h" #include "unicode/uset.h"
/** /**
@ -526,30 +527,6 @@ unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
U_STABLE UBool U_EXPORT2 U_STABLE UBool U_EXPORT2
unorm2_isInert(const UNormalizer2 *norm2, UChar32 c); unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
/**
* Option bit for unorm_compare:
* Both input strings are assumed to fulfill FCD conditions.
* @stable ICU 2.2
*/
#define UNORM_INPUT_IS_FCD 0x20000
/**
* Option bit for unorm_compare:
* Perform case-insensitive comparison.
* @stable ICU 2.2
*/
#define U_COMPARE_IGNORE_CASE 0x10000
#ifndef U_COMPARE_CODE_POINT_ORDER
/* see also unistr.h and ustring.h */
/**
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
* Compare strings in code point order instead of code unit order.
* @stable ICU 2.2
*/
#define U_COMPARE_CODE_POINT_ORDER 0x8000
#endif
/** /**
* Compares two strings for canonical equivalence. * Compares two strings for canonical equivalence.
* Further options include case-insensitive comparison and * Further options include case-insensitive comparison and

View File

@ -107,6 +107,7 @@
#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
#define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup) #define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup)
#define checkImpl U_ICU_ENTRY_POINT_RENAME(checkImpl)
#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
#define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup) #define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup)
#define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats) #define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats)
@ -944,6 +945,7 @@
#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget) #define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget)
#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti) #define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti)
#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init) #define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init)
#define uhash_initSize U_ICU_ENTRY_POINT_RENAME(uhash_initSize)
#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput) #define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput)
#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi) #define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi)
#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove) #define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove)
@ -1654,6 +1656,7 @@
#define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN) #define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN)
#define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN) #define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN)
#define ustrcase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_getCaseLocale) #define ustrcase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_getCaseLocale)
#define ustrcase_getTitleBreakIterator U_ICU_ENTRY_POINT_RENAME(ustrcase_getTitleBreakIterator)
#define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold) #define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold)
#define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower) #define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower)
#define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle) #define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle)

View File

@ -444,6 +444,13 @@ typedef enum UScriptCode {
/** @stable ICU 58 */ /** @stable ICU 58 */
USCRIPT_SYMBOLS_EMOJI = 174,/* Zsye */ USCRIPT_SYMBOLS_EMOJI = 174,/* Zsye */
/** @stable ICU 60 */
USCRIPT_MASARAM_GONDI = 175,/* Gonm */
/** @stable ICU 60 */
USCRIPT_SOYOMBO = 176,/* Soyo */
/** @stable ICU 60 */
USCRIPT_ZANABAZAR_SQUARE = 177,/* Zanb */
#ifndef U_HIDE_DEPRECATED_API #ifndef U_HIDE_DEPRECATED_API
/** /**
* One more than the highest normal UScriptCode value. * One more than the highest normal UScriptCode value.
@ -451,7 +458,7 @@ typedef enum UScriptCode {
* *
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/ */
USCRIPT_CODE_LIMIT = 175 USCRIPT_CODE_LIMIT = 178
#endif // U_HIDE_DEPRECATED_API #endif // U_HIDE_DEPRECATED_API
} UScriptCode; } UScriptCode;

Some files were not shown because too many files have changed in this diff. Show More