deps: icu 62.1 bump (Unicode 11, CLDR 33.1)

- Full release notes: http://site.icu-project.org/download/62

Fixes: https://github.com/nodejs/node/issues/21452
PR-URL: https://github.com/nodejs/node/pull/21728

Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Bradley Farias <bradley.meck@gmail.com>
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Tiancheng "Timothy" Gu <timothygu99@gmail.com>
Reviewed-By: Richard Lau <riclau@uk.ibm.com>
This commit is contained in:
Steven R. Loomis 2018-07-09 13:46:16 -07:00 committed by Anna Henningsen
parent ed715ef890
commit 538acead66
No known key found for this signature in database
GPG Key ID: 9C63F3A6CD2AD8F9
213 changed files with 22259 additions and 25980 deletions

4
configure vendored
View File

@ -1185,8 +1185,8 @@ def glob_to_var(dir_base, dir_sub, patch_dir):
def configure_intl(o):
icus = [
{
'url': 'https://ssl.icu-project.org/files/icu4c/61.1/icu4c-61_1-src.zip',
'md5': '780d8524c8a860ed8d8f6fe75cb7ce3f',
'url': 'https://sourceforge.net/projects/icu/files/ICU4C/62.1/icu4c-62_1-src.zip',
'md5': '408854f7b9b58311b68fab4b4dfc80be',
},
]
def icu_download(path):

View File

@ -1,8 +1,8 @@
Small ICU sources - auto generated by shrink-icu-src.py
This directory contains the ICU subset used by --with-intl=small-icu (the default)
It is a strict subset of ICU 61 source files with the following exception(s):
* deps/icu-small/source/data/in/icudt61l.dat : Reduced-size data file
It is a strict subset of ICU 62 source files with the following exception(s):
* deps/icu-small/source/data/in/icudt62l.dat : Reduced-size data file
To rebuild this directory, see ../../tools/icu/README.md

View File

@ -23,6 +23,18 @@
U_NAMESPACE_BEGIN
/**
 * Move constructor: move-constructs the buffer from src.buffer and copies src's length,
 * then zeroes the length recorded in src.
 */
CharString::CharString(CharString&& src) U_NOEXCEPT
        : buffer(std::move(src.buffer)),
          len(src.len) {
    // Resetting the source length is a courtesy only; no guarantees are made
    // about the state of the source string after the move.
    src.len = 0;
}
/**
 * Move assignment: move-assigns the buffer from src.buffer and adopts src's length,
 * then zeroes the length recorded in src.
 */
CharString& CharString::operator=(CharString&& src) U_NOEXCEPT {
    buffer = std::move(src.buffer);
    const auto adoptedLength = src.len;
    len = adoptedLength;
    // Resetting the source length is a courtesy only; no guarantees are made
    // about the state of the source string after the move.
    src.len = 0;
    return *this;
}
CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) {
len=s.len;

View File

@ -55,6 +55,18 @@ public:
}
~CharString() {}
/**
* Move constructor; might leave src in an undefined state.
* This string will have the same contents and state that the source string had.
*/
CharString(CharString &&src) U_NOEXCEPT;
/**
* Move assignment operator; might leave src in an undefined state.
* This string will have the same contents and state that the source string had.
* The behavior is undefined if *this and src are the same object.
*/
CharString &operator=(CharString &&src) U_NOEXCEPT;
/**
* Replaces this string's contents with the other string's contents.
* CharString does not support the standard copy constructor nor

View File

@ -299,6 +299,14 @@ public:
* Destructor deletes the array (if owned).
*/
~MaybeStackArray() { releaseArray(); }
/**
* Move constructor: transfers ownership or copies the stack array.
*/
MaybeStackArray(MaybeStackArray<T, stackCapacity> &&src) U_NOEXCEPT;
/**
* Move assignment: transfers ownership or copies the stack array.
*/
MaybeStackArray<T, stackCapacity> &operator=(MaybeStackArray<T, stackCapacity> &&src) U_NOEXCEPT;
/**
* Returns the array capacity (number of T items).
* @return array capacity
@ -376,6 +384,11 @@ private:
uprv_free(ptr);
}
}
/**
 * Points this object back at its own stackArray with stackCapacity items and clears
 * the needToRelease flag. Only the three fields are reassigned; nothing is freed here.
 */
void resetToStackArray() {
    needToRelease=FALSE;
    capacity=stackCapacity;
    ptr=stackArray;
}
/* No comparison operators with other MaybeStackArray's. */
bool operator==(const MaybeStackArray & /*other*/) {return FALSE;}
bool operator!=(const MaybeStackArray & /*other*/) {return TRUE;}
@ -398,6 +411,34 @@ private:
#endif
};
/**
 * Move constructor.
 * If src currently points at its own stack array, the items are copied into this
 * object's stack array. Otherwise src's pointer is adopted as-is and src is reset
 * to its stack array so that it no longer refers to the transferred memory.
 */
template<typename T, int32_t stackCapacity>
icu::MaybeStackArray<T, stackCapacity>::MaybeStackArray(
        MaybeStackArray <T, stackCapacity>&& src) U_NOEXCEPT
        : ptr(src.ptr), capacity(src.capacity), needToRelease(src.needToRelease) {
    const bool srcUsesOtherArray = (src.ptr != src.stackArray);
    if (srcUsesOtherArray) {
        // The pointer was already adopted by the initializer list;
        // take ownership away from src.
        src.resetToStackArray();
        return;
    }
    // src's stack storage cannot be adopted, so copy its contents into ours.
    ptr = stackArray;
    uprv_memcpy(stackArray, src.stackArray, sizeof(T) * src.capacity);
}
/**
 * Move assignment.
 * Releases whatever array this object currently holds, then either copies the
 * contents of src's stack array into this object's stack array, or adopts src's
 * pointer and resets src to its own stack array.
 * Assigning an object to itself is a no-op.
 *
 * @param src the array to move from
 * @return *this
 */
template<typename T, int32_t stackCapacity>
inline MaybeStackArray <T, stackCapacity>&
MaybeStackArray<T, stackCapacity>::operator=(MaybeStackArray <T, stackCapacity>&& src) U_NOEXCEPT {
    if (this == &src) {
        // Self-move: without this guard releaseArray() would run on the very array
        // being moved from, and the stack-array branch would copy a buffer onto
        // itself (source and destination of a memcpy must not overlap).
        return *this;
    }
    releaseArray();  // in case this instance had its own memory allocated
    capacity = src.capacity;
    needToRelease = src.needToRelease;
    if (src.ptr == src.stackArray) {
        // src's stack storage cannot be adopted, so copy its contents into ours.
        ptr = stackArray;
        uprv_memcpy(stackArray, src.stackArray, sizeof(T) * src.capacity);
    } else {
        ptr = src.ptr;
        src.resetToStackArray();  // take ownership away from src
    }
    return *this;
}
template<typename T, int32_t stackCapacity>
inline T *MaybeStackArray<T, stackCapacity>::resize(int32_t newCapacity, int32_t length) {
if(newCapacity>0) {
@ -447,9 +488,7 @@ inline T *MaybeStackArray<T, stackCapacity>::orphanOrClone(int32_t length, int32
uprv_memcpy(p, ptr, (size_t)length*sizeof(T));
}
resultCapacity=length;
ptr=stackArray;
capacity=stackCapacity;
needToRelease=FALSE;
resetToStackArray();
return p;
}

View File

@ -4,10 +4,12 @@
// edits.cpp
// created: 2017feb08 Markus W. Scherer
#include "unicode/utypes.h"
#include "unicode/edits.h"
#include "unicode/unistr.h"
#include "unicode/utypes.h"
#include "cmemory.h"
#include "uassert.h"
#include "util.h"
U_NAMESPACE_BEGIN
@ -773,4 +775,29 @@ int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &
}
}
/**
 * Appends a human-readable description of the iterator's current edit to sb:
 * the source index range, the destination index range and, for a change,
 * the replacement index range.
 *
 * @param sb the string to append to
 * @return sb
 */
UnicodeString& Edits::Iterator::toString(UnicodeString& sb) const {
    // Source range.
    sb.append(u"{ src[", -1);
    ICU_Utility::appendNumber(sb, srcIndex);
    sb.append(u"..", -1);
    ICU_Utility::appendNumber(sb, srcIndex + oldLength_);

    // Relation marker between the source and destination ranges.
    if (!changed) {
        sb.append(u"] ≡ dest[", -1);
    } else {
        sb.append(u"] ⇝ dest[", -1);
    }

    // Destination range.
    ICU_Utility::appendNumber(sb, destIndex);
    sb.append(u"..", -1);
    ICU_Utility::appendNumber(sb, destIndex + newLength_);

    if (!changed) {
        sb.append(u"] (no-change) }", -1);
        return sb;
    }

    // Replacement range, only reported for a change.
    sb.append(u"], repl[", -1);
    ICU_Utility::appendNumber(sb, replIndex);
    sb.append(u"..", -1);
    ICU_Utility::appendNumber(sb, replIndex + newLength_);
    sb.append(u"] }", -1);
    return sb;
}
U_NAMESPACE_END

View File

@ -1015,7 +1015,7 @@ static const char*
getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
{
uint32_t i;
for (i = 0; i <= this_0->numRegions; i++)
for (i = 0; i < this_0->numRegions; i++)
{
if (this_0->regionMaps[i].hostID == hostID)
{

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -533,6 +533,28 @@ uprv_fmin(double x, double y)
return (x > y ? y : x);
}
/**
 * Adds a and b, stores the sum narrowed to 32 bits in *res, and reports whether
 * the exact sum differs from the stored value.
 */
U_CAPI UBool U_EXPORT2
uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
    // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
    // This function could be optimized by calling one of those primitives.
    // Compute the sum in 64 bits, where two 32-bit operands cannot overflow.
    const int64_t wideSum = static_cast<int64_t>(a) + static_cast<int64_t>(b);
    const int32_t narrowed = static_cast<int32_t>(wideSum);
    *res = narrowed;
    // Overflow occurred exactly when narrowing changed the value.
    return wideSum != narrowed;
}
/**
 * Multiplies a and b, stores the product narrowed to 32 bits in *res, and reports
 * whether the exact product differs from the stored value.
 */
U_CAPI UBool U_EXPORT2
uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
    // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
    // This function could be optimized by calling one of those primitives.
    // Compute the product in 64 bits, where two 32-bit operands cannot overflow.
    const int64_t wideProduct = static_cast<int64_t>(a) * static_cast<int64_t>(b);
    const int32_t narrowed = static_cast<int32_t>(wideProduct);
    *res = narrowed;
    // Overflow occurred exactly when narrowing changed the value.
    return wideProduct != narrowed;
}
/**
* Truncates the given double.
* trunc(3.3) = 3.0, trunc (-3.3) = -3.0

View File

@ -391,6 +391,32 @@ U_INTERNAL double U_EXPORT2 uprv_log(double d);
*/
U_INTERNAL double U_EXPORT2 uprv_round(double x);
/**
* Adds the signed integers a and b, storing the result in res.
* Checks for signed integer overflow.
* Similar to the GCC/Clang extension __builtin_add_overflow
*
* @param a The first operand.
* @param b The second operand.
* @param res a + b
* @return true if overflow occurred; false if no overflow occurred.
* @internal
*/
U_INTERNAL UBool U_EXPORT2 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res);
/**
* Multiplies the signed integers a and b, storing the result in res.
* Checks for signed integer overflow.
* Similar to the GCC/Clang extension __builtin_mul_overflow
*
* @param a The first multiplicand.
* @param b The second multiplicand.
* @param res a * b
* @return true if overflow occurred; false if no overflow occurred.
* @internal
*/
U_INTERNAL UBool U_EXPORT2 uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res);
#if 0
/**
* Returns the number of digits after the decimal point in a double number x.

View File

@ -651,7 +651,7 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
}
// Adjust offset to be on a code point boundary and not beyond the end of the text.
// Note that isBoundary() is always be false for offsets that are not on code point boundaries.
// Note that isBoundary() is always false for offsets that are not on code point boundaries.
// But we still need the side effect of leaving iteration at the following boundary.
utext_setNativeIndex(&fText, offset);
@ -937,26 +937,23 @@ int32_t RuleBasedBreakIterator::handleNext() {
}
//-----------------------------------------------------------------------------------
//
// handlePrevious()
// handleSafePrevious()
//
// Iterate backwards using the safe reverse rules.
// The logic of this function is very similar to handleNext(), above.
// The logic of this function is similar to handleNext(), but simpler
// because the safe table does not require as many options.
//
//-----------------------------------------------------------------------------------
int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {
int32_t state;
uint16_t category = 0;
RBBIRunMode mode;
RBBIStateTableRow *row;
UChar32 c;
LookAheadResults lookAheadMatches;
int32_t result = 0;
int32_t initialPosition = 0;
const RBBIStateTable *stateTable = fData->fSafeRevTable;
const RBBIStateTable *stateTable = fData->fReverseTable;
UTEXT_SETNATIVEINDEX(&fText, fromPosition);
#ifdef RBBI_DEBUG
if (gTrace) {
@ -969,54 +966,24 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
return BreakIterator::DONE;
}
// Set up the starting char.
initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
result = initialPosition;
c = UTEXT_PREVIOUS32(&fText);
// Set the initial state for the state machine
c = UTEXT_PREVIOUS32(&fText);
state = START_STATE;
row = (RBBIStateTableRow *)
(stateTable->fTableData + (stateTable->fRowLen * state));
category = 3;
mode = RBBI_RUN;
if (stateTable->fFlags & RBBI_BOF_REQUIRED) {
category = 2;
mode = RBBI_START;
}
// loop until we reach the start of the text or transition to state 0
//
for (;;) {
if (c == U_SENTINEL) {
// Reached end of input string.
if (mode == RBBI_END) {
// We have already run the loop one last time with the
// character set to the psueudo {eof} value. Now it is time
// to unconditionally bail out.
break;
}
// Run the loop one last time with the fake end-of-input character category.
mode = RBBI_END;
category = 1;
}
for (; c != U_SENTINEL; c = UTEXT_PREVIOUS32(&fText)) {
// look up the current character's character category, which tells us
// which column in the state table to look at.
// Note: the 16 in UTRIE_GET16 refers to the size of the data being returned,
// not the size of the character going in, which is a UChar32.
//
// Get the char category. An incoming category of 1 or 2 means that
// we are preset for doing the beginning or end of input, and
// that we shouldn't get a category from an actual text input character.
//
if (mode == RBBI_RUN) {
// look up the current character's character category, which tells us
// which column in the state table to look at.
// Note: the 16 in UTRIE_GET16 refers to the size of the data being returned,
// not the size of the character going in, which is a UChar32.
//
// And off the dictionary flag bit. For reverse iteration it is not used.
category = UTRIE2_GET16(fData->fTrie, c);
category &= ~0x4000;
}
// And off the dictionary flag bit. For reverse iteration it is not used.
category = UTRIE2_GET16(fData->fTrie, c);
category &= ~0x4000;
#ifdef RBBI_DEBUG
if (gTrace) {
@ -1032,65 +999,21 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
// State Transition - move machine to its next state
//
// fNextState is a variable-length array.
U_ASSERT(category<fData->fHeader->fCatCount);
state = row->fNextState[category]; /*Not accessing beyond memory*/
row = (RBBIStateTableRow *)
(stateTable->fTableData + (stateTable->fRowLen * state));
if (row->fAccepting == -1) {
// Match found, common case.
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
}
int16_t completedRule = row->fAccepting;
if (completedRule > 0) {
// Lookahead match is completed.
int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule);
if (lookaheadResult >= 0) {
UTEXT_SETNATIVEINDEX(&fText, lookaheadResult);
return lookaheadResult;
}
}
int16_t rule = row->fLookAhead;
if (rule != 0) {
// At the position of a '/' in a look-ahead match. Record it.
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
lookAheadMatches.setPosition(rule, pos);
}
if (state == STOP_STATE) {
// This is the normal exit from the lookup state machine.
// We have advanced through the string until it is certain that no
// longer match is possible, no matter what characters follow.
// Transition to state zero means we have found a safe point.
break;
}
// Move (backwards) to the next character to process.
// If this is a beginning-of-input loop iteration, don't advance
// the input position. The next iteration will be processing the
// first real input character.
if (mode == RBBI_RUN) {
c = UTEXT_PREVIOUS32(&fText);
} else {
if (mode == RBBI_START) {
mode = RBBI_RUN;
}
}
}
// The state machine is done. Check whether it found a match...
// If the iterator failed to advance in the match engine, force it ahead by one.
// (This really indicates a defect in the break rules. They should always match
// at least one character.)
if (result == initialPosition) {
UTEXT_SETNATIVEINDEX(&fText, initialPosition);
UTEXT_PREVIOUS32(&fText);
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
}
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
#ifdef RBBI_DEBUG
if (gTrace) {
RBBIDebugPrintf("result = %d\n\n", result);
@ -1099,7 +1022,6 @@ int32_t RuleBasedBreakIterator::handlePrevious(int32_t fromPosition) {
return result;
}
//-------------------------------------------------------------------------------
//
// getRuleStatus() Return the break rule tag associated with the current

View File

@ -354,14 +354,31 @@ UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorC
if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
int32_t aBoundary = 0;
int32_t ruleStatusIndex = 0;
// TODO: check for position == length of text. Although may still need to back up to get rule status.
if (position > 20) {
int32_t backupPos = fBI->handlePrevious(position);
fBI->fPosition = backupPos;
aBoundary = fBI->handleNext(); // Ignore dictionary, just finding a rule based boundary.
ruleStatusIndex = fBI->fRuleStatusIndex;
int32_t backupPos = fBI->handleSafePrevious(position);
if (backupPos > 0) {
// Advance to the boundary following the backup position.
// There is a complication: the safe reverse rules identify pairs of code points
// that are safe. If advancing from the safe point moves forwards by less than
// two code points, we need to advance one more time to ensure that the boundary
// is good, including a correct rules status value.
//
fBI->fPosition = backupPos;
aBoundary = fBI->handleNext();
if (aBoundary <= backupPos + 4) {
// +4 is a quick test for possibly having advanced only one codepoint.
// Four being the length of the longest potential code point, a supplementary in UTF-8
utext_setNativeIndex(&fBI->fText, aBoundary);
if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) {
// The initial handleNext() only advanced by a single code point. Go again.
aBoundary = fBI->handleNext(); // Safe rules identify safe pairs.
}
}
ruleStatusIndex = fBI->fRuleStatusIndex;
}
}
reset(aBoundary, ruleStatusIndex); // Reset cache to hold aBoundary as a single starting point.
reset(aBoundary, ruleStatusIndex); // Reset cache to hold aBoundary as a single starting point.
}
// Fill in boundaries between existing cache content and the new requested position.
@ -485,16 +502,30 @@ UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status)
if (backupPosition <= 0) {
backupPosition = 0;
} else {
backupPosition = fBI->handlePrevious(backupPosition);
backupPosition = fBI->handleSafePrevious(backupPosition);
}
if (backupPosition == UBRK_DONE || backupPosition == 0) {
position = 0;
positionStatusIdx = 0;
} else {
fBI->fPosition = backupPosition; // TODO: pass starting position in a clearer way.
// Advance to the boundary following the backup position.
// There is a complication: the safe reverse rules identify pairs of code points
// that are safe. If advancing from the safe point moves forwards by less than
// two code points, we need to advance one more time to ensure that the boundary
// is good, including a correct rules status value.
//
fBI->fPosition = backupPosition;
position = fBI->handleNext();
if (position <= backupPosition + 4) {
// +4 is a quick test for possibly having advanced only one codepoint.
// Four being the length of the longest potential code point, a supplementary in UTF-8
utext_setNativeIndex(&fBI->fText, position);
if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) {
// The initial handleNext() only advanced by a single code point. Go again.
position = fBI->handleNext(); // Safe rules identify safe pairs.
}
};
positionStatusIdx = fBI->fRuleStatusIndex;
}
} while (position >= fromPosition);

View File

@ -81,8 +81,6 @@ void RBBIDataWrapper::init0() {
fHeader = NULL;
fForwardTable = NULL;
fReverseTable = NULL;
fSafeFwdTable = NULL;
fSafeRevTable = NULL;
fRuleSource = NULL;
fRuleStatusTable = NULL;
fTrie = NULL;
@ -111,21 +109,6 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
if (data->fRTableLen != 0) {
fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable);
}
if (data->fSFTableLen != 0) {
fSafeFwdTable = (RBBIStateTable *)((char *)data + fHeader->fSFTable);
}
if (data->fSRTableLen != 0) {
fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable);
}
// Rule Compatibility Hacks
// If a rule set includes reverse rules but does not explicitly include safe reverse rules,
// the reverse rules are to be treated as safe reverse rules.
if (fSafeRevTable == NULL && fReverseTable != NULL) {
fSafeRevTable = fReverseTable;
fReverseTable = NULL;
}
fTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
(uint8_t *)data + fHeader->fTrie,
@ -277,8 +260,6 @@ void RBBIDataWrapper::printData() {
printTable("Forward State Transition Table", fForwardTable);
printTable("Reverse State Transition Table", fReverseTable);
printTable("Safe Forward State Transition Table", fSafeFwdTable);
printTable("Safe Reverse State Transition Table", fSafeRevTable);
RBBIDebugPrintf("\nOrignal Rules source:\n");
for (int32_t c=0; fRuleSource[c] != 0; c++) {
@ -418,28 +399,6 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
outBytes+tableStartOffset+topSize, status);
}
// Safe Forward state table. Same layout as forward table, above.
tableStartOffset = ds->readUInt32(rbbiDH->fSFTable);
tableLength = ds->readUInt32(rbbiDH->fSFTableLen);
if (tableLength > 0) {
ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
outBytes+tableStartOffset, status);
ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
outBytes+tableStartOffset+topSize, status);
}
// Safe Reverse state table. Same layout as forward table, above.
tableStartOffset = ds->readUInt32(rbbiDH->fSRTable);
tableLength = ds->readUInt32(rbbiDH->fSRTableLen);
if (tableLength > 0) {
ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
outBytes+tableStartOffset, status);
ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
outBytes+tableStartOffset+topSize, status);
}
// Trie table for character categories
utrie2_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
outBytes+ds->readUInt32(rbbiDH->fTrie), status);

View File

@ -58,7 +58,7 @@ ubrk_swap(const UDataSwapper *ds,
U_NAMESPACE_BEGIN
// The current RBBI data format version.
static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {4, 0, 0, 0};
static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {5, 0, 0, 0};
/*
* The following structs map exactly onto the raw data from ICU common data file.
@ -81,10 +81,6 @@ struct RBBIDataHeader {
uint32_t fFTableLen;
uint32_t fRTable; /* Offset to the reverse state transition table. */
uint32_t fRTableLen;
uint32_t fSFTable; /* safe point forward transition table */
uint32_t fSFTableLen;
uint32_t fSRTable; /* safe point reverse transition table */
uint32_t fSRTableLen;
uint32_t fTrie; /* Offset to Trie data for character categories */
uint32_t fTrieLen;
uint32_t fRuleSource; /* Offset to the source for for the break */
@ -174,8 +170,6 @@ public:
const RBBIDataHeader *fHeader;
const RBBIStateTable *fForwardTable;
const RBBIStateTable *fReverseTable;
const RBBIStateTable *fSafeFwdTable;
const RBBIStateTable *fSafeRevTable;
const UChar *fRuleSource;
const int32_t *fRuleStatusTable;

View File

@ -62,10 +62,7 @@ RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
fSafeFwdTree = NULL;
fSafeRevTree = NULL;
fDefaultTree = &fForwardTree;
fForwardTables = NULL;
fReverseTables = NULL;
fSafeFwdTables = NULL;
fSafeRevTables = NULL;
fForwardTable = NULL;
fRuleStatusVals = NULL;
fChainRules = FALSE;
fLBCMNoChain = FALSE;
@ -114,11 +111,7 @@ RBBIRuleBuilder::~RBBIRuleBuilder() {
delete fUSetNodes;
delete fSetBuilder;
delete fForwardTables;
delete fReverseTables;
delete fSafeFwdTables;
delete fSafeRevTables;
delete fForwardTable;
delete fForwardTree;
delete fReverseTree;
delete fSafeFwdTree;
@ -157,21 +150,15 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
// without the padding.
//
int32_t headerSize = align8(sizeof(RBBIDataHeader));
int32_t forwardTableSize = align8(fForwardTables->getTableSize());
int32_t reverseTableSize = align8(fReverseTables->getTableSize());
int32_t safeFwdTableSize = align8(fSafeFwdTables->getTableSize());
int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize());
int32_t forwardTableSize = align8(fForwardTable->getTableSize());
int32_t reverseTableSize = align8(fForwardTable->getSafeTableSize());
int32_t trieSize = align8(fSetBuilder->getTrieSize());
int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar));
(void)safeFwdTableSize;
int32_t totalSize = headerSize
+ forwardTableSize
+ /* reverseTableSize */ 0
+ /* safeFwdTableSize */ 0
+ (safeRevTableSize ? safeRevTableSize : reverseTableSize)
+ reverseTableSize
+ statusTableSize + trieSize + rulesSize;
RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize);
@ -190,38 +177,13 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
data->fLength = totalSize;
data->fCatCount = fSetBuilder->getNumCharCategories();
// Only save the forward table and the safe reverse table,
// because these are the only ones used at run-time.
//
// For the moment, we still build the other tables if they are present in the rule source files,
// for backwards compatibility. Old rule files need to work, and this is the simplest approach.
//
// Additional backwards compatibility consideration: if no safe rules are provided, consider the
// reverse rules to actually be the safe reverse rules.
data->fFTable = headerSize;
data->fFTableLen = forwardTableSize;
// Do not save Reverse Table.
data->fRTable = data->fFTable + forwardTableSize;
data->fRTableLen = 0;
data->fRTable = data->fFTable + data->fFTableLen;
data->fRTableLen = reverseTableSize;
// Do not save the Safe Forward table.
data->fSFTable = data->fRTable + 0;
data->fSFTableLen = 0;
data->fSRTable = data->fSFTable + 0;
if (safeRevTableSize > 0) {
data->fSRTableLen = safeRevTableSize;
} else if (reverseTableSize > 0) {
data->fSRTableLen = reverseTableSize;
} else {
U_ASSERT(FALSE); // Rule build should have failed for lack of a reverse table
// before reaching this point.
}
data->fTrie = data->fSRTable + data->fSRTableLen;
data->fTrie = data->fRTable + data->fRTableLen;
data->fTrieLen = fSetBuilder->getTrieSize();
data->fStatusTable = data->fTrie + trieSize;
data->fStatusTableLen= statusTableSize;
@ -230,15 +192,8 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
fForwardTables->exportTable((uint8_t *)data + data->fFTable);
// fReverseTables->exportTable((uint8_t *)data + data->fRTable);
// fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
if (safeRevTableSize > 0) {
fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
} else {
fReverseTables->exportTable((uint8_t *)data + data->fSRTable);
}
fForwardTable->exportTable((uint8_t *)data + data->fFTable);
fForwardTable->exportSafeTable((uint8_t *)data + data->fRTable);
fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);
int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
@ -252,10 +207,6 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
}
//----------------------------------------------------------------------------------------
//
// createRuleBasedBreakIterator construct from source rules that are passed in
@ -267,8 +218,6 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
UParseError *parseError,
UErrorCode &status)
{
// status checked below
//
// Read the input rules, generate a parse tree, symbol table,
// and list of all Unicode Sets referenced by the rules.
@ -277,66 +226,13 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
if (U_FAILURE(status)) { // status checked here bcos build below doesn't
return NULL;
}
builder.fScanner->parse();
//
// UnicodeSet processing.
// Munge the Unicode Sets to create a set of character categories.
// Generate the mapping tables (TRIE) from input code points to
// the character categories.
//
builder.fSetBuilder->buildRanges();
RBBIDataHeader *data = builder.build(status);
//
// Generate the DFA state transition table.
//
builder.fForwardTables = new RBBITableBuilder(&builder, &builder.fForwardTree);
builder.fReverseTables = new RBBITableBuilder(&builder, &builder.fReverseTree);
builder.fSafeFwdTables = new RBBITableBuilder(&builder, &builder.fSafeFwdTree);
builder.fSafeRevTables = new RBBITableBuilder(&builder, &builder.fSafeRevTree);
if (builder.fForwardTables == NULL || builder.fReverseTables == NULL ||
builder.fSafeFwdTables == NULL || builder.fSafeRevTables == NULL)
{
status = U_MEMORY_ALLOCATION_ERROR;
delete builder.fForwardTables; builder.fForwardTables = NULL;
delete builder.fReverseTables; builder.fReverseTables = NULL;
delete builder.fSafeFwdTables; builder.fSafeFwdTables = NULL;
delete builder.fSafeRevTables; builder.fSafeRevTables = NULL;
return NULL;
if (U_FAILURE(status)) {
return nullptr;
}
builder.fForwardTables->build();
builder.fReverseTables->build();
builder.fSafeFwdTables->build();
builder.fSafeRevTables->build();
#ifdef RBBI_DEBUG
if (builder.fDebugEnv && uprv_strstr(builder.fDebugEnv, "states")) {
builder.fForwardTables->printRuleStatusTable();
}
#endif
builder.optimizeTables();
builder.fSetBuilder->buildTrie();
//
// Package up the compiled data into a memory image
// in the run-time format.
//
RBBIDataHeader *data = builder.flattenData(); // returns NULL if error
if (U_FAILURE(*builder.fStatus)) {
return NULL;
}
//
// Clean up the compiler related stuff
//
//
// Create a break iterator from the compiled rules.
// (Identical to creation from stored pre-compiled rules)
@ -353,27 +249,71 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
return This;
}
void RBBIRuleBuilder::optimizeTables() {
int32_t leftClass;
int32_t rightClass;
leftClass = 3;
rightClass = 0;
while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) {
fSetBuilder->mergeCategories(leftClass, rightClass);
fForwardTables->removeColumn(rightClass);
fReverseTables->removeColumn(rightClass);
fSafeFwdTables->removeColumn(rightClass);
fSafeRevTables->removeColumn(rightClass);
RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) {
if (U_FAILURE(status)) {
return nullptr;
}
fForwardTables->removeDuplicateStates();
fReverseTables->removeDuplicateStates();
fSafeFwdTables->removeDuplicateStates();
fSafeRevTables->removeDuplicateStates();
fScanner->parse();
if (U_FAILURE(status)) {
return nullptr;
}
//
// UnicodeSet processing.
// Munge the Unicode Sets to create a set of character categories.
// Generate the mapping tables (TRIE) from input code points to
// the character categories.
//
fSetBuilder->buildRanges();
//
// Generate the DFA state transition table.
//
fForwardTable = new RBBITableBuilder(this, &fForwardTree, status);
if (fForwardTable == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
fForwardTable->buildForwardTable();
optimizeTables();
fForwardTable->buildSafeReverseTable(status);
#ifdef RBBI_DEBUG
if (fDebugEnv && uprv_strstr(fDebugEnv, "states")) {
fForwardTable->printStates();
fForwardTable->printRuleStatusTable();
fForwardTable->printReverseTable();
}
#endif
fSetBuilder->buildTrie();
//
// Package up the compiled data into a memory image
// in the run-time format.
//
RBBIDataHeader *data = flattenData(); // returns NULL if error
if (U_FAILURE(status)) {
return nullptr;
}
return data;
}
/**
 * Shrinks the forward table: repeatedly finds a pair of duplicate character
 * classes, merges the pair in the set builder and removes the redundant column,
 * then removes duplicate states.
 */
void RBBIRuleBuilder::optimizeTables() {
    // The search for duplicates begins at char class 3.
    // Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively,
    // and should not have other categories merged into them.
    IntPair duplPair = {3, 0};
    for (;;) {
        if (!fForwardTable->findDuplCharClassFrom(&duplPair)) {
            break;
        }
        fSetBuilder->mergeCategories(duplPair);
        fForwardTable->removeColumn(duplPair.second);
    }

    fForwardTable->removeDuplicateStates();
}
U_NAMESPACE_END

View File

@ -18,6 +18,8 @@
#if !UCONFIG_NO_BREAK_ITERATION
#include <utility>
#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "unicode/uniset.h"
@ -25,8 +27,7 @@
#include "uhash.h"
#include "uvector.h"
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
// looks up references to $variables within a set.
// looks up references to $variables within a set.
U_NAMESPACE_BEGIN
@ -123,10 +124,16 @@ public:
RBBIRuleBuilder(const UnicodeString &rules,
UParseError *parseErr,
UErrorCode &status
);
);
virtual ~RBBIRuleBuilder();
/**
* Build the state tables and char class Trie from the source rules.
*/
RBBIDataHeader *build(UErrorCode &status);
/**
* Fold together redundant character classes (table columns) and
* redundant states (table rows). Done after initial table generation,
@ -162,10 +169,7 @@ public:
RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
UVector *fUSetNodes; // Vector of all uset nodes.
RBBITableBuilder *fForwardTables; // State transition tables
RBBITableBuilder *fReverseTables;
RBBITableBuilder *fSafeFwdTables;
RBBITableBuilder *fSafeRevTables;
RBBITableBuilder *fForwardTable; // State transition table, build time form.
UVector *fRuleStatusVals; // The values that can be returned
// from getRuleStatus().
@ -200,6 +204,11 @@ struct RBBISetTableEl {
RBBINode *val;
};
/**
* A pair of ints, used to bundle pairs of states or pairs of character classes.
*/
typedef std::pair<int32_t, int32_t> IntPair;
//----------------------------------------------------------------------------
//

View File

@ -372,7 +372,7 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
// (forward, reverse, safe_forward, safe_reverse)
// OR this rule into the appropriate group of them.
//
RBBINode **destRules = (fReverseRule? &fRB->fReverseTree : fRB->fDefaultTree);
RBBINode **destRules = (fReverseRule? &fRB->fSafeRevTree : fRB->fDefaultTree);
if (*destRules != NULL) {
// This is not the first rule encountered.
@ -1122,22 +1122,6 @@ void RBBIRuleScanner::parse() {
return;
}
//
// If there were NO user specified reverse rules, set up the equivalent of ".*;"
//
if (fRB->fReverseTree == NULL) {
fRB->fReverseTree = pushNewNode(RBBINode::opStar);
RBBINode *operand = pushNewNode(RBBINode::setRef);
if (U_FAILURE(*fRB->fStatus)) {
return;
}
findSetFor(UnicodeString(TRUE, kAny, 3), operand);
fRB->fReverseTree->fLeftChild = operand;
operand->fParent = fRB->fReverseTree;
fNodeStackPtr -= 2;
}
//
// Parsing of the input RBBI rules is complete.
// We now have a parse tree for the rule expressions

View File

@ -270,15 +270,15 @@ void RBBISetBuilder::buildTrie() {
}
void RBBISetBuilder::mergeCategories(int32_t left, int32_t right) {
U_ASSERT(left >= 1);
U_ASSERT(right > left);
void RBBISetBuilder::mergeCategories(IntPair categories) {
U_ASSERT(categories.first >= 1);
U_ASSERT(categories.second > categories.first);
for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
int32_t rangeNum = rd->fNum & ~DICT_BIT;
int32_t rangeDict = rd->fNum & DICT_BIT;
if (rangeNum == right) {
rd->fNum = left | rangeDict;
} else if (rangeNum > right) {
if (rangeNum == categories.second) {
rd->fNum = categories.first | rangeDict;
} else if (rangeNum > categories.second) {
rd->fNum--;
}
}

View File

@ -94,10 +94,12 @@ public:
UChar32 getFirstChar(int32_t val) const;
UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo
// character were encountered.
/** merge two character categories that have been identified as having equivalent behavior.
* The ranges belonging to the right category (table column) will be added to the left.
/**
* Merge two character categories that have been identified as having equivalent behavior.
* The ranges belonging to the second category (table column) will be added to the first.
* @param categories the pair of categories to be merged.
*/
void mergeCategories(int32_t left, int32_t right);
void mergeCategories(IntPair categories);
static constexpr int32_t DICT_BIT = 0x4000;

View File

@ -27,21 +27,19 @@
U_NAMESPACE_BEGIN
RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode) :
fTree(*rootNode) {
fRB = rb;
fStatus = fRB->fStatus;
UErrorCode status = U_ZERO_ERROR;
fDStates = new UVector(status);
if (U_FAILURE(*fStatus)) {
return;
}
RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status) :
fRB(rb),
fTree(*rootNode),
fStatus(&status),
fDStates(nullptr),
fSafeTable(nullptr) {
if (U_FAILURE(status)) {
*fStatus = status;
return;
}
if (fDStates == NULL) {
*fStatus = U_MEMORY_ALLOCATION_ERROR;;
// fDStates is UVector<RBBIStateDescriptor *>
fDStates = new UVector(status);
if (U_SUCCESS(status) && fDStates == nullptr ) {
status = U_MEMORY_ALLOCATION_ERROR;
}
}
@ -52,17 +50,18 @@ RBBITableBuilder::~RBBITableBuilder() {
for (i=0; i<fDStates->size(); i++) {
delete (RBBIStateDescriptor *)fDStates->elementAt(i);
}
delete fDStates;
delete fDStates;
delete fSafeTable;
}
//-----------------------------------------------------------------------------
//
// RBBITableBuilder::build - This is the main function for building the DFA state transtion
// table from the RBBI rules parse tree.
// RBBITableBuilder::buildForwardTable - This is the main function for building
// the DFA state transition table from the RBBI rules parse tree.
//
//-----------------------------------------------------------------------------
void RBBITableBuilder::build() {
void RBBITableBuilder::buildForwardTable() {
if (U_FAILURE(*fStatus)) {
return;
@ -189,8 +188,6 @@ void RBBITableBuilder::build() {
// for all tables. Merge the ones from this table into the global set.
//
mergeRuleStatusVals();
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "states")) {printStates();};
}
@ -1081,18 +1078,18 @@ void RBBITableBuilder::printPosSets(RBBINode *n) {
//
// findDuplCharClassFrom()
//
bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &duplCategory) {
bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) {
int32_t numStates = fDStates->size();
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
uint16_t table_base;
uint16_t table_dupl;
for (; baseCategory < numCols-1; ++baseCategory) {
for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) {
for (; categories->first < numCols-1; categories->first++) {
for (categories->second=categories->first+1; categories->second < numCols; categories->second++) {
for (int32_t state=0; state<numStates; state++) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
table_base = (uint16_t)sd->fDtran->elementAti(baseCategory);
table_dupl = (uint16_t)sd->fDtran->elementAti(duplCategory);
table_base = (uint16_t)sd->fDtran->elementAti(categories->first);
table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second);
if (table_base != table_dupl) {
break;
}
@ -1121,14 +1118,14 @@ void RBBITableBuilder::removeColumn(int32_t column) {
/*
* findDuplicateState
*/
bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) {
bool RBBITableBuilder::findDuplicateState(IntPair *states) {
int32_t numStates = fDStates->size();
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
for (; firstState<numStates-1; ++firstState) {
RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(firstState);
for (duplState=firstState+1; duplState<numStates; ++duplState) {
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
for (; states->first<numStates-1; states->first++) {
RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(states->first);
for (states->second=states->first+1; states->second<numStates; states->second++) {
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(states->second);
if (firstSD->fAccepting != duplSD->fAccepting ||
firstSD->fLookAhead != duplSD->fLookAhead ||
firstSD->fTagsIdx != duplSD->fTagsIdx) {
@ -1139,8 +1136,8 @@ bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplStat
int32_t firstVal = firstSD->fDtran->elementAti(col);
int32_t duplVal = duplSD->fDtran->elementAti(col);
if (!((firstVal == duplVal) ||
((firstVal == firstState || firstVal == duplState) &&
(duplVal == firstState || duplVal == duplState)))) {
((firstVal == states->first || firstVal == states->second) &&
(duplVal == states->first || duplVal == states->second)))) {
rowsMatch = false;
break;
}
@ -1153,7 +1150,38 @@ bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplStat
return false;
}
void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) {
    // Iterator-style search over the safe table rows. The search resumes from
    // states->first; on success the pair (first, second) identifies two rows
    // that are interchangeable. The in/out pair is advanced in place, exactly
    // as the caller's loop expects.
    int32_t &keepIdx = states->first;
    int32_t &duplIdx = states->second;
    const int32_t rowCount = fSafeTable->size();

    while (keepIdx < rowCount - 1) {
        const UnicodeString *keepRow = static_cast<UnicodeString *>(fSafeTable->elementAt(keepIdx));
        const int32_t colCount = keepRow->length();

        duplIdx = keepIdx + 1;
        while (duplIdx < rowCount) {
            const UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(duplIdx));

            // Walk the columns until the first one that distinguishes the two rows.
            int32_t col = 0;
            for (; col < colCount; ++col) {
                const int32_t keepVal = keepRow->charAt(col);
                const int32_t duplVal = duplRow->charAt(col);
                if (keepVal == duplVal) {
                    continue;
                }
                // Transitions that differ only by pointing at one or the other
                // of the two candidate rows still count as equal.
                const bool keepTargetsPair = (keepVal == keepIdx || keepVal == duplIdx);
                const bool duplTargetsPair = (duplVal == keepIdx || duplVal == duplIdx);
                if (!(keepTargetsPair && duplTargetsPair)) {
                    break;
                }
            }
            if (col == colCount) {
                return true;
            }
            duplIdx++;
        }
        keepIdx++;
    }
    return false;
}
void RBBITableBuilder::removeState(IntPair duplStates) {
const int32_t keepState = duplStates.first;
const int32_t duplState = duplStates.second;
U_ASSERT(keepState < duplState);
U_ASSERT(duplState < fDStates->size());
@ -1188,19 +1216,44 @@ void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
}
}
void RBBITableBuilder::removeSafeState(IntPair duplStates) {
    // duplStates.first is the row that survives; duplStates.second is dropped.
    const int32_t survivor = duplStates.first;
    const int32_t removed = duplStates.second;
    U_ASSERT(survivor < removed);
    U_ASSERT(removed < fSafeTable->size());

    // fSafeTable has a deleter function, so the removed row is freed
    // by the vector itself.
    fSafeTable->removeElementAt(removed);

    // Renumber every transition: references to the removed row are redirected
    // to the survivor, and rows that followed the removed one shift down by one.
    const int32_t rowCount = fSafeTable->size();
    for (int32_t rowIdx = 0; rowIdx < rowCount; ++rowIdx) {
        UnicodeString *row = static_cast<UnicodeString *>(fSafeTable->elementAt(rowIdx));
        const int32_t colCount = row->length();
        for (int32_t colIdx = 0; colIdx < colCount; ++colIdx) {
            const int32_t target = row->charAt(colIdx);
            const int32_t retargeted = (target == removed) ? survivor
                                     : (target > removed)  ? target - 1
                                                           : target;
            row->setCharAt(colIdx, retargeted);
        }
    }
}
/*
* RemoveDuplicateStates
*/
void RBBITableBuilder::removeDuplicateStates() {
int32_t firstState = 3;
int32_t duplicateState = 0;
while (findDuplicateState(firstState, duplicateState)) {
// printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
removeState(firstState, duplicateState);
IntPair dupls = {3, 0};
while (findDuplicateState(&dupls)) {
// printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
removeState(dupls);
}
}
//-----------------------------------------------------------------------------
//
// getTableSize() Calculate the size of the runtime form of this
@ -1277,6 +1330,185 @@ void RBBITableBuilder::exportTable(void *where) {
}
/**
* Synthesize a safe state table from the main state table.
*/
void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
    // Builds fSafeTable from the forward table held in fDStates.
    // If status is already a failure on entry, nothing is done. If an allocation
    // fails, status is set, fSafeTable is reset to nullptr and the function returns.
    //
    // The safe table creation has three steps:

    // 1. Identify pairs of character classes that are "safe." Safe means that boundaries
    //    following the pair do not depend on context or state before the pair. To test
    //    whether a pair is safe, run it through the main forward state table, starting
    //    from each state. If the final state is the same, no matter what the starting state,
    //    the pair is safe.
    //
    // 2. Build a state table that recognizes the safe pairs. It's similar to the
    //    forward table, with a column for each input character [class], and a row for
    //    each state. Row 1 is the start state, and row 0 is the stop state. Initially
    //    create an additional state for each input character category; being in
    //    one of these states means that the character has been seen, and is potentially
    //    the first of a pair. In each of these rows, the entry for the second character
    //    of a safe pair is set to the stop state (0), indicating that a match was found.
    //    All other table entries are set to the state corresponding to the current input
    //    character, allowing that character to be the start of a following pair.
    //
    //    Because the safe rules are to be run in reverse, moving backwards in the text,
    //    the first and second pair categories are swapped when building the table.
    //
    // 3. Compress the table. There are typically many rows (states) that are
    //    equivalent - that have zeroes (match completed) in the same columns -
    //    and can be folded together.

    if (U_FAILURE(status)) {
        return;
    }

    // Each safe pair is stored as two UChars in the safePairs string.
    UnicodeString safePairs;

    int32_t numCharClasses = fRB->fSetBuilder->getNumCharCategories();
    int32_t numStates = fDStates->size();

    for (int32_t c1=0; c1<numCharClasses; ++c1) {
        for (int32_t c2=0; c2 < numCharClasses; ++c2) {
            int32_t wantedEndState = -1;
            int32_t endState = 0;
            for (int32_t startState = 1; startState < numStates; ++startState) {
                RBBIStateDescriptor *startStateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(startState));
                int32_t s2 = startStateD->fDtran->elementAti(c1);
                RBBIStateDescriptor *s2StateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(s2));
                endState = s2StateD->fDtran->elementAti(c2);
                if (wantedEndState < 0) {
                    wantedEndState = endState;
                } else {
                    if (wantedEndState != endState) {
                        break;
                    }
                }
            }
            if (wantedEndState == endState) {
                safePairs.append(static_cast<char16_t>(c1));
                safePairs.append(static_cast<char16_t>(c2));
                // printf("(%d, %d) ", c1, c2);
            }
        }
        // printf("\n");
    }

    // Populate the initial safe table.
    // The table as a whole is UVector<UnicodeString>
    // Each row is represented by a UnicodeString, being used as a Vector<int16>.
    // Row 0 is the stop state.
    // Row 1 is the start state.
    // Row 2 and beyond are other states, initially one per char class, but
    //   after initial construction, many of the states will be combined, compacting the table.
    // The String holds the nextState data only. The four leading fields of a row, fAccepting,
    // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.

    U_ASSERT(fSafeTable == nullptr);
    fSafeTable = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, numCharClasses + 2, status);
    if (fSafeTable == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    for (int32_t row=0; U_SUCCESS(status) && row<numCharClasses + 2; ++row) {
        UnicodeString *rowString = new UnicodeString(numCharClasses, 0, numCharClasses+4);
        if (rowString == nullptr) {
            status = U_MEMORY_ALLOCATION_ERROR;
            break;
        }
        // NOTE(review): whether addElement() takes ownership of rowString when it
        // fails is not visible here, so the string is not deleted on that path.
        fSafeTable->addElement(rowString, status);
    }
    if (U_FAILURE(status)) {
        // Do not leave a partially populated table behind; the code below
        // dereferences every row unconditionally.
        delete fSafeTable;
        fSafeTable = nullptr;
        return;
    }

    // From the start state, each input char class transitions to the state for that input.
    UnicodeString &startState = *static_cast<UnicodeString *>(fSafeTable->elementAt(1));
    for (int32_t charClass=0; charClass < numCharClasses; ++charClass) {
        // Note: +2 for the start & stop state.
        startState.setCharAt(charClass, charClass+2);
    }

    // Initially make every other state table row look like the start state row,
    for (int32_t row=2; row<numCharClasses+2; ++row) {
        UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(row));
        rowState = startState;   // UnicodeString assignment, copies contents.
    }

    // Run through the safe pairs, set the next state to zero when pair has been seen.
    // Zero being the stop state, meaning we found a safe point.
    for (int32_t pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) {
        int32_t c1 = safePairs.charAt(pairIdx);
        int32_t c2 = safePairs.charAt(pairIdx + 1);

        UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(c2 + 2));
        rowState.setCharAt(c1, 0);
    }

    // Remove duplicate or redundant rows from the table.
    IntPair states = {1, 0};
    while (findDuplicateSafeState(&states)) {
        // printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
        removeSafeState(states);
    }
}
//-----------------------------------------------------------------------------
//
// getSafeTableSize() Calculate the size of the runtime form of this
// safe state table.
//
//-----------------------------------------------------------------------------
int32_t RBBITableBuilder::getSafeTableSize() const {
    // No safe table has been built: it occupies no space.
    if (fSafeTable == nullptr) {
        return 0;
    }

    // Fixed header, followed by one fixed-length row per state.
    const int32_t headerBytes = offsetof(RBBIStateTable, fTableData);
    const int32_t rowCount = fSafeTable->size();
    const int32_t colCount = fRB->fSetBuilder->getNumCharCategories();
    const int32_t bytesPerRow = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*colCount;

    return headerBytes + rowCount * bytesPerRow;
}
//-----------------------------------------------------------------------------
//
// exportSafeTable() export the state transition table in the format required
// by the runtime engine. getSafeTableSize() bytes of memory
// must be available at the output address "where".
//
//-----------------------------------------------------------------------------
void RBBITableBuilder::exportSafeTable(void *where) {
RBBIStateTable *table = (RBBIStateTable *)where;
uint32_t state;
int col;
if (U_FAILURE(*fStatus) || fSafeTable == nullptr) {
return;
}
int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
if (catCount > 0x7fff ||
fSafeTable->size() > 0x7fff) {
*fStatus = U_BRK_INTERNAL_ERROR;
return;
}
table->fRowLen = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount;
table->fNumStates = fSafeTable->size();
table->fFlags = 0;
table->fReserved = 0;
for (state=0; state<table->fNumStates; state++) {
UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(state);
RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen);
row->fAccepting = 0;
row->fLookAhead = 0;
row->fTagIdx = 0;
row->fReserved = 0;
for (col=0; col<catCount; col++) {
row->fNextState[col] = rowString->charAt(col);
}
}
}
//-----------------------------------------------------------------------------
//
@ -1331,6 +1563,47 @@ void RBBITableBuilder::printStates() {
#endif
//-----------------------------------------------------------------------------
//
// printReverseTable Debug Function. Dump the fully constructed safe table.
//
//-----------------------------------------------------------------------------
#ifdef RBBI_DEBUG
void RBBITableBuilder::printReverseTable() {
int c; // input "character"
int n; // state number
RBBIDebugPrintf(" Safe Reverse Table \n");
if (fSafeTable == nullptr) {
RBBIDebugPrintf(" --- nullptr ---\n");
return;
}
RBBIDebugPrintf("state | i n p u t s y m b o l s \n");
RBBIDebugPrintf(" | Acc LA Tag");
for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
RBBIDebugPrintf(" %2d", c);
}
RBBIDebugPrintf("\n");
RBBIDebugPrintf(" |---------------");
for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
RBBIDebugPrintf("---");
}
RBBIDebugPrintf("\n");
for (n=0; n<fSafeTable->size(); n++) {
UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(n);
RBBIDebugPrintf(" %3d | " , n);
RBBIDebugPrintf("%3d %3d %5d ", 0, 0, 0); // Accepting, LookAhead, Tags
for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
RBBIDebugPrintf(" %2d", rowString->charAt(c));
}
RBBIDebugPrintf("\n");
}
RBBIDebugPrintf("\n\n");
}
#endif
//-----------------------------------------------------------------------------
//

View File

@ -17,6 +17,7 @@
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "rbbirb.h"
#include "rbbinode.h"
@ -37,22 +38,28 @@ class UVector32;
class RBBITableBuilder : public UMemory {
public:
RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode);
RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status);
~RBBITableBuilder();
void build();
int32_t getTableSize() const; // Return the runtime size in bytes of
// the built state table
void buildForwardTable();
/** Return the runtime size in bytes of the built state table. */
int32_t getTableSize() const;
/** Fill in the runtime state table. Sufficient memory must exist at the specified location.
*/
void exportTable(void *where);
/** Find duplicate (redundant) character classes, beginning after the specifed
* pair, within this state table. This is an iterator-like function, used to
* identify char classes (state table columns) that can be eliminated.
/**
* Find duplicate (redundant) character classes. Begin looking with categories.first.
* Duplicates, if found, are returned in the categories parameter.
* This is an iterator-like function, used to identify character classes
* (state table columns) that can be eliminated.
* @param categories in/out parameter, specifies where to start looking for duplicates,
* and returns the first pair of duplicates found, if any.
* @return true if duplicate char classes were found, false otherwise.
*/
bool findDuplCharClassFrom(int &baseClass, int &duplClass);
bool findDuplCharClassFrom(IntPair *categories);
/** Remove a column from the state table. Used when two character categories
* have been found equivalent, and merged together, to eliminate the unneeded table column.
@ -62,6 +69,16 @@ public:
/** Check for, and remove duplicate states (table rows). */
void removeDuplicateStates();
/** Build the safe reverse table from the already-constructed forward table. */
void buildSafeReverseTable(UErrorCode &status);
/** Return the runtime size in bytes of the built safe reverse state table. */
int32_t getSafeTableSize() const;
/** Fill in the runtime safe state table. Sufficient memory must exist at the specified location.
*/
void exportSafeTable(void *where);
private:
void calcNullable(RBBINode *n);
@ -84,20 +101,36 @@ private:
void addRuleRootNodes(UVector *dest, RBBINode *node);
/** Find the next duplicate state. An iterator function.
* @param firstState (in/out) begin looking at this state, return the first of the
* pair of duplicates.
* @param duplicateState returns the duplicate state of fistState
* @return true if a duplicate pair of states was found.
/**
* Find duplicate (redundant) states, beginning at the specified pair,
* within this state table. This is an iterator-like function, used to
* identify states (state table rows) that can be eliminated.
* @param states in/out parameter, specifies where to start looking for duplicates,
* and returns the first pair of duplicates found, if any.
* @return true if duplicate states were found, false otherwise.
*/
bool findDuplicateState(int32_t &firstState, int32_t &duplicateState);
bool findDuplicateState(IntPair *states);
/** Remove a duplicate state/
* @param keepState First of the duplicate pair. Keep it.
* @param duplState Duplicate state. Remove it. Redirect all references to the duplicate state
* to refer to keepState instead.
/** Remove a duplicate state.
* @param duplStates The duplicate states. The first is kept, the second is removed.
* All references to the second in the state table are retargeted
* to the first.
*/
void removeState(int32_t keepState, int32_t duplState);
void removeState(IntPair duplStates);
/** Find the next duplicate state in the safe reverse table. An iterator function.
* @param states in/out parameter, specifies where to start looking for duplicates,
* and returns the first pair of duplicates found, if any.
* @return true if a duplicate pair of states was found.
*/
bool findDuplicateSafeState(IntPair *states);
/** Remove a duplicate state from the safe table.
* @param duplStates The duplicate states. The first is kept, the second is removed.
* All references to the second in the state table are retargeted
* to the first.
*/
void removeSafeState(IntPair duplStates);
// Set functions for UVector.
// TODO: make a USet subclass of UVector
@ -113,11 +146,13 @@ public:
void printPosSets(RBBINode *n /* = NULL*/);
void printStates();
void printRuleStatusTable();
void printReverseTable();
#else
#define printSet(s)
#define printPosSets(n)
#define printStates()
#define printRuleStatusTable()
#define printReverseTable()
#endif
private:
@ -126,10 +161,14 @@ private:
// table for.
UErrorCode *fStatus;
/** State Descriptors, UVector<RBBIStateDescriptor> */
UVector *fDStates; // D states (Aho's terminology)
// Index is state number
// Contents are RBBIStateDescriptor pointers.
/** Synthesized safe table, UVector of UnicodeString, one string per table row. */
UVector *fSafeTable;
RBBITableBuilder(const RBBITableBuilder &other); // forbid copying of this class
RBBITableBuilder &operator=(const RBBITableBuilder &other); // forbid copying of this class

View File

@ -547,16 +547,15 @@ outerEnd:
if (putInCache && cacheResult) {
serviceCache->put(result->actualDescriptor, result, status);
if (U_FAILURE(status)) {
delete result;
return NULL;
}
if (cacheDescriptorList._obj != NULL) {
for (int32_t i = cacheDescriptorList._obj->size(); --i >= 0;) {
UnicodeString* desc = (UnicodeString*)cacheDescriptorList._obj->elementAt(i);
serviceCache->put(*desc, result, status);
if (U_FAILURE(status)) {
delete result;
return NULL;
}

View File

@ -0,0 +1,222 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT
#include "static_unicode_sets.h"
#include "umutex.h"
#include "ucln_cmn.h"
#include "unicode/uniset.h"
#include "uresimp.h"
#include "cstring.h"
#include "uassert.h"
using namespace icu;
using namespace icu::unisets;
namespace {
UnicodeSet* gUnicodeSets[COUNT] = {};
// Save the empty instance in static memory to have well-defined behavior if a
// regular UnicodeSet cannot be allocated.
char gEmptyUnicodeSet[sizeof(UnicodeSet)];
// Whether the gEmptyUnicodeSet is initialized and ready to use.
UBool gEmptyUnicodeSetInitialized = FALSE;
// Look up the set stored for `key`. A slot that was never populated yields the
// statically allocated empty set, so the result is never null.
inline UnicodeSet* getImpl(Key key) {
    if (UnicodeSet* stored = gUnicodeSets[key]) {
        return stored;
    }
    return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
}
// Allocate a new frozen set holding the union of the sets for k1 and k2.
// Returns nullptr if the allocation fails.
UnicodeSet* computeUnion(Key k1, Key k2) {
    UnicodeSet* merged = new UnicodeSet();
    if (merged != nullptr) {
        merged->addAll(*getImpl(k1));
        merged->addAll(*getImpl(k2));
        merged->freeze();
    }
    return merged;
}
// Allocate a new frozen set holding the union of the sets for k1, k2 and k3.
// Returns nullptr if the allocation fails.
UnicodeSet* computeUnion(Key k1, Key k2, Key k3) {
    UnicodeSet* merged = new UnicodeSet();
    if (merged != nullptr) {
        merged->addAll(*getImpl(k1));
        merged->addAll(*getImpl(k2));
        merged->addAll(*getImpl(k3));
        merged->freeze();
    }
    return merged;
}
// Build a UnicodeSet from `unicodeSetPattern` and store it in the slot for `key`.
// @param key               slot of gUnicodeSets to fill.
// @param unicodeSetPattern pattern passed to the UnicodeSet constructor.
// @param status            receives any pattern error, or
//                          U_MEMORY_ALLOCATION_ERROR if the set cannot be allocated.
void saveSet(Key key, const UnicodeString& unicodeSetPattern, UErrorCode& status) {
    // Each key is expected to be saved only once. Should the data ever yield
    // two patterns for the same key, release the earlier set instead of leaking it.
    delete gUnicodeSets[key];
    gUnicodeSets[key] = new UnicodeSet(unicodeSetPattern, status);
    if (gUnicodeSets[key] == nullptr && U_SUCCESS(status)) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
// Resource sink for the "parse" resource table. Walks context -> strictness ->
// array of UnicodeSet patterns, and files each pattern under the key whose
// exemplar character it contains. The "date" context is skipped.
class ParseDataSink : public ResourceSink {
  public:
    void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
        ResourceTable contextsTable = value.getTable(status);
        if (U_FAILURE(status)) { return; }
        for (int i = 0; contextsTable.getKeyAndValue(i, key, value); i++) {
            if (uprv_strcmp(key, "date") == 0) {
                // ignore
            } else {
                ResourceTable strictnessTable = value.getTable(status);
                if (U_FAILURE(status)) { return; }
                for (int j = 0; strictnessTable.getKeyAndValue(j, key, value); j++) {
                    bool isLenient = (uprv_strcmp(key, "lenient") == 0);

                    ResourceArray array = value.getArray(status);
                    if (U_FAILURE(status)) { return; }
                    for (int k = 0; k < array.getSize(); k++) {
                        array.getValue(k, value);
                        UnicodeString str = value.getUnicodeString(status);
                        if (U_FAILURE(status)) { return; }
                        // There is both lenient and strict data for comma/period,
                        // but not for any of the other symbols.
                        //
                        // NOTE(review): the exemplar characters for MINUS_SIGN and
                        // RUPEE_SIGN were unreadable (empty character literals) in the
                        // reviewed copy. They are restored here as FIGURE DASH (U+2012)
                        // and RUPEE SIGN (U+20A8); confirm against upstream ICU.
                        if (str.indexOf(u'.') != -1) {
                            saveSet(isLenient ? PERIOD : STRICT_PERIOD, str, status);
                        } else if (str.indexOf(u',') != -1) {
                            saveSet(isLenient ? COMMA : STRICT_COMMA, str, status);
                        } else if (str.indexOf(u'+') != -1) {
                            saveSet(PLUS_SIGN, str, status);
                        } else if (str.indexOf(u'\u2012') != -1) {
                            saveSet(MINUS_SIGN, str, status);
                        } else if (str.indexOf(u'$') != -1) {
                            saveSet(DOLLAR_SIGN, str, status);
                        } else if (str.indexOf(u'£') != -1) {
                            saveSet(POUND_SIGN, str, status);
                        } else if (str.indexOf(u'\u20A8') != -1) {
                            saveSet(RUPEE_SIGN, str, status);
                        }
                        if (U_FAILURE(status)) { return; }
                    }
                }
            }
        }
    }
};
icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;
// Cleanup callback: destroys the placement-constructed empty set, deletes every
// stored set, and resets the init-once flag so initialization can run again.
UBool U_CALLCONV cleanupNumberParseUniSets() {
    if (gEmptyUnicodeSetInitialized) {
        // Constructed with placement new, so it is destroyed explicitly rather than deleted.
        UnicodeSet* emptySet = reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
        emptySet->~UnicodeSet();
        gEmptyUnicodeSetInitialized = FALSE;
    }
    for (UnicodeSet*& slot : gUnicodeSets) {
        delete slot;
        slot = nullptr;
    }
    gNumberParseUniSetsInitOnce.reset();
    return TRUE;
}
// One-time initializer for gUnicodeSets. Registers the cleanup callback,
// constructs the fallback empty set, builds the hard-coded sets, loads the
// data-driven sets from the "parse" table of the root bundle, derives the
// union sets, and finally freezes every set that was created.
// Returns early, leaving the remaining slots null, if opening the bundle or
// loading its data fails.
void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
    ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets);

    // Initialize the empty instance for well-defined fallback behavior
    new(gEmptyUnicodeSet) UnicodeSet();
    reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->freeze();
    gEmptyUnicodeSetInitialized = TRUE;

    // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309.
    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
    gUnicodeSets[DEFAULT_IGNORABLES] = new UnicodeSet(
            u"[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]", status);
    gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(u"[[:Bidi_Control:]]", status);

    LocalUResourceBundlePointer rb(ures_open(nullptr, "root", &status));
    if (U_FAILURE(status)) { return; }
    ParseDataSink sink;
    ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status);
    if (U_FAILURE(status)) { return; }

    // NOTE: It is OK for these assertions to fail if there was a no-data build.
    U_ASSERT(gUnicodeSets[COMMA] != nullptr);
    U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr);
    U_ASSERT(gUnicodeSets[PERIOD] != nullptr);
    U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr);

    gUnicodeSets[OTHER_GROUPING_SEPARATORS] = new UnicodeSet(
            u"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status);
    gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
    gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
            STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);

    U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr);
    U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr);

    gUnicodeSets[PERCENT_SIGN] = new UnicodeSet(u"[%٪]", status);
    gUnicodeSets[PERMILLE_SIGN] = new UnicodeSet(u"[‰؉]", status);
    gUnicodeSets[INFINITY_KEY] = new UnicodeSet(u"[∞]", status);

    U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);
    U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr);
    U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr);
    // The yen sign plus its fullwidth form (U+FFE5). The pattern must be a
    // complete bracketed set, and must contain the plain yen sign because that
    // is the exemplar character used for YEN_SIGN lookups.
    gUnicodeSets[YEN_SIGN] = new UnicodeSet(u"[¥\\uffe5]", status);

    gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);

    gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
    gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);

    // Slots can still be null here (allocation failure, or data that was not present).
    for (auto* uniset : gUnicodeSets) {
        if (uniset != nullptr) {
            uniset->freeze();
        }
    }
}
}
// Return the set for `key`, running the one-time initialization first if needed.
// If initialization reports a failure, the static empty set is returned instead.
const UnicodeSet* unisets::get(Key key) {
    UErrorCode initStatus = U_ZERO_ERROR;
    umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, initStatus);
    return U_FAILURE(initStatus)
            ? reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)
            : getImpl(key);
}
// Returns key1 if its set contains str, otherwise NONE.
Key unisets::chooseFrom(UnicodeString str, Key key1) {
    if (get(key1)->contains(str)) {
        return key1;
    }
    return NONE;
}
// Returns key1 if its set contains str; otherwise key2 if that set contains
// str; otherwise NONE. key1 is tested first.
Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
    if (get(key1)->contains(str)) {
        return key1;
    }
    return chooseFrom(str, key2);
}
//Key unisets::chooseCurrency(UnicodeString str) {
// if (get(DOLLAR_SIGN)->contains(str)) {
// return DOLLAR_SIGN;
// } else if (get(POUND_SIGN)->contains(str)) {
// return POUND_SIGN;
// } else if (get(RUPEE_SIGN)->contains(str)) {
// return RUPEE_SIGN;
// } else if (get(YEN_SIGN)->contains(str)) {
// return YEN_SIGN;
// } else {
// return NONE;
// }
//}
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -0,0 +1,119 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// This file is in common instead of i18n because it is needed by ucurr.cpp.
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#ifndef __STATIC_UNICODE_SETS_H__
#define __STATIC_UNICODE_SETS_H__
#include "unicode/uniset.h"
#include "unicode/unistr.h"
U_NAMESPACE_BEGIN
namespace unisets {
/**
 * Identifies one of the statically allocated UnicodeSets returned by get().
 * Apart from NONE, the enumerators are consecutive starting at EMPTY (0),
 * and COUNT is the number of such keys.
 */
enum Key {
    // NONE is used to indicate null in chooseFrom().
    // EMPTY is used to get an empty UnicodeSet.
    NONE = -1,
    EMPTY = 0,
    // Ignorables
    DEFAULT_IGNORABLES,
    STRICT_IGNORABLES,
    // Separators
    // Notes:
    // - COMMA is a superset of STRICT_COMMA
    // - PERIOD is a superset of STRICT_PERIOD
    // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS
    // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and
    //   OTHER_GROUPING_SEPARATORS
    COMMA,
    PERIOD,
    STRICT_COMMA,
    STRICT_PERIOD,
    OTHER_GROUPING_SEPARATORS,
    ALL_SEPARATORS,
    STRICT_ALL_SEPARATORS,
    // Symbols
    MINUS_SIGN,
    PLUS_SIGN,
    PERCENT_SIGN,
    PERMILLE_SIGN,
    INFINITY_KEY, // INFINITY is defined in cmath
    // Currency Symbols
    DOLLAR_SIGN,
    POUND_SIGN,
    RUPEE_SIGN,
    YEN_SIGN, // not in CLDR data, but Currency.java wants it
    // Other
    DIGITS,
    // Combined Separators with Digits (for lead code points)
    DIGITS_OR_ALL_SEPARATORS,
    DIGITS_OR_STRICT_ALL_SEPARATORS,
    // The number of elements in the enum.
    COUNT
};
/**
* Gets the static-allocated UnicodeSet according to the provided key. The
* pointer will be deleted during u_cleanup(); the caller should NOT delete it.
*
* Exported as U_COMMON_API for ucurr.cpp
*
* @param key The desired UnicodeSet according to the enum in this file.
* @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but
* may be empty if an error occurred during data loading.
*/
U_COMMON_API const UnicodeSet* get(Key key);
/**
* Checks if the UnicodeSet given by key1 contains the given string.
*
* Exported as U_COMMON_API for numparse_decimal.cpp
*
* @param str The string to check.
* @param key1 The set to check.
* @return key1 if the set contains str, or NONE if not.
*/
U_COMMON_API Key chooseFrom(UnicodeString str, Key key1);
/**
* Checks if the UnicodeSet given by either key1 or key2 contains the string.
*
* Exported as U_COMMON_API for numparse_decimal.cpp
*
* @param str The string to check.
* @param key1 The first set to check.
* @param key2 The second set to check.
* @return key1 if that set contains str; key2 if that set contains str; or
* NONE if neither set contains str.
*/
U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2);
// Unused in C++:
// Key chooseCurrency(UnicodeString str);
// Used instead:
// Pairs each currency-symbol set key with the single "exemplar" code point
// that represents the whole set.
//
// The non-ASCII exemplars are spelled as universal-character-names so that
// the table does not depend on the encoding of this source file.
static const struct {
    Key key;           // Which static UnicodeSet this entry describes.
    UChar32 exemplar;  // Representative code point for that set.
} kCurrencyEntries[] = {
    {DOLLAR_SIGN, u'$'},
    {POUND_SIGN, u'\u00A3'},  // POUND SIGN
    // NOTE(review): the literal here had lost its character (it read u'',
    // which is not a valid character literal). U+20A8 RUPEE SIGN is used
    // because the former equivalence table in ucurr.cpp listed \u20a8 as the
    // base symbol with \u20b9 as its equivalent -- confirm against upstream.
    {RUPEE_SIGN, u'\u20A8'},  // RUPEE SIGN
    {YEN_SIGN, u'\u00A5'},    // YEN SIGN
};
} // namespace unisets
U_NAMESPACE_END
#endif //__STATIC_UNICODE_SETS_H__
#endif /* #if !UCONFIG_NO_FORMATTING */

File diff suppressed because it is too large Load Diff

View File

@ -138,6 +138,11 @@ ucase_tolower(UChar32 c) {
} else {
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
uint16_t excWord=*pe++;
if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
int32_t delta;
GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
}
if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c);
}
@ -155,6 +160,11 @@ ucase_toupper(UChar32 c) {
} else {
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
uint16_t excWord=*pe++;
if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
int32_t delta;
GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
}
if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c);
}
@ -172,6 +182,11 @@ ucase_totitle(UChar32 c) {
} else {
const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
uint16_t excWord=*pe++;
if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
int32_t delta;
GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
}
int32_t idx;
if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
idx=UCASE_EXC_TITLE;
@ -254,6 +269,11 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
sa->add(sa->set, c);
}
}
if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
int32_t delta;
GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
sa->add(sa->set, (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta);
}
/* get the closure string pointer & length */
if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
@ -590,7 +610,12 @@ ucase_isSoftDotted(UChar32 c) {
/**
 * Reports whether the code point c is flagged as case-sensitive in the case
 * properties data.
 *
 * @param c the code point to look up
 * @return nonzero if the case-sensitive flag is set for c
 */
U_CAPI UBool U_EXPORT2
ucase_isCaseSensitive(UChar32 c) {
    uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
    if(!UCASE_HAS_EXCEPTION(props)) {
        /* No exception entry: the flag is stored directly in the trie word. */
        return (UBool)((props&UCASE_SENSITIVE)!=0);
    } else {
        /*
         * With an exception entry, the upper bits of props are an index into
         * the exceptions array rather than flags, so the flag must be read
         * from the first exception word instead.
         */
        const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
        return (UBool)((*pe&UCASE_EXC_SENSITIVE)!=0);
    }
}
/* string casing ------------------------------------------------------------ */
@ -1140,6 +1165,11 @@ ucase_toFullLower(UChar32 c,
}
}
if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
int32_t delta;
GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
}
if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe2, result);
}
@ -1229,6 +1259,11 @@ toUpperOrTitle(UChar32 c,
}
}
if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
int32_t delta;
GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
}
if(!upperNotTitle && HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
idx=UCASE_EXC_TITLE;
} else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
@ -1334,6 +1369,14 @@ ucase_fold(UChar32 c, uint32_t options) {
}
}
}
if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
return c;
}
if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
int32_t delta;
GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
}
if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
idx=UCASE_EXC_FOLD;
} else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
@ -1421,6 +1464,14 @@ ucase_toFullFolding(UChar32 c,
}
}
if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
return ~c;
}
if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
int32_t delta;
GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
}
if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
idx=UCASE_EXC_FOLD;
} else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {

View File

@ -354,8 +354,8 @@ enum {
#define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2)
#define UCASE_IGNORABLE 4
#define UCASE_SENSITIVE 8
#define UCASE_EXCEPTION 0x10
#define UCASE_EXCEPTION 8
#define UCASE_SENSITIVE 0x10
#define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
@ -379,9 +379,9 @@ enum {
# define UCASE_GET_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UCASE_DELTA_SHIFT)|0xfe00) : ((uint16_t)(props)>>UCASE_DELTA_SHIFT))
#endif
/* exception: bits 15..5 are an unsigned 11-bit index into the exceptions array */
#define UCASE_EXC_SHIFT 5
#define UCASE_EXC_MASK 0xffe0
/* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array */
#define UCASE_EXC_SHIFT 4
#define UCASE_EXC_MASK 0xfff0
#define UCASE_MAX_EXCEPTIONS ((UCASE_EXC_MASK>>UCASE_EXC_SHIFT)+1)
/* definitions for 16-bit main exceptions word ------------------------------ */
@ -392,7 +392,7 @@ enum {
UCASE_EXC_FOLD,
UCASE_EXC_UPPER,
UCASE_EXC_TITLE,
UCASE_EXC_4, /* reserved */
UCASE_EXC_DELTA,
UCASE_EXC_5, /* reserved */
UCASE_EXC_CLOSURE,
UCASE_EXC_FULL_MAPPINGS,
@ -402,7 +402,11 @@ enum {
/* each slot is 2 uint16_t instead of 1 */
#define UCASE_EXC_DOUBLE_SLOTS 0x100
/* reserved: exception bits 11..9 */
enum {
UCASE_EXC_NO_SIMPLE_CASE_FOLDING=0x200,
UCASE_EXC_DELTA_IS_NEGATIVE=0x400,
UCASE_EXC_SENSITIVE=0x800
};
/* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<<UCASE_EXC_DOT_SHIFT */
#define UCASE_EXC_DOT_SHIFT 7

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -33,6 +33,7 @@ Please keep the order of enums declared in same order
as the cleanup functions are suppose to be called. */
typedef enum ECleanupCommonType {
UCLN_COMMON_START = -1,
UCLN_COMMON_NUMPARSE_UNISETS,
UCLN_COMMON_USPREP,
UCLN_COMMON_BREAKITERATOR,
UCLN_COMMON_RBBI,

View File

@ -261,6 +261,11 @@ static UBool U_CALLCONV ucnv_cleanup(void) {
return (SHARED_DATA_HASHTABLE == NULL);
}
/**
 * Registers ucnv_cleanup() as the cleanup function for the UCLN_COMMON_UCNV
 * slot of the common-library cleanup registry. Exported (U_CAPI) so that the
 * registration can be requested through a single entry point instead of each
 * call site repeating the ucln_common_registerCleanup() call.
 */
U_CAPI void U_EXPORT2
ucnv_enableCleanup() {
ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);
}
static UBool U_CALLCONV
isCnvAcceptable(void * /*context*/,
const char * /*type*/, const char * /*name*/,
@ -439,7 +444,7 @@ ucnv_shareConverterData(UConverterSharedData * data)
SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL,
ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR,
&err);
ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);
ucnv_enableCleanup();
if (U_FAILURE(err))
return;
@ -1099,7 +1104,7 @@ static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) {
U_ASSERT(gAvailableConverterCount == 0);
U_ASSERT(gAvailableConverters == NULL);
ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);
ucnv_enableCleanup();
UEnumeration *allConvEnum = ucnv_openAllNames(&errCode);
int32_t allConverterCount = uenum_count(allConvEnum, &errCode);
if (U_FAILURE(errCode)) {
@ -1205,7 +1210,7 @@ internalSetName(const char *name, UErrorCode *status) {
// -- Andy
gDefaultConverterName = gDefaultConverterNameBuffer;
ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);
ucnv_enableCleanup();
umtx_unlock(&cnvCacheMutex);
}

View File

@ -288,6 +288,9 @@ ucnv_swap(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
U_CAPI void U_EXPORT2
ucnv_enableCleanup();
#endif
#endif /* _UCNV_BLD */

View File

@ -16,10 +16,14 @@
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "unicode/parsepos.h"
#include "unicode/uniset.h"
#include "unicode/usetiter.h"
#include "unicode/utf16.h"
#include "ustr_imp.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "static_unicode_sets.h"
#include "uassert.h"
#include "umutex.h"
#include "ucln_cmn.h"
@ -65,14 +69,6 @@ static const int32_t POW10[] = { 1, 10, 100, 1000, 10000, 100000,
static const int32_t MAX_POW10 = UPRV_LENGTHOF(POW10) - 1;
// Defines equivalent currency symbols.
static const char *EQUIV_CURRENCY_SYMBOLS[][2] = {
{"\\u00a5", "\\uffe5"},
{"$", "\\ufe69"},
{"$", "\\uff04"},
{"\\u20a8", "\\u20b9"},
{"\\u00a3", "\\u20a4"}};
#define ISO_CURRENCY_CODE_LENGTH 3
//------------------------------------------------------------
@ -1287,17 +1283,28 @@ static void
linearSearch(const CurrencyNameStruct* currencyNames,
int32_t begin, int32_t end,
const UChar* text, int32_t textLen,
int32_t *partialMatchLen,
int32_t *maxMatchLen, int32_t* maxMatchIndex) {
int32_t initialPartialMatchLen = *partialMatchLen;
for (int32_t index = begin; index <= end; ++index) {
int32_t len = currencyNames[index].currencyNameLen;
if (len > *maxMatchLen && len <= textLen &&
uprv_memcmp(currencyNames[index].currencyName, text, len * sizeof(UChar)) == 0) {
*partialMatchLen = MAX(*partialMatchLen, len);
*maxMatchIndex = index;
*maxMatchLen = len;
#ifdef UCURR_DEBUG
printf("maxMatchIndex = %d, maxMatchLen = %d\n",
*maxMatchIndex, *maxMatchLen);
#endif
} else {
// Check for partial matches.
for (int32_t i=initialPartialMatchLen; i<MIN(len, textLen); i++) {
if (currencyNames[index].currencyName[i] != text[i]) {
break;
}
*partialMatchLen = MAX(*partialMatchLen, i + 1);
}
}
}
}
@ -1315,6 +1322,7 @@ static void
searchCurrencyName(const CurrencyNameStruct* currencyNames,
int32_t total_currency_count,
const UChar* text, int32_t textLen,
int32_t *partialMatchLen,
int32_t* maxMatchLen, int32_t* maxMatchIndex) {
*maxMatchIndex = -1;
*maxMatchLen = 0;
@ -1344,6 +1352,7 @@ searchCurrencyName(const CurrencyNameStruct* currencyNames,
if (binarySearchBegin == -1) { // did not find the range
break;
}
*partialMatchLen = MAX(*partialMatchLen, index + 1);
if (matchIndex != -1) {
// find an exact match for text from text[0] to text[index]
// in currencyNames array.
@ -1354,6 +1363,7 @@ searchCurrencyName(const CurrencyNameStruct* currencyNames,
// linear search if within threshold.
linearSearch(currencyNames, binarySearchBegin, binarySearchEnd,
text, textLen,
partialMatchLen,
maxMatchLen, maxMatchIndex);
break;
}
@ -1422,19 +1432,13 @@ currency_cache_cleanup(void) {
}
U_CAPI void
uprv_parseCurrency(const char* locale,
const icu::UnicodeString& text,
icu::ParsePosition& pos,
int8_t type,
UChar* result,
UErrorCode& ec)
{
U_NAMESPACE_USE
if (U_FAILURE(ec)) {
return;
}
/**
* Loads the currency name data from the cache, or from resource bundles if necessary.
* The refCount is automatically incremented. It is the caller's responsibility
* to decrement it when done!
*/
static CurrencyNameCacheEntry*
getCacheEntry(const char* locale, UErrorCode& ec) {
int32_t total_currency_name_count = 0;
CurrencyNameStruct* currencyNames = NULL;
@ -1455,17 +1459,13 @@ uprv_parseCurrency(const char* locale,
}
if (found != -1) {
cacheEntry = currCache[found];
currencyNames = cacheEntry->currencyNames;
total_currency_name_count = cacheEntry->totalCurrencyNameCount;
currencySymbols = cacheEntry->currencySymbols;
total_currency_symbol_count = cacheEntry->totalCurrencySymbolCount;
++(cacheEntry->refCount);
}
umtx_unlock(&gCurrencyCacheMutex);
if (found == -1) {
collectCurrencyNames(locale, &currencyNames, &total_currency_name_count, &currencySymbols, &total_currency_symbol_count, ec);
if (U_FAILURE(ec)) {
return;
return NULL;
}
umtx_lock(&gCurrencyCacheMutex);
// check again.
@ -1500,20 +1500,50 @@ uprv_parseCurrency(const char* locale,
cacheEntry->totalCurrencySymbolCount = total_currency_symbol_count;
cacheEntry->refCount = 2; // one for cache, one for reference
currentCacheEntryIndex = (currentCacheEntryIndex + 1) % CURRENCY_NAME_CACHE_NUM;
ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cache_cleanup);
ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup);
} else {
deleteCurrencyNames(currencyNames, total_currency_name_count);
deleteCurrencyNames(currencySymbols, total_currency_symbol_count);
cacheEntry = currCache[found];
currencyNames = cacheEntry->currencyNames;
total_currency_name_count = cacheEntry->totalCurrencyNameCount;
currencySymbols = cacheEntry->currencySymbols;
total_currency_symbol_count = cacheEntry->totalCurrencySymbolCount;
++(cacheEntry->refCount);
}
umtx_unlock(&gCurrencyCacheMutex);
}
return cacheEntry;
}
/**
 * Drops one reference to cacheEntry while holding gCurrencyCacheMutex, and
 * deletes the entry when that was the last reference.
 */
static void releaseCacheEntry(CurrencyNameCacheEntry* cacheEntry) {
    umtx_lock(&gCurrencyCacheMutex);
    const bool wasLastReference = (--(cacheEntry->refCount) == 0);
    if (wasLastReference) {
        deleteCacheEntry(cacheEntry);  // remove
    }
    umtx_unlock(&gCurrencyCacheMutex);
}
U_CAPI void
uprv_parseCurrency(const char* locale,
const icu::UnicodeString& text,
icu::ParsePosition& pos,
int8_t type,
int32_t* partialMatchLen,
UChar* result,
UErrorCode& ec) {
U_NAMESPACE_USE
if (U_FAILURE(ec)) {
return;
}
CurrencyNameCacheEntry* cacheEntry = getCacheEntry(locale, ec);
if (U_FAILURE(ec)) {
return;
}
int32_t total_currency_name_count = cacheEntry->totalCurrencyNameCount;
CurrencyNameStruct* currencyNames = cacheEntry->currencyNames;
int32_t total_currency_symbol_count = cacheEntry->totalCurrencySymbolCount;
CurrencyNameStruct* currencySymbols = cacheEntry->currencySymbols;
int32_t start = pos.getIndex();
UChar inputText[MAX_CURRENCY_NAME_LEN];
@ -1523,11 +1553,14 @@ uprv_parseCurrency(const char* locale,
UErrorCode ec1 = U_ZERO_ERROR;
textLen = u_strToUpper(upperText, MAX_CURRENCY_NAME_LEN, inputText, textLen, locale, &ec1);
// Make sure partialMatchLen is initialized
*partialMatchLen = 0;
int32_t max = 0;
int32_t matchIndex = -1;
// case in-sensitive comparision against currency names
searchCurrencyName(currencyNames, total_currency_name_count,
upperText, textLen, &max, &matchIndex);
upperText, textLen, partialMatchLen, &max, &matchIndex);
#ifdef UCURR_DEBUG
printf("search in names, max = %d, matchIndex = %d\n", max, matchIndex);
@ -1539,6 +1572,7 @@ uprv_parseCurrency(const char* locale,
// case sensitive comparison against currency symbols and ISO code.
searchCurrencyName(currencySymbols, total_currency_symbol_count,
inputText, textLen,
partialMatchLen,
&maxInSymbol, &matchIndexInSymbol);
}
@ -1558,12 +1592,35 @@ uprv_parseCurrency(const char* locale,
}
// decrease reference count
umtx_lock(&gCurrencyCacheMutex);
--(cacheEntry->refCount);
if (cacheEntry->refCount == 0) { // remove
deleteCacheEntry(cacheEntry);
releaseCacheEntry(cacheEntry);
}
void uprv_currencyLeads(const char* locale, icu::UnicodeSet& result, UErrorCode& ec) {
U_NAMESPACE_USE
if (U_FAILURE(ec)) {
return;
}
umtx_unlock(&gCurrencyCacheMutex);
CurrencyNameCacheEntry* cacheEntry = getCacheEntry(locale, ec);
if (U_FAILURE(ec)) {
return;
}
for (int32_t i=0; i<cacheEntry->totalCurrencySymbolCount; i++) {
const CurrencyNameStruct& info = cacheEntry->currencySymbols[i];
UChar32 cp;
U16_GET(info.currencyName, 0, 0, info.currencyNameLen, cp);
result.add(cp);
}
for (int32_t i=0; i<cacheEntry->totalCurrencyNameCount; i++) {
const CurrencyNameStruct& info = cacheEntry->currencyNames[i];
UChar32 cp;
U16_GET(info.currencyName, 0, 0, info.currencyNameLen, cp);
result.add(cp);
}
// decrease reference count
releaseCacheEntry(cacheEntry);
}
@ -1729,7 +1786,8 @@ static const struct CurrencyList {
{"BUK", UCURR_COMMON|UCURR_DEPRECATED},
{"BWP", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"BYB", UCURR_COMMON|UCURR_DEPRECATED},
{"BYR", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"BYN", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"BYR", UCURR_COMMON|UCURR_DEPRECATED},
{"BZD", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"CAD", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"CDF", UCURR_COMMON|UCURR_NON_DEPRECATED},
@ -1739,6 +1797,7 @@ static const struct CurrencyList {
{"CLE", UCURR_COMMON|UCURR_DEPRECATED},
{"CLF", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
{"CLP", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"CNH", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
{"CNX", UCURR_UNCOMMON|UCURR_DEPRECATED},
{"CNY", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"COP", UCURR_COMMON|UCURR_NON_DEPRECATED},
@ -1761,7 +1820,7 @@ static const struct CurrencyList {
{"ECV", UCURR_UNCOMMON|UCURR_DEPRECATED},
{"EEK", UCURR_COMMON|UCURR_DEPRECATED},
{"EGP", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"EQE", UCURR_COMMON|UCURR_DEPRECATED},
{"EQE", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove?
{"ERN", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"ESA", UCURR_UNCOMMON|UCURR_DEPRECATED},
{"ESB", UCURR_UNCOMMON|UCURR_DEPRECATED},
@ -1785,7 +1844,7 @@ static const struct CurrencyList {
{"GRD", UCURR_COMMON|UCURR_DEPRECATED},
{"GTQ", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"GWE", UCURR_COMMON|UCURR_DEPRECATED},
{"GWP", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"GWP", UCURR_COMMON|UCURR_DEPRECATED},
{"GYD", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"HKD", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"HNL", UCURR_COMMON|UCURR_NON_DEPRECATED},
@ -1823,13 +1882,13 @@ static const struct CurrencyList {
{"LKR", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"LRD", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"LSL", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"LSM", UCURR_COMMON|UCURR_DEPRECATED},
{"LTL", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"LSM", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove?
{"LTL", UCURR_COMMON|UCURR_DEPRECATED},
{"LTT", UCURR_COMMON|UCURR_DEPRECATED},
{"LUC", UCURR_UNCOMMON|UCURR_DEPRECATED},
{"LUF", UCURR_COMMON|UCURR_DEPRECATED},
{"LUL", UCURR_UNCOMMON|UCURR_DEPRECATED},
{"LVL", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"LVL", UCURR_COMMON|UCURR_DEPRECATED},
{"LVR", UCURR_COMMON|UCURR_DEPRECATED},
{"LYD", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"MAD", UCURR_COMMON|UCURR_NON_DEPRECATED},
@ -1845,18 +1904,19 @@ static const struct CurrencyList {
{"MMK", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"MNT", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"MOP", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"MRO", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"MRO", UCURR_COMMON|UCURR_DEPRECATED},
{"MRU", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"MTL", UCURR_COMMON|UCURR_DEPRECATED},
{"MTP", UCURR_COMMON|UCURR_DEPRECATED},
{"MUR", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"MVP", UCURR_COMMON|UCURR_DEPRECATED},
{"MVP", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove?
{"MVR", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"MWK", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"MXN", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"MXP", UCURR_COMMON|UCURR_DEPRECATED},
{"MXV", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
{"MYR", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"MZE", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"MZE", UCURR_COMMON|UCURR_DEPRECATED},
{"MZM", UCURR_COMMON|UCURR_DEPRECATED},
{"MZN", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"NAD", UCURR_COMMON|UCURR_NON_DEPRECATED},
@ -1897,15 +1957,16 @@ static const struct CurrencyList {
{"SGD", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"SHP", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"SIT", UCURR_COMMON|UCURR_DEPRECATED},
{"SKK", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"SKK", UCURR_COMMON|UCURR_DEPRECATED},
{"SLL", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"SOS", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"SRD", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"SRG", UCURR_COMMON|UCURR_DEPRECATED},
{"SSP", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"STD", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"STD", UCURR_COMMON|UCURR_DEPRECATED},
{"STN", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"SUR", UCURR_COMMON|UCURR_DEPRECATED},
{"SVC", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"SVC", UCURR_COMMON|UCURR_DEPRECATED},
{"SYP", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"SZL", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"THB", UCURR_COMMON|UCURR_NON_DEPRECATED},
@ -1954,7 +2015,7 @@ static const struct CurrencyList {
{"XPD", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
{"XPF", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"XPT", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
{"XRE", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
{"XRE", UCURR_UNCOMMON|UCURR_DEPRECATED},
{"XSU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
{"XTS", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
{"XUA", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
@ -1965,15 +2026,15 @@ static const struct CurrencyList {
{"YUM", UCURR_COMMON|UCURR_DEPRECATED},
{"YUN", UCURR_COMMON|UCURR_DEPRECATED},
{"YUR", UCURR_COMMON|UCURR_DEPRECATED},
{"ZAL", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
{"ZAL", UCURR_UNCOMMON|UCURR_DEPRECATED},
{"ZAR", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"ZMK", UCURR_COMMON|UCURR_DEPRECATED},
{"ZMW", UCURR_COMMON|UCURR_NON_DEPRECATED},
{"ZRN", UCURR_COMMON|UCURR_DEPRECATED},
{"ZRZ", UCURR_COMMON|UCURR_DEPRECATED},
{"ZWD", UCURR_COMMON|UCURR_DEPRECATED},
{"ZWL", UCURR_COMMON|UCURR_DEPRECATED},
{"ZWR", UCURR_COMMON|UCURR_DEPRECATED},
{"ZWD", UCURR_COMMON|UCURR_DEPRECATED},
{ NULL, 0 } // Leave here to denote the end of the list.
};
@ -2144,16 +2205,20 @@ static void U_CALLCONV initIsoCodes(UErrorCode &status) {
}
/**
 * Fills hash with currency-symbol equivalences. For every entry of
 * unisets::kCurrencyEntries, each member of the entry's static UnicodeSet
 * other than the exemplar itself is registered as equivalent to the exemplar.
 *
 * @param hash   the table that receives the equivalences (via makeEquivalent)
 * @param status ICU error code; nothing is done if it already indicates
 *               failure, and population stops at the first failure
 */
static void populateCurrSymbolsEquiv(icu::Hashtable *hash, UErrorCode &status) {
    if (U_FAILURE(status)) { return; }
    for (const auto& entry : unisets::kCurrencyEntries) {
        UnicodeString exemplar(entry.exemplar);
        const UnicodeSet* set = unisets::get(entry.key);
        if (set == nullptr) { return; }
        UnicodeSetIterator it(*set);
        while (it.next()) {
            UnicodeString value = it.getString();
            if (value == exemplar) {
                // No need to mark the exemplar character as an equivalent
                continue;
            }
            makeEquivalent(exemplar, value, hash, status);
            if (U_FAILURE(status)) { return; }
        }
    }
}

View File

@ -13,6 +13,7 @@
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/parsepos.h"
#include "unicode/uniset.h"
/**
* Internal method. Given a currency ISO code and a locale, return
@ -36,6 +37,8 @@ uprv_getStaticCurrencyName(const UChar* iso, const char* loc,
* match, then the display name is preferred, unless it's length
* is less than 3.
*
* The parameters must not be NULL.
*
* @param locale the locale of the display names to match
* @param text the text to parse
* @param pos input-output position; on input, the position within
@ -43,6 +46,8 @@ uprv_getStaticCurrencyName(const UChar* iso, const char* loc,
* on output, the position after the last matched character. If
* the parse fails, the position in unchanged upon output.
* @param type currency type to parse against, LONG_NAME only or not
* @param partialMatchLen The length of the longest matching prefix;
* this may be nonzero even if no full currency was matched.
* @return the ISO 4217 code, as a string, of the best match, or
* null if there is no match
*
@ -53,9 +58,21 @@ uprv_parseCurrency(const char* locale,
const icu::UnicodeString& text,
icu::ParsePosition& pos,
int8_t type,
int32_t* partialMatchLen,
UChar* result,
UErrorCode& ec);
/**
* Puts all possible first-characters of a currency into the
* specified UnicodeSet.
*
* @param locale the locale of the display names of interest
* @param result the UnicodeSet to which to add the starting characters
* @param ec     ICU error code. Must not indicate a failure on input, or the
*               function returns immediately; set to a failure code if the
*               currency name data cannot be loaded.
*/
void uprv_currencyLeads(const char* locale, icu::UnicodeSet& result, UErrorCode& ec);
#endif /* #ifndef _UCURR_IMP_H_ */
//eof

View File

@ -638,7 +638,7 @@ protected:
private:
/** @internal */
/** @internal (private) */
char actualLocale[ULOC_FULLNAME_CAPACITY];
char validLocale[ULOC_FULLNAME_CAPACITY];
};

View File

@ -143,7 +143,7 @@ private:
virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; }
/**
* @internal
* @internal (private)
*/
class BTLinearMatchNode : public LinearMatchNode {
public:

View File

@ -153,13 +153,13 @@ private:
/**
* Copy constructor. Private for now.
* @internal
* @internal (private)
*/
CanonicalIterator(const CanonicalIterator& other);
/**
* Assignment operator. Private for now.
* @internal
* @internal (private)
*/
CanonicalIterator& operator=(const CanonicalIterator& other);

View File

@ -139,7 +139,7 @@
* </tr>
* <tr>
* <td>Number Formatting</td>
* <td>unum.h</td>
* <td>unumberformatter.h, unum.h</td>
* <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)</td>
* </tr>
* <tr>

View File

@ -17,10 +17,57 @@
U_NAMESPACE_BEGIN
class UnicodeString;
/**
* Records lengths of string edits but not replacement text.
* Supports replacements, insertions, deletions in linear progression.
* Does not support moving/reordering of text.
* Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions
* in linear progression. Does not support moving/reordering of text.
*
* There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
* instances of this class using {@link #addReplace(int, int)} (for change edits) and
* {@link #addUnchanged(int)} (for no-change edits). Change edits are retained with full granularity,
* whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
* mapping between code points in the source and destination strings.
*
* After all edits have been added, instances of this class should be considered immutable, and an
* {@link Edits::Iterator} can be used for queries.
*
* There are four flavors of Edits::Iterator:
*
* <ul>
* <li>{@link #getFineIterator()} retains full granularity of change edits.
* <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling
* next() on the iterator, skips over no-change edits (unchanged regions).
* <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change
* edits are automatically merged during the construction phase.)
* <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when
* calling next() on the iterator, skips over no-change edits (unchanged regions).
* </ul>
*
* For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the
* following fine edits:
* <ul>
* <li>abc ⇨ abc (no-change)
* <li>ß ⇨ ss (change)
* <li>D ⇨ d (change)
* <li>e ⇨ e (no-change)
* <li>F ⇨ f (change)
* </ul>
* and the following coarse edits (note how adjacent change edits get merged together):
* <ul>
* <li>abc ⇨ abc (no-change)
* <li>ßD ⇨ ssd (change)
* <li>e ⇨ e (no-change)
* <li>F ⇨ f (change)
* </ul>
*
* The "fine changes" and "coarse changes" iterators will step through only the change edits when their
* {@link Edits::Iterator#next()} methods are called. They are identical to the non-change iterators when
* their {@link Edits::Iterator#findSourceIndex(int)} or {@link Edits::Iterator#findDestinationIndex(int)}
* methods are used to walk through the string.
*
* For examples of how to use this class, see the test <code>TestCaseMapEditsIteratorDocs</code> in
* UCharacterCaseTest.java.
*
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
* (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
@ -91,13 +138,13 @@ public:
void reset() U_NOEXCEPT;
/**
* Adds a record for an unchanged segment of text.
* Adds a no-change edit: a record for an unchanged segment of text.
* Normally called from inside ICU string transformation functions, not user code.
* @stable ICU 59
*/
void addUnchanged(int32_t unchangedLength);
/**
* Adds a record for a text replacement/insertion/deletion.
* Adds a change edit: a record for a text replacement/insertion/deletion.
* Normally called from inside ICU string transformation functions, not user code.
* @stable ICU 59
*/
@ -136,6 +183,18 @@ public:
/**
* Access to the list of edits.
*
* At any moment in time, an instance of this class points to a single edit: a "window" into a span
* of the source string and the corresponding span of the destination string. The source string span
* starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
* span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
*
* The iterator can be moved between edits using the {@link #next()}, {@link #findSourceIndex(int)},
* and {@link #findDestinationIndex(int)} methods. Calling any of these methods mutates the iterator
* to make it point to the corresponding edit.
*
* For more information, see the documentation for {@link Edits}.
*
* @see getCoarseIterator
* @see getFineIterator
* @stable ICU 59
@ -162,7 +221,7 @@ public:
Iterator &operator=(const Iterator &other) = default;
/**
* Advances to the next edit.
* Advances the iterator to the next edit.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
@ -172,9 +231,9 @@ public:
UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
/**
* Finds the edit that contains the source index.
* The source index may be found in a non-change
* even if normal iteration would skip non-changes.
* Moves the iterator to the edit that contains the source index.
* The source index may be found in a no-change edit
* even if normal iteration would skip no-change edits.
* Normal iteration can continue from a found edit.
*
* The iterator state before this search logically does not matter.
@ -196,9 +255,9 @@ public:
#ifndef U_HIDE_DRAFT_API
/**
* Finds the edit that contains the destination index.
* The destination index may be found in a non-change
* even if normal iteration would skip non-changes.
* Moves the iterator to the edit that contains the destination index.
* The destination index may be found in a no-change edit
* even if normal iteration would skip no-change edits.
* Normal iteration can continue from a found edit.
*
* The iterator state before this search logically does not matter.
@ -219,7 +278,7 @@ public:
}
/**
* Returns the destination index corresponding to the given source index.
* Computes the destination index corresponding to the given source index.
* If the source index is inside a change edit (not at its start),
* then the destination index at the end of that edit is returned,
* since there is no information about index mapping inside a change edit.
@ -243,7 +302,7 @@ public:
int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
/**
* Returns the source index corresponding to the given destination index.
* Computes the source index corresponding to the given destination index.
* If the destination index is inside a change edit (not at its start),
* then the source index at the end of that edit is returned,
* since there is no information about index mapping inside a change edit.
@ -268,17 +327,27 @@ public:
#endif // U_HIDE_DRAFT_API
/**
* Returns whether the edit currently represented by the iterator is a change edit.
*
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
* FALSE if oldLength units remain unchanged.
* @stable ICU 59
*/
UBool hasChange() const { return changed; }
/**
* The length of the current span in the source string, which starts at {@link #sourceIndex}.
*
* @return the number of units in the original string which are replaced or remain unchanged.
* @stable ICU 59
*/
int32_t oldLength() const { return oldLength_; }
/**
* The length of the current span in the destination string, which starts at
* {@link #destinationIndex}, or in the replacement string, which starts at
* {@link #replacementIndex}.
*
* @return the number of units in the modified string, if hasChange() is TRUE.
* Same as oldLength if hasChange() is FALSE.
* @stable ICU 59
@ -286,22 +355,52 @@ public:
int32_t newLength() const { return newLength_; }
/**
* The start index of the current span in the source string; the span has length
* {@link #oldLength}.
*
* @return the current index into the source string
* @stable ICU 59
*/
int32_t sourceIndex() const { return srcIndex; }
/**
* The start index of the current span in the replacement string; the span has length
* {@link #newLength}. Well-defined only if the current edit is a change edit.
* <p>
* The <em>replacement string</em> is the concatenation of all substrings of the destination
* string corresponding to change edits.
* <p>
* This method is intended to be used together with operations that write only replacement
* characters (e.g., {@link CaseMap#omitUnchangedText()}). The source string can then be modified
* in-place.
*
* @return the current index into the replacement-characters-only string,
* not counting unchanged spans
* @stable ICU 59
*/
int32_t replacementIndex() const { return replIndex; }
int32_t replacementIndex() const {
// TODO: Throw an exception if we aren't in a change edit?
return replIndex;
}
/**
* The start index of the current span in the destination string; the span has length
* {@link #newLength}.
*
* @return the current index into the full destination string
* @stable ICU 59
*/
int32_t destinationIndex() const { return destIndex; }
#ifndef U_HIDE_INTERNAL_API
/**
* A string representation of the current edit represented by the iterator for debugging. You
* should not depend on the contents of the return string.
* @internal
*/
UnicodeString& toString(UnicodeString& appendTo) const;
#endif // U_HIDE_INTERNAL_API
private:
friend class Edits;
@ -330,8 +429,10 @@ public:
};
/**
* Returns an Iterator for coarse-grained changes for simple string updates.
* Skips non-changes.
* Returns an Iterator for coarse-grained change edits
* (adjacent change edits are treated as one).
* Can be used to perform simple string updates.
* Skips no-change edits.
* @return an Iterator that merges adjacent changes.
* @stable ICU 59
*/
@ -340,7 +441,10 @@ public:
}
/**
* Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
* Returns an Iterator for coarse-grained change and no-change edits
* (adjacent change edits are treated as one).
* Can be used to perform simple string updates.
* Adjacent change edits are treated as one edit.
* @return an Iterator that merges adjacent changes.
* @stable ICU 59
*/
@ -349,8 +453,10 @@ public:
}
/**
* Returns an Iterator for fine-grained changes for modifying styled text.
* Skips non-changes.
* Returns an Iterator for fine-grained change edits
* (full granularity of change edits is retained).
* Can be used for modifying styled text.
* Skips no-change edits.
* @return an Iterator that separates adjacent changes.
* @stable ICU 59
*/
@ -359,7 +465,9 @@ public:
}
/**
* Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
* Returns an Iterator for fine-grained change and no-change edits
* (full granularity of change edits is retained).
* Can be used for modifying styled text.
* @return an Iterator that separates adjacent changes.
* @stable ICU 59
*/

View File

@ -196,20 +196,6 @@
# define U_PLATFORM U_PF_UNKNOWN
#endif
/**
* \def UPRV_INCOMPLETE_CPP11_SUPPORT
* This switch turns off ICU 60 NumberFormatter code.
* By default, this switch is enabled on AIX and z/OS,
* which have poor C++11 support.
*
* NOTE: This switch is intended to be temporary; see #13393.
*
* @internal
*/
#ifndef UPRV_INCOMPLETE_CPP11_SUPPORT
# define UPRV_INCOMPLETE_CPP11_SUPPORT (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_SOLARIS )
#endif
/**
* \def CYGWINMSVC
* Defined if this is Windows with Cygwin, but using MSVC rather than gcc.

View File

@ -55,7 +55,7 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator {
private:
/**
* The UText through which this BreakIterator accesses the text
* @internal
* @internal (private)
*/
UText fText;
@ -70,13 +70,6 @@ public:
RBBIDataWrapper *fData;
private:
/**
* The iteration state - current position, rule status for the current position,
* and whether the iterator ran off the end, yielding UBRK_DONE.
* Current position is pinned to be 0 < position <= text.length.
* Current position is always set to a boundary.
* @internal
*/
/**
* The current position of the iterator. Pinned, 0 < fPosition <= text.length.
* Never has the value UBRK_DONE (-1).
@ -628,25 +621,26 @@ private:
/**
* Dumps caches and performs other actions associated with a complete change
* in text or iteration position.
* @internal
* @internal (private)
*/
void reset(void);
/**
* Common initialization function, used by constructors and bufferClone.
* @internal
* @internal (private)
*/
void init(UErrorCode &status);
/**
* Iterate backwards from an arbitrary position in the input text using the Safe Reverse rules.
* Iterate backwards from an arbitrary position in the input text using the
* synthesized Safe Reverse rules.
* This locates a "Safe Position" from which the forward break rules
* will operate correctly. A Safe Position is not necessarily a boundary itself.
*
* @param fromPosition the position in the input text to begin the iteration.
* @internal
* @internal (private)
*/
int32_t handlePrevious(int32_t fromPosition);
int32_t handleSafePrevious(int32_t fromPosition);
/**
* Find a rule-based boundary by running the state machine.
@ -658,7 +652,7 @@ private:
* If > 0, the segment will be further subdivided
* fRuleStatusIndex Info from the state table indicating which rules caused the boundary.
*
* @internal
* @internal (private)
*/
int32_t handleNext();
@ -667,7 +661,7 @@ private:
* This function returns the appropriate LanguageBreakEngine for a
* given character c.
* @param c A character in the dictionary set
* @internal
* @internal (private)
*/
const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);

View File

@ -42,7 +42,7 @@ U_CDECL_BEGIN
* @see u_getUnicodeVersion
* @stable ICU 2.0
*/
#define U_UNICODE_VERSION "10.0"
#define U_UNICODE_VERSION "11.0"
/**
* \file
@ -446,6 +446,13 @@ typedef enum UProperty {
* @stable ICU 60
*/
UCHAR_PREPENDED_CONCATENATION_MARK=63,
/**
* Binary property Extended_Pictographic.
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
*
* @stable ICU 62
*/
UCHAR_EXTENDED_PICTOGRAPHIC=64,
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for binary Unicode properties.
@ -1683,6 +1690,31 @@ enum UBlockCode {
/** @stable ICU 60 */
UBLOCK_ZANABAZAR_SQUARE = 280, /*[11A00]*/
// New blocks in Unicode 11.0
/** @stable ICU 62 */
UBLOCK_CHESS_SYMBOLS = 281, /*[1FA00]*/
/** @stable ICU 62 */
UBLOCK_DOGRA = 282, /*[11800]*/
/** @stable ICU 62 */
UBLOCK_GEORGIAN_EXTENDED = 283, /*[1C90]*/
/** @stable ICU 62 */
UBLOCK_GUNJALA_GONDI = 284, /*[11D60]*/
/** @stable ICU 62 */
UBLOCK_HANIFI_ROHINGYA = 285, /*[10D00]*/
/** @stable ICU 62 */
UBLOCK_INDIC_SIYAQ_NUMBERS = 286, /*[1EC70]*/
/** @stable ICU 62 */
UBLOCK_MAKASAR = 287, /*[11EE0]*/
/** @stable ICU 62 */
UBLOCK_MAYAN_NUMERALS = 288, /*[1D2E0]*/
/** @stable ICU 62 */
UBLOCK_MEDEFAIDRIN = 289, /*[16E40]*/
/** @stable ICU 62 */
UBLOCK_OLD_SOGDIAN = 290, /*[10F00]*/
/** @stable ICU 62 */
UBLOCK_SOGDIAN = 291, /*[10F30]*/
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UBlockCode value.
@ -1690,7 +1722,7 @@ enum UBlockCode {
*
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UBLOCK_COUNT = 281,
UBLOCK_COUNT = 292,
#endif // U_HIDE_DEPRECATED_API
/** @stable ICU 2.0 */
@ -1979,6 +2011,9 @@ typedef enum UJoiningGroup {
U_JG_MALAYALAM_SSA, /**< @stable ICU 60 */
U_JG_MALAYALAM_TTA, /**< @stable ICU 60 */
U_JG_HANIFI_ROHINGYA_KINNA_YA, /**< @stable ICU 62 */
U_JG_HANIFI_ROHINGYA_PA, /**< @stable ICU 62 */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UJoiningGroup value.
@ -2029,6 +2064,7 @@ typedef enum UGraphemeClusterBreak {
U_GCB_GLUE_AFTER_ZWJ = 16, /*[GAZ]*/
/** @stable ICU 58 */
U_GCB_ZWJ = 17, /*[ZWJ]*/
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UGraphemeClusterBreak value.
@ -2090,6 +2126,9 @@ typedef enum UWordBreakValues {
U_WB_GLUE_AFTER_ZWJ = 20, /*[GAZ]*/
/** @stable ICU 58 */
U_WB_ZWJ = 21, /*[ZWJ]*/
/** @stable ICU 62 */
U_WB_WSEGSPACE = 22, /*[WSEGSPACE]*/
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UWordBreakValues value.
@ -2097,7 +2136,7 @@ typedef enum UWordBreakValues {
*
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_WB_COUNT = 22
U_WB_COUNT = 23
#endif // U_HIDE_DEPRECATED_API
} UWordBreakValues;

View File

@ -1892,7 +1892,7 @@ public:
UnicodeString &fastCopyFrom(const UnicodeString &src);
/**
* Move assignment operator, might leave src in bogus state.
* Move assignment operator; might leave src in bogus state.
* This string will have the same contents and state that the source string had.
* The behavior is undefined if *this and src are the same object.
* @param src source string
@ -1905,7 +1905,7 @@ public:
// do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
/**
* Move assignment, might leave src in bogus state.
* Move assignment; might leave src in bogus state.
* This string will have the same contents and state that the source string had.
* The behavior is undefined if *this and src are the same object.
*
@ -3314,7 +3314,7 @@ public:
UnicodeString(const UnicodeString& that);
/**
* Move constructor, might leave src in bogus state.
* Move constructor; might leave src in bogus state.
* This string will have the same contents and state that the source string had.
* @param src source string
* @stable ICU 56

View File

@ -613,6 +613,7 @@
#define ucnv_createConverterFromPackage U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromPackage)
#define ucnv_createConverterFromSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromSharedData)
#define ucnv_detectUnicodeSignature U_ICU_ENTRY_POINT_RENAME(ucnv_detectUnicodeSignature)
#define ucnv_enableCleanup U_ICU_ENTRY_POINT_RENAME(ucnv_enableCleanup)
#define ucnv_extContinueMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchFromU)
#define ucnv_extContinueMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchToU)
#define ucnv_extGetUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_extGetUnicodeSet)
@ -1170,6 +1171,16 @@
#define unum_setSymbol U_ICU_ENTRY_POINT_RENAME(unum_setSymbol)
#define unum_setTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_setTextAttribute)
#define unum_toPattern U_ICU_ENTRY_POINT_RENAME(unum_toPattern)
#define unumf_close U_ICU_ENTRY_POINT_RENAME(unumf_close)
#define unumf_closeResult U_ICU_ENTRY_POINT_RENAME(unumf_closeResult)
#define unumf_formatDecimal U_ICU_ENTRY_POINT_RENAME(unumf_formatDecimal)
#define unumf_formatDouble U_ICU_ENTRY_POINT_RENAME(unumf_formatDouble)
#define unumf_formatInt U_ICU_ENTRY_POINT_RENAME(unumf_formatInt)
#define unumf_openForSkeletonAndLocale U_ICU_ENTRY_POINT_RENAME(unumf_openForSkeletonAndLocale)
#define unumf_openResult U_ICU_ENTRY_POINT_RENAME(unumf_openResult)
#define unumf_resultGetAllFieldPositions U_ICU_ENTRY_POINT_RENAME(unumf_resultGetAllFieldPositions)
#define unumf_resultNextFieldPosition U_ICU_ENTRY_POINT_RENAME(unumf_resultNextFieldPosition)
#define unumf_resultToString U_ICU_ENTRY_POINT_RENAME(unumf_resultToString)
#define unumsys_close U_ICU_ENTRY_POINT_RENAME(unumsys_close)
#define unumsys_getDescription U_ICU_ENTRY_POINT_RENAME(unumsys_getDescription)
#define unumsys_getName U_ICU_ENTRY_POINT_RENAME(unumsys_getName)
@ -1209,6 +1220,7 @@
#define uplug_setPlugNoUnload U_ICU_ENTRY_POINT_RENAME(uplug_setPlugNoUnload)
#define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource)
#define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts)
#define uprv_add32_overflow U_ICU_ENTRY_POINT_RENAME(uprv_add32_overflow)
#define uprv_aestrncpy U_ICU_ENTRY_POINT_RENAME(uprv_aestrncpy)
#define uprv_asciiFromEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_asciiFromEbcdic)
#define uprv_asciitolower U_ICU_ENTRY_POINT_RENAME(uprv_asciitolower)
@ -1343,6 +1355,7 @@
#define uprv_maximumPtr U_ICU_ENTRY_POINT_RENAME(uprv_maximumPtr)
#define uprv_min U_ICU_ENTRY_POINT_RENAME(uprv_min)
#define uprv_modf U_ICU_ENTRY_POINT_RENAME(uprv_modf)
#define uprv_mul32_overflow U_ICU_ENTRY_POINT_RENAME(uprv_mul32_overflow)
#define uprv_parseCurrency U_ICU_ENTRY_POINT_RENAME(uprv_parseCurrency)
#define uprv_pathIsAbsolute U_ICU_ENTRY_POINT_RENAME(uprv_pathIsAbsolute)
#define uprv_pow U_ICU_ENTRY_POINT_RENAME(uprv_pow)

View File

@ -451,6 +451,21 @@ typedef enum UScriptCode {
/** @stable ICU 60 */
USCRIPT_ZANABAZAR_SQUARE = 177,/* Zanb */
/** @stable ICU 62 */
USCRIPT_DOGRA = 178,/* Dogr */
/** @stable ICU 62 */
USCRIPT_GUNJALA_GONDI = 179,/* Gong */
/** @stable ICU 62 */
USCRIPT_MAKASAR = 180,/* Maka */
/** @stable ICU 62 */
USCRIPT_MEDEFAIDRIN = 181,/* Medf */
/** @stable ICU 62 */
USCRIPT_HANIFI_ROHINGYA = 182,/* Rohg */
/** @stable ICU 62 */
USCRIPT_SOGDIAN = 183,/* Sogd */
/** @stable ICU 62 */
USCRIPT_OLD_SOGDIAN = 184,/* Sogo */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UScriptCode value.
@ -458,7 +473,7 @@ typedef enum UScriptCode {
*
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
USCRIPT_CODE_LIMIT = 178
USCRIPT_CODE_LIMIT = 185
#endif // U_HIDE_DEPRECATED_API
} UScriptCode;

View File

@ -542,12 +542,15 @@ typedef enum UErrorCode {
#ifndef U_HIDE_DRAFT_API
U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @draft ICU 61 */
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DRAFT_API
U_NUMBER_SKELETON_SYNTAX_ERROR, /**< The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. @draft ICU 62 */
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal formatting API error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_FMT_PARSE_ERROR_LIMIT = 0x10113,
U_FMT_PARSE_ERROR_LIMIT = 0x10114,
#endif // U_HIDE_DEPRECATED_API
/*

View File

@ -58,7 +58,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION_MAJOR_NUM 61
#define U_ICU_VERSION_MAJOR_NUM 62
/** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU
@ -84,7 +84,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SUFFIX _61
#define U_ICU_VERSION_SUFFIX _62
/**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
@ -119,7 +119,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION "61.1"
#define U_ICU_VERSION "62.1"
/**
* The current ICU library major version number as a string, for library name suffixes.
@ -132,13 +132,13 @@
*
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SHORT "61"
#define U_ICU_VERSION_SHORT "62"
#ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
#define U_ICU_DATA_VERSION "61.1"
#define U_ICU_DATA_VERSION "62.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================

View File

@ -282,6 +282,7 @@ static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
{ 2, U_MASK(UPROPS_2_EMOJI_COMPONENT), defaultContains },
{ 2, 0, isRegionalIndicator },
{ 1, U_MASK(UPROPS_PREPENDED_CONCATENATION_MARK), defaultContains },
{ 2, U_MASK(UPROPS_2_EXTENDED_PICTOGRAPHIC), defaultContains },
};
U_CAPI UBool U_EXPORT2

View File

@ -196,8 +196,7 @@ enum {
/*
* Properties in vector word 2
* Bits
* 31..27 http://www.unicode.org/reports/tr51/#Emoji_Properties
* 26 reserved
* 31..26 http://www.unicode.org/reports/tr51/#Emoji_Properties
* 25..20 Line Break
* 19..15 Sentence Break
* 14..10 Word Break
@ -205,7 +204,8 @@ enum {
* 4.. 0 Decomposition Type
*/
enum {
UPROPS_2_EMOJI_COMPONENT=27,
UPROPS_2_EXTENDED_PICTOGRAPHIC=26,
UPROPS_2_EMOJI_COMPONENT,
UPROPS_2_EMOJI,
UPROPS_2_EMOJI_PRESENTATION,
UPROPS_2_EMOJI_MODIFIER,

View File

@ -71,7 +71,7 @@ const int32_t SCRIPT_PROPS[] = {
0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo
0x004C | RECOMMENDED | CASED, // Latn
0x0D15 | RECOMMENDED, // Mlym
0x1826 | LIMITED_USE, // Mong
0x1826 | EXCLUSION, // Mong
0x1000 | RECOMMENDED | LB_LETTERS, // Mymr
0x168F | EXCLUSION, // Ogam
0x10300 | EXCLUSION, // Ital
@ -222,6 +222,13 @@ const int32_t SCRIPT_PROPS[] = {
0x11D10 | EXCLUSION, // Gonm
0x11A5C | EXCLUSION, // Soyo
0x11A0B | EXCLUSION, // Zanb
0x1180B | EXCLUSION, // Dogr
0x11D71 | LIMITED_USE, // Gong
0x11EE5 | EXCLUSION, // Maka
0x16E40 | EXCLUSION | CASED, // Medf
0x10D12 | LIMITED_USE | RTL, // Rohg
0x10F42 | EXCLUSION | RTL, // Sogd
0x10F19 | EXCLUSION | RTL, // Sogo
// End copy-paste from parsescriptmetadata.py
};

View File

@ -28,6 +28,7 @@
#include "cmemory.h"
#include "umutex.h"
#include "ustr_cnv.h"
#include "ucnv_bld.h"
/* mutexed access to a shared default converter ----------------------------- */
@ -68,8 +69,8 @@ u_releaseDefaultConverter(UConverter *converter)
if (converter != NULL) {
ucnv_reset(converter);
}
ucnv_enableCleanup();
umtx_lock(NULL);
if(gDefaultConverter == NULL) {
gDefaultConverter = converter;
converter = NULL;

View File

@ -46,6 +46,13 @@ class U_COMMON_API ICU_Utility /* not : public UObject because all methods are s
int32_t radix = 10,
int32_t minDigits = 1);
/**
 * Creates a default-constructed UnicodeString, puts it into the bogus state
 * with setToBogus(), and hands it back by value.
 *
 * @return a UnicodeString on which setToBogus() has been called
 */
static inline UnicodeString makeBogusString() {
UnicodeString result;
result.setToBogus();
return result;
}
/**
* Return true if the character is NOT printable ASCII.
*

View File

@ -126,7 +126,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
"U_DEFAULT_KEYWORD_MISSING",
"U_DECIMAL_NUMBER_SYNTAX_ERROR",
"U_FORMAT_INEXACT_ERROR",
"U_NUMBER_ARG_OUTOFBOUNDS_ERROR"
"U_NUMBER_ARG_OUTOFBOUNDS_ERROR",
"U_NUMBER_SKELETON_SYNTAX_ERROR",
};
static const char * const

View File

@ -49,7 +49,7 @@ typedef struct
/**
* Various registry keys and key fragments.
*/
static const char CURRENT_ZONE_REGKEY[] = "SYSTEM\\CurrentControlSet\\Control\\TimeZoneInformation\\";
static const wchar_t CURRENT_ZONE_REGKEY[] = L"SYSTEM\\CurrentControlSet\\Control\\TimeZoneInformation\\";
static const char STANDARD_TIME_REGKEY[] = " Standard Time";
static const char TZI_REGKEY[] = "TZI";
static const char STD_REGKEY[] = "Std";
@ -121,27 +121,39 @@ static LONG getSTDName(const char *winid, char *regStdName, int32_t length)
return result;
}
static LONG getTZKeyName(char* tzKeyName, int32_t length)
static LONG getTZKeyName(char* tzKeyName, int32_t tzKeyNamelength)
{
HKEY hkey;
LONG result = FALSE;
DWORD cbData = length;
WCHAR timeZoneKeyNameData[128];
DWORD timeZoneKeyNameLength = static_cast<DWORD>(sizeof(timeZoneKeyNameData));
if(ERROR_SUCCESS == RegOpenKeyExA(
if(ERROR_SUCCESS == RegOpenKeyExW(
HKEY_LOCAL_MACHINE,
CURRENT_ZONE_REGKEY,
0,
KEY_QUERY_VALUE,
&hkey))
{
result = RegQueryValueExA(
if (ERROR_SUCCESS == RegQueryValueExW(
hkey,
"TimeZoneKeyName",
L"TimeZoneKeyName",
NULL,
NULL,
(LPBYTE)tzKeyName,
&cbData);
(LPBYTE)timeZoneKeyNameData,
&timeZoneKeyNameLength))
{
// Ensure null termination.
timeZoneKeyNameData[UPRV_LENGTHOF(timeZoneKeyNameData) - 1] = L'\0';
// Convert the UTF-16 string to UTF-8.
UErrorCode status = U_ZERO_ERROR;
u_strToUTF8(tzKeyName, tzKeyNamelength, NULL, reinterpret_cast<const UChar *>(timeZoneKeyNameData), -1, &status);
if (U_ZERO_ERROR == status)
{
result = ERROR_SUCCESS;
}
}
RegCloseKey(hkey);
}

View File

@ -1,698 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
* Copyright (C) 2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
* file name: affixpatternparser.cpp
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/dcfmtsym.h"
#include "unicode/plurrule.h"
#include "unicode/strenum.h"
#include "unicode/ucurr.h"
#include "unicode/ustring.h"
#include "affixpatternparser.h"
#include "charstr.h"
#include "precision.h"
#include "uassert.h"
#include "unistrappender.h"
// Three consecutive U+00A4 code units. The code below uses the prefixes of
// length 1, 2 and 3 as placeholder symbol, ISO code and long name.
static const UChar gDefaultSymbols[] = {0xa4, 0xa4, 0xa4};
// U+0025
static const UChar gPercent = 0x25;
// U+2030
static const UChar gPerMill = 0x2030;
// U+002D
static const UChar gNegative = 0x2D;
// U+002B
static const UChar gPositive = 0x2B;
// Packs a token into one UChar: token type (plus optional 0x80 "long" flag)
// in the high byte, 8 bits of length/count in the low byte.
#define PACK_TOKEN_AND_LENGTH(t, l) ((UChar) (((t) << 8) | (l & 0xFF)))
// Extracts the token type (bits 8..14), ignoring the "long" flag in bit 15.
#define UNPACK_TOKEN(c) ((AffixPattern::ETokenType) (((c) >> 8) & 0x7F))
// Non-zero when the "long" flag (bit 15) is set, i.e. the unit is a
// continuation carrying further length bits of a literal token.
#define UNPACK_LONG(c) (((c) >> 8) & 0x80)
// Extracts the low 8 bits: the length/count payload.
#define UNPACK_LENGTH(c) ((c) & 0xFF)
U_NAMESPACE_BEGIN
/**
 * Reads the token that starts at buffer[idx] in an affix string where special
 * characters are introduced by an apostrophe (0x27).
 *
 * @param buffer the affix string
 * @param idx    index of the first code unit of the token
 * @param len    length of buffer
 * @param token  receives the token character: the unit itself for a plain
 *               character, or the unit following the apostrophe otherwise
 * @return the number of code units consumed: 1 for a plain character (or a
 *         trailing apostrophe), 2 for an apostrophe pair, and 2..4 for an
 *         apostrophe followed by a run of one to three U+00A4 units
 */
static int32_t
nextToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) {
    if (buffer[idx] == 0x27 && idx + 1 != len) {
        const UChar escaped = buffer[idx + 1];
        *token = escaped;
        if (escaped != 0xA4) {
            return 2;
        }
        // Currency sign: swallow up to two further repetitions.
        int32_t consumed = 2;
        while (idx + consumed < len && consumed < 4 && buffer[idx + consumed] == escaped) {
            ++consumed;
        }
        return consumed;
    }
    // Plain character, or an apostrophe that is the very last unit.
    *token = buffer[idx];
    return 1;
}
/**
 * Reads the token that starts at buffer[idx] in a user-style affix string.
 * A token is a run of identical code units: at most 2 for an apostrophe
 * (0x27), at most 3 for a currency sign (0xA4), and exactly 1 for anything
 * else.
 *
 * @param buffer the affix string
 * @param idx    index of the first code unit of the token
 * @param len    length of buffer
 * @param token  receives buffer[idx]
 * @return the number of code units in the run
 */
static int32_t
nextUserToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) {
    const UChar first = buffer[idx];
    *token = first;

    int32_t maxRun = 1;
    if (first == 0x27) {
        maxRun = 2;
    } else if (first == 0xA4) {
        maxRun = 3;
    }

    int32_t run = 1;
    while (run < maxRun && idx + run < len && buffer[idx + run] == first) {
        ++run;
    }
    return run;
}
/**
 * Default constructor. Initializes the symbol, ISO and long forms from the
 * first 1, 2 and 3 units of gDefaultSymbols respectively (i.e. one, two and
 * three U+00A4 currency signs) and marks this object as holding defaults.
 */
CurrencyAffixInfo::CurrencyAffixInfo()
: fSymbol(gDefaultSymbols, 1),
fISO(gDefaultSymbols, 2),
fLong(DigitAffix(gDefaultSymbols, 3)),
fIsDefault(TRUE) {
}
void
CurrencyAffixInfo::set(
const char *locale,
const PluralRules *rules,
const UChar *currency,
UErrorCode &status) {
if (U_FAILURE(status)) {
return;
}
fIsDefault = FALSE;
if (currency == NULL) {
fSymbol.setTo(gDefaultSymbols, 1);
fISO.setTo(gDefaultSymbols, 2);
fLong.remove();
fLong.append(gDefaultSymbols, 3);
fIsDefault = TRUE;
return;
}
int32_t len;
UBool unusedIsChoice;
const UChar *symbol = ucurr_getName(
currency, locale, UCURR_SYMBOL_NAME, &unusedIsChoice,
&len, &status);
if (U_FAILURE(status)) {
return;
}
fSymbol.setTo(symbol, len);
fISO.setTo(currency, u_strlen(currency));
fLong.remove();
StringEnumeration* keywords = rules->getKeywords(status);
if (U_FAILURE(status)) {
return;
}
const UnicodeString* pluralCount;
while ((pluralCount = keywords->snext(status)) != NULL) {
CharString pCount;
pCount.appendInvariantChars(*pluralCount, status);
const UChar *pluralName = ucurr_getPluralName(
currency, locale, &unusedIsChoice, pCount.data(),
&len, &status);
fLong.setVariant(pCount.data(), UnicodeString(pluralName, len), status);
}
delete keywords;
}
void
CurrencyAffixInfo::adjustPrecision(
const UChar *currency, const UCurrencyUsage usage,
FixedPrecision &precision, UErrorCode &status) {
if (U_FAILURE(status)) {
return;
}
int32_t digitCount = ucurr_getDefaultFractionDigitsForUsage(
currency, usage, &status);
precision.fMin.setFracDigitCount(digitCount);
precision.fMax.setFracDigitCount(digitCount);
double increment = ucurr_getRoundingIncrementForUsage(
currency, usage, &status);
if (increment == 0.0) {
precision.fRoundingIncrement.clear();
} else {
precision.fRoundingIncrement.set(increment);
// guard against round-off error
precision.fRoundingIncrement.round(6);
}
}
/**
 * Appends literal text to this pattern.
 *
 * The text literal[start .. start+len) is appended to the literals string.
 * If the token list already ends with a literal token, that token is
 * extended (its length is re-encoded in place) instead of adding a second
 * literal token.
 *
 * Literal length encoding: 8 bits per token unit, least significant byte
 * first. The first unit has type kLiteral; every following unit has type
 * kLiteral | 0x80 (the "long" continuation flag).
 *
 * @param literal buffer holding the text
 * @param start   offset of the first code unit to append
 * @param len     number of code units to append
 */
void
AffixPattern::addLiteral(
const UChar *literal, int32_t start, int32_t len) {
char32Count += u_countChar32(literal + start, len);
literals.append(literal, start, len);
int32_t tlen = tokens.length();
// Takes 4 UChars to encode maximum literal length.
UChar *tokenChars = tokens.getBuffer(tlen + 4);
// find start of literal size. May be tlen if there is no literal.
// While finding start of literal size, compute literal length
int32_t literalLength = 0;
int32_t tLiteralStart = tlen;
// Walk backwards over the trailing literal units. UNPACK_TOKEN masks out the
// continuation flag, so continuation units also compare equal to kLiteral.
// The units are visited most-significant byte first, hence shift-then-or.
while (tLiteralStart > 0 && UNPACK_TOKEN(tokenChars[tLiteralStart - 1]) == kLiteral) {
tLiteralStart--;
literalLength <<= 8;
literalLength |= UNPACK_LENGTH(tokenChars[tLiteralStart]);
}
// Add number of chars we just added to literal
literalLength += len;
// Now encode the new length starting at tLiteralStart
tlen = tLiteralStart;
// First unit: low byte, no continuation flag.
tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral, literalLength & 0xFF);
literalLength >>= 8;
// Remaining bytes (if any) go into continuation units flagged with 0x80.
while (literalLength) {
tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral | 0x80, literalLength & 0xFF);
literalLength >>= 8;
}
tokens.releaseBuffer(tlen);
}
/**
 * Appends a single non-literal token of type t.
 * Shorthand for add(t, 1).
 *
 * @param t the token type to append
 */
void
AffixPattern::add(ETokenType t) {
add(t, 1);
}
/**
 * Appends a currency token.
 * Shorthand for add(kCurrency, count).
 *
 * @param count the count stored with the currency token
 */
void
AffixPattern::addCurrency(uint8_t count) {
add(kCurrency, count);
}
/**
 * Appends a non-literal token with an explicit count.
 *
 * Adds count to char32Count, records whether a currency, percent or
 * per-mille token has been seen, and appends the packed token to the
 * token list.
 *
 * @param t     the token type; must not be kLiteral (asserted)
 * @param count the count stored in the token's low byte
 */
void
AffixPattern::add(ETokenType t, uint8_t count) {
    U_ASSERT(t != kLiteral);
    char32Count += count;

    // Remember which special tokens this pattern contains.
    if (t == kCurrency) {
        hasCurrencyToken = TRUE;
    } else if (t == kPercent) {
        hasPercentToken = TRUE;
    } else if (t == kPerMill) {
        hasPermillToken = TRUE;
    }

    tokens.append(PACK_TOKEN_AND_LENGTH(t, count));
}
/**
 * Appends every token of another pattern to this one.
 *
 * Literals are re-added through addLiteral, currency tokens keep their
 * count, and every other token type is added with a count of one.
 *
 * @param other the pattern whose tokens are copied
 * @return *this
 */
AffixPattern &
AffixPattern::append(const AffixPattern &other) {
    AffixPatternIterator iter;
    UnicodeString literalText;
    for (other.iterator(iter); iter.nextToken(); ) {
        if (iter.getTokenType() == kLiteral) {
            iter.getLiteral(literalText);
            addLiteral(literalText.getBuffer(), 0, literalText.length());
        } else if (iter.getTokenType() == kCurrency) {
            addCurrency(static_cast<uint8_t>(iter.getTokenLength()));
        } else {
            add(iter.getTokenType());
        }
    }
    return *this;
}
/**
 * Resets this pattern to empty: no tokens, no literal text, a zero
 * char32Count, and all has-currency / has-percent / has-permill flags off.
 */
void
AffixPattern::remove() {
    char32Count = 0;
    hasPermillToken = FALSE;
    hasPercentToken = FALSE;
    hasCurrencyToken = FALSE;
    literals.remove();
    tokens.remove();
}
// escapes literals for strings where special characters are NOT escaped
// except for apostrophe.
static void escapeApostropheInLiteral(
const UnicodeString &literal, UnicodeStringAppender &appender) {
int32_t len = literal.length();
const UChar *buffer = literal.getBuffer();
for (int32_t i = 0; i < len; ++i) {
UChar ch = buffer[i];
switch (ch) {
case 0x27:
appender.append((UChar) 0x27);
appender.append((UChar) 0x27);
break;
default:
appender.append(ch);
break;
}
}
}
// escapes literals for user strings where special characters in literals
// are escaped with apostrophe.
static void escapeLiteral(
const UnicodeString &literal, UnicodeStringAppender &appender) {
int32_t len = literal.length();
const UChar *buffer = literal.getBuffer();
for (int32_t i = 0; i < len; ++i) {
UChar ch = buffer[i];
switch (ch) {
case 0x27:
appender.append((UChar) 0x27);
appender.append((UChar) 0x27);
break;
case 0x25:
appender.append((UChar) 0x27);
appender.append((UChar) 0x25);
appender.append((UChar) 0x27);
break;
case 0x2030:
appender.append((UChar) 0x27);
appender.append((UChar) 0x2030);
appender.append((UChar) 0x27);
break;
case 0xA4:
appender.append((UChar) 0x27);
appender.append((UChar) 0xA4);
appender.append((UChar) 0x27);
break;
case 0x2D:
appender.append((UChar) 0x27);
appender.append((UChar) 0x2D);
appender.append((UChar) 0x27);
break;
case 0x2B:
appender.append((UChar) 0x27);
appender.append((UChar) 0x2B);
appender.append((UChar) 0x27);
break;
default:
appender.append(ch);
break;
}
}
}
/**
 * Appends the apostrophe-prefixed string form of this pattern to appendTo.
 *
 * Literal text is emitted with apostrophes doubled. Every special token is
 * emitted as an apostrophe followed by its character: '%' (0x25), per-mille
 * (0x2030), '-' (0x2D), '+' (0x2B), or the currency sign (0xA4) repeated
 * getTokenLength() times.
 *
 * @param appendTo the string to append to
 * @return appendTo
 */
UnicodeString &
AffixPattern::toString(UnicodeString &appendTo) const {
    AffixPatternIterator iter;
    iterator(iter);
    UnicodeStringAppender appender(appendTo);
    UnicodeString literal;
    while (iter.nextToken()) {
        UChar symbol = 0;
        int32_t repeat = 1;
        switch (iter.getTokenType()) {
        case kLiteral:
            escapeApostropheInLiteral(iter.getLiteral(literal), appender);
            continue;
        case kPercent:
            symbol = 0x25;
            break;
        case kPerMill:
            symbol = 0x2030;
            break;
        case kCurrency:
            symbol = 0xA4;
            repeat = iter.getTokenLength();
            break;
        case kNegative:
            symbol = 0x2D;
            break;
        case kPositive:
            symbol = 0x2B;
            break;
        default:
            U_ASSERT(FALSE);
            continue;
        }
        // Special tokens are introduced by a single apostrophe.
        appender.append((UChar) 0x27);
        for (int32_t n = 0; n < repeat; ++n) {
            appender.append(symbol);
        }
    }
    return appendTo;
}
/**
 * Appends the user-style string form of this pattern to appendTo.
 *
 * Special tokens are emitted as their bare character: '%' (0x25), per-mille
 * (0x2030), '-' (0x2D), '+' (0x2B), or the currency sign (0xA4) repeated
 * getTokenLength() times. Literal text goes through escapeLiteral, which
 * quotes any special characters it contains.
 *
 * @param appendTo the string to append to
 * @return appendTo
 */
UnicodeString &
AffixPattern::toUserString(UnicodeString &appendTo) const {
    AffixPatternIterator iter;
    iterator(iter);
    UnicodeStringAppender appender(appendTo);
    UnicodeString literal;
    while (iter.nextToken()) {
        UChar symbol = 0;
        int32_t repeat = 1;
        switch (iter.getTokenType()) {
        case kLiteral:
            escapeLiteral(iter.getLiteral(literal), appender);
            continue;
        case kPercent:
            symbol = 0x25;
            break;
        case kPerMill:
            symbol = 0x2030;
            break;
        case kCurrency:
            symbol = 0xA4;
            repeat = iter.getTokenLength();
            break;
        case kNegative:
            symbol = 0x2D;
            break;
        case kPositive:
            symbol = 0x2B;
            break;
        default:
            U_ASSERT(FALSE);
            continue;
        }
        for (int32_t n = 0; n < repeat; ++n) {
            appender.append(symbol);
        }
    }
    return appendTo;
}
/**
 * Collects literal code units in a small fixed-size buffer and hands them to
 * AffixPattern::addLiteral in batches. The buffer is flushed when it fills
 * up, when flush() is called, and when the appender is destroyed.
 */
class AffixPatternAppender : public UMemory {
public:
    AffixPatternAppender(AffixPattern &dest) : fDest(&dest), fIdx(0) { }

    /** Appends one code unit, flushing first if the buffer is full. */
    inline void append(UChar x) {
        if (fIdx == UPRV_LENGTHOF(fBuffer)) {
            flush();
        }
        fBuffer[fIdx++] = x;
    }

    /**
     * Appends one code point, flushing first unless at least two free
     * units remain (a code point may need two units).
     */
    inline void append(UChar32 x) {
        if (fIdx >= UPRV_LENGTHOF(fBuffer) - 1) {
            flush();
        }
        U16_APPEND_UNSAFE(fBuffer, fIdx, x);
    }

    /** Passes any buffered units to the destination pattern and empties the buffer. */
    inline void flush() {
        if (fIdx != 0) {
            fDest->addLiteral(fBuffer, 0, fIdx);
            fIdx = 0;
        }
    }

    /**
     * flush the buffer when we go out of scope.
     */
    ~AffixPatternAppender() {
        flush();
    }
private:
    // Declared but not defined: copying is not supported.
    AffixPatternAppender(const AffixPatternAppender &other);
    AffixPatternAppender &operator=(const AffixPatternAppender &other);

    AffixPattern *fDest;
    int32_t fIdx;
    UChar fBuffer[32];
};
/**
 * Parses a user-style affix string and appends the resulting tokens to
 * appendTo.
 *
 * A single apostrophe toggles quoting; a doubled apostrophe yields a literal
 * apostrophe both inside and outside quotes. Outside quotes, '%' (0x25),
 * per-mille (0x2030), '-' (0x2D), '+' (0x2B) and runs of the currency sign
 * (0xA4) become special tokens; everything else, and everything inside
 * quotes, becomes literal text.
 *
 * @param affixStr the user-style affix string to parse
 * @param appendTo the pattern that receives the parsed tokens
 * @param status   ICU error code; nothing is done if it already indicates
 *                 failure on entry
 * @return appendTo
 */
AffixPattern &
AffixPattern::parseUserAffixString(
const UnicodeString &affixStr,
AffixPattern &appendTo,
UErrorCode &status) {
if (U_FAILURE(status)) {
return appendTo;
}
int32_t len = affixStr.length();
const UChar *buffer = affixStr.getBuffer();
// 0 = not quoted; 1 = quoted.
int32_t state = 0;
// Literal units are buffered here; the buffer must be flushed before any
// special token is added so that tokens keep their original order.
AffixPatternAppender appender(appendTo);
for (int32_t i = 0; i < len; ) {
UChar token;
int32_t tokenSize = nextUserToken(buffer, i, len, &token);
i += tokenSize;
// A lone apostrophe only switches the quoting state and emits nothing.
if (token == 0x27 && tokenSize == 1) { // quote
state = 1 - state;
continue;
}
if (state == 0) {
switch (token) {
case 0x25:
appender.flush();
appendTo.add(kPercent, 1);
break;
case 0x27: // double quote
appender.append((UChar) 0x27);
break;
case 0x2030:
appender.flush();
appendTo.add(kPerMill, 1);
break;
case 0x2D:
appender.flush();
appendTo.add(kNegative, 1);
break;
case 0x2B:
appender.flush();
appendTo.add(kPositive, 1);
break;
case 0xA4:
appender.flush();
// tokenSize is the length of the currency-sign run (1 to 3).
appendTo.add(kCurrency, static_cast<uint8_t>(tokenSize));
break;
default:
appender.append(token);
break;
}
} else {
// Inside quotes everything is literal text.
switch (token) {
case 0x27: // double quote
appender.append((UChar) 0x27);
break;
case 0xA4: // included b/c tokenSize can be > 1
for (int32_t j = 0; j < tokenSize; ++j) {
appender.append((UChar) 0xA4);
}
break;
default:
appender.append(token);
break;
}
}
}
// Any still-buffered literal text is flushed by the appender's destructor.
return appendTo;
}
/**
 * Parses an affix string in which special characters are introduced by an
 * apostrophe, and appends the resulting tokens to appendTo.
 *
 * Unescaped code units are collected into literal runs. An apostrophe
 * followed by '%' (0x25), per-mille (0x2030), '-' (0x2D), '+' (0x2B) or a run
 * of currency signs (0xA4) yields the matching special token; an apostrophe
 * followed by any other unit yields that unit as a one-unit literal.
 *
 * @param affixStr the affix string to parse
 * @param appendTo the pattern that receives the parsed tokens
 * @param status   ICU error code; nothing is done if it already indicates
 *                 failure on entry. Set to U_PARSE_ERROR if a currency run
 *                 longer than three signs is reported.
 * @return appendTo
 */
AffixPattern &
AffixPattern::parseAffixString(
const UnicodeString &affixStr,
AffixPattern &appendTo,
UErrorCode &status) {
if (U_FAILURE(status)) {
return appendTo;
}
int32_t len = affixStr.length();
const UChar *buffer = affixStr.getBuffer();
for (int32_t i = 0; i < len; ) {
UChar token;
int32_t tokenSize = nextToken(buffer, i, len, &token);
// tokenSize == 1 means an unescaped unit: gather the whole run of them
// and add it as one literal.
if (tokenSize == 1) {
int32_t literalStart = i;
++i;
while (i < len && (tokenSize = nextToken(buffer, i, len, &token)) == 1) {
++i;
}
appendTo.addLiteral(buffer, literalStart, i - literalStart);
// If we reached end of string, we are done
if (i == len) {
return appendTo;
}
// Otherwise token/tokenSize now describe the escaped token that ended
// the literal run; it is handled below.
}
i += tokenSize;
switch (token) {
case 0x25:
appendTo.add(kPercent, 1);
break;
case 0x2030:
appendTo.add(kPerMill, 1);
break;
case 0x2D:
appendTo.add(kNegative, 1);
break;
case 0x2B:
appendTo.add(kPositive, 1);
break;
case 0xA4:
{
// tokenSize includes the leading apostrophe, so the number of currency
// signs is tokenSize - 1. nextToken caps tokenSize at 4, so this check
// is a defensive guard.
if (tokenSize - 1 > 3) {
status = U_PARSE_ERROR;
return appendTo;
}
appendTo.add(kCurrency, tokenSize - 1);
}
break;
default:
// Escaped ordinary character (e.g. a doubled apostrophe): one-unit literal.
appendTo.addLiteral(&token, 0, 1);
break;
}
}
return appendTo;
}
// Resets result so that it refers to this pattern's token and literal
// strings and is positioned before the first token. Returns result.
AffixPatternIterator &
AffixPattern::iterator(AffixPatternIterator &result) const {
    // Bind the iterator to this pattern's storage.
    result.tokens = &tokens;
    result.literals = &literals;
    // Rewind all cursors.
    result.nextTokenIndex = 0;
    result.nextLiteralIndex = 0;
    result.lastLiteralLength = 0;
    return result;
}
// Advances to the next token. Returns FALSE when every token UChar has been
// consumed, TRUE otherwise.
//
// For a literal token, the following UChars with the "long" bit set are
// consumed too, and the literal's length is rebuilt from them 8 bits at a
// time, walking backwards from the last consumed UChar so the
// highest-order bits are folded in first. The literal cursor is then moved
// past that many characters.
UBool
AffixPatternIterator::nextToken() {
    const int32_t tokenCount = tokens->length();
    if (nextTokenIndex == tokenCount) {
        return FALSE;
    }
    const UChar *packed = tokens->getBuffer();
    const int32_t headIndex = nextTokenIndex++;
    if (UNPACK_TOKEN(packed[headIndex]) != AffixPattern::kLiteral) {
        return TRUE;
    }

    // Swallow the continuation UChars belonging to this literal.
    while (nextTokenIndex < tokenCount && UNPACK_LONG(packed[nextTokenIndex])) {
        ++nextTokenIndex;
    }

    // Rebuild the length, most significant byte first.
    int32_t length = 0;
    int32_t cursor = nextTokenIndex - 1;
    while (UNPACK_LONG(packed[cursor])) {
        length <<= 8;
        length |= UNPACK_LENGTH(packed[cursor]);
        --cursor;
    }
    length <<= 8;
    length |= UNPACK_LENGTH(packed[cursor]);

    lastLiteralLength = length;
    nextLiteralIndex += length;
    return TRUE;
}
// Returns the type stored in the most recently consumed token UChar.
AffixPattern::ETokenType
AffixPatternIterator::getTokenType() const {
    const UChar packed = tokens->charAt(nextTokenIndex - 1);
    return UNPACK_TOKEN(packed);
}
// Sets result to the characters of the current literal, i.e. the
// lastLiteralLength characters ending at nextLiteralIndex in the literal
// storage. Returns result.
UnicodeString &
AffixPatternIterator::getLiteral(UnicodeString &result) const {
    const int32_t literalStart = nextLiteralIndex - lastLiteralLength;
    const UChar *literalChars = literals->getBuffer() + literalStart;
    result.setTo(literalChars, lastLiteralLength);
    return result;
}
// Returns the length of the current token: the decoded literal length for a
// literal token, otherwise the length field packed into the token UChar.
int32_t
AffixPatternIterator::getTokenLength() const {
    const UChar packed = tokens->getBuffer()[nextTokenIndex - 1];
    if (UNPACK_TOKEN(packed) == AffixPattern::kLiteral) {
        return lastLiteralLength;
    }
    return UNPACK_LENGTH(packed);
}
// Default constructor: each symbol starts out as the corresponding
// file-level default (gPercent, gPerMill, gNegative, gPositive).
AffixPatternParser::AffixPatternParser()
        : fPercent(gPercent),
          fPermill(gPerMill),
          fNegative(gNegative),
          fPositive(gPositive) {
}
// Constructs a parser whose percent, per-mille, minus and plus symbols are
// taken from the given DecimalFormatSymbols.
AffixPatternParser::AffixPatternParser(const DecimalFormatSymbols &symbols) {
    this->setDecimalFormatSymbols(symbols);
}
void
AffixPatternParser::setDecimalFormatSymbols(
const DecimalFormatSymbols &symbols) {
fPercent = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol);
fPermill = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol);
fNegative = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
fPositive = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
}
// Walks the tokens of affixPattern and appends their expansion to appendTo.
//
// Field tokens are replaced by this parser's symbols (tagged with the
// matching UNUM_*_FIELD), currency tokens by the symbol, ISO or long form
// from currencyAffixInfo depending on the token length (1, 2 or 3), and
// literal tokens are appended as they are. Any other token type or currency
// length trips U_ASSERT.
//
// Returns appendTo; does nothing if status already indicates failure. Only
// the long-currency append receives status.
PluralAffix &
AffixPatternParser::parse(
        const AffixPattern &affixPattern,
        const CurrencyAffixInfo &currencyAffixInfo,
        PluralAffix &appendTo,
        UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return appendTo;
    }
    AffixPatternIterator tokenIter;
    affixPattern.iterator(tokenIter);
    UnicodeString literalText;
    while (tokenIter.nextToken()) {
        switch (tokenIter.getTokenType()) {
        case AffixPattern::kLiteral:
            appendTo.append(tokenIter.getLiteral(literalText));
            break;
        case AffixPattern::kCurrency: {
            const int32_t currencyWidth = tokenIter.getTokenLength();
            if (currencyWidth == 1) {
                appendTo.append(
                        currencyAffixInfo.getSymbol(), UNUM_CURRENCY_FIELD);
            } else if (currencyWidth == 2) {
                appendTo.append(
                        currencyAffixInfo.getISO(), UNUM_CURRENCY_FIELD);
            } else if (currencyWidth == 3) {
                appendTo.append(
                        currencyAffixInfo.getLong(), UNUM_CURRENCY_FIELD, status);
            } else {
                U_ASSERT(FALSE);
            }
            break;
        }
        case AffixPattern::kNegative:
            appendTo.append(fNegative, UNUM_SIGN_FIELD);
            break;
        case AffixPattern::kPositive:
            appendTo.append(fPositive, UNUM_SIGN_FIELD);
            break;
        case AffixPattern::kPercent:
            appendTo.append(fPercent, UNUM_PERCENT_FIELD);
            break;
        case AffixPattern::kPerMill:
            appendTo.append(fPermill, UNUM_PERMILL_FIELD);
            break;
        default:
            U_ASSERT(FALSE);
            break;
        }
    }
    return appendTo;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -1,402 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* affixpatternparser.h
*
* created on: 2015jan06
* created by: Travis Keep
*/
#ifndef __AFFIX_PATTERN_PARSER_H__
#define __AFFIX_PATTERN_PARSER_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/unistr.h"
#include "unicode/uobject.h"
#include "pluralaffix.h"
U_NAMESPACE_BEGIN
class PluralRules;
class FixedPrecision;
class DecimalFormatSymbols;
/**
* A representation of the various forms of a particular currency according
* to some locale and usage context.
*
* Includes the symbol, ISO code form, and long form(s) of the currency name
* for each plural variation.
*/
class U_I18N_API CurrencyAffixInfo : public UMemory {
public:
/**
* Symbol is \u00a4; ISO form is \u00a4\u00a4;
* long form is \u00a4\u00a4\u00a4.
*/
CurrencyAffixInfo();
/** Returns the symbol form of the currency. */
const UnicodeString &getSymbol() const { return fSymbol; }
/** Returns the ISO form of the currency. */
const UnicodeString &getISO() const { return fISO; }
/** Returns the long forms of the currency, keyed by plural variation. */
const PluralAffix &getLong() const { return fLong; }
/**
* Replaces the symbol form. After this call isDefault() returns FALSE.
* @param symbol the new symbol form.
*/
void setSymbol(const UnicodeString &symbol) {
fSymbol = symbol;
fIsDefault = FALSE;
}
/**
* Replaces the ISO form. After this call isDefault() returns FALSE.
* @param iso the new ISO form.
*/
void setISO(const UnicodeString &iso) {
fISO = iso;
fIsDefault = FALSE;
}
/**
* Returns TRUE if the symbol, ISO form, long forms and default flag of
* this instance all match those of other.
*/
UBool
equals(const CurrencyAffixInfo &other) const {
return (fSymbol == other.fSymbol)
&& (fISO == other.fISO)
&& (fLong.equals(other.fLong))
&& (fIsDefault == other.fIsDefault);
}
/**
* Initializes this instance.
*
* @param locale the locale for the currency forms.
* @param rules The plural rules for the locale.
* @param currency the null terminated, 3 character ISO code of the
* currency. If NULL, resets this instance as if it were just created.
* In this case, the first 2 parameters may be NULL as well.
* @param status any error returned here.
*/
void set(
const char *locale, const PluralRules *rules,
const UChar *currency, UErrorCode &status);
/**
* Returns true if this instance is the default, that is, it has no real
* currency: for instance, it was never initialized with set(),
* or it was reset with set(NULL, NULL, NULL, status).
*/
UBool isDefault() const { return fIsDefault; }
/**
* Adjusts the precision used for a particular currency.
* @param currency the null terminated, 3 character ISO code of the
* currency.
* @param usage the usage of the currency
* @param precision min/max fraction digits and rounding increment
* adjusted.
* @param status any error reported here.
*/
static void adjustPrecision(
const UChar *currency, const UCurrencyUsage usage,
FixedPrecision &precision, UErrorCode &status);
private:
/**
* The symbol form of the currency.
*/
UnicodeString fSymbol;
/**
* The ISO form of the currency, usually three letter abbreviation.
*/
UnicodeString fISO;
/**
* The long forms of the currency keyed by plural variation.
*/
PluralAffix fLong;
/**
* Value returned by isDefault(); cleared by setSymbol() and setISO().
*/
UBool fIsDefault;
};
class AffixPatternIterator;
/**
* A locale agnostic representation of an affix pattern.
*/
class U_I18N_API AffixPattern : public UMemory {
public:
/**
* The token types that can appear in an affix pattern.
*/
enum ETokenType {
kLiteral,
kPercent,
kPerMill,
kCurrency,
kNegative,
kPositive
};
/**
* An empty affix pattern.
*/
AffixPattern()
: tokens(), literals(), hasCurrencyToken(FALSE),
hasPercentToken(FALSE), hasPermillToken(FALSE), char32Count(0) {
}
/**
* Adds a string literal to this affix pattern.
* The parse functions call this as addLiteral(buffer, start, len), i.e.
* the first argument is the character buffer, start is the offset of the
* literal within it and len is its length in UChars.
*/
void addLiteral(const UChar *, int32_t start, int32_t len);
/**
* Adds a token to this affix pattern. t must not be kLiteral as
* the addLiteral() method adds literals.
* @param t the token type to add
*/
void add(ETokenType t);
/**
* Adds a currency token with specific count to this affix pattern.
* @param count the token count. Used to distinguish between
* one, two, or three currency symbols. Note that adding a currency
* token with count=2 (Use ISO code) is different than adding two
* currency tokens each with count=1 (two currency symbols).
*/
void addCurrency(uint8_t count);
/**
* Makes this instance be an empty affix pattern.
*/
void remove();
/**
* Provides an iterator over the tokens in this instance.
* @param result this is initialized to point just before the
* first token of this instance. Caller must call nextToken()
* on the iterator once it is set up to have it actually point
* to the first token. This first call to nextToken() will return
* FALSE if the AffixPattern being iterated over is empty.
* @return result
*/
AffixPatternIterator &iterator(AffixPatternIterator &result) const;
/**
* Returns TRUE if this instance has currency tokens in it.
*/
UBool usesCurrency() const {
return hasCurrencyToken;
}
/**
* Returns the value of the hasPercentToken flag.
*/
UBool usesPercent() const {
return hasPercentToken;
}
/**
* Returns the value of the hasPermillToken flag.
*/
UBool usesPermill() const {
return hasPermillToken;
}
/**
* Returns the number of code points a string of this instance
* would have if none of the special tokens were escaped.
* Used to compute the padding size.
*/
int32_t countChar32() const {
return char32Count;
}
/**
* Appends other to this instance mutating this instance in place.
* @param other The pattern appended to the end of this one.
* @return a reference to this instance for chaining.
*/
AffixPattern &append(const AffixPattern &other);
/**
* Converts this AffixPattern back into a user string.
* It is the inverse of parseUserAffixString.
*/
UnicodeString &toUserString(UnicodeString &appendTo) const;
/**
* Converts this AffixPattern back into a string.
* It is the inverse of parseAffixString.
*/
UnicodeString &toString(UnicodeString &appendTo) const;
/**
* Parses an affix pattern string appending it to an AffixPattern.
* Parses affix pattern strings produced from using
* DecimalFormatPatternParser to parse a format pattern. Affix patterns
* include the positive prefix and suffix and the negative prefix
* and suffix. This method expects affix patterns strings to be in the
* same format that DecimalFormatPatternParser produces. Namely special
* characters in the affix that correspond to a field type must be
* prefixed with an apostrophe ('). These special character sequences
* include minus (-), percent (%), permile (U+2030), plus (+),
* short currency (U+00a4), medium currency (u+00a4 * 2),
* long currency (u+a4 * 3), and apostrophe (')
* (apostrophe does not correspond to a field type but has to be escaped
* because it itself is the escape character).
* Since the expansion of these special character
* sequences is locale dependent, these sequences are not expanded in
* an AffixPattern instance.
* If these special characters are not prefixed with an apostrophe in
* the affix pattern string, then they are treated verbatim just as
* any other character. If an apostrophe prefixes a non special
* character in the affix pattern, the apostrophe is simply ignored.
*
* @param affixStr the string from DecimalFormatPatternParser
* @param appendTo parsed result appended here.
* @param status any error parsing returned here.
* @return appendTo
*/
static AffixPattern &parseAffixString(
const UnicodeString &affixStr,
AffixPattern &appendTo,
UErrorCode &status);
/**
* Parses an affix pattern string appending it to an AffixPattern.
* Parses affix pattern strings as the user would supply them.
* In this function, quoting makes special characters like normal
* characters whereas in parseAffixString, quoting makes special
* characters special.
*
* @param affixStr the string from the user
* @param appendTo parsed result appended here.
* @param status any error parsing returned here.
* @return appendTo
*/
static AffixPattern &parseUserAffixString(
const UnicodeString &affixStr,
AffixPattern &appendTo,
UErrorCode &status);
/**
* Returns TRUE if the tokens, literals, the three has*Token flags and
* the code point count of this instance all match those of other.
*/
UBool equals(const AffixPattern &other) const {
return (tokens == other.tokens)
&& (literals == other.literals)
&& (hasCurrencyToken == other.hasCurrencyToken)
&& (hasPercentToken == other.hasPercentToken)
&& (hasPermillToken == other.hasPermillToken)
&& (char32Count == other.char32Count);
}
private:
/*
* Tokens stored here. Each UChar generally stands for one token.
* Each token is of form 'etttttttllllllll' llllllll is the length of
* the token and ranges from 0-255. ttttttt is the token type and ranges
* from 0-127. If e is set it means this is an extendo token (to be
* described later). To accommodate token lengths above 255, each normal
* token (e=0) can be followed by 0 or more extendo tokens (e=1) with
* the same type. Right now only kLiteral Tokens have extendo tokens.
* Each extendo token provides the next 8 higher bits for the length.
* If a kLiteral token is followed by 2 extendo tokens, then the
* llllllll of the next extendo token contains bits 8-15 of the length
* and the last extendo token contains bits 16-23 of the length.
*/
UnicodeString tokens;
/*
* The characters of the kLiteral tokens are concatenated together here.
* The first characters go with the first kLiteral token, the next
* characters go with the next kLiteral token etc.
*/
UnicodeString literals;
/* Flag returned by usesCurrency(). */
UBool hasCurrencyToken;
/* Flag returned by usesPercent(). */
UBool hasPercentToken;
/* Flag returned by usesPermill(). */
UBool hasPermillToken;
/* Code point count returned by countChar32(). */
int32_t char32Count;
/*
* Adds a token of type t carrying the given count. The static parse
* functions use this overload for every non-literal token.
*/
void add(ETokenType t, uint8_t count);
};
/**
* An iterator over the tokens in an AffixPattern instance.
*/
class U_I18N_API AffixPatternIterator : public UMemory {
public:
/**
* Using an iterator without first calling iterator on an AffixPattern
* instance to initialize the iterator results in
* undefined behavior.
*/
AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { }
/**
* Advances this iterator to the next token. Returns FALSE when there
* are no more tokens. Calling the other methods after nextToken()
* returns FALSE results in undefined behavior.
*/
UBool nextToken();
/**
* Returns the type of token.
*/
AffixPattern::ETokenType getTokenType() const;
/**
* For literal tokens, returns the literal string. Calling this for
* other token types results in undefined behavior.
* @param result replaced with a read-only alias to the literal string.
* NOTE(review): the implementation calls
* UnicodeString::setTo(const UChar *, int32_t); confirm that this
* overload aliases rather than copies, as this comment states.
* @return result
*/
UnicodeString &getLiteral(UnicodeString &result) const;
/**
* Returns the token length. Usually 1, but for currency tokens may
* be 2 for ISO code and 3 for long form.
* For literal tokens this is the length of the literal.
*/
int32_t getTokenLength() const;
private:
/* Offset in *literals just past the literal most recently decoded. */
int32_t nextLiteralIndex;
/* Length of the literal most recently decoded by nextToken(). */
int32_t lastLiteralLength;
/* Index in *tokens of the first UChar not yet consumed by nextToken(). */
int32_t nextTokenIndex;
/*
* Set by AffixPattern::iterator() to the address of that pattern's own
* token and literal strings; not owned. The pattern presumably has to
* outlive the iterator.
*/
const UnicodeString *tokens;
const UnicodeString *literals;
friend class AffixPattern;
/* Declared private; presumably to forbid copying -- no definition is visible here. */
AffixPatternIterator(const AffixPatternIterator &);
AffixPatternIterator &operator=(const AffixPatternIterator &);
};
/**
* A locale aware class that converts locale independent AffixPattern
* instances into locale dependent PluralAffix instances.
*/
class U_I18N_API AffixPatternParser : public UMemory {
public:
/**
* Creates a parser that uses built-in default percent, per-mille,
* minus and plus symbols.
*/
AffixPatternParser();
/**
* Creates a parser that takes its symbols from symbols, as
* setDecimalFormatSymbols() does.
*/
AffixPatternParser(const DecimalFormatSymbols &symbols);
/**
* Replaces the percent, per-mille, minus and plus symbols used by this
* parser with those found in symbols.
*/
void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols);
/**
* Parses affixPattern appending the result to appendTo.
* @param affixPattern The affix pattern.
* @param currencyAffixInfo contains the currency forms.
* @param appendTo The result of parsing affixPattern is appended here.
* @param status any error returned here.
* @return appendTo.
*/
PluralAffix &parse(
const AffixPattern &affixPattern,
const CurrencyAffixInfo &currencyAffixInfo,
PluralAffix &appendTo,
UErrorCode &status) const;
/**
* Returns TRUE if all four symbols of this parser match those of other.
*/
UBool equals(const AffixPatternParser &other) const {
return (fPercent == other.fPercent)
&& (fPermill == other.fPermill)
&& (fNegative == other.fNegative)
&& (fPositive == other.fPositive);
}
private:
/* Text substituted for kPercent tokens. */
UnicodeString fPercent;
/* Text substituted for kPerMill tokens. */
UnicodeString fPermill;
/* Text substituted for kNegative tokens. */
UnicodeString fNegative;
/* Text substituted for kPositive tokens. */
UnicodeString fPositive;
};
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif // __AFFIX_PATTERN_PARSER_H__

View File

@ -22,27 +22,27 @@ const uint8_t CollationFCD::lcccIndex[2048]={
0,0,0,0,0,0,0,0,1,1,2,3,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,5,6,7,0,
8,0,9,0xa,0,0,0xb,0xc,0xd,0xe,0xf,0,0,0,0,0x10,
0x11,0x12,0x13,0,0,0,0x14,0x15,0,0x16,0x17,0,0,0x16,0x18,0,
0x11,0x12,0x13,0,0,0,0x14,0x15,0,0x16,0x17,0,0,0x16,0x18,0x19,
0,0x16,0x18,0,0,0x16,0x18,0,0,0x16,0x18,0,0,0,0x18,0,
0,0,0x19,0,0,0x16,0x18,0,0,0x1a,0x18,0,0,0,0x1b,0,
0,0x1c,0x1d,0,0,0x1e,0x1d,0,0x1e,0x1f,0,0x20,0x21,0,0x22,0,
0,0x23,0,0,0x18,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x24,0,0,0,0,0,
0,0,0x1a,0,0,0x16,0x18,0,0,0x1b,0x18,0,0,0,0x1c,0,
0,0x1d,0x1e,0,0,0x1f,0x1e,0,0x1f,0x20,0,0x21,0x22,0,0x23,0,
0,0x24,0,0,0x18,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x25,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x25,0x25,0,0,0,0,0x26,0,
0,0,0,0,0,0x27,0,0,0,0x13,0,0,0,0,0,0,
0x28,0,0,0x29,0,0x2a,0,0,0,0x25,0x2b,0x10,0,0x2c,0,0x2d,
0,0x2e,0,0,0,0,0x2f,0x30,0,0,0,0,0,0,1,0x31,
0,0,0,0,0,0,0,0,0x26,0x26,0,0,0,0,0x27,0,
0,0,0,0,0,0x28,0,0,0,0x13,0,0,0,0,0,0,
0x29,0,0,0x2a,0,0x2b,0,0,0,0x26,0x2c,0x2d,0,0x2e,0,0x2f,
0,0x30,0,0,0,0,0x31,0x32,0,0,0,0,0,0,1,0x33,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x32,0x33,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x34,0x35,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x34,0,0,0,0x35,0,0,0,1,
0,0,0,0,0,0,0,0x36,0,0,0,0x37,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x36,0,0,0x37,0,0,0,0,0,0,0,0,0,0,0,
0,0x38,0,0,0x39,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -101,9 +101,9 @@ const uint8_t CollationFCD::lcccIndex[2048]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x38,0x39,0,0,0x3a,0,0,0,0,0,0,0,0,
0x22,0,0,0,0,0,0x2b,0x3b,0,0x3c,0x3d,0,0,0x3d,0x3e,0,
0,0,0,0,0,0x3f,0x40,0x41,0,0,0,0,0,0,0,0x18,
0,0,0,0x3a,0x3b,0,0,0x3c,0,0,0,0,0,0,0,0,
0x23,0,0,0,0,0,0x2c,0x3d,0,0x3e,0x3f,0,0,0x3f,0x40,0,
0,0,0,0,0,0x41,0x42,0x43,0,0,0,0,0,0,0,0x18,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -126,7 +126,7 @@ const uint8_t CollationFCD::lcccIndex[2048]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x42,0x43,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x45,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@ -143,17 +143,17 @@ const uint8_t CollationFCD::lcccIndex[2048]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x19,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
const uint32_t CollationFCD::lcccBits[69]={
const uint32_t CollationFCD::lcccBits[70]={
0,0xffffffff,0xffff7fff,0xffff,0xf8,0xfffe0000,0xbfffffff,0xb6,0x7ff0000,0xfffff800,0x10000,0x9fc00000,0x3d9f,0x20000,0xffff0000,0x7ff,
0xff800,0xfbc00000,0x3eef,0xe000000,0xfff00000,0xfffffffb,0x10000000,0x1e2000,0x2000,0x602000,0x18000000,0x400,0x7000000,0xf00,0x3000000,0x2a00000,
0x3c3e0000,0xdf,0x40,0x6800000,0xe0000000,0x100000,0x20040000,0x200,0x1800000,0x9fe00001,0x3fff0000,0x10,0xc00,0xc0040,0x800000,0xfff70000,
0x31021fd,0xfbffffff,0x1fff0000,0x1ffe2,0x38000,0x80000000,0xfc00,0x6000000,0x3ff08000,0xc0000000,0x30000,0x3ffff,0x3800,0x80000,1,0xc19d0000,
2,0x400000,0x40000f5,0x5108000,0x40000000
0x200ff800,0xfbc00000,0x3eef,0xe000000,0xfff80000,0xfffffffb,0x10000000,0x1e2000,0x2000,0x40000000,0x602000,0x18000000,0x400,0x7000000,0xf00,0x3000000,
0x2a00000,0x3c3e0000,0xdf,0x40,0x6800000,0xe0000000,0x100000,0x20040000,0x200,0x1800000,0x9fe00001,0x3fff0000,0x10,0xff800,0xc00,0xc0040,
0x800000,0xfff70000,0x31021fd,0xfbffffff,0x1fff0000,0x1ffe2,0x38000,0x80000000,0xfc00,0x6000000,0x3ff08000,0xc0000000,0x30000,0x3ffff,0x3800,0x80000,
1,0xc19d0000,2,0x400000,0x40000fd,0x5108000
};
const uint8_t CollationFCD::tcccIndex[2048]={
@ -161,27 +161,27 @@ const uint8_t CollationFCD::tcccIndex[2048]={
0xb,0xc,0,0,0,0,0,0,1,1,0xd,0xe,0xf,0x10,0x11,0,
0x12,0x13,0x14,0x15,0x16,0,0x17,0x18,0,0,0,0,0x19,0x1a,0x1b,0,
0x1c,0x1d,0x1e,0x1f,0,0,0x20,0x21,0x22,0x23,0x24,0,0,0,0,0x25,
0x26,0x27,0x28,0,0,0,0x29,0x2a,0,0x2b,0x2c,0,0,0x2d,0x2e,0,
0,0x2f,0x30,0,0,0x2d,0x31,0,0,0x2d,0x32,0,0,0,0x31,0,
0,0,0x33,0,0,0x2d,0x31,0,0,0x34,0x31,0,0,0,0x35,0,
0,0x36,0x37,0,0,0x38,0x37,0,0x38,0x39,0,0x3a,0x3b,0,0x3c,0,
0,0x3d,0,0,0x31,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x3e,0,0,0,0,0,
0x26,0x27,0x28,0,0,0,0x29,0x2a,0,0x2b,0x2c,0,0,0x2d,0x2e,0x2f,
0,0x30,0x31,0,0,0x2d,0x32,0,0,0x2d,0x33,0,0,0,0x32,0,
0,0,0x34,0,0,0x2d,0x32,0,0,0x35,0x32,0,0,0,0x36,0,
0,0x37,0x38,0,0,0x39,0x38,0,0x39,0x3a,0,0x3b,0x3c,0,0x3d,0,
0,0x3e,0,0,0x32,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x3f,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x3f,0x3f,0,0,0,0,0x40,0,
0,0,0,0,0,0x41,0,0,0,0x28,0,0,0,0,0,0,
0x42,0,0,0x43,0,0x44,0,0,0,0x3f,0x45,0x25,0,0x46,0,0x47,
0,0x48,0,0,0,0,0x49,0x4a,0,0,0,0,0,0,1,0x4b,
1,1,1,1,0x4c,1,1,0x4d,0x4e,1,0x4f,0x50,1,0x51,0x52,0x53,
0,0,0,0,0,0,0x54,0x55,0,0x56,0,0,0x57,0x58,0x59,0,
0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0,0x60,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x40,0x40,0,0,0,0,0x41,0,
0,0,0,0,0,0x42,0,0,0,0x28,0,0,0,0,0,0,
0x43,0,0,0x44,0,0x45,0,0,0,0x40,0x46,0x47,0,0x48,0,0x49,
0,0x4a,0,0,0,0,0x4b,0x4c,0,0,0,0,0,0,1,0x4d,
1,1,1,1,0x4e,1,1,0x4f,0x50,1,0x51,0x52,1,0x53,0x54,0x55,
0,0,0,0,0,0,0x56,0x57,0,0x58,0,0,0x59,0x5a,0x5b,0,
0x5c,0x5d,0x5e,0x5f,0x60,0x61,0,0x62,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x2d,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x61,0,0,0,0x62,0,0,0,1,
0,0,0,0,0,0,0,0x63,0,0,0,0x64,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x63,0x64,0x65,0x66,0x64,0x65,0x67,0,0,0,0,0,0,0,0,
0,0x65,0x66,0x67,0x68,0x66,0x67,0x69,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -240,9 +240,9 @@ const uint8_t CollationFCD::tcccIndex[2048]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x68,0x69,0,0,0x6a,0,0,0,0,0,0,0,0,
0x3c,0,0,0,0,0,0x45,0x6b,0,0x6c,0x6d,0,0,0x6d,0x6e,0,
0,0,0,0,0,0x6f,0x70,0x71,0,0,0,0,0,0,0,0x31,
0,0,0,0x6a,0x6b,0,0,0x6c,0,0,0,0,0,0,0,0,
0x3d,0,0,0,0,0,0x46,0x6d,0,0x6e,0x6f,0,0,0x6f,0x70,0,
0,0,0,0,0,0x71,0x72,0x73,0,0,0,0,0,0,0,0x32,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -265,7 +265,7 @@ const uint8_t CollationFCD::tcccIndex[2048]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x72,0x73,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x74,0x75,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -282,20 +282,20 @@ const uint8_t CollationFCD::tcccIndex[2048]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x3e,0x74,0x75,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x3f,0x76,0x77,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xe,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
const uint32_t CollationFCD::tcccBits[118]={
const uint32_t CollationFCD::tcccBits[120]={
0,0xffffffff,0x3e7effbf,0xbe7effbf,0xfffcffff,0x7ef1ff3f,0xfff3f1f8,0x7fffff3f,0x18003,0xdfffe000,0xff31ffcf,0xcfffffff,0xfffc0,0xffff7fff,0xffff,0x1d760,
0x1fc00,0x187c00,0x200708b,0x2000000,0x708b0000,0xc00000,0xf8,0xfccf0006,0x33ffcfc,0xfffe0000,0xbfffffff,0xb6,0x7ff0000,0x7c,0xfffff800,0x10000,
0x9fc80005,0x3d9f,0x20000,0xffff0000,0x7ff,0xff800,0xfbc00000,0x3eef,0xe000000,0xfff00000,0xfffffffb,0x10120200,0xff1e2000,0x10000000,0xb0002000,0x10480000,
0x4e002000,0x2000,0x30002000,0x602100,0x18000000,0x24000400,0x7000000,0xf00,0x3000000,0x2a00000,0x3d7e0000,0xdf,0x40,0x6800000,0xe0000000,0x100000,
0x20040000,0x200,0x1800000,0x9fe00001,0x3fff0000,0x10,0xc00,0xc0040,0x800000,0xfff70000,0x31021fd,0xfbffffff,0xbffffff,0x3ffffff,0x3f3fffff,0xaaff3f3f,
0x3fffffff,0x1fdfffff,0xefcfffde,0x1fdc7fff,0x1fff0000,0x1ffe2,0x800,0xc000000,0x4000,0xe000,0x1210,0x50,0x292,0x333e005,0x333,0xf000,
0x3c0f,0x38000,0x80000000,0xfc00,0x55555000,0x36db02a5,0x46100000,0x47900000,0x3ff08000,0xc0000000,0x30000,0x3ffff,0x3800,0x80000,1,0xc19d0000,
2,0x400000,0x40000f5,0x5108000,0x5f7ffc00,0x7fdb
0x9fc80005,0x3d9f,0x20000,0xffff0000,0x7ff,0x200ff800,0xfbc00000,0x3eef,0xe000000,0xfff80000,0xfffffffb,0x10120200,0xff1e2000,0x10000000,0xb0002000,0x40000000,
0x10480000,0x4e002000,0x2000,0x30002000,0x602100,0x18000000,0x24000400,0x7000000,0xf00,0x3000000,0x2a00000,0x3d7e0000,0xdf,0x40,0x6800000,0xe0000000,
0x100000,0x20040000,0x200,0x1800000,0x9fe00001,0x3fff0000,0x10,0xff800,0xc00,0xc0040,0x800000,0xfff70000,0x31021fd,0xfbffffff,0xbffffff,0x3ffffff,
0x3f3fffff,0xaaff3f3f,0x3fffffff,0x1fdfffff,0xefcfffde,0x1fdc7fff,0x1fff0000,0x1ffe2,0x800,0xc000000,0x4000,0xe000,0x1210,0x50,0x292,0x333e005,
0x333,0xf000,0x3c0f,0x38000,0x80000000,0xfc00,0x55555000,0x36db02a5,0x46100000,0x47900000,0x3ff08000,0xc0000000,0x30000,0x3ffff,0x3800,0x80000,
1,0xc19d0000,2,0x400000,0x40000fd,0x5108000,0x5f7ffc00,0x7fdb
};
U_NAMESPACE_END

File diff suppressed because it is too large Load Diff

View File

@ -203,6 +203,9 @@ CurrencyPluralInfo::setCurrencyPluralPattern(const UnicodeString& pluralCount,
const UnicodeString& pattern,
UErrorCode& status) {
if (U_SUCCESS(status)) {
UnicodeString* oldValue = static_cast<UnicodeString*>(
fPluralCountToCurrencyUnitPattern->get(pluralCount));
delete oldValue;
fPluralCountToCurrencyUnitPattern->put(pluralCount, new UnicodeString(pattern), status);
}
}

View File

@ -17,21 +17,32 @@
#include "unicode/currunit.h"
#include "unicode/ustring.h"
#include "cstring.h"
#include "uinvchar.h"
static constexpr char16_t kDefaultCurrency[] = u"XXX";
U_NAMESPACE_BEGIN
CurrencyUnit::CurrencyUnit(ConstChar16Ptr _isoCode, UErrorCode& ec) {
*isoCode = 0;
if (U_SUCCESS(ec)) {
if (_isoCode != nullptr && u_strlen(_isoCode)==3) {
u_strcpy(isoCode, _isoCode);
char simpleIsoCode[4];
u_UCharsToChars(isoCode, simpleIsoCode, 4);
initCurrency(simpleIsoCode);
} else {
ec = U_ILLEGAL_ARGUMENT_ERROR;
}
// The constructor always leaves the CurrencyUnit in a valid state (with a 3-character currency code).
// Note: in ICU4J Currency.getInstance(), we check string length for 3, but in ICU4C we allow a
// non-NUL-terminated string to be passed as an argument, so it is not possible to check length.
const char16_t* isoCodeToUse;
if (U_FAILURE(ec) || _isoCode == nullptr) {
isoCodeToUse = kDefaultCurrency;
} else if (!uprv_isInvariantUString(_isoCode, 3)) {
// TODO: Perform a more strict ASCII check like in ICU4J isAlpha3Code?
isoCodeToUse = kDefaultCurrency;
ec = U_INVARIANT_CONVERSION_ERROR;
} else {
isoCodeToUse = _isoCode;
}
// TODO: Perform uppercasing here like in ICU4J Currency.getInstance()?
uprv_memcpy(isoCode, isoCodeToUse, sizeof(UChar) * 3);
isoCode[3] = 0;
char simpleIsoCode[4];
u_UCharsToChars(isoCode, simpleIsoCode, 4);
initCurrency(simpleIsoCode);
}
CurrencyUnit::CurrencyUnit(const CurrencyUnit& other) : MeasureUnit(other) {
@ -52,7 +63,7 @@ CurrencyUnit::CurrencyUnit(const MeasureUnit& other, UErrorCode& ec) : MeasureUn
}
CurrencyUnit::CurrencyUnit() : MeasureUnit() {
u_strcpy(isoCode, u"XXX");
u_strcpy(isoCode, kDefaultCurrency);
char simpleIsoCode[4];
u_UCharsToChars(isoCode, simpleIsoCode, 4);
initCurrency(simpleIsoCode);

View File

@ -1,54 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
********************************************************************************
* Copyright (C) 2012-2014, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************************/
#ifndef DCFMTIMP_H
#define DCFMTIMP_H
#include "unicode/utypes.h"
#if UCONFIG_FORMAT_FASTPATHS_49
U_NAMESPACE_BEGIN
/**
 * Fast-path state values. They are stored in the uint8_t status fields of
 * DecimalFormatInternal, so the numeric values are spelled out explicitly.
 */
enum EDecimalFormatFastpathStatus {
    kFastpathNO      = 0,
    kFastpathYES     = 1,
    /** Not yet set. */
    kFastpathUNKNOWN = 2,
    /** Depends on the value being formatted. */
    kFastpathMAYBE   = 3
};
/**
 * Fast-path bookkeeping for DecimalFormat.
 *
 * Must be smaller than DecimalFormat::fReserved
 *
 * Both fields hold an EDecimalFormatFastpathStatus value (0..3).
 */
struct DecimalFormatInternal {
  /** Fast-path status for formatting. */
  uint8_t fFastFormatStatus;
  /** Fast-path status for parsing. */
  uint8_t fFastParseStatus;

  /** Copies both status fields from rhs and returns *this. */
  DecimalFormatInternal &operator=(const DecimalFormatInternal& rhs) {
    fFastParseStatus = rhs.fFastParseStatus;
    fFastFormatStatus = rhs.fFastFormatStatus;
    return *this;
  }

#ifdef FMT_DEBUG
  /**
   * Prints both status fields as one character each:
   * N = NO, Y = YES, ? = UNKNOWN, M = MAYBE.
   *
   * The lookup string has four visible characters because the index is
   * masked with 3 and can therefore be 3 (kFastpathMAYBE); a three-character
   * string would select its NUL terminator and print a NUL byte.
   */
  void dump() const {
    printf("DecimalFormatInternal: fFastFormatStatus=%c, fFastParseStatus=%c\n",
           "NY?M"[static_cast<int>(fFastFormatStatus) & 3],
           "NY?M"[static_cast<int>(fFastParseStatus) & 3]
           );
  }
#endif
};
U_NAMESPACE_END
#endif
#endif

View File

@ -66,7 +66,7 @@ static const UChar INTL_CURRENCY_SYMBOL_STR[] = {0xa4, 0xa4, 0};
static const char *gNumberElementKeys[DecimalFormatSymbols::kFormatSymbolCount] = {
"decimal",
"group",
"list",
NULL, /* #11897: the <list> symbol is NOT the pattern separator symbol */
"percentSign",
NULL, /* Native zero digit is deprecated from CLDR - get it from the numbering system */
NULL, /* Pattern digit character is deprecated from CLDR - use # by default always */
@ -98,7 +98,7 @@ static const char *gNumberElementKeys[DecimalFormatSymbols::kFormatSymbolCount]
// Initializes this with the decimal format symbols in the default locale.
DecimalFormatSymbols::DecimalFormatSymbols(UErrorCode& status)
: UObject(), locale() {
: UObject(), locale(), currPattern(NULL) {
initialize(locale, status, TRUE);
}
@ -106,12 +106,12 @@ DecimalFormatSymbols::DecimalFormatSymbols(UErrorCode& status)
// Initializes this with the decimal format symbols in the desired locale.
DecimalFormatSymbols::DecimalFormatSymbols(const Locale& loc, UErrorCode& status)
: UObject(), locale(loc) {
: UObject(), locale(loc), currPattern(NULL) {
initialize(locale, status);
}
DecimalFormatSymbols::DecimalFormatSymbols(const Locale& loc, const NumberingSystem& ns, UErrorCode& status)
: UObject(), locale(loc) {
: UObject(), locale(loc), currPattern(NULL) {
initialize(locale, status, FALSE, &ns);
}
@ -349,7 +349,6 @@ DecimalFormatSymbols::initialize(const Locale& loc, UErrorCode& status,
{
if (U_FAILURE(status)) { return; }
*validLocale = *actualLocale = 0;
currPattern = NULL;
// First initialize all the symbols to the fallbacks for anything we can't find
initialize();
@ -477,6 +476,7 @@ DecimalFormatSymbols::initialize(const Locale& loc, UErrorCode& status,
UErrorCode localStatus = U_ZERO_ERROR;
uccLen = ucurr_forLocale(locName, ucc, uccLen, &localStatus);
// TODO: Currency pattern data loading is duplicated in number_formatimpl.cpp
if(U_SUCCESS(localStatus) && uccLen > 0) {
char cc[4]={0};
u_UCharsToChars(ucc, cc, uccLen);

View File

@ -1,251 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2009-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* This file contains the class DecimalFormatStaticSets
*
* DecimalFormatStaticSets holds the UnicodeSets that are needed for lenient
* parsing of decimal and group separators.
********************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/unistr.h"
#include "unicode/uniset.h"
#include "unicode/uchar.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
#include "ucln_in.h"
#include "umutex.h"
#include "decfmtst.h"
U_NAMESPACE_BEGIN
//------------------------------------------------------------------------------
//
// Unicode Set pattern strings for all of the required constant sets.
// Initialized with hex values for portability to EBCDIC based machines.
// Really ugly, but there's no good way to avoid it.
//
//------------------------------------------------------------------------------
// Each *Pattern array below is a bracketed set pattern stored as UChar
// values: it starts with 0x005B ('['), ends with 0x005D (']') and is
// terminated by 0x0000.
static const UChar gDotEquivalentsPattern[] = {
// [ . \u2024 \u3002 \uFE12 \uFE52 \uFF0E \uFF61 ]
0x005B, 0x002E, 0x2024, 0x3002, 0xFE12, 0xFE52, 0xFF0E, 0xFF61, 0x005D, 0x0000};
static const UChar gCommaEquivalentsPattern[] = {
// [ , \u060C \u066B \u3001 \uFE10 \uFE11 \uFE50 \uFE51 \uFF0C \uFF64 ]
0x005B, 0x002C, 0x060C, 0x066B, 0x3001, 0xFE10, 0xFE11, 0xFE50, 0xFE51, 0xFF0C, 0xFF64, 0x005D, 0x0000};
// 0x005C 0x0020 is a backslash-escaped space; 0x2000 0x002D 0x200A is the
// range \u2000-\u200A.
static const UChar gOtherGroupingSeparatorsPattern[] = {
// [ \ SPACE ' NBSP \u066C \u2000 - \u200A \u2018 \u2019 \u202F \u205F \u3000 \uFF07 ]
0x005B, 0x005C, 0x0020, 0x0027, 0x00A0, 0x066C, 0x2000, 0x002D, 0x200A, 0x2018, 0x2019, 0x202F, 0x205F, 0x3000, 0xFF07, 0x005D, 0x0000};
// 0x005C 0x002D is a backslash-escaped hyphen-minus, not a range operator.
static const UChar gDashEquivalentsPattern[] = {
// [ \ - HYPHEN F_DASH N_DASH MINUS ]
0x005B, 0x005C, 0x002D, 0x2010, 0x2012, 0x2013, 0x2212, 0x005D, 0x0000};
// The "strict" dot and comma patterns list fewer code points than the
// non-strict ones above.
static const UChar gStrictDotEquivalentsPattern[] = {
// [ . \u2024 \uFE52 \uFF0E \uFF61 ]
0x005B, 0x002E, 0x2024, 0xFE52, 0xFF0E, 0xFF61, 0x005D, 0x0000};
static const UChar gStrictCommaEquivalentsPattern[] = {
// [ , \u066B \uFE10 \uFE50 \uFF0C ]
0x005B, 0x002C, 0x066B, 0xFE10, 0xFE50, 0xFF0C, 0x005D, 0x0000};
// Same contents as gOtherGroupingSeparatorsPattern.
static const UChar gStrictOtherGroupingSeparatorsPattern[] = {
// [ \ SPACE ' NBSP \u066C \u2000 - \u200A \u2018 \u2019 \u202F \u205F \u3000 \uFF07 ]
0x005B, 0x005C, 0x0020, 0x0027, 0x00A0, 0x066C, 0x2000, 0x002D, 0x200A, 0x2018, 0x2019, 0x202F, 0x205F, 0x3000, 0xFF07, 0x005D, 0x0000};
static const UChar gStrictDashEquivalentsPattern[] = {
// [ \ - MINUS ]
0x005B, 0x005C, 0x002D, 0x2212, 0x005D, 0x0000};
// The two tables below are plain lists of code points (UChar32), not
// pattern strings; their element type matches the `raw` parameter of
// initUnicodeSet().
static const UChar32 gMinusSigns[] = {
0x002D,
0x207B,
0x208B,
0x2212,
0x2796,
0xFE63,
0xFF0D};
static const UChar32 gPlusSigns[] = {
0x002B,
0x207A,
0x208A,
0x2795,
0xfB29,
0xFE62,
0xFF0B};
static void initUnicodeSet(const UChar32 *raw, int32_t len, UnicodeSet *s) {
for (int32_t i = 0; i < len; ++i) {
s->add(raw[i]);
}
}
/**
 * Builds and freezes every UnicodeSet held by this object.
 *
 * The base sets are parsed from the g*Pattern arrays; the two "default
 * grouping separators" sets are the union of the matching dot, comma and
 * other-grouping sets; the sign sets are filled from gMinusSigns/gPlusSigns.
 *
 * @param status Receives any error reported while parsing the patterns, and
 *               is set to U_MEMORY_ALLOCATION_ERROR if any set could not be
 *               allocated. In the allocation-failure case all sets are
 *               released again via cleanup() and left NULL.
 */
DecimalFormatStaticSets::DecimalFormatStaticSets(UErrorCode &status)
: fDotEquivalents(NULL),
  fCommaEquivalents(NULL),
  fOtherGroupingSeparators(NULL),
  fDashEquivalents(NULL),
  fStrictDotEquivalents(NULL),
  fStrictCommaEquivalents(NULL),
  fStrictOtherGroupingSeparators(NULL),
  fStrictDashEquivalents(NULL),
  fDefaultGroupingSeparators(NULL),
  fStrictDefaultGroupingSeparators(NULL),
  fMinusSigns(NULL),
  fPlusSigns(NULL)
{
    // Base sets: none of these depends on another set.
    fDotEquivalents = new UnicodeSet(UnicodeString(TRUE, gDotEquivalentsPattern, -1), status);
    fCommaEquivalents = new UnicodeSet(UnicodeString(TRUE, gCommaEquivalentsPattern, -1), status);
    fOtherGroupingSeparators = new UnicodeSet(UnicodeString(TRUE, gOtherGroupingSeparatorsPattern, -1), status);
    fDashEquivalents = new UnicodeSet(UnicodeString(TRUE, gDashEquivalentsPattern, -1), status);
    fStrictDotEquivalents = new UnicodeSet(UnicodeString(TRUE, gStrictDotEquivalentsPattern, -1), status);
    fStrictCommaEquivalents = new UnicodeSet(UnicodeString(TRUE, gStrictCommaEquivalentsPattern, -1), status);
    fStrictOtherGroupingSeparators = new UnicodeSet(UnicodeString(TRUE, gStrictOtherGroupingSeparatorsPattern, -1), status);
    fStrictDashEquivalents = new UnicodeSet(UnicodeString(TRUE, gStrictDashEquivalentsPattern, -1), status);
    fMinusSigns = new UnicodeSet();
    fPlusSigns = new UnicodeSet();

    // The base sets are dereferenced below, so they must be checked for
    // allocation failure before the union sets are built from them.
    if (fDotEquivalents == NULL || fCommaEquivalents == NULL ||
        fOtherGroupingSeparators == NULL || fDashEquivalents == NULL ||
        fStrictDotEquivalents == NULL || fStrictCommaEquivalents == NULL ||
        fStrictOtherGroupingSeparators == NULL || fStrictDashEquivalents == NULL ||
        fMinusSigns == NULL || fPlusSigns == NULL) {
        cleanup();
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // Union sets: dot + comma + other grouping separators (lenient and strict).
    fDefaultGroupingSeparators = new UnicodeSet(*fDotEquivalents);
    fStrictDefaultGroupingSeparators = new UnicodeSet(*fStrictDotEquivalents);
    if (fDefaultGroupingSeparators == NULL || fStrictDefaultGroupingSeparators == NULL) {
        cleanup();
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    fDefaultGroupingSeparators->addAll(*fCommaEquivalents);
    fDefaultGroupingSeparators->addAll(*fOtherGroupingSeparators);
    fStrictDefaultGroupingSeparators->addAll(*fStrictCommaEquivalents);
    fStrictDefaultGroupingSeparators->addAll(*fStrictOtherGroupingSeparators);

    initUnicodeSet(
            gMinusSigns,
            UPRV_LENGTHOF(gMinusSigns),
            fMinusSigns);
    initUnicodeSet(
            gPlusSigns,
            UPRV_LENGTHOF(gPlusSigns),
            fPlusSigns);

    // Freeze all the sets
    fDotEquivalents->freeze();
    fCommaEquivalents->freeze();
    fOtherGroupingSeparators->freeze();
    fDashEquivalents->freeze();
    fStrictDotEquivalents->freeze();
    fStrictCommaEquivalents->freeze();
    fStrictOtherGroupingSeparators->freeze();
    fStrictDashEquivalents->freeze();
    fDefaultGroupingSeparators->freeze();
    fStrictDefaultGroupingSeparators->freeze();
    fMinusSigns->freeze();
    fPlusSigns->freeze();
}
// Destructor: releases every owned UnicodeSet by delegating to cleanup().
DecimalFormatStaticSets::~DecimalFormatStaticSets() {
cleanup();
}
// Deletes every UnicodeSet owned by this object and resets each pointer to
// NULL, so calling it again (e.g. from the destructor after a failed
// constructor) is harmless.
void DecimalFormatStaticSets::cleanup() { // Be sure to clean up newly added fields!
    delete fDotEquivalents; fDotEquivalents = NULL;
    delete fCommaEquivalents; fCommaEquivalents = NULL;
    delete fOtherGroupingSeparators; fOtherGroupingSeparators = NULL;
    delete fDashEquivalents; fDashEquivalents = NULL;
    delete fStrictDotEquivalents; fStrictDotEquivalents = NULL;
    delete fStrictCommaEquivalents; fStrictCommaEquivalents = NULL;
    delete fStrictOtherGroupingSeparators; fStrictOtherGroupingSeparators = NULL;
    delete fStrictDashEquivalents; fStrictDashEquivalents = NULL;
    delete fDefaultGroupingSeparators; fDefaultGroupingSeparators = NULL;
    delete fStrictDefaultGroupingSeparators; fStrictDefaultGroupingSeparators = NULL;
    delete fMinusSigns; fMinusSigns = NULL;
    delete fPlusSigns; fPlusSigns = NULL;
}
// The singleton instance; created by initSets() and deleted by decimfmt_cleanup().
static DecimalFormatStaticSets *gStaticSets;
// Init-once guard passed to umtx_initOnce() so that initSets() builds gStaticSets.
static icu::UInitOnce gStaticSetsInitOnce = U_INITONCE_INITIALIZER;
//------------------------------------------------------------------------------
//
// decimfmt_cleanup Memory cleanup function, free/delete all
// cached memory. Called by ICU's u_cleanup() function.
//
//------------------------------------------------------------------------------
U_CDECL_BEGIN
// Cleanup callback registered by initSets() through ucln_i18n_registerCleanup().
// Deletes the singleton, clears the pointer and resets the init-once guard
// (presumably so that a later call can build the sets again). Always returns TRUE.
static UBool U_CALLCONV
decimfmt_cleanup(void)
{
delete gStaticSets;
gStaticSets = NULL;
gStaticSetsInitOnce.reset();
return TRUE;
}
// Init-once callback: registers the cleanup hook and creates the singleton.
// On return gStaticSets is either a fully constructed object or NULL; when
// it is NULL, `status` holds the construction error or
// U_MEMORY_ALLOCATION_ERROR.
static void U_CALLCONV initSets(UErrorCode &status) {
    U_ASSERT(gStaticSets == NULL);
    ucln_i18n_registerCleanup(UCLN_I18N_DECFMT, decimfmt_cleanup);

    DecimalFormatStaticSets *created = new DecimalFormatStaticSets(status);
    if (U_FAILURE(status)) {
        // Construction reported an error: discard whatever was built.
        delete created;
        created = NULL;
    } else if (created == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    gStaticSets = created;
}
U_CDECL_END
// Returns the singleton, running initSets() through the init-once guard first.
// `status` receives any initialization error; gStaticSets is returned as-is,
// which is NULL when initSets() failed.
const DecimalFormatStaticSets *DecimalFormatStaticSets::getStaticSets(UErrorCode &status) {
umtx_initOnce(gStaticSetsInitOnce, initSets, status);
return gStaticSets;
}
// Returns the set of characters considered equivalent to `decimal`.
// Membership is always tested against the lenient dot/comma sets; the
// strict variant of the matching family is returned when `strictParse`
// is true. Returns NULL if the static sets cannot be initialized or if
// `decimal` is neither a dot nor a comma equivalent.
const UnicodeSet *DecimalFormatStaticSets::getSimilarDecimals(UChar32 decimal, UBool strictParse)
{
    UErrorCode initStatus = U_ZERO_ERROR;
    umtx_initOnce(gStaticSetsInitOnce, initSets, initStatus);
    if (U_FAILURE(initStatus)) {
        return NULL;
    }

    const DecimalFormatStaticSets *sets = gStaticSets;
    if (sets->fDotEquivalents->contains(decimal)) {
        if (strictParse) {
            return sets->fStrictDotEquivalents;
        }
        return sets->fDotEquivalents;
    }
    if (sets->fCommaEquivalents->contains(decimal)) {
        if (strictParse) {
            return sets->fStrictCommaEquivalents;
        }
        return sets->fCommaEquivalents;
    }

    // No equivalence family matched.
    return NULL;
}
U_NAMESPACE_END
#endif // !UCONFIG_NO_FORMATTING

View File

@ -1,69 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2009-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* This file contains declarations for the class DecimalFormatStaticSets
*
* DecimalFormatStaticSets holds the UnicodeSets that are needed for lenient
* parsing of decimal and group separators.
********************************************************************************
*/
#ifndef DECFMTST_H
#define DECFMTST_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/uobject.h"
U_NAMESPACE_BEGIN
class UnicodeSet;
// Holder for the UnicodeSets used for lenient parsing of decimal and group
// separators. The object owns every set it points to and deletes them in
// cleanup().
class DecimalFormatStaticSets : public UMemory
{
public:
// Constructor and Destructor not for general use.
// Public to permit access from plain C implementation functions.
DecimalFormatStaticSets(UErrorCode &status);
~DecimalFormatStaticSets();
/**
* Return a pointer to a lazy-initialized singleton instance of this class.
*/
static const DecimalFormatStaticSets *getStaticSets(UErrorCode &status);
/**
* Return the set of characters equivalent to the given decimal separator:
* the dot or comma family, in its strict variant when strictParse is true.
* Returns NULL when the singleton cannot be initialized or when the
* character belongs to neither family.
*/
static const UnicodeSet *getSimilarDecimals(UChar32 decimal, UBool strictParse);
// Lenient sets.
UnicodeSet *fDotEquivalents;
UnicodeSet *fCommaEquivalents;
UnicodeSet *fOtherGroupingSeparators;
UnicodeSet *fDashEquivalents;
// Strict-mode counterparts of the four sets above.
UnicodeSet *fStrictDotEquivalents;
UnicodeSet *fStrictCommaEquivalents;
UnicodeSet *fStrictOtherGroupingSeparators;
UnicodeSet *fStrictDashEquivalents;
// Unions of the dot, comma and other-grouping sets (lenient / strict).
UnicodeSet *fDefaultGroupingSeparators;
UnicodeSet *fStrictDefaultGroupingSeparators;
// Recognized minus and plus sign code points.
UnicodeSet *fMinusSigns;
UnicodeSet *fPlusSigns;
private:
// Deletes all sets and resets the pointers to NULL.
void cleanup();
};
U_NAMESPACE_END
#endif // !UCONFIG_NO_FORMATTING
#endif // DECFMTST_H

View File

@ -1,656 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 1997-2015, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
#include "uassert.h"
#include "decimalformatpattern.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/dcfmtsym.h"
#include "unicode/format.h"
#include "unicode/utf16.h"
#include "decimalformatpatternimpl.h"
#ifdef FMT_DEBUG
#define debug(x) printf("%s:%d: %s\n", __FILE__,__LINE__, x);
#else
#define debug(x)
#endif
U_NAMESPACE_BEGIN
// TODO: Travis Keep: Copied from numfmt.cpp
// Maximum integer / fraction digit counts applied when the pattern is empty.
static int32_t kDoubleIntegerDigits = 309;
static int32_t kDoubleFractionDigits = 340;
// TODO: Travis Keep: Copied from numfmt.cpp
// Default maximum integer digits; also used for every non-exponential pattern.
static int32_t gDefaultMaxIntegerDigits = 2000000000;
// TODO: Travis Keep: This function was copied from format.cpp
static void syntaxError(const UnicodeString& pattern,
int32_t pos,
UParseError& parseError) {
parseError.offset = pos;
parseError.line=0; // we are not using line number
// for pre-context
int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1
/* subtract 1 so that we have room for null*/));
int32_t stop = pos;
pattern.extract(start,stop-start,parseError.preContext,0);
//null terminate the buffer
parseError.preContext[stop-start] = 0;
//for post-context
start = pattern.moveIndex32(pos, 1);
stop = pos + U_PARSE_CONTEXT_LEN - 1;
if (stop > pattern.length()) {
stop = pattern.length();
}
pattern.extract(start, stop - start, parseError.postContext, 0);
//null terminate the buffer
parseError.postContext[stop-start]= 0;
}
// Default state of a parsed pattern: at least one integer digit, at most
// three fraction digits, grouping flagged as used but with zero group sizes,
// multiplier 1, no padding width, no rounding increment, and both the
// positive and negative affix patterns marked bogus (not yet set).
DecimalFormatPattern::DecimalFormatPattern()
: fMinimumIntegerDigits(1),
fMaximumIntegerDigits(gDefaultMaxIntegerDigits),
fMinimumFractionDigits(0),
fMaximumFractionDigits(3),
fUseSignificantDigits(FALSE),
fMinimumSignificantDigits(1),
fMaximumSignificantDigits(6),
fUseExponentialNotation(FALSE),
fMinExponentDigits(0),
fExponentSignAlwaysShown(FALSE),
fCurrencySignCount(fgCurrencySignCountZero),
fGroupingUsed(TRUE),
fGroupingSize(0),
fGroupingSize2(0),
fMultiplier(1),
fDecimalSeparatorAlwaysShown(FALSE),
fFormatWidth(0),
fRoundingIncrementUsed(FALSE),
fRoundingIncrement(),
fPad(kDefaultPad),
fNegPatternsBogus(TRUE),
fPosPatternsBogus(TRUE),
fNegPrefixPattern(),
fNegSuffixPattern(),
fPosPrefixPattern(),
fPosSuffixPattern(),
fPadPosition(DecimalFormatPattern::kPadBeforePrefix) {
}
// Initializes the parser with the unlocalized pattern characters (the
// kPattern* constants); useSymbols() can later replace them with the
// characters of a specific DecimalFormatSymbols.
DecimalFormatPatternParser::DecimalFormatPatternParser() :
fZeroDigit(kPatternZeroDigit),
fSigDigit(kPatternSignificantDigit),
fGroupingSeparator((UChar)kPatternGroupingSeparator),
fDecimalSeparator((UChar)kPatternDecimalSeparator),
fPercent((UChar)kPatternPercent),
fPerMill((UChar)kPatternPerMill),
fDigit((UChar)kPatternDigit),
fSeparator((UChar)kPatternSeparator),
fExponent((UChar)kPatternExponent),
fPlus((UChar)kPatternPlus),
fMinus((UChar)kPatternMinus),
fPadEscape((UChar)kPatternPadEscape) {
}
void DecimalFormatPatternParser::useSymbols(
const DecimalFormatSymbols& symbols) {
fZeroDigit = symbols.getConstSymbol(
DecimalFormatSymbols::kZeroDigitSymbol).char32At(0);
fSigDigit = symbols.getConstSymbol(
DecimalFormatSymbols::kSignificantDigitSymbol).char32At(0);
fGroupingSeparator = symbols.getConstSymbol(
DecimalFormatSymbols::kGroupingSeparatorSymbol);
fDecimalSeparator = symbols.getConstSymbol(
DecimalFormatSymbols::kDecimalSeparatorSymbol);
fPercent = symbols.getConstSymbol(
DecimalFormatSymbols::kPercentSymbol);
fPerMill = symbols.getConstSymbol(
DecimalFormatSymbols::kPerMillSymbol);
fDigit = symbols.getConstSymbol(
DecimalFormatSymbols::kDigitSymbol);
fSeparator = symbols.getConstSymbol(
DecimalFormatSymbols::kPatternSeparatorSymbol);
fExponent = symbols.getConstSymbol(
DecimalFormatSymbols::kExponentialSymbol);
fPlus = symbols.getConstSymbol(
DecimalFormatSymbols::kPlusSignSymbol);
fMinus = symbols.getConstSymbol(
DecimalFormatSymbols::kMinusSignSymbol);
fPadEscape = symbols.getConstSymbol(
DecimalFormatSymbols::kPadEscapeSymbol);
}
// Parses `pattern` into `out` using the pattern characters currently
// configured on this parser.
//
// - Does nothing if `status` already indicates failure.
// - `out` is first reset to a default-constructed DecimalFormatPattern.
// - The pattern has up to two parts divided by fSeparator: part 0 is the
//   positive pattern, part 1 the negative one. Numeric properties are taken
//   from part 0 only; from part 1 only the prefix and suffix are kept.
// - On a syntax error, `status` is set to the specific error code,
//   `parseError` is filled in through syntaxError(), and the function returns
//   at once. `out` may be partially updated at that point (for example
//   fCurrencySignCount is written while scanning).
// - An empty pattern yields a fixed set of defaults.
// - If there is no negative pattern, or its affixes equal the positive ones,
//   the negative prefix becomes quote + minus + positive prefix.
// - Finally the four affix strings are parsed into their AffixPattern fields.
void
DecimalFormatPatternParser::applyPatternWithoutExpandAffix(
const UnicodeString& pattern,
DecimalFormatPattern& out,
UParseError& parseError,
UErrorCode& status) {
if (U_FAILURE(status))
{
return;
}
out = DecimalFormatPattern();
// Clear error struct
parseError.offset = -1;
parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
// TODO: Travis Keep: This won't always work.
UChar nineDigit = (UChar)(fZeroDigit + 9);
int32_t digitLen = fDigit.length();
int32_t groupSepLen = fGroupingSeparator.length();
int32_t decimalSepLen = fDecimalSeparator.length();
int32_t pos = 0;
int32_t patLen = pattern.length();
// Part 0 is the positive pattern. Part 1, if present, is the negative
// pattern.
for (int32_t part=0; part<2 && pos<patLen; ++part) {
// The subpart ranges from 0 to 4: 0=pattern proper, 1=prefix,
// 2=suffix, 3=prefix in quote, 4=suffix in quote. Subpart 0 is
// between the prefix and suffix, and consists of pattern
// characters. In the prefix and suffix, percent, perMill, and
// currency symbols are recognized and translated.
int32_t subpart = 1, sub0Start = 0, sub0Limit = 0, sub2Limit = 0;
// It's important that we don't change any fields of this object
// prematurely. We set the following variables for the multiplier,
// grouping, etc., and then only change the actual object fields if
// everything parses correctly. This also lets us register
// the data from part 0 and ignore the part 1, except for the
// prefix and suffix.
UnicodeString prefix;
UnicodeString suffix;
int32_t decimalPos = -1;
int32_t multiplier = 1;
int32_t digitLeftCount = 0, zeroDigitCount = 0, digitRightCount = 0, sigDigitCount = 0;
// -1 means "no grouping separator seen yet"; otherwise the number of
// digits seen since the last (groupingCount) or the one before the last
// (groupingCount2) grouping separator.
// NOTE(review): these counters are int8_t; presumably a very long digit
// run could overflow them -- confirm.
int8_t groupingCount = -1;
int8_t groupingCount2 = -1;
int32_t padPos = -1;
UChar32 padChar = 0;
int32_t roundingPos = -1;
DigitList roundingInc;
int8_t expDigits = -1;
UBool expSignAlways = FALSE;
// The affix is either the prefix or the suffix.
UnicodeString* affix = &prefix;
int32_t start = pos;
UBool isPartDone = FALSE;
UChar32 ch;
// Scanner loop: each iteration examines the text at pos in the context of
// the current subpart. A `continue` re-examines the same position after a
// subpart change; a `break` leaves the switch after pos has been advanced.
for (; !isPartDone && pos < patLen; ) {
// Todo: account for surrogate pairs
ch = pattern.char32At(pos);
switch (subpart) {
case 0: // Pattern proper subpart (between prefix & suffix)
// Process the digits, decimal, and grouping characters. We
// record five pieces of information. We expect the digits
// to occur in the pattern ####00.00####, and we record the
// number of left digits, zero (central) digits, and right
// digits. The position of the last grouping character is
// recorded (should be somewhere within the first two blocks
// of characters), as is the position of the decimal point,
// if any (should be in the zero digits). If there is no
// decimal point, then there should be no right digits.
if (pattern.compare(pos, digitLen, fDigit) == 0) {
if (zeroDigitCount > 0 || sigDigitCount > 0) {
++digitRightCount;
} else {
++digitLeftCount;
}
if (groupingCount >= 0 && decimalPos < 0) {
++groupingCount;
}
pos += digitLen;
} else if ((ch >= fZeroDigit && ch <= nineDigit) ||
ch == fSigDigit) {
if (digitRightCount > 0) {
// Unexpected '0'
debug("Unexpected '0'")
status = U_UNEXPECTED_TOKEN;
syntaxError(pattern,pos,parseError);
return;
}
if (ch == fSigDigit) {
++sigDigitCount;
} else {
// The first non-zero digit marks where the rounding increment
// starts; from then on every digit is appended to roundingInc.
if (ch != fZeroDigit && roundingPos < 0) {
roundingPos = digitLeftCount + zeroDigitCount;
}
if (roundingPos >= 0) {
roundingInc.append((char)(ch - fZeroDigit + '0'));
}
++zeroDigitCount;
}
if (groupingCount >= 0 && decimalPos < 0) {
++groupingCount;
}
pos += U16_LENGTH(ch);
} else if (pattern.compare(pos, groupSepLen, fGroupingSeparator) == 0) {
if (decimalPos >= 0) {
// Grouping separator after decimal
debug("Grouping separator after decimal")
status = U_UNEXPECTED_TOKEN;
syntaxError(pattern,pos,parseError);
return;
}
groupingCount2 = groupingCount;
groupingCount = 0;
pos += groupSepLen;
} else if (pattern.compare(pos, decimalSepLen, fDecimalSeparator) == 0) {
if (decimalPos >= 0) {
// Multiple decimal separators
debug("Multiple decimal separators")
status = U_MULTIPLE_DECIMAL_SEPARATORS;
syntaxError(pattern,pos,parseError);
return;
}
// Intentionally incorporate the digitRightCount,
// even though it is illegal for this to be > 0
// at this point. We check pattern syntax below.
decimalPos = digitLeftCount + zeroDigitCount + digitRightCount;
pos += decimalSepLen;
} else {
if (pattern.compare(pos, fExponent.length(), fExponent) == 0) {
if (expDigits >= 0) {
// Multiple exponential symbols
debug("Multiple exponential symbols")
status = U_MULTIPLE_EXPONENTIAL_SYMBOLS;
syntaxError(pattern,pos,parseError);
return;
}
if (groupingCount >= 0) {
// Grouping separator in exponential pattern
debug("Grouping separator in exponential pattern")
status = U_MALFORMED_EXPONENTIAL_PATTERN;
syntaxError(pattern,pos,parseError);
return;
}
pos += fExponent.length();
// Check for positive prefix
if (pos < patLen
&& pattern.compare(pos, fPlus.length(), fPlus) == 0) {
expSignAlways = TRUE;
pos += fPlus.length();
}
// Use lookahead to parse out the exponential part of the
// pattern, then jump into suffix subpart.
expDigits = 0;
while (pos < patLen &&
pattern.char32At(pos) == fZeroDigit) {
++expDigits;
pos += U16_LENGTH(fZeroDigit);
}
// 1. Require at least one mantissa pattern digit
// 2. Disallow "#+ @" in mantissa
// 3. Require at least one exponent pattern digit
if (((digitLeftCount + zeroDigitCount) < 1 &&
(sigDigitCount + digitRightCount) < 1) ||
(sigDigitCount > 0 && digitLeftCount > 0) ||
expDigits < 1) {
// Malformed exponential pattern
debug("Malformed exponential pattern")
status = U_MALFORMED_EXPONENTIAL_PATTERN;
syntaxError(pattern,pos,parseError);
return;
}
}
// Transition to suffix subpart
subpart = 2; // suffix subpart
affix = &suffix;
sub0Limit = pos;
continue;
}
break;
case 1: // Prefix subpart
case 2: // Suffix subpart
// Process the prefix / suffix characters
// Process unquoted characters seen in prefix or suffix
// subpart.
// Several syntax characters implicitly begins the
// next subpart if we are in the prefix; otherwise
// they are illegal if unquoted.
if (!pattern.compare(pos, digitLen, fDigit) ||
!pattern.compare(pos, groupSepLen, fGroupingSeparator) ||
!pattern.compare(pos, decimalSepLen, fDecimalSeparator) ||
(ch >= fZeroDigit && ch <= nineDigit) ||
ch == fSigDigit) {
if (subpart == 1) { // prefix subpart
subpart = 0; // pattern proper subpart
sub0Start = pos; // Reprocess this character
continue;
} else {
status = U_UNQUOTED_SPECIAL;
syntaxError(pattern,pos,parseError);
return;
}
} else if (ch == kCurrencySign) {
affix->append(kQuote); // Encode currency
// Use lookahead to determine if the currency sign is
// doubled or not.
U_ASSERT(U16_LENGTH(kCurrencySign) == 1);
if ((pos+1) < pattern.length() && pattern[pos+1] == kCurrencySign) {
affix->append(kCurrencySign);
++pos; // Skip over the doubled character
if ((pos+1) < pattern.length() &&
pattern[pos+1] == kCurrencySign) {
affix->append(kCurrencySign);
++pos; // Skip over the doubled character
out.fCurrencySignCount = fgCurrencySignCountInPluralFormat;
} else {
out.fCurrencySignCount = fgCurrencySignCountInISOFormat;
}
} else {
out.fCurrencySignCount = fgCurrencySignCountInSymbolFormat;
}
// Fall through to append(ch)
} else if (ch == kQuote) {
// A quote outside quotes indicates either the opening
// quote or two quotes, which is a quote literal. That is,
// we have the first quote in 'do' or o''clock.
U_ASSERT(U16_LENGTH(kQuote) == 1);
++pos;
if (pos < pattern.length() && pattern[pos] == kQuote) {
affix->append(kQuote); // Encode quote
// Fall through to append(ch)
} else {
subpart += 2; // open quote
continue;
}
} else if (pattern.compare(pos, fSeparator.length(), fSeparator) == 0) {
// Don't allow separators in the prefix, and don't allow
// separators in the second pattern (part == 1).
if (subpart == 1 || part == 1) {
// Unexpected separator
debug("Unexpected separator")
status = U_UNEXPECTED_TOKEN;
syntaxError(pattern,pos,parseError);
return;
}
sub2Limit = pos;
isPartDone = TRUE; // Go to next part
pos += fSeparator.length();
break;
} else if (pattern.compare(pos, fPercent.length(), fPercent) == 0) {
// Next handle characters which are appended directly.
if (multiplier != 1) {
// Too many percent/perMill characters
debug("Too many percent characters")
status = U_MULTIPLE_PERCENT_SYMBOLS;
syntaxError(pattern,pos,parseError);
return;
}
affix->append(kQuote); // Encode percent/perMill
affix->append(kPatternPercent); // Use unlocalized pattern char
multiplier = 100;
pos += fPercent.length();
break;
} else if (pattern.compare(pos, fPerMill.length(), fPerMill) == 0) {
// Next handle characters which are appended directly.
if (multiplier != 1) {
// Too many percent/perMill characters
debug("Too many perMill characters")
status = U_MULTIPLE_PERMILL_SYMBOLS;
syntaxError(pattern,pos,parseError);
return;
}
affix->append(kQuote); // Encode percent/perMill
affix->append(kPatternPerMill); // Use unlocalized pattern char
multiplier = 1000;
pos += fPerMill.length();
break;
} else if (pattern.compare(pos, fPadEscape.length(), fPadEscape) == 0) {
// NOTE(review): the (pos+1) end-of-pattern test below looks like it
// assumes a pad escape that is one code unit long -- confirm.
if (padPos >= 0 || // Multiple pad specifiers
(pos+1) == pattern.length()) { // Nothing after padEscape
debug("Multiple pad specifiers")
status = U_MULTIPLE_PAD_SPECIFIERS;
syntaxError(pattern,pos,parseError);
return;
}
padPos = pos;
pos += fPadEscape.length();
padChar = pattern.char32At(pos);
pos += U16_LENGTH(padChar);
break;
} else if (pattern.compare(pos, fMinus.length(), fMinus) == 0) {
affix->append(kQuote); // Encode minus
affix->append(kPatternMinus);
pos += fMinus.length();
break;
} else if (pattern.compare(pos, fPlus.length(), fPlus) == 0) {
affix->append(kQuote); // Encode plus
affix->append(kPatternPlus);
pos += fPlus.length();
break;
}
// Unquoted, non-special characters fall through to here, as
// well as other code which needs to append something to the
// affix.
affix->append(ch);
pos += U16_LENGTH(ch);
break;
case 3: // Prefix subpart, in quote
case 4: // Suffix subpart, in quote
// A quote within quotes indicates either the closing
// quote or two quotes, which is a quote literal. That is,
// we have the second quote in 'do' or 'don''t'.
if (ch == kQuote) {
++pos;
if (pos < pattern.length() && pattern[pos] == kQuote) {
affix->append(kQuote); // Encode quote
// Fall through to append(ch)
} else {
subpart -= 2; // close quote
continue;
}
}
affix->append(ch);
pos += U16_LENGTH(ch);
break;
}
}
// A limit still at 0 means the corresponding boundary was never reached
// while scanning, so it defaults to the end of the pattern.
if (sub0Limit == 0) {
sub0Limit = pattern.length();
}
if (sub2Limit == 0) {
sub2Limit = pattern.length();
}
/* Handle patterns with no '0' pattern character. These patterns
* are legal, but must be recodified to make sense. "##.###" ->
* "#0.###". ".###" -> ".0##".
*
* We allow patterns of the form "####" to produce a zeroDigitCount
* of zero (got that?); although this seems like it might make it
* possible for format() to produce empty strings, format() checks
* for this condition and outputs a zero digit in this situation.
* Having a zeroDigitCount of zero yields a minimum integer digits
* of zero, which allows proper round-trip patterns. We don't want
* "#" to become "#0" when toPattern() is called (even though that's
* what it really is, semantically).
*/
if (zeroDigitCount == 0 && sigDigitCount == 0 &&
digitLeftCount > 0 && decimalPos >= 0) {
// Handle "###.###" and "###." and ".###"
int n = decimalPos;
if (n == 0)
++n; // Handle ".###"
digitRightCount = digitLeftCount - n;
digitLeftCount = n - 1;
zeroDigitCount = 1;
}
// Do syntax checking on the digits, decimal points, and quotes.
if ((decimalPos < 0 && digitRightCount > 0 && sigDigitCount == 0) ||
(decimalPos >= 0 &&
(sigDigitCount > 0 ||
decimalPos < digitLeftCount ||
decimalPos > (digitLeftCount + zeroDigitCount))) ||
groupingCount == 0 || groupingCount2 == 0 ||
(sigDigitCount > 0 && zeroDigitCount > 0) ||
subpart > 2)
{ // subpart > 2 == unmatched quote
debug("Syntax error")
status = U_PATTERN_SYNTAX_ERROR;
syntaxError(pattern,pos,parseError);
return;
}
// Make sure pad is at legal position before or after affix.
// Up to here padPos is a string index; from here on it holds one of the
// DecimalFormatPattern::EPadPosition values.
// NOTE(review): the "+2" offsets look like they assume the pad escape and
// the pad character are one code unit each -- confirm.
if (padPos >= 0) {
if (padPos == start) {
padPos = DecimalFormatPattern::kPadBeforePrefix;
} else if (padPos+2 == sub0Start) {
padPos = DecimalFormatPattern::kPadAfterPrefix;
} else if (padPos == sub0Limit) {
padPos = DecimalFormatPattern::kPadBeforeSuffix;
} else if (padPos+2 == sub2Limit) {
padPos = DecimalFormatPattern::kPadAfterSuffix;
} else {
// Illegal pad position
debug("Illegal pad position")
status = U_ILLEGAL_PAD_POSITION;
syntaxError(pattern,pos,parseError);
return;
}
}
// Commit the results of this part. Part 0 supplies every numeric property
// plus the positive affixes; part 1 supplies only the negative affixes.
if (part == 0) {
out.fPosPatternsBogus = FALSE;
out.fPosPrefixPattern = prefix;
out.fPosSuffixPattern = suffix;
out.fNegPatternsBogus = TRUE;
out.fNegPrefixPattern.remove();
out.fNegSuffixPattern.remove();
out.fUseExponentialNotation = (expDigits >= 0);
if (out.fUseExponentialNotation) {
out.fMinExponentDigits = expDigits;
}
out.fExponentSignAlwaysShown = expSignAlways;
int32_t digitTotalCount = digitLeftCount + zeroDigitCount + digitRightCount;
// The effectiveDecimalPos is the position the decimal is at or
// would be at if there is no decimal. Note that if
// decimalPos<0, then digitTotalCount == digitLeftCount +
// zeroDigitCount.
int32_t effectiveDecimalPos = decimalPos >= 0 ? decimalPos : digitTotalCount;
UBool isSigDig = (sigDigitCount > 0);
out.fUseSignificantDigits = isSigDig;
if (isSigDig) {
out.fMinimumSignificantDigits = sigDigitCount;
out.fMaximumSignificantDigits = sigDigitCount + digitRightCount;
} else {
int32_t minInt = effectiveDecimalPos - digitLeftCount;
out.fMinimumIntegerDigits = minInt;
out.fMaximumIntegerDigits = out.fUseExponentialNotation
? digitLeftCount + out.fMinimumIntegerDigits
: gDefaultMaxIntegerDigits;
out.fMaximumFractionDigits = decimalPos >= 0
? (digitTotalCount - decimalPos) : 0;
out.fMinimumFractionDigits = decimalPos >= 0
? (digitLeftCount + zeroDigitCount - decimalPos) : 0;
}
out.fGroupingUsed = groupingCount > 0;
out.fGroupingSize = (groupingCount > 0) ? groupingCount : 0;
out.fGroupingSize2 = (groupingCount2 > 0 && groupingCount2 != groupingCount)
? groupingCount2 : 0;
out.fMultiplier = multiplier;
out.fDecimalSeparatorAlwaysShown = decimalPos == 0
|| decimalPos == digitTotalCount;
if (padPos >= 0) {
out.fPadPosition = (DecimalFormatPattern::EPadPosition) padPos;
// To compute the format width, first set up sub0Limit -
// sub0Start. Add in prefix/suffix length later.
// fFormatWidth = prefix.length() + suffix.length() +
// sub0Limit - sub0Start;
out.fFormatWidth = sub0Limit - sub0Start;
out.fPad = padChar;
} else {
out.fFormatWidth = 0;
}
if (roundingPos >= 0) {
out.fRoundingIncrementUsed = TRUE;
roundingInc.setDecimalAt(effectiveDecimalPos - roundingPos);
out.fRoundingIncrement = roundingInc;
} else {
out.fRoundingIncrementUsed = FALSE;
}
} else {
out.fNegPatternsBogus = FALSE;
out.fNegPrefixPattern = prefix;
out.fNegSuffixPattern = suffix;
}
}
// An empty pattern never enters the loop above; give it explicit defaults.
if (pattern.length() == 0) {
out.fNegPatternsBogus = TRUE;
out.fNegPrefixPattern.remove();
out.fNegSuffixPattern.remove();
out.fPosPatternsBogus = FALSE;
out.fPosPrefixPattern.remove();
out.fPosSuffixPattern.remove();
out.fMinimumIntegerDigits = 0;
out.fMaximumIntegerDigits = kDoubleIntegerDigits;
out.fMinimumFractionDigits = 0;
out.fMaximumFractionDigits = kDoubleFractionDigits;
out.fUseExponentialNotation = FALSE;
out.fCurrencySignCount = fgCurrencySignCountZero;
out.fGroupingUsed = FALSE;
out.fGroupingSize = 0;
out.fGroupingSize2 = 0;
out.fMultiplier = 1;
out.fDecimalSeparatorAlwaysShown = FALSE;
out.fFormatWidth = 0;
out.fRoundingIncrementUsed = FALSE;
}
// If there was no negative pattern, or if the negative pattern is
// identical to the positive pattern, then prepend the minus sign to the
// positive pattern to form the negative pattern.
if (out.fNegPatternsBogus ||
(out.fNegPrefixPattern == out.fPosPrefixPattern
&& out.fNegSuffixPattern == out.fPosSuffixPattern)) {
out.fNegPatternsBogus = FALSE;
out.fNegSuffixPattern = out.fPosSuffixPattern;
out.fNegPrefixPattern.remove();
out.fNegPrefixPattern.append(kQuote).append(kPatternMinus)
.append(out.fPosPrefixPattern);
}
// TODO: Deprecate/Remove out.fNegSuffixPattern and 3 other fields.
AffixPattern::parseAffixString(
out.fNegSuffixPattern, out.fNegSuffixAffix, status);
AffixPattern::parseAffixString(
out.fPosSuffixPattern, out.fPosSuffixAffix, status);
AffixPattern::parseAffixString(
out.fNegPrefixPattern, out.fNegPrefixAffix, status);
AffixPattern::parseAffixString(
out.fPosPrefixPattern, out.fPosPrefixAffix, status);
}
U_NAMESPACE_END
#endif /* !UCONFIG_NO_FORMATTING */

View File

@ -1,106 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 1997-2015, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
#ifndef _DECIMAL_FORMAT_PATTERN
#define _DECIMAL_FORMAT_PATTERN
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "digitlst.h"
#include "affixpatternparser.h"
U_NAMESPACE_BEGIN
// currency sign count
// Records how many consecutive currency signs appear in a pattern affix:
// none, one (symbol format), two (ISO format) or three (plural format).
enum CurrencySignCount {
fgCurrencySignCountZero,
fgCurrencySignCountInSymbolFormat,
fgCurrencySignCountInISOFormat,
fgCurrencySignCountInPluralFormat
};
class DecimalFormatSymbols;
// Plain data holder for the result of parsing a decimal format pattern.
// Filled in by DecimalFormatPatternParser::applyPatternWithoutExpandAffix().
struct DecimalFormatPattern : public UMemory {
// Where the pad character is placed relative to the prefix / suffix.
enum EPadPosition {
kPadBeforePrefix,
kPadAfterPrefix,
kPadBeforeSuffix,
kPadAfterSuffix
};
DecimalFormatPattern();
// Integer / fraction digit limits (set when significant digits are not used).
int32_t fMinimumIntegerDigits;
int32_t fMaximumIntegerDigits;
int32_t fMinimumFractionDigits;
int32_t fMaximumFractionDigits;
// Significant-digit mode and its limits.
UBool fUseSignificantDigits;
int32_t fMinimumSignificantDigits;
int32_t fMaximumSignificantDigits;
// Exponential notation settings.
UBool fUseExponentialNotation;
int32_t fMinExponentDigits;
UBool fExponentSignAlwaysShown;
// One of the CurrencySignCount values.
int32_t fCurrencySignCount;
// Grouping: fGroupingSize2 is 0 unless a second, different group size was given.
UBool fGroupingUsed;
int32_t fGroupingSize;
int32_t fGroupingSize2;
// 1, 100 (percent) or 1000 (per mill).
int32_t fMultiplier;
UBool fDecimalSeparatorAlwaysShown;
// Padding: fFormatWidth is 0 when the pattern has no pad specifier.
int32_t fFormatWidth;
// Rounding increment; fRoundingIncrement is only set when the flag is TRUE.
UBool fRoundingIncrementUsed;
DigitList fRoundingIncrement;
UChar32 fPad;
// TRUE while the corresponding prefix/suffix patterns have not been set.
UBool fNegPatternsBogus;
UBool fPosPatternsBogus;
// Affixes as encoded pattern strings.
UnicodeString fNegPrefixPattern;
UnicodeString fNegSuffixPattern;
UnicodeString fPosPrefixPattern;
UnicodeString fPosSuffixPattern;
// The same affixes parsed into AffixPattern objects.
AffixPattern fNegPrefixAffix;
AffixPattern fNegSuffixAffix;
AffixPattern fPosPrefixAffix;
AffixPattern fPosSuffixAffix;
EPadPosition fPadPosition;
};
// Parses decimal format pattern strings into DecimalFormatPattern objects.
// The pattern characters it recognizes start out as the unlocalized defaults
// and can be replaced with useSymbols().
class DecimalFormatPatternParser : public UMemory {
public:
DecimalFormatPatternParser();
// Take the pattern characters from the given symbols.
void useSymbols(const DecimalFormatSymbols& symbols);
// Parse `pattern` into `out`; on error, `status` and `parseError` are set.
void applyPatternWithoutExpandAffix(
const UnicodeString& pattern,
DecimalFormatPattern& out,
UParseError& parseError,
UErrorCode& status);
private:
// Declared private; presumably left undefined to forbid copying -- confirm.
DecimalFormatPatternParser(const DecimalFormatPatternParser&);
DecimalFormatPatternParser& operator=(DecimalFormatPatternParser& rhs);
// Pattern characters held as single code points.
UChar32 fZeroDigit;
UChar32 fSigDigit;
// Pattern characters held as strings.
UnicodeString fGroupingSeparator;
UnicodeString fDecimalSeparator;
UnicodeString fPercent;
UnicodeString fPerMill;
UnicodeString fDigit;
UnicodeString fSeparator;
UnicodeString fExponent;
UnicodeString fPlus;
UnicodeString fMinus;
UnicodeString fPadEscape;
};
U_NAMESPACE_END
#endif /* !UCONFIG_NO_FORMATTING */
#endif

View File

@ -1,35 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
********************************************************************************
* Copyright (C) 2015, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************************
*
* File decimalformatpatternimpl.h
********************************************************************************
*/
#ifndef DECIMALFORMATPATTERNIMPL_H
#define DECIMALFORMATPATTERNIMPL_H
#include "unicode/utypes.h"
/* UTF-16 code units for the special characters of a decimal format pattern. */
#define kPatternZeroDigit ((UChar)0x0030) /*'0'*/
#define kPatternSignificantDigit ((UChar)0x0040) /*'@'*/
#define kPatternGroupingSeparator ((UChar)0x002C) /*','*/
#define kPatternDecimalSeparator ((UChar)0x002E) /*'.'*/
#define kPatternPerMill ((UChar)0x2030) /* U+2030 PER MILLE SIGN */
#define kPatternPercent ((UChar)0x0025) /*'%'*/
#define kPatternDigit ((UChar)0x0023) /*'#'*/
#define kPatternSeparator ((UChar)0x003B) /*';'*/
#define kPatternExponent ((UChar)0x0045) /*'E'*/
#define kPatternPlus ((UChar)0x002B) /*'+'*/
#define kPatternMinus ((UChar)0x002D) /*'-'*/
#define kPatternPadEscape ((UChar)0x002A) /*'*'*/
#define kQuote ((UChar)0x0027) /*'\''*/
#define kCurrencySign ((UChar)0x00A4) /* U+00A4 CURRENCY SIGN */
#define kDefaultPad ((UChar)0x0020) /*' '*/
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,549 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
********************************************************************************
* Copyright (C) 2015, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************************
*
* File decimfmtimpl.h
********************************************************************************
*/
#ifndef DECIMFMTIMPL_H
#define DECIMFMTIMPL_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/decimfmt.h"
#include "unicode/uobject.h"
#include "affixpatternparser.h"
#include "digitaffixesandpadding.h"
#include "digitformatter.h"
#include "digitgrouping.h"
#include "precision.h"
U_NAMESPACE_BEGIN
class UnicodeString;
class FieldPosition;
class ValueFormatter;
class FieldPositionHandler;
class FixedDecimal;
/**
* DecimalFormatImpl is the glue code between the legacy DecimalFormat class
* and the new decimal formatting classes. DecimalFormat still handles
* parsing directly. However, DecimalFormat uses attributes of this class
* for parsing when possible.
*
* The public API of this class closely mirrors the legacy API of the
* legacy DecimalFormat deviating only when the legacy API does not make
* sense. For example, although DecimalFormat has a
* getPadCharacterString() method, DecimalFormatImpl has a getPadCharacter()
* method because formatting uses only a single pad character for padding.
*
* Each legacy DecimalFormat instance heap allocates its own instance of
* this class. Most DecimalFormat methods that deal with formatting simply
* delegate to the DecimalFormat's DecimalFormatImpl method.
*
* Because DecimalFormat extends NumberFormat, Each instance of this class
* "borrows" a pointer to the NumberFormat part of its enclosing DecimalFormat
* instance. This way each DecimalFormatImpl instance can read or even modify
* the NumberFormat portion of its enclosing DecimalFormat instance.
*
* Directed acyclic graph (DAG):
*
* This class can be represented as a directed acyclic graph (DAG) where each
* vertex is an attribute, and each directed edge indicates that the value
* of the destination attribute is calculated from the value of the source
* attribute. Attributes with setter methods reside at the bottom of the
* DAG. That is, no edges point to them. We call these independent attributes
* because their values can be set independently of one another. The rest of
* the attributes are derived attributes because their values depend on the
* independent attributes. DecimalFormatImpl often uses the derived
* attributes, not the independent attributes, when formatting numbers.
*
* The independent attributes at the bottom of the DAG correspond to the legacy
* attributes of DecimalFormat while the attributes at the top of the DAG
* correspond to the attributes of the new code. The edges of the DAG
* correspond to the code that handles the complex interaction among all the
* legacy attributes of the DecimalFormat API.
*
* We use a DAG for three reasons.
*
* First, the DAG preserves backward compatibility. Clients of the legacy
* DecimalFormat expect existing getters and setters of each attribute to be
* consistent. That means if a client sets a particular attribute to a new
* value, the attribute should retain that value until the client sets it to
* a new value. The DAG allows these attributes to remain consistent even
* though the new code may not use them when formatting.
*
* Second, the DAG obviates the need to recalculate derived attributes with
* each format. Instead, the DAG "remembers" the values of all derived
* attributes. Only setting an independent attribute requires a recalculation.
* Moreover, setting an independent attribute recalculates only the affected
* dependent attributes rather than all dependent attributes.
*
* Third, the DAG abstracts away the complex interaction among the legacy
* attributes of the DecimalFormat API.
*
* Only the independent attributes of the DAG have setters and getters.
* Derived attributes have no setters (and often no getters either).
*
* Copy and assign:
*
* For copy and assign, DecimalFormatImpl copies and assigns every attribute
* regardless of whether or not it is independent. We do this for simplicity.
*
* Implementation of the DAG:
*
* The DAG consists of three smaller DAGs:
* 1. Grouping attributes
* 2. Precision attributes
* 3. Formatting attributes.
*
* The first two DAGs are simple in that setting any independent attribute
* in the DAG recalculates all the dependent attributes in that DAG.
* The updateGrouping() and updatePrecision() perform the respective
* recalculations.
*
* Because some of the derived formatting attributes are expensive to
* calculate, the formatting attributes DAG is more complex. The
* updateFormatting() method is composed of many updateFormattingXXX()
* methods, each of which recalculates a single derived attribute. The
* updateFormatting() method accepts a bitfield of recently changed
* attributes and passes this bitfield by reference to each of the
* updateFormattingXXX() methods. Each updateFormattingXXX() method checks
* the bitfield to see if any of the attributes it uses to compute the XXX
* attribute changed. If none of them changed, it exits immediately. However,
* if at least one of them changed, it recalculates the XXX attribute and
* sets the corresponding bit in the bitfield. In this way, each
* updateFormattingXXX() method encodes the directed edges in the formatting
* DAG that point to the attribute it is calculating.
*
* Maintenance of the updateFormatting() method.
*
* Use care when changing the updateFormatting() method.
* The updateFormatting() method must call each updateFormattingXXX() in the
* same partial order that the formatting DAG prescribes. That is, the
* attributes near the bottom of the DAG must be calculated before attributes
* further up. As we mentioned in the previous paragraph, the directed edges of
* the formatting DAG are encoded within each updateFormattingXXX() method.
* Finally, adding new attributes may involve adding to the bitmap that the
* updateFormatting() method uses. The top most attributes in the DAG,
* those that do not point to any attributes but only have attributes
* pointing to it, need not have a slot in the bitmap.
*
* Keep in mind that most of the code that makes the legacy DecimalFormat API
* work the way it always has before can be found in these various updateXXX()
* methods. For example the updatePrecisionForScientific() method
* handles the complex interactions among the various precision attributes
* when formatting in scientific notation. Changing the way attributes
* interact often means changing one of these updateXXX() methods.
*
* Conclusion:
*
* The DecimalFormatImpl class is the glue code between the legacy and new
* number formatting code. It uses a directed acyclic graph (DAG) to
* maintain backward compatibility, to make the code efficient, and to
* abstract away the complex interaction among legacy attributes.
*/
class DecimalFormatImpl : public UObject {
public:
// Constructors. Each receives the NumberFormat it is attached to as `super`.
DecimalFormatImpl(
NumberFormat *super,
const Locale &locale,
const UnicodeString &pattern,
UErrorCode &status);
DecimalFormatImpl(
NumberFormat *super,
const UnicodeString &pattern,
DecimalFormatSymbols *symbolsToAdopt,
UParseError &parseError,
UErrorCode &status);
// Copy-style construction from another instance, bound to a new `super`.
DecimalFormatImpl(
NumberFormat *super,
const DecimalFormatImpl &other,
UErrorCode &status);
// Used in place of operator=, which is private below; reports through status.
DecimalFormatImpl &assign(
const DecimalFormatImpl &other, UErrorCode &status);
virtual ~DecimalFormatImpl();
void adoptDecimalFormatSymbols(DecimalFormatSymbols *symbolsToAdopt);
// Dereferences fSymbols without a null check.
const DecimalFormatSymbols &getDecimalFormatSymbols() const {
return *fSymbols;
}
// format() overloads: one per input type, taking either a FieldPosition
// reference or a FieldPositionIterator pointer. Each returns a
// UnicodeString reference and takes the output string as appendTo.
UnicodeString &format(
int32_t number,
UnicodeString &appendTo,
FieldPosition &pos,
UErrorCode &status) const;
UnicodeString &format(
int32_t number,
UnicodeString &appendTo,
FieldPositionIterator *posIter,
UErrorCode &status) const;
UnicodeString &format(
int64_t number,
UnicodeString &appendTo,
FieldPosition &pos,
UErrorCode &status) const;
UnicodeString &format(
double number,
UnicodeString &appendTo,
FieldPosition &pos,
UErrorCode &status) const;
UnicodeString &format(
const DigitList &number,
UnicodeString &appendTo,
FieldPosition &pos,
UErrorCode &status) const;
UnicodeString &format(
int64_t number,
UnicodeString &appendTo,
FieldPositionIterator *posIter,
UErrorCode &status) const;
UnicodeString &format(
double number,
UnicodeString &appendTo,
FieldPositionIterator *posIter,
UErrorCode &status) const;
UnicodeString &format(
const DigitList &number,
UnicodeString &appendTo,
FieldPositionIterator *posIter,
UErrorCode &status) const;
UnicodeString &format(
StringPiece number,
UnicodeString &appendTo,
FieldPositionIterator *posIter,
UErrorCode &status) const;
UnicodeString &format(
const VisibleDigitsWithExponent &digits,
UnicodeString &appendTo,
FieldPosition &pos,
UErrorCode &status) const;
UnicodeString &format(
const VisibleDigitsWithExponent &digits,
UnicodeString &appendTo,
FieldPositionIterator *posIter,
UErrorCode &status) const;
UBool operator==(const DecimalFormatImpl &) const;
UBool operator!=(const DecimalFormatImpl &other) const {
return !(*this == other);
}
// Records the rounding mode and mirrors it into the effective mantissa
// precision; exact-only is switched on only for kRoundUnnecessary.
void setRoundingMode(DecimalFormat::ERoundingMode mode) {
fRoundingMode = mode;
fEffPrecision.fMantissa.fExactOnly = (fRoundingMode == DecimalFormat::kRoundUnnecessary);
fEffPrecision.fMantissa.fRoundingMode = mode;
}
DecimalFormat::ERoundingMode getRoundingMode() const {
return fRoundingMode;
}
// Stored directly in the effective mantissa precision.
void setFailIfMoreThanMaxDigits(UBool b) {
fEffPrecision.fMantissa.fFailIfOverMax = b;
}
UBool isFailIfMoreThanMaxDigits() const { return fEffPrecision.fMantissa.fFailIfOverMax; }
// Precision attributes (significant digits, scientific notation).
void setMinimumSignificantDigits(int32_t newValue);
void setMaximumSignificantDigits(int32_t newValue);
void setMinMaxSignificantDigits(int32_t min, int32_t max);
void setScientificNotation(UBool newValue);
void setSignificantDigitsUsed(UBool newValue);
int32_t getMinimumSignificantDigits() const {
return fMinSigDigits; }
int32_t getMaximumSignificantDigits() const {
return fMaxSigDigits; }
UBool isScientificNotation() const { return fUseScientific; }
UBool areSignificantDigitsUsed() const { return fUseSigDigits; }
// Grouping attributes. The getters read fGrouping (the user-visible
// settings), not fEffGrouping.
void setGroupingSize(int32_t newValue);
void setSecondaryGroupingSize(int32_t newValue);
void setMinimumGroupingDigits(int32_t newValue);
int32_t getGroupingSize() const { return fGrouping.fGrouping; }
int32_t getSecondaryGroupingSize() const { return fGrouping.fGrouping2; }
int32_t getMinimumGroupingDigits() const { return fGrouping.fMinGrouping; }
// Pattern application, with and without a UParseError out-parameter.
void applyPattern(const UnicodeString &pattern, UErrorCode &status);
void applyPatternFavorCurrencyPrecision(
const UnicodeString &pattern, UErrorCode &status);
void applyPattern(
const UnicodeString &pattern, UParseError &perror, UErrorCode &status);
void applyLocalizedPattern(const UnicodeString &pattern, UErrorCode &status);
void applyLocalizedPattern(
const UnicodeString &pattern, UParseError &perror, UErrorCode &status);
void setCurrencyUsage(UCurrencyUsage usage, UErrorCode &status);
UCurrencyUsage getCurrencyUsage() const { return fCurrencyUsage; }
void setRoundingIncrement(double d);
double getRoundingIncrement() const;
int32_t getMultiplier() const;
void setMultiplier(int32_t m);
// Padding attributes are read from and written to fAffixes directly.
UChar32 getPadCharacter() const { return fAffixes.fPadChar; }
void setPadCharacter(UChar32 c) { fAffixes.fPadChar = c; }
int32_t getFormatWidth() const { return fAffixes.fWidth; }
void setFormatWidth(int32_t x) { fAffixes.fWidth = x; }
DigitAffixesAndPadding::EPadPosition getPadPosition() const {
return fAffixes.fPadPosition;
}
void setPadPosition(DigitAffixesAndPadding::EPadPosition x) {
fAffixes.fPadPosition = x;
}
// Exponent and decimal-separator options live in fEffPrecision / fOptions.
int32_t getMinimumExponentDigits() const {
return fEffPrecision.fMinExponentDigits;
}
void setMinimumExponentDigits(int32_t x) {
fEffPrecision.fMinExponentDigits = x;
}
UBool isExponentSignAlwaysShown() const {
return fOptions.fExponent.fAlwaysShowSign;
}
void setExponentSignAlwaysShown(UBool x) {
fOptions.fExponent.fAlwaysShowSign = x;
}
UBool isDecimalSeparatorAlwaysShown() const {
return fOptions.fMantissa.fAlwaysShowDecimal;
}
void setDecimalSeparatorAlwaysShown(UBool x) {
fOptions.fMantissa.fAlwaysShowDecimal = x;
}
// Affix getters write into `result` and return a UnicodeString reference.
UnicodeString &getPositivePrefix(UnicodeString &result) const;
UnicodeString &getPositiveSuffix(UnicodeString &result) const;
UnicodeString &getNegativePrefix(UnicodeString &result) const;
UnicodeString &getNegativeSuffix(UnicodeString &result) const;
void setPositivePrefix(const UnicodeString &str);
void setPositiveSuffix(const UnicodeString &str);
void setNegativePrefix(const UnicodeString &str);
void setNegativeSuffix(const UnicodeString &str);
UnicodeString &toPattern(UnicodeString& result) const;
FixedDecimal &getFixedDecimal(double value, FixedDecimal &result, UErrorCode &status) const;
FixedDecimal &getFixedDecimal(DigitList &number, FixedDecimal &result, UErrorCode &status) const;
DigitList &round(DigitList &number, UErrorCode &status) const;
// Fill `digits` from a number; one overload per input type.
VisibleDigitsWithExponent &
initVisibleDigitsWithExponent(
int64_t number,
VisibleDigitsWithExponent &digits,
UErrorCode &status) const;
VisibleDigitsWithExponent &
initVisibleDigitsWithExponent(
double number,
VisibleDigitsWithExponent &digits,
UErrorCode &status) const;
VisibleDigitsWithExponent &
initVisibleDigitsWithExponent(
DigitList &number,
VisibleDigitsWithExponent &digits,
UErrorCode &status) const;
// Recalculation entry points for the derived attributes.
void updatePrecision();
void updateGrouping();
void updateCurrency(UErrorCode &status);
private:
// Disallow copy and assign
DecimalFormatImpl(const DecimalFormatImpl &other);
DecimalFormatImpl &operator=(const DecimalFormatImpl &other);
// The NumberFormat passed to the constructors as `super`.
NumberFormat *fSuper;
DigitList fMultiplier;
int32_t fScale;
DecimalFormat::ERoundingMode fRoundingMode;
// These fields include what the user can see and set.
// When the user updates these fields, it triggers automatic updates of
// other fields that may be invisible to user
// Updating any of the following fields triggers an update to
// fEffPrecision.fMantissa.fMin,
// fEffPrecision.fMantissa.fMax,
// fEffPrecision.fMantissa.fSignificant fields
// We have this two phase update because of backward compatibility.
// DecimalFormat has to remember all settings even if those settings are
// invalid or disabled.
int32_t fMinSigDigits;
int32_t fMaxSigDigits;
UBool fUseScientific;
UBool fUseSigDigits;
// In addition to these listed above, changes to min/max int digits and
// min/max frac digits from fSuper also trigger an update.
// Updating any of the following fields triggers an update to
// fEffGrouping field Again we do it this way because original
// grouping settings have to be retained if grouping is turned off.
DigitGrouping fGrouping;
// In addition to these listed above, changes to isGroupingUsed in
// fSuper also triggers an update to fEffGrouping.
// Updating any of the following fields triggers updates on the following:
// fMonetary, fRules, fAffixParser, fCurrencyAffixInfo,
// fFormatter, fAffixes.fPositivePrefix, fAffixes.fPositiveSuffix,
// fAffixes.fNegativePrefix, fAffixes.fNegativeSuffix
// We do this two phase update because localizing the affix patterns
// and formatters can be expensive. Better to do it once with the setters
// than each time within format.
AffixPattern fPositivePrefixPattern;
AffixPattern fNegativePrefixPattern;
AffixPattern fPositiveSuffixPattern;
AffixPattern fNegativeSuffixPattern;
DecimalFormatSymbols *fSymbols;
UCurrencyUsage fCurrencyUsage;
// In addition to these listed above, changes to getCurrency() in
// fSuper also triggers an update.
// Optional may be NULL
PluralRules *fRules;
// These fields are totally hidden from user and are used to derive the affixes
// in fAffixes below from the four affix patterns above.
UBool fMonetary;
AffixPatternParser fAffixParser;
CurrencyAffixInfo fCurrencyAffixInfo;
// The actual precision used when formatting
ScientificPrecision fEffPrecision;
// The actual grouping used when formatting
DigitGrouping fEffGrouping;
SciFormatterOptions fOptions; // Encapsulates fixed precision options
DigitFormatter fFormatter;
DigitAffixesAndPadding fAffixes;
// Private formatting helpers; these take a FieldPositionHandler rather
// than the FieldPosition / FieldPositionIterator of the public overloads.
UnicodeString &formatInt32(
int32_t number,
UnicodeString &appendTo,
FieldPositionHandler &handler,
UErrorCode &status) const;
UnicodeString &formatInt64(
int64_t number,
UnicodeString &appendTo,
FieldPositionHandler &handler,
UErrorCode &status) const;
UnicodeString &formatDouble(
double number,
UnicodeString &appendTo,
FieldPositionHandler &handler,
UErrorCode &status) const;
// Scales for percent or permille symbols
UnicodeString &formatDigitList(
DigitList &number,
UnicodeString &appendTo,
FieldPositionHandler &handler,
UErrorCode &status) const;
// Does not scale for percent or permille symbols
UnicodeString &formatAdjustedDigitList(
DigitList &number,
UnicodeString &appendTo,
FieldPositionHandler &handler,
UErrorCode &status) const;
UnicodeString &formatVisibleDigitsWithExponent(
const VisibleDigitsWithExponent &number,
UnicodeString &appendTo,
FieldPositionHandler &handler,
UErrorCode &status) const;
VisibleDigitsWithExponent &
initVisibleDigitsFromAdjusted(
DigitList &number,
VisibleDigitsWithExponent &digits,
UErrorCode &status) const;
// NOTE(review): the UBool result presumably reports whether the DigitList
// path was taken - confirm against the definitions.
template<class T>
UBool maybeFormatWithDigitList(
T number,
UnicodeString &appendTo,
FieldPositionHandler &handler,
UErrorCode &status) const;
template<class T>
UBool maybeInitVisibleDigitsFromDigitList(
T number,
VisibleDigitsWithExponent &digits,
UErrorCode &status) const;
DigitList &adjustDigitList(DigitList &number, UErrorCode &status) const;
// Shared worker behind the public applyPattern / applyLocalizedPattern
// overloads; `localized` selects between them.
void applyPattern(
const UnicodeString &pattern,
UBool localized, UParseError &perror, UErrorCode &status);
ValueFormatter &prepareValueFormatter(ValueFormatter &vf) const;
void setMultiplierScale(int32_t s);
int32_t getPatternScale() const;
void setScale(int32_t s) { fScale = s; }
int32_t getScale() const { return fScale; }
// Updates everything
void updateAll(UErrorCode &status);
void updateAll(
int32_t formattingFlags,
UBool updatePrecisionBasedOnCurrency,
UErrorCode &status);
// Updates from formatting pattern changes
void updateForApplyPattern(UErrorCode &status);
void updateForApplyPatternFavorCurrencyPrecision(UErrorCode &status);
// Updates from changes to third group of attributes
void updateFormatting(int32_t changedFormattingFields, UErrorCode &status);
void updateFormatting(
int32_t changedFormattingFields,
UBool updatePrecisionBasedOnCurrency,
UErrorCode &status);
// Helper functions for updatePrecision
void updatePrecisionForScientific();
void updatePrecisionForFixed();
void extractMinMaxDigits(DigitInterval &min, DigitInterval &max) const;
void extractSigDigits(SignificantDigitInterval &sig) const;
// Helper functions for updateFormatting
// Each takes the changed-fields bitfield by reference.
void updateFormattingUsesCurrency(int32_t &changedFormattingFields);
void updateFormattingPluralRules(
int32_t &changedFormattingFields, UErrorCode &status);
void updateFormattingAffixParser(int32_t &changedFormattingFields);
void updateFormattingCurrencyAffixInfo(
int32_t &changedFormattingFields,
UBool updatePrecisionBasedOnCurrency,
UErrorCode &status);
void updateFormattingFixedPointFormatter(
int32_t &changedFormattingFields);
void updateFormattingLocalizedPositivePrefix(
int32_t &changedFormattingFields, UErrorCode &status);
void updateFormattingLocalizedPositiveSuffix(
int32_t &changedFormattingFields, UErrorCode &status);
void updateFormattingLocalizedNegativePrefix(
int32_t &changedFormattingFields, UErrorCode &status);
void updateFormattingLocalizedNegativeSuffix(
int32_t &changedFormattingFields, UErrorCode &status);
// Helpers for building a pattern string (see toPattern above).
int32_t computeExponentPatternLength() const;
int32_t countFractionDigitAndDecimalPatternLength(int32_t fracDigitCount) const;
UnicodeString &toNumberPattern(
UBool hasPadding, int32_t minimumLength, UnicodeString& result) const;
int32_t getOldFormatWidth() const;
const UnicodeString &getConstSymbol(
DecimalFormatSymbols::ENumberFormatSymbol symbol) const;
UBool isParseFastpath() const;
// DecimalFormat gets access to the private members above.
friend class DecimalFormat;
};
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif // DECIMFMTIMPL_H
//eof

View File

@ -1,109 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
* Copyright (C) 2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
* file name: digitaffix.cpp
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "digitaffix.h"
#include "fphdlimp.h"
#include "uassert.h"
#include "unistrappender.h"
U_NAMESPACE_BEGIN
/** Builds an affix with empty text and no field annotations. */
DigitAffix::DigitAffix()
        : fAffix(),
          fAnnotations() {}
/**
 * Builds an affix from the UChars at value, tagging every stored code unit
 * with fieldId.
 *
 * fAnnotations holds one annotation code unit per code unit of fAffix;
 * format() asserts that the two lengths are equal.
 */
DigitAffix::DigitAffix(
        const UChar *value, int32_t charCount, int32_t fieldId)
        : fAffix(value, charCount),
          // Size the annotations from the text actually stored, not from the
          // caller's charCount, so the two strings stay the same length even
          // when they would otherwise differ (for example a negative
          // charCount, or a copy that did not succeed). fAffix is declared
          // before fAnnotations, so it is already initialized here.
          fAnnotations(fAffix.length(), (UChar) fieldId, fAffix.length()) {
}
/** Empties this affix: clears both the text and its field annotations. */
void
DigitAffix::remove() {
    fAnnotations.remove();
    fAffix.remove();
}
/** Appends one code unit to the affix text and one matching annotation. */
void
DigitAffix::appendUChar(UChar value, int32_t fieldId) {
    const UChar annotation = (UChar) fieldId;
    fAffix.append(value);
    fAnnotations.append(annotation);
}
void
DigitAffix::append(const UnicodeString &value, int32_t fieldId) {
fAffix.append(value);
{
UnicodeStringAppender appender(fAnnotations);
int32_t len = value.length();
for (int32_t i = 0; i < len; ++i) {
appender.append((UChar) fieldId);
}
}
}
void
DigitAffix::setTo(const UnicodeString &value, int32_t fieldId) {
fAffix = value;
fAnnotations.remove();
{
UnicodeStringAppender appender(fAnnotations);
int32_t len = value.length();
for (int32_t i = 0; i < len; ++i) {
appender.append((UChar) fieldId);
}
}
}
void
DigitAffix::append(const UChar *value, int32_t charCount, int32_t fieldId) {
fAffix.append(value, charCount);
{
UnicodeStringAppender appender(fAnnotations);
for (int32_t i = 0; i < charCount; ++i) {
appender.append((UChar) fieldId);
}
}
}
/**
 * Appends this affix to appendTo. When the handler is recording, each
 * maximal run of identical annotation values other than UNUM_FIELD_COUNT is
 * reported through handler.addAttribute() before the text is appended, with
 * offsets relative to the current end of appendTo.
 */
UnicodeString &
DigitAffix::format(FieldPositionHandler &handler, UnicodeString &appendTo) const {
    const int32_t affixLength = fAffix.length();
    if (affixLength == 0) {
        return appendTo;
    }
    if (handler.isRecording()) {
        U_ASSERT(fAffix.length() == fAnnotations.length());
        const int32_t base = appendTo.length();
        int32_t runId = (int32_t) fAnnotations.charAt(0);
        int32_t runStart = 0;
        int32_t pos = 1;
        while (pos < affixLength) {
            const int32_t currentId = (int32_t) fAnnotations.charAt(pos);
            if (currentId != runId) {
                // Close the run that just ended, unless it is unannotated.
                if (runId != UNUM_FIELD_COUNT) {
                    handler.addAttribute(runId, base + runStart, base + pos);
                }
                runId = currentId;
                runStart = pos;
            }
            ++pos;
        }
        // Close the final run, which extends to the end of the affix.
        if (runId != UNUM_FIELD_COUNT) {
            handler.addAttribute(runId, base + runStart, base + affixLength);
        }
    }
    return appendTo.append(fAffix);
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -1,104 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* digitaffix.h
*
* created on: 2015jan06
* created by: Travis Keep
*/
#ifndef __DIGITAFFIX_H__
#define __DIGITAFFIX_H__
#include "unicode/uobject.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/unistr.h"
#include "unicode/unum.h"
#include "unicode/utypes.h"
U_NAMESPACE_BEGIN
class FieldPositionHandler;
/**
* A prefix or suffix of a formatted number.
*/
/**
* A prefix or suffix of a formatted number: the affix text (fAffix) plus a
* parallel string of field annotations (fAnnotations).
*/
class U_I18N_API DigitAffix : public UMemory {
public:
/**
* Creates an empty DigitAffix.
*/
DigitAffix();
/**
* Creates a DigitAffix containing given UChars where all of it has
* a field type of fieldId.
*/
DigitAffix(
const UChar *value,
int32_t charCount,
int32_t fieldId=UNUM_FIELD_COUNT);
/**
* Makes this affix be the empty string.
*/
void remove();
/**
* Append a single UChar to this affix. If fieldId is present, the appended
* UChar is considered to be the type fieldId.
*/
void appendUChar(UChar value, int32_t fieldId=UNUM_FIELD_COUNT);
/**
* Append value to this affix. If fieldId is present, the appended
* string is considered to be the type fieldId.
*/
void append(const UnicodeString &value, int32_t fieldId=UNUM_FIELD_COUNT);
/**
* Sets this affix to given string. The entire string
* is considered to be the type fieldId.
*/
void setTo(const UnicodeString &value, int32_t fieldId=UNUM_FIELD_COUNT);
/**
* Append charCount UChars from value to this affix. If fieldId is present,
* the appended string is considered to be the type fieldId.
*/
void append(const UChar *value, int32_t charCount, int32_t fieldId=UNUM_FIELD_COUNT);
/**
* Formats this affix.
*/
UnicodeString &format(
FieldPositionHandler &handler, UnicodeString &appendTo) const;
/**
* Returns the result of countChar32() on the affix text.
*/
int32_t countChar32() const { return fAffix.countChar32(); }
/**
* Returns this affix as a unicode string.
*/
const UnicodeString & toString() const { return fAffix; }
/**
* Returns TRUE if this object equals rhs, i.e. both the affix text and
* the annotations compare equal.
*/
UBool equals(const DigitAffix &rhs) const {
return ((fAffix == rhs.fAffix) && (fAnnotations == rhs.fAnnotations));
}
private:
// The affix text.
UnicodeString fAffix;
// Field annotations: the field id of each piece of fAffix stored as a UChar.
UnicodeString fAnnotations;
};
U_NAMESPACE_END
#endif // #if !UCONFIG_NO_FORMATTING
#endif // __DIGITAFFIX_H__

View File

@ -1,175 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
* Copyright (C) 2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
* file name: digitaffixesandpadding.cpp
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/plurrule.h"
#include "charstr.h"
#include "digitaffix.h"
#include "digitaffixesandpadding.h"
#include "digitlst.h"
#include "uassert.h"
#include "valueformatter.h"
#include "visibledigits.h"
U_NAMESPACE_BEGIN
/**
 * Returns TRUE if any of the four affixes has multiple variants. The
 * affixes are checked in the order positive prefix, positive suffix,
 * negative prefix, negative suffix, stopping at the first hit.
 */
UBool
DigitAffixesAndPadding::needsPluralRules() const {
    if (fPositivePrefix.hasMultipleVariants()) {
        return TRUE;
    }
    if (fPositiveSuffix.hasMultipleVariants()) {
        return TRUE;
    }
    if (fNegativePrefix.hasMultipleVariants()) {
        return TRUE;
    }
    return fNegativeSuffix.hasMultipleVariants() ? TRUE : FALSE;
}
/**
 * Formats a 32-bit integer with its prefix and suffix.
 *
 * Takes the direct path only when no plural rules are supplied, no padding
 * width is set, and the formatter reports the value as fast-formattable.
 * Otherwise the value is converted to VisibleDigitsWithExponent and handed
 * to the general format() overload.
 */
UnicodeString &
DigitAffixesAndPadding::formatInt32(
        int32_t value,
        const ValueFormatter &formatter,
        FieldPositionHandler &handler,
        const PluralRules *optPluralRules,
        UnicodeString &appendTo,
        UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return appendTo;
    }
    const UBool useFastPath =
            optPluralRules == NULL && fWidth <= 0 && formatter.isFastFormattable(value);
    if (!useFastPath) {
        VisibleDigitsWithExponent visible;
        formatter.toVisibleDigitsWithExponent(
                (int64_t) value, visible, status);
        return format(
                visible, formatter, handler, optPluralRules, appendTo, status);
    }
    // Fast path: only the "other" plural variant of each affix is used.
    const UBool nonNegative = value >= 0;
    const PluralAffix &prefixSet = nonNegative ? fPositivePrefix : fNegativePrefix;
    const PluralAffix &suffixSet = nonNegative ? fPositiveSuffix : fNegativeSuffix;
    const DigitAffix &prefix = prefixSet.getOtherVariant();
    const DigitAffix &suffix = suffixSet.getOtherVariant();
    // The sign is carried by the affixes, so format the magnitude.
    if (!nonNegative) {
        value = -value;
    }
    prefix.format(handler, appendTo);
    formatter.formatInt32(value, handler, appendTo);
    return suffix.format(handler, appendTo);
}
/** Formats affix into appendTo; a NULL affix contributes nothing. */
static UnicodeString &
formatAffix(
        const DigitAffix *affix,
        FieldPositionHandler &handler,
        UnicodeString &appendTo) {
    if (affix == NULL) {
        return appendTo;
    }
    affix->format(handler, appendTo);
    return appendTo;
}
/** Returns affix->countChar32(), or 0 when affix is NULL. */
static int32_t
countAffixChar32(const DigitAffix *affix) {
    return (affix != NULL) ? affix->countChar32() : 0;
}
/**
 * Formats digits with the appropriate prefix, suffix and padding.
 *
 * NaN gets no affixes. Otherwise the positive or negative affix pair is
 * chosen by sign; the plural variant comes from optPluralRules, falling
 * back to the "other" variant when there are no rules or the value is
 * infinite. If fWidth is positive, fPadChar is inserted at fPadPosition
 * until the output reaches fWidth code points.
 */
UnicodeString &
DigitAffixesAndPadding::format(
        const VisibleDigitsWithExponent &digits,
        const ValueFormatter &formatter,
        FieldPositionHandler &handler,
        const PluralRules *optPluralRules,
        UnicodeString &appendTo,
        UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return appendTo;
    }
    const DigitAffix *prefix = NULL;
    const DigitAffix *suffix = NULL;
    if (!digits.isNaN()) {
        const UBool negative = digits.isNegative();
        const PluralAffix &prefixSet = negative ? fNegativePrefix : fPositivePrefix;
        const PluralAffix &suffixSet = negative ? fNegativeSuffix : fPositiveSuffix;
        if (optPluralRules != NULL && !digits.isInfinite()) {
            UnicodeString category(optPluralRules->select(digits));
            prefix = &prefixSet.getByCategory(category);
            suffix = &suffixSet.getByCategory(category);
        } else {
            prefix = &prefixSet.getOtherVariant();
            suffix = &suffixSet.getOtherVariant();
        }
    }
    // No padding requested: prefix, digits, suffix.
    if (fWidth <= 0) {
        formatAffix(prefix, handler, appendTo);
        formatter.format(digits, handler, appendTo);
        return formatAffix(suffix, handler, appendTo);
    }
    const int32_t paddingCount = fWidth -
            (countAffixChar32(prefix) + formatter.countChar32(digits) + countAffixChar32(suffix));
    // An unknown pad position produces no output at all.
    switch (fPadPosition) {
    case kPadBeforePrefix:
    case kPadAfterPrefix:
    case kPadBeforeSuffix:
    case kPadAfterSuffix:
        break;
    default:
        U_ASSERT(FALSE);
        return appendTo;
    }
    // Emit the pieces in order, dropping the padding into its slot.
    if (fPadPosition == kPadBeforePrefix) {
        appendPadding(paddingCount, appendTo);
    }
    formatAffix(prefix, handler, appendTo);
    if (fPadPosition == kPadAfterPrefix) {
        appendPadding(paddingCount, appendTo);
    }
    formatter.format(digits, handler, appendTo);
    if (fPadPosition == kPadBeforeSuffix) {
        appendPadding(paddingCount, appendTo);
    }
    formatAffix(suffix, handler, appendTo);
    if (fPadPosition == kPadAfterSuffix) {
        appendPadding(paddingCount, appendTo);
    }
    return appendTo;
}
/**
 * Formats a DigitList: converts it to VisibleDigitsWithExponent through the
 * formatter and, if that leaves status successful, delegates to the
 * VisibleDigitsWithExponent overload. On failure appendTo is returned
 * untouched.
 */
UnicodeString &
DigitAffixesAndPadding::format(
        DigitList &value,
        const ValueFormatter &formatter,
        FieldPositionHandler &handler,
        const PluralRules *optPluralRules,
        UnicodeString &appendTo,
        UErrorCode &status) const {
    VisibleDigitsWithExponent visible;
    formatter.toVisibleDigitsWithExponent(value, visible, status);
    if (U_SUCCESS(status)) {
        return format(
                visible, formatter, handler, optPluralRules, appendTo, status);
    }
    return appendTo;
}
/**
 * Appends paddingCount copies of fPadChar to appendTo. A zero or negative
 * count appends nothing.
 */
UnicodeString &
DigitAffixesAndPadding::appendPadding(int32_t paddingCount, UnicodeString &appendTo) const {
    for (int32_t remaining = paddingCount; remaining > 0; --remaining) {
        appendTo.append(fPadChar);
    }
    return appendTo;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -1,179 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* digitaffixesandpadding.h
*
* created on: 2015jan06
* created by: Travis Keep
*/
#ifndef __DIGITAFFIXESANDPADDING_H__
#define __DIGITAFFIXESANDPADDING_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/uobject.h"
#include "pluralaffix.h"
U_NAMESPACE_BEGIN
class DigitList;
class ValueFormatter;
class UnicodeString;
class FieldPositionHandler;
class PluralRules;
class VisibleDigitsWithExponent;
/**
* A formatter of numbers. This class can format any numerical value
* except for not a number (NaN), positive infinity, and negative infinity.
* This class manages prefixes, suffixes, and padding but delegates the
* formatting of actual positive values to a ValueFormatter.
*/
class U_I18N_API DigitAffixesAndPadding : public UMemory {
public:
/**
* Equivalent to DecimalFormat EPadPosition, but redeclared here to prevent
* depending on DecimalFormat which would cause a circular dependency.
*/
enum EPadPosition {
kPadBeforePrefix,
kPadAfterPrefix,
kPadBeforeSuffix,
kPadAfterSuffix
};
/**
* The positive prefix
*/
PluralAffix fPositivePrefix;
/**
* The positive suffix
*/
PluralAffix fPositiveSuffix;
/**
* The negative prefix
*/
PluralAffix fNegativePrefix;
/**
* The negative suffix
*/
PluralAffix fNegativeSuffix;
/**
* The padding position
*/
EPadPosition fPadPosition;
/**
* The padding character.
*/
UChar32 fPadChar;
/**
* The field width in code points. The format method inserts instances of
* the padding character as needed in the desired padding position so that
* the entire formatted string contains this many code points. If the
* formatted string already exceeds this many code points, the format method
* inserts no padding.
*/
int32_t fWidth;
/**
* Pad position is before prefix; padding character is '*' (0x2a); field
* width is 0.
* The affixes are all the empty string with no annotated fields with just
* the 'other' plural variation.
*/
DigitAffixesAndPadding()
: fPadPosition(kPadBeforePrefix), fPadChar(0x2a), fWidth(0) { }
/**
* Returns TRUE if this object is equal to rhs, that is, if all four
* affixes, the pad position, the field width and the pad character match.
*/
UBool equals(const DigitAffixesAndPadding &rhs) const {
return (fPositivePrefix.equals(rhs.fPositivePrefix) &&
fPositiveSuffix.equals(rhs.fPositiveSuffix) &&
fNegativePrefix.equals(rhs.fNegativePrefix) &&
fNegativeSuffix.equals(rhs.fNegativeSuffix) &&
fPadPosition == rhs.fPadPosition &&
fWidth == rhs.fWidth &&
fPadChar == rhs.fPadChar);
}
/**
* Returns TRUE if a plural rules instance is needed to complete the
* formatting by detecting if any of the affixes have multiple plural
* variations.
*/
UBool needsPluralRules() const;
/**
* Formats value and appends to appendTo.
*
* @param value the value to format. May be NaN or infinite.
* @param formatter handles the details of formatting the actual value.
* @param handler records field positions
* @param optPluralRules the plural rules, but may be NULL if
* needsPluralRules returns FALSE.
* @param appendTo formatted string appended here.
* @param status any error returned here.
*/
UnicodeString &format(
const VisibleDigitsWithExponent &value,
const ValueFormatter &formatter,
FieldPositionHandler &handler,
const PluralRules *optPluralRules,
UnicodeString &appendTo,
UErrorCode &status) const;
/**
* For testing only.
*/
UnicodeString &format(
DigitList &value,
const ValueFormatter &formatter,
FieldPositionHandler &handler,
const PluralRules *optPluralRules,
UnicodeString &appendTo,
UErrorCode &status) const;
/**
* Formats a 32-bit integer and appends to appendTo. When formatting an
* integer, this method is preferred to plain format as it can run
* several times faster under certain conditions.
*
* @param value the value to format.
* @param formatter handles the details of formatting the actual value.
* @param handler records field positions
* @param optPluralRules the plural rules, but may be NULL if
* needsPluralRules returns FALSE.
* @param appendTo formatted string appended here.
* @param status any error returned here.
*/
UnicodeString &formatInt32(
int32_t value,
const ValueFormatter &formatter,
FieldPositionHandler &handler,
const PluralRules *optPluralRules,
UnicodeString &appendTo,
UErrorCode &status) const;
private:
/**
* Appends paddingCount copies of fPadChar to appendTo.
*/
UnicodeString &appendPadding(int32_t paddingCount, UnicodeString &appendTo) const;
};
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif // __DIGITAFFIXESANDPADDING_H__

View File

@ -1,417 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
* Copyright (C) 2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
* file name: digitformatter.cpp
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/dcfmtsym.h"
#include "unicode/unum.h"
#include "digitformatter.h"
#include "digitgrouping.h"
#include "digitinterval.h"
#include "digitlst.h"
#include "fphdlimp.h"
#include "smallintformatter.h"
#include "unistrappender.h"
#include "visibledigits.h"
U_NAMESPACE_BEGIN
/**
 * Builds a formatter with invariant ASCII symbols: "," grouping, "."
 * decimal, "-" / "+" signs, "E" exponent, digits '0'..'9', and "Inf" /
 * "Nan" tagged as integer fields.
 */
DigitFormatter::DigitFormatter()
        : fGroupingSeparator(",", -1, US_INV),
          fDecimal(".", -1, US_INV),
          fNegativeSign("-", -1, US_INV),
          fPositiveSign("+", -1, US_INV),
          fIsStandardDigits(TRUE),
          fExponent("E", -1, US_INV) {
    // Fill the digit table with U+0030..U+0039.
    UChar32 codePoint = 0x30;
    for (int32_t slot = 0; slot < 10; ++slot, ++codePoint) {
        fLocalizedDigits[slot] = codePoint;
    }
    fInfinity.setTo(UnicodeString("Inf", -1, US_INV), UNUM_INTEGER_FIELD);
    fNan.setTo(UnicodeString("Nan", -1, US_INV), UNUM_INTEGER_FIELD);
}
/**
 * Builds a formatter whose digits, separators, signs and special-value
 * strings are all taken from symbols (non-monetary separators).
 */
DigitFormatter::DigitFormatter(const DecimalFormatSymbols &symbols)
{
    this->setDecimalFormatSymbols(symbols);
}
void
DigitFormatter::setOtherDecimalFormatSymbols(
const DecimalFormatSymbols &symbols) {
fLocalizedDigits[0] = symbols.getConstSymbol(DecimalFormatSymbols::kZeroDigitSymbol).char32At(0);
fLocalizedDigits[1] = symbols.getConstSymbol(DecimalFormatSymbols::kOneDigitSymbol).char32At(0);
fLocalizedDigits[2] = symbols.getConstSymbol(DecimalFormatSymbols::kTwoDigitSymbol).char32At(0);
fLocalizedDigits[3] = symbols.getConstSymbol(DecimalFormatSymbols::kThreeDigitSymbol).char32At(0);
fLocalizedDigits[4] = symbols.getConstSymbol(DecimalFormatSymbols::kFourDigitSymbol).char32At(0);
fLocalizedDigits[5] = symbols.getConstSymbol(DecimalFormatSymbols::kFiveDigitSymbol).char32At(0);
fLocalizedDigits[6] = symbols.getConstSymbol(DecimalFormatSymbols::kSixDigitSymbol).char32At(0);
fLocalizedDigits[7] = symbols.getConstSymbol(DecimalFormatSymbols::kSevenDigitSymbol).char32At(0);
fLocalizedDigits[8] = symbols.getConstSymbol(DecimalFormatSymbols::kEightDigitSymbol).char32At(0);
fLocalizedDigits[9] = symbols.getConstSymbol(DecimalFormatSymbols::kNineDigitSymbol).char32At(0);
fIsStandardDigits = isStandardDigits();
fNegativeSign = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
fPositiveSign = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
fInfinity.setTo(symbols.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol), UNUM_INTEGER_FIELD);
fNan.setTo(symbols.getConstSymbol(DecimalFormatSymbols::kNaNSymbol), UNUM_INTEGER_FIELD);
fExponent = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol);
}
void
DigitFormatter::setDecimalFormatSymbolsForMonetary(
const DecimalFormatSymbols &symbols) {
setOtherDecimalFormatSymbols(symbols);
fGroupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol);
fDecimal = symbols.getConstSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol);
}
void
DigitFormatter::setDecimalFormatSymbols(
const DecimalFormatSymbols &symbols) {
setOtherDecimalFormatSymbols(symbols);
fGroupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
fDecimal = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
}
/**
 * Appends value to appendTo and reports the appended range to handler as
 * an attribute with id fieldId. The range is [old length, new length).
 */
static void appendField(
        int32_t fieldId,
        const UnicodeString &value,
        FieldPositionHandler &handler,
        UnicodeString &appendTo) {
    const int32_t start = appendTo.length();
    appendTo.append(value);
    const int32_t limit = appendTo.length();
    handler.addAttribute(fieldId, start, limit);
}
/**
 * Counts the code points needed to format the digits of interval in fixed
 * point: one per digit (at least one, since '0' stands in for "no
 * digits"), plus the decimal separator when it is shown, plus all
 * grouping separators.
 */
int32_t DigitFormatter::countChar32(
        const DigitGrouping &grouping,
        const DigitInterval &interval,
        const DigitFormatterOptions &options) const {
    int32_t total = interval.length();
    if (total == 0) {
        // An empty interval is still rendered as a single '0'.
        total = 1;
    }
    const UBool showsDecimal =
            options.fAlwaysShowDecimal || interval.getLeastSignificantInclusive() < 0;
    if (showsDecimal) {
        total += fDecimal.countChar32();
    }
    const int32_t separators = grouping.getSeparatorCount(interval.getIntDigitCount());
    total += separators * fGroupingSeparator.countChar32();
    return total;
}
/**
 * Counts the code points needed for fixed-point formatting of digits.
 * NaN and infinity are measured by their symbol strings; everything else
 * is measured from the digit interval.
 */
int32_t
DigitFormatter::countChar32(
        const VisibleDigits &digits,
        const DigitGrouping &grouping,
        const DigitFormatterOptions &options) const {
    if (digits.isNaN()) {
        return countChar32ForNaN();
    }
    return digits.isInfinite()
            ? countChar32ForInfinity()
            : countChar32(grouping, digits.getInterval(), options);
}
/**
 * Counts the code points needed for scientific formatting of digits.
 * NaN and infinity are measured by their symbol strings. Without an
 * exponent only the mantissa is counted (no grouping); with one, the
 * exponent symbol and exponent digits are included as well.
 */
int32_t
DigitFormatter::countChar32(
        const VisibleDigitsWithExponent &digits,
        const SciFormatterOptions &options) const {
    if (digits.isNaN()) {
        return countChar32ForNaN();
    }
    if (digits.isInfinite()) {
        return countChar32ForInfinity();
    }
    const VisibleDigits *exponentDigits = digits.getExponent();
    if (exponentDigits != NULL) {
        return countChar32(
                *exponentDigits, digits.getMantissa().getInterval(), options);
    }
    // Mantissa only: measured with a default (disabled) grouping.
    DigitGrouping noGrouping;
    return countChar32(
            noGrouping,
            digits.getMantissa().getInterval(),
            options.fMantissa);
}
/**
 * Counts the code points of a scientific-notation number: the mantissa
 * (measured without grouping), the exponent symbol, and the exponent
 * including any sign.
 */
int32_t
DigitFormatter::countChar32(
        const VisibleDigits &exponent,
        const DigitInterval &mantissaInterval,
        const SciFormatterOptions &options) const {
    DigitGrouping noGrouping;
    const int32_t mantissaLength =
            countChar32(noGrouping, mantissaInterval, options.fMantissa);
    const int32_t symbolLength = fExponent.countChar32();
    const int32_t exponentLength =
            countChar32ForExponent(exponent, options.fExponent);
    return mantissaLength + symbolLength + exponentLength;
}
/**
 * Fixed-point formatting of digits, appended to appendTo.
 *
 * NaN and infinity are delegated to formatNaN / formatInfinity. Otherwise
 * the digits of the interval are emitted from most significant to least
 * significant using fLocalizedDigits, with the decimal separator inserted
 * before the 10^-1 digit and a grouping separator after every digit for
 * which grouping.isSeparatorAt() is true. Integer, fraction, decimal
 * separator and grouping separator ranges are reported to handler.
 *
 * @param digits the value to format.
 * @param grouping decides where grouping separators go.
 * @param options fAlwaysShowDecimal forces a trailing decimal separator
 *        when there are no fraction digits.
 * @param handler receives the field ranges.
 * @param appendTo output is appended here.
 * @return appendTo
 */
UnicodeString &DigitFormatter::format(
const VisibleDigits &digits,
const DigitGrouping &grouping,
const DigitFormatterOptions &options,
FieldPositionHandler &handler,
UnicodeString &appendTo) const {
if (digits.isNaN()) {
return formatNaN(handler, appendTo);
}
if (digits.isInfinite()) {
return formatInfinity(handler, appendTo);
}
const DigitInterval &interval = digits.getInterval();
int32_t digitsLeftOfDecimal = interval.getMostSignificantExclusive();
int32_t lastDigitPos = interval.getLeastSignificantInclusive();
int32_t intBegin = appendTo.length();
int32_t fracBegin = 0; /* initialize to avoid compiler warning */
// Emit "0" instead of empty string.
if (digitsLeftOfDecimal == 0 && lastDigitPos == 0) {
appendTo.append(fLocalizedDigits[0]);
handler.addAttribute(UNUM_INTEGER_FIELD, intBegin, appendTo.length());
if (options.fAlwaysShowDecimal) {
appendField(
UNUM_DECIMAL_SEPARATOR_FIELD,
fDecimal,
handler,
appendTo);
}
return appendTo;
}
{
// NOTE(review): every direct use of appendTo below is preceded by
// appender.flush(), presumably because the appender buffers its output
// and appendTo.length() would otherwise be stale -- confirm against
// unistrappender.h.
UnicodeStringAppender appender(appendTo);
// i is the power of 10 of the digit being emitted.
for (int32_t i = interval.getMostSignificantExclusive() - 1;
i >= interval.getLeastSignificantInclusive(); --i) {
// First fraction digit: emit the decimal separator ahead of it and
// remember where the fraction field starts.
if (i == -1) {
appender.flush();
appendField(
UNUM_DECIMAL_SEPARATOR_FIELD,
fDecimal,
handler,
appendTo);
fracBegin = appendTo.length();
}
appender.append(fLocalizedDigits[digits.getDigitByExponent(i)]);
// Grouping separators follow the digit they are attached to.
if (grouping.isSeparatorAt(digitsLeftOfDecimal, i)) {
appender.flush();
appendField(
UNUM_GROUPING_SEPARATOR_FIELD,
fGroupingSeparator,
handler,
appendTo);
}
// One's place just emitted: the integer part is complete, so report it
// (only if there actually were integer digits).
if (i == 0) {
appender.flush();
if (digitsLeftOfDecimal > 0) {
handler.addAttribute(UNUM_INTEGER_FIELD, intBegin, appendTo.length());
}
}
}
// No fraction digits were emitted but the caller wants a decimal
// separator anyway.
if (options.fAlwaysShowDecimal && lastDigitPos == 0) {
appender.flush();
appendField(
UNUM_DECIMAL_SEPARATOR_FIELD,
fDecimal,
handler,
appendTo);
}
}
// lastDigitPos is never > 0 so we are guaranteed that kIntegerField
// is already added.
if (lastDigitPos < 0) {
handler.addAttribute(UNUM_FRACTION_FIELD, fracBegin, appendTo.length());
}
return appendTo;
}
/**
 * Formats digits in scientific notation and appends to appendTo. The
 * mantissa is formatted without grouping. If an exponent is present, the
 * exponent symbol is appended (reported as UNUM_EXPONENT_SYMBOL_FIELD)
 * followed by the exponent itself.
 */
UnicodeString &
DigitFormatter::format(
        const VisibleDigitsWithExponent &digits,
        const SciFormatterOptions &options,
        FieldPositionHandler &handler,
        UnicodeString &appendTo) const {
    DigitGrouping noGrouping;
    format(digits.getMantissa(), noGrouping, options.fMantissa, handler, appendTo);

    const VisibleDigits *exponentDigits = digits.getExponent();
    if (exponentDigits != NULL) {
        const int32_t symbolStart = appendTo.length();
        appendTo.append(fExponent);
        handler.addAttribute(
                UNUM_EXPONENT_SYMBOL_FIELD, symbolStart, appendTo.length());
        return formatExponent(
                *exponentDigits,
                options.fExponent,
                UNUM_EXPONENT_SIGN_FIELD,
                UNUM_EXPONENT_FIELD,
                handler,
                appendTo);
    }
    return appendTo;
}
/**
 * Splits value into decimal digits, least significant first.
 *
 * @param value the number to split; values <= 0 produce no digits.
 * @param digits receives one digit (0-9) per element; the caller must
 *        supply room for every digit of value.
 * @return the number of digits written.
 */
static int32_t formatInt(
        int32_t value, uint8_t *digits) {
    int32_t written = 0;
    for (int32_t rest = value; rest > 0; rest /= 10) {
        digits[written] = static_cast<uint8_t>(rest % 10);
        ++written;
    }
    return written;
}
/**
 * Appends the integer held in digits (least significant digit first,
 * count entries) to appendTo using the localized digits. The number of
 * digits emitted is range.pin(count): positions at or beyond count are
 * written as the localized zero. If that pinned count is 0, a single
 * localized zero is emitted. The whole run is reported to handler as
 * intField.
 */
UnicodeString &
DigitFormatter::formatDigits(
        const uint8_t *digits,
        int32_t count,
        const IntDigitCountRange &range,
        int32_t intField,
        FieldPositionHandler &handler,
        UnicodeString &appendTo) const {
    int32_t pos = range.pin(count) - 1;
    const int32_t start = appendTo.length();
    // Always emit '0' as placeholder for empty string.
    if (pos == -1) {
        appendTo.append(fLocalizedDigits[0]);
        handler.addAttribute(intField, start, appendTo.length());
        return appendTo;
    }
    {
        // Scoped so the appender is destroyed before appendTo.length()
        // is read below.
        UnicodeStringAppender appender(appendTo);
        // Leading zeros for positions the value does not reach.
        while (pos >= count) {
            appender.append(fLocalizedDigits[0]);
            --pos;
        }
        // Actual digits, most significant first.
        while (pos >= 0) {
            appender.append(fLocalizedDigits[digits[pos]]);
            --pos;
        }
    }
    handler.addAttribute(intField, start, appendTo.length());
    return appendTo;
}
/**
 * Appends an exponent to appendTo: an optional sign (reported as
 * signField) followed by the digits (reported as one intField range).
 * The negative sign is used when digits is negative; the positive sign
 * only when options.fAlwaysShowSign is set.
 */
UnicodeString &
DigitFormatter::formatExponent(
        const VisibleDigits &digits,
        const DigitFormatterIntOptions &options,
        int32_t signField,
        int32_t intField,
        FieldPositionHandler &handler,
        UnicodeString &appendTo) const {
    const UBool isNegative = digits.isNegative();
    if (isNegative) {
        appendField(signField, fNegativeSign, handler, appendTo);
    } else if (options.fAlwaysShowSign) {
        appendField(signField, fPositiveSign, handler, appendTo);
    }
    const int32_t digitsStart = appendTo.length();
    DigitGrouping noGrouping;
    DigitFormatterOptions defaultOptions;
    // The digits are formatted against a throwaway handler; the caller's
    // handler only sees the single intField range added afterwards.
    FieldPosition ignoredPosition(FieldPosition::DONT_CARE);
    FieldPositionOnlyHandler discardingHandler(ignoredPosition);
    format(digits, noGrouping, defaultOptions, discardingHandler, appendTo);
    handler.addAttribute(intField, digitsStart, appendTo.length());
    return appendTo;
}
/**
 * Counts the code points needed to format an exponent: the sign, if one
 * is shown, plus the digits measured with default grouping and options.
 */
int32_t
DigitFormatter::countChar32ForExponent(
        const VisibleDigits &exponent,
        const DigitFormatterIntOptions &options) const {
    int32_t signLength = 0;
    if (exponent.isNegative()) {
        signLength = fNegativeSign.countChar32();
    } else if (options.fAlwaysShowSign) {
        signLength = fPositiveSign.countChar32();
    }
    DigitGrouping noGrouping;
    DigitFormatterOptions defaultOptions;
    return signLength + countChar32(noGrouping, exponent.getInterval(), defaultOptions);
}
/**
 * Appends positiveValue to appendTo as an integer field, with the digit
 * count constrained by range. When the digits are the standard ASCII ones
 * and SmallIntFormatter accepts the value, it is used directly; otherwise
 * the value is split into digits and formatted via formatDigits.
 */
UnicodeString &
DigitFormatter::formatPositiveInt32(
        int32_t positiveValue,
        const IntDigitCountRange &range,
        FieldPositionHandler &handler,
        UnicodeString &appendTo) const {
    const UBool useFastPath =
            fIsStandardDigits && SmallIntFormatter::canFormat(positiveValue, range);
    if (!useFastPath) {
        // General path: an int32_t has at most 10 decimal digits.
        uint8_t reversedDigits[10];
        const int32_t digitCount = formatInt(positiveValue, reversedDigits);
        return formatDigits(
                reversedDigits,
                digitCount,
                range,
                UNUM_INTEGER_FIELD,
                handler,
                appendTo);
    }
    // super fast path
    const int32_t start = appendTo.length();
    SmallIntFormatter::format(positiveValue, range, appendTo);
    handler.addAttribute(UNUM_INTEGER_FIELD, start, appendTo.length());
    return appendTo;
}
/**
 * Returns TRUE if fLocalizedDigits holds exactly U+0030..U+0039 in order.
 */
UBool DigitFormatter::isStandardDigits() const {
    for (int32_t i = 0; i < UPRV_LENGTHOF(fLocalizedDigits); ++i) {
        const UChar32 asciiDigit = 0x30 + i;
        if (fLocalizedDigits[i] != asciiDigit) {
            return FALSE;
        }
    }
    return TRUE;
}
/**
 * Returns TRUE if rhs has the same separators, signs, exponent symbol,
 * infinity / NaN affixes, standard-digit flag and localized digits.
 */
UBool
DigitFormatter::equals(const DigitFormatter &rhs) const {
    if (!(fGroupingSeparator == rhs.fGroupingSeparator)) {
        return FALSE;
    }
    if (!(fDecimal == rhs.fDecimal)) {
        return FALSE;
    }
    if (!(fNegativeSign == rhs.fNegativeSign)) {
        return FALSE;
    }
    if (!(fPositiveSign == rhs.fPositiveSign)) {
        return FALSE;
    }
    if (!fInfinity.equals(rhs.fInfinity)) {
        return FALSE;
    }
    if (!fNan.equals(rhs.fNan)) {
        return FALSE;
    }
    if (!(fIsStandardDigits == rhs.fIsStandardDigits)) {
        return FALSE;
    }
    if (!(fExponent == rhs.fExponent)) {
        return FALSE;
    }
    // Scalar members match; compare the digit tables element by element.
    for (int32_t i = 0; i < UPRV_LENGTHOF(fLocalizedDigits); ++i) {
        if (fLocalizedDigits[i] != rhs.fLocalizedDigits[i]) {
            return FALSE;
        }
    }
    return TRUE;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -1,288 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* digitformatter.h
*
* created on: 2015jan06
* created by: Travis Keep
*/
#ifndef __DIGITFORMATTER_H__
#define __DIGITFORMATTER_H__
#include "unicode/uobject.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "digitaffix.h"
U_NAMESPACE_BEGIN
class DecimalFormatSymbols;
class DigitList;
class DigitGrouping;
class DigitInterval;
class UnicodeString;
class FieldPositionHandler;
class IntDigitCountRange;
class VisibleDigits;
class VisibleDigitsWithExponent;
/**
 * Options that control fixed-point formatting.
 */
class U_I18N_API DigitFormatterOptions : public UMemory {
public:
    /** Starts with the decimal separator shown only when needed. */
    DigitFormatterOptions() : fAlwaysShowDecimal(FALSE) { }

    /**
     * Returns TRUE if rhs carries the same option values as this object.
     */
    UBool equals(const DigitFormatterOptions &rhs) const {
        return (rhs.fAlwaysShowDecimal == fAlwaysShowDecimal);
    }

    /**
     * Returns TRUE if these options permit the fast integer formatting
     * path, i.e. the decimal separator is not forced.
     */
    UBool isFastFormattable() const {
        return !fAlwaysShowDecimal;
    }

    /**
     * When TRUE the decimal separator is emitted even if there are no
     * fraction digits. Defaults to FALSE.
     */
    UBool fAlwaysShowDecimal;
};
/**
 * Options that control integer formatting.
 */
class U_I18N_API DigitFormatterIntOptions : public UMemory {
public:
    /** Starts with the sign shown only for negative numbers. */
    DigitFormatterIntOptions() : fAlwaysShowSign(FALSE) { }

    /**
     * Returns TRUE if rhs carries the same option values as this object.
     */
    UBool equals(const DigitFormatterIntOptions &rhs) const {
        return (rhs.fAlwaysShowSign == fAlwaysShowSign);
    }

    /**
     * When TRUE the integer is always prefixed with its sign, even when
     * it is positive. Defaults to FALSE.
     */
    UBool fAlwaysShowSign;
};
/**
 * Options that control scientific-notation formatting: one set for the
 * mantissa and one for the exponent.
 */
class U_I18N_API SciFormatterOptions : public UMemory {
public:
    /**
     * Returns TRUE if both the mantissa options and the exponent options
     * of rhs equal those of this object.
     */
    UBool equals(const SciFormatterOptions &rhs) const {
        if (!fMantissa.equals(rhs.fMantissa)) {
            return FALSE;
        }
        return fExponent.equals(rhs.fExponent);
    }

    /**
     * How the mantissa is formatted.
     */
    DigitFormatterOptions fMantissa;

    /**
     * How the exponent is formatted.
     */
    DigitFormatterIntOptions fExponent;
};
/**
* Does fixed point formatting.
*
* This class only does fixed point formatting. It does no rounding before
* formatting.
*/
class U_I18N_API DigitFormatter : public UMemory {
public:
/**
* Decimal separator is period (.), Plus sign is plus (+),
* minus sign is minus (-), grouping separator is comma (,), digits are 0-9.
*/
DigitFormatter();
/**
* Let symbols determine the digits, decimal separator,
* plus and minus sign, grouping separator, and possibly other settings.
*/
DigitFormatter(const DecimalFormatSymbols &symbols);
/**
* Change what this instance uses for digits, decimal separator,
* plus and minus sign, grouping separator, and possibly other settings
* according to symbols.
*/
void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols);
/**
* Change what this instance uses for digits, decimal separator,
* plus and minus sign, grouping separator, and possibly other settings
* according to symbols in the context of monetary amounts.
*/
void setDecimalFormatSymbolsForMonetary(const DecimalFormatSymbols &symbols);
/**
* Fixed point formatting.
*
* @param positiveDigits the value to format
* Negative sign can be present, but it won't show.
* @param grouping controls how digit grouping is done
* @param options formatting options
* @param handler records field positions
* @param appendTo formatted value appended here.
* @return appendTo
*/
UnicodeString &format(
const VisibleDigits &positiveDigits,
const DigitGrouping &grouping,
const DigitFormatterOptions &options,
FieldPositionHandler &handler,
UnicodeString &appendTo) const;
/**
* Formats in scientific notation.
* @param positiveDigits the value to format.
* Negative sign can be present, but it won't show.
* @param options formatting options
* @param handler records field positions.
* @param appendTo formatted value appended here.
*/
UnicodeString &format(
const VisibleDigitsWithExponent &positiveDigits,
const SciFormatterOptions &options,
FieldPositionHandler &handler,
UnicodeString &appendTo) const;
/**
* Fixed point formatting of integers.
* Always performed with no grouping and no decimal point.
*
* @param positiveValue the value to format must be positive.
* @param range specifies minimum and maximum number of digits.
* @param handler records field positions
* @param appendTo formatted value appended here.
* @return appendTo
*/
UnicodeString &formatPositiveInt32(
int32_t positiveValue,
const IntDigitCountRange &range,
FieldPositionHandler &handler,
UnicodeString &appendTo) const;
/**
* Counts how many code points are needed for fixed formatting.
* If digits is negative, the negative sign is not included in the count.
*/
int32_t countChar32(
const VisibleDigits &digits,
const DigitGrouping &grouping,
const DigitFormatterOptions &options) const;
/**
* Counts how many code points are needed for scientific formatting.
* If digits is negative, the negative sign is not included in the count.
*/
int32_t countChar32(
const VisibleDigitsWithExponent &digits,
const SciFormatterOptions &options) const;
/**
* Returns TRUE if this object equals rhs.
*/
UBool equals(const DigitFormatter &rhs) const;
private:
// Code point used for each digit value 0-9, indexed by digit value.
UChar32 fLocalizedDigits[10];
UnicodeString fGroupingSeparator;
UnicodeString fDecimal;
UnicodeString fNegativeSign;
UnicodeString fPositiveSign;
DigitAffix fInfinity;
DigitAffix fNan;
// Cached result of isStandardDigits().
UBool fIsStandardDigits;
UnicodeString fExponent;
UBool isStandardDigits() const;
UnicodeString &formatDigits(
const uint8_t *digits,
int32_t count,
const IntDigitCountRange &range,
int32_t intField,
FieldPositionHandler &handler,
UnicodeString &appendTo) const;
void setOtherDecimalFormatSymbols(const DecimalFormatSymbols &symbols);
int32_t countChar32(
const VisibleDigits &exponent,
const DigitInterval &mantissaInterval,
const SciFormatterOptions &options) const;
// Appends the NaN affix to appendTo via fNan.
UnicodeString &formatNaN(
FieldPositionHandler &handler,
UnicodeString &appendTo) const {
return fNan.format(handler, appendTo);
}
// Number of code points in the NaN affix string.
int32_t countChar32ForNaN() const {
return fNan.toString().countChar32();
}
// Appends the infinity affix to appendTo via fInfinity.
UnicodeString &formatInfinity(
FieldPositionHandler &handler,
UnicodeString &appendTo) const {
return fInfinity.format(handler, appendTo);
}
// Number of code points in the infinity affix string.
int32_t countChar32ForInfinity() const {
return fInfinity.toString().countChar32();
}
UnicodeString &formatExponent(
const VisibleDigits &digits,
const DigitFormatterIntOptions &options,
int32_t signField,
int32_t intField,
FieldPositionHandler &handler,
UnicodeString &appendTo) const;
int32_t countChar32(
const DigitGrouping &grouping,
const DigitInterval &interval,
const DigitFormatterOptions &options) const;
int32_t countChar32ForExponent(
const VisibleDigits &exponent,
const DigitFormatterIntOptions &options) const;
};
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif // __DIGITFORMATTER_H__

View File

@ -1,58 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
* Copyright (C) 2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
* file name: digitgrouping.cpp
*/
#include "unicode/utypes.h"
#include "digitgrouping.h"
#include "smallintformatter.h"
U_NAMESPACE_BEGIN
/**
 * Returns TRUE if a grouping separator belongs at digitPos for a number
 * with digitsLeftOfDecimal integer digits. Positions below the primary
 * group size never get one; above it, separators repeat every
 * getGrouping2() positions.
 */
UBool DigitGrouping::isSeparatorAt(
        int32_t digitsLeftOfDecimal, int32_t digitPos) const {
    if (isGroupingEnabled(digitsLeftOfDecimal) && digitPos >= fGrouping) {
        const int32_t offset = digitPos - fGrouping;
        return (offset % getGrouping2() == 0);
    }
    return FALSE;
}
/**
 * Returns how many grouping separators a number with
 * digitsLeftOfDecimal integer digits receives; 0 when grouping is not
 * enabled for that many digits.
 */
int32_t DigitGrouping::getSeparatorCount(int32_t digitsLeftOfDecimal) const {
    return isGroupingEnabled(digitsLeftOfDecimal)
            ? (digitsLeftOfDecimal - 1 - fGrouping) / getGrouping2() + 1
            : 0;
}
/**
 * Returns TRUE if grouping is in use and the number has at least
 * fGrouping + getMinGrouping() integer digits.
 */
UBool DigitGrouping::isGroupingEnabled(int32_t digitsLeftOfDecimal) const {
    if (!isGroupingUsed()) {
        return FALSE;
    }
    const int32_t threshold = fGrouping + getMinGrouping();
    return (digitsLeftOfDecimal >= threshold);
}
/**
 * Returns TRUE if formatting positiveValue under range would produce no
 * grouping separators, judged from SmallIntFormatter's digit-count
 * estimate.
 */
UBool DigitGrouping::isNoGrouping(
        int32_t positiveValue, const IntDigitCountRange &range) const {
    const int32_t estimatedDigits =
            SmallIntFormatter::estimateDigitCount(positiveValue, range);
    return getSeparatorCount(estimatedDigits) == 0;
}
/**
 * Returns the effective secondary group size: fGrouping2 when it is
 * positive, otherwise the primary size fGrouping.
 */
int32_t DigitGrouping::getGrouping2() const {
    if (fGrouping2 > 0) {
        return fGrouping2;
    }
    return fGrouping;
}
/**
 * Returns the effective minimum grouping: fMinGrouping when it is
 * positive, otherwise 1.
 */
int32_t DigitGrouping::getMinGrouping() const {
    if (fMinGrouping > 0) {
        return fMinGrouping;
    }
    return 1;
}
/**
 * Resets all three grouping sizes to 0, which turns grouping off.
 */
void
DigitGrouping::clear() {
    fGrouping = 0;
    fGrouping2 = 0;
    fMinGrouping = 0;
}
U_NAMESPACE_END

View File

@ -1,112 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* digitgrouping.h
*
* created on: 2015jan6
* created by: Travis Keep
*/
#ifndef __DIGITGROUPING_H__
#define __DIGITGROUPING_H__
#include "unicode/uobject.h"
#include "unicode/utypes.h"
U_NAMESPACE_BEGIN
class IntDigitCountRange;
/**
 * Describes where grouping separators go among the integer digits of a
 * formatted number.
 */
class U_I18N_API DigitGrouping : public UMemory {
public:
    /**
     * Creates a policy with all sizes 0, i.e. no digit grouping.
     */
    DigitGrouping() : fGrouping(0), fGrouping2(0), fMinGrouping(0) { }

    /**
     * Returns TRUE if rhs has the same three grouping sizes.
     */
    UBool equals(const DigitGrouping &rhs) const {
        if (fGrouping != rhs.fGrouping) {
            return FALSE;
        }
        if (fGrouping2 != rhs.fGrouping2) {
            return FALSE;
        }
        return (fMinGrouping == rhs.fMinGrouping);
    }

    /**
     * Returns TRUE if a separator is needed after a particular digit.
     * @param digitsLeftOfDecimal the total count of digits left of the
     *   decimal.
     * @param digitPos 0 is the one's place; 1 is the 10's place; -1 is
     *   the 1/10's place etc.
     */
    UBool isSeparatorAt(int32_t digitsLeftOfDecimal, int32_t digitPos) const;

    /**
     * Returns the total number of separators used to format a number.
     * @param digitsLeftOfDecimal the total number of digits to the left
     *   of the decimal.
     */
    int32_t getSeparatorCount(int32_t digitsLeftOfDecimal) const;

    /**
     * Returns TRUE if grouping is used, FALSE otherwise. When this
     * returns FALSE, isSeparatorAt always returns FALSE and
     * getSeparatorCount always returns 0.
     */
    UBool isGroupingUsed() const { return (0 < fGrouping); }

    /**
     * Returns TRUE if this instance would not add grouping separators
     * when formatting positiveValue under the given digit-count
     * constraint.
     *
     * @param positiveValue the value to format.
     * @param range the minimum and maximum digits for formatting the
     *   value.
     */
    UBool isNoGrouping(
            int32_t positiveValue, const IntDigitCountRange &range) const;

    /**
     * Clears this instance so that digit grouping is not in effect.
     */
    void clear();

public:
    /**
     * Primary grouping size. A value of 0 (the default) or a negative
     * number makes isGroupingUsed() return FALSE.
     */
    int32_t fGrouping;

    /**
     * Secondary grouping size. If > 0, this size is used instead of
     * fGrouping for every group except the one just left of the decimal
     * point. The default of 0, or a negative value, means there is no
     * secondary grouping size.
     */
    int32_t fGrouping2;

    /**
     * If set (that is, > 0), no grouping separators are used when fewer
     * than (fGrouping + fMinGrouping) digits appear left of the decimal
     * place. Defaults to 0.
     */
    int32_t fMinGrouping;

private:
    UBool isGroupingEnabled(int32_t digitsLeftOfDecimal) const;
    int32_t getGrouping2() const;
    int32_t getMinGrouping() const;
};
U_NAMESPACE_END
#endif // __DIGITGROUPING_H__

View File

@ -1,55 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
* Copyright (C) 2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
* file name: digitinterval.cpp
*/
#include "unicode/utypes.h"
#include "digitinterval.h"
U_NAMESPACE_BEGIN
/**
 * Widens this interval, where necessary, so that it covers all of rhs.
 */
void DigitInterval::expandToContain(const DigitInterval &rhs) {
    if (rhs.fSmallestInclusive < fSmallestInclusive) {
        fSmallestInclusive = rhs.fSmallestInclusive;
    }
    if (rhs.fLargestExclusive > fLargestExclusive) {
        fLargestExclusive = rhs.fLargestExclusive;
    }
}
/**
 * Narrows this interval, where necessary, so that neither bound lies
 * outside rhs.
 */
void DigitInterval::shrinkToFitWithin(const DigitInterval &rhs) {
    if (rhs.fSmallestInclusive > fSmallestInclusive) {
        fSmallestInclusive = rhs.fSmallestInclusive;
    }
    if (rhs.fLargestExclusive < fLargestExclusive) {
        fLargestExclusive = rhs.fLargestExclusive;
    }
}
/**
 * Sets how many integer digits this interval spans. A negative count
 * means all of them (upper bound becomes INT32_MAX).
 */
void DigitInterval::setIntDigitCount(int32_t count) {
    if (count < 0) {
        fLargestExclusive = INT32_MAX;
    } else {
        fLargestExclusive = count;
    }
}
/**
 * Sets how many fraction digits this interval spans. A negative count
 * means all of them (lower bound becomes INT32_MIN).
 */
void DigitInterval::setFracDigitCount(int32_t count) {
    if (count < 0) {
        fSmallestInclusive = INT32_MIN;
    } else {
        fSmallestInclusive = -count;
    }
}
/**
 * Moves one bound outward, if needed, so that the digit at
 * digitExponent falls inside this interval. The upper bound is checked
 * first; the lower bound is only adjusted when the upper one was not.
 */
void DigitInterval::expandToContainDigit(int32_t digitExponent) {
    if (digitExponent >= fLargestExclusive) {
        fLargestExclusive = digitExponent + 1;
        return;
    }
    if (digitExponent < fSmallestInclusive) {
        fSmallestInclusive = digitExponent;
    }
}
/**
 * Returns TRUE if x lies in [fSmallestInclusive, fLargestExclusive).
 */
UBool DigitInterval::contains(int32_t x) const {
    return (x >= fSmallestInclusive && x < fLargestExclusive);
}
U_NAMESPACE_END

View File

@ -1,159 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* digitinterval.h
*
* created on: 2015jan6
* created by: Travis Keep
*/
#ifndef __DIGITINTERVAL_H__
#define __DIGITINTERVAL_H__
#include "unicode/uobject.h"
#include "unicode/utypes.h"
U_NAMESPACE_BEGIN
/**
 * A range of digit positions used for fixed point formatting.
 * Positions are identified by their power of 10: position 0 is the digit
 * just left of the decimal point, 1 the digit left of that, -1 the digit
 * just right of the decimal point, -2 the one after, and so on. The range
 * is held as an exclusive upper bound and an inclusive lower bound.
 */
class U_I18N_API DigitInterval : public UMemory {
public:
    /**
     * Creates an interval spanning all integer and fraction digits.
     */
    DigitInterval()
            : fLargestExclusive(INT32_MAX), fSmallestInclusive(INT32_MIN) { }

    /**
     * Resets this interval so that it spans all digits.
     */
    void clear() {
        fSmallestInclusive = INT32_MIN;
        fLargestExclusive = INT32_MAX;
    }

    /**
     * Returns TRUE if this interval contains this digit position.
     */
    UBool contains(int32_t digitPosition) const;

    /**
     * Returns TRUE if rhs has the same bounds as this interval.
     */
    UBool equals(const DigitInterval &rhs) const {
        if (fLargestExclusive != rhs.fLargestExclusive) {
            return FALSE;
        }
        return (fSmallestInclusive == rhs.fSmallestInclusive);
    }

    /**
     * Expands this interval so that it contains all of rhs.
     */
    void expandToContain(const DigitInterval &rhs);

    /**
     * Shrinks this interval so that it contains no more than rhs.
     */
    void shrinkToFitWithin(const DigitInterval &rhs);

    /**
     * Expands this interval as necessary to contain the digit with the
     * given exponent. After this method returns, this interval is
     * guaranteed to contain digitExponent.
     */
    void expandToContainDigit(int32_t digitExponent);

    /**
     * Changes the number of digits to the left of the decimal point that
     * this interval spans. A negative count means span all digits to the
     * left of the decimal point.
     */
    void setIntDigitCount(int32_t count);

    /**
     * Changes the number of digits to the right of the decimal point
     * that this interval spans. A negative count means span all digits
     * to the right of the decimal point.
     */
    void setFracDigitCount(int32_t count);

    /**
     * Sets the least significant inclusive position to smallest, clamped
     * so that it is never above 0.
     */
    void setLeastSignificantInclusive(int32_t smallest) {
        if (smallest < 0) {
            fSmallestInclusive = smallest;
        } else {
            fSmallestInclusive = 0;
        }
    }

    /**
     * Sets the most significant exclusive position to largest, clamped
     * so that it is never below 0.
     */
    void setMostSignificantExclusive(int32_t largest) {
        if (largest > 0) {
            fLargestExclusive = largest;
        } else {
            fLargestExclusive = 0;
        }
    }

    /**
     * Returns the exclusive upper bound. A result of 8 means the most
     * significant digit in the interval is the 10^7 digit. Returns
     * INT32_MAX if this interval spans all digits left of the decimal
     * point.
     */
    int32_t getMostSignificantExclusive() const {
        return fLargestExclusive;
    }

    /**
     * Returns the number of digits to the left of the decimal that this
     * interval includes. Synonym for getMostSignificantExclusive().
     */
    int32_t getIntDigitCount() const {
        return fLargestExclusive;
    }

    /**
     * Returns the number of digits to the right of the decimal that this
     * interval includes; INT32_MAX when the lower bound is INT32_MIN.
     */
    int32_t getFracDigitCount() const {
        if (fSmallestInclusive == INT32_MIN) {
            return INT32_MAX;
        }
        return -fSmallestInclusive;
    }

    /**
     * Returns the total number of digits that this interval spans.
     * Caution: if this interval spans all digits to the left or right of
     * the decimal point instead of some fixed number, the value returned
     * is undefined.
     */
    int32_t length() const {
        return fLargestExclusive - fSmallestInclusive;
    }

    /**
     * Returns the inclusive lower bound. A result of -3 means the least
     * significant digit in the interval is the 10^-3 digit. Returns
     * INT32_MIN if this interval spans all digits right of the decimal
     * point.
     */
    int32_t getLeastSignificantInclusive() const {
        return fSmallestInclusive;
    }

private:
    int32_t fLargestExclusive;
    int32_t fSmallestInclusive;
};
U_NAMESPACE_END
#endif // __DIGITINTERVAL_H__

File diff suppressed because it is too large Load Diff

View File

@ -1,529 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 1997-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File DIGITLST.H
*
* Modification History:
*
* Date Name Description
* 02/25/97 aliu Converted from java.
* 03/21/97 clhuang Updated per C++ implementation.
* 04/15/97 aliu Changed MAX_COUNT to DBL_DIG. Changed Digit to char.
* 09/09/97 aliu Adapted for exponential notation support.
* 08/02/98 stephen Added nearest/even rounding
* 06/29/99 stephen Made LONG_DIGITS a macro to satisfy SUN compiler
* 07/09/99 stephen Removed kMaxCount (unused, for HP compiler)
******************************************************************************
*/
#ifndef DIGITLST_H
#define DIGITLST_H
#include "unicode/uobject.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/decimfmt.h"
#include <float.h>
#include "decContext.h"
#include "decNumber.h"
#include "cmemory.h"
// Decimal digits in a 64-bit int
#define INT64_DIGITS 19
// Digit-count limits and buffer sizing constants for DigitList.
// DBL_DIG comes from <float.h>; INT64_DIGITS is the macro defined just above.
typedef enum EDigitListValues {
MAX_DBL_DIGITS = DBL_DIG, // decimal digits of precision of a double
MAX_I64_DIGITS = INT64_DIGITS, // decimal digits in a 64-bit int
MAX_DIGITS = MAX_I64_DIGITS, // the larger int64 limit is used as the general digit limit
MAX_EXPONENT = DBL_DIG,
DIGIT_PADDING = 3,
DEFAULT_DIGITS = 40, // Initial storage size, will grow as needed.
// "+." + fDigits + "e" + fDecimalAt
// (i.e. digits, plus padding for sign/point/'e', plus the exponent digits)
MAX_DEC_DIGITS = MAX_DIGITS + DIGIT_PADDING + MAX_EXPONENT
} EDigitListValues;
U_NAMESPACE_BEGIN
class CharString;
class DigitInterval;
// Export an explicit template instantiation of the MaybeStackHeaderAndArray that
// is used as a data member of DigitList.
//
// MSVC requires this, even though it should not be necessary.
// No direct access to the MaybeStackHeaderAndArray leaks out of the i18n library.
//
// Macintosh produces duplicate definition linker errors with the explicit template
// instantiation.
//
#if !U_PLATFORM_IS_DARWIN_BASED
template class U_I18N_API MaybeStackHeaderAndArray<decNumber, char, DEFAULT_DIGITS>;
#endif
// Tag type for the three-argument placement operator new/delete overloads
// that DigitList declares "for stack usage".
enum EStackMode { kOnStack };
// Bit flags; presumably the values passed as the fastpathBits argument of
// DigitList::set(StringPiece, ...) ("special flags for fast parsing").
// NOTE(review): the precise meaning of each bit is defined in the
// implementation file - confirm there.
enum EFastpathBits { kFastpathOk = 1, kNoDecimal = 2 };
/**
* Digit List is actually a Decimal Floating Point number.
* The original implementation has been replaced by a thin wrapper onto a
* decimal number from the decNumber library.
*
* The original DigitList API has been retained, to minimize the impact of
* the change on the rest of the ICU formatting code.
*
* The change to decNumber enables support for big decimal numbers, and
* allows rounding computations to be done directly in decimal, avoiding
* extra, and inaccurate, conversions to and from doubles.
*
* Original DigitList comments:
*
* Digit List utility class. Private to DecimalFormat. Handles the transcoding
* between numeric values and strings of characters. Only handles
* non-negative numbers. The division of labor between DigitList and
* DecimalFormat is that DigitList handles the radix 10 representation
* issues; DecimalFormat handles the locale-specific issues such as
* positive/negative, grouping, decimal point, currency, and so on.
* <P>
* A DigitList is really a representation of a floating point value.
* It may be an integer value; we assume that a double has sufficient
* precision to represent all digits of a long.
* <P>
* The DigitList representation consists of a string of characters,
* which are the digits radix 10, from '0' to '9'. It also has a radix
* 10 exponent associated with it. The value represented by a DigitList
* object can be computed by multiplying the fraction f, where 0 <= f < 1,
* derived by placing all the digits of the list to the right of the
* decimal point, by 10^exponent.
*
* --------
*
* DigitList vs. decimalNumber:
*
* DigitList stores digits with the most significant first.
* decNumber stores digits with the least significant first.
*
* DigitList, decimal point is before the most significant.
* decNumber, decimal point is after the least significant digit.
*
* digitList: 0.ddddd * 10 ^ exp
* decNumber: ddddd. * 10 ^ exp
*
* digitList exponent = decNumber exponent + digit count
*
* digitList, digits are platform invariant chars, '0' - '9'
* decNumber, digits are binary, one per byte, 0 - 9.
*
* (decNumber library is configurable in how digits are stored, ICU has configured
* it this way for convenience in replacing the old DigitList implementation.)
*/
class U_I18N_API DigitList : public UMemory { // Declare external to make compiler happy
public:
/** Default constructor. */
DigitList();
/** Destructor. */
~DigitList();
/* copy constructor
* @param DigitList The object to be copied.
*/
DigitList(const DigitList&); // copy constructor
/* assignment operator
* @param DigitList The object to be copied.
* @return a DigitList reference (presumably *this; the definition is not
* in this header).
*/
DigitList& operator=(const DigitList&); // assignment operator
/**
* Return true if another object is semantically equal to this one.
* @param other The DigitList to be compared for equality
* @return true if another object is semantically equal to this one.
* return false otherwise.
*/
UBool operator==(const DigitList& other) const;
/**
* Compares this number with other.
* NOTE(review): the return convention (presumably negative / zero /
* positive) is defined in the implementation file - confirm there.
* Unlike operator==, this method is not declared const.
*/
int32_t compare(const DigitList& other);
/** Negation of operator==. */
inline UBool operator!=(const DigitList& other) const { return !operator==(other); }
/**
* Clears out the digits.
* Use before appending them.
* Typically, you set a series of digits with append, then at the point
* you hit the decimal point, you set myDigitList.fDecimalAt = myDigitList.fCount;
* then go on appending digits.
* (fDecimalAt and fCount are fields of the original implementation; see
* setDecimalAt()/getCount() for the accessors that replaced them.)
*/
void clear(void);
/**
* Remove, by rounding, any fractional part of the decimal number,
* leaving an integer value.
*/
void toIntegralValue();
/**
* Appends digits to the list.
* CAUTION: this function is not recommended for new code.
* In the original DigitList implementation, decimal numbers were
* parsed by appending them to a digit list as they were encountered.
* With the revamped DigitList based on decNumber, append is very
* inefficient, and the interaction with the exponent value is confusing.
* Best avoided.
* TODO: remove this function once all use has been replaced.
* TODO: describe alternative to append()
* @param digit The digit to be appended.
*/
void append(char digit);
/**
* Utility routine to get the value of the digit list
* Returns 0.0 if zero length.
* @return the value of the digit list.
*/
double getDouble(void) const;
/**
* Utility routine to get the value of the digit list
* Make sure that fitsIntoLong() is called before calling this function.
* Returns 0 if zero length.
* @return the value of the digit list, return 0 if it is zero length
*/
int32_t getLong(void) /*const*/;
/**
* Utility routine to get the value of the digit list
* Make sure that fitsIntoInt64() is called before calling this function.
* Returns 0 if zero length.
* @return the value of the digit list, return 0 if it is zero length
*/
int64_t getInt64(void) /*const*/;
/**
* Utility routine to get the value of the digit list as a decimal string.
* @param str receives the decimal string
* @param status ICU error code
*/
void getDecimal(CharString &str, UErrorCode &status);
/**
* Return true if the number represented by this object can fit into
* a long.
* @param ignoreNegativeZero True if negative zero is ignored.
* @return true if the number represented by this object can fit into
* a long, return false otherwise.
*/
UBool fitsIntoLong(UBool ignoreNegativeZero) /*const*/;
/**
* Return true if the number represented by this object can fit into
* an int64_t.
* @param ignoreNegativeZero True if negative zero is ignored.
* @return true if the number represented by this object can fit into
* an int64_t, return false otherwise.
*/
UBool fitsIntoInt64(UBool ignoreNegativeZero) /*const*/;
/**
* Utility routine to set the value of the digit list from a double.
* @param source The value to be set
*/
void set(double source);
/**
* Utility routine to set the value of the digit list from a long.
* NOTE(review): earlier documentation mentioned a maximumDigits argument;
* this overload takes only the source value.
* @param source The value to be set
*/
void set(int32_t source);
/**
* Utility routine to set the value of the digit list from an int64.
* NOTE(review): earlier documentation mentioned a maximumDigits argument;
* this overload takes only the source value.
* @param source The value to be set
*/
void set(int64_t source);
/**
* Utility routine to set the value of the digit list from an int64.
* Does not set the decnumber unless requested later
* NOTE(review): earlier documentation mentioned a maximumDigits argument;
* this method takes only the source value.
* @param source The value to be set
*/
void setInteger(int64_t source);
/**
* Utility routine to set the value of the digit list from a decimal number
* string.
* @param source The value to be set. The string must be nul-terminated.
* @param status ICU error code
* @param fastpathBits special flags for fast parsing
*/
void set(StringPiece source, UErrorCode &status, uint32_t fastpathBits = 0);
/**
* Multiply this = this * arg
* This digitlist will be expanded if necessary to accommodate the result.
* @param arg the number to multiply by.
* @param status ICU error code
*/
void mult(const DigitList &arg, UErrorCode &status);
/**
* Divide this = this / arg
* @param arg the number to divide by.
* @param status ICU error code
*/
void div(const DigitList &arg, UErrorCode &status);
// The following functions replace direct access to the original DigitList implementation
// data structures.
void setRoundingMode(DecimalFormat::ERoundingMode m);
/** Test a number for zero.
* @return TRUE if the number is zero
*/
UBool isZero(void) const;
/** Test for a NaN
* @return TRUE if the number is a NaN
*/
UBool isNaN(void) const {return decNumberIsNaN(fDecNumber);}
/** Test for an infinity
* @return TRUE if the number is infinite
*/
UBool isInfinite() const {return decNumberIsInfinite(fDecNumber);}
/** Reduce, or normalize. Removes trailing zeroes, adjusts exponent appropriately. */
void reduce();
/** Remove trailing fraction zeros, adjust exponent accordingly. */
void trim();
/** Set to zero */
void setToZero() {uprv_decNumberZero(fDecNumber);}
/** get the number of digits in the decimal number */
int32_t digits() const {return fDecNumber->digits;}
/**
* Round the number to the given number of digits.
* @param maximumDigits The maximum number of digits to be shown.
* Upon return, count will be less than or equal to maximumDigits.
* result is guaranteed to be trimmed.
*/
void round(int32_t maximumDigits);
void roundFixedPoint(int32_t maximumFractionDigits);
/** Ensure capacity for digits. Grow the storage if it is currently less than
* the requested size. Capacity is not reduced if it is already greater
* than requested.
*/
void ensureCapacity(int32_t requestedSize, UErrorCode &status);
/** @return TRUE unless the underlying decNumber is flagged as negative. */
UBool isPositive(void) const { return decNumberIsNegative(fDecNumber) == 0;}
// Accessors standing in for the fIsPositive / fDecimalAt / fCount fields of
// the original implementation (listed in the historical comment in the
// private section below). NOTE(review): how each one maps onto the
// decNumber representation is defined in the implementation file.
void setPositive(UBool s);
void setDecimalAt(int32_t d);
int32_t getDecimalAt();
void setCount(int32_t c);
int32_t getCount() const;
/**
* Set the digit in platform (invariant) format, from '0'..'9'
* @param i index of digit
* @param v digit value, from '0' to '9' in platform invariant format
*/
void setDigit(int32_t i, char v);
/**
* Get the digit in platform (invariant) format, from '0'..'9' inclusive
* @param i index of digit
* @return invariant format of the digit
*/
char getDigit(int32_t i);
/**
* Get the digit's value, as an integer from 0..9 inclusive.
* Note that internally this value is a decNumberUnit, but ICU configures it to be a uint8_t.
* @param i index of digit
* @return value of that digit
*/
uint8_t getDigitValue(int32_t i);
/**
* Gets the upper bound exponent for this value. For 987, returns 3
* because 10^3 is the smallest power of 10 that is just greater than
* 987.
*/
int32_t getUpperExponent() const;
/**
* Gets the lower bound exponent for this value. For 98.7, returns -1
* because the right most digit, is the 10^-1 place.
*/
int32_t getLowerExponent() const { return fDecNumber->exponent; }
/**
* Sets result to the smallest DigitInterval needed to display this
* DigitList in fixed point form and returns result.
*/
DigitInterval& getSmallestInterval(DigitInterval &result) const;
/**
* Like getDigitValue, but the digit is identified by exponent.
* For example, getDigitByExponent(7) returns the 10^7 place of this
* DigitList. Unlike getDigitValue, there are no upper or lower bounds
* for passed parameter. Instead, getDigitByExponent returns 0 if
* the exponent falls outside the interval for this DigitList.
*/
uint8_t getDigitByExponent(int32_t exponent) const;
/**
* Appends the digits in this object to a CharString.
* 3 is appended as (char) 3, not '3'
*/
void appendDigitsTo(CharString &str, UErrorCode &status) const;
/**
* Equivalent to roundFixedPoint(-digitExponent) except unlike
* roundFixedPoint, this works for any digitExponent value.
* If maxSigDigits is set then this instance is rounded to have no more
* than maxSigDigits. The end result is guaranteed to be trimmed.
*/
void roundAtExponent(int32_t digitExponent, int32_t maxSigDigits=INT32_MAX);
/**
* Quantizes according to some amount and rounds according to the
* context of this instance. Quantizing 3.233 with 0.05 gives 3.25.
*/
void quantize(const DigitList &amount, UErrorCode &status);
/**
* Like toScientific but only returns the exponent
* leaving this instance unchanged.
*/
int32_t getScientificExponent(
int32_t minIntDigitCount, int32_t exponentMultiplier) const;
/**
* Converts this instance to scientific notation. This instance
* becomes the mantissa and the exponent is returned.
* @param minIntDigitCount minimum integer digits in mantissa
* Exponent is set so that the actual number of integer digits
* in mantissa is as close to the minimum as possible.
* @param exponentMultiplier The exponent is always a multiple of
* This number. Usually 1, but set to 3 for engineering notation.
* @return exponent
*/
int32_t toScientific(
int32_t minIntDigitCount, int32_t exponentMultiplier);
/**
* Shifts decimal to the right.
* @param numPlaces number of places to shift by
*/
void shiftDecimalRight(int32_t numPlaces);
private:
/*
* Historical note: the fields listed at the end of this comment belonged
* to the original DigitList implementation, where they were intentionally
* public and could be set directly. They are no longer data members of
* this class; the description is kept because the accessor functions
* above retain their semantics.
*<P>
* The value represented is given by placing the decimal point before
* fDigits[fDecimalAt]. If fDecimalAt is < 0, then leading zeros between
* the decimal point and the first nonzero digit are implied. If fDecimalAt
* is > fCount, then trailing zeros between the fDigits[fCount-1] and the
* decimal point are implied.
* <P>
* Equivalently, the represented value is given by f * 10^fDecimalAt. Here
* f is a value 0.1 <= f < 1 arrived at by placing the digits in fDigits to
* the right of the decimal.
* <P>
* DigitList is normalized, so if it is non-zero, fDigits[0] is non-zero. We
* don't allow denormalized numbers because our exponent is effectively of
* unlimited magnitude. The fCount value contains the number of significant
* digits present in fDigits[].
* <P>
* Zero is represented by any DigitList with fCount == 0 or with each fDigits[i]
* for all i <= fCount == '0'.
*
* int32_t fDecimalAt;
* int32_t fCount;
* UBool fIsPositive;
* char *fDigits;
* DecimalFormat::ERoundingMode fRoundingMode;
*/
public:
decContext fContext; // public access to status flags.
private:
// The decNumber that the inline accessors above read and modify.
// NOTE(review): presumably points into fStorage - confirm in the implementation.
decNumber *fDecNumber;
// Storage for a decNumber header plus a digit array with an initial
// capacity of DEFAULT_DIGITS.
MaybeStackHeaderAndArray<decNumber, char, DEFAULT_DIGITS> fStorage;
/* Cached double value corresponding to this decimal number.
* This is an optimization for the formatting implementation, which may
* ask for the double value multiple times.
*/
union DoubleOrInt64 {
double fDouble;
int64_t fInt64;
} fUnion;
// Records what fUnion currently caches: kDouble after internalSetDouble(),
// kNone after internalClear().
enum EHave {
kNone=0,
kDouble
} fHave;
UBool shouldRoundUp(int32_t maximumDigits) const;
public:
// Allocation operators: use UMemory's when ICU overrides C++ allocation,
// otherwise forward to the global operator new/delete.
#if U_OVERRIDE_CXX_ALLOCATION
using UMemory::operator new;
using UMemory::operator delete;
#else
static inline void * U_EXPORT2 operator new(size_t size) U_NO_THROW { return ::operator new(size); };
static inline void U_EXPORT2 operator delete(void *ptr ) U_NO_THROW { ::operator delete(ptr); };
#endif
// NOTE(review): presumably parses decstr as a decimal number and reports
// where parsing stopped through *end, strtod-style - confirm against the
// implementation file.
static double U_EXPORT2 decimalStrToDouble(char *decstr, char **end);
/**
* Placement new for stack usage. Returns the caller-supplied onStack
* pointer unchanged; no memory is allocated.
* @internal
*/
static inline void * U_EXPORT2 operator new(size_t /*size*/, void * onStack, EStackMode /*mode*/) U_NO_THROW { return onStack; }
/**
* Placement delete for stack usage. Does nothing.
* @internal
*/
static inline void U_EXPORT2 operator delete(void * /*ptr*/, void * /*onStack*/, EStackMode /*mode*/) U_NO_THROW {}
private:
// Stores d as the cached double value and marks the cache as valid.
inline void internalSetDouble(double d) {
fHave = kDouble;
fUnion.fDouble=d;
}
// Marks the cached value as absent.
inline void internalClear() {
fHave = kNone;
}
};
U_NAMESPACE_END
#endif // #if !UCONFIG_NO_FORMATTING
#endif // _DIGITLST
//eof

View File

@ -0,0 +1,574 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
// From the double-conversion library. Original license:
//
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include <stdarg.h>
#include <limits.h>
// ICU PATCH: Customize header file paths for ICU.
// The file fixed-dtoa.h is not needed.
#include "double-conversion-strtod.h"
#include "double-conversion-bignum.h"
#include "double-conversion-cached-powers.h"
#include "double-conversion-ieee.h"
// ICU PATCH: Wrap in ICU namespace
U_NAMESPACE_BEGIN
namespace double_conversion {
// 2^53 = 9007199254740992.
// Any integer with at most 15 decimal digits will hence fit into a double
// (which has a 53bit significand) without loss of precision.
static const int kMaxExactDoubleIntegerDecimalDigits = 15;
// 2^64 = 18446744073709551616 > 10^19
static const int kMaxUint64DecimalDigits = 19;
// Max double: 1.7976931348623157 x 10^308
// Min non-zero double: 4.9406564584124654 x 10^-324
// Any x >= 10^309 is interpreted as +infinity.
// Any x <= 10^-324 is interpreted as 0.
// Note that 2.5e-324 (despite being smaller than the min double) will be read
// as non-zero (equal to the min non-zero double).
static const int kMaxDecimalPower = 309;
static const int kMinDecimalPower = -324;
// 2^64 - 1 = 18446744073709551615, the largest value a uint64_t can hold.
static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF);
// Powers of ten, 10^0 through 10^22, indexed by exponent. DoubleStrtod uses
// them as exact multipliers/divisors.
static const double exact_powers_of_ten[] = {
1.0, // 10^0
10.0,
100.0,
1000.0,
10000.0,
100000.0,
1000000.0,
10000000.0,
100000000.0,
1000000000.0,
10000000000.0, // 10^10
100000000000.0,
1000000000000.0,
10000000000000.0,
100000000000000.0,
1000000000000000.0,
10000000000000000.0,
100000000000000000.0,
1000000000000000000.0,
10000000000000000000.0,
100000000000000000000.0, // 10^20
1000000000000000000000.0,
// 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22
10000000000000000000000.0
};
// Number of entries in exact_powers_of_ten (exponents 0..22).
static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten);
// Maximum number of significant digits in the decimal representation.
// In fact the value is 772 (see conversions.cc), but to give us some margin
// we round up to 780.
static const int kMaxSignificantDecimalDigits = 780;
// Returns the part of buffer that begins at its first character other than
// '0'. If buffer holds nothing but '0' characters (or is empty), an empty
// vector anchored at buffer.start() is returned.
static Vector<const char> TrimLeadingZeros(Vector<const char> buffer) {
  int first_kept = 0;
  while (first_kept < buffer.length() && buffer[first_kept] == '0') {
    ++first_kept;
  }
  if (first_kept == buffer.length()) {
    return Vector<const char>(buffer.start(), 0);
  }
  return buffer.SubVector(first_kept, buffer.length());
}
// Returns the part of buffer that ends at its last character other than '0'.
// If buffer holds nothing but '0' characters (or is empty), an empty vector
// anchored at buffer.start() is returned.
static Vector<const char> TrimTrailingZeros(Vector<const char> buffer) {
  int kept_length = buffer.length();
  while (kept_length > 0 && buffer[kept_length - 1] == '0') {
    --kept_length;
  }
  if (kept_length == 0) {
    return Vector<const char>(buffer.start(), 0);
  }
  return buffer.SubVector(0, kept_length);
}
// Copies the first kMaxSignificantDecimalDigits - 1 digits of buffer into
// significant_buffer, writes '1' as the final digit and stores the exponent
// matching the shortened digit string in *significant_exponent.
// buffer must already be trimmed (its last digit must not be '0').
static void CutToMaxSignificantDigits(Vector<const char> buffer,
                                      int exponent,
                                      char* significant_buffer,
                                      int* significant_exponent) {
  const int last_index = kMaxSignificantDecimalDigits - 1;
  int pos = 0;
  while (pos < last_index) {
    significant_buffer[pos] = buffer[pos];
    ++pos;
  }
  // Because the input was trimmed, its final digit cannot be '0'.
  ASSERT(buffer[buffer.length() - 1] != '0');
  // A non-zero last digit stands in for everything that was cut off; that is
  // sufficient to guarantee correct rounding.
  significant_buffer[last_index] = '1';
  // Every digit that was cut off moves into the exponent.
  const int dropped_digits = buffer.length() - kMaxSignificantDecimalDigits;
  *significant_exponent = exponent + dropped_digits;
}
// Strips leading and trailing zeros from buffer and limits the result to
// kMaxSignificantDecimalDigits digits.
// When no cutting is required, *trimmed refers to the input buffer itself.
// When digits have to be cut, the shortened digit string is written to
// buffer_copy_space (which must offer at least kMaxSignificantDecimalDigits
// chars, as given by space_size) and *trimmed refers to that copy.
// *updated_exponent receives the exponent that goes with *trimmed.
static void TrimAndCut(Vector<const char> buffer, int exponent,
                       char* buffer_copy_space, int space_size,
                       Vector<const char>* trimmed, int* updated_exponent) {
  Vector<const char> without_leading = TrimLeadingZeros(buffer);
  Vector<const char> digits = TrimTrailingZeros(without_leading);
  // Each removed trailing zero is accounted for in the exponent.
  const int adjusted_exponent =
      exponent + (without_leading.length() - digits.length());
  if (digits.length() <= kMaxSignificantDecimalDigits) {
    *trimmed = digits;
    *updated_exponent = adjusted_exponent;
    return;
  }
  (void) space_size;  // Mark variable as used.
  ASSERT(space_size >= kMaxSignificantDecimalDigits);
  CutToMaxSignificantDigits(digits, adjusted_exponent,
                            buffer_copy_space, updated_exponent);
  *trimmed = Vector<const char>(buffer_copy_space,
                                kMaxSignificantDecimalDigits);
}
// Converts the leading digits of buffer into a uint64 and reports through
// number_of_read_digits how many digits were consumed.
// Digits are consumed only while the running value is at most
// kMaxUint64 / 10 - 1, so once the string starts with "1844674407370955161"
// no further digit is read. Since 2^64 = 18446744073709551616 one more digit
// could be read if it were 6 or less, but that would complicate the code.
static uint64_t ReadUint64(Vector<const char> buffer,
                           int* number_of_read_digits) {
  const uint64_t kLargestExtendableValue = kMaxUint64 / 10 - 1;
  uint64_t value = 0;
  int consumed = 0;
  for (; consumed < buffer.length() && value <= kLargestExtendableValue;
       ++consumed) {
    const int digit = buffer[consumed] - '0';
    ASSERT(0 <= digit && digit <= 9);
    value = 10 * value + digit;
  }
  *number_of_read_digits = consumed;
  return value;
}
// Builds a DiyFp (binary exponent 0) from the digits in buffer.
// The DiyFp stored in *result is not necessarily normalized.
// *remaining_decimals receives the number of digits that did not fit into the
// 64-bit significand. If it is zero, *result is accurate; otherwise the
// significand has been rounded and carries an error of at most 1/2 ulp.
static void ReadDiyFp(Vector<const char> buffer,
                      DiyFp* result,
                      int* remaining_decimals) {
  int consumed;
  uint64_t significand = ReadUint64(buffer, &consumed);
  const int unread_digits = buffer.length() - consumed;
  // Round the significand on the first digit that was left unread.
  if (unread_digits != 0 && buffer[consumed] >= '5') {
    ++significand;
  }
  // The binary exponent is always 0 at this point.
  *result = DiyFp(significand, 0);
  *remaining_decimals = unread_digits;
}
// Fast path: tries to compute trimmed * 10^exponent with plain double
// arithmetic. Returns true and stores the value in *result when the digits
// and the needed power(s) of ten fit into a double; returns false when this
// path cannot be used and the caller has to fall back to another algorithm.
static bool DoubleStrtod(Vector<const char> trimmed,
                         int exponent,
                         double* result) {
#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS)
  // On x86 the floating-point stack can be 64 or 80 bits wide. If it is
  // 80 bits wide (as is the case on Linux) then double-rounding occurs and the
  // result is not accurate.
  // We know that Windows32 uses 64 bits and is therefore accurate.
  // Note that the ARM simulator is compiled for 32bits. It therefore exhibits
  // the same problem.
  return false;
#endif
  if (trimmed.length() > kMaxExactDoubleIntegerDecimalDigits) {
    // Too many digits to be held by a double without loss.
    return false;
  }
  // From here on the trimmed input fits into a double. Whenever the required
  // power of ten fits into a double as well, one multiplication or division
  // gives the result, because IEEE guarantees that floating-point operations
  // return the best possible approximation.
  int read_digits;
  if (exponent < 0) {
    if (-exponent >= kExactPowersOfTenSize) {
      return false;
    }
    // 10^-exponent fits into a double.
    *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
    ASSERT(read_digits == trimmed.length());
    *result /= exact_powers_of_ten[-exponent];
    return true;
  }
  if (exponent < kExactPowersOfTenSize) {
    // 10^exponent fits into a double.
    *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
    ASSERT(read_digits == trimmed.length());
    *result *= exact_powers_of_ten[exponent];
    return true;
  }
  const int spare_digits =
      kMaxExactDoubleIntegerDecimalDigits - trimmed.length();
  const int leftover_exponent = exponent - spare_digits;
  if (leftover_exponent >= kExactPowersOfTenSize) {
    return false;
  }
  // The trimmed string is short enough to be multiplied by 10^spare_digits
  // first; the exponent that is left over then fits into a double too.
  *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
  ASSERT(read_digits == trimmed.length());
  *result *= exact_powers_of_ten[spare_digits];
  *result *= exact_powers_of_ten[leftover_exponent];
  return true;
}
// Returns 10^exponent as an exact DiyFp.
// The given exponent must be in the range [1; kDecimalExponentDistance[.
static DiyFp AdjustmentPowerOfTen(int exponent) {
  ASSERT(0 < exponent);
  ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance);
  // The table below hardcodes the powers for a decimal exponent distance
  // of exactly 8.
  ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8);
  struct PowerEntry {
    uint64_t significand;
    int binary_exponent;
  };
  static const PowerEntry kAdjustmentPowers[] = {
    { UINT64_2PART_C(0xa0000000, 00000000), -60 },  // 10^1
    { UINT64_2PART_C(0xc8000000, 00000000), -57 },  // 10^2
    { UINT64_2PART_C(0xfa000000, 00000000), -54 },  // 10^3
    { UINT64_2PART_C(0x9c400000, 00000000), -50 },  // 10^4
    { UINT64_2PART_C(0xc3500000, 00000000), -47 },  // 10^5
    { UINT64_2PART_C(0xf4240000, 00000000), -44 },  // 10^6
    { UINT64_2PART_C(0x98968000, 00000000), -40 }   // 10^7
  };
  if (exponent < 1 || exponent > 7) {
    UNREACHABLE();
  }
  const PowerEntry& entry = kAdjustmentPowers[exponent - 1];
  return DiyFp(entry.significand, entry.binary_exponent);
}
// Approximates buffer * 10^exponent with 64-bit DiyFp arithmetic while
// tracking an upper bound on the accumulated error.
// If the function returns true then the result is the correct double.
// Otherwise it is either the correct double or the double that is just below
// the correct double.
//   buffer:   decimal digits of the number.
//   exponent: decimal exponent applied to the digits in buffer.
//   result:   receives the computed double (set on both return values).
static bool DiyFpStrtod(Vector<const char> buffer,
int exponent,
double* result) {
DiyFp input;
int remaining_decimals;
ReadDiyFp(buffer, &input, &remaining_decimals);
// Since we may have dropped some digits the input is not accurate.
// If remaining_decimals is different from 0 then the error is at most
// .5 ulp (unit in the last place).
// We don't want to deal with fractions and therefore keep a common
// denominator.
const int kDenominatorLog = 3;
const int kDenominator = 1 << kDenominatorLog;
// Move the remaining decimals into the exponent.
exponent += remaining_decimals;
// error is expressed in units of 1/kDenominator ulp.
uint64_t error = (remaining_decimals == 0 ? 0 : kDenominator / 2);
int old_e = input.e();
input.Normalize();
// Normalizing shifted the significand left; scale the error by the same amount.
error <<= old_e - input.e();
ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent);
if (exponent < PowersOfTenCache::kMinDecimalExponent) {
*result = 0.0;
return true;
}
DiyFp cached_power;
int cached_decimal_exponent;
PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent,
&cached_power,
&cached_decimal_exponent);
// The cache may hand back a power for a different decimal exponent; bridge
// the gap with an exact small power of ten.
if (cached_decimal_exponent != exponent) {
int adjustment_exponent = exponent - cached_decimal_exponent;
DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent);
input.Multiply(adjustment_power);
if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) {
// The product of input with the adjustment power fits into a 64 bit
// integer.
ASSERT(DiyFp::kSignificandSize == 64);
} else {
// The adjustment power is exact. There is hence only an error of 0.5.
error += kDenominator / 2;
}
}
input.Multiply(cached_power);
// The error introduced by a multiplication of a*b equals
// error_a + error_b + error_a*error_b/2^64 + 0.5
// Substituting a with 'input' and b with 'cached_power' we have
// error_b = 0.5 (all cached powers have an error of less than 0.5 ulp),
// error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
int error_b = kDenominator / 2;
int error_ab = (error == 0 ? 0 : 1); // We round up to 1.
int fixed_error = kDenominator / 2;
error += error_b + error_ab + fixed_error;
old_e = input.e();
input.Normalize();
error <<= old_e - input.e();
// See if the double's significand changes if we add/subtract the error.
int order_of_magnitude = DiyFp::kSignificandSize + input.e();
int effective_significand_size =
Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude);
// Number of low-order bits of input.f() that do not fit into the double's
// significand.
int precision_digits_count =
DiyFp::kSignificandSize - effective_significand_size;
if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) {
// This can only happen for very small denormals. In this case the
// half-way multiplied by the denominator exceeds the range of an uint64.
// Simply shift everything to the right.
int shift_amount = (precision_digits_count + kDenominatorLog) -
DiyFp::kSignificandSize + 1;
input.set_f(input.f() >> shift_amount);
input.set_e(input.e() + shift_amount);
// We add 1 for the lost precision of error, and kDenominator for
// the lost precision of input.f().
error = (error >> shift_amount) + 1 + kDenominator;
precision_digits_count -= shift_amount;
}
// We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
ASSERT(DiyFp::kSignificandSize == 64);
ASSERT(precision_digits_count < 64);
uint64_t one64 = 1;
uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1;
uint64_t precision_bits = input.f() & precision_bits_mask;
uint64_t half_way = one64 << (precision_digits_count - 1);
// Bring both values to the same 1/kDenominator scale as error.
precision_bits *= kDenominator;
half_way *= kDenominator;
DiyFp rounded_input(input.f() >> precision_digits_count,
input.e() + precision_digits_count);
// Round up only when the discarded bits are at or above half-way even after
// allowing for the error.
if (precision_bits >= half_way + error) {
rounded_input.set_f(rounded_input.f() + 1);
}
// If the last_bits are too close to the half-way case then we are too
// inaccurate and round down. In this case we return false so that we can
// fall back to a more precise algorithm.
*result = Double(rounded_input).value();
if (half_way - error < precision_bits && precision_bits < half_way + error) {
// Too imprecise. The caller will have to fall back to a slower version.
// However the returned number is guaranteed to be either the correct
// double, or the next-lower double.
return false;
} else {
return true;
}
}
// Compares the decimal number buffer * 10^exponent with diy_fp by loading
// both sides into Bignums and scaling them until they share a common exponent.
// Returns
//   -1 if buffer * 10^exponent <  diy_fp,
//    0 if buffer * 10^exponent == diy_fp,
//   +1 if buffer * 10^exponent >  diy_fp.
// Preconditions (each one is ASSERTed below):
//   buffer.length() + exponent <= kMaxDecimalPower + 1
//   buffer.length() + exponent > kMinDecimalPower
//   buffer.length() <= kMaxSignificantDecimalDigits
static int CompareBufferWithDiyFp(Vector<const char> buffer,
                                  int exponent,
                                  DiyFp diy_fp) {
  ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1);
  ASSERT(buffer.length() + exponent > kMinDecimalPower);
  ASSERT(buffer.length() <= kMaxSignificantDecimalDigits);
  // The Bignum must be large enough for every value we can produce here.
  // Our Bignum implementation keeps exponents in a separate field, so shifts
  // consume at most one bigit (< 64 bits).
  // log2(10) == 3.3219..., hence the factor 333 / 100.
  ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits);

  Bignum decimal_side;
  decimal_side.AssignDecimalString(buffer);
  Bignum binary_side;
  binary_side.AssignUInt64(diy_fp.f());

  // Apply the power of ten. A negative decimal exponent is handled by
  // multiplying the other side instead of dividing this one.
  if (exponent < 0) {
    binary_side.MultiplyByPowerOfTen(-exponent);
  } else {
    decimal_side.MultiplyByPowerOfTen(exponent);
  }

  // Apply the power of two in the same way: a non-positive binary exponent
  // becomes a left shift of the decimal side.
  const int binary_exponent = diy_fp.e();
  if (binary_exponent > 0) {
    binary_side.ShiftLeft(binary_exponent);
  } else {
    decimal_side.ShiftLeft(-binary_exponent);
  }

  return Bignum::Compare(decimal_side, binary_side);
}
// Computes a first approximation of trimmed * 10^exponent and stores it in
// *guess.
// Returns true if the guess is the correct double.
// Returns false when the guess is either correct or the next-lower double, in
// which case the caller has to decide between the two.
static bool ComputeGuess(Vector<const char> trimmed, int exponent,
                         double* guess) {
  // No digits left: the value is zero.
  if (trimmed.length() == 0) {
    *guess = 0.0;
    return true;
  }
  // Decimal magnitude at or above kMaxDecimalPower: the guess is infinity.
  if (exponent + trimmed.length() - 1 >= kMaxDecimalPower) {
    *guess = Double::Infinity();
    return true;
  }
  // Decimal magnitude at or below kMinDecimalPower: the guess is zero.
  if (exponent + trimmed.length() <= kMinDecimalPower) {
    *guess = 0.0;
    return true;
  }

  // Try DoubleStrtod first and fall back to DiyFpStrtod; each reports through
  // its return value whether the number it stored is known to be correct.
  const bool guess_is_exact = DoubleStrtod(trimmed, exponent, guess) ||
                              DiyFpStrtod(trimmed, exponent, guess);
  // An infinite guess is reported as final as well.
  return guess_is_exact || *guess == Double::Infinity();
}
// Converts the decimal number buffer * 10^exponent to a double.
double Strtod(Vector<const char> buffer, int exponent) {
  // TrimAndCut hands back the digits to work with (possibly stored in
  // copy_buffer) together with the exponent that goes with them.
  char copy_buffer[kMaxSignificantDecimalDigits];
  Vector<const char> digits;
  int digits_exponent;
  TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
             &digits, &digits_exponent);

  double candidate;
  if (ComputeGuess(digits, digits_exponent, &candidate)) {
    return candidate;
  }

  // The candidate is either correct or one double too low. Compare the input
  // with the boundary between the candidate and its successor to find out.
  const int order = CompareBufferWithDiyFp(digits, digits_exponent,
                                           Double(candidate).UpperBoundary());
  // Above the boundary we round up; exactly on it we round towards even, i.e.
  // up only when the candidate's significand is odd.
  const bool round_up =
      order > 0 ||
      (order == 0 && (Double(candidate).Significand() & 1) != 0);
  return round_up ? Double(candidate).NextDouble() : candidate;
}
// Converts the decimal number buffer * 10^exponent to a float.
float Strtof(Vector<const char> buffer, int exponent) {
  // TrimAndCut hands back the digits to work with (possibly stored in
  // copy_buffer) together with the exponent that goes with them.
  char copy_buffer[kMaxSignificantDecimalDigits];
  Vector<const char> digits;
  int digits_exponent;
  TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
             &digits, &digits_exponent);

  double double_guess;
  const bool guess_is_exact =
      ComputeGuess(digits, digits_exponent, &double_guess);
  const float float_guess = static_cast<float>(double_guess);
  if (float_guess == double_guess) {
    // This shortcut triggers for integer values.
    return float_guess;
  }

  // We must catch double-rounding. Say the double has been rounded up, and is
  // now a boundary of a float, and rounds up again. This is why we have to
  // look at the previous double too.
  // Example (in decimal numbers):
  //    input: 12349
  //    high-precision (4 digits): 1235
  //    low-precision (3 digits):
  //       when read from input: 123
  //       when rounded from high precision: 124.
  // To do this we simply look at the neighbors of the correct result and see
  // if they would round to the same float. If the guess is not correct we
  // have to look at four values (since two different doubles could be the
  // correct double).
  const double double_above = Double(double_guess).NextDouble();
  const double double_below = Double(double_guess).PreviousDouble();

  const float float_below = static_cast<float>(double_below);
  const float float_above = static_cast<float>(double_above);
  const float float_top =
      guess_is_exact
          ? float_above
          : static_cast<float>(Double(double_above).NextDouble());
  ASSERT(float_below <= float_guess && float_guess <= float_above &&
         float_above <= float_top);

  // If the guess doesn't lie near a single-precision boundary we can simply
  // return its float value.
  if (float_below == float_top) {
    return float_guess;
  }

  // Exactly one step separates the four floats.
  ASSERT((float_below != float_guess && float_guess == float_above &&
          float_above == float_top) ||
         (float_below == float_guess && float_guess != float_above &&
          float_above == float_top) ||
         (float_below == float_guess && float_guess == float_above &&
          float_above != float_top));

  // lower and upper are the two possible candidates (in the same way that
  // double_guess was the lower candidate for a double-precision guess).
  const float lower = float_below;
  const float upper = float_top;

  DiyFp boundary;
  if (lower == 0.0f) {
    // For a zero lower candidate the boundary is taken as half of min_float.
    float min_float = 1e-45f;
    boundary = Double(static_cast<double>(min_float) / 2).AsDiyFp();
  } else {
    boundary = Single(lower).UpperBoundary();
  }

  const int order = CompareBufferWithDiyFp(digits, digits_exponent, boundary);
  if (order < 0) {
    return lower;
  }
  if (order > 0) {
    return upper;
  }
  // Exactly on the boundary: round towards even.
  return ((Single(lower).Significand() & 1) == 0) ? lower : upper;
}
} // namespace double_conversion
// ICU PATCH: Close ICU namespace
U_NAMESPACE_END
#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING

View File

@ -0,0 +1,63 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
// From the double-conversion library. Original license:
//
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ICU PATCH: ifdef around UCONFIG_NO_FORMATTING
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#ifndef DOUBLE_CONVERSION_STRTOD_H_
#define DOUBLE_CONVERSION_STRTOD_H_
// ICU PATCH: Customize header file paths for ICU.
#include "double-conversion-utils.h"
// ICU PATCH: Wrap in ICU namespace
U_NAMESPACE_BEGIN
namespace double_conversion {
// Converts the decimal number buffer * 10^exponent to a double.
//
// The buffer must only contain digits in the range [0-9]. It must not
// contain a dot or a sign. It must not start with '0', and must not be empty.
// `exponent` is the power of ten that scales the integer written in `buffer`.
double Strtod(Vector<const char> buffer, int exponent);
// Converts the decimal number buffer * 10^exponent to a float.
//
// The buffer must only contain digits in the range [0-9]. It must not
// contain a dot or a sign. It must not start with '0', and must not be empty.
// `exponent` is the power of ten that scales the integer written in `buffer`.
float Strtof(Vector<const char> buffer, int exponent);
} // namespace double_conversion
// ICU PATCH: Close ICU namespace
U_NAMESPACE_END
#endif // DOUBLE_CONVERSION_STRTOD_H_
#endif // ICU PATCH: close #if !UCONFIG_NO_FORMATTING

View File

@ -75,9 +75,9 @@ inline void abort_noreturn() { abort(); }
// the output of the division with the expected result. (Inlining must be
// disabled.)
// On Linux,x86 89255e-22 != Div_double(89255.0/1e22)
// ICU PATCH: Enable ARM builds for Windows with 'defined(_M_ARM)'.
// ICU PATCH: Enable ARM32 & ARM64 builds for Windows with 'defined(_M_ARM) || defined(_M_ARM64)'.
#if defined(_M_X64) || defined(__x86_64__) || \
defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || \
defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || defined(_M_ARM64) || \
defined(__hppa__) || defined(__ia64__) || \
defined(__mips__) || \
defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \

View File

@ -38,13 +38,14 @@
#include <math.h>
// ICU PATCH: Customize header file paths for ICU.
// The files fixed-dtoa.h and strtod.h are not needed.
// The file fixed-dtoa.h is not needed.
#include "double-conversion.h"
#include "double-conversion-bignum-dtoa.h"
#include "double-conversion-fast-dtoa.h"
#include "double-conversion-ieee.h"
#include "double-conversion-strtod.h"
#include "double-conversion-utils.h"
// ICU PATCH: Wrap in ICU namespace
@ -431,7 +432,6 @@ void DoubleToStringConverter::DoubleToAscii(double v,
}
#if 0 // not needed for ICU
// Consumes the given substring from the iterator.
// Returns false, if the substring does not match.
template <class Iterator>
@ -469,6 +469,7 @@ static const uc16 kWhitespaceTable16[] = {
static const int kWhitespaceTable16Length = ARRAY_SIZE(kWhitespaceTable16);
static bool isWhitespace(int x) {
if (x < 128) {
for (int i = 0; i < kWhitespaceTable7Length; i++) {
@ -647,7 +648,6 @@ static double RadixStringToIeee(Iterator* current,
return Double(DiyFp(number, exponent)).value();
}
template <class Iterator>
double StringToDoubleConverter::StringToIeee(
Iterator input,
@ -996,7 +996,6 @@ float StringToDoubleConverter::StringToFloat(
return static_cast<float>(StringToIeee(buffer, length, false,
processed_characters_count));
}
#endif // not needed for ICU
} // namespace double_conversion

View File

@ -391,6 +391,7 @@ class DoubleToStringConverter {
const int decimal_in_shortest_high_;
const int max_leading_padding_zeroes_in_precision_mode_;
const int max_trailing_padding_zeroes_in_precision_mode_;
#endif // not needed for ICU
DISALLOW_IMPLICIT_CONSTRUCTORS(DoubleToStringConverter);
};
@ -554,7 +555,6 @@ class StringToDoubleConverter {
int* processed_characters_count) const;
DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter);
#endif // not needed for ICU
};
} // namespace double_conversion

View File

@ -19,6 +19,7 @@
#if !UCONFIG_NO_FORMATTING
#include <cstdlib>
#include <math.h>
#include "unicode/fmtable.h"
#include "unicode/ustring.h"
@ -28,9 +29,8 @@
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "decNumber.h"
#include "digitlst.h"
#include "fmtableimp.h"
#include "number_decimalquantity.h"
// *****************************************************************************
// class Formattable
@ -40,6 +40,8 @@ U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Formattable)
using number::impl::DecimalQuantity;
//-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.
@ -103,7 +105,7 @@ void Formattable::init() {
fValue.fInt64 = 0;
fType = kLong;
fDecimalStr = NULL;
fDecimalNum = NULL;
fDecimalQuantity = NULL;
fBogus.setToBogus();
}
@ -257,8 +259,8 @@ Formattable::operator=(const Formattable& source)
}
UErrorCode status = U_ZERO_ERROR;
if (source.fDecimalNum != NULL) {
fDecimalNum = new DigitList(*source.fDecimalNum); // TODO: use internal digit list
if (source.fDecimalQuantity != NULL) {
fDecimalQuantity = new DecimalQuantity(*source.fDecimalQuantity);
}
if (source.fDecimalStr != NULL) {
fDecimalStr = new CharString(*source.fDecimalStr, status);
@ -357,13 +359,8 @@ void Formattable::dispose()
delete fDecimalStr;
fDecimalStr = NULL;
FmtStackData *stackData = (FmtStackData*)fStackData;
if(fDecimalNum != &(stackData->stackDecimalNum)) {
delete fDecimalNum;
} else {
fDecimalNum->~DigitList(); // destruct, don't deallocate
}
fDecimalNum = NULL;
delete fDecimalQuantity;
fDecimalQuantity = NULL;
}
Formattable *
@ -465,13 +462,13 @@ Formattable::getInt64(UErrorCode& status) const
} else if (fValue.fDouble < (double)U_INT64_MIN) {
status = U_INVALID_FORMAT_ERROR;
return U_INT64_MIN;
} else if (fabs(fValue.fDouble) > U_DOUBLE_MAX_EXACT_INT && fDecimalNum != NULL) {
int64_t val = fDecimalNum->getInt64();
if (val != 0) {
return val;
} else if (fabs(fValue.fDouble) > U_DOUBLE_MAX_EXACT_INT && fDecimalQuantity != NULL) {
if (fDecimalQuantity->fitsInLong(true)) {
return fDecimalQuantity->toLong();
} else {
// Unexpected
status = U_INVALID_FORMAT_ERROR;
return fValue.fDouble > 0 ? U_INT64_MAX : U_INT64_MIN;
return fDecimalQuantity->isNegative() ? U_INT64_MIN : U_INT64_MAX;
}
} else {
return (int64_t)fValue.fDouble;
@ -714,84 +711,85 @@ StringPiece Formattable::getDecimalNumber(UErrorCode &status) {
CharString *Formattable::internalGetCharString(UErrorCode &status) {
if(fDecimalStr == NULL) {
if (fDecimalNum == NULL) {
if (fDecimalQuantity == NULL) {
// No decimal number for the formattable yet. Which means the value was
// set directly by the user as an int, int64 or double. If the value came
// from parsing, or from the user setting a decimal number, fDecimalNum
// would already be set.
//
fDecimalNum = new DigitList; // TODO: use internal digit list
if (fDecimalNum == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
switch (fType) {
case kDouble:
fDecimalNum->set(this->getDouble());
break;
case kLong:
fDecimalNum->set(this->getLong());
break;
case kInt64:
fDecimalNum->set(this->getInt64());
break;
default:
// The formattable's value is not a numeric type.
status = U_INVALID_STATE_ERROR;
return NULL;
}
LocalPointer<DecimalQuantity> dq(new DecimalQuantity(), status);
if (U_FAILURE(status)) { return nullptr; }
populateDecimalQuantity(*dq, status);
if (U_FAILURE(status)) { return nullptr; }
fDecimalQuantity = dq.orphan();
}
fDecimalStr = new CharString;
fDecimalStr = new CharString();
if (fDecimalStr == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
fDecimalNum->getDecimal(*fDecimalStr, status);
// Older ICUs called uprv_decNumberToString here, which is not exactly the same as
// DecimalQuantity::toScientificString(). The biggest difference is that uprv_decNumberToString does
// not print scientific notation for magnitudes greater than -5 and smaller than some amount (+5?).
if (fDecimalQuantity->isZero()) {
fDecimalStr->append("0", -1, status);
} else if (std::abs(fDecimalQuantity->getMagnitude()) < 5) {
fDecimalStr->appendInvariantChars(fDecimalQuantity->toPlainString(), status);
} else {
fDecimalStr->appendInvariantChars(fDecimalQuantity->toScientificString(), status);
}
}
return fDecimalStr;
}
void
Formattable::populateDecimalQuantity(number::impl::DecimalQuantity& output, UErrorCode& status) const {
if (fDecimalQuantity != nullptr) {
output = *fDecimalQuantity;
return;
}
DigitList *
Formattable::getInternalDigitList() {
FmtStackData *stackData = (FmtStackData*)fStackData;
if(fDecimalNum != &(stackData->stackDecimalNum)) {
delete fDecimalNum;
fDecimalNum = new (&(stackData->stackDecimalNum), kOnStack) DigitList();
} else {
fDecimalNum->clear();
}
return fDecimalNum;
switch (fType) {
case kDouble:
output.setToDouble(this->getDouble());
output.roundToInfinity();
break;
case kLong:
output.setToInt(this->getLong());
break;
case kInt64:
output.setToLong(this->getInt64());
break;
default:
// The formattable's value is not a numeric type.
status = U_INVALID_STATE_ERROR;
}
}
// ---------------------------------------
void
Formattable::adoptDigitList(DigitList *dl) {
if(fDecimalNum==dl) {
fDecimalNum = NULL; // don't delete
}
dispose();
fDecimalNum = dl;
if(dl==NULL) { // allow adoptDigitList(NULL) to clear
return;
}
Formattable::adoptDecimalQuantity(DecimalQuantity *dq) {
if (fDecimalQuantity != NULL) {
delete fDecimalQuantity;
}
fDecimalQuantity = dq;
if (dq == NULL) { // allow adoptDigitList(NULL) to clear
return;
}
// Set the value into the Union of simple type values.
// Cannot use the set() functions because they would delete the fDecimalNum value,
if (fDecimalNum->fitsIntoLong(FALSE)) {
fType = kLong;
fValue.fInt64 = fDecimalNum->getLong();
} else if (fDecimalNum->fitsIntoInt64(FALSE)) {
fType = kInt64;
fValue.fInt64 = fDecimalNum->getInt64();
// Cannot use the set() functions because they would delete the fDecimalNum value.
if (fDecimalQuantity->fitsInLong()) {
fValue.fInt64 = fDecimalQuantity->toLong();
if (fValue.fInt64 <= INT32_MAX && fValue.fInt64 >= INT32_MIN) {
fType = kLong;
} else {
fType = kInt64;
}
} else {
fType = kDouble;
fValue.fDouble = fDecimalNum->getDouble();
fValue.fDouble = fDecimalQuantity->toDouble();
}
}
@ -804,24 +802,12 @@ Formattable::setDecimalNumber(StringPiece numberString, UErrorCode &status) {
}
dispose();
// Copy the input string and nul-terminate it.
// The decNumber library requires nul-terminated input. StringPiece input
// is not guaranteed nul-terminated. Too bad.
// CharString automatically adds the nul.
DigitList *dnum = new DigitList(); // TODO: use getInternalDigitList
if (dnum == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
dnum->set(CharString(numberString, status).toStringPiece(), status);
if (U_FAILURE(status)) {
delete dnum;
return; // String didn't contain a decimal number.
}
adoptDigitList(dnum);
auto* dq = new DecimalQuantity();
dq->setToDecNumber(numberString, status);
adoptDecimalQuantity(dq);
// Note that we do not hang on to the caller's input string.
// If we are asked for the string, we will regenerate one from fDecimalNum.
// If we are asked for the string, we will regenerate one from fDecimalQuantity.
}
#if 0

View File

@ -10,22 +10,12 @@
#ifndef FMTABLEIMP_H
#define FMTABLEIMP_H
#include "digitlst.h"
#include "number_decimalquantity.h"
#if !UCONFIG_NO_FORMATTING
U_NAMESPACE_BEGIN
/**
* @internal
*/
struct FmtStackData {
DigitList stackDecimalNum; // 128
//CharString stackDecimalStr; // 64
// -----
// 192 total
};
/**
* Maximum int64_t value that can be stored in a double without risking a loss of precision.
* IEEE doubles have 53 bits of significand precision (52 stored explicitly), 11 bits of exponent, and 1 sign bit.

View File

@ -22,17 +22,8 @@ U_NAMESPACE_BEGIN
FieldPositionHandler::~FieldPositionHandler() {
}
void
FieldPositionHandler::addAttribute(int32_t, int32_t, int32_t) {
}
void
FieldPositionHandler::shiftLast(int32_t) {
}
UBool
FieldPositionHandler::isRecording(void) const {
return FALSE;
void FieldPositionHandler::setShift(int32_t delta) {
fShift = delta;
}
@ -48,8 +39,8 @@ FieldPositionOnlyHandler::~FieldPositionOnlyHandler() {
void
FieldPositionOnlyHandler::addAttribute(int32_t id, int32_t start, int32_t limit) {
if (pos.getField() == id) {
pos.setBeginIndex(start);
pos.setEndIndex(limit);
pos.setBeginIndex(start + fShift);
pos.setEndIndex(limit + fShift);
}
}
@ -91,8 +82,8 @@ FieldPositionIteratorHandler::addAttribute(int32_t id, int32_t start, int32_t li
if (iter && U_SUCCESS(status) && start < limit) {
int32_t size = vec->size();
vec->addElement(id, status);
vec->addElement(start, status);
vec->addElement(limit, status);
vec->addElement(start + fShift, status);
vec->addElement(limit + fShift, status);
if (!U_SUCCESS(status)) {
vec->setSize(size);
}

View File

@ -22,11 +22,16 @@ U_NAMESPACE_BEGIN
// base class, null implementation
class U_I18N_API FieldPositionHandler: public UMemory {
protected:
int32_t fShift = 0;
public:
virtual ~FieldPositionHandler();
virtual void addAttribute(int32_t id, int32_t start, int32_t limit);
virtual void shiftLast(int32_t delta);
virtual UBool isRecording(void) const;
virtual void addAttribute(int32_t id, int32_t start, int32_t limit) = 0;
virtual void shiftLast(int32_t delta) = 0;
virtual UBool isRecording(void) const = 0;
void setShift(int32_t delta);
};
@ -39,9 +44,9 @@ class FieldPositionOnlyHandler : public FieldPositionHandler {
FieldPositionOnlyHandler(FieldPosition& pos);
virtual ~FieldPositionOnlyHandler();
virtual void addAttribute(int32_t id, int32_t start, int32_t limit);
virtual void shiftLast(int32_t delta);
virtual UBool isRecording(void) const;
void addAttribute(int32_t id, int32_t start, int32_t limit) U_OVERRIDE;
void shiftLast(int32_t delta) U_OVERRIDE;
UBool isRecording(void) const U_OVERRIDE;
};
@ -63,9 +68,9 @@ class FieldPositionIteratorHandler : public FieldPositionHandler {
FieldPositionIteratorHandler(FieldPositionIterator* posIter, UErrorCode& status);
~FieldPositionIteratorHandler();
virtual void addAttribute(int32_t id, int32_t start, int32_t limit);
virtual void shiftLast(int32_t delta);
virtual UBool isRecording(void) const;
void addAttribute(int32_t id, int32_t start, int32_t limit) U_OVERRIDE;
void shiftLast(int32_t delta) U_OVERRIDE;
UBool isRecording(void) const U_OVERRIDE;
};
U_NAMESPACE_END

View File

@ -41,21 +41,21 @@ static const int32_t gOffsets[] = {
16,
20,
24,
285,
295,
306,
310,
316,
320,
340,
341,
321,
331,
342,
346,
352,
355,
361,
366,
370,
374,
399
356,
376,
377,
388,
391,
397,
402,
406,
410,
435
};
static const int32_t gIndexes[] = {
@ -136,15 +136,18 @@ static const char * const gSubTypes[] = {
"AED",
"AFA",
"AFN",
"ALK",
"ALL",
"AMD",
"ANG",
"AOA",
"AOK",
"AON",
"AOR",
"ARA",
"ARP",
"ARS",
"ARY",
"ATS",
"AUD",
"AWG",
@ -158,6 +161,8 @@ static const char * const gSubTypes[] = {
"BEC",
"BEF",
"BEL",
"BGJ",
"BGK",
"BGL",
"BGN",
"BHD",
@ -165,7 +170,9 @@ static const char * const gSubTypes[] = {
"BMD",
"BND",
"BOB",
"BOP",
"BOV",
"BRB",
"BRC",
"BRE",
"BRL",
@ -173,6 +180,7 @@ static const char * const gSubTypes[] = {
"BRR",
"BSD",
"BTN",
"BUK",
"BWP",
"BYB",
"BYN",
@ -191,6 +199,7 @@ static const char * const gSubTypes[] = {
"COU",
"CRC",
"CSD",
"CSJ",
"CSK",
"CUC",
"CUP",
@ -225,10 +234,13 @@ static const char * const gSubTypes[] = {
"GHS",
"GIP",
"GMD",
"GNE",
"GNF",
"GNS",
"GQE",
"GRD",
"GTQ",
"GWE",
"GWP",
"GYD",
"HKD",
@ -239,10 +251,13 @@ static const char * const gSubTypes[] = {
"HUF",
"IDR",
"IEP",
"ILP",
"ILR",
"ILS",
"INR",
"IQD",
"IRR",
"ISJ",
"ISK",
"ITL",
"JMD",
@ -257,11 +272,13 @@ static const char * const gSubTypes[] = {
"KWD",
"KYD",
"KZT",
"LAJ",
"LAK",
"LBP",
"LKR",
"LRD",
"LSL",
"LSM",
"LTL",
"LTT",
"LUC",
@ -280,17 +297,23 @@ static const char * const gSubTypes[] = {
"MNT",
"MOP",
"MRO",
"MRU",
"MTL",
"MTP",
"MUR",
"MVQ",
"MVR",
"MWK",
"MXN",
"MXP",
"MXV",
"MYR",
"MZE",
"MZM",
"MZN",
"NAD",
"NGN",
"NIC",
"NIO",
"NLG",
"NOK",
@ -298,6 +321,7 @@ static const char * const gSubTypes[] = {
"NZD",
"OMR",
"PAB",
"PEH",
"PEI",
"PEN",
"PES",
@ -309,6 +333,8 @@ static const char * const gSubTypes[] = {
"PTE",
"PYG",
"QAR",
"RHD",
"ROK",
"ROL",
"RON",
"RSD",
@ -320,6 +346,7 @@ static const char * const gSubTypes[] = {
"SCR",
"SDD",
"SDG",
"SDP",
"SEK",
"SGD",
"SHP",
@ -331,6 +358,8 @@ static const char * const gSubTypes[] = {
"SRG",
"SSP",
"STD",
"STN",
"SUR",
"SVC",
"SYP",
"SZL",
@ -349,15 +378,20 @@ static const char * const gSubTypes[] = {
"TZS",
"UAH",
"UAK",
"UGS",
"UGW",
"UGX",
"USD",
"USN",
"USS",
"UYI",
"UYN",
"UYP",
"UYU",
"UZS",
"VEB",
"VEF",
"VNC",
"VND",
"VUV",
"WST",
@ -381,6 +415,7 @@ static const char * const gSubTypes[] = {
"XXX",
"YDD",
"YER",
"YUD",
"YUM",
"YUN",
"ZAL",
@ -389,6 +424,7 @@ static const char * const gSubTypes[] = {
"ZMW",
"ZRN",
"ZRZ",
"ZWC",
"ZWD",
"ZWL",
"ZWN",
@ -511,16 +547,20 @@ static const char * const gSubTypes[] = {
// Must be sorted by first value and then second value.
static int32_t unitPerUnitToSingleUnit[][4] = {
{327, 297, 17, 0},
{329, 303, 17, 2},
{331, 297, 17, 3},
{331, 388, 4, 2},
{331, 389, 4, 3},
{346, 386, 3, 1},
{349, 11, 16, 4},
{391, 327, 4, 1}
{363, 333, 17, 0},
{365, 339, 17, 2},
{367, 333, 17, 3},
{367, 424, 4, 2},
{367, 425, 4, 3},
{382, 422, 3, 1},
{385, 11, 16, 4},
{427, 363, 4, 1}
};
// Shortcuts to the base unit in order to make the default constructor fast
static const int32_t kBaseTypeIdx = 14;
static const int32_t kBaseSubTypeIdx = 0;
// Factory for the G-force unit: forwards to MeasureUnit::create(0, 0, status)
// and returns whatever that call returns.
// NOTE(review): the two zeros are presumably the type and subtype indexes
// into the generated unit tables -- confirm against MeasureUnit::create.
MeasureUnit *MeasureUnit::createGForce(UErrorCode &status) {
return MeasureUnit::create(0, 0, status);
}
@ -1082,7 +1122,8 @@ static int32_t binarySearch(
MeasureUnit::MeasureUnit() {
fCurrency[0] = 0;
initNoUnit("base");
fTypeId = kBaseTypeIdx;
fSubTypeId = kBaseSubTypeIdx;
}
MeasureUnit::MeasureUnit(const MeasureUnit &other)

View File

@ -31,6 +31,7 @@
#include "unicode/decimfmt.h"
#include "unicode/localpointer.h"
#include "unicode/msgfmt.h"
#include "unicode/numberformatter.h"
#include "unicode/plurfmt.h"
#include "unicode/rbnf.h"
#include "unicode/selfmt.h"
@ -48,7 +49,7 @@
#include "ustrfmt.h"
#include "util.h"
#include "uvector.h"
#include "visibledigits.h"
#include "number_decimalquantity.h"
// *****************************************************************************
// class MessageFormat
@ -1700,12 +1701,21 @@ Format* MessageFormat::createAppropriateFormat(UnicodeString& type, UnicodeStrin
formattableType = Formattable::kLong;
fmt = createIntegerFormat(fLocale, ec);
break;
default: // pattern
fmt = NumberFormat::createInstance(fLocale, ec);
if (fmt) {
DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fmt);
if (decfmt != NULL) {
decfmt->applyPattern(style,parseError,ec);
default: // pattern or skeleton
int32_t i = 0;
for (; PatternProps::isWhiteSpace(style.charAt(i)); i++);
if (style.compare(i, 2, u"::", 0, 2) == 0) {
// Skeleton
UnicodeString skeleton = style.tempSubString(i + 2);
fmt = number::NumberFormatter::forSkeleton(skeleton, ec).locale(fLocale).toFormat(ec);
} else {
// Pattern
fmt = NumberFormat::createInstance(fLocale, ec);
if (fmt) {
auto* decfmt = dynamic_cast<DecimalFormat*>(fmt);
if (decfmt != nullptr) {
decfmt->applyPattern(style, parseError, ec);
}
}
}
break;
@ -1959,14 +1969,14 @@ UnicodeString MessageFormat::PluralSelectorProvider::select(void *ctx, double nu
return UnicodeString(FALSE, OTHER_STRING, 5);
}
context.formatter->format(context.number, context.numberString, ec);
const DecimalFormat *decFmt = dynamic_cast<const DecimalFormat *>(context.formatter);
auto* decFmt = dynamic_cast<const DecimalFormat *>(context.formatter);
if(decFmt != NULL) {
VisibleDigitsWithExponent digits;
decFmt->initVisibleDigitsWithExponent(context.number, digits, ec);
number::impl::DecimalQuantity dq;
decFmt->formatToDecimalQuantity(context.number, dq, ec);
if (U_FAILURE(ec)) {
return UnicodeString(FALSE, OTHER_STRING, 5);
}
return rules->select(digits);
return rules->select(dq);
} else {
return rules->select(number);
}

View File

@ -19,8 +19,9 @@
#include "utypeinfo.h" // for 'typeid' to work
#include "nfsubs.h"
#include "digitlst.h"
#include "fmtableimp.h"
#include "putilimp.h"
#include "number_decimalquantity.h"
#if U_HAVE_RBNF
@ -47,6 +48,8 @@ static const UChar gGreaterGreaterThan[] =
U_NAMESPACE_BEGIN
using number::impl::DecimalQuantity;
class SameValueSubstitution : public NFSubstitution {
public:
SameValueSubstitution(int32_t pos,
@ -1069,13 +1072,12 @@ FractionalPartSubstitution::doSubstitution(double number, UnicodeString& toInser
// numberToFormat /= 10;
// }
DigitList dl;
dl.set(number);
dl.roundFixedPoint(20); // round to 20 fraction digits.
dl.reduce(); // Removes any trailing zeros.
DecimalQuantity dl;
dl.setToDouble(number);
dl.roundToMagnitude(-20, UNUM_ROUND_HALFEVEN, status); // round to 20 fraction digits.
UBool pad = FALSE;
for (int32_t didx = dl.getCount()-1; didx>=dl.getDecimalAt(); didx--) {
for (int32_t didx = dl.getLowerDisplayMagnitude(); didx<0; didx++) {
// Loop iterates over fraction digits, starting with the LSD.
// include both real digits from the number, and zeros
// to the left of the MSD but to the right of the decimal point.
@ -1084,7 +1086,7 @@ FractionalPartSubstitution::doSubstitution(double number, UnicodeString& toInser
} else {
pad = TRUE;
}
int64_t digit = didx>=0 ? dl.getDigit(didx) - '0' : 0;
int64_t digit = dl.getDigit(didx);
getRuleSet()->format(digit, toInsertInto, _pos + getPos(), recursionCount, status);
}
@ -1142,7 +1144,8 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
int32_t digit;
// double p10 = 0.1;
DigitList dl;
DecimalQuantity dl;
int32_t totalDigits = 0;
NumberFormat* fmt = NULL;
while (workText.length() > 0 && workPos.getIndex() != 0) {
workPos.setIndex(0);
@ -1170,7 +1173,8 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
}
if (workPos.getIndex() != 0) {
dl.append((char)('0' + digit));
dl.appendDigit(static_cast<int8_t>(digit), 0, true);
totalDigits++;
// result += digit * p10;
// p10 /= 10;
parsePosition.setIndex(parsePosition.getIndex() + workPos.getIndex());
@ -1183,7 +1187,8 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
}
delete fmt;
result = dl.getCount() == 0 ? 0 : dl.getDouble();
dl.adjustMagnitude(-totalDigits);
result = dl.toDouble();
result = composeRuleValue(result, baseValue);
resVal.setDouble(result);
return TRUE;

View File

@ -3,21 +3,25 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#if !UCONFIG_NO_FORMATTING
#include "number_affixutils.h"
#include "unicode/utf16.h"
#include "unicode/uniset.h"
using namespace icu;
using namespace icu::number;
using namespace icu::number::impl;
int32_t AffixUtils::estimateLength(const CharSequence &patternString, UErrorCode &status) {
TokenConsumer::~TokenConsumer() = default;
SymbolProvider::~SymbolProvider() = default;
int32_t AffixUtils::estimateLength(const UnicodeString &patternString, UErrorCode &status) {
AffixPatternState state = STATE_BASE;
int32_t offset = 0;
int32_t length = 0;
for (; offset < patternString.length();) {
UChar32 cp = patternString.codePointAt(offset);
UChar32 cp = patternString.char32At(offset);
switch (state) {
case STATE_BASE:
@ -78,12 +82,12 @@ int32_t AffixUtils::estimateLength(const CharSequence &patternString, UErrorCode
return length;
}
UnicodeString AffixUtils::escape(const CharSequence &input) {
UnicodeString AffixUtils::escape(const UnicodeString &input) {
AffixPatternState state = STATE_BASE;
int32_t offset = 0;
UnicodeString output;
for (; offset < input.length();) {
UChar32 cp = input.codePointAt(offset);
UChar32 cp = input.char32At(offset);
switch (cp) {
case u'\'':
@ -153,7 +157,7 @@ Field AffixUtils::getFieldForType(AffixPatternType type) {
}
int32_t
AffixUtils::unescape(const CharSequence &affixPattern, NumberStringBuilder &output, int32_t position,
AffixUtils::unescape(const UnicodeString &affixPattern, NumberStringBuilder &output, int32_t position,
const SymbolProvider &provider, UErrorCode &status) {
int32_t length = 0;
AffixTag tag;
@ -173,7 +177,7 @@ AffixUtils::unescape(const CharSequence &affixPattern, NumberStringBuilder &outp
return length;
}
int32_t AffixUtils::unescapedCodePointCount(const CharSequence &affixPattern,
int32_t AffixUtils::unescapedCodePointCount(const UnicodeString &affixPattern,
const SymbolProvider &provider, UErrorCode &status) {
int32_t length = 0;
AffixTag tag;
@ -192,7 +196,7 @@ int32_t AffixUtils::unescapedCodePointCount(const CharSequence &affixPattern,
}
bool
AffixUtils::containsType(const CharSequence &affixPattern, AffixPatternType type, UErrorCode &status) {
AffixUtils::containsType(const UnicodeString &affixPattern, AffixPatternType type, UErrorCode &status) {
if (affixPattern.length() == 0) {
return false;
}
@ -207,7 +211,7 @@ AffixUtils::containsType(const CharSequence &affixPattern, AffixPatternType type
return false;
}
bool AffixUtils::hasCurrencySymbols(const CharSequence &affixPattern, UErrorCode &status) {
bool AffixUtils::hasCurrencySymbols(const UnicodeString &affixPattern, UErrorCode &status) {
if (affixPattern.length() == 0) {
return false;
}
@ -222,9 +226,9 @@ bool AffixUtils::hasCurrencySymbols(const CharSequence &affixPattern, UErrorCode
return false;
}
UnicodeString AffixUtils::replaceType(const CharSequence &affixPattern, AffixPatternType type,
UnicodeString AffixUtils::replaceType(const UnicodeString &affixPattern, AffixPatternType type,
char16_t replacementChar, UErrorCode &status) {
UnicodeString output = affixPattern.toUnicodeString();
UnicodeString output(affixPattern); // copy
if (affixPattern.length() == 0) {
return output;
};
@ -239,11 +243,41 @@ UnicodeString AffixUtils::replaceType(const CharSequence &affixPattern, AffixPat
return output;
}
AffixTag AffixUtils::nextToken(AffixTag tag, const CharSequence &patternString, UErrorCode &status) {
bool AffixUtils::containsOnlySymbolsAndIgnorables(const UnicodeString& affixPattern,
const UnicodeSet& ignorables, UErrorCode& status) {
if (affixPattern.length() == 0) {
return true;
};
AffixTag tag;
while (hasNext(tag, affixPattern)) {
tag = nextToken(tag, affixPattern, status);
if (U_FAILURE(status)) { return false; }
if (tag.type == TYPE_CODEPOINT && !ignorables.contains(tag.codePoint)) {
return false;
}
}
return true;
}
void AffixUtils::iterateWithConsumer(const UnicodeString& affixPattern, TokenConsumer& consumer,
UErrorCode& status) {
if (affixPattern.length() == 0) {
return;
};
AffixTag tag;
while (hasNext(tag, affixPattern)) {
tag = nextToken(tag, affixPattern, status);
if (U_FAILURE(status)) { return; }
consumer.consumeToken(tag.type, tag.codePoint, status);
if (U_FAILURE(status)) { return; }
}
}
AffixTag AffixUtils::nextToken(AffixTag tag, const UnicodeString &patternString, UErrorCode &status) {
int32_t offset = tag.offset;
int32_t state = tag.state;
for (; offset < patternString.length();) {
UChar32 cp = patternString.codePointAt(offset);
UChar32 cp = patternString.char32At(offset);
int32_t count = U16_LENGTH(cp);
switch (state) {
@ -382,7 +416,7 @@ AffixTag AffixUtils::nextToken(AffixTag tag, const CharSequence &patternString,
}
}
bool AffixUtils::hasNext(const AffixTag &tag, const CharSequence &string) {
bool AffixUtils::hasNext(const AffixTag &tag, const UnicodeString &string) {
// First check for the {-1} and default initializer syntax.
if (tag.offset < 0) {
return false;

View File

@ -3,7 +3,7 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#if !UCONFIG_NO_FORMATTING
#ifndef __NUMBER_AFFIXUTILS_H__
#define __NUMBER_AFFIXUTILS_H__
@ -12,6 +12,7 @@
#include "unicode/stringpiece.h"
#include "unicode/unistr.h"
#include "number_stringbuilder.h"
#include "unicode/uniset.h"
U_NAMESPACE_BEGIN namespace number {
namespace impl {
@ -37,19 +38,27 @@ struct AffixTag {
AffixPatternState state;
AffixPatternType type;
AffixTag() : offset(0), state(STATE_BASE) {}
AffixTag()
: offset(0), state(STATE_BASE) {}
AffixTag(int32_t offset) : offset(offset) {}
AffixTag(int32_t offset)
: offset(offset) {}
AffixTag(int32_t offset, UChar32 codePoint, AffixPatternState state, AffixPatternType type)
: offset(offset), codePoint(codePoint), state(state), type(type)
{}
: offset(offset), codePoint(codePoint), state(state), type(type) {}
};
/**
* Abstract callback interface that receives affix pattern tokens one at a time.
* AffixUtils::iterateWithConsumer() calls consumeToken() once for each token it reads.
*/
class TokenConsumer {
public:
virtual ~TokenConsumer();
/**
* Receives a single token.
*
* @param type The type of the token.
* @param cp The code point carried by the token.
* @param status Error code; AffixUtils::iterateWithConsumer() stops iterating once it holds a failure.
*/
virtual void consumeToken(AffixPatternType type, UChar32 cp, UErrorCode& status) = 0;
};
// Exported as U_I18N_API because it is a base class for other exported types
class U_I18N_API SymbolProvider {
public:
virtual ~SymbolProvider() = default;
virtual ~SymbolProvider();
// TODO: Could this be more efficient if it returned by reference?
virtual UnicodeString getSymbol(AffixPatternType type) const = 0;
@ -107,7 +116,7 @@ class U_I18N_API AffixUtils {
* @param patternString The original string whose width will be estimated.
* @return The length of the unescaped string.
*/
static int32_t estimateLength(const CharSequence &patternString, UErrorCode &status);
static int32_t estimateLength(const UnicodeString& patternString, UErrorCode& status);
/**
* Takes a string and escapes (quotes) characters that have special meaning in the affix pattern
@ -118,7 +127,7 @@ class U_I18N_API AffixUtils {
* @param input The string to be escaped.
* @return The resulting UnicodeString.
*/
static UnicodeString escape(const CharSequence &input);
static UnicodeString escape(const UnicodeString& input);
static Field getFieldForType(AffixPatternType type);
@ -134,9 +143,8 @@ class U_I18N_API AffixUtils {
* @param position The index into the NumberStringBuilder to insert the string.
* @param provider An object to generate locale symbols.
*/
static int32_t
unescape(const CharSequence &affixPattern, NumberStringBuilder &output, int32_t position,
const SymbolProvider &provider, UErrorCode &status);
static int32_t unescape(const UnicodeString& affixPattern, NumberStringBuilder& output,
int32_t position, const SymbolProvider& provider, UErrorCode& status);
/**
* Same as {@link #unescape}, but only calculates the code point count. More efficient than {@link #unescape}
@ -146,8 +154,8 @@ class U_I18N_API AffixUtils {
* @param provider An object to generate locale symbols.
* @return The same return value as if you called {@link #unescape}.
*/
static int32_t unescapedCodePointCount(const CharSequence &affixPattern,
const SymbolProvider &provider, UErrorCode &status);
static int32_t unescapedCodePointCount(const UnicodeString& affixPattern,
const SymbolProvider& provider, UErrorCode& status);
/**
* Checks whether the given affix pattern contains at least one token of the given type, which is
@ -157,8 +165,7 @@ class U_I18N_API AffixUtils {
* @param type The token type.
* @return true if the affix pattern contains the given token type; false otherwise.
*/
static bool
containsType(const CharSequence &affixPattern, AffixPatternType type, UErrorCode &status);
static bool containsType(const UnicodeString& affixPattern, AffixPatternType type, UErrorCode& status);
/**
* Checks whether the specified affix pattern has any unquoted currency symbols ("¤").
@ -166,7 +173,7 @@ class U_I18N_API AffixUtils {
* @param affixPattern The string to check for currency symbols.
* @return true if the literal has at least one unquoted currency symbol; false otherwise.
*/
static bool hasCurrencySymbols(const CharSequence &affixPattern, UErrorCode &status);
static bool hasCurrencySymbols(const UnicodeString& affixPattern, UErrorCode& status);
/**
* Replaces all occurrences of tokens with the given type with the given replacement char.
@ -176,9 +183,21 @@ class U_I18N_API AffixUtils {
* @param replacementChar The char to substitute in place of chars of the given token type.
* @return A string containing the new affix pattern.
*/
static UnicodeString
replaceType(const CharSequence &affixPattern, AffixPatternType type, char16_t replacementChar,
UErrorCode &status);
static UnicodeString replaceType(const UnicodeString& affixPattern, AffixPatternType type,
char16_t replacementChar, UErrorCode& status);
/**
* Returns whether the given affix pattern contains only symbols and ignorables as defined by the
* given ignorables set.
*/
static bool containsOnlySymbolsAndIgnorables(const UnicodeString& affixPattern,
const UnicodeSet& ignorables, UErrorCode& status);
/**
* Iterates over the affix pattern, calling the TokenConsumer for each token.
*/
static void iterateWithConsumer(const UnicodeString& affixPattern, TokenConsumer& consumer,
UErrorCode& status);
/**
* Returns the next token from the affix pattern.
@ -190,7 +209,7 @@ class U_I18N_API AffixUtils {
* (never negative), or -1 if there were no more tokens in the affix pattern.
* @see #hasNext
*/
static AffixTag nextToken(AffixTag tag, const CharSequence &patternString, UErrorCode &status);
static AffixTag nextToken(AffixTag tag, const UnicodeString& patternString, UErrorCode& status);
/**
* Returns whether the affix pattern string has any more tokens to be retrieved from a call to
@ -200,7 +219,7 @@ class U_I18N_API AffixUtils {
* @param string The affix pattern.
* @return true if there are more tokens to consume; false otherwise.
*/
static bool hasNext(const AffixTag &tag, const CharSequence &string);
static bool hasNext(const AffixTag& tag, const UnicodeString& string);
private:
/**
@ -208,8 +227,8 @@ class U_I18N_API AffixUtils {
* The order of the arguments is consistent with Java, but the order of the stored
* fields is not necessarily the same.
*/
static inline AffixTag
makeTag(int32_t offset, AffixPatternType type, AffixPatternState state, UChar32 cp) {
static inline AffixTag makeTag(int32_t offset, AffixPatternType type, AffixPatternState state,
UChar32 cp) {
return {offset, cp, state, type};
}
};

View File

@ -0,0 +1,105 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT
#include <stdlib.h>
#include <cmath>
#include "number_asformat.h"
#include "number_types.h"
#include "number_utils.h"
#include "fphdlimp.h"
#include "number_utypes.h"
using namespace icu;
using namespace icu::number;
using namespace icu::number::impl;
// ICU class-ID boilerplate for this class. Presumably this macro supplies the definitions of
// getStaticClassID() and getDynamicClassID(), which the class declares but which are not
// defined anywhere else in this file -- confirm against uobject.h.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LocalizedNumberFormatterAsFormat)
/**
 * Wraps a copy of the given formatter and keeps a copy of the locale next to it.
 * The locale's name is passed as both arguments of setLocaleIDs().
 */
LocalizedNumberFormatterAsFormat::LocalizedNumberFormatterAsFormat(
        const LocalizedNumberFormatter& formatter, const Locale& locale)
        : fFormatter(formatter),
          fLocale(locale) {
    // Fetch the name once; the same ID is used for both locale slots.
    const char* const id = locale.getName();
    setLocaleIDs(id, id);
}
// Out-of-line definition of the destructor; it is defaulted, so no custom cleanup code runs here.
LocalizedNumberFormatterAsFormat::~LocalizedNumberFormatterAsFormat() = default;
/**
 * Two wrappers are equal when the other object is also a LocalizedNumberFormatterAsFormat
 * and both wrapped formatters produce the same skeleton string.
 */
UBool LocalizedNumberFormatterAsFormat::operator==(const Format& other) const {
    const auto* that = dynamic_cast<const LocalizedNumberFormatterAsFormat*>(&other);
    if (that != nullptr) {
        // TODO: Change this to use LocalizedNumberFormatter::operator== if it is ever proposed.
        // Comparing skeletons is fine, but not particularly efficient.
        // The error code is local: a failure while building a skeleton is not reported to the caller.
        UErrorCode localStatus = U_ZERO_ERROR;
        return fFormatter.toSkeleton(localStatus) == that->fFormatter.toSkeleton(localStatus);
    }
    // A Format of any other dynamic type is never equal to this wrapper.
    return false;
}
/**
 * Returns a newly allocated copy of this wrapper, created with the copy constructor.
 * The result is handed back as a raw Format pointer.
 */
Format* LocalizedNumberFormatterAsFormat::clone() const {
    Format* duplicate = new LocalizedNumberFormatterAsFormat(*this);
    return duplicate;
}
/**
 * Formats the number held by obj with the wrapped formatter and appends the text to appendTo.
 *
 * pos is reset to [0, 0) and then asked for the first occurrence of its field in the formatted
 * text; when one is found, its indices are shifted by the length appendTo had before appending.
 * If status already holds a failure, or one is set while converting or formatting, appendTo is
 * returned unchanged.
 */
UnicodeString& LocalizedNumberFormatterAsFormat::format(const Formattable& obj, UnicodeString& appendTo,
                                                        FieldPosition& pos, UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return appendTo;
    }

    UFormattedNumberData formatted;
    obj.populateDecimalQuantity(formatted.quantity, status);
    if (U_FAILURE(status)) { return appendTo; }
    fFormatter.formatImpl(&formatted, status);
    if (U_FAILURE(status)) { return appendTo; }

    // Always report the first occurrence: restart the search from the beginning.
    pos.setBeginIndex(0);
    pos.setEndIndex(0);
    if (formatted.string.nextFieldPosition(pos, status)) {
        // Indices are relative to the new text; translate them into appendTo coordinates.
        const int32_t shift = appendTo.length();
        if (shift != 0) {
            pos.setBeginIndex(pos.getBeginIndex() + shift);
            pos.setEndIndex(pos.getEndIndex() + shift);
        }
    }

    appendTo.append(formatted.string.toTempUnicodeString());
    return appendTo;
}
/**
 * Formats the number held by obj with the wrapped formatter and appends the text to appendTo.
 * When posIter is non-null, all field positions of the formatted text are reported through it.
 *
 * If status already holds a failure, or one is set while converting or formatting, appendTo is
 * returned unchanged.
 *
 * NOTE(review): unlike the FieldPosition overload, nothing here shifts the reported positions by
 * the length appendTo had before appending -- confirm whether callers with a non-empty appendTo
 * expect offsets relative to the whole string.
 */
UnicodeString& LocalizedNumberFormatterAsFormat::format(const Formattable& obj, UnicodeString& appendTo,
                                                        FieldPositionIterator* posIter,
                                                        UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return appendTo;
    }

    UFormattedNumberData formatted;
    obj.populateDecimalQuantity(formatted.quantity, status);
    if (U_FAILURE(status)) { return appendTo; }
    fFormatter.formatImpl(&formatted, status);
    if (U_FAILURE(status)) { return appendTo; }

    appendTo.append(formatted.string.toTempUnicodeString());

    // The iterator is optional; collect field positions only when one was supplied.
    if (posIter != nullptr) {
        FieldPositionIteratorHandler handler(posIter, status);
        formatted.string.getAllFieldPositions(handler, status);
    }
    return appendTo;
}
/**
 * Parsing is not supported by this wrapper. The source text and the result object are left
 * untouched; the only effect is that the error index of parse_pos is set to 0.
 */
void LocalizedNumberFormatterAsFormat::parseObject(const UnicodeString& /* source */,
                                                   Formattable& /* result */,
                                                   ParsePosition& parse_pos) const {
    parse_pos.setErrorIndex(0);
}
/**
 * Gives read-only access to the wrapped formatter. The returned reference refers to a member
 * of this object, so it is only usable while this object is alive.
 */
const LocalizedNumberFormatter& LocalizedNumberFormatterAsFormat::getNumberFormatter() const {
    return this->fFormatter;
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View File

@ -0,0 +1,107 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#ifndef __NUMBER_ASFORMAT_H__
#define __NUMBER_ASFORMAT_H__
#include "unicode/numberformatter.h"
#include "number_types.h"
#include "number_decimalquantity.h"
#include "number_scientific.h"
#include "number_patternstring.h"
#include "number_modifiers.h"
#include "number_multiplier.h"
#include "number_roundingutils.h"
#include "decNumber.h"
#include "charstr.h"
U_NAMESPACE_BEGIN namespace number {
namespace impl {
/**
* A wrapper around LocalizedNumberFormatter implementing the Format interface, enabling improved
* compatibility with other APIs.
*
* @draft ICU 62
* @see NumberFormatter
*/
class U_I18N_API LocalizedNumberFormatterAsFormat : public Format {
public:
/**
* Constructs a wrapper holding a copy of the given formatter and a copy of the given locale.
*
* @param formatter The LocalizedNumberFormatter used to format numbers.
* @param locale The locale associated with the formatter; its name is registered as the
* locale IDs of this Format.
*/
LocalizedNumberFormatterAsFormat(const LocalizedNumberFormatter& formatter, const Locale& locale);
/**
* Destructor.
*/
~LocalizedNumberFormatterAsFormat() U_OVERRIDE;
/**
* Equals operator. Returns true only if the other object is also a
* LocalizedNumberFormatterAsFormat and both wrapped formatters produce the same skeleton.
*/
UBool operator==(const Format& other) const U_OVERRIDE;
/**
* Creates a copy of this object. The copy is allocated with new.
*/
Format* clone() const U_OVERRIDE;
/**
* Formats a Number using the wrapped LocalizedNumberFormatter. The provided formattable must be a
* number type.
*
* The formatted text is appended to appendTo. pos is set to the first occurrence of its field,
* expressed as indices into appendTo.
*/
UnicodeString& format(const Formattable& obj, UnicodeString& appendTo, FieldPosition& pos,
UErrorCode& status) const U_OVERRIDE;
/**
* Formats a Number using the wrapped LocalizedNumberFormatter. The provided formattable must be a
* number type.
*
* The formatted text is appended to appendTo. posIter may be null; when it is not, all field
* positions of the formatted number are reported through it.
*/
UnicodeString& format(const Formattable& obj, UnicodeString& appendTo, FieldPositionIterator* posIter,
UErrorCode& status) const U_OVERRIDE;
/**
* Not supported: sets the error index of parse_pos to 0 and returns.
*/
void parseObject(const UnicodeString& source, Formattable& result,
ParsePosition& parse_pos) const U_OVERRIDE;
/**
* Gets the LocalizedNumberFormatter that this wrapper class uses to format numbers.
*
* For maximum efficiency, this function returns by const reference. You must copy the return value
* into a local variable if you want to use it beyond the lifetime of the current object:
*
* <pre>
* LocalizedNumberFormatter localFormatter = fmt->getNumberFormatter();
* </pre>
*
* You can however use the return value directly when chaining:
*
* <pre>
* FormattedNumber result = fmt->getNumberFormatter().formatDouble(514.23, status);
* </pre>
*
* @return The unwrapped LocalizedNumberFormatter.
*/
const LocalizedNumberFormatter& getNumberFormatter() const;
// ICU class-ID accessors. No hand-written definitions exist in number_asformat.cpp; presumably
// they come from the UOBJECT_DEFINE_RTTI_IMPLEMENTATION invocation there -- confirm in uobject.h.
UClassID getDynamicClassID() const U_OVERRIDE;
static UClassID U_EXPORT2 getStaticClassID();
private:
// The wrapped formatter; stored by value.
LocalizedNumberFormatter fFormatter;
// Even though the locale is inside the LocalizedNumberFormatter, we have to keep it here, too, because
// LocalizedNumberFormatter doesn't have a getLocale() method, and ICU-TC didn't want to add one.
Locale fLocale;
};
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif // __NUMBER_ASFORMAT_H__
#endif /* #if !UCONFIG_NO_FORMATTING */

Some files were not shown because too many files have changed in this diff Show More