123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352 |
- // © 2018 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_FORMATTING
- #ifndef __SOURCE_NUMBER_SKELETONS_H__
- #define __SOURCE_NUMBER_SKELETONS_H__
- #include "number_types.h"
- #include "numparse_types.h"
- #include "unicode/ucharstrie.h"
- #include "string_segment.h"
- U_NAMESPACE_BEGIN
- namespace number {
- namespace impl {
- // Forward-declaration
- struct SeenMacroProps;
- // namespace for enums and entrypoint functions
- namespace skeleton {
- ///////////////////////////////////////////////////////////////////////////////////////
- // NOTE: For an example of how to add a new stem to the number skeleton parser, see: //
- // http://bugs.icu-project.org/trac/changeset/41193 //
- ///////////////////////////////////////////////////////////////////////////////////////
- /**
- * While parsing a skeleton, this enum records what type of option we expect to find next.
- */
- enum ParseState {
- // Section 0: We expect whitespace or a stem, but not an option:
- STATE_NULL,
- // Section 1: We might accept an option, but it is not required:
- STATE_SCIENTIFIC,
- STATE_FRACTION_PRECISION,
- // Section 2: An option is required:
- STATE_INCREMENT_PRECISION,
- STATE_MEASURE_UNIT,
- STATE_PER_MEASURE_UNIT,
- STATE_IDENTIFIER_UNIT,
- STATE_CURRENCY_UNIT,
- STATE_INTEGER_WIDTH,
- STATE_NUMBERING_SYSTEM,
- STATE_SCALE,
- };
- /**
- * All possible stem literals have an entry in the StemEnum. The enum name is the kebab case stem
- * string literal written in upper snake case.
- *
- * @see StemToObject
- * @see #SERIALIZED_STEM_TRIE
- */
- enum StemEnum {
- // Section 1: Stems that do not require an option:
- STEM_COMPACT_SHORT,
- STEM_COMPACT_LONG,
- STEM_SCIENTIFIC,
- STEM_ENGINEERING,
- STEM_NOTATION_SIMPLE,
- STEM_BASE_UNIT,
- STEM_PERCENT,
- STEM_PERMILLE,
- STEM_PERCENT_100, // concise-only
- STEM_PRECISION_INTEGER,
- STEM_PRECISION_UNLIMITED,
- STEM_PRECISION_CURRENCY_STANDARD,
- STEM_PRECISION_CURRENCY_CASH,
- STEM_ROUNDING_MODE_CEILING,
- STEM_ROUNDING_MODE_FLOOR,
- STEM_ROUNDING_MODE_DOWN,
- STEM_ROUNDING_MODE_UP,
- STEM_ROUNDING_MODE_HALF_EVEN,
- STEM_ROUNDING_MODE_HALF_DOWN,
- STEM_ROUNDING_MODE_HALF_UP,
- STEM_ROUNDING_MODE_UNNECESSARY,
- STEM_GROUP_OFF,
- STEM_GROUP_MIN2,
- STEM_GROUP_AUTO,
- STEM_GROUP_ON_ALIGNED,
- STEM_GROUP_THOUSANDS,
- STEM_LATIN,
- STEM_UNIT_WIDTH_NARROW,
- STEM_UNIT_WIDTH_SHORT,
- STEM_UNIT_WIDTH_FULL_NAME,
- STEM_UNIT_WIDTH_ISO_CODE,
- STEM_UNIT_WIDTH_HIDDEN,
- STEM_SIGN_AUTO,
- STEM_SIGN_ALWAYS,
- STEM_SIGN_NEVER,
- STEM_SIGN_ACCOUNTING,
- STEM_SIGN_ACCOUNTING_ALWAYS,
- STEM_SIGN_EXCEPT_ZERO,
- STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
- STEM_DECIMAL_AUTO,
- STEM_DECIMAL_ALWAYS,
- // Section 2: Stems that DO require an option:
- STEM_PRECISION_INCREMENT,
- STEM_MEASURE_UNIT,
- STEM_PER_MEASURE_UNIT,
- STEM_UNIT,
- STEM_CURRENCY,
- STEM_INTEGER_WIDTH,
- STEM_NUMBERING_SYSTEM,
- STEM_SCALE,
- };
- /** Default wildcard char, accepted on input and printed in output */
- constexpr char16_t kWildcardChar = u'*';
- /** Alternative wildcard char, accept on input but not printed in output */
- constexpr char16_t kAltWildcardChar = u'+';
- /** Checks whether the char is a wildcard on input */
- inline bool isWildcardChar(char16_t c) {
- return c == kWildcardChar || c == kAltWildcardChar;
- }
- /**
- * Creates a NumberFormatter corresponding to the given skeleton string.
- *
- * @param skeletonString
- * A number skeleton string, possibly not in its shortest form.
- * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.
- */
- UnlocalizedNumberFormatter create(
- const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status);
- /**
- * Create a skeleton string corresponding to the given NumberFormatter.
- *
- * @param macros
- * The NumberFormatter options object.
- * @return A skeleton string in normalized form.
- */
- UnicodeString generate(const MacroProps& macros, UErrorCode& status);
- /**
- * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop.
- *
- * Internal: use the create() endpoint instead of this function.
- */
- MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status);
- /**
- * Given that the current segment represents a stem, parse it and save the result.
- *
- * @return The next state after parsing this stem, corresponding to what subset of options to expect.
- */
- ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,
- MacroProps& macros, UErrorCode& status);
- /**
- * Given that the current segment represents an option, parse it and save the result.
- *
- * @return The next state after parsing this option, corresponding to what subset of options to
- * expect next.
- */
- ParseState
- parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- } // namespace skeleton
- /**
- * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This
- * applies to only the "Section 1" stems, those that are well-defined without an option.
- */
- namespace stem_to_object {
- Notation notation(skeleton::StemEnum stem);
- MeasureUnit unit(skeleton::StemEnum stem);
- Precision precision(skeleton::StemEnum stem);
- UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem);
- UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem);
- UNumberUnitWidth unitWidth(skeleton::StemEnum stem);
- UNumberSignDisplay signDisplay(skeleton::StemEnum stem);
- UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem);
- } // namespace stem_to_object
- /**
- * Namespace for utility methods that convert from enums to stem strings. More complex object conversions
- * take place in the object_to_stem_string namespace.
- */
- namespace enum_to_stem_string {
- void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb);
- void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb);
- void unitWidth(UNumberUnitWidth value, UnicodeString& sb);
- void signDisplay(UNumberSignDisplay value, UnicodeString& sb);
- void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb);
- } // namespace enum_to_stem_string
- /**
- * Namespace for utility methods for processing stems and options that cannot be interpreted literally.
- */
- namespace blueprint_helpers {
- /** @return Whether we successfully found and parsed an exponent width option. */
- bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status);
- /** @return Whether we successfully found and parsed an exponent sign option. */
- bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status);
- void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- void generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb, UErrorCode& status);
- void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status);
- void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status);
- void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- // Note: no generateScientificStem since this syntax was added later in ICU 67
- void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- // Note: no generateIntegerStem since this syntax was added later in ICU 67
- /** @return Whether we successfully found and parsed a frac-sig option. */
- bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- void
- generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status);
- void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status);
- void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status);
- void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
- void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,
- UErrorCode& status);
- } // namespace blueprint_helpers
- /**
- * Class for utility methods for generating a token corresponding to each macro-prop. Each method
- * returns whether or not a token was written to the string builder.
- *
- * This needs to be a class, not a namespace, so it can be friended.
- */
- class GeneratorHelpers {
- public:
- /**
- * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given
- * StringBuilder.
- *
- * Internal: use the create() endpoint instead of this function.
- */
- static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
- private:
- static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
- static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
- static bool perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
- static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
- static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
- static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
- static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
- static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
- static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
- static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
- static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
- static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
- };
- /**
- * Struct for null-checking.
- * In Java, we can just check the object reference. In C++, we need a different method.
- */
- struct SeenMacroProps {
- bool notation = false;
- bool unit = false;
- bool perUnit = false;
- bool precision = false;
- bool roundingMode = false;
- bool grouper = false;
- bool padder = false;
- bool integerWidth = false;
- bool symbols = false;
- bool unitWidth = false;
- bool sign = false;
- bool decimal = false;
- bool scale = false;
- };
- } // namespace impl
- } // namespace number
- U_NAMESPACE_END
- #endif //__SOURCE_NUMBER_SKELETONS_H__
- #endif /* #if !UCONFIG_NO_FORMATTING */
|