// number_skeletons.h
  1. // © 2018 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. #include "unicode/utypes.h"
  4. #if !UCONFIG_NO_FORMATTING
  5. #ifndef __SOURCE_NUMBER_SKELETONS_H__
  6. #define __SOURCE_NUMBER_SKELETONS_H__
  7. #include "number_types.h"
  8. #include "numparse_types.h"
  9. #include "unicode/ucharstrie.h"
  10. #include "string_segment.h"
  11. U_NAMESPACE_BEGIN
  12. namespace number {
  13. namespace impl {
  // Forward declaration: SeenMacroProps is taken by reference by parseStem() before its full
  // definition, which appears later in this header.
  struct SeenMacroProps;
  16. // namespace for enums and entrypoint functions
  17. namespace skeleton {
  18. ///////////////////////////////////////////////////////////////////////////////////////
  19. // NOTE: For an example of how to add a new stem to the number skeleton parser, see: //
  20. // http://bugs.icu-project.org/trac/changeset/41193 //
  21. ///////////////////////////////////////////////////////////////////////////////////////
  /**
   * While parsing a skeleton, this enum records what type of option we expect to find next.
   *
   * The enumerators are grouped into sections according to whether a following option is
   * forbidden, optional, or required. No explicit values are assigned, so the numeric value of
   * each enumerator follows from the declaration order; do not reorder.
   */
  enum ParseState {
      // Section 0: We expect whitespace or a stem, but not an option:
      STATE_NULL,

      // Section 1: We might accept an option, but it is not required:
      STATE_SCIENTIFIC,
      STATE_FRACTION_PRECISION,

      // Section 2: An option is required:
      STATE_INCREMENT_PRECISION,
      STATE_MEASURE_UNIT,
      STATE_PER_MEASURE_UNIT,
      STATE_IDENTIFIER_UNIT,
      STATE_CURRENCY_UNIT,
      STATE_INTEGER_WIDTH,
      STATE_NUMBERING_SYSTEM,
      STATE_SCALE,
  };
  /**
   * All possible stem literals have an entry in the StemEnum. The enum name is the kebab case stem
   * string literal written in upper snake case.
   *
   * No explicit values are assigned, so the numeric value of each enumerator follows from the
   * declaration order; do not reorder.
   *
   * @see stem_to_object
   * @see #SERIALIZED_STEM_TRIE
   */
  enum StemEnum {
      // Section 1: Stems that do not require an option:
      STEM_COMPACT_SHORT,
      STEM_COMPACT_LONG,
      STEM_SCIENTIFIC,
      STEM_ENGINEERING,
      STEM_NOTATION_SIMPLE,
      STEM_BASE_UNIT,
      STEM_PERCENT,
      STEM_PERMILLE,
      STEM_PERCENT_100, // concise-only
      STEM_PRECISION_INTEGER,
      STEM_PRECISION_UNLIMITED,
      STEM_PRECISION_CURRENCY_STANDARD,
      STEM_PRECISION_CURRENCY_CASH,
      STEM_ROUNDING_MODE_CEILING,
      STEM_ROUNDING_MODE_FLOOR,
      STEM_ROUNDING_MODE_DOWN,
      STEM_ROUNDING_MODE_UP,
      STEM_ROUNDING_MODE_HALF_EVEN,
      STEM_ROUNDING_MODE_HALF_DOWN,
      STEM_ROUNDING_MODE_HALF_UP,
      STEM_ROUNDING_MODE_UNNECESSARY,
      STEM_GROUP_OFF,
      STEM_GROUP_MIN2,
      STEM_GROUP_AUTO,
      STEM_GROUP_ON_ALIGNED,
      STEM_GROUP_THOUSANDS,
      STEM_LATIN,
      STEM_UNIT_WIDTH_NARROW,
      STEM_UNIT_WIDTH_SHORT,
      STEM_UNIT_WIDTH_FULL_NAME,
      STEM_UNIT_WIDTH_ISO_CODE,
      STEM_UNIT_WIDTH_HIDDEN,
      STEM_SIGN_AUTO,
      STEM_SIGN_ALWAYS,
      STEM_SIGN_NEVER,
      STEM_SIGN_ACCOUNTING,
      STEM_SIGN_ACCOUNTING_ALWAYS,
      STEM_SIGN_EXCEPT_ZERO,
      STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
      STEM_DECIMAL_AUTO,
      STEM_DECIMAL_ALWAYS,

      // Section 2: Stems that DO require an option:
      STEM_PRECISION_INCREMENT,
      STEM_MEASURE_UNIT,
      STEM_PER_MEASURE_UNIT,
      STEM_UNIT,
      STEM_CURRENCY,
      STEM_INTEGER_WIDTH,
      STEM_NUMBERING_SYSTEM,
      STEM_SCALE,
  };
  /** Default wildcard char, accepted on input and printed in output */
  constexpr char16_t kWildcardChar = u'*';

  /** Alternative wildcard char, accepted on input but not printed in output */
  constexpr char16_t kAltWildcardChar = u'+';

  /**
   * Checks whether the char is a wildcard on input.
   *
   * @param c The UTF-16 code unit to test.
   * @return true if c equals kWildcardChar or kAltWildcardChar, false otherwise.
   */
  inline bool isWildcardChar(char16_t c) {
      return c == kWildcardChar || c == kAltWildcardChar;
  }
  109. /**
  110. * Creates a NumberFormatter corresponding to the given skeleton string.
  111. *
  112. * @param skeletonString
  113. * A number skeleton string, possibly not in its shortest form.
  114. * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.
  115. */
  116. UnlocalizedNumberFormatter create(
  117. const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status);
  118. /**
  119. * Create a skeleton string corresponding to the given NumberFormatter.
  120. *
  121. * @param macros
  122. * The NumberFormatter options object.
  123. * @return A skeleton string in normalized form.
  124. */
  125. UnicodeString generate(const MacroProps& macros, UErrorCode& status);
  126. /**
  127. * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop.
  128. *
  129. * Internal: use the create() endpoint instead of this function.
  130. */
  131. MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status);
  132. /**
  133. * Given that the current segment represents a stem, parse it and save the result.
  134. *
  135. * @return The next state after parsing this stem, corresponding to what subset of options to expect.
  136. */
  137. ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,
  138. MacroProps& macros, UErrorCode& status);
  139. /**
  140. * Given that the current segment represents an option, parse it and save the result.
  141. *
  142. * @return The next state after parsing this option, corresponding to what subset of options to
  143. * expect next.
  144. */
  145. ParseState
  146. parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  147. } // namespace skeleton
  148. /**
  149. * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This
  150. * applies to only the "Section 1" stems, those that are well-defined without an option.
  151. */
  152. namespace stem_to_object {
  153. Notation notation(skeleton::StemEnum stem);
  154. MeasureUnit unit(skeleton::StemEnum stem);
  155. Precision precision(skeleton::StemEnum stem);
  156. UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem);
  157. UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem);
  158. UNumberUnitWidth unitWidth(skeleton::StemEnum stem);
  159. UNumberSignDisplay signDisplay(skeleton::StemEnum stem);
  160. UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem);
  161. } // namespace stem_to_object
  162. /**
  163. * Namespace for utility methods that convert from enums to stem strings. More complex object conversions
  164. * take place in the object_to_stem_string namespace.
  165. */
  166. namespace enum_to_stem_string {
  167. void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb);
  168. void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb);
  169. void unitWidth(UNumberUnitWidth value, UnicodeString& sb);
  170. void signDisplay(UNumberSignDisplay value, UnicodeString& sb);
  171. void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb);
  172. } // namespace enum_to_stem_string
  173. /**
  174. * Namespace for utility methods for processing stems and options that cannot be interpreted literally.
  175. */
  176. namespace blueprint_helpers {
  177. /** @return Whether we successfully found and parsed an exponent width option. */
  178. bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  179. void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status);
  180. /** @return Whether we successfully found and parsed an exponent sign option. */
  181. bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  182. void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  183. void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status);
  184. void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  185. void generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb, UErrorCode& status);
  186. void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  187. void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  188. void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  189. void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status);
  190. void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  191. void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status);
  192. void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  193. // Note: no generateScientificStem since this syntax was added later in ICU 67
  194. void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  195. // Note: no generateIntegerStem since this syntax was added later in ICU 67
  196. /** @return Whether we successfully found and parsed a frac-sig option. */
  197. bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  198. void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  199. void
  200. generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status);
  201. void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  202. void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status);
  203. void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  204. void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status);
  205. void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
  206. void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,
  207. UErrorCode& status);
  208. } // namespace blueprint_helpers
  209. /**
  210. * Class for utility methods for generating a token corresponding to each macro-prop. Each method
  211. * returns whether or not a token was written to the string builder.
  212. *
  213. * This needs to be a class, not a namespace, so it can be friended.
  214. */
  215. class GeneratorHelpers {
  216. public:
  217. /**
  218. * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given
  219. * StringBuilder.
  220. *
  221. * Internal: use the create() endpoint instead of this function.
  222. */
  223. static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
  224. private:
  225. static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
  226. static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
  227. static bool perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
  228. static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
  229. static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
  230. static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
  231. static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
  232. static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
  233. static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
  234. static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
  235. static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
  236. static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
  237. };
  /**
   * Struct for null-checking.
   * In Java, we can just check the object reference. In C++, we need a different method.
   *
   * Holds one boolean flag per macro-prop; every flag is initialized to false. An instance is
   * passed by reference to skeleton::parseStem().
   * NOTE(review): presumably a flag is set to true once the matching macro-prop has been
   * encountered in the skeleton -- confirm against the parser implementation.
   */
  struct SeenMacroProps {
      bool notation = false;
      bool unit = false;
      bool perUnit = false;
      bool precision = false;
      bool roundingMode = false;
      bool grouper = false;
      bool padder = false;
      bool integerWidth = false;
      bool symbols = false;
      bool unitWidth = false;
      bool sign = false;
      bool decimal = false;
      bool scale = false;
  };
  257. } // namespace impl
  258. } // namespace number
  259. U_NAMESPACE_END
  260. #endif //__SOURCE_NUMBER_SKELETONS_H__
  261. #endif /* #if !UCONFIG_NO_FORMATTING */