numparse_affixes.h 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. // © 2018 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. #include "unicode/utypes.h"
  4. #if !UCONFIG_NO_FORMATTING
  5. #ifndef __NUMPARSE_AFFIXES_H__
  6. #define __NUMPARSE_AFFIXES_H__
  7. #include "cmemory.h"
  8. #include "numparse_types.h"
  9. #include "numparse_symbols.h"
  10. #include "numparse_currency.h"
  11. #include "number_affixutils.h"
  12. #include "number_currencysymbols.h"
  13. U_NAMESPACE_BEGIN
  14. namespace numparse {
  15. namespace impl {
  16. // Forward-declaration of implementation classes for friending
  17. class AffixPatternMatcherBuilder;
  18. class AffixPatternMatcher;
  19. using ::icu::number::impl::AffixPatternProvider;
  20. using ::icu::number::impl::TokenConsumer;
  21. using ::icu::number::impl::CurrencySymbols;
  22. class U_I18N_API CodePointMatcher : public NumberParseMatcher, public UMemory {
  23. public:
  24. CodePointMatcher() = default; // WARNING: Leaves the object in an unusable state
  25. CodePointMatcher(UChar32 cp);
  26. bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
  27. bool smokeTest(const StringSegment& segment) const override;
  28. UnicodeString toString() const override;
  29. private:
  30. UChar32 fCp;
  31. };
  32. } // namespace impl
  33. } // namespace numparse
  34. // Export a explicit template instantiations of MaybeStackArray, MemoryPool and CompactUnicodeString.
  35. // When building DLLs for Windows this is required even though no direct access leaks out of the i18n library.
  36. // (See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.)
  37. // Note: These need to be outside of the numparse::impl namespace, or Clang will generate a compile error.
  38. #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
  39. template class U_I18N_API MaybeStackArray<numparse::impl::CodePointMatcher*, 8>;
  40. template class U_I18N_API MaybeStackArray<UChar, 4>;
  41. template class U_I18N_API MemoryPool<numparse::impl::CodePointMatcher, 8>;
  42. template class U_I18N_API numparse::impl::CompactUnicodeString<4>;
  43. #endif
  44. namespace numparse {
  45. namespace impl {
  46. struct AffixTokenMatcherSetupData {
  47. const CurrencySymbols& currencySymbols;
  48. const DecimalFormatSymbols& dfs;
  49. IgnorablesMatcher& ignorables;
  50. const Locale& locale;
  51. parse_flags_t parseFlags;
  52. };
  53. /**
  54. * Small helper class that generates matchers for individual tokens for AffixPatternMatcher.
  55. *
  56. * In Java, this is called AffixTokenMatcherFactory (a "factory"). However, in C++, it is called a
  57. * "warehouse", because in addition to generating the matchers, it also retains ownership of them. The
  58. * warehouse must stay in scope for the whole lifespan of the AffixPatternMatcher that uses matchers from
  59. * the warehouse.
  60. *
  61. * @author sffc
  62. */
  63. // Exported as U_I18N_API for tests
  64. class U_I18N_API AffixTokenMatcherWarehouse : public UMemory {
  65. public:
  66. AffixTokenMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state
  67. AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData);
  68. NumberParseMatcher& minusSign();
  69. NumberParseMatcher& plusSign();
  70. NumberParseMatcher& percent();
  71. NumberParseMatcher& permille();
  72. NumberParseMatcher& currency(UErrorCode& status);
  73. IgnorablesMatcher& ignorables();
  74. NumberParseMatcher* nextCodePointMatcher(UChar32 cp, UErrorCode& status);
  75. private:
  76. // NOTE: The following field may be unsafe to access after construction is done!
  77. const AffixTokenMatcherSetupData* fSetupData;
  78. // NOTE: These are default-constructed and should not be used until initialized.
  79. MinusSignMatcher fMinusSign;
  80. PlusSignMatcher fPlusSign;
  81. PercentMatcher fPercent;
  82. PermilleMatcher fPermille;
  83. CombinedCurrencyMatcher fCurrency;
  84. // Use a child class for code point matchers, since it requires non-default operators.
  85. MemoryPool<CodePointMatcher> fCodePoints;
  86. friend class AffixPatternMatcherBuilder;
  87. friend class AffixPatternMatcher;
  88. };
  89. class AffixPatternMatcherBuilder : public TokenConsumer, public MutableMatcherCollection {
  90. public:
  91. AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse,
  92. IgnorablesMatcher* ignorables);
  93. void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override;
  94. /** NOTE: You can build only once! */
  95. AffixPatternMatcher build();
  96. private:
  97. ArraySeriesMatcher::MatcherArray fMatchers;
  98. int32_t fMatchersLen;
  99. int32_t fLastTypeOrCp;
  100. const UnicodeString& fPattern;
  101. AffixTokenMatcherWarehouse& fWarehouse;
  102. IgnorablesMatcher* fIgnorables;
  103. void addMatcher(NumberParseMatcher& matcher) override;
  104. };
  105. // Exported as U_I18N_API for tests
  106. class U_I18N_API AffixPatternMatcher : public ArraySeriesMatcher {
  107. public:
  108. AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state
  109. static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern,
  110. AffixTokenMatcherWarehouse& warehouse,
  111. parse_flags_t parseFlags, bool* success,
  112. UErrorCode& status);
  113. UnicodeString getPattern() const;
  114. bool operator==(const AffixPatternMatcher& other) const;
  115. private:
  116. CompactUnicodeString<4> fPattern;
  117. AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern);
  118. friend class AffixPatternMatcherBuilder;
  119. };
  120. class AffixMatcher : public NumberParseMatcher, public UMemory {
  121. public:
  122. AffixMatcher() = default; // WARNING: Leaves the object in an unusable state
  123. AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags);
  124. bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
  125. void postProcess(ParsedNumber& result) const override;
  126. bool smokeTest(const StringSegment& segment) const override;
  127. int8_t compareTo(const AffixMatcher& rhs) const;
  128. UnicodeString toString() const override;
  129. private:
  130. AffixPatternMatcher* fPrefix;
  131. AffixPatternMatcher* fSuffix;
  132. result_flags_t fFlags;
  133. };
  134. /**
  135. * A C++-only class to retain ownership of the AffixMatchers needed for parsing.
  136. */
  137. class AffixMatcherWarehouse {
  138. public:
  139. AffixMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state
  140. AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse);
  141. void createAffixMatchers(const AffixPatternProvider& patternInfo, MutableMatcherCollection& output,
  142. const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
  143. UErrorCode& status);
  144. private:
  145. // 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix
  146. AffixMatcher fAffixMatchers[9];
  147. // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each
  148. AffixPatternMatcher fAffixPatternMatchers[6];
  149. // Reference to the warehouse for tokens used by the AffixPatternMatchers
  150. AffixTokenMatcherWarehouse* fTokenWarehouse;
  151. friend class AffixMatcher;
  152. static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables,
  153. parse_flags_t parseFlags, UErrorCode& status);
  154. };
  155. } // namespace impl
  156. } // namespace numparse
  157. U_NAMESPACE_END
  158. #endif //__NUMPARSE_AFFIXES_H__
  159. #endif /* #if !UCONFIG_NO_FORMATTING */