uitercollationiterator.h 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2012-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * uitercollationiterator.h
  9. *
  10. * created on: 2012sep23 (from utf16collationiterator.h)
  11. * created by: Markus W. Scherer
  12. */
  13. #ifndef __UITERCOLLATIONITERATOR_H__
  14. #define __UITERCOLLATIONITERATOR_H__
  15. #include "unicode/utypes.h"
  16. #if !UCONFIG_NO_COLLATION
  17. #include "unicode/uiter.h"
  18. #include "cmemory.h"
  19. #include "collation.h"
  20. #include "collationdata.h"
  21. #include "collationiterator.h"
  22. #include "normalizer2impl.h"
  23. U_NAMESPACE_BEGIN
  24. /**
  25. * UCharIterator-based collation element and character iterator.
  26. * Handles normalized text inline, with length or NUL-terminated.
  27. * Unnormalized text is handled by a subclass.
  28. */
  29. class U_I18N_API UIterCollationIterator : public CollationIterator {
  30. public:
  31. UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
  32. : CollationIterator(d, numeric), iter(ui) {}
  33. virtual ~UIterCollationIterator();
  34. virtual void resetToOffset(int32_t newOffset);
  35. virtual int32_t getOffset() const;
  36. virtual UChar32 nextCodePoint(UErrorCode &errorCode);
  37. virtual UChar32 previousCodePoint(UErrorCode &errorCode);
  38. protected:
  39. virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
  40. virtual UChar handleGetTrailSurrogate();
  41. virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
  42. virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
  43. UCharIterator &iter;
  44. };
  45. /**
  46. * Incrementally checks the input text for FCD and normalizes where necessary.
  47. */
  48. class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
  49. public:
  50. FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
  51. : UIterCollationIterator(data, numeric, ui),
  52. state(ITER_CHECK_FWD), start(startIndex),
  53. nfcImpl(data->nfcImpl) {}
  54. virtual ~FCDUIterCollationIterator();
  55. virtual void resetToOffset(int32_t newOffset);
  56. virtual int32_t getOffset() const;
  57. virtual UChar32 nextCodePoint(UErrorCode &errorCode);
  58. virtual UChar32 previousCodePoint(UErrorCode &errorCode);
  59. protected:
  60. virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
  61. virtual UChar handleGetTrailSurrogate();
  62. virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
  63. virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
  64. private:
  65. /**
  66. * Switches to forward checking if possible.
  67. */
  68. void switchToForward();
  69. /**
  70. * Extends the FCD text segment forward or normalizes around pos.
  71. * @return TRUE if success
  72. */
  73. UBool nextSegment(UErrorCode &errorCode);
  74. /**
  75. * Switches to backward checking.
  76. */
  77. void switchToBackward();
  78. /**
  79. * Extends the FCD text segment backward or normalizes around pos.
  80. * @return TRUE if success
  81. */
  82. UBool previousSegment(UErrorCode &errorCode);
  83. UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
  84. enum State {
  85. /**
  86. * The input text [start..(iter index)[ passes the FCD check.
  87. * Moving forward checks incrementally.
  88. * pos & limit are undefined.
  89. */
  90. ITER_CHECK_FWD,
  91. /**
  92. * The input text [(iter index)..limit[ passes the FCD check.
  93. * Moving backward checks incrementally.
  94. * start & pos are undefined.
  95. */
  96. ITER_CHECK_BWD,
  97. /**
  98. * The input text [start..limit[ passes the FCD check.
  99. * pos tracks the current text index.
  100. */
  101. ITER_IN_FCD_SEGMENT,
  102. /**
  103. * The input text [start..limit[ failed the FCD check and was normalized.
  104. * pos tracks the current index in the normalized string.
  105. * The text iterator is at the limit index.
  106. */
  107. IN_NORM_ITER_AT_LIMIT,
  108. /**
  109. * The input text [start..limit[ failed the FCD check and was normalized.
  110. * pos tracks the current index in the normalized string.
  111. * The text iterator is at the start index.
  112. */
  113. IN_NORM_ITER_AT_START
  114. };
  115. State state;
  116. int32_t start;
  117. int32_t pos;
  118. int32_t limit;
  119. const Normalizer2Impl &nfcImpl;
  120. UnicodeString normalized;
  121. };
  122. U_NAMESPACE_END
  123. #endif // !UCONFIG_NO_COLLATION
  124. #endif // __UITERCOLLATIONITERATOR_H__