utf16collationiterator.h 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  // © 2016 and later: Unicode, Inc. and others.
  // License & terms of use: http://www.unicode.org/copyright.html
  /*
  *******************************************************************************
  * Copyright (C) 2010-2014, International Business Machines
  * Corporation and others. All Rights Reserved.
  *******************************************************************************
  * utf16collationiterator.h
  *
  * created on: 2010oct27
  * created by: Markus W. Scherer
  */
  13. #ifndef __UTF16COLLATIONITERATOR_H__
  14. #define __UTF16COLLATIONITERATOR_H__
  15. #include "unicode/utypes.h"
  16. #if !UCONFIG_NO_COLLATION
  17. #include "cmemory.h"
  18. #include "collation.h"
  19. #include "collationdata.h"
  20. #include "collationiterator.h"
  21. #include "normalizer2impl.h"
  22. U_NAMESPACE_BEGIN
  23. /**
  24. * UTF-16 collation element and character iterator.
  25. * Handles normalized UTF-16 text inline, with length or NUL-terminated.
  26. * Unnormalized text is handled by a subclass.
  27. */
  28. class U_I18N_API UTF16CollationIterator : public CollationIterator {
  29. public:
  30. UTF16CollationIterator(const CollationData *d, UBool numeric,
  31. const UChar *s, const UChar *p, const UChar *lim)
  32. : CollationIterator(d, numeric),
  33. start(s), pos(p), limit(lim) {}
  34. UTF16CollationIterator(const UTF16CollationIterator &other, const UChar *newText);
  35. virtual ~UTF16CollationIterator();
  36. virtual UBool operator==(const CollationIterator &other) const;
  37. virtual void resetToOffset(int32_t newOffset);
  38. virtual int32_t getOffset() const;
  39. void setText(const UChar *s, const UChar *lim) {
  40. reset();
  41. start = pos = s;
  42. limit = lim;
  43. }
  44. virtual UChar32 nextCodePoint(UErrorCode &errorCode);
  45. virtual UChar32 previousCodePoint(UErrorCode &errorCode);
  46. protected:
  47. // Copy constructor only for subclasses which set the pointers.
  48. UTF16CollationIterator(const UTF16CollationIterator &other)
  49. : CollationIterator(other),
  50. start(NULL), pos(NULL), limit(NULL) {}
  51. virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
  52. virtual UChar handleGetTrailSurrogate();
  53. virtual UBool foundNULTerminator();
  54. virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
  55. virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
  56. // UTF-16 string pointers.
  57. // limit can be NULL for NUL-terminated strings.
  58. const UChar *start, *pos, *limit;
  59. };
  60. /**
  61. * Incrementally checks the input text for FCD and normalizes where necessary.
  62. */
  63. class U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator {
  64. public:
  65. FCDUTF16CollationIterator(const CollationData *data, UBool numeric,
  66. const UChar *s, const UChar *p, const UChar *lim)
  67. : UTF16CollationIterator(data, numeric, s, p, lim),
  68. rawStart(s), segmentStart(p), segmentLimit(NULL), rawLimit(lim),
  69. nfcImpl(data->nfcImpl),
  70. checkDir(1) {}
  71. FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const UChar *newText);
  72. virtual ~FCDUTF16CollationIterator();
  73. virtual UBool operator==(const CollationIterator &other) const;
  74. virtual void resetToOffset(int32_t newOffset);
  75. virtual int32_t getOffset() const;
  76. virtual UChar32 nextCodePoint(UErrorCode &errorCode);
  77. virtual UChar32 previousCodePoint(UErrorCode &errorCode);
  78. protected:
  79. virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
  80. virtual UBool foundNULTerminator();
  81. virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
  82. virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
  83. private:
  84. /**
  85. * Switches to forward checking if possible.
  86. * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
  87. * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
  88. */
  89. void switchToForward();
  90. /**
  91. * Extend the FCD text segment forward or normalize around pos.
  92. * To be called when checkDir > 0 && pos != limit.
  93. * @return TRUE if success, checkDir == 0 and pos != limit
  94. */
  95. UBool nextSegment(UErrorCode &errorCode);
  96. /**
  97. * Switches to backward checking.
  98. * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
  99. * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
  100. */
  101. void switchToBackward();
  102. /**
  103. * Extend the FCD text segment backward or normalize around pos.
  104. * To be called when checkDir < 0 && pos != start.
  105. * @return TRUE if success, checkDir == 0 and pos != start
  106. */
  107. UBool previousSegment(UErrorCode &errorCode);
  108. UBool normalize(const UChar *from, const UChar *to, UErrorCode &errorCode);
  109. // Text pointers: The input text is [rawStart, rawLimit[
  110. // where rawLimit can be NULL for NUL-terminated text.
  111. //
  112. // checkDir > 0:
  113. //
  114. // The input text [segmentStart..pos[ passes the FCD check.
  115. // Moving forward checks incrementally.
  116. // segmentLimit is undefined. limit == rawLimit.
  117. //
  118. // checkDir < 0:
  119. // The input text [pos..segmentLimit[ passes the FCD check.
  120. // Moving backward checks incrementally.
  121. // segmentStart is undefined, start == rawStart.
  122. //
  123. // checkDir == 0:
  124. //
  125. // The input text [segmentStart..segmentLimit[ is being processed.
  126. // These pointers are at FCD boundaries.
  127. // Either this text segment already passes the FCD check
  128. // and segmentStart==start<=pos<=limit==segmentLimit,
  129. // or the current segment had to be normalized so that
  130. // [segmentStart..segmentLimit[ turned into the normalized string,
  131. // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length().
  132. const UChar *rawStart;
  133. const UChar *segmentStart;
  134. const UChar *segmentLimit;
  135. // rawLimit==NULL for a NUL-terminated string.
  136. const UChar *rawLimit;
  137. const Normalizer2Impl &nfcImpl;
  138. UnicodeString normalized;
  139. // Direction of incremental FCD check. See comments before rawStart.
  140. int8_t checkDir;
  141. };
  142. U_NAMESPACE_END
  143. #endif // !UCONFIG_NO_COLLATION
  144. #endif // __UTF16COLLATIONITERATOR_H__