collationkeys.h 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2012-2014, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * collationkeys.h
  9. *
  10. * created on: 2012sep02
  11. * created by: Markus W. Scherer
  12. */
  13. #ifndef __COLLATIONKEYS_H__
  14. #define __COLLATIONKEYS_H__
  15. #include "unicode/utypes.h"
  16. #if !UCONFIG_NO_COLLATION
  17. #include "unicode/bytestream.h"
  18. #include "unicode/ucol.h"
  19. #include "charstr.h"
  20. #include "collation.h"
  21. U_NAMESPACE_BEGIN
  22. class CollationIterator;
  23. struct CollationDataReader;
  24. struct CollationSettings;
  25. class SortKeyByteSink : public ByteSink {
  26. public:
  27. SortKeyByteSink(char *dest, int32_t destCapacity)
  28. : buffer_(dest), capacity_(destCapacity),
  29. appended_(0), ignore_(0) {}
  30. virtual ~SortKeyByteSink();
  31. void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; }
  32. virtual void Append(const char *bytes, int32_t n);
  33. void Append(uint32_t b) {
  34. if (ignore_ > 0) {
  35. --ignore_;
  36. } else {
  37. if (appended_ < capacity_ || Resize(1, appended_)) {
  38. buffer_[appended_] = (char)b;
  39. }
  40. ++appended_;
  41. }
  42. }
  43. virtual char *GetAppendBuffer(int32_t min_capacity,
  44. int32_t desired_capacity_hint,
  45. char *scratch, int32_t scratch_capacity,
  46. int32_t *result_capacity);
  47. int32_t NumberOfBytesAppended() const { return appended_; }
  48. /**
  49. * @return how many bytes can be appended (including ignored ones)
  50. * without reallocation
  51. */
  52. int32_t GetRemainingCapacity() const {
  53. // Either ignore_ or appended_ should be 0.
  54. return ignore_ + capacity_ - appended_;
  55. }
  56. UBool Overflowed() const { return appended_ > capacity_; }
  57. /** @return FALSE if memory allocation failed */
  58. UBool IsOk() const { return buffer_ != NULL; }
  59. protected:
  60. virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;
  61. virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;
  62. void SetNotOk() {
  63. buffer_ = NULL;
  64. capacity_ = 0;
  65. }
  66. char *buffer_;
  67. int32_t capacity_;
  68. int32_t appended_;
  69. int32_t ignore_;
  70. private:
  71. SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
  72. SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
  73. };
  74. class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ {
  75. public:
  76. class LevelCallback : public UMemory {
  77. public:
  78. virtual ~LevelCallback();
  79. /**
  80. * @param level The next level about to be written to the ByteSink.
  81. * @return TRUE if the level is to be written
  82. * (the base class implementation always returns TRUE)
  83. */
  84. virtual UBool needToWrite(Collation::Level level);
  85. };
  86. /**
  87. * Writes the sort key bytes for minLevel up to the iterator data's strength.
  88. * Optionally writes the case level.
  89. * Stops writing levels when callback.needToWrite(level) returns FALSE.
  90. * Separates levels with the LEVEL_SEPARATOR_BYTE
  91. * but does not write a TERMINATOR_BYTE.
  92. */
  93. static void writeSortKeyUpToQuaternary(CollationIterator &iter,
  94. const UBool *compressibleBytes,
  95. const CollationSettings &settings,
  96. SortKeyByteSink &sink,
  97. Collation::Level minLevel, LevelCallback &callback,
  98. UBool preflight, UErrorCode &errorCode);
  99. private:
  100. friend struct CollationDataReader;
  101. CollationKeys(); // no instantiation
  102. // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
  103. static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE;
  104. static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
  105. static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40;
  106. static const int32_t SEC_COMMON_MAX_COUNT = 0x21;
  107. // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
  108. static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1;
  109. static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
  110. static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13;
  111. static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
  112. // Case level, upperFirst: Compress up to 13 common weights as 3..15.
  113. static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3;
  114. static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15;
  115. static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
  116. // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
  117. static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE;
  118. static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
  119. static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
  120. static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61;
  121. // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
  122. static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE;
  123. static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
  124. static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
  125. static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;
  126. // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
  127. static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80;
  128. static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
  129. static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
  130. static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;
  131. // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
  132. static const uint32_t QUAT_COMMON_LOW = 0x1c;
  133. static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
  134. static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
  135. static const int32_t QUAT_COMMON_MAX_COUNT = 0x71;
  136. // Primary weights shifted to quaternary level must be encoded with
  137. // a lead byte below the common-weight compression range.
  138. static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b
  139. };
  140. U_NAMESPACE_END
  141. #endif // !UCONFIG_NO_COLLATION
  142. #endif // __COLLATIONKEYS_H__