propsvec.h 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2002-2010, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: propsvec.h
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2002feb22
  16. * created by: Markus W. Scherer
  17. *
  18. * Store bits (Unicode character properties) in bit set vectors.
  19. */
  20. #ifndef __UPROPSVEC_H__
  21. #define __UPROPSVEC_H__
  22. #include "unicode/utypes.h"
  23. #include "utrie.h"
  24. #include "utrie2.h"
  25. U_CDECL_BEGIN
  26. /**
  27. * Unicode Properties Vectors associated with code point ranges.
  28. *
  29. * Rows of uint32_t integers in a contiguous array store
  30. * the range limits and the properties vectors.
  31. *
  32. * Logically, each row has a certain number of uint32_t values,
  33. * which is set via the upvec_open() "columns" parameter.
  34. *
  35. * Internally, two additional columns are stored.
  36. * In each internal row,
  37. * row[0] contains the start code point and
  38. * row[1] contains the limit code point,
  39. * which is the start of the next range.
  40. *
  41. * Initially, there is only one "normal" row for
  42. * range [0..0x110000[ with values 0.
      * (The trailing "[" marks a half-open range: the limit 0x110000 itself
      * is not part of the range, matching the row[1] "limit" convention above.)
  43. * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
  44. *
  45. * It would be possible to store only one range boundary per row,
  46. * but self-contained rows allow sorting them later by contents.
      *
      * The struct is only forward-declared in this header, so code that includes
      * it handles the type as opaque, through UPropsVectors pointers.
  47. */
  48. struct UPropsVectors;
  49. typedef struct UPropsVectors UPropsVectors;
  50. /*
  51. * Special pseudo code points for storing the initialValue and the errorValue,
  52. * which are used to initialize a UTrie2 or similar.
      *
      * UPVEC_FIRST_SPECIAL_CP equals 0x110000, the limit of the initial "normal"
      * range, so the special rows sit directly above the real code points.
      * UPVEC_INITIAL_VALUE_CP is the first special pseudo code point (same value
      * as UPVEC_FIRST_SPECIAL_CP) and UPVEC_ERROR_VALUE_CP is the next one.
      * UPVEC_MAX_CP is the highest pseudo code point currently defined (same value
      * as UPVEC_ERROR_VALUE_CP); it might grow over time.
  53. */
  54. #define UPVEC_FIRST_SPECIAL_CP 0x110000
  55. #define UPVEC_INITIAL_VALUE_CP 0x110000
  56. #define UPVEC_ERROR_VALUE_CP 0x110001
  57. #define UPVEC_MAX_CP 0x110001
  58. /*
  59. * Special pseudo code point used in upvec_compact() signalling the end of
  60. * delivering special values and the beginning of delivering real ones.
  61. * Stable value, unlike UPVEC_MAX_CP which might grow over time.
      * The value 0x200000 is larger than the current UPVEC_MAX_CP (0x110001),
      * so it does not collide with any real or special pseudo code point
      * defined in this header.
  62. */
  63. #define UPVEC_START_REAL_VALUES_CP 0x200000
  64. /*
  65. * Open a UPropsVectors object.
  66. * @param columns Number of value integers (uint32_t) per row.
      *                This is the logical row width; two further internal
      *                columns hold the range start and limit of each row.
      * @param pErrorCode Pointer to a UErrorCode.
      *                NOTE(review): presumably the usual ICU in/out error code
      *                (checked on entry, set on failure) -- confirm in the
      *                implementation.
      * @return Pointer to the new object.
      *                NOTE(review): presumably NULL on failure, and to be
      *                released with upvec_close() -- confirm in the implementation.
  67. */
  68. U_CAPI UPropsVectors * U_EXPORT2
  69. upvec_open(int32_t columns, UErrorCode *pErrorCode);
  /*
   * Close a UPropsVectors object.
   * NOTE(review): presumably the counterpart of upvec_open() that releases the
   * object and its memory; whether pv may be NULL is not visible in this
   * header -- confirm in the implementation.
   */
  70. U_CAPI void U_EXPORT2
  71. upvec_close(UPropsVectors *pv);
  72. /*
  73. * In rows for code points [start..end], select the column,
  74. * reset the mask bits and set the value bits (ANDed with the mask).
      * In other words, only the bits selected by the mask are changed in the
      * chosen column; they are replaced with the corresponding bits of value.
  75. *
  76. * Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
      *
      * @param pv The properties vectors object to modify.
      * @param start First code point of the range to modify.
      * @param end Last code point of the range to modify (the range is written
      *            with closed brackets, so end is included).
      * @param column Index of the value column to modify.
      *            NOTE(review): presumably 0-based within the "columns" values
      *            given to upvec_open() -- confirm in the implementation.
      * @param value New bits; only those also set in mask are stored.
      * @param mask Selects which bits of the column are replaced.
      * @param pErrorCode Receives U_NO_WRITE_PERMISSION when called after
      *            upvec_compact(). Other error conditions are not visible in
      *            this header.
  77. */
  78. U_CAPI void U_EXPORT2
  79. upvec_setValue(UPropsVectors *pv,
  80. UChar32 start, UChar32 end,
  81. int32_t column,
  82. uint32_t value, uint32_t mask,
  83. UErrorCode *pErrorCode);
  84. /*
      * Get a value for one code point and one column.
      * NOTE(review): presumably returns the uint32_t stored in the given column
      * of the row whose range contains c -- confirm in the implementation.
      *
  85. * Logically const but must not be used on the same pv concurrently!
  86. * Always returns 0 if called after upvec_compact().
      * A return value of 0 can therefore also be a legitimately stored value;
      * the function has no separate error output.
  87. */
  88. U_CAPI uint32_t U_EXPORT2
  89. upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
  90. /*
      * Get one row by its index.
      * NOTE(review): presumably the returned pointer addresses the row's value
      * columns, and the row's code point range is written to *pRangeStart and
      * *pRangeEnd when those pointers are not NULL -- confirm in the
      * implementation (including whether *pRangeEnd is inclusive).
      *
  91. * pRangeStart and pRangeEnd can be NULL.
  92. * @return NULL if rowIndex out of range and for illegal arguments,
  93. * or if called after upvec_compact()
  94. */
  95. U_CAPI uint32_t * U_EXPORT2
  96. upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
  97. UChar32 *pRangeStart, UChar32 *pRangeEnd);
  98. /*
  99. * Compact the vectors:
  100. * - modify the memory
  101. * - keep only unique vectors
  102. * - store them contiguously from the beginning of the memory
  103. * - for each (non-unique) row, call the handler function
  104. *
  105. * The handler's rowIndex is the index of the row in the compacted
  106. * memory block.
  107. * (Therefore, it starts at 0 and increases in increments of the columns value.)
  108. *
  109. * In a first phase, only special values are delivered (each exactly once),
  110. * with start==end both equalling a special pseudo code point.
  111. * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
  112. * where rowIndex is the length of the compacted array,
  113. * and the row is arbitrary (but not NULL).
  114. * Then, in the second phase, the handler is called for each row of real values.
      *
      * This comment describes upvec_compact(), which is declared right after
      * the handler type. The handler type itself takes these parameters:
      *   context    - caller-defined pointer.
      *                NOTE(review): presumably the context argument of
      *                upvec_compact() passed through unchanged -- confirm.
      *   start, end - the code point range of the delivered row, or a special
      *                pseudo code point (start==end) as described above.
      *   rowIndex   - index of the row in the compacted memory block, or the
      *                length of the compacted array in the
      *                UPVEC_START_REAL_VALUES_CP call.
      *   row        - the row's values; never NULL, but arbitrary in the
      *                UPVEC_START_REAL_VALUES_CP call.
      *   columns    - NOTE(review): presumably the number of value columns per
      *                row, as given to upvec_open() -- confirm.
      *   pErrorCode - error code pointer shared with the upvec_compact() caller.
      *                NOTE(review): exact in/out behavior is not visible here.
  115. */
  116. typedef void U_CALLCONV
  117. UPVecCompactHandler(void *context,
  118. UChar32 start, UChar32 end,
  119. int32_t rowIndex, uint32_t *row, int32_t columns,
  120. UErrorCode *pErrorCode);
  /*
   * Compact the vectors of pv and report the rows through the handler.
   * The compaction steps and the order of handler calls are described in the
   * comment that precedes the UPVecCompactHandler typedef.
   *
   * After this call the object is no longer writable or queryable by row:
   * upvec_setValue() sets U_NO_WRITE_PERMISSION, upvec_getValue() returns 0
   * and upvec_getRow() returns NULL. upvec_getArray() and upvec_cloneArray()
   * return NULL when called before this function.
   */
  121. U_CAPI void U_EXPORT2
  122. upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
  123. /*
  124. * Get the vectors array after calling upvec_compact().
  125. * The caller must not modify nor release the returned array.
  126. * Returns NULL if called before upvec_compact().
      *
      * @param pRows NOTE(review): presumably receives the number of rows in the
      *              compacted array; whether NULL is allowed is not visible
      *              here -- confirm in the implementation.
      * @param pColumns NOTE(review): presumably receives the number of value
      *              columns per row; same caveat as for pRows.
  127. */
  128. U_CAPI const uint32_t * U_EXPORT2
  129. upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
  130. /*
  131. * Get a clone of the vectors array after calling upvec_compact().
  132. * The caller owns the returned array and must uprv_free() it.
  133. * Returns NULL if called before upvec_compact().
      * Unlike upvec_getArray(), the result is a non-const pointer to a copy
      * owned by the caller, and the function takes an error code.
      *
      * @param pRows NOTE(review): presumably receives the number of rows in the
      *              cloned array; whether NULL is allowed is not visible
      *              here -- confirm in the implementation.
      * @param pColumns NOTE(review): presumably receives the number of value
      *              columns per row; same caveat as for pRows.
      * @param pErrorCode NOTE(review): presumably reports allocation failure and
      *              misuse -- confirm in the implementation.
  134. */
  135. U_CAPI uint32_t * U_EXPORT2
  136. upvec_cloneArray(const UPropsVectors *pv,
  137. int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
  138. /*
  139. * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
  140. * vectors array, and freeze the trie.
      * Because upvec_compact() is called, pv is compacted as a side effect and
      * the restrictions that apply after upvec_compact() hold afterwards.
      *
      * @return The frozen trie.
      *         NOTE(review): presumably NULL on failure and owned by the
      *         caller -- confirm in the implementation.
  141. */
  142. U_CAPI UTrie2 * U_EXPORT2
  143. upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
  /*
   * Context for upvec_compactToUTrie2Handler(), which receives it through the
   * handler's void *context parameter and creates the trie.
   * NOTE(review): the meanings of the int32_t fields below are inferred from
   * their names -- confirm against the handler's implementation.
   */
  144. struct UPVecToUTrie2Context {
  145. UTrie2 *trie; /* the trie created by the handler */
  146. int32_t initialValue; /* presumably the row index delivered for UPVEC_INITIAL_VALUE_CP */
  147. int32_t errorValue; /* presumably the row index delivered for UPVEC_ERROR_VALUE_CP */
  148. int32_t maxValue; /* presumably the largest row index to be stored -- TODO confirm */
  149. };
  150. typedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
  151. /*
      * Handler for use with upvec_compact():
      * context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values.
      * The parameter list and calling convention are the same as in the
      * UPVecCompactHandler typedef, so a pointer to this function can be
      * passed as the handler argument of upvec_compact().
      */
  152. U_CAPI void U_CALLCONV
  153. upvec_compactToUTrie2Handler(void *context,
  154. UChar32 start, UChar32 end,
  155. int32_t rowIndex, uint32_t *row, int32_t columns,
  156. UErrorCode *pErrorCode);
  157. U_CDECL_END
  158. #endif