ucnvsel.h 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2008-2011, International Business Machines
  7. * Corporation, Google and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. */
  11. /*
  12. * Author : eldawy@google.com (Mohamed Eldawy)
  13. * ucnvsel.h
  14. *
  15. * Purpose: To generate a list of encodings capable of handling
  16. * a given Unicode text
  17. *
  18. * Started 09-April-2008
  19. */
  20. #ifndef __ICU_UCNV_SEL_H__
  21. #define __ICU_UCNV_SEL_H__
  22. #include "unicode/utypes.h"
  23. #if !UCONFIG_NO_CONVERSION
  24. #include "unicode/uset.h"
  25. #include "unicode/utf16.h"
  26. #include "unicode/uenum.h"
  27. #include "unicode/ucnv.h"
  28. #include "unicode/localpointer.h"
  29. /**
  30. * \file
  31. *
  32. * A converter selector is built with a set of encoding/charset names
  33. * and given an input string returns the set of names of the
  34. * corresponding converters which can convert the string.
  35. *
  36. * A converter selector can be serialized into a buffer and reopened
  37. * from the serialized form.
  38. */
  39. /**
  40. * @{
  41. * The selector data structure. It is only forward-declared in this header and is used through pointers by the ucnvsel_* functions.
  42. */
  43. struct UConverterSelector;
  44. typedef struct UConverterSelector UConverterSelector;
  45. /** @} */
  46. /**
  47. * Open a selector.
  48. * If converterListSize is 0, build for all available converters.
  49. * If excludedCodePoints is NULL, don't exclude any code points.
  50. *
  51. * @param converterList an array of the encoding names to build the selector for.
  52. * Can be NULL if converterListSize==0.
  53. * The list and the names will be cloned, and the caller
  54. * retains ownership of the original.
  55. * @param converterListSize number of encodings in the above list.
  56. * If 0, builds a selector for all available converters.
  57. * @param excludedCodePoints a set of code points to be excluded from consideration.
  58. * That is, excluded code points in a string do not change
  59. * the selection result. (They might be handled by a callback.)
  60. * Use NULL to exclude nothing.
  61. * @param whichSet which converter set to use. Use this to determine whether
  62. * to consider only roundtrip mappings or also fallbacks.
  63. * @param status an in/out ICU UErrorCode
  64. * @return the new selector; close it with ucnvsel_close()
  65. *
  66. * @stable ICU 4.2
  67. */
  68. U_STABLE UConverterSelector* U_EXPORT2
  69. ucnvsel_open(const char* const* converterList, int32_t converterListSize,
  70. const USet* excludedCodePoints,
  71. const UConverterUnicodeSet whichSet, UErrorCode* status);
  72. /**
  73. * Closes a selector.
  74. * If any Enumerations were returned by ucnvsel_select*, they become invalid.
  75. * They can be closed before or after calling ucnvsel_close,
  76. * but should never be used after the selector is closed.
  77. *
  78. * @see ucnvsel_selectForString
  79. * @see ucnvsel_selectForUTF8
  80. *
  81. * @param sel selector to close
  82. *
  83. * @stable ICU 4.2
  84. */
  85. U_STABLE void U_EXPORT2
  86. ucnvsel_close(UConverterSelector *sel);
  87. #if U_SHOW_CPLUSPLUS_API
  88. U_NAMESPACE_BEGIN
  89. /**
  90. * \class LocalUConverterSelectorPointer
  91. * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
  92. * For most methods see the LocalPointerBase base class. Only declared when U_SHOW_CPLUSPLUS_API is nonzero.
  93. *
  94. * @see LocalPointerBase
  95. * @see LocalPointer
  96. * @stable ICU 4.4
  97. */
  98. U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
  99. U_NAMESPACE_END
  100. #endif /* U_SHOW_CPLUSPLUS_API */
  101. /**
  102. * Open a selector from its serialized form.
  103. * The buffer must remain valid and unchanged for the lifetime of the selector.
  104. * This is much faster than creating a selector from scratch.
  105. * Using a serialized form from a different machine (endianness/charset) is supported.
  106. *
  107. * @param buffer pointer to the serialized form of a converter selector,
  108. * as written by ucnvsel_serialize(); must be 32-bit-aligned
  109. * @param length the capacity of this buffer (can be equal to or larger than
  110. * the actual data length)
  111. * @param status an in/out ICU UErrorCode
  112. * @return the new selector; close it with ucnvsel_close()
  113. *
  114. * @stable ICU 4.2
  115. */
  116. U_STABLE UConverterSelector* U_EXPORT2
  117. ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
  118. /**
  119. * Serialize a selector into a linear buffer.
  120. * The serialized form is portable to different machines.
  121. *
  122. * @param sel selector to serialize
  123. * @param buffer pointer to 32-bit-aligned memory to be filled with the
  124. * serialized form of this converter selector
  125. * @param bufferCapacity the capacity of this buffer
  126. * @param status an in/out ICU UErrorCode
  127. * @return the required buffer capacity to hold the serialized data (even if the call fails
  128. * with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
  129. *
  130. * @stable ICU 4.2
  131. */
  132. U_STABLE int32_t U_EXPORT2
  133. ucnvsel_serialize(const UConverterSelector* sel,
  134. void* buffer, int32_t bufferCapacity, UErrorCode* status);
  135. /**
  136. * Select converters that can map all characters in a UTF-16 string,
  137. * ignoring the excluded code points.
  138. *
  139. * @param sel a selector
  140. * @param s UTF-16 string
  141. * @param length length of the string, or -1 if NUL-terminated
  142. * @param status an in/out ICU UErrorCode
  143. * @return an enumeration containing encoding names; it becomes invalid once the selector is closed.
  144. * The returned encoding names and their order will be the same as
  145. * supplied when building the selector.
  146. *
  147. * @stable ICU 4.2
  148. */
  149. U_STABLE UEnumeration * U_EXPORT2
  150. ucnvsel_selectForString(const UConverterSelector* sel,
  151. const UChar *s, int32_t length, UErrorCode *status);
  152. /**
  153. * Select converters that can map all characters in a UTF-8 string,
  154. * ignoring the excluded code points.
  155. *
  156. * @param sel a selector
  157. * @param s UTF-8 string
  158. * @param length length of the string, or -1 if NUL-terminated
  159. * @param status an in/out ICU UErrorCode
  160. * @return an enumeration containing encoding names; it becomes invalid once the selector is closed.
  161. * The returned encoding names and their order will be the same as
  162. * supplied when building the selector.
  163. *
  164. * @stable ICU 4.2
  165. */
  166. U_STABLE UEnumeration * U_EXPORT2
  167. ucnvsel_selectForUTF8(const UConverterSelector* sel,
  168. const char *s, int32_t length, UErrorCode *status);
  169. #endif /* !UCONFIG_NO_CONVERSION */
  170. #endif /* __ICU_UCNV_SEL_H__ */