usprep.h 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2003-2014, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: usprep.h
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2003jul2
  16. * created by: Ram Viswanadha
  17. */
  18. #ifndef __USPREP_H__
  19. #define __USPREP_H__
  20. /**
  21. * \file
  22. * \brief C API: Implements the StringPrep algorithm.
  23. */
  24. #include "unicode/utypes.h"
  25. #include "unicode/localpointer.h"
  26. /**
  27. *
  28. * StringPrep API implements the StringPrep framework as described by RFC 3454.
  29. * StringPrep prepares Unicode strings for use in network protocols.
  30. * Profiles of StringPrep are sets of rules and data according to which the
  31. * Unicode Strings are prepared. Each profile contains tables which describe
  32. * how a code point should be treated. The tables are broadly classified into
  33. * <ul>
  34. * <li> Unassigned Table: Contains code points that are unassigned
  35. * in the Unicode Version supported by StringPrep. Currently
  36. * RFC 3454 supports Unicode 3.2. </li>
  37. * <li> Prohibited Table: Contains code points that are prohibited from
  38. * the output of the StringPrep processing function. </li>
  39. * <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
  40. * </ul>
  41. *
  42. * The procedure for preparing Unicode strings:
  43. * <ol>
  44. * <li> Map: For each character in the input, check if it has a mapping
  45. * and, if so, replace it with its mapping. </li>
  46. * <li> Normalize: Possibly normalize the result of step 1 using Unicode
  47. * normalization. </li>
  48. * <li> Prohibit: Check for any characters that are not allowed in the
  49. * output. If any are found, return an error.</li>
  50. * <li> Check bidi: Possibly check for right-to-left characters, and if
  51. * any are found, make sure that the whole string satisfies the
  52. * requirements for bidirectional strings. If the string does not
  53. * satisfy the requirements for bidirectional strings, return an
  54. * error. </li>
  55. * </ol>
  56. * @author Ram Viswanadha
  57. */
  58. #if !UCONFIG_NO_IDNA
  59. #include "unicode/parseerr.h"
  60. /**
  61. * The StringPrep profile. Only forward-declared in this header; obtained from usprep_open() or usprep_openByType() and released with usprep_close().
  62. * @stable ICU 2.8
  63. */
  64. typedef struct UStringPrepProfile UStringPrepProfile;
  65. /**
  66. * Option for the options argument of usprep_prepare(): prohibit processing of
  67. * unassigned code points in the input. No option bits are set (value 0x0000).
  68. * @see usprep_prepare
  69. * @stable ICU 2.8
  70. */
  71. #define USPREP_DEFAULT 0x0000
  72. /**
  73. * Option for the options argument of usprep_prepare(): allow unassigned code
  74. * points in the input and treat them as normal Unicode code points (bit 0x0001).
  75. * @see usprep_prepare
  76. * @stable ICU 2.8
  77. */
  78. #define USPREP_ALLOW_UNASSIGNED 0x0001
  79. /**
  80. * Standard StringPrep profile types that can be passed to usprep_openByType().
  81. * No enumerator has an explicit value, so they number from 0 in declaration order.
  82. * @see usprep_openByType
  83. * @stable ICU 4.2
  84. */
  85. typedef enum UStringPrepProfileType {
  86. /**
  87. * RFC3491 Nameprep
  88. * @stable ICU 4.2
  89. */
  90. USPREP_RFC3491_NAMEPREP,
  91. /**
  92. * RFC3530 nfs4_cs_prep
  93. * @stable ICU 4.2
  94. */
  95. USPREP_RFC3530_NFS4_CS_PREP,
  96. /**
  97. * RFC3530 nfs4_cs_prep with case-insensitive option
  98. * @stable ICU 4.2
  99. */
  100. USPREP_RFC3530_NFS4_CS_PREP_CI,
  101. /**
  102. * RFC3530 nfs4_cis_prep
  103. * @stable ICU 4.2
  104. */
  105. USPREP_RFC3530_NFS4_CIS_PREP,
  106. /**
  107. * RFC3530 nfs4_mixed_prep for prefix
  108. * @stable ICU 4.2
  109. */
  110. USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX,
  111. /**
  112. * RFC3530 nfs4_mixed_prep for suffix
  113. * @stable ICU 4.2
  114. */
  115. USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX,
  116. /**
  117. * RFC3722 iSCSI
  118. * @stable ICU 4.2
  119. */
  120. USPREP_RFC3722_ISCSI,
  121. /**
  122. * RFC3920 XMPP Nodeprep
  123. * @stable ICU 4.2
  124. */
  125. USPREP_RFC3920_NODEPREP,
  126. /**
  127. * RFC3920 XMPP Resourceprep
  128. * @stable ICU 4.2
  129. */
  130. USPREP_RFC3920_RESOURCEPREP,
  131. /**
  132. * RFC4011 Policy MIB Stringprep
  133. * @stable ICU 4.2
  134. */
  135. USPREP_RFC4011_MIB,
  136. /**
  137. * RFC4013 SASLprep
  138. * @stable ICU 4.2
  139. */
  140. USPREP_RFC4013_SASLPREP,
  141. /**
  142. * RFC4505 trace
  143. * @stable ICU 4.2
  144. */
  145. USPREP_RFC4505_TRACE,
  146. /**
  147. * RFC4518 LDAP
  148. * @stable ICU 4.2
  149. */
  150. USPREP_RFC4518_LDAP,
  151. /**
  152. * RFC4518 LDAP for case ignore, numeric and stored prefix
  153. * matching rules
  154. * @stable ICU 4.2
  155. */
  156. USPREP_RFC4518_LDAP_CI
  157. } UStringPrepProfileType;
  158. /**
  159. * Creates a StringPrep profile from the data file.
  160. *
  161. * @param path string containing the full path pointing to the directory
  162. * where the profile resides, followed by the package name,
  163. * e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system.
  164. * If NULL, ICU default data files will be used.
  165. * @param fileName name of the profile file to be opened
  166. * @param status ICU error code in/out parameter. Must not be NULL.
  167. * Must fulfill U_SUCCESS before the function call.
  168. * @return Pointer to the UStringPrepProfile that is opened. The caller should
  169. * close it by calling usprep_close().
  170. * @see usprep_close()
  171. * @stable ICU 2.8
  172. */
  173. U_STABLE UStringPrepProfile* U_EXPORT2
  174. usprep_open(const char* path,
  175. const char* fileName,
  176. UErrorCode* status);
  177. /**
  178. * Creates a StringPrep profile for the specified standard profile type.
  179. *
  180. * @param type The profile type, one of the UStringPrepProfileType enumerators
  181. * @param status ICU error code in/out parameter. Must not be NULL.
  182. * Must fulfill U_SUCCESS before the function call.
  183. * @return Pointer to the UStringPrepProfile that is opened. The caller should
  184. * close it by calling usprep_close().
  185. * @see usprep_close()
  186. * @stable ICU 4.2
  187. */
  188. U_STABLE UStringPrepProfile* U_EXPORT2
  189. usprep_openByType(UStringPrepProfileType type,
  190. UErrorCode* status);
  191. /**
  192. * Closes a profile that was opened with usprep_open() or usprep_openByType().
  193. * @param profile The profile to close
  194. * @stable ICU 2.8
  195. */
  196. U_STABLE void U_EXPORT2
  197. usprep_close(UStringPrepProfile* profile);
  198. #if U_SHOW_CPLUSPLUS_API
  199. U_NAMESPACE_BEGIN
  200. /**
  201. * \class LocalUStringPrepProfilePointer
  202. * "Smart pointer" class, closes a UStringPrepProfile via usprep_close().
  203. * For most methods see the LocalPointerBase base class.
  204. * Only declared when U_SHOW_CPLUSPLUS_API is enabled.
  205. * @see LocalPointerBase
  206. * @see LocalPointer
  207. * @stable ICU 4.4
  208. */
  209. U_DEFINE_LOCAL_OPEN_POINTER(LocalUStringPrepProfilePointer, UStringPrepProfile, usprep_close);
  210. U_NAMESPACE_END
  211. #endif /* U_SHOW_CPLUSPLUS_API */
  212. /**
  213. * Prepares the input buffer for use in applications with the given profile. This operation maps, normalizes (NFKC),
  214. * and checks for prohibited and BiDi characters, in the order defined by RFC 3454,
  215. * depending on the options specified in the profile.
  216. *
  217. * @param prep The profile to use
  218. * @param src Pointer to UChar buffer containing the string to prepare
  219. * @param srcLength Number of characters in the source string
  220. * @param dest Pointer to the destination buffer to receive the output
  221. * @param destCapacity The capacity of the destination array
  222. * @param options A bit set of options:
  223. *
  224. * - USPREP_DEFAULT Prohibit processing of unassigned code points in the input
  225. *
  226. * - USPREP_ALLOW_UNASSIGNED Treat the unassigned code points in the input
  227. * as normal Unicode code points.
  228. *
  229. * @param parseError Pointer to UParseError struct to receive information on the position
  230. * of the error if an error is encountered. Can be NULL.
  231. * @param status ICU in/out error code parameter.
  232. * U_INVALID_CHAR_FOUND if src contains
  233. * unmatched single surrogates.
  234. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  235. * too many code points.
  236. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not large enough
  237. * @return The number of UChars in the destination buffer
  238. * @stable ICU 2.8
  239. */
  240. U_STABLE int32_t U_EXPORT2
  241. usprep_prepare( const UStringPrepProfile* prep,
  242. const UChar* src, int32_t srcLength,
  243. UChar* dest, int32_t destCapacity,
  244. int32_t options,
  245. UParseError* parseError,
  246. UErrorCode* status );
  247. #endif /* #if !UCONFIG_NO_IDNA */
  248. #endif