uinvchar.h 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 1999-2015, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: uinvchar.h
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:2
  14. *
  15. * created on: 2004sep14
  16. * created by: Markus W. Scherer
  17. *
  18. * Definitions for handling invariant characters, moved here from putil.c
  19. * for better modularization.
  20. */
  21. #ifndef __UINVCHAR_H__
  22. #define __UINVCHAR_H__
  23. #include "unicode/utypes.h"
  24. #ifdef __cplusplus
  25. #include "unicode/unistr.h"
  26. #endif
  27. /**
  28. * Check if a char string only contains invariant characters.
  29. * See utypes.h for details.
  30. *
  31. * @param s Input string pointer.
  32. * @param length Length of the string, can be -1 if NUL-terminated.
  33. * @return TRUE if s contains only invariant characters.
  34. *
  35. * @internal (ICU 2.8)
  36. */
  37. U_INTERNAL UBool U_EXPORT2
  38. uprv_isInvariantString(const char *s, int32_t length);
  39. /**
  40. * Check if a Unicode string only contains invariant characters.
  41. * See utypes.h for details.
  42. *
  43. * @param s Input string pointer.
  44. * @param length Length of the string, can be -1 if NUL-terminated.
  45. * @return TRUE if s contains only invariant characters.
  46. *
  47. * @internal (ICU 2.8)
  48. */
  49. U_INTERNAL UBool U_EXPORT2
  50. uprv_isInvariantUString(const UChar *s, int32_t length);
  51. /**
  52. * \def U_UPPER_ORDINAL
  53. * Get the ordinal number of an uppercase invariant character
  54. * @internal
  55. */
  56. #if U_CHARSET_FAMILY==U_ASCII_FAMILY
  57. # define U_UPPER_ORDINAL(x) ((x)-'A')
  58. #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  59. # define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \
  60. (((x) < 'S') ? ((x)-'J'+9) : \
  61. ((x)-'S'+18)))
  62. #else
  63. # error Unknown charset family!
  64. #endif
  65. #ifdef __cplusplus
  66. U_NAMESPACE_BEGIN
  67. /**
  68. * Like U_UPPER_ORDINAL(x) but with validation.
  69. * Returns 0..25 for A..Z else a value outside 0..25.
  70. */
  71. inline int32_t uprv_upperOrdinal(int32_t c) {
  72. #if U_CHARSET_FAMILY==U_ASCII_FAMILY
  73. return c - 'A';
  74. #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  75. // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8).
  76. // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
  77. if (c <= 'I') { return c - 'A'; } // A-I --> 0-8
  78. if (c < 'J') { return -1; }
  79. if (c <= 'R') { return c - 'J' + 9; } // J-R --> 9..17
  80. if (c < 'S') { return -1; }
  81. return c - 'S' + 18; // S-Z --> 18..25
  82. #else
  83. # error Unknown charset family!
  84. #endif
  85. }
  86. // Like U_UPPER_ORDINAL(x) but for lowercase and with validation.
  87. // Returns 0..25 for a..z else a value outside 0..25.
  88. inline int32_t uprv_lowerOrdinal(int32_t c) {
  89. #if U_CHARSET_FAMILY==U_ASCII_FAMILY
  90. return c - 'a';
  91. #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  92. // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8).
  93. // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
  94. if (c <= 'i') { return c - 'a'; } // a-i --> 0-8
  95. if (c < 'j') { return -1; }
  96. if (c <= 'r') { return c - 'j' + 9; } // j-r --> 9..17
  97. if (c < 's') { return -1; }
  98. return c - 's' + 18; // s-z --> 18..25
  99. #else
  100. # error Unknown charset family!
  101. #endif
  102. }
  103. U_NAMESPACE_END
  104. #endif
  105. /**
  106. * Returns true if c == '@' is possible.
  107. * The @ sign is variant, and the @ sign used on one
  108. * EBCDIC machine won't be compiled the same way on other EBCDIC based machines.
  109. * @internal
  110. */
  111. U_CFUNC UBool
  112. uprv_isEbcdicAtSign(char c);
  113. /**
  114. * \def uprv_isAtSign
  115. * Returns true if c == '@' is possible.
  116. * For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign().
  117. * @internal
  118. */
  119. #if U_CHARSET_FAMILY==U_ASCII_FAMILY
  120. # define uprv_isAtSign(c) ((c)=='@')
  121. #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  122. # define uprv_isAtSign(c) uprv_isEbcdicAtSign(c)
  123. #else
  124. # error Unknown charset family!
  125. #endif
  126. /**
  127. * Compare two EBCDIC invariant-character strings in ASCII order.
  128. * @internal
  129. */
  130. U_INTERNAL int32_t U_EXPORT2
  131. uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2);
  132. /**
  133. * \def uprv_compareInvCharsAsAscii
  134. * Compare two invariant-character strings in ASCII order.
  135. * @internal
  136. */
  137. #if U_CHARSET_FAMILY==U_ASCII_FAMILY
  138. # define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2)
  139. #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  140. # define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2)
  141. #else
  142. # error Unknown charset family!
  143. #endif
  144. /**
  145. * Converts an EBCDIC invariant character to ASCII.
  146. * @internal
  147. */
  148. U_INTERNAL char U_EXPORT2
  149. uprv_ebcdicToAscii(char c);
  150. /**
  151. * \def uprv_invCharToAscii
  152. * Converts an invariant character to ASCII.
  153. * @internal
  154. */
  155. #if U_CHARSET_FAMILY==U_ASCII_FAMILY
  156. # define uprv_invCharToAscii(c) (c)
  157. #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  158. # define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c)
  159. #else
  160. # error Unknown charset family!
  161. #endif
  162. /**
  163. * Converts an EBCDIC invariant character to lowercase ASCII.
  164. * @internal
  165. */
  166. U_INTERNAL char U_EXPORT2
  167. uprv_ebcdicToLowercaseAscii(char c);
  168. /**
  169. * \def uprv_invCharToLowercaseAscii
  170. * Converts an invariant character to lowercase ASCII.
  171. * @internal
  172. */
  173. #if U_CHARSET_FAMILY==U_ASCII_FAMILY
  174. # define uprv_invCharToLowercaseAscii uprv_asciitolower
  175. #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  176. # define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii
  177. #else
  178. # error Unknown charset family!
  179. #endif
  180. /**
  181. * Copy EBCDIC to ASCII
  182. * @internal
  183. * @see uprv_strncpy
  184. */
  185. U_INTERNAL uint8_t* U_EXPORT2
  186. uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
  187. /**
  188. * Copy ASCII to EBCDIC
  189. * @internal
  190. * @see uprv_strncpy
  191. */
  192. U_INTERNAL uint8_t* U_EXPORT2
  193. uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
  194. #endif