punycode.h 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2002-2003, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: punycode.h
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2002jan31
  16. * created by: Markus W. Scherer
  17. */
  18. /* This ICU code derived from: */
  19. /*
  20. punycode.c 0.4.0 (2001-Nov-17-Sat)
  21. http://www.cs.berkeley.edu/~amc/idn/
  22. Adam M. Costello
  23. http://www.nicemice.net/amc/
  24. */
  25. #ifndef __PUNYCODE_H__
  26. #define __PUNYCODE_H__
  27. #include "unicode/utypes.h"
  28. #if !UCONFIG_NO_IDNA
  29. /**
  30. * u_strToPunycode() converts Unicode to Punycode.
  31. *
  32. * The input string must not contain single, unpaired surrogates.
  33. * The output will be represented as an array of ASCII code points.
  34. *
  35. * The output string is NUL-terminated according to normal ICU
  36. * string output rules.
  37. *
  38. * @param src Input Unicode string.
  39. * This function handles a limited number of code points
  40. * (the limit is >=64).
  41. * U_INDEX_OUTOFBOUNDS_ERROR is set if the limit is exceeded.
  42. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  43. * @param dest Output Punycode array.
  44. * @param destCapacity Size of dest.
  45. * @param caseFlags Vector of boolean values, one per input UChar,
  46. * indicating that the corresponding character is to be
  47. * marked so that the decoder can optionally
  48. * uppercase (TRUE) or lowercase (FALSE)
  49. * the character.
  50. * ASCII characters are output directly in the case as marked.
  51. * Flags corresponding to trail surrogates are ignored.
  52. * If caseFlags==NULL then input characters are not
  53. * case-mapped.
  54. * @param pErrorCode ICU in/out error code parameter.
  55. * U_INVALID_CHAR_FOUND if src contains
  56. * unmatched single surrogates.
  57. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  58. * too many code points.
  59. * @return Number of ASCII characters in the Punycode output.
  60. *
  61. * @see u_strFromPunycode
  62. */
  63. U_CFUNC int32_t
  64. u_strToPunycode(const UChar *src, int32_t srcLength,
  65. UChar *dest, int32_t destCapacity,
  66. const UBool *caseFlags,
  67. UErrorCode *pErrorCode);
  68. /**
  69. * u_strFromPunycode() converts Punycode to Unicode.
  70. * The Unicode string will be at most as long (in UChars)
  71. * as the Punycode string (in chars).
  72. *
  73. * @param src Input Punycode string.
  74. * @param srcLength Length of src, or -1 if NUL-terminated.
  75. * @param dest Output Unicode string buffer.
  76. * @param destCapacity Size of dest in number of UChars,
  77. * and of caseFlags in numbers of UBools.
  78. * @param caseFlags Output array for case flags as
  79. * defined by the Punycode string.
  80. * The caller should uppercase (TRUE) or lowercase (FALSE)
  81. * the corresponding character in dest.
  82. * For supplementary characters, only the lead surrogate
  83. * is marked, and FALSE is stored for the trail surrogate.
  84. * This is redundant and not necessary for ASCII characters
  85. * because they are already in the case indicated.
  86. * Can be NULL if the case flags are not needed.
  87. * @param pErrorCode ICU in/out error code parameter.
  88. * U_INVALID_CHAR_FOUND if a non-ASCII character
  89. * precedes the last delimiter ('-'),
  90. * or if an invalid character (not a-zA-Z0-9) is found
  91. * after the last delimiter.
  92. * U_ILLEGAL_CHAR_FOUND if the delta sequence is ill-formed.
  93. * @return Number of UChars written to dest.
  94. *
  95. * @see u_strToPunycode
  96. */
  97. U_CFUNC int32_t
  98. u_strFromPunycode(const UChar *src, int32_t srcLength,
  99. UChar *dest, int32_t destCapacity,
  100. UBool *caseFlags,
  101. UErrorCode *pErrorCode);
  102. #endif /* #if !UCONFIG_NO_IDNA */
  103. #endif
  104. /*
  105. * Hey, Emacs, please set the following:
  106. *
  107. * Local Variables:
  108. * indent-tabs-mode: nil
  109. * End:
  110. *
  111. */