udataswp.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2003-2014, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: udataswp.h
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2003jun05
  16. * created by: Markus W. Scherer
  17. *
  18. * Definitions for ICU data transformations for different platforms,
  19. * changing between big- and little-endian data and/or between
  20. * charset families (ASCII<->EBCDIC).
  21. */
  22. #ifndef __UDATASWP_H__
  23. #define __UDATASWP_H__
  24. #include <stdarg.h>
  25. #include "unicode/utypes.h"
  26. /* Forward declaration so the function typedefs below can take a UDataSwapper pointer before the struct is defined. */
  27. U_CDECL_BEGIN
  28. struct UDataSwapper;
  29. typedef struct UDataSwapper UDataSwapper;
  30. /**
  31. * Function type for data transformation.
  32. * Transforms data, or just returns the length of the data if
  33. * the input length is -1.
  34. * Swap functions assume that their data pointers are aligned properly.
  35. *
  36. * Quick implementation outline:
  37. * (best to copy and adapt an existing swapper implementation)
  38. *   check that the data looks like the expected format
  39. *   if(length<0) {
  40. *     preflight:
  41. *       never dereference outData
  42. *       read inData and determine the data size
  43. *       assume that inData is long enough for this
  44. *   } else {
  45. *     outData can be NULL if length==0
  46. *     inData==outData (in-place swapping) possible but not required!
  47. *     verify that length>=(actual size)
  48. *     if there is a chance that not every byte up to size is reached
  49. *     due to padding etc.:
  50. *       if(inData!=outData) {
  51. *         memcpy(outData, inData, actual size);
  52. *       }
  53. *     swap contents
  54. *   }
  55. *   return actual size
  56. *
  57. * Further implementation notes:
  58. * - read integers from inData before swapping them
  59. *   because in-place swapping can make them unreadable
  60. * - compareInvChars compares a local Unicode string with already-swapped
  61. *   output charset strings
  62. *
  63. * @param ds Pointer to UDataSwapper containing global data about the
  64. *           transformation and function pointers for handling primitive
  65. *           types.
  66. * @param inData Pointer to the input data to be transformed or examined.
  67. * @param length Length of the data, counting bytes. May be -1 for preflighting.
  68. *               If length>=0, then transform the data.
  69. *               If length==-1, then only determine the length of the data.
  70. *               The length cannot be determined from the data itself for all
  71. *               types of data (e.g., not for simple arrays of integers).
  72. * @param outData Pointer to the output data buffer.
  73. *                If length>=0 (transformation), then the output buffer must
  74. *                have a capacity of at least length.
  75. *                If length==-1, then outData will not be used and can be NULL.
  76. * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
  77. *                   fulfill U_SUCCESS on input.
  78. * @return The actual length of the data.
  79. *
  80. * @see UDataSwapper
  81. * @internal ICU 2.8
  82. */
  83. typedef int32_t U_CALLCONV
  84. UDataSwapFn(const UDataSwapper *ds,
  85. const void *inData, int32_t length, void *outData,
  86. UErrorCode *pErrorCode);
  87. /**
  88. * Convert one uint16_t from input to platform endianness.
  * @param x The 16-bit value in input endianness.
  * @return x in platform endianness.
  89. * @internal ICU 2.8
  90. */
  91. typedef uint16_t U_CALLCONV
  92. UDataReadUInt16(uint16_t x);
  93. /**
  94. * Convert one uint32_t from input to platform endianness.
  * @param x The 32-bit value in input endianness.
  * @return x in platform endianness.
  95. * @internal ICU 2.8
  96. */
  97. typedef uint32_t U_CALLCONV
  98. UDataReadUInt32(uint32_t x);
  99. /**
  100. * Convert one uint16_t from platform to input endianness.
  * @param p Pointer to a uint16_t; presumably the converted value is stored
  *          here (the function returns void) -- confirm against the
  *          implementations.
  * @param x The 16-bit value in platform endianness.
  101. * @internal ICU 2.8
  102. */
  103. typedef void U_CALLCONV
  104. UDataWriteUInt16(uint16_t *p, uint16_t x);
  105. /**
  106. * Convert one uint32_t from platform to input endianness.
  * @param p Pointer to a uint32_t; presumably the converted value is stored
  *          here (the function returns void) -- confirm against the
  *          implementations.
  * @param x The 32-bit value in platform endianness.
  107. * @internal ICU 2.8
  108. */
  109. typedef void U_CALLCONV
  110. UDataWriteUInt32(uint32_t *p, uint32_t x);
  111. /**
  112. * Compare invariant-character strings, one in the output data and the
  113. * other one caller-provided in Unicode.
  114. * An output data string is compared because strings are usually swapped
  115. * before the rest of the data, to allow for sorting of string tables
  116. * according to the output charset.
  117. * You can use -1 for the length parameters of NUL-terminated strings as usual.
  118. * Returns Unicode code point order for invariant characters.
  *
  * @param ds Pointer to the UDataSwapper for this transformation.
  * @param outString The invariant-character string taken from the output data.
  * @param outLength Length of outString, or -1 if it is NUL-terminated.
  * @param localString The caller-provided Unicode string.
  * @param localLength Length of localString, or -1 if it is NUL-terminated.
  * @return The comparison result in Unicode code point order.
  *         NOTE(review): presumably <0, 0, >0 like strcmp() -- confirm
  *         against the implementations.
  119. * @internal ICU 2.8
  120. */
  121. typedef int32_t U_CALLCONV
  122. UDataCompareInvChars(const UDataSwapper *ds,
  123. const char *outString, int32_t outLength,
  124. const UChar *localString, int32_t localLength);
  125. /**
  126. * Function for message output when an error occurs during data swapping.
  127. * A format string and variable number of arguments are passed
  128. * like for vprintf().
  129. *
  130. * @param context A function-specific context pointer.
  131. * @param fmt The format string.
  132. * @param args The arguments for format string inserts, passed as a va_list
  *             (which is why this header includes <stdarg.h>).
  133. *
  134. * @internal ICU 2.8
  135. */
  136. typedef void U_CALLCONV
  137. UDataPrintError(void *context, const char *fmt, va_list args);
  /**
  * Global data about one data transformation: the endianness and charset
  * family of the input and of the output, function pointers for handling
  * primitive types, and an optional callback for error messages.
  * A pointer to this struct is the first argument of every UDataSwapFn.
  * @internal
  */
  138. struct UDataSwapper {
  139. /** Input endianness. @internal ICU 2.8 */
  140. UBool inIsBigEndian;
  141. /** Input charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
  142. uint8_t inCharset;
  143. /** Output endianness. @internal ICU 2.8 */
  144. UBool outIsBigEndian;
  145. /** Output charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
  146. uint8_t outCharset;
  147. /* basic functions for reading data values */
  148. /** Convert one uint16_t from input to platform endianness. @internal ICU 2.8 */
  149. UDataReadUInt16 *readUInt16;
  150. /** Convert one uint32_t from input to platform endianness. @internal ICU 2.8 */
  151. UDataReadUInt32 *readUInt32;
  152. /** Compare an invariant-character output string with a local one. @internal ICU 2.8 */
  153. UDataCompareInvChars *compareInvChars;
  154. /* basic functions for writing data values */
  155. /** Convert one uint16_t from platform to input endianness. @internal ICU 2.8 */
  156. UDataWriteUInt16 *writeUInt16;
  157. /** Convert one uint32_t from platform to input endianness. @internal ICU 2.8 */
  158. UDataWriteUInt32 *writeUInt32;
  159. /* basic functions for data transformations */
  160. /** Transform an array of 16-bit integers. @internal ICU 2.8 */
  161. UDataSwapFn *swapArray16;
  162. /** Transform an array of 32-bit integers. @internal ICU 2.8 */
  163. UDataSwapFn *swapArray32;
  164. /** Transform an array of 64-bit integers. @internal ICU 53 */
  165. UDataSwapFn *swapArray64;
  166. /** Transform an invariant-character string. @internal ICU 2.8 */
  167. UDataSwapFn *swapInvChars;
  168. /**
  169. * Function for message output when an error occurs during data swapping.
  170. * Can be NULL.
  171. * @internal ICU 2.8
  172. */
  173. UDataPrintError *printError;
  174. /** Context pointer for printError. @internal ICU 2.8 */
  175. void *printErrorContext;
  176. };
  177. U_CDECL_END
  /**
  * Open a UDataSwapper for the given input and output characteristics.
  * The first four parameters have the same names and types as the
  * UDataSwapper fields inIsBigEndian, inCharset, outIsBigEndian and
  * outCharset (endianness flag and charset family, for input and output).
  *
  * NOTE(review): presumably the result must be released with
  * udata_closeSwapper() and is NULL when an error is set -- confirm against
  * the implementation.
  *
  * @param pErrorCode ICU UErrorCode parameter.
  * @return Pointer to the UDataSwapper.
  * @internal
  */
  178. U_CAPI UDataSwapper * U_EXPORT2
  179. udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
  180. UBool outIsBigEndian, uint8_t outCharset,
  181. UErrorCode *pErrorCode);
  182. /**
  183. * Open a UDataSwapper for the given input data and the specified output
  184. * characteristics.
  185. * Values of -1 for any of the characteristics mean the local platform's
  186. * characteristics.
  *
  * NOTE(review): outIsBigEndian is a UBool and outCharset a uint8_t, so how
  * a value of -1 is represented and recognized is not visible in this
  * header -- confirm against the implementation. The udata_swap referenced
  * below is not declared in this header either.
  *
  * @param data The input data; presumably examined to determine the input
  *             endianness and charset family -- confirm.
  * @param length Length of data (presumably counting bytes, as for
  *               UDataSwapFn -- confirm).
  * @param outIsBigEndian Output endianness.
  * @param outCharset Output charset family.
  * @param pErrorCode ICU UErrorCode parameter.
  * @return Pointer to the UDataSwapper.
  187. *
  188. * @see udata_swap
  189. * @internal ICU 2.8
  190. */
  191. U_CAPI UDataSwapper * U_EXPORT2
  192. udata_openSwapperForInputData(const void *data, int32_t length,
  193. UBool outIsBigEndian, uint8_t outCharset,
  194. UErrorCode *pErrorCode);
  /**
  * Close a UDataSwapper.
  * NOTE(review): presumably releases a swapper returned by
  * udata_openSwapper() or udata_openSwapperForInputData(); whether ds may
  * be NULL is not visible in this header -- confirm.
  *
  * @param ds The swapper to close.
  * @internal
  */
  195. U_CAPI void U_EXPORT2
  196. udata_closeSwapper(UDataSwapper *ds);
  197. /**
  198. * Read the beginning of an ICU data piece, recognize magic bytes,
  199. * swap the structure.
  200. * Set a U_UNSUPPORTED_ERROR if it does not look like an ICU data piece.
  *
  * Takes the same parameter list as UDataSwapFn.
  * NOTE(review): presumably length==-1 preflights here as well -- confirm.
  201. *
  202. * @return The size of the data header, in bytes.
  203. *
  204. * @internal ICU 2.8
  205. */
  206. U_CAPI int32_t U_EXPORT2
  207. udata_swapDataHeader(const UDataSwapper *ds,
  208. const void *inData, int32_t length, void *outData,
  209. UErrorCode *pErrorCode);
  210. /**
  211. * Convert one int16_t from input to platform endianness.
  * Signed counterpart of the UDataReadUInt16 function type.
  * @param ds The swapper describing the input data.
  * @param x The 16-bit value in input endianness.
  * @return x in platform endianness.
  212. * @internal ICU 2.8
  213. */
  214. U_CAPI int16_t U_EXPORT2
  215. udata_readInt16(const UDataSwapper *ds, int16_t x);
  216. /**
  217. * Convert one int32_t from input to platform endianness.
  * Signed counterpart of the UDataReadUInt32 function type.
  * @param ds The swapper describing the input data.
  * @param x The 32-bit value in input endianness.
  * @return x in platform endianness.
  218. * @internal ICU 2.8
  219. */
  220. U_CAPI int32_t U_EXPORT2
  221. udata_readInt32(const UDataSwapper *ds, int32_t x);
  222. /**
  223. * Swap a block of invariant, NUL-terminated strings, but not padding
  224. * bytes after the last string.
  * Takes the same parameter list and has the same return type as
  * UDataSwapFn.
  * NOTE(review): what the return value counts when padding is present is
  * not visible in this header -- confirm against the implementation.
  225. * @internal
  226. */
  227. U_CAPI int32_t U_EXPORT2
  228. udata_swapInvStringBlock(const UDataSwapper *ds,
  229. const void *inData, int32_t length, void *outData,
  230. UErrorCode *pErrorCode);
  /**
  * Output a message about an error during data swapping.
  * Takes a format string followed by a variable number of arguments.
  * NOTE(review): presumably forwards to ds->printError together with
  * ds->printErrorContext, and does nothing when printError is NULL --
  * confirm against the implementation.
  *
  * @param ds The swapper whose error output is used.
  * @param fmt The format string.
  * @internal
  */
  231. U_CAPI void U_EXPORT2
  232. udata_printError(const UDataSwapper *ds,
  233. const char *fmt,
  234. ...);
  235. /* internal exports from putil.c -------------------------------------------- */
  236. /* declared here to keep them out of the public putil.h */
  237. /**
  238. * Swap invariant char * strings ASCII->EBCDIC.
  * Takes the same parameter list and has the same return type as
  * UDataSwapFn.
  * Unlike the three sibling charset functions declared after it (U_CFUNC),
  * this one is declared with U_CAPI/U_EXPORT2.
  239. * @internal
  240. */
  241. U_CAPI int32_t U_EXPORT2
  242. uprv_ebcdicFromAscii(const UDataSwapper *ds,
  243. const void *inData, int32_t length, void *outData,
  244. UErrorCode *pErrorCode);
  245. /**
  246. * Copy invariant ASCII char * strings and verify they are invariant.
  * Takes the same parameter list and has the same return type as
  * UDataSwapFn.
  * NOTE(review): how a non-invariant character is reported (presumably via
  * pErrorCode) is not visible in this header -- confirm.
  247. * @internal
  248. */
  249. U_CFUNC int32_t
  250. uprv_copyAscii(const UDataSwapper *ds,
  251. const void *inData, int32_t length, void *outData,
  252. UErrorCode *pErrorCode);
  253. /**
  254. * Swap invariant char * strings EBCDIC->ASCII.
  * Takes the same parameter list and has the same return type as
  * UDataSwapFn.
  255. * @internal
  256. */
  257. U_CFUNC int32_t
  258. uprv_asciiFromEbcdic(const UDataSwapper *ds,
  259. const void *inData, int32_t length, void *outData,
  260. UErrorCode *pErrorCode);
  261. /**
  262. * Copy invariant EBCDIC char * strings and verify they are invariant.
  * Takes the same parameter list and has the same return type as
  * UDataSwapFn.
  * NOTE(review): how a non-invariant character is reported (presumably via
  * pErrorCode) is not visible in this header -- confirm.
  263. * @internal
  264. */
  265. U_CFUNC int32_t
  266. uprv_copyEbcdic(const UDataSwapper *ds,
  267. const void *inData, int32_t length, void *outData,
  268. UErrorCode *pErrorCode);
  269. /**
  270. * Compare ASCII invariant char * with Unicode invariant UChar *
  * Takes the same parameter list and has the same return type as
  * UDataCompareInvChars.
  * Selected by the uprv_compareInvWithUChar macro when
  * U_CHARSET_FAMILY==U_ASCII_FAMILY.
  271. * @internal
  272. */
  273. U_CFUNC int32_t
  274. uprv_compareInvAscii(const UDataSwapper *ds,
  275. const char *outString, int32_t outLength,
  276. const UChar *localString, int32_t localLength);
  277. /**
  278. * Compare EBCDIC invariant char * with Unicode invariant UChar *
  * Takes the same parameter list and has the same return type as
  * UDataCompareInvChars.
  * Selected by the uprv_compareInvWithUChar macro when
  * U_CHARSET_FAMILY==U_EBCDIC_FAMILY.
  279. * @internal
  280. */
  281. U_CFUNC int32_t
  282. uprv_compareInvEbcdic(const UDataSwapper *ds,
  283. const char *outString, int32_t outLength,
  284. const UChar *localString, int32_t localLength);
  285. /**
  286. * \def uprv_compareInvWithUChar
  287. * Compare an invariant-character string with a UChar string.
  * Expands to uprv_compareInvAscii or uprv_compareInvEbcdic according to
  * the platform's U_CHARSET_FAMILY; any other charset family is a
  * compile-time error.
  288. * @internal
  289. */
  290. #if U_CHARSET_FAMILY==U_ASCII_FAMILY
  291. # define uprv_compareInvWithUChar uprv_compareInvAscii
  292. #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
  293. # define uprv_compareInvWithUChar uprv_compareInvEbcdic
  294. #else
  295. # error Unknown charset family!
  296. #endif
  297. // utrie_swap.cpp -----------------------------------------------------------***
  298. /**
  299. * Swaps a serialized UTrie.
  * Takes the same parameter list and has the same return type as
  * UDataSwapFn.
  300. * @internal
  301. */
  302. U_CAPI int32_t U_EXPORT2
  303. utrie_swap(const UDataSwapper *ds,
  304. const void *inData, int32_t length, void *outData,
  305. UErrorCode *pErrorCode);
  306. /**
  307. * Swaps a serialized UTrie2.
  * Takes the same parameter list and has the same return type as
  * UDataSwapFn.
  308. * @internal
  309. */
  310. U_CAPI int32_t U_EXPORT2
  311. utrie2_swap(const UDataSwapper *ds,
  312. const void *inData, int32_t length, void *outData,
  313. UErrorCode *pErrorCode);
  314. /**
  315. * Swaps a serialized UCPTrie.
  * Takes the same parameter list and has the same return type as
  * UDataSwapFn.
  316. * @internal
  317. */
  318. U_CAPI int32_t U_EXPORT2
  319. ucptrie_swap(const UDataSwapper *ds,
  320. const void *inData, int32_t length, void *outData,
  321. UErrorCode *pErrorCode);
  322. /**
  323. * Swaps a serialized UTrie, UTrie2, or UCPTrie.
  * Takes the same parameter list and has the same return type as
  * UDataSwapFn.
  * NOTE(review): presumably detects which of the three formats inData holds
  * and handles it like utrie_swap, utrie2_swap or ucptrie_swap -- confirm
  * against the implementation.
  324. * @internal
  325. */
  326. U_CAPI int32_t U_EXPORT2
  327. utrie_swapAnyVersion(const UDataSwapper *ds,
  328. const void *inData, int32_t length, void *outData,
  329. UErrorCode *pErrorCode);
  330. /* material... -------------------------------------------------------------- */
  331. #if 0 /* compiled out: the declaration below is never seen by the compiler */
  332. /* udata.h */
  333. /**
  334. * Public API function in udata.c
  335. *
  336. * Same as udata_openChoice() but automatically swaps the data.
  337. * isAcceptable, if not NULL, may accept data with endianness and charset family
  338. * different from the current platform's properties.
  339. * If the data is acceptable and the platform properties do not match, then
  340. * the swap function is called to swap an allocated version of the data.
  341. * Preflighting may or may not be performed depending on whether the size of
  342. * the loaded data item is known.
  343. *
  344. * @param isAcceptable Same as for udata_openChoice(). May be NULL.
  345. *
  346. * @internal ICU 2.8
  347. */
  348. U_CAPI UDataMemory * U_EXPORT2
  349. udata_openSwap(const char *path, const char *type, const char *name,
  350. UDataMemoryIsAcceptable *isAcceptable, void *isAcceptableContext,
  351. UDataSwapFn *swap,
  352. UDataPrintError *printError, void *printErrorContext,
  353. UErrorCode *pErrorCode);
  354. #endif /* 0 */
  355. #endif