norm2allmodes.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2014, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * norm2allmodes.h
  9. *
  10. * created on: 2014sep07
  11. * created by: Markus W. Scherer
  12. */
  13. #ifndef __NORM2ALLMODES_H__
  14. #define __NORM2ALLMODES_H__
  15. #include "unicode/utypes.h"
  16. #if !UCONFIG_NO_NORMALIZATION
  17. #include "unicode/edits.h"
  18. #include "unicode/normalizer2.h"
  19. #include "unicode/stringoptions.h"
  20. #include "unicode/unistr.h"
  21. #include "cpputils.h"
  22. #include "normalizer2impl.h"
  23. U_NAMESPACE_BEGIN
  24. // Intermediate class:
  25. // Has Normalizer2Impl and does boilerplate argument checking and setup.
  26. class Normalizer2WithImpl : public Normalizer2 {
  27. public:
  28. Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
  29. virtual ~Normalizer2WithImpl();
  30. // normalize
  31. virtual UnicodeString &
  32. normalize(const UnicodeString &src,
  33. UnicodeString &dest,
  34. UErrorCode &errorCode) const {
  35. if(U_FAILURE(errorCode)) {
  36. dest.setToBogus();
  37. return dest;
  38. }
  39. const UChar *sArray=src.getBuffer();
  40. if(&dest==&src || sArray==NULL) {
  41. errorCode=U_ILLEGAL_ARGUMENT_ERROR;
  42. dest.setToBogus();
  43. return dest;
  44. }
  45. dest.remove();
  46. ReorderingBuffer buffer(impl, dest);
  47. if(buffer.init(src.length(), errorCode)) {
  48. normalize(sArray, sArray+src.length(), buffer, errorCode);
  49. }
  50. return dest;
  51. }
  52. virtual void
  53. normalize(const UChar *src, const UChar *limit,
  54. ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
  55. // normalize and append
  56. virtual UnicodeString &
  57. normalizeSecondAndAppend(UnicodeString &first,
  58. const UnicodeString &second,
  59. UErrorCode &errorCode) const {
  60. return normalizeSecondAndAppend(first, second, TRUE, errorCode);
  61. }
  62. virtual UnicodeString &
  63. append(UnicodeString &first,
  64. const UnicodeString &second,
  65. UErrorCode &errorCode) const {
  66. return normalizeSecondAndAppend(first, second, FALSE, errorCode);
  67. }
  68. UnicodeString &
  69. normalizeSecondAndAppend(UnicodeString &first,
  70. const UnicodeString &second,
  71. UBool doNormalize,
  72. UErrorCode &errorCode) const {
  73. uprv_checkCanGetBuffer(first, errorCode);
  74. if(U_FAILURE(errorCode)) {
  75. return first;
  76. }
  77. const UChar *secondArray=second.getBuffer();
  78. if(&first==&second || secondArray==NULL) {
  79. errorCode=U_ILLEGAL_ARGUMENT_ERROR;
  80. return first;
  81. }
  82. int32_t firstLength=first.length();
  83. UnicodeString safeMiddle;
  84. {
  85. ReorderingBuffer buffer(impl, first);
  86. if(buffer.init(firstLength+second.length(), errorCode)) {
  87. normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
  88. safeMiddle, buffer, errorCode);
  89. }
  90. } // The ReorderingBuffer destructor finalizes the first string.
  91. if(U_FAILURE(errorCode)) {
  92. // Restore the modified suffix of the first string.
  93. first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
  94. }
  95. return first;
  96. }
  97. virtual void
  98. normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
  99. UnicodeString &safeMiddle,
  100. ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
  101. virtual UBool
  102. getDecomposition(UChar32 c, UnicodeString &decomposition) const {
  103. UChar buffer[4];
  104. int32_t length;
  105. const UChar *d=impl.getDecomposition(c, buffer, length);
  106. if(d==NULL) {
  107. return FALSE;
  108. }
  109. if(d==buffer) {
  110. decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
  111. } else {
  112. decomposition.setTo(FALSE, d, length); // read-only alias
  113. }
  114. return TRUE;
  115. }
  116. virtual UBool
  117. getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
  118. UChar buffer[30];
  119. int32_t length;
  120. const UChar *d=impl.getRawDecomposition(c, buffer, length);
  121. if(d==NULL) {
  122. return FALSE;
  123. }
  124. if(d==buffer) {
  125. decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
  126. } else {
  127. decomposition.setTo(FALSE, d, length); // read-only alias
  128. }
  129. return TRUE;
  130. }
  131. virtual UChar32
  132. composePair(UChar32 a, UChar32 b) const {
  133. return impl.composePair(a, b);
  134. }
  135. virtual uint8_t
  136. getCombiningClass(UChar32 c) const {
  137. return impl.getCC(impl.getNorm16(c));
  138. }
  139. // quick checks
  140. virtual UBool
  141. isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
  142. if(U_FAILURE(errorCode)) {
  143. return FALSE;
  144. }
  145. const UChar *sArray=s.getBuffer();
  146. if(sArray==NULL) {
  147. errorCode=U_ILLEGAL_ARGUMENT_ERROR;
  148. return FALSE;
  149. }
  150. const UChar *sLimit=sArray+s.length();
  151. return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
  152. }
  153. virtual UNormalizationCheckResult
  154. quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
  155. return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
  156. }
  157. virtual int32_t
  158. spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
  159. if(U_FAILURE(errorCode)) {
  160. return 0;
  161. }
  162. const UChar *sArray=s.getBuffer();
  163. if(sArray==NULL) {
  164. errorCode=U_ILLEGAL_ARGUMENT_ERROR;
  165. return 0;
  166. }
  167. return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
  168. }
  169. virtual const UChar *
  170. spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
  171. virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
  172. return UNORM_YES;
  173. }
  174. const Normalizer2Impl &impl;
  175. };
  176. class DecomposeNormalizer2 : public Normalizer2WithImpl {
  177. public:
  178. DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
  179. virtual ~DecomposeNormalizer2();
  180. private:
  181. virtual void
  182. normalize(const UChar *src, const UChar *limit,
  183. ReorderingBuffer &buffer, UErrorCode &errorCode) const {
  184. impl.decompose(src, limit, &buffer, errorCode);
  185. }
  186. using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
  187. virtual void
  188. normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
  189. UnicodeString &safeMiddle,
  190. ReorderingBuffer &buffer, UErrorCode &errorCode) const {
  191. impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
  192. }
  193. virtual const UChar *
  194. spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
  195. return impl.decompose(src, limit, NULL, errorCode);
  196. }
  197. using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
  198. virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
  199. return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
  200. }
  201. virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); }
  202. virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); }
  203. virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
  204. };
  205. class ComposeNormalizer2 : public Normalizer2WithImpl {
  206. public:
  207. ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
  208. Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
  209. virtual ~ComposeNormalizer2();
  210. private:
  211. virtual void
  212. normalize(const UChar *src, const UChar *limit,
  213. ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
  214. impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
  215. }
  216. using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
  217. void
  218. normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
  219. Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
  220. if (U_FAILURE(errorCode)) {
  221. return;
  222. }
  223. if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
  224. edits->reset();
  225. }
  226. const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
  227. impl.composeUTF8(options, onlyContiguous, s, s + src.length(),
  228. &sink, edits, errorCode);
  229. sink.Flush();
  230. }
  231. virtual void
  232. normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
  233. UnicodeString &safeMiddle,
  234. ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
  235. impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
  236. }
  237. virtual UBool
  238. isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
  239. if(U_FAILURE(errorCode)) {
  240. return FALSE;
  241. }
  242. const UChar *sArray=s.getBuffer();
  243. if(sArray==NULL) {
  244. errorCode=U_ILLEGAL_ARGUMENT_ERROR;
  245. return FALSE;
  246. }
  247. UnicodeString temp;
  248. ReorderingBuffer buffer(impl, temp);
  249. if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
  250. return FALSE;
  251. }
  252. return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
  253. }
  254. virtual UBool
  255. isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
  256. if(U_FAILURE(errorCode)) {
  257. return FALSE;
  258. }
  259. const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
  260. return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode);
  261. }
  262. virtual UNormalizationCheckResult
  263. quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
  264. if(U_FAILURE(errorCode)) {
  265. return UNORM_MAYBE;
  266. }
  267. const UChar *sArray=s.getBuffer();
  268. if(sArray==NULL) {
  269. errorCode=U_ILLEGAL_ARGUMENT_ERROR;
  270. return UNORM_MAYBE;
  271. }
  272. UNormalizationCheckResult qcResult=UNORM_YES;
  273. impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
  274. return qcResult;
  275. }
  276. virtual const UChar *
  277. spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE {
  278. return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
  279. }
  280. using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
  281. virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
  282. return impl.getCompQuickCheck(impl.getNorm16(c));
  283. }
  284. virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
  285. return impl.hasCompBoundaryBefore(c);
  286. }
  287. virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
  288. return impl.hasCompBoundaryAfter(c, onlyContiguous);
  289. }
  290. virtual UBool isInert(UChar32 c) const U_OVERRIDE {
  291. return impl.isCompInert(c, onlyContiguous);
  292. }
  293. const UBool onlyContiguous;
  294. };
  295. class FCDNormalizer2 : public Normalizer2WithImpl {
  296. public:
  297. FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
  298. virtual ~FCDNormalizer2();
  299. private:
  300. virtual void
  301. normalize(const UChar *src, const UChar *limit,
  302. ReorderingBuffer &buffer, UErrorCode &errorCode) const {
  303. impl.makeFCD(src, limit, &buffer, errorCode);
  304. }
  305. using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
  306. virtual void
  307. normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
  308. UnicodeString &safeMiddle,
  309. ReorderingBuffer &buffer, UErrorCode &errorCode) const {
  310. impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
  311. }
  312. virtual const UChar *
  313. spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
  314. return impl.makeFCD(src, limit, NULL, errorCode);
  315. }
  316. using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
  317. virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
  318. virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
  319. virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
  320. };
  321. struct Norm2AllModes : public UMemory {
  322. Norm2AllModes(Normalizer2Impl *i)
  323. : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
  324. ~Norm2AllModes();
  325. static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
  326. static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
  327. static Norm2AllModes *createInstance(const char *packageName,
  328. const char *name,
  329. UErrorCode &errorCode);
  330. static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
  331. static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
  332. static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
  333. Normalizer2Impl *impl;
  334. ComposeNormalizer2 comp;
  335. DecomposeNormalizer2 decomp;
  336. FCDNormalizer2 fcd;
  337. ComposeNormalizer2 fcc;
  338. };
  339. U_NAMESPACE_END
  340. #endif // !UCONFIG_NO_NORMALIZATION
  341. #endif // __NORM2ALLMODES_H__