unistr.h 170 KB


  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 1998-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. *
  9. * File unistr.h
  10. *
  11. * Modification History:
  12. *
  13. * Date Name Description
  14. * 09/25/98 stephen Creation.
  15. * 11/11/98 stephen Changed per 11/9 code review.
  16. * 04/20/99 stephen Overhauled per 4/16 code review.
  17. * 11/18/99 aliu Made to inherit from Replaceable. Added method
  18. * handleReplaceBetween(); other methods unchanged.
  19. * 06/25/01 grhoten Remove dependency on iostream.
  20. ******************************************************************************
  21. */
  22. #ifndef UNISTR_H
  23. #define UNISTR_H
  24. /**
  25. * \file
  26. * \brief C++ API: Unicode String
  27. */
  28. #include "unicode/utypes.h"
  29. #if U_SHOW_CPLUSPLUS_API
  30. #include <cstddef>
  31. #include "unicode/char16ptr.h"
  32. #include "unicode/rep.h"
  33. #include "unicode/std_string.h"
  34. #include "unicode/stringpiece.h"
  35. #include "unicode/bytestream.h"
  36. struct UConverter; // unicode/ucnv.h
  37. #ifndef USTRING_H
  38. /**
  39. * \ingroup ustring_ustrlen
  40. */
  41. U_STABLE int32_t U_EXPORT2
  42. u_strlen(const UChar *s);
  43. #endif // USTRING_H
  44. U_NAMESPACE_BEGIN
  45. #if !UCONFIG_NO_BREAK_ITERATION
  46. class BreakIterator; // unicode/brkiter.h
  47. #endif // !UCONFIG_NO_BREAK_ITERATION
  48. class Edits;
  49. U_NAMESPACE_END
  50. // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
  51. /**
  52. * Internal string case mapping function type.
  53. * All error checking must be done.
  54. * src and dest must not overlap.
  55. * @internal
  56. */
  57. typedef int32_t U_CALLCONV
  58. UStringCaseMapper(int32_t caseLocale, uint32_t options,
  59. #if !UCONFIG_NO_BREAK_ITERATION
  60. icu::BreakIterator *iter, // parameter is present only when break iteration is not configured out
  61. #endif // !UCONFIG_NO_BREAK_ITERATION
  62. char16_t *dest, int32_t destCapacity,
  63. const char16_t *src, int32_t srcLength,
  64. icu::Edits *edits,
  65. UErrorCode &errorCode);
  66. U_NAMESPACE_BEGIN
  67. class Locale; // unicode/locid.h
  68. class StringCharacterIterator;
  69. class UnicodeStringAppendable; // unicode/appendable.h
  70. /* The <iostream> include has been moved to unicode/ustream.h */
  71. /**
  72. * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
  73. * which constructs a Unicode string from an invariant-character char * string.
  74. * About invariant characters see utypes.h.
  75. * This constructor has no runtime dependency on conversion code and is
  76. * therefore recommended over ones taking a charset name string
  77. * (where the empty string "" indicates invariant-character conversion).
  78. *
  79. * @stable ICU 3.2
  80. */
  81. #define US_INV icu::UnicodeString::kInvariant
  82. /**
  83. * Unicode String literals in C++.
  84. *
  85. * Note: these macros are not recommended for new code.
  86. * Prior to the availability of C++11 and u"unicode string literals",
  87. * these macros were provided for portability and efficiency when
  88. * initializing UnicodeStrings from literals.
  89. *
  90. * They work only for strings that contain "invariant characters", i.e.,
  91. * only Latin letters, digits, and some punctuation.
  92. * See utypes.h for details.
  93. *
  94. * The string parameter must be a C string literal.
  95. * The length of the string, not including the terminating
  96. * `NUL`, must be specified as a constant.
  97. * @stable ICU 2.0
  98. */
  99. #if !U_CHAR16_IS_TYPEDEF
  100. # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length)
  101. #else // U_CHAR16_IS_TYPEDEF: the pasted u"..." literal is explicitly cast to const char16_t *
  102. # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length)
  103. #endif // !U_CHAR16_IS_TYPEDEF
  104. /**
  105. * Unicode String literals in C++.
  106. * Depending on the platform properties, different UnicodeString
  107. * constructors should be used to create a UnicodeString object from
  108. * a string literal.
  109. * The macros are defined for improved performance.
  110. * They work only for strings that contain "invariant characters", i.e.,
  111. * only Latin letters, digits, and some punctuation.
  112. * See utypes.h for details.
  113. *
  114. * The string parameter must be a C string literal.
  115. * @stable ICU 2.0
  116. */
  117. #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
  118. /**
  119. * \def UNISTR_FROM_CHAR_EXPLICIT
  120. * This can be defined to be empty or "explicit".
  121. * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
  122. * constructors are marked as explicit, preventing their inadvertent use.
  123. * @stable ICU 49
  124. */
  125. #ifndef UNISTR_FROM_CHAR_EXPLICIT
  126. # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
  127. // One of the ICU library implementation macros is defined: automatically use "explicit".
  128. # define UNISTR_FROM_CHAR_EXPLICIT explicit
  129. # else
  130. // Otherwise the macro expands to nothing by default, for source code compatibility.
  131. # define UNISTR_FROM_CHAR_EXPLICIT
  132. # endif // U_*_IMPLEMENTATION
  133. #endif // UNISTR_FROM_CHAR_EXPLICIT
  134. /**
  135. * \def UNISTR_FROM_STRING_EXPLICIT
  136. * This can be defined to be empty or "explicit".
  137. * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
  138. * constructors are marked as explicit, preventing their inadvertent use.
  139. *
  140. * In particular, this helps prevent accidentally depending on ICU conversion code
  141. * by passing a string literal into an API with a const UnicodeString & parameter.
  142. * @stable ICU 49
  143. */
  144. #ifndef UNISTR_FROM_STRING_EXPLICIT
  145. # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
  146. // One of the ICU library implementation macros is defined: automatically use "explicit".
  147. # define UNISTR_FROM_STRING_EXPLICIT explicit
  148. # else
  149. // Otherwise the macro expands to nothing by default, for source code compatibility.
  150. # define UNISTR_FROM_STRING_EXPLICIT
  151. # endif // U_*_IMPLEMENTATION
  152. #endif // UNISTR_FROM_STRING_EXPLICIT
  153. /**
  154. * \def UNISTR_OBJECT_SIZE
  155. * Desired sizeof(UnicodeString) in bytes.
  156. * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
  157. * The object size may want to be a multiple of 16 bytes,
  158. * which is a common granularity for heap allocation.
  159. *
  160. * Any space inside the object beyond sizeof(vtable pointer) + 2
  161. * is available for storing short strings inside the object.
  162. * The bigger the object, the longer a string that can be stored inside the object,
  163. * without additional heap allocation.
  164. *
  165. * Depending on a platform's pointer size, pointer alignment requirements,
  166. * and struct padding, the compiler will usually round up sizeof(UnicodeString)
  167. * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
  168. * to hold the fields for heap-allocated strings.
  169. * Such a minimum size also ensures that the object is easily large enough
  170. * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
  171. *
  172. * sizeof(UnicodeString) >= 48 should work for all known platforms.
  173. *
  174. * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
  175. * sizeof(UnicodeString) = 64 would leave space for
  176. * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
  177. * char16_ts stored inside the object.
  178. *
  179. * The minimum object size on a 64-bit machine would be
  180. * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
  181. * and the internal buffer would hold up to 11 char16_ts in that case.
  182. *
  183. * @see U16_MAX_LENGTH
  184. * @stable ICU 56
  185. */
  186. #ifndef UNISTR_OBJECT_SIZE
  187. # define UNISTR_OBJECT_SIZE 64 // default; applies only when not already defined
  188. #endif // UNISTR_OBJECT_SIZE
  189. /**
  190. * UnicodeString is a string class that stores Unicode characters directly and provides
  191. * functionality similar to that of the Java String and StringBuffer/StringBuilder classes.
  192. * It is a concrete implementation of the abstract class Replaceable (for transliteration).
  193. *
  194. * The UnicodeString equivalent of std::string’s clear() is remove().
  195. *
  196. * A UnicodeString may "alias" an external array of characters
  197. * (that is, point to it, rather than own the array)
  198. * whose lifetime must then at least match the lifetime of the aliasing object.
  199. * This aliasing may be preserved when returning a UnicodeString by value,
  200. * depending on the compiler and the function implementation,
  201. * via Return Value Optimization (RVO) or the move assignment operator.
  202. * (However, the copy assignment operator does not preserve aliasing.)
  203. * For details see the description of storage models at the end of the class API docs
  204. * and in the User Guide chapter linked from there.
  205. *
  206. * The UnicodeString class is not suitable for subclassing.
  207. *
  208. * For an overview of Unicode strings in C and C++ see the
  209. * [User Guide Strings chapter](http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-).
  210. *
  211. * In ICU, a Unicode string consists of 16-bit Unicode *code units*.
  212. * A Unicode character may be stored with either one code unit
  213. * (the most common case) or with a matched pair of special code units
  214. * ("surrogates"). The data type for code units is char16_t.
  215. * For single-character handling, a Unicode character code *point* is a value
  216. * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.
  217. *
  218. * Indexes and offsets into and lengths of strings always count code units, not code points.
  219. * This is the same as with multi-byte char* strings in traditional string handling.
  220. * Operations on partial strings typically do not test for code point boundaries.
  221. * If necessary, the user needs to take care of such boundaries by testing for the code unit
  222. * values or by using functions like
  223. * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
  224. * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).
  225. *
  226. * UnicodeString methods are more lenient with regard to input parameter values
  227. * than other ICU APIs. In particular:
  228. * - If indexes are out of bounds for a UnicodeString object
  229. * (< 0 or > length()) then they are "pinned" to the nearest boundary.
  230. * - If the buffer passed to an insert/append/replace operation is owned by the
  231. * target object, e.g., calling str.append(str), an extra copy may take place
  232. * to ensure safety.
  233. * - If primitive string pointer values (e.g., const char16_t * or char *)
  234. * for input strings are NULL, then those input string parameters are treated
  235. * as if they pointed to an empty string.
  236. * However, this is *not* the case for char * parameters for charset names
  237. * or other IDs.
  238. * - Most UnicodeString methods do not take a UErrorCode parameter because
  239. * there are usually very few opportunities for failure other than a shortage
  240. * of memory, error codes in low-level C++ string methods would be inconvenient,
  241. * and the error code as the last parameter (ICU convention) would prevent
  242. * the use of default parameter values.
  243. * Instead, such methods set the UnicodeString into a "bogus" state
  244. * (see isBogus()) if an error occurs.
  245. *
  246. * In string comparisons, two UnicodeString objects that are both "bogus"
  247. * compare equal (to be transitive and prevent endless loops in sorting),
  248. * and a "bogus" string compares less than any non-"bogus" one.
  249. *
  250. * Const UnicodeString methods are thread-safe. Multiple threads can use
  251. * const methods on the same UnicodeString object simultaneously,
  252. * but non-const methods must not be called concurrently (in multiple threads)
  253. * with any other (const or non-const) methods.
  254. *
  255. * Similarly, const UnicodeString & parameters are thread-safe.
  256. * One object may be passed in as such a parameter concurrently in multiple threads.
  257. * This includes the const UnicodeString & parameters for
  258. * copy construction, assignment, and cloning.
  259. *
  260. * UnicodeString uses several storage methods.
  261. * String contents can be stored inside the UnicodeString object itself,
  262. * in an allocated and shared buffer, or in an outside buffer that is "aliased".
  263. * Most of this is done transparently, but careful aliasing in particular provides
  264. * significant performance improvements.
  265. * Also, the internal buffer is accessible via special functions.
  266. * For details see the
  267. * [User Guide Strings chapter](http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model).
  268. *
  269. * @see utf.h
  270. * @see CharacterIterator
  271. * @stable ICU 2.0
  272. */
  273. class U_COMMON_API UnicodeString : public Replaceable
  274. {
  275. public:
  276. /**
  277. * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
  278. * which constructs a Unicode string from an invariant-character char * string.
  279. * Use the macro US_INV instead of the full qualification for this value.
  280. *
  281. * @see US_INV
  282. * @stable ICU 3.2
  283. */
  284. enum EInvariant {
  285. /**
  286. * @see EInvariant
  287. * @stable ICU 3.2
  288. */
  289. kInvariant
  290. };
  291. //========================================
  292. // Read-only operations
  293. //========================================
  294. /* Comparison - bitwise only - for international comparison use collation */
  295. /**
  296. * Equality operator. Performs only bitwise comparison.
  297. * @param text The UnicodeString to compare to this one.
  298. * @return TRUE if `text` contains the same characters as this one,
  299. * FALSE otherwise.
  300. * @stable ICU 2.0
  301. */
  302. inline UBool operator== (const UnicodeString& text) const;
  303. /**
  304. * Inequality operator. Performs only bitwise comparison.
  305. * @param text The UnicodeString to compare to this one.
  306. * @return FALSE if `text` contains the same characters as this one,
  307. * TRUE otherwise.
  308. * @stable ICU 2.0
  309. */
  310. inline UBool operator!= (const UnicodeString& text) const;
  311. /**
  312. * Greater than operator. Performs only bitwise comparison.
  313. * @param text The UnicodeString to compare to this one.
  314. * @return TRUE if the characters in this are bitwise
  315. * greater than the characters in `text`, FALSE otherwise
  316. * @stable ICU 2.0
  317. */
  318. inline UBool operator> (const UnicodeString& text) const;
  319. /**
  320. * Less than operator. Performs only bitwise comparison.
  321. * @param text The UnicodeString to compare to this one.
  322. * @return TRUE if the characters in this are bitwise
  323. * less than the characters in `text`, FALSE otherwise
  324. * @stable ICU 2.0
  325. */
  326. inline UBool operator< (const UnicodeString& text) const;
  327. /**
  328. * Greater than or equal operator. Performs only bitwise comparison.
  329. * @param text The UnicodeString to compare to this one.
  330. * @return TRUE if the characters in this are bitwise
  331. * greater than or equal to the characters in `text`, FALSE otherwise
  332. * @stable ICU 2.0
  333. */
  334. inline UBool operator>= (const UnicodeString& text) const;
  335. /**
  336. * Less than or equal operator. Performs only bitwise comparison.
  337. * @param text The UnicodeString to compare to this one.
  338. * @return TRUE if the characters in this are bitwise
  339. * less than or equal to the characters in `text`, FALSE otherwise
  340. * @stable ICU 2.0
  341. */
  342. inline UBool operator<= (const UnicodeString& text) const;
  343. /**
  344. * Compare the characters bitwise in this UnicodeString to
  345. * the characters in `text`.
  346. * @param text The UnicodeString to compare to this one.
  347. * @return The result of bitwise character comparison: 0 if this
  348. * contains the same characters as `text`, -1 if the characters in
  349. * this are bitwise less than the characters in `text`, +1 if the
  350. * characters in this are bitwise greater than the characters
  351. * in `text`.
  352. * @stable ICU 2.0
  353. */
  354. inline int8_t compare(const UnicodeString& text) const;
  355. /**
  356. * Compare the characters bitwise in the range
  357. * [`start`, `start + length`) with the characters
  358. * in the **entire string** `text`.
  359. * (The parameters "start" and "length" are not applied to the other text "text".)
  360. * @param start the offset at which the compare operation begins
  361. * @param length the number of characters in this string to compare.
  362. * @param text the other text to be compared against this string.
  363. * @return The result of bitwise character comparison: 0 if this
  364. * contains the same characters as `text`, -1 if the characters in
  365. * this are bitwise less than the characters in `text`, +1 if the
  366. * characters in this are bitwise greater than the characters
  367. * in `text`.
  368. * @stable ICU 2.0
  369. */
  370. inline int8_t compare(int32_t start,
  371. int32_t length,
  372. const UnicodeString& text) const;
  373. /**
  374. * Compare the characters bitwise in the range
  375. * [`start`, `start + length`) with the characters
  376. * in `srcText` in the range
  377. * [`srcStart`, `srcStart + srcLength`).
  378. * @param start the offset at which the compare operation begins
  379. * @param length the number of characters in this to compare.
  380. * @param srcText the text to be compared
  381. * @param srcStart the offset into `srcText` to start comparison
  382. * @param srcLength the number of characters in `srcText` to compare
  383. * @return The result of bitwise character comparison: 0 if this
  384. * contains the same characters as `srcText`, -1 if the characters in
  385. * this are bitwise less than the characters in `srcText`, +1 if the
  386. * characters in this are bitwise greater than the characters
  387. * in `srcText`.
  388. * @stable ICU 2.0
  389. */
  390. inline int8_t compare(int32_t start,
  391. int32_t length,
  392. const UnicodeString& srcText,
  393. int32_t srcStart,
  394. int32_t srcLength) const;
  395. /**
  396. * Compare the characters bitwise in this UnicodeString with the first
  397. * `srcLength` characters in `srcChars`.
  398. * @param srcChars The characters to compare to this UnicodeString.
  399. * @param srcLength the number of characters in `srcChars` to compare
  400. * @return The result of bitwise character comparison: 0 if this
  401. * contains the same characters as `srcChars`, -1 if the characters in
  402. * this are bitwise less than the characters in `srcChars`, +1 if the
  403. * characters in this are bitwise greater than the characters
  404. * in `srcChars`.
  405. * @stable ICU 2.0
  406. */
  407. inline int8_t compare(ConstChar16Ptr srcChars,
  408. int32_t srcLength) const;
  409. /**
  410. * Compare the characters bitwise in the range
  411. * [`start`, `start + length`) with the first
  412. * `length` characters in `srcChars`
  413. * @param start the offset at which the compare operation begins
  414. * @param length the number of characters to compare.
  415. * @param srcChars the characters to be compared
  416. * @return The result of bitwise character comparison: 0 if this
  417. * contains the same characters as `srcChars`, -1 if the characters in
  418. * this are bitwise less than the characters in `srcChars`, +1 if the
  419. * characters in this are bitwise greater than the characters
  420. * in `srcChars`.
  421. * @stable ICU 2.0
  422. */
  423. inline int8_t compare(int32_t start,
  424. int32_t length,
  425. const char16_t *srcChars) const;
  426. /**
  427. * Compare the characters bitwise in the range
  428. * [`start`, `start + length`) with the characters
  429. * in `srcChars` in the range
  430. * [`srcStart`, `srcStart + srcLength`).
  431. * @param start the offset at which the compare operation begins
  432. * @param length the number of characters in this to compare
  433. * @param srcChars the characters to be compared
  434. * @param srcStart the offset into `srcChars` to start comparison
  435. * @param srcLength the number of characters in `srcChars` to compare
  436. * @return The result of bitwise character comparison: 0 if this
  437. * contains the same characters as `srcChars`, -1 if the characters in
  438. * this are bitwise less than the characters in `srcChars`, +1 if the
  439. * characters in this are bitwise greater than the characters
  440. * in `srcChars`.
  441. * @stable ICU 2.0
  442. */
  443. inline int8_t compare(int32_t start,
  444. int32_t length,
  445. const char16_t *srcChars,
  446. int32_t srcStart,
  447. int32_t srcLength) const;
  448. /**
  449. * Compare the characters bitwise in the range
  450. * [`start`, `limit`) with the characters
  451. * in `srcText` in the range
  452. * [`srcStart`, `srcLimit`).
  453. * @param start the offset at which the compare operation begins
  454. * @param limit the offset immediately following the compare operation
  455. * @param srcText the text to be compared
  456. * @param srcStart the offset into `srcText` to start comparison
  457. * @param srcLimit the offset into `srcText` to limit comparison
  458. * @return The result of bitwise character comparison: 0 if this
  459. * contains the same characters as `srcText`, -1 if the characters in
  460. * this are bitwise less than the characters in `srcText`, +1 if the
  461. * characters in this are bitwise greater than the characters
  462. * in `srcText`.
  463. * @stable ICU 2.0
  464. */
  465. inline int8_t compareBetween(int32_t start,
  466. int32_t limit,
  467. const UnicodeString& srcText,
  468. int32_t srcStart,
  469. int32_t srcLimit) const;
  470. /**
  471. * Compare two Unicode strings in code point order.
  472. * The result may be different from the results of compare(), operator<, etc.
  473. * if supplementary characters are present:
  474. *
  475. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  476. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  477. * which means that they compare as less than some other BMP characters like U+feff.
  478. * This function compares Unicode strings in code point order.
  479. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  480. *
  481. * @param text Another string to compare this one to.
  482. * @return a negative/zero/positive integer corresponding to whether
  483. * this string is less than/equal to/greater than the second one
  484. * in code point order
  485. * @stable ICU 2.0
  486. */
  487. inline int8_t compareCodePointOrder(const UnicodeString& text) const;
  488. /**
  489. * Compare two Unicode strings in code point order.
  490. * The result may be different from the results of compare(), operator<, etc.
  491. * if supplementary characters are present:
  492. *
  493. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  494. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  495. * which means that they compare as less than some other BMP characters like U+feff.
  496. * This function compares Unicode strings in code point order.
  497. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  498. *
  499. * @param start The start offset in this string at which the compare operation begins.
  500. * @param length The number of code units from this string to compare.
  501. * @param srcText Another string to compare this one to.
  502. * @return a negative/zero/positive integer corresponding to whether
  503. * this string is less than/equal to/greater than the second one
  504. * in code point order
  505. * @stable ICU 2.0
  506. */
  507. inline int8_t compareCodePointOrder(int32_t start,
  508. int32_t length,
  509. const UnicodeString& srcText) const;
  510. /**
  511. * Compare two Unicode strings in code point order.
  512. * The result may be different from the results of compare(), operator<, etc.
  513. * if supplementary characters are present:
  514. *
  515. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  516. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  517. * which means that they compare as less than some other BMP characters like U+feff.
  518. * This function compares Unicode strings in code point order.
  519. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  520. *
  521. * @param start The start offset in this string at which the compare operation begins.
  522. * @param length The number of code units from this string to compare.
  523. * @param srcText Another string to compare this one to.
  524. * @param srcStart The start offset in that string at which the compare operation begins.
  525. * @param srcLength The number of code units from that string to compare.
  526. * @return a negative/zero/positive integer corresponding to whether
  527. * this string is less than/equal to/greater than the second one
  528. * in code point order
  529. * @stable ICU 2.0
  530. */
  531. inline int8_t compareCodePointOrder(int32_t start,
  532. int32_t length,
  533. const UnicodeString& srcText,
  534. int32_t srcStart,
  535. int32_t srcLength) const;
  536. /**
  537. * Compare two Unicode strings in code point order.
  538. * The result may be different from the results of compare(), operator<, etc.
  539. * if supplementary characters are present:
  540. *
  541. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  542. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  543. * which means that they compare as less than some other BMP characters like U+feff.
  544. * This function compares Unicode strings in code point order.
  545. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  546. *
  547. * @param srcChars A pointer to another string to compare this one to.
  548. * @param srcLength The number of code units from that string to compare.
  549. * @return a negative/zero/positive integer corresponding to whether
  550. * this string is less than/equal to/greater than the second one
  551. * in code point order
  552. * @stable ICU 2.0
  553. */
  554. inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
  555. int32_t srcLength) const;
  556. /**
  557. * Compare two Unicode strings in code point order.
  558. * The result may be different from the results of compare(), operator<, etc.
  559. * if supplementary characters are present:
  560. *
  561. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  562. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  563. * which means that they compare as less than some other BMP characters like U+feff.
  564. * This function compares Unicode strings in code point order.
  565. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  566. *
  567. * @param start The start offset in this string at which the compare operation begins.
  568. * @param length The number of code units from this string to compare.
  569. * @param srcChars A pointer to another string to compare this one to.
  570. * @return a negative/zero/positive integer corresponding to whether
  571. * this string is less than/equal to/greater than the second one
  572. * in code point order
  573. * @stable ICU 2.0
  574. */
  575. inline int8_t compareCodePointOrder(int32_t start,
  576. int32_t length,
  577. const char16_t *srcChars) const;
  578. /**
  579. * Compare two Unicode strings in code point order.
  580. * The result may be different from the results of compare(), operator<, etc.
  581. * if supplementary characters are present:
  582. *
  583. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  584. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  585. * which means that they compare as less than some other BMP characters like U+feff.
  586. * This function compares Unicode strings in code point order.
  587. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  588. *
  589. * @param start The start offset in this string at which the compare operation begins.
  590. * @param length The number of code units from this string to compare.
  591. * @param srcChars A pointer to another string to compare this one to.
  592. * @param srcStart The start offset in that string at which the compare operation begins.
  593. * @param srcLength The number of code units from that string to compare.
  594. * @return a negative/zero/positive integer corresponding to whether
  595. * this string is less than/equal to/greater than the second one
  596. * in code point order
  597. * @stable ICU 2.0
  598. */
  599. inline int8_t compareCodePointOrder(int32_t start,
  600. int32_t length,
  601. const char16_t *srcChars,
  602. int32_t srcStart,
  603. int32_t srcLength) const;
  604. /**
  605. * Compare two Unicode strings in code point order.
  606. * The result may be different from the results of compare(), operator<, etc.
  607. * if supplementary characters are present:
  608. *
  609. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  610. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  611. * which means that they compare as less than some other BMP characters like U+feff.
  612. * This function compares Unicode strings in code point order.
  613. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  614. *
  615. * @param start The start offset in this string at which the compare operation begins.
  616. * @param limit The offset after the last code unit from this string to compare.
  617. * @param srcText Another string to compare this one to.
  618. * @param srcStart The start offset in that string at which the compare operation begins.
  619. * @param srcLimit The offset after the last code unit from that string to compare.
  620. * @return a negative/zero/positive integer corresponding to whether
  621. * this string is less than/equal to/greater than the second one
  622. * in code point order
  623. * @stable ICU 2.0
  624. */
  625. inline int8_t compareCodePointOrderBetween(int32_t start,
  626. int32_t limit,
  627. const UnicodeString& srcText,
  628. int32_t srcStart,
  629. int32_t srcLimit) const;
  630. /**
  631. * Compare two strings case-insensitively using full case folding.
  632. * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
  633. *
  634. * @param text Another string to compare this one to.
  635. * @param options A bit set of options:
  636. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  637. * Comparison in code unit order with default case folding.
  638. *
  639. * - U_COMPARE_CODE_POINT_ORDER
  640. * Set to choose code point order instead of code unit order
  641. * (see u_strCompare for details).
  642. *
  643. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  644. *
  645. * @return A negative, zero, or positive integer indicating the comparison result.
  646. * @stable ICU 2.0
  647. */
  648. inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
  649. /**
  650. * Compare two strings case-insensitively using full case folding.
  651. * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
  652. *
  653. * @param start The start offset in this string at which the compare operation begins.
  654. * @param length The number of code units from this string to compare.
  655. * @param srcText Another string to compare this one to.
  656. * @param options A bit set of options:
  657. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  658. * Comparison in code unit order with default case folding.
  659. *
  660. * - U_COMPARE_CODE_POINT_ORDER
  661. * Set to choose code point order instead of code unit order
  662. * (see u_strCompare for details).
  663. *
  664. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  665. *
  666. * @return A negative, zero, or positive integer indicating the comparison result.
  667. * @stable ICU 2.0
  668. */
  669. inline int8_t caseCompare(int32_t start,
  670. int32_t length,
  671. const UnicodeString& srcText,
  672. uint32_t options) const;
  673. /**
  674. * Compare two strings case-insensitively using full case folding.
  675. * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
  676. *
  677. * @param start The start offset in this string at which the compare operation begins.
  678. * @param length The number of code units from this string to compare.
  679. * @param srcText Another string to compare this one to.
  680. * @param srcStart The start offset in that string at which the compare operation begins.
  681. * @param srcLength The number of code units from that string to compare.
  682. * @param options A bit set of options:
  683. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  684. * Comparison in code unit order with default case folding.
  685. *
  686. * - U_COMPARE_CODE_POINT_ORDER
  687. * Set to choose code point order instead of code unit order
  688. * (see u_strCompare for details).
  689. *
  690. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  691. *
  692. * @return A negative, zero, or positive integer indicating the comparison result.
  693. * @stable ICU 2.0
  694. */
  695. inline int8_t caseCompare(int32_t start,
  696. int32_t length,
  697. const UnicodeString& srcText,
  698. int32_t srcStart,
  699. int32_t srcLength,
  700. uint32_t options) const;
  701. /**
  702. * Compare two strings case-insensitively using full case folding.
  703. * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
  704. *
  705. * @param srcChars A pointer to another string to compare this one to.
  706. * @param srcLength The number of code units from that string to compare.
  707. * @param options A bit set of options:
  708. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  709. * Comparison in code unit order with default case folding.
  710. *
  711. * - U_COMPARE_CODE_POINT_ORDER
  712. * Set to choose code point order instead of code unit order
  713. * (see u_strCompare for details).
  714. *
  715. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  716. *
  717. * @return A negative, zero, or positive integer indicating the comparison result.
  718. * @stable ICU 2.0
  719. */
  720. inline int8_t caseCompare(ConstChar16Ptr srcChars,
  721. int32_t srcLength,
  722. uint32_t options) const;
  723. /**
  724. * Compare two strings case-insensitively using full case folding.
  725. * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
  726. *
  727. * @param start The start offset in this string at which the compare operation begins.
  728. * @param length The number of code units from this string to compare.
  729. * @param srcChars A pointer to another string to compare this one to.
  730. * @param options A bit set of options:
  731. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  732. * Comparison in code unit order with default case folding.
  733. *
  734. * - U_COMPARE_CODE_POINT_ORDER
  735. * Set to choose code point order instead of code unit order
  736. * (see u_strCompare for details).
  737. *
  738. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  739. *
  740. * @return A negative, zero, or positive integer indicating the comparison result.
  741. * @stable ICU 2.0
  742. */
  743. inline int8_t caseCompare(int32_t start,
  744. int32_t length,
  745. const char16_t *srcChars,
  746. uint32_t options) const;
  747. /**
  748. * Compare two strings case-insensitively using full case folding.
  749. * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
  750. *
  751. * @param start The start offset in this string at which the compare operation begins.
  752. * @param length The number of code units from this string to compare.
  753. * @param srcChars A pointer to another string to compare this one to.
  754. * @param srcStart The start offset in that string at which the compare operation begins.
  755. * @param srcLength The number of code units from that string to compare.
  756. * @param options A bit set of options:
  757. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  758. * Comparison in code unit order with default case folding.
  759. *
  760. * - U_COMPARE_CODE_POINT_ORDER
  761. * Set to choose code point order instead of code unit order
  762. * (see u_strCompare for details).
  763. *
  764. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  765. *
  766. * @return A negative, zero, or positive integer indicating the comparison result.
  767. * @stable ICU 2.0
  768. */
  769. inline int8_t caseCompare(int32_t start,
  770. int32_t length,
  771. const char16_t *srcChars,
  772. int32_t srcStart,
  773. int32_t srcLength,
  774. uint32_t options) const;
  775. /**
  776. * Compare two strings case-insensitively using full case folding.
  777. * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
  778. *
  779. * @param start The start offset in this string at which the compare operation begins.
  780. * @param limit The offset after the last code unit from this string to compare.
  781. * @param srcText Another string to compare this one to.
  782. * @param srcStart The start offset in that string at which the compare operation begins.
  783. * @param srcLimit The offset after the last code unit from that string to compare.
  784. * @param options A bit set of options:
  785. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  786. * Comparison in code unit order with default case folding.
  787. *
  788. * - U_COMPARE_CODE_POINT_ORDER
  789. * Set to choose code point order instead of code unit order
  790. * (see u_strCompare for details).
  791. *
  792. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  793. *
  794. * @return A negative, zero, or positive integer indicating the comparison result.
  795. * @stable ICU 2.0
  796. */
  797. inline int8_t caseCompareBetween(int32_t start,
  798. int32_t limit,
  799. const UnicodeString& srcText,
  800. int32_t srcStart,
  801. int32_t srcLimit,
  802. uint32_t options) const;
  803. /**
  804. * Determine if this starts with the characters in `text`
  805. * @param text The text to match.
  806. * @return TRUE if this starts with the characters in `text`,
  807. * FALSE otherwise
  808. * @stable ICU 2.0
  809. */
  810. inline UBool startsWith(const UnicodeString& text) const;
  811. /**
  812. * Determine if this starts with the characters in `srcText`
  813. * in the range [`srcStart`, `srcStart + srcLength`).
  814. * @param srcText The text to match.
  815. * @param srcStart the offset into `srcText` to start matching
  816. * @param srcLength the number of characters in `srcText` to match
  817. * @return TRUE if this starts with that range of `srcText`,
  818. * FALSE otherwise
  819. * @stable ICU 2.0
  820. */
  821. inline UBool startsWith(const UnicodeString& srcText,
  822. int32_t srcStart,
  823. int32_t srcLength) const;
  824. /**
  825. * Determine if this starts with the characters in `srcChars`
  826. * @param srcChars The characters to match.
  827. * @param srcLength the number of characters in `srcChars`
  828. * @return TRUE if this starts with the characters in `srcChars`,
  829. * FALSE otherwise
  830. * @stable ICU 2.0
  831. */
  832. inline UBool startsWith(ConstChar16Ptr srcChars,
  833. int32_t srcLength) const;
  834. /**
  835. * Determine if this starts with the characters in `srcChars`
  836. * in the range [`srcStart`, `srcStart + srcLength`).
  837. * @param srcChars The characters to match.
  838. * @param srcStart the offset into `srcChars` to start matching
  839. * @param srcLength the number of characters in `srcChars` to match
  840. * @return TRUE if this starts with that range of `srcChars`, FALSE otherwise
  841. * @stable ICU 2.0
  842. */
  843. inline UBool startsWith(const char16_t *srcChars,
  844. int32_t srcStart,
  845. int32_t srcLength) const;
  846. /**
  847. * Determine if this ends with the characters in `text`
  848. * @param text The text to match.
  849. * @return TRUE if this ends with the characters in `text`,
  850. * FALSE otherwise
  851. * @stable ICU 2.0
  852. */
  853. inline UBool endsWith(const UnicodeString& text) const;
  854. /**
  855. * Determine if this ends with the characters in `srcText`
  856. * in the range [`srcStart`, `srcStart + srcLength`).
  857. * @param srcText The text to match.
  858. * @param srcStart the offset into `srcText` to start matching
  859. * @param srcLength the number of characters in `srcText` to match
  860. * @return TRUE if this ends with that range of `srcText`,
  861. * FALSE otherwise
  862. * @stable ICU 2.0
  863. */
  864. inline UBool endsWith(const UnicodeString& srcText,
  865. int32_t srcStart,
  866. int32_t srcLength) const;
  867. /**
  868. * Determine if this ends with the characters in `srcChars`
  869. * @param srcChars The characters to match.
  870. * @param srcLength the number of characters in `srcChars`
  871. * @return TRUE if this ends with the characters in `srcChars`,
  872. * FALSE otherwise
  873. * @stable ICU 2.0
  874. */
  875. inline UBool endsWith(ConstChar16Ptr srcChars,
  876. int32_t srcLength) const;
  877. /**
  878. * Determine if this ends with the characters in `srcChars`
  879. * in the range [`srcStart`, `srcStart + srcLength`).
  880. * @param srcChars The characters to match.
  881. * @param srcStart the offset into `srcChars` to start matching
  882. * @param srcLength the number of characters in `srcChars` to match
  883. * @return TRUE if this ends with that range of `srcChars`,
  884. * FALSE otherwise
  885. * @stable ICU 2.0
  886. */
  887. inline UBool endsWith(const char16_t *srcChars,
  888. int32_t srcStart,
  889. int32_t srcLength) const;
  890. /* Searching - bitwise only */
  891. /**
  892. * Locate in this the first occurrence of the characters in `text`,
  893. * using bitwise comparison.
  894. * @param text The text to search for.
  895. * @return The offset into this of the start of `text`,
  896. * or -1 if not found.
  897. * @stable ICU 2.0
  898. */
  899. inline int32_t indexOf(const UnicodeString& text) const;
  900. /**
  901. * Locate in this the first occurrence of the characters in `text`
  902. * starting at offset `start`, using bitwise comparison.
  903. * @param text The text to search for.
  904. * @param start The offset at which searching will start.
  905. * @return The offset into this of the start of `text`,
  906. * or -1 if not found.
  907. * @stable ICU 2.0
  908. */
  909. inline int32_t indexOf(const UnicodeString& text,
  910. int32_t start) const;
  911. /**
  912. * Locate in this the first occurrence in the range
  913. * [`start`, `start + length`) of the characters
  914. * in `text`, using bitwise comparison.
  915. * @param text The text to search for.
  916. * @param start The offset at which searching will start.
  917. * @param length The number of characters to search
  918. * @return The offset into this of the start of `text`,
  919. * or -1 if not found.
  920. * @stable ICU 2.0
  921. */
  922. inline int32_t indexOf(const UnicodeString& text,
  923. int32_t start,
  924. int32_t length) const;
  925. /**
  926. * Locate in this the first occurrence in the range
  927. * [`start`, `start + length`) of the characters
  928. * in `srcText` in the range
  929. * [`srcStart`, `srcStart + srcLength`),
  930. * using bitwise comparison.
  931. * @param srcText The text to search for.
  932. * @param srcStart the offset into `srcText` at which
  933. * to start matching
  934. * @param srcLength the number of characters in `srcText` to match
  935. * @param start the offset into this at which to start matching
  936. * @param length the number of characters in this to search
  937. * @return The offset into this of the start of the matched range of `srcText`,
  938. * or -1 if not found.
  939. * @stable ICU 2.0
  940. */
  941. inline int32_t indexOf(const UnicodeString& srcText,
  942. int32_t srcStart,
  943. int32_t srcLength,
  944. int32_t start,
  945. int32_t length) const;
  946. /**
  947. * Locate in this the first occurrence of the characters in
  948. * `srcChars`
  949. * starting at offset `start`, using bitwise comparison.
  950. * @param srcChars The text to search for.
  951. * @param srcLength the number of characters in `srcChars` to match
  952. * @param start the offset into this at which to start matching
  953. * @return The offset into this of the start of `srcChars`,
  954. * or -1 if not found.
  955. * @stable ICU 2.0
  956. */
  957. inline int32_t indexOf(const char16_t *srcChars,
  958. int32_t srcLength,
  959. int32_t start) const;
  960. /**
  961. * Locate in this the first occurrence in the range
  962. * [`start`, `start + length`) of the characters
  963. * in `srcChars`, using bitwise comparison.
  964. * @param srcChars The text to search for.
  965. * @param srcLength the number of characters in `srcChars`
  966. * @param start The offset at which searching will start.
  967. * @param length The number of characters to search
  968. * @return The offset into this of the start of `srcChars`,
  969. * or -1 if not found.
  970. * @stable ICU 2.0
  971. */
  972. inline int32_t indexOf(ConstChar16Ptr srcChars,
  973. int32_t srcLength,
  974. int32_t start,
  975. int32_t length) const;
  976. /**
  977. * Locate in this the first occurrence in the range
  978. * [`start`, `start + length`) of the characters
  979. * in `srcChars` in the range
  980. * [`srcStart`, `srcStart + srcLength`),
  981. * using bitwise comparison.
  982. * @param srcChars The text to search for.
  983. * @param srcStart the offset into `srcChars` at which
  984. * to start matching
  985. * @param srcLength the number of characters in `srcChars` to match
  986. * @param start the offset into this at which to start matching
  987. * @param length the number of characters in this to search
  988. * @return The offset into this of the start of the matched range of `srcChars`,
  989. * or -1 if not found.
  990. * @stable ICU 2.0
  991. */
  992. int32_t indexOf(const char16_t *srcChars,
  993. int32_t srcStart,
  994. int32_t srcLength,
  995. int32_t start,
  996. int32_t length) const;
  997. /**
  998. * Locate in this the first occurrence of the BMP code point `c`,
  999. * using bitwise comparison.
  1000. * @param c The code unit to search for.
  1001. * @return The offset into this of `c`, or -1 if not found.
  1002. * @stable ICU 2.0
  1003. */
  1004. inline int32_t indexOf(char16_t c) const;
  1005. /**
  1006. * Locate in this the first occurrence of the code point `c`,
  1007. * using bitwise comparison.
  1008. *
  1009. * @param c The code point to search for.
  1010. * @return The offset into this of `c`, or -1 if not found.
  1011. * @stable ICU 2.0
  1012. */
  1013. inline int32_t indexOf(UChar32 c) const;
  1014. /**
  1015. * Locate in this the first occurrence of the BMP code point `c`,
  1016. * starting at offset `start`, using bitwise comparison.
  1017. * @param c The code unit to search for.
  1018. * @param start The offset at which searching will start.
  1019. * @return The offset into this of `c`, or -1 if not found.
  1020. * @stable ICU 2.0
  1021. */
  1022. inline int32_t indexOf(char16_t c,
  1023. int32_t start) const;
  1024. /**
  1025. * Locate in this the first occurrence of the code point `c`
  1026. * starting at offset `start`, using bitwise comparison.
  1027. *
  1028. * @param c The code point to search for.
  1029. * @param start The offset at which searching will start.
  1030. * @return The offset into this of `c`, or -1 if not found.
  1031. * @stable ICU 2.0
  1032. */
  1033. inline int32_t indexOf(UChar32 c,
  1034. int32_t start) const;
  1035. /**
  1036. * Locate in this the first occurrence of the BMP code point `c`
  1037. * in the range [`start`, `start + length`),
  1038. * using bitwise comparison.
  1039. * @param c The code unit to search for.
  1040. * @param start the offset into this at which to start matching
  1041. * @param length the number of characters in this to search
  1042. * @return The offset into this of `c`, or -1 if not found.
  1043. * @stable ICU 2.0
  1044. */
  1045. inline int32_t indexOf(char16_t c,
  1046. int32_t start,
  1047. int32_t length) const;
  1048. /**
  1049. * Locate in this the first occurrence of the code point `c`
  1050. * in the range [`start`, `start + length`),
  1051. * using bitwise comparison.
  1052. *
  1053. * @param c The code point to search for.
  1054. * @param start the offset into this at which to start matching
  1055. * @param length the number of characters in this to search
  1056. * @return The offset into this of `c`, or -1 if not found.
  1057. * @stable ICU 2.0
  1058. */
  1059. inline int32_t indexOf(UChar32 c,
  1060. int32_t start,
  1061. int32_t length) const;
  1062. /**
  1063. * Locate in this the last occurrence of the characters in `text`,
  1064. * using bitwise comparison.
  1065. * @param text The text to search for.
  1066. * @return The offset into this of the start of `text`,
  1067. * or -1 if not found.
  1068. * @stable ICU 2.0
  1069. */
  1070. inline int32_t lastIndexOf(const UnicodeString& text) const;
  1071. /**
  1072. * Locate in this the last occurrence of the characters in `text`
  1073. * starting at offset `start`, using bitwise comparison.
  1074. * @param text The text to search for.
  1075. * @param start The offset at which searching will start.
  1076. * @return The offset into this of the start of `text`,
  1077. * or -1 if not found.
  1078. * @stable ICU 2.0
  1079. */
  1080. inline int32_t lastIndexOf(const UnicodeString& text,
  1081. int32_t start) const;
  1082. /**
  1083. * Locate in this the last occurrence in the range
  1084. * [`start`, `start + length`) of the characters
  1085. * in `text`, using bitwise comparison.
  1086. * @param text The text to search for.
  1087. * @param start The offset at which searching will start.
  1088. * @param length The number of characters to search
  1089. * @return The offset into this of the start of `text`,
  1090. * or -1 if not found.
  1091. * @stable ICU 2.0
  1092. */
  1093. inline int32_t lastIndexOf(const UnicodeString& text,
  1094. int32_t start,
  1095. int32_t length) const;
  1096. /**
  1097. * Locate in this the last occurrence in the range
  1098. * [`start`, `start + length`) of the characters
  1099. * in `srcText` in the range
  1100. * [`srcStart`, `srcStart + srcLength`),
  1101. * using bitwise comparison.
  1102. * @param srcText The text to search for.
  1103. * @param srcStart the offset into `srcText` at which
  1104. * to start matching
  1105. * @param srcLength the number of characters in `srcText` to match
  1106. * @param start the offset into this at which to start matching
  1107. * @param length the number of characters in this to search
  1108. * @return The offset into this of the start of the matched range of `srcText`,
  1109. * or -1 if not found.
  1110. * @stable ICU 2.0
  1111. */
  1112. inline int32_t lastIndexOf(const UnicodeString& srcText,
  1113. int32_t srcStart,
  1114. int32_t srcLength,
  1115. int32_t start,
  1116. int32_t length) const;
  1117. /**
  1118. * Locate in this the last occurrence of the characters in `srcChars`
  1119. * starting at offset `start`, using bitwise comparison.
  1120. * @param srcChars The text to search for.
  1121. * @param srcLength the number of characters in `srcChars` to match
  1122. * @param start the offset into this at which to start matching
  1123. * @return The offset into this of the start of `srcChars`,
  1124. * or -1 if not found.
  1125. * @stable ICU 2.0
  1126. */
  1127. inline int32_t lastIndexOf(const char16_t *srcChars,
  1128. int32_t srcLength,
  1129. int32_t start) const;
  1130. /**
  1131. * Locate in this the last occurrence in the range
  1132. * [`start`, `start + length`) of the characters
  1133. * in `srcChars`, using bitwise comparison.
  1134. * @param srcChars The text to search for.
  1135. * @param srcLength the number of characters in `srcChars`
  1136. * @param start The offset at which searching will start.
  1137. * @param length The number of characters to search
  1138. * @return The offset into this of the start of `srcChars`,
  1139. * or -1 if not found.
  1140. * @stable ICU 2.0
  1141. */
  1142. inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
  1143. int32_t srcLength,
  1144. int32_t start,
  1145. int32_t length) const;
  1146. /**
  1147. * Locate in this the last occurrence in the range
  1148. * [`start`, `start + length`) of the characters
  1149. * in `srcChars` in the range
  1150. * [`srcStart`, `srcStart + srcLength`),
  1151. * using bitwise comparison.
  1152. * @param srcChars The text to search for.
  1153. * @param srcStart the offset into `srcChars` at which
  1154. * to start matching
  1155. * @param srcLength the number of characters in `srcChars` to match
  1156. * @param start the offset into this at which to start matching
  1157. * @param length the number of characters in this to search
  1158. * @return The offset into this of the start of the matched range of `srcChars`,
  1159. * or -1 if not found.
  1160. * @stable ICU 2.0
  1161. */
  1162. int32_t lastIndexOf(const char16_t *srcChars,
  1163. int32_t srcStart,
  1164. int32_t srcLength,
  1165. int32_t start,
  1166. int32_t length) const;
  1167. /**
  1168. * Locate in this the last occurrence of the BMP code point `c`,
  1169. * using bitwise comparison.
  1170. * @param c The code unit to search for.
  1171. * @return The offset into this of `c`, or -1 if not found.
  1172. * @stable ICU 2.0
  1173. */
  1174. inline int32_t lastIndexOf(char16_t c) const;
  1175. /**
  1176. * Locate in this the last occurrence of the code point `c`,
  1177. * using bitwise comparison.
  1178. *
  1179. * @param c The code point to search for.
  1180. * @return The offset into this of `c`, or -1 if not found.
  1181. * @stable ICU 2.0
  1182. */
  1183. inline int32_t lastIndexOf(UChar32 c) const;
  1184. /**
  1185. * Locate in this the last occurrence of the BMP code point `c`
  1186. * starting at offset `start`, using bitwise comparison.
  1187. * @param c The code unit to search for.
  1188. * @param start The offset at which searching will start.
  1189. * @return The offset into this of `c`, or -1 if not found.
  1190. * @stable ICU 2.0
  1191. */
  1192. inline int32_t lastIndexOf(char16_t c,
  1193. int32_t start) const;
  1194. /**
  1195. * Locate in this the last occurrence of the code point `c`
  1196. * starting at offset `start`, using bitwise comparison.
  1197. *
  1198. * @param c The code point to search for.
  1199. * @param start The offset at which searching will start.
  1200. * @return The offset into this of `c`, or -1 if not found.
  1201. * @stable ICU 2.0
  1202. */
  1203. inline int32_t lastIndexOf(UChar32 c,
  1204. int32_t start) const;
  1205. /**
  1206. * Locate in this the last occurrence of the BMP code point `c`
  1207. * in the range [`start`, `start + length`),
  1208. * using bitwise comparison.
  1209. * @param c The code unit to search for.
  1210. * @param start the offset into this at which to start matching
  1211. * @param length the number of characters in this to search
  1212. * @return The offset into this of `c`, or -1 if not found.
  1213. * @stable ICU 2.0
  1214. */
  1215. inline int32_t lastIndexOf(char16_t c,
  1216. int32_t start,
  1217. int32_t length) const;
  1218. /**
  1219. * Locate in this the last occurrence of the code point `c`
  1220. * in the range [`start`, `start + length`),
  1221. * using bitwise comparison.
  1222. *
  1223. * @param c The code point to search for.
  1224. * @param start the offset into this at which to start matching
  1225. * @param length the number of characters in this to search
  1226. * @return The offset into this of `c`, or -1 if not found.
  1227. * @stable ICU 2.0
  1228. */
  1229. inline int32_t lastIndexOf(UChar32 c,
  1230. int32_t start,
  1231. int32_t length) const;
  1232. /* Character access */
  1233. /**
  1234. * Return the code unit at offset `offset`.
  1235. * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1236. * @param offset a valid offset into the text
  1237. * @return the code unit at offset `offset`
  1238. * or 0xffff if the offset is not valid for this string
  1239. * @stable ICU 2.0
  1240. */
  1241. inline char16_t charAt(int32_t offset) const;
  1242. /**
  1243. * Return the code unit at offset `offset`.
  1244. * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1245. * @param offset a valid offset into the text
  1246. * @return the code unit at offset `offset`
  1247. * @stable ICU 2.0
  1248. */
  1249. inline char16_t operator[] (int32_t offset) const;
  1250. /**
  1251. * Return the code point that contains the code unit
  1252. * at offset `offset`.
  1253. * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1254. * @param offset a valid offset into the text
  1255. * that indicates the text offset of any of the code units
  1256. * that will be assembled into a code point (21-bit value) and returned
  1257. * @return the code point of text at `offset`
  1258. * or 0xffff if the offset is not valid for this string
  1259. * @stable ICU 2.0
  1260. */
  1261. UChar32 char32At(int32_t offset) const;
  1262. /**
  1263. * Adjust a random-access offset so that
  1264. * it points to the beginning of a Unicode character.
  1265. * The offset that is passed in points to
  1266. * any code unit of a code point,
  1267. * while the returned offset will point to the first code unit
  1268. * of the same code point.
  1269. * In UTF-16, if the input offset points to a second surrogate
  1270. * of a surrogate pair, then the returned offset will point
  1271. * to the first surrogate.
  1272. * @param offset a valid offset into one code point of the text
  1273. * @return offset of the first code unit of the same code point
  1274. * @see U16_SET_CP_START
  1275. * @stable ICU 2.0
  1276. */
  1277. int32_t getChar32Start(int32_t offset) const;
  1278. /**
  1279. * Adjust a random-access offset so that
  1280. * it points behind a Unicode character.
  1281. * The offset that is passed in points behind
  1282. * any code unit of a code point,
  1283. * while the returned offset will point behind the last code unit
  1284. * of the same code point.
  1285. * In UTF-16, if the input offset points behind the first surrogate
  1286. * (i.e., to the second surrogate)
  1287. * of a surrogate pair, then the returned offset will point
  1288. * behind the second surrogate (i.e., to the code unit following the pair).
  1289. * @param offset a valid offset after any code unit of a code point of the text
  1290. * @return offset of the first code unit after the same code point
  1291. * @see U16_SET_CP_LIMIT
  1292. * @stable ICU 2.0
  1293. */
  1294. int32_t getChar32Limit(int32_t offset) const;
  1295. /**
  1296. * Move the code unit index along the string by delta code points.
  1297. * Interpret the input index as a code unit-based offset into the string,
  1298. * move the index forward or backward by delta code points, and
  1299. * return the resulting index.
  1300. * The input index should point to the first code unit of a code point,
  1301. * if there is more than one.
  1302. *
  1303. * Both input and output indexes are code unit-based as for all
  1304. * string indexes/offsets in ICU (and other libraries, like MBCS char*).
  1305. * If delta<0 then the index is moved backward (toward the start of the string).
  1306. * If delta>0 then the index is moved forward (toward the end of the string).
  1307. *
  1308. * This behaves like CharacterIterator::move32(delta, kCurrent).
  1309. *
  1310. * Behavior for out-of-bounds indexes:
  1311. * `moveIndex32` pins the input index to 0..length(), i.e.,
  1312. * if the input index<0 then it is pinned to 0;
  1313. * if it is index>length() then it is pinned to length().
  1314. * Afterwards, the index is moved by `delta` code points
  1315. * forward or backward,
  1316. * but no further backward than to 0 and no further forward than to length().
  1317. * The resulting index return value will be in between 0 and length(), inclusively.
  1318. *
  1319. * Examples:
  1320. * \code
  1321. * // s has code points 'a' U+10000 'b' U+10ffff U+2029
  1322. * UnicodeString s(u"a\U00010000b\U0010ffff\u2029");
  1323. *
  1324. * // initial index: position of U+10000
  1325. * int32_t index=1;
  1326. *
  1327. * // the following examples will all result in index==4, position of U+10ffff
  1328. *
  1329. * // skip 2 code points from some position in the string
  1330. * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
  1331. *
  1332. * // go to the 3rd code point from the start of s (0-based)
  1333. * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
  1334. *
  1335. * // go to the next-to-last code point of s
  1336. * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
  1337. * \endcode
  1338. *
  1339. * @param index input code unit index
  1340. * @param delta (signed) code point count to move the index forward or backward
  1341. * in the string
  1342. * @return the resulting code unit index
  1343. * @stable ICU 2.0
  1344. */
  1345. int32_t moveIndex32(int32_t index, int32_t delta) const;
  1346. /* Substring extraction */
  1347. /**
  1348. * Copy the characters in the range
  1349. * [`start`, `start + length`) into the array `dst`,
  1350. * beginning at `dstStart`.
  1351. * If the string aliases to `dst` itself as an external buffer,
  1352. * then extract() will not copy the contents.
  1353. *
  1354. * @param start offset of first character which will be copied into the array
  1355. * @param length the number of characters to extract
  1356. * @param dst array in which to copy characters. The length of `dst`
  1357. * must be at least (`dstStart + length`).
  1358. * @param dstStart the offset in `dst` where the first character
  1359. * will be extracted
  1360. * @stable ICU 2.0
  1361. */
  1362. inline void extract(int32_t start,
  1363. int32_t length,
  1364. Char16Ptr dst,
  1365. int32_t dstStart = 0) const;
  1366. /**
  1367. * Copy the contents of the string into dest.
  1368. * This is a convenience function that
  1369. * checks if there is enough space in dest,
  1370. * extracts the entire string if possible,
  1371. * and NUL-terminates dest if possible.
  1372. *
  1373. * If the string fits into dest but cannot be NUL-terminated
  1374. * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
  1375. * If the string itself does not fit into dest
  1376. * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
  1377. *
  1378. * If the string aliases to `dest` itself as an external buffer,
  1379. * then extract() will not copy the contents.
  1380. *
  1381. * @param dest Destination string buffer.
  1382. * @param destCapacity Number of char16_ts available at dest.
  1383. * @param errorCode ICU error code.
  1384. * @return length()
  1385. * @stable ICU 2.0
  1386. */
  1387. int32_t
  1388. extract(Char16Ptr dest, int32_t destCapacity,
  1389. UErrorCode &errorCode) const;
  1390. /**
  1391. * Copy the characters in the range
  1392. * [`start`, `start + length`) into the UnicodeString
  1393. * `target`.
  1394. * @param start offset of first character which will be copied
  1395. * @param length the number of characters to extract
  1396. * @param target UnicodeString into which to copy characters.
  1397. * @stable ICU 2.0
  1398. */
  1399. inline void extract(int32_t start,
  1400. int32_t length,
  1401. UnicodeString& target) const;
  1402. /**
  1403. * Copy the characters in the range [`start`, `limit`)
  1404. * into the array `dst`, beginning at `dstStart`.
  1405. * @param start offset of first character which will be copied into the array
  1406. * @param limit offset immediately following the last character to be copied
  1407. * @param dst array in which to copy characters. The length of `dst`
  1408. * must be at least (`dstStart + (limit - start)`).
  1409. * @param dstStart the offset in `dst` where the first character
  1410. * will be extracted
  1411. * @stable ICU 2.0
  1412. */
  1413. inline void extractBetween(int32_t start,
  1414. int32_t limit,
  1415. char16_t *dst,
  1416. int32_t dstStart = 0) const;
  1417. /**
  1418. * Copy the characters in the range [`start`, `limit`)
  1419. * into the UnicodeString `target`. Replaceable API.
  1420. * @param start offset of first character which will be copied
  1421. * @param limit offset immediately following the last character to be copied
  1422. * @param target UnicodeString into which to copy characters.
  1423. * @stable ICU 2.0
  1424. */
  1425. virtual void extractBetween(int32_t start,
  1426. int32_t limit,
  1427. UnicodeString& target) const;
  1428. /**
  1429. * Copy the characters in the range
  1430. * [`start`, `start + startLength`) into an array of characters.
  1431. * All characters must be invariant (see utypes.h).
  1432. * Use US_INV as the last, signature-distinguishing parameter.
  1433. *
  1434. * This function does not write any more than `targetCapacity`
  1435. * characters but returns the length of the entire output string
  1436. * so that one can allocate a larger buffer and call the function again
  1437. * if necessary.
  1438. * The output string is NUL-terminated if possible.
  1439. *
  1440. * @param start offset of first character which will be copied
  1441. * @param startLength the number of characters to extract
  1442. * @param target the target buffer for extraction, can be NULL
  1443. * if targetCapacity is 0
  1444. * @param targetCapacity the length of the target buffer
  1445. * @param inv Signature-distinguishing parameter, use US_INV.
  1446. * @return the output string length, not including the terminating NUL
  1447. * @stable ICU 3.2
  1448. */
  1449. int32_t extract(int32_t start,
  1450. int32_t startLength,
  1451. char *target,
  1452. int32_t targetCapacity,
  1453. enum EInvariant inv) const;
  1454. #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
  1455. /**
  1456. * Copy the characters in the range
  1457. * [`start`, `start + startLength`) into an array of characters
  1458. * in the platform's default codepage.
  1459. * This function does not write any more than `targetLength`
  1460. * characters but returns the length of the entire output string
  1461. * so that one can allocate a larger buffer and call the function again
  1462. * if necessary.
  1463. * The output string is NUL-terminated if possible.
  1464. *
  1465. * @param start offset of first character which will be copied
  1466. * @param startLength the number of characters to extract
  1467. * @param target the target buffer for extraction
  1468. * @param targetLength the length of the target buffer.
  1469. * If `target` is NULL, then the number of bytes required for
  1470. * `target` is returned.
  1471. * @return the output string length, not including the terminating NUL
  1472. * @stable ICU 2.0
  1473. */
  1474. int32_t extract(int32_t start,
  1475. int32_t startLength,
  1476. char *target,
  1477. uint32_t targetLength) const;
  1478. #endif
  1479. #if !UCONFIG_NO_CONVERSION
  1480. /**
  1481. * Copy the characters in the range
  1482. * [`start`, `start + startLength`) into an array of characters
  1483. * in a specified codepage.
  1484. * The output string is NUL-terminated.
  1485. *
  1486. * Recommendation: For invariant-character strings use
  1487. * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
  1488. * because it avoids object code dependencies of UnicodeString on
  1489. * the conversion code.
  1490. *
  1491. * @param start offset of first character which will be copied
  1492. * @param startLength the number of characters to extract
  1493. * @param target the target buffer for extraction
  1494. * @param codepage the desired codepage for the characters. 0 has
  1495. * the special meaning of the default codepage.
  1496. * If `codepage` is an empty string (`""`),
  1497. * then a simple conversion is performed on the codepage-invariant
  1498. * subset ("invariant characters") of the platform encoding. See utypes.h.
  1499. * If `target` is NULL, then the number of bytes required for
  1500. * `target` is returned. It is assumed that the target is big enough
  1501. * to fit all of the characters.
  1502. * @return the output string length, not including the terminating NUL
  1503. * @stable ICU 2.0
  1504. */
  1505. inline int32_t extract(int32_t start,
  1506. int32_t startLength,
  1507. char *target,
  1508. const char *codepage = 0) const;
  1509. /**
  1510. * Copy the characters in the range
  1511. * [`start`, `start + startLength`) into an array of characters
  1512. * in a specified codepage.
  1513. * This function does not write any more than `targetLength`
  1514. * characters but returns the length of the entire output string
  1515. * so that one can allocate a larger buffer and call the function again
  1516. * if necessary.
  1517. * The output string is NUL-terminated if possible.
  1518. *
  1519. * Recommendation: For invariant-character strings use
  1520. * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
  1521. * because it avoids object code dependencies of UnicodeString on
  1522. * the conversion code.
  1523. *
  1524. * @param start offset of first character which will be copied
  1525. * @param startLength the number of characters to extract
  1526. * @param target the target buffer for extraction
  1527. * @param targetLength the length of the target buffer
  1528. * @param codepage the desired codepage for the characters. 0 has
  1529. * the special meaning of the default codepage.
  1530. * If `codepage` is an empty string (`""`),
  1531. * then a simple conversion is performed on the codepage-invariant
  1532. * subset ("invariant characters") of the platform encoding. See utypes.h.
  1533. * If `target` is NULL, then the number of bytes required for
  1534. * `target` is returned.
  1535. * @return the output string length, not including the terminating NUL
  1536. * @stable ICU 2.0
  1537. */
  1538. int32_t extract(int32_t start,
  1539. int32_t startLength,
  1540. char *target,
  1541. uint32_t targetLength,
  1542. const char *codepage) const;
  1543. /**
  1544. * Convert the UnicodeString into a codepage string using an existing UConverter.
  1545. * The output string is NUL-terminated if possible.
  1546. *
  1547. * This function avoids the overhead of opening and closing a converter if
  1548. * multiple strings are extracted.
  1549. *
  1550. * @param dest destination string buffer, can be NULL if destCapacity==0
  1551. * @param destCapacity the number of chars available at dest
  1552. * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
  1553. * or NULL for the default converter
  1554. * @param errorCode normal ICU error code
  1555. * @return the length of the output string, not counting the terminating NUL;
  1556. * if the length is greater than destCapacity, then the string will not fit
  1557. * and a buffer of the indicated length would need to be passed in
  1558. * @stable ICU 2.0
  1559. */
  1560. int32_t extract(char *dest, int32_t destCapacity,
  1561. UConverter *cnv,
  1562. UErrorCode &errorCode) const;
  1563. #endif
  1564. /**
  1565. * Create a temporary substring for the specified range.
  1566. * Unlike the substring constructor and setTo() functions,
  1567. * the object returned here will be a read-only alias (using getBuffer())
  1568. * rather than copying the text.
  1569. * As a result, this substring operation is much faster but requires
  1570. * that the original string not be modified or deleted during the lifetime
  1571. * of the returned substring object.
  1572. * @param start offset of the first character visible in the substring
  1573. * @param length length of the substring
  1574. * @return a read-only alias UnicodeString object for the substring
  1575. * @stable ICU 4.4
  1576. */
  1577. UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
  1578. /**
  1579. * Create a temporary substring for the specified range.
  1580. * Same as tempSubString(start, length) except that the substring range
  1581. * is specified as a (start, limit) pair (with an exclusive limit index)
  1582. * rather than a (start, length) pair.
  1583. * @param start offset of the first character visible in the substring
  1584. * @param limit offset immediately following the last character visible in the substring
  1585. * @return a read-only alias UnicodeString object for the substring
  1586. * @stable ICU 4.4
  1587. */
  1588. inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
  1589. /**
  1590. * Convert the UnicodeString to UTF-8 and write the result
  1591. * to a ByteSink. This is called by toUTF8String().
  1592. * Unpaired surrogates are replaced with U+FFFD.
  1593. * Calls u_strToUTF8WithSub().
  1594. *
  1595. * @param sink A ByteSink to which the UTF-8 version of the string is written.
  1596. * sink.Flush() is called at the end.
  1597. * @stable ICU 4.2
  1598. * @see toUTF8String
  1599. */
  1600. void toUTF8(ByteSink &sink) const;
  1601. /**
  1602. * Convert the UnicodeString to UTF-8 and append the result
  1603. * to a standard string.
  1604. * Unpaired surrogates are replaced with U+FFFD.
  1605. * Calls toUTF8() with a StringByteSink that wraps `result`.
  1606. *
  1607. * @param result A standard string (or a compatible object)
  1608. * to which the UTF-8 version of the string is appended.
  1609. * @return `result`, i.e. the same string object that was passed in.
  1610. * @stable ICU 4.2
  1611. * @see toUTF8
  1612. */
  1613. template<typename StringClass>
  1614. StringClass &toUTF8String(StringClass &result) const {
  1615. StringByteSink<StringClass> sbs(&result, length());
  1616. toUTF8(sbs);
  1617. return result;
  1618. }
  1619. /**
  1620. * Convert the UnicodeString to UTF-32.
  1621. * Unpaired surrogates are replaced with U+FFFD.
  1622. * Calls u_strToUTF32WithSub().
  1623. *
  1624. * @param utf32 destination string buffer, can be NULL if capacity==0
  1625. * @param capacity the number of UChar32s available at utf32
  1626. * @param errorCode Standard ICU error code. Its input value must
  1627. * pass the U_SUCCESS() test, or else the function returns
  1628. * immediately. Check for U_FAILURE() on output or use with
  1629. * function chaining. (See User Guide for details.)
  1630. * @return The length of the UTF-32 string.
  1631. * @see fromUTF32
  1632. * @stable ICU 4.2
  1633. */
  1634. int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
  1635. /* Length operations */
  1636. /**
  1637. * Return the length of the UnicodeString object.
  1638. * The length is the number of char16_t code units in the UnicodeString.
  1639. * If you want the number of code points, please use countChar32().
  1640. * @return the length of the UnicodeString object
  1641. * @see countChar32
  1642. * @stable ICU 2.0
  1643. */
  1644. inline int32_t length(void) const;
  1645. /**
  1646. * Count Unicode code points in the length char16_t code units of the string.
  1647. * A code point may occupy either one or two char16_t code units.
  1648. * Counting code points involves reading all code units.
  1649. *
  1650. * This function is basically the inverse of moveIndex32().
  1651. *
  1652. * @param start the index of the first code unit to check
  1653. * @param length the number of char16_t code units to check
  1654. * @return the number of code points in the specified code units
  1655. * @see length
  1656. * @stable ICU 2.0
  1657. */
  1658. int32_t
  1659. countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
  1660. /**
  1661. * Check if the length char16_t code units of the string
  1662. * contain more Unicode code points than a certain number.
  1663. * This is more efficient than counting all code points in this part of the string
  1664. * and comparing that number with a threshold.
  1665. * This function may not need to scan the string at all if the length
  1666. * falls within a certain range, and
  1667. * never needs to count more than 'number+1' code points.
  1668. * Logically equivalent to (countChar32(start, length)>number).
  1669. * A Unicode code point may occupy either one or two char16_t code units.
  1670. *
  1671. * @param start the index of the first code unit to check (0 for the entire string)
  1672. * @param length the number of char16_t code units to check
  1673. * (use INT32_MAX for the entire string; remember that start/length
  1674. * values are pinned)
  1675. * @param number The number of code points in the (sub)string is compared against
  1676. * the 'number' parameter.
  1677. * @return Boolean value for whether the string contains more Unicode code points
  1678. * than 'number'. Same as (u_countChar32(s, length)>number).
  1679. * @see countChar32
  1680. * @see u_strHasMoreChar32Than
  1681. * @stable ICU 2.4
  1682. */
  1683. UBool
  1684. hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
  1685. /**
  1686. * Determine if this string is empty.
  1687. * @return TRUE if this string contains 0 characters, FALSE otherwise.
  1688. * @stable ICU 2.0
  1689. */
  1690. inline UBool isEmpty(void) const;
  1691. /**
  1692. * Return the capacity of the internal buffer of the UnicodeString object.
  1693. * This is useful together with the getBuffer functions.
  1694. * See there for details.
  1695. *
  1696. * @return the number of char16_ts available in the internal buffer
  1697. * @see getBuffer
  1698. * @stable ICU 2.0
  1699. */
  1700. inline int32_t getCapacity(void) const;
  1701. /* Other operations */
  1702. /**
  1703. * Generate a hash code for this object.
  1704. * @return The hash code of this UnicodeString.
  1705. * @stable ICU 2.0
  1706. */
  1707. inline int32_t hashCode(void) const;
  1708. /**
  1709. * Determine if this object contains a valid string.
  1710. * A bogus string has no value. It is different from an empty string,
  1711. * although in both cases isEmpty() returns TRUE and length() returns 0.
  1712. * setToBogus() and isBogus() can be used to indicate that no string value is available.
  1713. * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
  1714. * length() returns 0.
  1715. *
  1716. * @return TRUE if the string is bogus/invalid, FALSE otherwise
  1717. * @see setToBogus()
  1718. * @stable ICU 2.0
  1719. */
  1720. inline UBool isBogus(void) const;
  1721. //========================================
  1722. // Write operations
  1723. //========================================
  1724. /* Assignment operations */
  1725. /**
  1726. * Assignment operator. Replace the characters in this UnicodeString
  1727. * with the characters from `srcText`.
  1728. *
  1729. * Starting with ICU 2.4, the assignment operator and the copy constructor
  1730. * allocate a new buffer and copy the buffer contents even for readonly aliases.
  1731. * By contrast, the fastCopyFrom() function implements the old,
  1732. * more efficient but less safe behavior
  1733. * of making this string also a readonly alias to the same buffer.
  1734. *
  1735. * If the source object has an "open" buffer from getBuffer(minCapacity),
  1736. * then the copy is an empty string.
  1737. *
  1738. * @param srcText The text containing the characters to replace
  1739. * @return a reference to this
  1740. * @stable ICU 2.0
  1741. * @see fastCopyFrom
  1742. */
  1743. UnicodeString &operator=(const UnicodeString &srcText);
  1744. /**
  1745. * Almost the same as the assignment operator.
  1746. * Replace the characters in this UnicodeString
  1747. * with the characters from `srcText`.
  1748. *
  1749. * This function works the same as the assignment operator
  1750. * for all strings except for ones that are readonly aliases.
  1751. *
  1752. * Starting with ICU 2.4, the assignment operator and the copy constructor
  1753. * allocate a new buffer and copy the buffer contents even for readonly aliases.
  1754. * This function implements the old, more efficient but less safe behavior
  1755. * of making this string also a readonly alias to the same buffer.
  1756. *
  1757. * The fastCopyFrom function must be used only if it is known that the lifetime of
  1758. * this UnicodeString does not exceed the lifetime of the aliased buffer
  1759. * including its contents, for example for strings from resource bundles
  1760. * or aliases to string constants.
  1761. *
  1762. * If the source object has an "open" buffer from getBuffer(minCapacity),
  1763. * then the copy is an empty string.
  1764. *
  1765. * @param src The text containing the characters to replace.
  1766. * @return a reference to this
  1767. * @stable ICU 2.4
  1768. */
  1769. UnicodeString &fastCopyFrom(const UnicodeString &src);
  1770. /**
  1771. * Move assignment operator; might leave src in bogus state.
  1772. * This string will have the same contents and state that the source string had.
  1773. * The behavior is undefined if *this and src are the same object.
  1774. * @param src source string
  1775. * @return *this
  1776. * @stable ICU 56
  1777. */
  1778. UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT;
  1779. /**
  1780. * Swap strings.
  1781. * @param other other string
  1782. * @stable ICU 56
  1783. */
  1784. void swap(UnicodeString &other) U_NOEXCEPT;
  1785. /**
  1786. * Non-member UnicodeString swap function; forwards to s1.swap(s2).
  1787. * @param s1 will get s2's contents and state
  1788. * @param s2 will get s1's contents and state
  1789. * @stable ICU 56
  1790. */
  1791. friend inline void U_EXPORT2
  1792. swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
  1793. s1.swap(s2);
  1794. }
  1795. /**
  1796. * Assignment operator. Replace the characters in this UnicodeString
  1797. * with the code unit `ch`.
  1798. * @param ch the code unit that replaces the current contents
  1799. * @return a reference to this
  1800. * @stable ICU 2.0
  1801. */
  1802. inline UnicodeString& operator= (char16_t ch);
  1803. /**
  1804. * Assignment operator. Replace the characters in this UnicodeString
  1805. * with the code point `ch`.
  1806. * @param ch the code point that replaces the current contents
  1807. * @return a reference to this
  1808. * @stable ICU 2.0
  1809. */
  1810. inline UnicodeString& operator= (UChar32 ch);
  1811. /**
  1812. * Set the text in the UnicodeString object to the characters
  1813. * in `srcText` in the range
  1814. * [`srcStart`, `srcText.length()`).
  1815. * `srcText` is not modified.
  1816. * @param srcText the source for the new characters
  1817. * @param srcStart the offset into `srcText` where new characters
  1818. * will be obtained
  1819. * @return a reference to this
  1820. * @stable ICU 2.2
  1821. */
  1822. inline UnicodeString& setTo(const UnicodeString& srcText,
  1823. int32_t srcStart);
  1824. /**
  1825. * Set the text in the UnicodeString object to the characters
  1826. * in `srcText` in the range
  1827. * [`srcStart`, `srcStart + srcLength`).
  1828. * `srcText` is not modified.
  1829. * @param srcText the source for the new characters
  1830. * @param srcStart the offset into `srcText` where new characters
  1831. * will be obtained
  1832. * @param srcLength the number of characters in `srcText` in the
  1833. * replace string.
  1834. * @return a reference to this
  1835. * @stable ICU 2.0
  1836. */
  1837. inline UnicodeString& setTo(const UnicodeString& srcText,
  1838. int32_t srcStart,
  1839. int32_t srcLength);
  1840. /**
  1841. * Set the text in the UnicodeString object to the characters in
  1842. * `srcText`.
  1843. * `srcText` is not modified.
  1844. * @param srcText the source for the new characters
  1845. * @return a reference to this
  1846. * @stable ICU 2.0
  1847. */
  1848. inline UnicodeString& setTo(const UnicodeString& srcText);
  1849. /**
  1850. * Set the characters in the UnicodeString object to the characters
  1851. * in `srcChars`. `srcChars` is not modified.
  1852. * @param srcChars the source for the new characters
  1853. * @param srcLength the number of Unicode characters in srcChars.
  1854. * @return a reference to this
  1855. * @stable ICU 2.0
  1856. */
  1857. inline UnicodeString& setTo(const char16_t *srcChars,
  1858. int32_t srcLength);
  1859. /**
  1860. * Set the characters in the UnicodeString object to the code unit
  1861. * `srcChar`.
  1862. * @param srcChar the code unit which becomes the UnicodeString's character
  1863. * content
  1864. * @return a reference to this
  1865. * @stable ICU 2.0
  1866. */
  1867. inline UnicodeString& setTo(char16_t srcChar);
  1868. /**
  1869. * Set the characters in the UnicodeString object to the code point
  1870. * `srcChar`.
  1871. * @param srcChar the code point which becomes the UnicodeString's character
  1872. * content
  1873. * @return a reference to this
  1874. * @stable ICU 2.0
  1875. */
  1876. inline UnicodeString& setTo(UChar32 srcChar);
  1877. /**
  1878. * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
  1879. * The text will be used for the UnicodeString object, but
  1880. * it will not be released when the UnicodeString is destroyed.
  1881. * This has copy-on-write semantics:
  1882. * When the string is modified, then the buffer is first copied into
  1883. * newly allocated memory.
  1884. * The aliased buffer is never modified.
  1885. *
  1886. * In an assignment to another UnicodeString, when using the copy constructor
  1887. * or the assignment operator, the text will be copied.
  1888. * When using fastCopyFrom(), the text will be aliased again,
  1889. * so that both strings then alias the same readonly-text.
  1890. *
  1891. * @param isTerminated specifies if `text` is `NUL`-terminated.
  1892. * This must be true if `textLength==-1`.
  1893. * @param text The characters to alias for the UnicodeString.
  1894. * @param textLength The number of Unicode characters in `text` to alias.
  1895. * If -1, then this function will determine the length
  1896. * by calling `u_strlen()`.
  1897. * @return a reference to this
  1898. * @stable ICU 2.0
  1899. */
  1900. UnicodeString &setTo(UBool isTerminated,
  1901. ConstChar16Ptr text,
  1902. int32_t textLength);
  1903. /**
  1904. * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
  1905. * The text will be used for the UnicodeString object, but
  1906. * it will not be released when the UnicodeString is destroyed.
  1907. * This has write-through semantics:
  1908. * For as long as the capacity of the buffer is sufficient, write operations
  1909. * will directly affect the buffer. When more capacity is necessary, then
  1910. * a new buffer will be allocated and the contents copied as with regularly
  1911. * constructed strings.
  1912. * In an assignment to another UnicodeString, the buffer will be copied.
  1913. * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
  1914. * as the string buffer itself and will in this case not copy the contents.
  1915. *
  1916. * @param buffer The characters to alias for the UnicodeString.
  1917. * @param buffLength The number of Unicode characters in `buffer` to alias.
  1918. * @param buffCapacity The size of `buffer` in char16_ts.
  1919. * @return a reference to this
  1920. * @stable ICU 2.0
  1921. */
  1922. UnicodeString &setTo(char16_t *buffer,
  1923. int32_t buffLength,
  1924. int32_t buffCapacity);
  1925. /**
  1926. * Make this UnicodeString object invalid.
  1927. * The string will test TRUE with isBogus().
  1928. *
  1929. * A bogus string has no value. It is different from an empty string.
  1930. * It can be used to indicate that no string value is available.
  1931. * getBuffer() and getTerminatedBuffer() return NULL, and
  1932. * length() returns 0.
  1933. *
  1934. * This utility function is used throughout the UnicodeString
  1935. * implementation to indicate that a UnicodeString operation failed,
  1936. * and may be used in other functions,
  1937. * especially but not exclusively when such functions do not
  1938. * take a UErrorCode for simplicity.
  1939. *
  1940. * The following methods, and no others, will clear a string object's bogus flag:
  1941. * - remove()
  1942. * - remove(0, INT32_MAX)
  1943. * - truncate(0)
  1944. * - operator=() (assignment operator)
  1945. * - setTo(...)
  1946. *
  1947. * The simplest way to turn a bogus string into an empty one
  1948. * is to use the remove() function.
  1949. * Examples for other functions that are equivalent to "set to empty string":
  1950. * \code
  1951. * if(s.isBogus()) {
  1952. * s.remove(); // set to an empty string (remove all), or
  1953. * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
  1954. * s.truncate(0); // set to an empty string (complete truncation), or
  1955. * s=UnicodeString(); // assign an empty string, or
  1956. * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
  1957. * s.setTo(u"", 0); // set to an empty C Unicode string
  1958. * }
  1959. * \endcode
  1960. *
  1961. * @see isBogus()
  1962. * @stable ICU 2.0
  1963. */
  1964. void setToBogus();
  1965. /**
  1966. * Set the character at the specified offset to the specified character.
  1967. * @param offset A valid offset into the text of the character to set
  1968. * @param ch The new character
  1969. * @return A reference to this
  1970. * @stable ICU 2.0
  1971. */
  1972. UnicodeString& setCharAt(int32_t offset,
  1973. char16_t ch);
  1974. /* Append operations */
  1975. /**
  1976. * Append operator. Append the code unit `ch` to the UnicodeString
  1977. * object.
  1978. * @param ch the code unit to be appended
  1979. * @return a reference to this
  1980. * @stable ICU 2.0
  1981. */
  1982. inline UnicodeString& operator+= (char16_t ch);
  1983. /**
  1984. * Append operator. Append the code point `ch` to the UnicodeString
  1985. * object.
  1986. * @param ch the code point to be appended
  1987. * @return a reference to this
  1988. * @stable ICU 2.0
  1989. */
  1990. inline UnicodeString& operator+= (UChar32 ch);
  1991. /**
  1992. * Append operator. Append the characters in `srcText` to the
  1993. * UnicodeString object. `srcText` is not modified.
  1994. * @param srcText the source for the new characters
  1995. * @return a reference to this
  1996. * @stable ICU 2.0
  1997. */
  1998. inline UnicodeString& operator+= (const UnicodeString& srcText);
  1999. /**
  2000. * Append the characters
  2001. * in `srcText` in the range
  2002. * [`srcStart`, `srcStart + srcLength`) to the
  2003. * UnicodeString object. `srcText`
  2004. * is not modified.
  2005. * @param srcText the source for the new characters
  2006. * @param srcStart the offset into `srcText` where new characters
  2007. * will be obtained
  2008. * @param srcLength the number of characters in `srcText` in
  2009. * the append string
  2010. * @return a reference to this
  2011. * @stable ICU 2.0
  2012. */
  2013. inline UnicodeString& append(const UnicodeString& srcText,
  2014. int32_t srcStart,
  2015. int32_t srcLength);
  2016. /**
  2017. * Append the characters in `srcText` to the UnicodeString object.
  2018. * `srcText` is not modified.
  2019. * @param srcText the source for the new characters
  2020. * @return a reference to this
  2021. * @stable ICU 2.0
  2022. */
  2023. inline UnicodeString& append(const UnicodeString& srcText);
  2024. /**
  2025. * Append the characters in `srcChars` in the range
  2026. * [`srcStart`, `srcStart + srcLength`) to the UnicodeString
  2027. * object.
  2028. * `srcChars` is not modified.
  2029. * @param srcChars the source for the new characters
  2030. * @param srcStart the offset into `srcChars` where new characters
  2031. * will be obtained
  2032. * @param srcLength the number of characters in `srcChars` in
  2033. * the append string; can be -1 if `srcChars` is NUL-terminated
  2034. * @return a reference to this
  2035. * @stable ICU 2.0
  2036. */
  2037. inline UnicodeString& append(const char16_t *srcChars,
  2038. int32_t srcStart,
  2039. int32_t srcLength);
  2040. /**
  2041. * Append the characters in `srcChars` to the UnicodeString object.
  2042. * `srcChars` is not modified.
  2043. * @param srcChars the source for the new characters
  2044. * @param srcLength the number of Unicode characters in `srcChars`;
  2045. * can be -1 if `srcChars` is NUL-terminated
  2046. * @return a reference to this
  2047. * @stable ICU 2.0
  2048. */
  2049. inline UnicodeString& append(ConstChar16Ptr srcChars,
  2050. int32_t srcLength);
  2051. /**
  2052. * Append the code unit `srcChar` to the UnicodeString object.
  2053. * @param srcChar the code unit to append
  2054. * @return a reference to this
  2055. * @stable ICU 2.0
  2056. */
  2057. inline UnicodeString& append(char16_t srcChar);
  2058. /**
  2059. * Append the code point `srcChar` to the UnicodeString object.
  2060. * @param srcChar the code point to append
  2061. * @return a reference to this
  2062. * @stable ICU 2.0
  2063. */
  2064. UnicodeString& append(UChar32 srcChar);
  2065. /* Insert operations */
  2066. /**
  2067. * Insert the characters in `srcText` in the range
  2068. * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
  2069. * object at offset `start`. `srcText` is not modified.
  2070. * @param start the offset where the insertion begins
  2071. * @param srcText the source for the new characters
  2072. * @param srcStart the offset into `srcText` where new characters
  2073. * will be obtained
  2074. * @param srcLength the number of characters in `srcText` in
  2075. * the insert string
  2076. * @return a reference to this
  2077. * @stable ICU 2.0
  2078. */
  2079. inline UnicodeString& insert(int32_t start,
  2080. const UnicodeString& srcText,
  2081. int32_t srcStart,
  2082. int32_t srcLength);
  2083. /**
  2084. * Insert the characters in `srcText` into the UnicodeString object
  2085. * at offset `start`. `srcText` is not modified.
  2086. * @param start the offset where the insertion begins
  2087. * @param srcText the source for the new characters
  2088. * @return a reference to this
  2089. * @stable ICU 2.0
  2090. */
  2091. inline UnicodeString& insert(int32_t start,
  2092. const UnicodeString& srcText);
  2093. /**
  2094. * Insert the characters in `srcChars` in the range
  2095. * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
  2096. * object at offset `start`. `srcChars` is not modified.
  2097. * @param start the offset at which the insertion begins
  2098. * @param srcChars the source for the new characters
  2099. * @param srcStart the offset into `srcChars` where new characters
  2100. * will be obtained
  2101. * @param srcLength the number of characters in `srcChars`
  2102. * in the insert string
  2103. * @return a reference to this
  2104. * @stable ICU 2.0
  2105. */
  2106. inline UnicodeString& insert(int32_t start,
  2107. const char16_t *srcChars,
  2108. int32_t srcStart,
  2109. int32_t srcLength);
  2110. /**
  2111. * Insert the characters in `srcChars` into the UnicodeString object
  2112. * at offset `start`. `srcChars` is not modified.
  2113. * @param start the offset where the insertion begins
  2114. * @param srcChars the source for the new characters
  2115. * @param srcLength the number of Unicode characters in srcChars.
  2116. * @return a reference to this
  2117. * @stable ICU 2.0
  2118. */
  2119. inline UnicodeString& insert(int32_t start,
  2120. ConstChar16Ptr srcChars,
  2121. int32_t srcLength);
  2122. /**
  2123. * Insert the code unit `srcChar` into the UnicodeString object at
  2124. * offset `start`.
  2125. * @param start the offset at which the insertion occurs
  2126. * @param srcChar the code unit to insert
  2127. * @return a reference to this
  2128. * @stable ICU 2.0
  2129. */
  2130. inline UnicodeString& insert(int32_t start,
  2131. char16_t srcChar);
  2132. /**
  2133. * Insert the code point `srcChar` into the UnicodeString object at
  2134. * offset `start`.
  2135. * @param start the offset at which the insertion occurs
  2136. * @param srcChar the code point to insert
  2137. * @return a reference to this
  2138. * @stable ICU 2.0
  2139. */
  2140. inline UnicodeString& insert(int32_t start,
  2141. UChar32 srcChar);
  2142. /* Replace operations */
  2143. /**
  2144. * Replace the characters in the range
  2145. * [`start`, `start + length`) with the characters in
  2146. * `srcText` in the range
  2147. * [`srcStart`, `srcStart + srcLength`).
  2148. * `srcText` is not modified.
  2149. * @param start the offset at which the replace operation begins
  2150. * @param length the number of characters to replace. The character at
  2151. * `start + length` is not modified.
  2152. * @param srcText the source for the new characters
  2153. * @param srcStart the offset into `srcText` where new characters
  2154. * will be obtained
  2155. * @param srcLength the number of characters in `srcText` in
  2156. * the replace string
  2157. * @return a reference to this
  2158. * @stable ICU 2.0
  2159. */
  2160. inline UnicodeString& replace(int32_t start,
  2161. int32_t length,
  2162. const UnicodeString& srcText,
  2163. int32_t srcStart,
  2164. int32_t srcLength);
  2165. /**
  2166. * Replace the characters in the range
  2167. * [`start`, `start + length`)
  2168. * with the characters in `srcText`. `srcText` is
  2169. * not modified.
  2170. * @param start the offset at which the replace operation begins
  2171. * @param length the number of characters to replace. The character at
  2172. * `start + length` is not modified.
  2173. * @param srcText the source for the new characters
  2174. * @return a reference to this
  2175. * @stable ICU 2.0
  2176. */
  2177. inline UnicodeString& replace(int32_t start,
  2178. int32_t length,
  2179. const UnicodeString& srcText);
  2180. /**
  2181. * Replace the characters in the range
  2182. * [`start`, `start + length`) with the characters in
  2183. * `srcChars` in the range
  2184. * [`srcStart`, `srcStart + srcLength`). `srcChars`
  2185. * is not modified.
  2186. * @param start the offset at which the replace operation begins
  2187. * @param length the number of characters to replace. The character at
  2188. * `start + length` is not modified.
  2189. * @param srcChars the source for the new characters
  2190. * @param srcStart the offset into `srcChars` where new characters
  2191. * will be obtained
  2192. * @param srcLength the number of characters in `srcChars`
  2193. * in the replace string
  2194. * @return a reference to this
  2195. * @stable ICU 2.0
  2196. */
  2197. inline UnicodeString& replace(int32_t start,
  2198. int32_t length,
  2199. const char16_t *srcChars,
  2200. int32_t srcStart,
  2201. int32_t srcLength);
  2202. /**
  2203. * Replace the characters in the range
  2204. * [`start`, `start + length`) with the characters in
  2205. * `srcChars`. `srcChars` is not modified.
  2206. * @param start the offset at which the replace operation begins
  2207. * @param length the number of characters to replace. The character at
  2208. * `start + length` is not modified.
  2209. * @param srcChars the source for the new characters
  2210. * @param srcLength the number of Unicode characters in `srcChars`
  2211. * @return a reference to this
  2212. * @stable ICU 2.0
  2213. */
  2214. inline UnicodeString& replace(int32_t start,
  2215. int32_t length,
  2216. ConstChar16Ptr srcChars,
  2217. int32_t srcLength);
  2218. /**
  2219. * Replace the characters in the range
  2220. * [`start`, `start + length`) with the code unit
  2221. * `srcChar`.
  2222. * @param start the offset at which the replace operation begins
  2223. * @param length the number of characters to replace. The character at
  2224. * `start + length` is not modified.
  2225. * @param srcChar the new code unit
  2226. * @return a reference to this
  2227. * @stable ICU 2.0
  2228. */
  2229. inline UnicodeString& replace(int32_t start,
  2230. int32_t length,
  2231. char16_t srcChar);
  2232. /**
  2233. * Replace the characters in the range
  2234. * [`start`, `start + length`) with the code point
  2235. * `srcChar`.
  2236. * @param start the offset at which the replace operation begins
  2237. * @param length the number of characters to replace. The character at
  2238. * `start + length` is not modified.
  2239. * @param srcChar the new code point
  2240. * @return a reference to this
  2241. * @stable ICU 2.0
  2242. */
  2243. UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
  2244. /**
  2245. * Replace the characters in the range [`start`, `limit`)
  2246. * with the characters in `srcText`. `srcText` is not modified.
  2247. * @param start the offset at which the replace operation begins
  2248. * @param limit the offset immediately following the replace range
  2249. * @param srcText the source for the new characters
  2250. * @return a reference to this
  2251. * @stable ICU 2.0
  2252. */
  2253. inline UnicodeString& replaceBetween(int32_t start,
  2254. int32_t limit,
  2255. const UnicodeString& srcText);
  2256. /**
  2257. * Replace the characters in the range [`start`, `limit`)
  2258. * with the characters in `srcText` in the range
  2259. * [`srcStart`, `srcLimit`). `srcText` is not modified.
  2260. * @param start the offset at which the replace operation begins
  2261. * @param limit the offset immediately following the replace range
  2262. * @param srcText the source for the new characters
  2263. * @param srcStart the offset into `srcText` where new characters
  2264. * will be obtained
  2265. * @param srcLimit the offset immediately following the range to copy
  2266. * in `srcText`
  2267. * @return a reference to this
  2268. * @stable ICU 2.0
  2269. */
  2270. inline UnicodeString& replaceBetween(int32_t start,
  2271. int32_t limit,
  2272. const UnicodeString& srcText,
  2273. int32_t srcStart,
  2274. int32_t srcLimit);
  2275. /**
  2276. * Replace a substring of this object with the given text.
  2277. * @param start the beginning index, inclusive; `0 <= start <= limit`.
  2278. * @param limit the ending index, exclusive; `start <= limit <= length()`.
  2279. * @param text the text to replace characters `start` to `limit - 1`
  2280. * @stable ICU 2.0
  2281. */
  2282. virtual void handleReplaceBetween(int32_t start,
  2283. int32_t limit,
  2284. const UnicodeString& text);
  2285. /**
  2286. * Replaceable API
  2287. * @return TRUE if it has MetaData
  2288. * @stable ICU 2.4
  2289. */
  2290. virtual UBool hasMetaData() const;
  2291. /**
  2292. * Copy a substring of this object, retaining attribute (out-of-band)
  2293. * information. This method is used to duplicate or reorder substrings.
  2294. * The destination index must not overlap the source range.
  2295. *
  2296. * @param start the beginning index, inclusive; `0 <= start <= limit`.
  2297. * @param limit the ending index, exclusive; `start <= limit <= length()`.
  2298. * @param dest the destination index. The characters from
  2299. * `start..limit-1` will be copied to `dest`.
  2300. * Implementations of this method may assume that `dest <= start ||
  2301. * dest >= limit`.
  2302. * @stable ICU 2.0
  2303. */
  2304. virtual void copy(int32_t start, int32_t limit, int32_t dest);
  2305. /* Search and replace operations */
  2306. /**
  2307. * Replace all occurrences of characters in oldText with the characters
  2308. * in newText
  2309. * @param oldText the text containing the search text
  2310. * @param newText the text containing the replacement text
  2311. * @return a reference to this
  2312. * @stable ICU 2.0
  2313. */
  2314. inline UnicodeString& findAndReplace(const UnicodeString& oldText,
  2315. const UnicodeString& newText);
  2316. /**
  2317. * Replace all occurrences of characters in oldText with characters
  2318. * in newText
  2319. * in the range [`start`, `start + length`).
  2320. * @param start the start of the range in which replace will be performed
  2321. * @param length the length of the range in which replace will be performed
  2322. * @param oldText the text containing the search text
  2323. * @param newText the text containing the replacement text
  2324. * @return a reference to this
  2325. * @stable ICU 2.0
  2326. */
  2327. inline UnicodeString& findAndReplace(int32_t start,
  2328. int32_t length,
  2329. const UnicodeString& oldText,
  2330. const UnicodeString& newText);
  2331. /**
  2332. * Replace all occurrences of characters in oldText in the range
  2333. * [`oldStart`, `oldStart + oldLength`) with the characters
  2334. * in newText in the range
  2335. * [`newStart`, `newStart + newLength`)
  2336. * in the range [`start`, `start + length`).
  2337. * @param start the start of the range in which replace will be performed
  2338. * @param length the length of the range in which replace will be performed
  2339. * @param oldText the text containing the search text
  2340. * @param oldStart the start of the search range in `oldText`
  2341. * @param oldLength the length of the search range in `oldText`
  2342. * @param newText the text containing the replacement text
  2343. * @param newStart the start of the replacement range in `newText`
  2344. * @param newLength the length of the replacement range in `newText`
  2345. * @return a reference to this
  2346. * @stable ICU 2.0
  2347. */
  2348. UnicodeString& findAndReplace(int32_t start,
  2349. int32_t length,
  2350. const UnicodeString& oldText,
  2351. int32_t oldStart,
  2352. int32_t oldLength,
  2353. const UnicodeString& newText,
  2354. int32_t newStart,
  2355. int32_t newLength);
  2356. /* Remove operations */
  2357. /**
  2358. * Removes all characters from the UnicodeString object and clears the bogus flag.
  2359. * This is the UnicodeString equivalent of std::string’s clear().
  2360. *
  2361. * @return a reference to this
  2362. * @see setToBogus
  2363. * @stable ICU 2.0
  2364. */
  2365. inline UnicodeString& remove();
  2366. /**
  2367. * Remove the characters in the range
  2368. * [`start`, `start + length`) from the UnicodeString object.
  2369. * @param start the offset of the first character to remove
  2370. * @param length the number of characters to remove
  2371. * @return a reference to this
  2372. * @stable ICU 2.0
  2373. */
  2374. inline UnicodeString& remove(int32_t start,
  2375. int32_t length = (int32_t)INT32_MAX);
  2376. /**
  2377. * Remove the characters in the range
  2378. * [`start`, `limit`) from the UnicodeString object.
  2379. * @param start the offset of the first character to remove
  2380. * @param limit the offset immediately following the range to remove
  2381. * @return a reference to this
  2382. * @stable ICU 2.0
  2383. */
  2384. inline UnicodeString& removeBetween(int32_t start,
  2385. int32_t limit = (int32_t)INT32_MAX);
  2386. /**
  2387. * Retain only the characters in the range
  2388. * [`start`, `limit`) from the UnicodeString object.
  2389. * Removes characters before `start` and at and after `limit`.
  2390. * @param start the offset of the first character to retain
  2391. * @param limit the offset immediately following the range to retain
  2392. * @return a reference to this
  2393. * @stable ICU 4.4
  2394. */
  2395. inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
  2396. /* Length operations */
  2397. /**
  2398. * Pad the start of this UnicodeString with the character `padChar`.
  2399. * If the length of this UnicodeString is less than targetLength,
  2400. * targetLength - length() copies of padChar will be added to the
  2401. * beginning of this UnicodeString.
  2402. * @param targetLength the desired length of the string
  2403. * @param padChar the character to use for padding. Defaults to
  2404. * space (U+0020)
  2405. * @return TRUE if the text was padded, FALSE otherwise.
  2406. * @stable ICU 2.0
  2407. */
  2408. UBool padLeading(int32_t targetLength,
  2409. char16_t padChar = 0x0020);
  2410. /**
  2411. * Pad the end of this UnicodeString with the character `padChar`.
  2412. * If the length of this UnicodeString is less than targetLength,
  2413. * targetLength - length() copies of padChar will be added to the
  2414. * end of this UnicodeString.
  2415. * @param targetLength the desired length of the string
  2416. * @param padChar the character to use for padding. Defaults to
  2417. * space (U+0020)
  2418. * @return TRUE if the text was padded, FALSE otherwise.
  2419. * @stable ICU 2.0
  2420. */
  2421. UBool padTrailing(int32_t targetLength,
  2422. char16_t padChar = 0x0020);
  2423. /**
  2424. * Truncate this UnicodeString to the `targetLength`.
  2425. * @param targetLength the desired length of this UnicodeString.
  2426. * @return TRUE if the text was truncated, FALSE otherwise
  2427. * @stable ICU 2.0
  2428. */
  2429. inline UBool truncate(int32_t targetLength);
  2430. /**
  2431. * Trims leading and trailing whitespace from this UnicodeString.
  2432. * @return a reference to this
  2433. * @stable ICU 2.0
  2434. */
  2435. UnicodeString& trim(void);
  2436. /* Miscellaneous operations */
  2437. /**
  2438. * Reverse this UnicodeString in place.
  2439. * @return a reference to this
  2440. * @stable ICU 2.0
  2441. */
  2442. inline UnicodeString& reverse(void);
  2443. /**
  2444. * Reverse the range [`start`, `start + length`) in
  2445. * this UnicodeString.
  2446. * @param start the start of the range to reverse
  2447. * @param length the number of characters to reverse
  2448. * @return a reference to this
  2449. * @stable ICU 2.0
  2450. */
  2451. inline UnicodeString& reverse(int32_t start,
  2452. int32_t length);
  2453. /**
  2454. * Convert the characters in this to UPPER CASE following the conventions of
  2455. * the default locale.
  2456. * @return A reference to this.
  2457. * @stable ICU 2.0
  2458. */
  2459. UnicodeString& toUpper(void);
  2460. /**
  2461. * Convert the characters in this to UPPER CASE following the conventions of
  2462. * a specific locale.
  2463. * @param locale The locale containing the conventions to use.
  2464. * @return A reference to this.
  2465. * @stable ICU 2.0
  2466. */
  2467. UnicodeString& toUpper(const Locale& locale);
  2468. /**
  2469. * Convert the characters in this to lower case following the conventions of
  2470. * the default locale.
  2471. * @return A reference to this.
  2472. * @stable ICU 2.0
  2473. */
  2474. UnicodeString& toLower(void);
  2475. /**
  2476. * Convert the characters in this to lower case following the conventions of
  2477. * a specific locale.
  2478. * @param locale The locale containing the conventions to use.
  2479. * @return A reference to this.
  2480. * @stable ICU 2.0
  2481. */
  2482. UnicodeString& toLower(const Locale& locale);
  2483. #if !UCONFIG_NO_BREAK_ITERATION
  2484. /**
  2485. * Titlecase this string, convenience function using the default locale.
  2486. *
  2487. * Casing is locale-dependent and context-sensitive.
  2488. * Titlecasing uses a break iterator to find the first characters of words
  2489. * that are to be titlecased. It titlecases those characters and lowercases
  2490. * all others.
  2491. *
  2492. * The titlecase break iterator can be provided to customize for arbitrary
  2493. * styles, using rules and dictionaries beyond the standard iterators.
  2494. * It may be more efficient to always provide an iterator to avoid
  2495. * opening and closing one for each string.
  2496. * The standard titlecase iterator for the root locale implements the
  2497. * algorithm of Unicode TR 21.
  2498. *
  2499. * This function uses only the setText(), first() and next() methods of the
  2500. * provided break iterator.
  2501. *
  2502. * @param titleIter A break iterator to find the first characters of words
  2503. * that are to be titlecased.
  2504. * If none is provided (0), then a standard titlecase
  2505. * break iterator is opened.
  2506. * Otherwise the provided iterator is set to the string's text.
  2507. * @return A reference to this.
  2508. * @stable ICU 2.1
  2509. */
  2510. UnicodeString &toTitle(BreakIterator *titleIter);
  2511. /**
  2512. * Titlecase this string.
  2513. *
  2514. * Casing is locale-dependent and context-sensitive.
  2515. * Titlecasing uses a break iterator to find the first characters of words
  2516. * that are to be titlecased. It titlecases those characters and lowercases
  2517. * all others.
  2518. *
  2519. * The titlecase break iterator can be provided to customize for arbitrary
  2520. * styles, using rules and dictionaries beyond the standard iterators.
  2521. * It may be more efficient to always provide an iterator to avoid
  2522. * opening and closing one for each string.
  2523. * The standard titlecase iterator for the root locale implements the
  2524. * algorithm of Unicode TR 21.
  2525. *
  2526. * This function uses only the setText(), first() and next() methods of the
  2527. * provided break iterator.
  2528. *
  2529. * @param titleIter A break iterator to find the first characters of words
  2530. * that are to be titlecased.
  2531. * If none is provided (0), then a standard titlecase
  2532. * break iterator is opened.
  2533. * Otherwise the provided iterator is set to the string's text.
  2534. * @param locale The locale to consider.
  2535. * @return A reference to this.
  2536. * @stable ICU 2.1
  2537. */
  2538. UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
  2539. /**
  2540. * Titlecase this string, with options.
  2541. *
  2542. * Casing is locale-dependent and context-sensitive.
  2543. * Titlecasing uses a break iterator to find the first characters of words
  2544. * that are to be titlecased. It titlecases those characters and lowercases
  2545. * all others. (This can be modified with options.)
  2546. *
  2547. * The titlecase break iterator can be provided to customize for arbitrary
  2548. * styles, using rules and dictionaries beyond the standard iterators.
  2549. * It may be more efficient to always provide an iterator to avoid
  2550. * opening and closing one for each string.
  2551. * The standard titlecase iterator for the root locale implements the
  2552. * algorithm of Unicode TR 21.
  2553. *
  2554. * This function uses only the setText(), first() and next() methods of the
  2555. * provided break iterator.
  2556. *
  2557. * @param titleIter A break iterator to find the first characters of words
  2558. * that are to be titlecased.
  2559. * If none is provided (0), then a standard titlecase
  2560. * break iterator is opened.
  2561. * Otherwise the provided iterator is set to the string's text.
  2562. * @param locale The locale to consider.
  2563. * @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
  2564. * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
  2565. * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES;
  2566. * see also ucasemap_open().
  2567. * @return A reference to this.
  2568. * @stable ICU 3.8
  2569. */
  2570. UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
  2571. #endif
  2572. /**
  2573. * Case-folds the characters in this string.
  2574. *
  2575. * Case-folding is locale-independent and not context-sensitive,
  2576. * but there is an option for whether to include or exclude mappings for dotted I
  2577. * and dotless i that are marked with 'T' in CaseFolding.txt.
  2578. *
  2579. * The result may be longer or shorter than the original.
  2580. *
  2581. * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
  2582. * @return A reference to this.
  2583. * @stable ICU 2.0
  2584. */
  2585. UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
  2586. //========================================
  2587. // Access to the internal buffer
  2588. //========================================
  2589. /**
  2590. * Get a read/write pointer to the internal buffer.
  2591. * The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
  2592. * writable, and is still owned by the UnicodeString object.
  2593. * Calls to getBuffer(minCapacity) must not be nested, and
  2594. * must be matched with calls to releaseBuffer(newLength).
  2595. * If the string buffer was read-only or shared,
  2596. * then it will be reallocated and copied.
  2597. *
  2598. * An attempted nested call will return 0, and will not further modify the
  2599. * state of the UnicodeString object.
  2600. * It also returns 0 if the string is bogus.
  2601. *
  2602. * The actual capacity of the string buffer may be larger than minCapacity.
  2603. * getCapacity() returns the actual capacity.
  2604. * For many operations, the full capacity should be used to avoid reallocations.
  2605. *
  2606. * While the buffer is "open" between getBuffer(minCapacity)
  2607. * and releaseBuffer(newLength), the following applies:
  2608. * - The string length is set to 0.
  2609. * - Any read API call on the UnicodeString object will behave like on a 0-length string.
  2610. * - Any write API call on the UnicodeString object is disallowed and will have no effect.
  2611. * - You can read from and write to the returned buffer.
  2612. * - The previous string contents will still be in the buffer;
  2613. * if you want to use it, then you need to call length() before getBuffer(minCapacity).
  2614. * If the length() was greater than minCapacity, then any contents after minCapacity
  2615. * may be lost.
  2616. * The buffer contents are not NUL-terminated by getBuffer().
  2617. * If length() < getCapacity() then you can terminate it by writing a NUL
  2618. * at index length().
  2619. * - You must call releaseBuffer(newLength) in order to
  2620. * return to normal UnicodeString operation.
  2621. *
  2622. * @param minCapacity the minimum number of char16_ts that are to be available
  2623. * in the buffer, starting at the returned pointer;
  2624. * default to the current string capacity if minCapacity==-1
  2625. * @return a writable pointer to the internal string buffer,
  2626. * or nullptr if an error occurs (nested calls, out of memory)
  2627. *
  2628. * @see releaseBuffer
  2629. * @see getTerminatedBuffer()
  2630. * @stable ICU 2.0
  2631. */
  2632. char16_t *getBuffer(int32_t minCapacity);
  2633. /**
  2634. * Release a read/write buffer on a UnicodeString object with an
  2635. * "open" getBuffer(minCapacity).
  2636. * This function must be called in a matched pair with getBuffer(minCapacity).
  2637. * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
  2638. *
  2639. * It will set the string length to newLength, at most to the current capacity.
  2640. * If newLength==-1 then it will set the length according to the
  2641. * first NUL in the buffer, or to the capacity if there is no NUL.
  2642. *
  2643. * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
  2644. *
  2645. * @param newLength the new length of the UnicodeString object;
  2646. * defaults to the current capacity if newLength is greater than that;
  2647. * if newLength==-1, it defaults to u_strlen(buffer) but not more than
  2648. * the current capacity of the string
  2649. *
  2650. * @see getBuffer(int32_t minCapacity)
  2651. * @stable ICU 2.0
  2652. */
  2653. void releaseBuffer(int32_t newLength=-1);
  2654. /**
  2655. * Get a read-only pointer to the internal buffer.
  2656. * This can be called at any time on a valid UnicodeString.
  2657. *
  2658. * It returns 0 if the string is bogus, or
  2659. * during an "open" getBuffer(minCapacity).
  2660. *
  2661. * It can be called as many times as desired.
  2662. * The pointer that it returns will remain valid until the UnicodeString object is modified,
  2663. * at which time the pointer is semantically invalidated and must not be used any more.
  2664. *
  2665. * The capacity of the buffer can be determined with getCapacity().
  2666. * The part after length() may or may not be initialized and valid,
  2667. * depending on the history of the UnicodeString object.
  2668. *
  2669. * The buffer contents is (probably) not NUL-terminated.
  2670. * You can check if it is with
  2671. * `(s.length() < s.getCapacity() && buffer[s.length()]==0)`.
  2672. * (See getTerminatedBuffer().)
  2673. *
  2674. * The buffer may reside in read-only memory. Its contents must not
  2675. * be modified.
  2676. *
  2677. * @return a read-only pointer to the internal string buffer,
  2678. * or nullptr if the string is empty or bogus
  2679. *
  2680. * @see getBuffer(int32_t minCapacity)
  2681. * @see getTerminatedBuffer()
  2682. * @stable ICU 2.0
  2683. */
  2684. inline const char16_t *getBuffer() const;
  2685. /**
  2686. * Get a read-only pointer to the internal buffer,
  2687. * making sure that it is NUL-terminated.
  2688. * This can be called at any time on a valid UnicodeString.
  2689. *
  2690. * It returns 0 if the string is bogus, or
  2691. * during an "open" getBuffer(minCapacity), or if the buffer cannot
  2692. * be NUL-terminated (because memory allocation failed).
  2693. *
  2694. * It can be called as many times as desired.
  2695. * The pointer that it returns will remain valid until the UnicodeString object is modified,
  2696. * at which time the pointer is semantically invalidated and must not be used any more.
  2697. *
  2698. * The capacity of the buffer can be determined with getCapacity().
  2699. * The part after length()+1 may or may not be initialized and valid,
  2700. * depending on the history of the UnicodeString object.
  2701. *
  2702. * The buffer contents is guaranteed to be NUL-terminated.
  2703. * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
  2704. * is written.
  2705. * For this reason, this function is not const, unlike getBuffer().
  2706. * Note that a UnicodeString may also contain NUL characters as part of its contents.
  2707. *
  2708. * The buffer may reside in read-only memory. Its contents must not
  2709. * be modified.
  2710. *
  2711. * @return a read-only pointer to the internal string buffer,
  2712. * or 0 if the string is empty or bogus
  2713. *
  2714. * @see getBuffer(int32_t minCapacity)
  2715. * @see getBuffer()
  2716. * @stable ICU 2.2
  2717. */
  2718. const char16_t *getTerminatedBuffer();
  2719. //========================================
  2720. // Constructors
  2721. //========================================
  2722. /** Construct an empty UnicodeString.
  2723. * @stable ICU 2.0
  2724. */
  2725. inline UnicodeString();
  2726. /**
  2727. * Construct a UnicodeString with capacity to hold `capacity` char16_ts
  2728. * @param capacity the number of char16_ts this UnicodeString should hold
  2729. * before a resize is necessary; if count is greater than 0 and count
  2730. * code points c take up more space than capacity, then capacity is adjusted
  2731. * accordingly.
  2732. * @param c is used to initially fill the string
  2733. * @param count specifies how many code points c are to be written in the
  2734. * string
  2735. * @stable ICU 2.0
  2736. */
  2737. UnicodeString(int32_t capacity, UChar32 c, int32_t count);
  2738. /**
  2739. * Single char16_t (code unit) constructor.
  2740. *
  2741. * It is recommended to mark this constructor "explicit" by
  2742. * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
  2743. * on the compiler command line or similar.
  2744. * @param ch the character to place in the UnicodeString
  2745. * @stable ICU 2.0
  2746. */
  2747. UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);
  2748. /**
  2749. * Single UChar32 (code point) constructor.
  2750. *
  2751. * It is recommended to mark this constructor "explicit" by
  2752. * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
  2753. * on the compiler command line or similar.
  2754. * @param ch the character to place in the UnicodeString
  2755. * @stable ICU 2.0
  2756. */
  2757. UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
  2758. /**
  2759. * char16_t* constructor.
  2760. *
  2761. * It is recommended to mark this constructor "explicit" by
  2762. * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
  2763. * on the compiler command line or similar.
  2764. * @param text The characters to place in the UnicodeString. `text`
  2765. * must be NUL (U+0000) terminated.
  2766. * @stable ICU 2.0
  2767. */
  2768. UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
  2769. #if !U_CHAR16_IS_TYPEDEF
  2770. /**
  2771. * uint16_t * constructor.
  2772. * Delegates to UnicodeString(const char16_t *).
  2773. *
  2774. * It is recommended to mark this constructor "explicit" by
  2775. * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
  2776. * on the compiler command line or similar.
  2777. * @param text NUL-terminated UTF-16 string
  2778. * @stable ICU 59
  2779. */
  2780. UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
  2781. UnicodeString(ConstChar16Ptr(text)) {}
  2782. #endif
  2783. #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
  2784. /**
  2785. * wchar_t * constructor.
  2786. * (Only defined if U_SIZEOF_WCHAR_T==2.)
  2787. * Delegates to UnicodeString(const char16_t *).
  2788. *
  2789. * It is recommended to mark this constructor "explicit" by
  2790. * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
  2791. * on the compiler command line or similar.
  2792. * @param text NUL-terminated UTF-16 string
  2793. * @stable ICU 59
  2794. */
  2795. UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
  2796. UnicodeString(ConstChar16Ptr(text)) {}
  2797. #endif
  2798. /**
  2799. * nullptr_t constructor.
  2800. * Effectively the same as the default constructor, makes an empty string object.
  2801. *
  2802. * It is recommended to mark this constructor "explicit" by
  2803. * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
  2804. * on the compiler command line or similar.
  2805. * @param text nullptr
  2806. * @stable ICU 59
  2807. */
  2808. UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
  2809. /**
  2810. * char16_t* constructor.
  2811. * @param text The characters to place in the UnicodeString.
  2812. * @param textLength The number of Unicode characters in `text`
  2813. * to copy.
  2814. * @stable ICU 2.0
  2815. */
  2816. UnicodeString(const char16_t *text,
  2817. int32_t textLength);
  2818. #if !U_CHAR16_IS_TYPEDEF
  2819. /**
  2820. * uint16_t * constructor.
  2821. * Delegates to UnicodeString(const char16_t *, int32_t).
  2822. * @param text UTF-16 string
  2823. * @param textLength string length
  2824. * @stable ICU 59
  2825. */
  2826. UnicodeString(const uint16_t *text, int32_t textLength) :
  2827. UnicodeString(ConstChar16Ptr(text), textLength) {}
  2828. #endif
  2829. #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
  2830. /**
  2831. * wchar_t * constructor.
  2832. * (Only defined if U_SIZEOF_WCHAR_T==2.)
  2833. * Delegates to UnicodeString(const char16_t *, int32_t).
  2834. * @param text NUL-terminated UTF-16 string
  2835. * @param textLength string length
  2836. * @stable ICU 59
  2837. */
  2838. UnicodeString(const wchar_t *text, int32_t textLength) :
  2839. UnicodeString(ConstChar16Ptr(text), textLength) {}
  2840. #endif
  2841. /**
  2842. * nullptr_t constructor.
  2843. * Effectively the same as the default constructor, makes an empty string object.
  2844. * @param text nullptr
  2845. * @param textLength ignored
  2846. * @stable ICU 59
  2847. */
  2848. inline UnicodeString(const std::nullptr_t text, int32_t textLength);
  2849. /**
  2850. * Readonly-aliasing char16_t* constructor.
  2851. * The text will be used for the UnicodeString object, but
  2852. * it will not be released when the UnicodeString is destroyed.
  2853. * This has copy-on-write semantics:
  2854. * When the string is modified, then the buffer is first copied into
  2855. * newly allocated memory.
  2856. * The aliased buffer is never modified.
  2857. *
  2858. * In an assignment to another UnicodeString, when using the copy constructor
  2859. * or the assignment operator, the text will be copied.
  2860. * When using fastCopyFrom(), the text will be aliased again,
  2861. * so that both strings then alias the same readonly-text.
  2862. *
  2863. * @param isTerminated specifies if `text` is `NUL`-terminated.
  2864. * This must be true if `textLength==-1`.
  2865. * @param text The characters to alias for the UnicodeString.
  2866. * @param textLength The number of Unicode characters in `text` to alias.
  2867. * If -1, then this constructor will determine the length
  2868. * by calling `u_strlen()`.
  2869. * @stable ICU 2.0
  2870. */
  2871. UnicodeString(UBool isTerminated,
  2872. ConstChar16Ptr text,
  2873. int32_t textLength);
  2874. /**
  2875. * Writable-aliasing char16_t* constructor.
  2876. * The text will be used for the UnicodeString object, but
  2877. * it will not be released when the UnicodeString is destroyed.
  2878. * This has write-through semantics:
  2879. * For as long as the capacity of the buffer is sufficient, write operations
  2880. * will directly affect the buffer. When more capacity is necessary, then
  2881. * a new buffer will be allocated and the contents copied as with regularly
  2882. * constructed strings.
  2883. * In an assignment to another UnicodeString, the buffer will be copied.
  2884. * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
  2885. * as the string buffer itself and will in this case not copy the contents.
  2886. *
  2887. * @param buffer The characters to alias for the UnicodeString.
  2888. * @param buffLength The number of Unicode characters in `buffer` to alias.
  2889. * @param buffCapacity The size of `buffer` in char16_ts.
  2890. * @stable ICU 2.0
  2891. */
  2892. UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
  2893. #if !U_CHAR16_IS_TYPEDEF
  2894. /**
  2895. * Writable-aliasing uint16_t * constructor.
  2896. * Delegates to UnicodeString(const char16_t *, int32_t, int32_t).
  2897. * @param buffer writable buffer of/for UTF-16 text
  2898. * @param buffLength length of the current buffer contents
  2899. * @param buffCapacity buffer capacity
  2900. * @stable ICU 59
  2901. */
  2902. UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
  2903. UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
  2904. #endif
  2905. #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
  2906. /**
  2907. * Writable-aliasing wchar_t * constructor.
  2908. * (Only defined if U_SIZEOF_WCHAR_T==2.)
  2909. * Delegates to UnicodeString(const char16_t *, int32_t, int32_t).
  2910. * @param buffer writable buffer of/for UTF-16 text
  2911. * @param buffLength length of the current buffer contents
  2912. * @param buffCapacity buffer capacity
  2913. * @stable ICU 59
  2914. */
  2915. UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
  2916. UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
  2917. #endif
  2918. /**
  2919. * Writable-aliasing nullptr_t constructor.
  2920. * Effectively the same as the default constructor, makes an empty string object.
  2921. * @param buffer nullptr
  2922. * @param buffLength ignored
  2923. * @param buffCapacity ignored
  2924. * @stable ICU 59
  2925. */
  2926. inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
  2927. #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
  2928. /**
  2929. * char* constructor.
  2930. * Uses the default converter (and thus depends on the ICU conversion code)
  2931. * unless U_CHARSET_IS_UTF8 is set to 1.
  2932. *
  2933. * For ASCII (really "invariant character") strings it is more efficient to use
  2934. * the constructor that takes a US_INV (for its enum EInvariant).
  2935. * For ASCII (invariant-character) string literals, see UNICODE_STRING and
  2936. * UNICODE_STRING_SIMPLE.
  2937. *
  2938. * It is recommended to mark this constructor "explicit" by
  2939. * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
  2940. * on the compiler command line or similar.
  2941. * @param codepageData an array of bytes, null-terminated,
  2942. * in the platform's default codepage.
  2943. * @stable ICU 2.0
  2944. * @see UNICODE_STRING
  2945. * @see UNICODE_STRING_SIMPLE
  2946. */
  2947. UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
  2948. /**
  2949. * char* constructor.
  2950. * Uses the default converter (and thus depends on the ICU conversion code)
  2951. * unless U_CHARSET_IS_UTF8 is set to 1.
  2952. * @param codepageData an array of bytes in the platform's default codepage.
  2953. * @param dataLength The number of bytes in `codepageData`.
  2954. * @stable ICU 2.0
  2955. */
  2956. UnicodeString(const char *codepageData, int32_t dataLength);
  2957. #endif
  2958. #if !UCONFIG_NO_CONVERSION
  2959. /**
  2960. * char* constructor.
  2961. * @param codepageData an array of bytes, null-terminated
  2962. * @param codepage the encoding of `codepageData`. The special
  2963. * value 0 for `codepage` indicates that the text is in the
  2964. * platform's default codepage.
  2965. *
  2966. * If `codepage` is an empty string (`""`),
  2967. * then a simple conversion is performed on the codepage-invariant
  2968. * subset ("invariant characters") of the platform encoding. See utypes.h.
  2969. * Recommendation: For invariant-character strings use the constructor
  2970. * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
  2971. * because it avoids object code dependencies of UnicodeString on
  2972. * the conversion code.
  2973. *
  2974. * @stable ICU 2.0
  2975. */
  2976. UnicodeString(const char *codepageData, const char *codepage);
  2977. /**
  2978. * char* constructor.
  2979. * @param codepageData an array of bytes.
  2980. * @param dataLength The number of bytes in `codepageData`.
  2981. * @param codepage the encoding of `codepageData`. The special
  2982. * value 0 for `codepage` indicates that the text is in the
  2983. * platform's default codepage.
  2984. * If `codepage` is an empty string (`""`),
  2985. * then a simple conversion is performed on the codepage-invariant
  2986. * subset ("invariant characters") of the platform encoding. See utypes.h.
  2987. * Recommendation: For invariant-character strings use the constructor
  2988. * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
  2989. * because it avoids object code dependencies of UnicodeString on
  2990. * the conversion code.
  2991. *
  2992. * @stable ICU 2.0
  2993. */
  2994. UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
  2995. /**
  2996. * char * / UConverter constructor.
  2997. * This constructor uses an existing UConverter object to
  2998. * convert the codepage string to Unicode and construct a UnicodeString
  2999. * from that.
  3000. *
  3001. * The converter is reset at first.
  3002. * If the error code indicates a failure before this constructor is called,
  3003. * or if an error occurs during conversion or construction,
  3004. * then the string will be bogus.
  3005. *
  3006. * This function avoids the overhead of opening and closing a converter if
  3007. * multiple strings are constructed.
  3008. *
  3009. * @param src input codepage string
  3010. * @param srcLength length of the input string, can be -1 for NUL-terminated strings
  3011. * @param cnv converter object (ucnv_resetToUnicode() will be called),
  3012. * can be NULL for the default converter
  3013. * @param errorCode normal ICU error code
  3014. * @stable ICU 2.0
  3015. */
  3016. UnicodeString(
  3017. const char *src, int32_t srcLength,
  3018. UConverter *cnv,
  3019. UErrorCode &errorCode);
  3020. #endif
  3021. /**
  3022. * Constructs a Unicode string from an invariant-character char * string.
  3023. * About invariant characters see utypes.h.
  3024. * This constructor has no runtime dependency on conversion code and is
  3025. * therefore recommended over ones taking a charset name string
  3026. * (where the empty string "" indicates invariant-character conversion).
  3027. *
  3028. * Use the macro US_INV as the third, signature-distinguishing parameter.
  3029. *
  3030. * For example:
  3031. * \code
  3032. * void fn(const char *s) {
  3033. * UnicodeString ustr(s, -1, US_INV);
  3034. * // use ustr ...
  3035. * }
  3036. * \endcode
  3037. * @param src String using only invariant characters.
  3038. * @param textLength Length of src, or -1 if NUL-terminated.
  3039. * @param inv Signature-distinguishing parameter, use US_INV.
  3040. *
  3041. * @see US_INV
  3042. * @stable ICU 3.2
  3043. */
  3044. UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
  3045. /**
  3046. * Copy constructor.
  3047. *
  3048. * Starting with ICU 2.4, the assignment operator and the copy constructor
  3049. * allocate a new buffer and copy the buffer contents even for readonly aliases.
  3050. * By contrast, the fastCopyFrom() function implements the old,
  3051. * more efficient but less safe behavior
  3052. * of making this string also a readonly alias to the same buffer.
  3053. *
  3054. * If the source object has an "open" buffer from getBuffer(minCapacity),
  3055. * then the copy is an empty string.
  3056. *
  3057. * @param that The UnicodeString object to copy.
  3058. * @stable ICU 2.0
  3059. * @see fastCopyFrom
  3060. */
  3061. UnicodeString(const UnicodeString& that);
  3062. /**
  3063. * Move constructor; might leave src in bogus state.
  3064. * This string will have the same contents and state that the source string had.
  3065. * @param src source string
  3066. * @stable ICU 56
  3067. */
  3068. UnicodeString(UnicodeString &&src) U_NOEXCEPT;
  3069. /**
  3070. * 'Substring' constructor from tail of source string.
  3071. * @param src The UnicodeString object to copy.
  3072. * @param srcStart The offset into `src` at which to start copying.
  3073. * @stable ICU 2.2
  3074. */
  3075. UnicodeString(const UnicodeString& src, int32_t srcStart);
  3076. /**
  3077. * 'Substring' constructor from subrange of source string.
  3078. * @param src The UnicodeString object to copy.
  3079. * @param srcStart The offset into `src` at which to start copying.
  3080. * @param srcLength The number of characters from `src` to copy.
  3081. * @stable ICU 2.2
  3082. */
  3083. UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
  3084. /**
  3085. * Clone this object, an instance of a subclass of Replaceable.
  3086. * Clones can be used concurrently in multiple threads.
  3087. * If a subclass does not implement clone(), or if an error occurs,
  3088. * then NULL is returned.
  3089. * The caller must delete the clone.
  3090. *
  3091. * @return a clone of this object
  3092. *
  3093. * @see Replaceable::clone
  3094. * @see getDynamicClassID
  3095. * @stable ICU 2.6
  3096. */
  3097. virtual UnicodeString *clone() const;
  3098. /** Destructor.
  3099. * @stable ICU 2.0
  3100. */
  3101. virtual ~UnicodeString();
  3102. /**
  3103. * Create a UnicodeString from a UTF-8 string.
  3104. * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
  3105. * Calls u_strFromUTF8WithSub().
  3106. *
  3107. * @param utf8 UTF-8 input string.
  3108. * Note that a StringPiece can be implicitly constructed
  3109. * from a std::string or a NUL-terminated const char * string.
  3110. * @return A UnicodeString with equivalent UTF-16 contents.
  3111. * @see toUTF8
  3112. * @see toUTF8String
  3113. * @stable ICU 4.2
  3114. */
  3115. static UnicodeString fromUTF8(StringPiece utf8);
  3116. /**
  3117. * Create a UnicodeString from a UTF-32 string.
  3118. * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
  3119. * Calls u_strFromUTF32WithSub().
  3120. *
  3121. * @param utf32 UTF-32 input string. Must not be NULL.
  3122. * @param length Length of the input string, or -1 if NUL-terminated.
  3123. * @return A UnicodeString with equivalent UTF-16 contents.
  3124. * @see toUTF32
  3125. * @stable ICU 4.2
  3126. */
  3127. static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
  3128. /* Miscellaneous operations */
  3129. /**
  3130. * Unescape a string of characters and return a string containing
  3131. * the result. The following escape sequences are recognized:
  3132. *
  3133. * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
  3134. * \\Uhhhhhhhh 8 hex digits
  3135. * \\xhh 1-2 hex digits
  3136. * \\ooo 1-3 octal digits; o in [0-7]
  3137. * \\cX control-X; X is masked with 0x1F
  3138. *
  3139. * as well as the standard ANSI C escapes:
  3140. *
  3141. * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
  3142. * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
  3143. * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
  3144. *
  3145. * Anything else following a backslash is generically escaped. For
  3146. * example, "[a\\-z]" returns "[a-z]".
  3147. *
  3148. * If an escape sequence is ill-formed, this method returns an empty
  3149. * string. An example of an ill-formed sequence is "\\u" followed by
  3150. * fewer than 4 hex digits.
  3151. *
  3152. * This function is similar to u_unescape() but not identical to it.
  3153. * The latter takes a source char*, so it does escape recognition
  3154. * and also invariant conversion.
  3155. *
  3156. * @return a string with backslash escapes interpreted, or an
  3157. * empty string on error.
  3158. * @see UnicodeString#unescapeAt()
  3159. * @see u_unescape()
  3160. * @see u_unescapeAt()
  3161. * @stable ICU 2.0
  3162. */
  3163. UnicodeString unescape() const;
  3164. /**
  3165. * Unescape a single escape sequence and return the represented
  3166. * character. See unescape() for a listing of the recognized escape
  3167. * sequences. The character at offset-1 is assumed (without
  3168. * checking) to be a backslash. If the escape sequence is
  3169. * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
  3170. * returned.
  3171. *
  3172. * @param offset an input output parameter. On input, it is the
  3173. * offset into this string where the escape sequence is located,
  3174. * after the initial backslash. On output, it is advanced after the
  3175. * last character parsed. On error, it is not advanced at all.
  3176. * @return the character represented by the escape sequence at
  3177. * offset, or U_SENTINEL=-1 on error.
  3178. * @see UnicodeString#unescape()
  3179. * @see u_unescape()
  3180. * @see u_unescapeAt()
  3181. * @stable ICU 2.0
  3182. */
  3183. UChar32 unescapeAt(int32_t &offset) const;
  3184. /**
  3185. * ICU "poor man's RTTI", returns a UClassID for this class.
  3186. *
  3187. * @stable ICU 2.2
  3188. */
  3189. static UClassID U_EXPORT2 getStaticClassID();
  3190. /**
  3191. * ICU "poor man's RTTI", returns a UClassID for the actual class.
  3192. *
  3193. * @stable ICU 2.2
  3194. */
  3195. virtual UClassID getDynamicClassID() const;
  3196. //========================================
  3197. // Implementation methods
  3198. //========================================
  3199. protected:
  3200. /**
  3201. * Implement Replaceable::getLength() (see jitterbug 1027).
  3202. * @stable ICU 2.4
  3203. */
  3204. virtual int32_t getLength() const;
  3205. /**
  3206. * The change in Replaceable to use virtual getCharAt() allows
  3207. * UnicodeString::charAt() to be inline again (see jitterbug 709).
  3208. * @stable ICU 2.4
  3209. */
  3210. virtual char16_t getCharAt(int32_t offset) const;
  3211. /**
  3212. * The change in Replaceable to use virtual getChar32At() allows
  3213. * UnicodeString::char32At() to be inline again (see jitterbug 709).
  3214. * @stable ICU 2.4
  3215. */
  3216. virtual UChar32 getChar32At(int32_t offset) const;
  3217. private:
  3218. // For char* constructors. Could be made public.
  3219. UnicodeString &setToUTF8(StringPiece utf8);
  3220. // For extract(char*).
  3221. // We could make a toUTF8(target, capacity, errorCode) public but not
  3222. // this version: New API will be cleaner if we make callers create substrings
  3223. // rather than having start+length on every method,
  3224. // and it should take a UErrorCode&.
  3225. int32_t
  3226. toUTF8(int32_t start, int32_t len,
  3227. char *target, int32_t capacity) const;
  3228. /**
  3229. * Internal string contents comparison, called by operator==.
  3230. * Requires: this & text not bogus and have same lengths.
  3231. */
  3232. UBool doEquals(const UnicodeString &text, int32_t len) const;
  3233. inline int8_t
  3234. doCompare(int32_t start,
  3235. int32_t length,
  3236. const UnicodeString& srcText,
  3237. int32_t srcStart,
  3238. int32_t srcLength) const;
  3239. int8_t doCompare(int32_t start,
  3240. int32_t length,
  3241. const char16_t *srcChars,
  3242. int32_t srcStart,
  3243. int32_t srcLength) const;
  3244. inline int8_t
  3245. doCompareCodePointOrder(int32_t start,
  3246. int32_t length,
  3247. const UnicodeString& srcText,
  3248. int32_t srcStart,
  3249. int32_t srcLength) const;
  3250. int8_t doCompareCodePointOrder(int32_t start,
  3251. int32_t length,
  3252. const char16_t *srcChars,
  3253. int32_t srcStart,
  3254. int32_t srcLength) const;
  3255. inline int8_t
  3256. doCaseCompare(int32_t start,
  3257. int32_t length,
  3258. const UnicodeString &srcText,
  3259. int32_t srcStart,
  3260. int32_t srcLength,
  3261. uint32_t options) const;
  3262. int8_t
  3263. doCaseCompare(int32_t start,
  3264. int32_t length,
  3265. const char16_t *srcChars,
  3266. int32_t srcStart,
  3267. int32_t srcLength,
  3268. uint32_t options) const;
  3269. int32_t doIndexOf(char16_t c,
  3270. int32_t start,
  3271. int32_t length) const;
  3272. int32_t doIndexOf(UChar32 c,
  3273. int32_t start,
  3274. int32_t length) const;
  3275. int32_t doLastIndexOf(char16_t c,
  3276. int32_t start,
  3277. int32_t length) const;
  3278. int32_t doLastIndexOf(UChar32 c,
  3279. int32_t start,
  3280. int32_t length) const;
  3281. void doExtract(int32_t start,
  3282. int32_t length,
  3283. char16_t *dst,
  3284. int32_t dstStart) const;
  3285. inline void doExtract(int32_t start,
  3286. int32_t length,
  3287. UnicodeString& target) const;
  3288. inline char16_t doCharAt(int32_t offset) const;
  3289. UnicodeString& doReplace(int32_t start,
  3290. int32_t length,
  3291. const UnicodeString& srcText,
  3292. int32_t srcStart,
  3293. int32_t srcLength);
  3294. UnicodeString& doReplace(int32_t start,
  3295. int32_t length,
  3296. const char16_t *srcChars,
  3297. int32_t srcStart,
  3298. int32_t srcLength);
  3299. UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
  3300. UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
  3301. UnicodeString& doReverse(int32_t start,
  3302. int32_t length);
  3303. // calculate hash code
  3304. int32_t doHashCode(void) const;
  3305. // get pointer to start of array
  3306. // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
  3307. inline char16_t* getArrayStart(void);
  3308. inline const char16_t* getArrayStart(void) const;
  3309. inline UBool hasShortLength() const;
  3310. inline int32_t getShortLength() const;
  3311. // A UnicodeString object (not necessarily its current buffer)
  3312. // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
  3313. inline UBool isWritable() const;
  3314. // Is the current buffer writable?
  3315. inline UBool isBufferWritable() const;
  3316. // None of the following does releaseArray().
  3317. inline void setZeroLength();
  3318. inline void setShortLength(int32_t len);
  3319. inline void setLength(int32_t len);
  3320. inline void setToEmpty();
  3321. inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
  3322. // allocate the array; result may be the stack buffer
  3323. // sets refCount to 1 if appropriate
  3324. // sets fArray, fCapacity, and flags
  3325. // sets length to 0
  3326. // returns boolean for success or failure
  3327. UBool allocate(int32_t capacity);
  3328. // release the array if owned
  3329. void releaseArray(void);
  3330. // turn a bogus string into an empty one
  3331. void unBogus();
  3332. // implements assignment operator, copy constructor, and fastCopyFrom()
  3333. UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
  3334. // Copies just the fields without memory management.
  3335. void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
  3336. // Pin start and limit to acceptable values.
  3337. inline void pinIndex(int32_t& start) const;
  3338. inline void pinIndices(int32_t& start,
  3339. int32_t& length) const;
  3340. #if !UCONFIG_NO_CONVERSION
  3341. /* Internal extract() using UConverter. */
  3342. int32_t doExtract(int32_t start, int32_t length,
  3343. char *dest, int32_t destCapacity,
  3344. UConverter *cnv,
  3345. UErrorCode &errorCode) const;
  3346. /*
  3347. * Real constructor for converting from codepage data.
  3348. * It assumes that it is called with !fRefCounted.
  3349. *
  3350. * If `codepage==0`, then the default converter
  3351. * is used for the platform encoding.
  3352. * If `codepage` is an empty string (`""`),
  3353. * then a simple conversion is performed on the codepage-invariant
  3354. * subset ("invariant characters") of the platform encoding. See utypes.h.
  3355. */
  3356. void doCodepageCreate(const char *codepageData,
  3357. int32_t dataLength,
  3358. const char *codepage);
  3359. /*
  3360. * Worker function for creating a UnicodeString from
  3361. * a codepage string using a UConverter.
  3362. */
  3363. void
  3364. doCodepageCreate(const char *codepageData,
  3365. int32_t dataLength,
  3366. UConverter *converter,
  3367. UErrorCode &status);
  3368. #endif
  3369. /*
  3370. * This function is called when write access to the array
  3371. * is necessary.
  3372. *
  3373. * We need to make a copy of the array if
  3374. * the buffer is read-only, or
  3375. * the buffer is refCounted (shared), and refCount>1, or
  3376. * the buffer is too small.
  3377. *
  3378. * Return FALSE if memory could not be allocated.
  3379. */
  3380. UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
  3381. int32_t growCapacity = -1,
  3382. UBool doCopyArray = TRUE,
  3383. int32_t **pBufferToDelete = 0,
  3384. UBool forceClone = FALSE);
  3385. /**
  3386. * Common function for UnicodeString case mappings.
  3387. * The stringCaseMapper has the same type UStringCaseMapper
  3388. * as in ustr_imp.h for ustrcase_map().
  3389. */
  3390. UnicodeString &
  3391. caseMap(int32_t caseLocale, uint32_t options,
  3392. #if !UCONFIG_NO_BREAK_ITERATION
  3393. BreakIterator *iter,
  3394. #endif
  3395. UStringCaseMapper *stringCaseMapper);
  3396. // ref counting
  3397. void addRef(void);
  3398. int32_t removeRef(void);
  3399. int32_t refCount(void) const;
  3400. // constants
  // Internal constants: stack buffer capacity, sentinel values,
  // and the bit layout of fLengthAndFlags (storage flags in the low bits,
  // short length in the bits above kLengthShift).
  3401. enum {
  3402. /**
  3403. * Size of stack buffer for short strings.
  3404. * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
  3405. * @see UNISTR_OBJECT_SIZE
  3406. */
  3407. US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
  3408. kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
  3409. kInvalidHashCode=0, // invalid hash code
  3410. kEmptyHashCode=1, // hash code for empty string
  3411. // bit flag values for fLengthAndFlags
  3412. kIsBogus=1, // this string is bogus, i.e., not valid or NULL
  3413. kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
  3414. kRefCounted=4, // there is a refCount field before the characters in fArray
  3415. kBufferIsReadonly=8,// do not write to this buffer
  3416. kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
  3417. // and releaseBuffer(newLength) must be called
  3418. kAllStorageFlags=0x1f, // mask of the five storage flag bits above (1|2|4|8|16)
  3419. kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
  3420. kLength1=1<<kLengthShift, // a short length of 1, positioned above the storage flags
  3421. kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0)
  3422. kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
  3423. // combined values for convenience
  3424. kShortString=kUsingStackBuffer, // value the default constructor stores in fLengthAndFlags
  3425. kLongString=kRefCounted,
  3426. kReadonlyAlias=kBufferIsReadonly,
  3427. kWritableAlias=0 // no storage flag set
  3428. };
  3429. friend class UnicodeStringAppendable;
  3430. union StackBufferOrFields; // forward declaration necessary before friend declaration
  3431. friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
  3432. /*
  3433. * The following are all the class fields that are stored
  3434. * in each UnicodeString object.
  3435. * Note that UnicodeString has virtual functions,
  3436. * therefore there is an implicit vtable pointer
  3437. * as the first real field.
  3438. * The fields should be aligned such that no padding is necessary.
  3439. * On 32-bit machines, the size should be 32 bytes,
  3440. * on 64-bit machines (8-byte pointers), it should be 40 bytes.
  3441. *
  3442. * We use a hack to achieve this.
  3443. *
  3444. * With at least some compilers, each of the following is forced to
  3445. * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
  3446. * rounded up with additional padding if the fields do not already fit that requirement:
  3447. * - sizeof(class UnicodeString)
  3448. * - offsetof(UnicodeString, fUnion)
  3449. * - sizeof(fUnion)
  3450. * - sizeof(fStackFields)
  3451. *
  3452. * We optimize for the longest possible internal buffer for short strings.
  3453. * fUnion.fStackFields begins with 2 bytes for storage flags
  3454. * and the length of relatively short strings,
  3455. * followed by the buffer for short string contents.
  3456. * There is no padding inside fStackFields.
  3457. *
  3458. * Heap-allocated and aliased strings use fUnion.fFields.
  3459. * Both fStackFields and fFields must begin with the same fields for flags and short length,
  3460. * that is, those must have the same memory offsets inside the object,
  3461. * because the flags must be inspected in order to decide which half of fUnion is being used.
  3462. * We assume that the compiler does not reorder the fields.
  3463. *
  3464. * (Padding at the end of fFields is ok:
  3465. * As long as it is no larger than fStackFields, it is not wasted space.)
  3466. *
  3467. * For some of the history of the UnicodeString class fields layout, see
  3468. * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
  3469. * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
  3470. * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
  3471. */
  3472. // (implicit) *vtable;
  // Storage of a UnicodeString: either the in-object buffer (fStackFields)
  // or length/capacity/pointer for an external array (fFields).
  3473. union StackBufferOrFields {
  3474. // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
  3475. // Each struct of the union must begin with fLengthAndFlags.
  3476. struct {
  3477. int16_t fLengthAndFlags; // bit fields: see constants above
  3478. char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
  3479. } fStackFields;
  3480. struct {
  3481. int16_t fLengthAndFlags; // bit fields: see constants above
  3482. int32_t fLength; // number of characters in fArray; read by length() only when
  3482. // fLengthAndFlags holds no short length (see hasShortLength()); else undefined
  3483. int32_t fCapacity; // capacity of fArray (in char16_ts)
  3484. // array pointer last to minimize padding for machines with P128 data model
  3485. // or pointer sizes that are not a power of 2
  3486. char16_t *fArray; // the Unicode data
  3487. } fFields;
  3488. } fUnion;
  3489. };
  3490. /**
  3491. * Create a new UnicodeString with the concatenation of two others.
  3492. *
  3493. * @param s1 The first string to be copied to the new one.
  3494. * @param s2 The second string to be copied to the new one, after s1.
  3495. * @return UnicodeString(s1).append(s2)
  3496. * @stable ICU 2.8
  3497. */
  3498. U_COMMON_API UnicodeString U_EXPORT2
  3499. operator+ (const UnicodeString &s1, const UnicodeString &s2);
  3500. //========================================
  3501. // Inline members
  3502. //========================================
  3503. //========================================
  3504. // Privates
  3505. //========================================
  3506. inline void
  3507. UnicodeString::pinIndex(int32_t& start) const
  3508. {
  3509. // pin index
  3510. if(start < 0) {
  3511. start = 0;
  3512. } else if(start > length()) {
  3513. start = length();
  3514. }
  3515. }
  3516. inline void
  3517. UnicodeString::pinIndices(int32_t& start,
  3518. int32_t& _length) const
  3519. {
  3520. // pin indices
  3521. int32_t len = length();
  3522. if(start < 0) {
  3523. start = 0;
  3524. } else if(start > len) {
  3525. start = len;
  3526. }
  3527. if(_length < 0) {
  3528. _length = 0;
  3529. } else if(_length > (len - start)) {
  3530. _length = (len - start);
  3531. }
  3532. }
  3533. inline char16_t*
  3534. UnicodeString::getArrayStart() {
  3535. return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
  3536. fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
  3537. }
  3538. inline const char16_t*
  3539. UnicodeString::getArrayStart() const {
  3540. return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
  3541. fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
  3542. }
  3543. //========================================
  3544. // Default constructor
  3545. //========================================
  3546. inline
  3547. UnicodeString::UnicodeString() {
  3548. fUnion.fStackFields.fLengthAndFlags=kShortString;
  3549. }
  3550. inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
  3551. fUnion.fStackFields.fLengthAndFlags=kShortString;
  3552. }
  3553. inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
  3554. fUnion.fStackFields.fLengthAndFlags=kShortString;
  3555. }
  3556. inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
  3557. fUnion.fStackFields.fLengthAndFlags=kShortString;
  3558. }
  3559. //========================================
  3560. // Read-only implementation methods
  3561. //========================================
  3562. inline UBool
  3563. UnicodeString::hasShortLength() const {
  3564. return fUnion.fFields.fLengthAndFlags>=0;
  3565. }
  3566. inline int32_t
  3567. UnicodeString::getShortLength() const {
  3568. // fLengthAndFlags must be non-negative -> short length >= 0
  3569. // and arithmetic or logical shift does not matter.
  3570. return fUnion.fFields.fLengthAndFlags>>kLengthShift;
  3571. }
  3572. inline int32_t
  3573. UnicodeString::length() const {
  3574. return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
  3575. }
  3576. inline int32_t
  3577. UnicodeString::getCapacity() const {
  3578. return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
  3579. US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
  3580. }
  3581. inline int32_t
  3582. UnicodeString::hashCode() const
  3583. { return doHashCode(); }
  3584. inline UBool
  3585. UnicodeString::isBogus() const
  3586. { return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
  3587. inline UBool
  3588. UnicodeString::isWritable() const
  3589. { return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); }
  3590. inline UBool
  3591. UnicodeString::isBufferWritable() const
  3592. {
  3593. return (UBool)(
  3594. !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
  3595. (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1));
  3596. }
  3597. inline const char16_t *
  3598. UnicodeString::getBuffer() const {
  3599. if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
  3600. return nullptr;
  3601. } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
  3602. return fUnion.fStackFields.fBuffer;
  3603. } else {
  3604. return fUnion.fFields.fArray;
  3605. }
  3606. }
  3607. //========================================
  3608. // Read-only alias methods
  3609. //========================================
  3610. inline int8_t
  3611. UnicodeString::doCompare(int32_t start,
  3612. int32_t thisLength,
  3613. const UnicodeString& srcText,
  3614. int32_t srcStart,
  3615. int32_t srcLength) const
  3616. {
  3617. if(srcText.isBogus()) {
  3618. return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3619. } else {
  3620. srcText.pinIndices(srcStart, srcLength);
  3621. return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
  3622. }
  3623. }
  3624. inline UBool
  3625. UnicodeString::operator== (const UnicodeString& text) const
  3626. {
  3627. if(isBogus()) {
  3628. return text.isBogus();
  3629. } else {
  3630. int32_t len = length(), textLength = text.length();
  3631. return !text.isBogus() && len == textLength && doEquals(text, len);
  3632. }
  3633. }
  3634. inline UBool
  3635. UnicodeString::operator!= (const UnicodeString& text) const
  3636. { return (! operator==(text)); }
  3637. inline UBool
  3638. UnicodeString::operator> (const UnicodeString& text) const
  3639. { return doCompare(0, length(), text, 0, text.length()) == 1; }
  3640. inline UBool
  3641. UnicodeString::operator< (const UnicodeString& text) const
  3642. { return doCompare(0, length(), text, 0, text.length()) == -1; }
  3643. inline UBool
  3644. UnicodeString::operator>= (const UnicodeString& text) const
  3645. { return doCompare(0, length(), text, 0, text.length()) != -1; }
  3646. inline UBool
  3647. UnicodeString::operator<= (const UnicodeString& text) const
  3648. { return doCompare(0, length(), text, 0, text.length()) != 1; }
  3649. inline int8_t
  3650. UnicodeString::compare(const UnicodeString& text) const
  3651. { return doCompare(0, length(), text, 0, text.length()); }
  3652. inline int8_t
  3653. UnicodeString::compare(int32_t start,
  3654. int32_t _length,
  3655. const UnicodeString& srcText) const
  3656. { return doCompare(start, _length, srcText, 0, srcText.length()); }
  3657. inline int8_t
  3658. UnicodeString::compare(ConstChar16Ptr srcChars,
  3659. int32_t srcLength) const
  3660. { return doCompare(0, length(), srcChars, 0, srcLength); }
  3661. inline int8_t
  3662. UnicodeString::compare(int32_t start,
  3663. int32_t _length,
  3664. const UnicodeString& srcText,
  3665. int32_t srcStart,
  3666. int32_t srcLength) const
  3667. { return doCompare(start, _length, srcText, srcStart, srcLength); }
  3668. inline int8_t
  3669. UnicodeString::compare(int32_t start,
  3670. int32_t _length,
  3671. const char16_t *srcChars) const
  3672. { return doCompare(start, _length, srcChars, 0, _length); }
  3673. inline int8_t
  3674. UnicodeString::compare(int32_t start,
  3675. int32_t _length,
  3676. const char16_t *srcChars,
  3677. int32_t srcStart,
  3678. int32_t srcLength) const
  3679. { return doCompare(start, _length, srcChars, srcStart, srcLength); }
  3680. inline int8_t
  3681. UnicodeString::compareBetween(int32_t start,
  3682. int32_t limit,
  3683. const UnicodeString& srcText,
  3684. int32_t srcStart,
  3685. int32_t srcLimit) const
  3686. { return doCompare(start, limit - start,
  3687. srcText, srcStart, srcLimit - srcStart); }
  3688. inline int8_t
  3689. UnicodeString::doCompareCodePointOrder(int32_t start,
  3690. int32_t thisLength,
  3691. const UnicodeString& srcText,
  3692. int32_t srcStart,
  3693. int32_t srcLength) const
  3694. {
  3695. if(srcText.isBogus()) {
  3696. return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3697. } else {
  3698. srcText.pinIndices(srcStart, srcLength);
  3699. return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
  3700. }
  3701. }
  3702. inline int8_t
  3703. UnicodeString::compareCodePointOrder(const UnicodeString& text) const
  3704. { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
  3705. inline int8_t
  3706. UnicodeString::compareCodePointOrder(int32_t start,
  3707. int32_t _length,
  3708. const UnicodeString& srcText) const
  3709. { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
  3710. inline int8_t
  3711. UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
  3712. int32_t srcLength) const
  3713. { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
  3714. inline int8_t
  3715. UnicodeString::compareCodePointOrder(int32_t start,
  3716. int32_t _length,
  3717. const UnicodeString& srcText,
  3718. int32_t srcStart,
  3719. int32_t srcLength) const
  3720. { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
  3721. inline int8_t
  3722. UnicodeString::compareCodePointOrder(int32_t start,
  3723. int32_t _length,
  3724. const char16_t *srcChars) const
  3725. { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
  3726. inline int8_t
  3727. UnicodeString::compareCodePointOrder(int32_t start,
  3728. int32_t _length,
  3729. const char16_t *srcChars,
  3730. int32_t srcStart,
  3731. int32_t srcLength) const
  3732. { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
  3733. inline int8_t
  3734. UnicodeString::compareCodePointOrderBetween(int32_t start,
  3735. int32_t limit,
  3736. const UnicodeString& srcText,
  3737. int32_t srcStart,
  3738. int32_t srcLimit) const
  3739. { return doCompareCodePointOrder(start, limit - start,
  3740. srcText, srcStart, srcLimit - srcStart); }
  3741. inline int8_t
  3742. UnicodeString::doCaseCompare(int32_t start,
  3743. int32_t thisLength,
  3744. const UnicodeString &srcText,
  3745. int32_t srcStart,
  3746. int32_t srcLength,
  3747. uint32_t options) const
  3748. {
  3749. if(srcText.isBogus()) {
  3750. return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3751. } else {
  3752. srcText.pinIndices(srcStart, srcLength);
  3753. return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
  3754. }
  3755. }
  3756. inline int8_t
  3757. UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
  3758. return doCaseCompare(0, length(), text, 0, text.length(), options);
  3759. }
  3760. inline int8_t
  3761. UnicodeString::caseCompare(int32_t start,
  3762. int32_t _length,
  3763. const UnicodeString &srcText,
  3764. uint32_t options) const {
  3765. return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
  3766. }
  3767. inline int8_t
  3768. UnicodeString::caseCompare(ConstChar16Ptr srcChars,
  3769. int32_t srcLength,
  3770. uint32_t options) const {
  3771. return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
  3772. }
  3773. inline int8_t
  3774. UnicodeString::caseCompare(int32_t start,
  3775. int32_t _length,
  3776. const UnicodeString &srcText,
  3777. int32_t srcStart,
  3778. int32_t srcLength,
  3779. uint32_t options) const {
  3780. return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
  3781. }
  3782. inline int8_t
  3783. UnicodeString::caseCompare(int32_t start,
  3784. int32_t _length,
  3785. const char16_t *srcChars,
  3786. uint32_t options) const {
  3787. return doCaseCompare(start, _length, srcChars, 0, _length, options);
  3788. }
  3789. inline int8_t
  3790. UnicodeString::caseCompare(int32_t start,
  3791. int32_t _length,
  3792. const char16_t *srcChars,
  3793. int32_t srcStart,
  3794. int32_t srcLength,
  3795. uint32_t options) const {
  3796. return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
  3797. }
  3798. inline int8_t
  3799. UnicodeString::caseCompareBetween(int32_t start,
  3800. int32_t limit,
  3801. const UnicodeString &srcText,
  3802. int32_t srcStart,
  3803. int32_t srcLimit,
  3804. uint32_t options) const {
  3805. return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
  3806. }
  3807. inline int32_t
  3808. UnicodeString::indexOf(const UnicodeString& srcText,
  3809. int32_t srcStart,
  3810. int32_t srcLength,
  3811. int32_t start,
  3812. int32_t _length) const
  3813. {
  3814. if(!srcText.isBogus()) {
  3815. srcText.pinIndices(srcStart, srcLength);
  3816. if(srcLength > 0) {
  3817. return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
  3818. }
  3819. }
  3820. return -1;
  3821. }
  3822. inline int32_t
  3823. UnicodeString::indexOf(const UnicodeString& text) const
  3824. { return indexOf(text, 0, text.length(), 0, length()); }
  3825. inline int32_t
  3826. UnicodeString::indexOf(const UnicodeString& text,
  3827. int32_t start) const {
  3828. pinIndex(start);
  3829. return indexOf(text, 0, text.length(), start, length() - start);
  3830. }
  3831. inline int32_t
  3832. UnicodeString::indexOf(const UnicodeString& text,
  3833. int32_t start,
  3834. int32_t _length) const
  3835. { return indexOf(text, 0, text.length(), start, _length); }
  3836. inline int32_t
  3837. UnicodeString::indexOf(const char16_t *srcChars,
  3838. int32_t srcLength,
  3839. int32_t start) const {
  3840. pinIndex(start);
  3841. return indexOf(srcChars, 0, srcLength, start, length() - start);
  3842. }
  3843. inline int32_t
  3844. UnicodeString::indexOf(ConstChar16Ptr srcChars,
  3845. int32_t srcLength,
  3846. int32_t start,
  3847. int32_t _length) const
  3848. { return indexOf(srcChars, 0, srcLength, start, _length); }
  3849. inline int32_t
  3850. UnicodeString::indexOf(char16_t c,
  3851. int32_t start,
  3852. int32_t _length) const
  3853. { return doIndexOf(c, start, _length); }
  3854. inline int32_t
  3855. UnicodeString::indexOf(UChar32 c,
  3856. int32_t start,
  3857. int32_t _length) const
  3858. { return doIndexOf(c, start, _length); }
  3859. inline int32_t
  3860. UnicodeString::indexOf(char16_t c) const
  3861. { return doIndexOf(c, 0, length()); }
  3862. inline int32_t
  3863. UnicodeString::indexOf(UChar32 c) const
  3864. { return indexOf(c, 0, length()); }
  3865. inline int32_t
  3866. UnicodeString::indexOf(char16_t c,
  3867. int32_t start) const {
  3868. pinIndex(start);
  3869. return doIndexOf(c, start, length() - start);
  3870. }
  3871. inline int32_t
  3872. UnicodeString::indexOf(UChar32 c,
  3873. int32_t start) const {
  3874. pinIndex(start);
  3875. return indexOf(c, start, length() - start);
  3876. }
  3877. inline int32_t
  3878. UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
  3879. int32_t srcLength,
  3880. int32_t start,
  3881. int32_t _length) const
  3882. { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
  3883. inline int32_t
  3884. UnicodeString::lastIndexOf(const char16_t *srcChars,
  3885. int32_t srcLength,
  3886. int32_t start) const {
  3887. pinIndex(start);
  3888. return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
  3889. }
  3890. inline int32_t
  3891. UnicodeString::lastIndexOf(const UnicodeString& srcText,
  3892. int32_t srcStart,
  3893. int32_t srcLength,
  3894. int32_t start,
  3895. int32_t _length) const
  3896. {
  3897. if(!srcText.isBogus()) {
  3898. srcText.pinIndices(srcStart, srcLength);
  3899. if(srcLength > 0) {
  3900. return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
  3901. }
  3902. }
  3903. return -1;
  3904. }
  3905. inline int32_t
  3906. UnicodeString::lastIndexOf(const UnicodeString& text,
  3907. int32_t start,
  3908. int32_t _length) const
  3909. { return lastIndexOf(text, 0, text.length(), start, _length); }
  3910. inline int32_t
  3911. UnicodeString::lastIndexOf(const UnicodeString& text,
  3912. int32_t start) const {
  3913. pinIndex(start);
  3914. return lastIndexOf(text, 0, text.length(), start, length() - start);
  3915. }
  3916. inline int32_t
  3917. UnicodeString::lastIndexOf(const UnicodeString& text) const
  3918. { return lastIndexOf(text, 0, text.length(), 0, length()); }
  3919. inline int32_t
  3920. UnicodeString::lastIndexOf(char16_t c,
  3921. int32_t start,
  3922. int32_t _length) const
  3923. { return doLastIndexOf(c, start, _length); }
  3924. inline int32_t
  3925. UnicodeString::lastIndexOf(UChar32 c,
  3926. int32_t start,
  3927. int32_t _length) const {
  3928. return doLastIndexOf(c, start, _length);
  3929. }
  3930. inline int32_t
  3931. UnicodeString::lastIndexOf(char16_t c) const
  3932. { return doLastIndexOf(c, 0, length()); }
  3933. inline int32_t
  3934. UnicodeString::lastIndexOf(UChar32 c) const {
  3935. return lastIndexOf(c, 0, length());
  3936. }
  3937. inline int32_t
  3938. UnicodeString::lastIndexOf(char16_t c,
  3939. int32_t start) const {
  3940. pinIndex(start);
  3941. return doLastIndexOf(c, start, length() - start);
  3942. }
  3943. inline int32_t
  3944. UnicodeString::lastIndexOf(UChar32 c,
  3945. int32_t start) const {
  3946. pinIndex(start);
  3947. return lastIndexOf(c, start, length() - start);
  3948. }
  3949. inline UBool
  3950. UnicodeString::startsWith(const UnicodeString& text) const
  3951. { return compare(0, text.length(), text, 0, text.length()) == 0; }
  3952. inline UBool
  3953. UnicodeString::startsWith(const UnicodeString& srcText,
  3954. int32_t srcStart,
  3955. int32_t srcLength) const
  3956. { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
  3957. inline UBool
  3958. UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
  3959. if(srcLength < 0) {
  3960. srcLength = u_strlen(toUCharPtr(srcChars));
  3961. }
  3962. return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
  3963. }
  3964. inline UBool
  3965. UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
  3966. if(srcLength < 0) {
  3967. srcLength = u_strlen(toUCharPtr(srcChars));
  3968. }
  3969. return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
  3970. }
  3971. inline UBool
  3972. UnicodeString::endsWith(const UnicodeString& text) const
  3973. { return doCompare(length() - text.length(), text.length(),
  3974. text, 0, text.length()) == 0; }
  3975. inline UBool
  3976. UnicodeString::endsWith(const UnicodeString& srcText,
  3977. int32_t srcStart,
  3978. int32_t srcLength) const {
  3979. srcText.pinIndices(srcStart, srcLength);
  3980. return doCompare(length() - srcLength, srcLength,
  3981. srcText, srcStart, srcLength) == 0;
  3982. }
  3983. inline UBool
  3984. UnicodeString::endsWith(ConstChar16Ptr srcChars,
  3985. int32_t srcLength) const {
  3986. if(srcLength < 0) {
  3987. srcLength = u_strlen(toUCharPtr(srcChars));
  3988. }
  3989. return doCompare(length() - srcLength, srcLength,
  3990. srcChars, 0, srcLength) == 0;
  3991. }
  3992. inline UBool
  3993. UnicodeString::endsWith(const char16_t *srcChars,
  3994. int32_t srcStart,
  3995. int32_t srcLength) const {
  3996. if(srcLength < 0) {
  3997. srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
  3998. }
  3999. return doCompare(length() - srcLength, srcLength,
  4000. srcChars, srcStart, srcLength) == 0;
  4001. }
  4002. //========================================
  4003. // replace
  4004. //========================================
  4005. inline UnicodeString&
  4006. UnicodeString::replace(int32_t start,
  4007. int32_t _length,
  4008. const UnicodeString& srcText)
  4009. { return doReplace(start, _length, srcText, 0, srcText.length()); }
  4010. inline UnicodeString&
  4011. UnicodeString::replace(int32_t start,
  4012. int32_t _length,
  4013. const UnicodeString& srcText,
  4014. int32_t srcStart,
  4015. int32_t srcLength)
  4016. { return doReplace(start, _length, srcText, srcStart, srcLength); }
  4017. inline UnicodeString&
  4018. UnicodeString::replace(int32_t start,
  4019. int32_t _length,
  4020. ConstChar16Ptr srcChars,
  4021. int32_t srcLength)
  4022. { return doReplace(start, _length, srcChars, 0, srcLength); }
  4023. inline UnicodeString&
  4024. UnicodeString::replace(int32_t start,
  4025. int32_t _length,
  4026. const char16_t *srcChars,
  4027. int32_t srcStart,
  4028. int32_t srcLength)
  4029. { return doReplace(start, _length, srcChars, srcStart, srcLength); }
  4030. inline UnicodeString&
  4031. UnicodeString::replace(int32_t start,
  4032. int32_t _length,
  4033. char16_t srcChar)
  4034. { return doReplace(start, _length, &srcChar, 0, 1); }
  4035. inline UnicodeString&
  4036. UnicodeString::replaceBetween(int32_t start,
  4037. int32_t limit,
  4038. const UnicodeString& srcText)
  4039. { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
  4040. inline UnicodeString&
  4041. UnicodeString::replaceBetween(int32_t start,
  4042. int32_t limit,
  4043. const UnicodeString& srcText,
  4044. int32_t srcStart,
  4045. int32_t srcLimit)
  4046. { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
  4047. inline UnicodeString&
  4048. UnicodeString::findAndReplace(const UnicodeString& oldText,
  4049. const UnicodeString& newText)
  4050. { return findAndReplace(0, length(), oldText, 0, oldText.length(),
  4051. newText, 0, newText.length()); }
  4052. inline UnicodeString&
  4053. UnicodeString::findAndReplace(int32_t start,
  4054. int32_t _length,
  4055. const UnicodeString& oldText,
  4056. const UnicodeString& newText)
  4057. { return findAndReplace(start, _length, oldText, 0, oldText.length(),
  4058. newText, 0, newText.length()); }
  4059. // ============================
  4060. // extract
  4061. // ============================
  4062. inline void
  4063. UnicodeString::doExtract(int32_t start,
  4064. int32_t _length,
  4065. UnicodeString& target) const
  4066. { target.replace(0, target.length(), *this, start, _length); }
  4067. inline void
  4068. UnicodeString::extract(int32_t start,
  4069. int32_t _length,
  4070. Char16Ptr target,
  4071. int32_t targetStart) const
  4072. { doExtract(start, _length, target, targetStart); }
  4073. inline void
  4074. UnicodeString::extract(int32_t start,
  4075. int32_t _length,
  4076. UnicodeString& target) const
  4077. { doExtract(start, _length, target); }
  4078. #if !UCONFIG_NO_CONVERSION
  4079. inline int32_t
  4080. UnicodeString::extract(int32_t start,
  4081. int32_t _length,
  4082. char *dst,
  4083. const char *codepage) const
  4084. {
  4085. // This dstSize value will be checked explicitly
  4086. return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
  4087. }
  4088. #endif
  4089. inline void
  4090. UnicodeString::extractBetween(int32_t start,
  4091. int32_t limit,
  4092. char16_t *dst,
  4093. int32_t dstStart) const {
  4094. pinIndex(start);
  4095. pinIndex(limit);
  4096. doExtract(start, limit - start, dst, dstStart);
  4097. }
  4098. inline UnicodeString
  4099. UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
  4100. return tempSubString(start, limit - start);
  4101. }
  4102. inline char16_t
  4103. UnicodeString::doCharAt(int32_t offset) const
  4104. {
  4105. if((uint32_t)offset < (uint32_t)length()) {
  4106. return getArrayStart()[offset];
  4107. } else {
  4108. return kInvalidUChar;
  4109. }
  4110. }
  4111. inline char16_t
  4112. UnicodeString::charAt(int32_t offset) const
  4113. { return doCharAt(offset); }
  4114. inline char16_t
  4115. UnicodeString::operator[] (int32_t offset) const
  4116. { return doCharAt(offset); }
  4117. inline UBool
  4118. UnicodeString::isEmpty() const {
  4119. // Arithmetic or logical right shift does not matter: only testing for 0.
  4120. return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
  4121. }
  4122. //========================================
  4123. // Write implementation methods
  4124. //========================================
  4125. inline void
  4126. UnicodeString::setZeroLength() {
  4127. fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
  4128. }
  4129. inline void
  4130. UnicodeString::setShortLength(int32_t len) {
  4131. // requires 0 <= len <= kMaxShortLength
  4132. fUnion.fFields.fLengthAndFlags =
  4133. (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
  4134. }
  4135. inline void
  4136. UnicodeString::setLength(int32_t len) {
  4137. if(len <= kMaxShortLength) {
  4138. setShortLength(len);
  4139. } else {
  4140. fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
  4141. fUnion.fFields.fLength = len;
  4142. }
  4143. }
  4144. inline void
  4145. UnicodeString::setToEmpty() {
  4146. fUnion.fFields.fLengthAndFlags = kShortString;
  4147. }
  4148. inline void
  4149. UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
  4150. setLength(len);
  4151. fUnion.fFields.fArray = array;
  4152. fUnion.fFields.fCapacity = capacity;
  4153. }
  4154. inline UnicodeString&
  4155. UnicodeString::operator= (char16_t ch)
  4156. { return doReplace(0, length(), &ch, 0, 1); }
  4157. inline UnicodeString&
  4158. UnicodeString::operator= (UChar32 ch)
  4159. { return replace(0, length(), ch); }
  4160. inline UnicodeString&
  4161. UnicodeString::setTo(const UnicodeString& srcText,
  4162. int32_t srcStart,
  4163. int32_t srcLength)
  4164. {
  4165. unBogus();
  4166. return doReplace(0, length(), srcText, srcStart, srcLength);
  4167. }
  4168. inline UnicodeString&
  4169. UnicodeString::setTo(const UnicodeString& srcText,
  4170. int32_t srcStart)
  4171. {
  4172. unBogus();
  4173. srcText.pinIndex(srcStart);
  4174. return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
  4175. }
  4176. inline UnicodeString&
  4177. UnicodeString::setTo(const UnicodeString& srcText)
  4178. {
  4179. return copyFrom(srcText);
  4180. }
  4181. inline UnicodeString&
  4182. UnicodeString::setTo(const char16_t *srcChars,
  4183. int32_t srcLength)
  4184. {
  4185. unBogus();
  4186. return doReplace(0, length(), srcChars, 0, srcLength);
  4187. }
  4188. inline UnicodeString&
  4189. UnicodeString::setTo(char16_t srcChar)
  4190. {
  4191. unBogus();
  4192. return doReplace(0, length(), &srcChar, 0, 1);
  4193. }
  4194. inline UnicodeString&
  4195. UnicodeString::setTo(UChar32 srcChar)
  4196. {
  4197. unBogus();
  4198. return replace(0, length(), srcChar);
  4199. }
  4200. inline UnicodeString&
  4201. UnicodeString::append(const UnicodeString& srcText,
  4202. int32_t srcStart,
  4203. int32_t srcLength)
  4204. { return doAppend(srcText, srcStart, srcLength); }
  4205. inline UnicodeString&
  4206. UnicodeString::append(const UnicodeString& srcText)
  4207. { return doAppend(srcText, 0, srcText.length()); }
  4208. inline UnicodeString&
  4209. UnicodeString::append(const char16_t *srcChars,
  4210. int32_t srcStart,
  4211. int32_t srcLength)
  4212. { return doAppend(srcChars, srcStart, srcLength); }
  4213. inline UnicodeString&
  4214. UnicodeString::append(ConstChar16Ptr srcChars,
  4215. int32_t srcLength)
  4216. { return doAppend(srcChars, 0, srcLength); }
  4217. inline UnicodeString&
  4218. UnicodeString::append(char16_t srcChar)
  4219. { return doAppend(&srcChar, 0, 1); }
  4220. inline UnicodeString&
  4221. UnicodeString::operator+= (char16_t ch)
  4222. { return doAppend(&ch, 0, 1); }
  4223. inline UnicodeString&
  4224. UnicodeString::operator+= (UChar32 ch) {
  4225. return append(ch);
  4226. }
  4227. inline UnicodeString&
  4228. UnicodeString::operator+= (const UnicodeString& srcText)
  4229. { return doAppend(srcText, 0, srcText.length()); }
  4230. inline UnicodeString&
  4231. UnicodeString::insert(int32_t start,
  4232. const UnicodeString& srcText,
  4233. int32_t srcStart,
  4234. int32_t srcLength)
  4235. { return doReplace(start, 0, srcText, srcStart, srcLength); }
  4236. inline UnicodeString&
  4237. UnicodeString::insert(int32_t start,
  4238. const UnicodeString& srcText)
  4239. { return doReplace(start, 0, srcText, 0, srcText.length()); }
  4240. inline UnicodeString&
  4241. UnicodeString::insert(int32_t start,
  4242. const char16_t *srcChars,
  4243. int32_t srcStart,
  4244. int32_t srcLength)
  4245. { return doReplace(start, 0, srcChars, srcStart, srcLength); }
  4246. inline UnicodeString&
  4247. UnicodeString::insert(int32_t start,
  4248. ConstChar16Ptr srcChars,
  4249. int32_t srcLength)
  4250. { return doReplace(start, 0, srcChars, 0, srcLength); }
  4251. inline UnicodeString&
  4252. UnicodeString::insert(int32_t start,
  4253. char16_t srcChar)
  4254. { return doReplace(start, 0, &srcChar, 0, 1); }
  4255. inline UnicodeString&
  4256. UnicodeString::insert(int32_t start,
  4257. UChar32 srcChar)
  4258. { return replace(start, 0, srcChar); }
  4259. inline UnicodeString&
  4260. UnicodeString::remove()
  4261. {
  4262. // remove() of a bogus string makes the string empty and non-bogus
  4263. if(isBogus()) {
  4264. setToEmpty();
  4265. } else {
  4266. setZeroLength();
  4267. }
  4268. return *this;
  4269. }
  4270. inline UnicodeString&
  4271. UnicodeString::remove(int32_t start,
  4272. int32_t _length)
  4273. {
  4274. if(start <= 0 && _length == INT32_MAX) {
  4275. // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
  4276. return remove();
  4277. }
  4278. return doReplace(start, _length, NULL, 0, 0);
  4279. }
  4280. inline UnicodeString&
  4281. UnicodeString::removeBetween(int32_t start,
  4282. int32_t limit)
  4283. { return doReplace(start, limit - start, NULL, 0, 0); }
  4284. inline UnicodeString &
  4285. UnicodeString::retainBetween(int32_t start, int32_t limit) {
  4286. truncate(limit);
  4287. return doReplace(0, start, NULL, 0, 0);
  4288. }
  4289. inline UBool
  4290. UnicodeString::truncate(int32_t targetLength)
  4291. {
  4292. if(isBogus() && targetLength == 0) {
  4293. // truncate(0) of a bogus string makes the string empty and non-bogus
  4294. unBogus();
  4295. return FALSE;
  4296. } else if((uint32_t)targetLength < (uint32_t)length()) {
  4297. setLength(targetLength);
  4298. return TRUE;
  4299. } else {
  4300. return FALSE;
  4301. }
  4302. }
  4303. inline UnicodeString&
  4304. UnicodeString::reverse()
  4305. { return doReverse(0, length()); }
  4306. inline UnicodeString&
  4307. UnicodeString::reverse(int32_t start,
  4308. int32_t _length)
  4309. { return doReverse(start, _length); }
  4310. U_NAMESPACE_END
  4311. #endif /* U_SHOW_CPLUSPLUS_API */
  4312. #endif