stringpiece.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. // Copyright (C) 2009-2013, International Business Machines
  4. // Corporation and others. All Rights Reserved.
  5. //
  6. // Copyright 2001 and onwards Google Inc.
  7. // Author: Sanjay Ghemawat
  8. // This code is a contribution of Google code, and the style used here is
  9. // a compromise between the original Google code and the ICU coding guidelines.
  10. // For example, data types are ICU-ified (size_t,int->int32_t),
  11. // and API comments doxygen-ified, but function names and behavior are
  12. // as in the original, if possible.
  13. // Assertion-style error handling, not available in ICU, was changed to
  14. // parameter "pinning" similar to UnicodeString.
  15. //
  16. // In addition, this is only a partial port of the original Google code,
  17. // limited to what was needed so far. The (nearly) complete original code
  18. // is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
  19. // (see ICU ticket 6765, r25517).
  20. #ifndef __STRINGPIECE_H__
  21. #define __STRINGPIECE_H__
  22. /**
  23. * \file
  24. * \brief C++ API: StringPiece: Read-only byte string wrapper class.
  25. */
  26. #include "unicode/utypes.h"
  27. #if U_SHOW_CPLUSPLUS_API
  28. #include <cstddef>
  29. #include <type_traits>
  30. #include "unicode/uobject.h"
  31. #include "unicode/std_string.h"
  32. // Arghh! I wish C++ literals were "string".
  33. U_NAMESPACE_BEGIN
  34. /**
  35. * A string-like object that points to a sized piece of memory.
  36. *
  37. * We provide non-explicit singleton constructors so users can pass
  38. * in a "const char*" or a "string" wherever a "StringPiece" is
  39. * expected.
  40. *
  41. * Functions or methods may use StringPiece parameters to accept either a
  42. * "const char*" or a "string" value that will be implicitly converted to a
  43. * StringPiece.
  44. *
  45. * Systematic usage of StringPiece is encouraged as it will reduce unnecessary
  46. * conversions from "const char*" to "string" and back again.
  47. *
  48. * @stable ICU 4.2
  49. */
  50. class U_COMMON_API StringPiece : public UMemory {
  51. private:
  52. const char* ptr_;
  53. int32_t length_;
  54. public:
  55. /**
  56. * Default constructor, creates an empty StringPiece.
  57. * @stable ICU 4.2
  58. */
  59. StringPiece() : ptr_(nullptr), length_(0) { }
  60. /**
  61. * Constructs from a NUL-terminated const char * pointer.
  62. * @param str a NUL-terminated const char * pointer
  63. * @stable ICU 4.2
  64. */
  65. StringPiece(const char* str);
  66. #ifndef U_HIDE_DRAFT_API
  67. #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
  68. /**
  69. * Constructs from a NUL-terminated const char8_t * pointer.
  70. * @param str a NUL-terminated const char8_t * pointer
  71. * @draft ICU 67
  72. */
  73. StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
  74. #endif
  75. /**
  76. * Constructs an empty StringPiece.
  77. * Needed for type disambiguation from multiple other overloads.
  78. * @param p nullptr
  79. * @draft ICU 67
  80. */
  81. StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
  82. #endif // U_HIDE_DRAFT_API
  83. /**
  84. * Constructs from a std::string.
  85. * @stable ICU 4.2
  86. */
  87. StringPiece(const std::string& str)
  88. : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
  89. #ifndef U_HIDE_DRAFT_API
  90. #if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
  91. /**
  92. * Constructs from a std::u8string.
  93. * @draft ICU 67
  94. */
  95. StringPiece(const std::u8string& str)
  96. : ptr_(reinterpret_cast<const char*>(str.data())),
  97. length_(static_cast<int32_t>(str.size())) { }
  98. #endif
  99. #endif // U_HIDE_DRAFT_API
  100. #ifndef U_HIDE_DRAFT_API
  101. /**
  102. * Constructs from some other implementation of a string piece class, from any
  103. * C++ record type that has these two methods:
  104. *
  105. * \code{.cpp}
  106. *
  107. * struct OtherStringPieceClass {
  108. * const char* data(); // or const char8_t*
  109. * size_t size();
  110. * };
  111. *
  112. * \endcode
  113. *
  114. * The other string piece class will typically be std::string_view from C++17
  115. * or absl::string_view from Abseil.
  116. *
  117. * Starting with C++20, data() may also return a const char8_t* pointer,
  118. * as from std::u8string_view.
  119. *
  120. * @param str the other string piece
  121. * @draft ICU 65
  122. */
  123. template <typename T,
  124. typename = typename std::enable_if<
  125. (std::is_same<decltype(T().data()), const char*>::value
  126. #if defined(__cpp_char8_t)
  127. || std::is_same<decltype(T().data()), const char8_t*>::value
  128. #endif
  129. ) &&
  130. std::is_same<decltype(T().size()), size_t>::value>::type>
  131. StringPiece(T str)
  132. : ptr_(reinterpret_cast<const char*>(str.data())),
  133. length_(static_cast<int32_t>(str.size())) {}
  134. #endif // U_HIDE_DRAFT_API
  135. /**
  136. * Constructs from a const char * pointer and a specified length.
  137. * @param offset a const char * pointer (need not be terminated)
  138. * @param len the length of the string; must be non-negative
  139. * @stable ICU 4.2
  140. */
  141. StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
  142. #ifndef U_HIDE_DRAFT_API
  143. #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
  144. /**
  145. * Constructs from a const char8_t * pointer and a specified length.
  146. * @param str a const char8_t * pointer (need not be terminated)
  147. * @param len the length of the string; must be non-negative
  148. * @draft ICU 67
  149. */
  150. StringPiece(const char8_t* str, int32_t len) :
  151. StringPiece(reinterpret_cast<const char*>(str), len) {}
  152. #endif
  153. #endif // U_HIDE_DRAFT_API
  154. /**
  155. * Substring of another StringPiece.
  156. * @param x the other StringPiece
  157. * @param pos start position in x; must be non-negative and <= x.length().
  158. * @stable ICU 4.2
  159. */
  160. StringPiece(const StringPiece& x, int32_t pos);
  161. /**
  162. * Substring of another StringPiece.
  163. * @param x the other StringPiece
  164. * @param pos start position in x; must be non-negative and <= x.length().
  165. * @param len length of the substring;
  166. * must be non-negative and will be pinned to at most x.length() - pos.
  167. * @stable ICU 4.2
  168. */
  169. StringPiece(const StringPiece& x, int32_t pos, int32_t len);
  170. /**
  171. * Returns the string pointer. May be nullptr if it is empty.
  172. *
  173. * data() may return a pointer to a buffer with embedded NULs, and the
  174. * returned buffer may or may not be null terminated. Therefore it is
  175. * typically a mistake to pass data() to a routine that expects a NUL
  176. * terminated string.
  177. * @return the string pointer
  178. * @stable ICU 4.2
  179. */
  180. const char* data() const { return ptr_; }
  181. /**
  182. * Returns the string length. Same as length().
  183. * @return the string length
  184. * @stable ICU 4.2
  185. */
  186. int32_t size() const { return length_; }
  187. /**
  188. * Returns the string length. Same as size().
  189. * @return the string length
  190. * @stable ICU 4.2
  191. */
  192. int32_t length() const { return length_; }
  193. /**
  194. * Returns whether the string is empty.
  195. * @return TRUE if the string is empty
  196. * @stable ICU 4.2
  197. */
  198. UBool empty() const { return length_ == 0; }
  199. /**
  200. * Sets to an empty string.
  201. * @stable ICU 4.2
  202. */
  203. void clear() { ptr_ = nullptr; length_ = 0; }
  204. /**
  205. * Reset the stringpiece to refer to new data.
  206. * @param xdata pointer the new string data. Need not be nul terminated.
  207. * @param len the length of the new data
  208. * @stable ICU 4.8
  209. */
  210. void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; }
  211. /**
  212. * Reset the stringpiece to refer to new data.
  213. * @param str a pointer to a NUL-terminated string.
  214. * @stable ICU 4.8
  215. */
  216. void set(const char* str);
  217. #ifndef U_HIDE_DRAFT_API
  218. #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
  219. /**
  220. * Resets the stringpiece to refer to new data.
  221. * @param xdata pointer the new string data. Need not be NUL-terminated.
  222. * @param len the length of the new data
  223. * @draft ICU 67
  224. */
  225. inline void set(const char8_t* xdata, int32_t len) {
  226. set(reinterpret_cast<const char*>(xdata), len);
  227. }
  228. /**
  229. * Resets the stringpiece to refer to new data.
  230. * @param str a pointer to a NUL-terminated string.
  231. * @draft ICU 67
  232. */
  233. inline void set(const char8_t* str) {
  234. set(reinterpret_cast<const char*>(str));
  235. }
  236. #endif
  237. #endif // U_HIDE_DRAFT_API
  238. /**
  239. * Removes the first n string units.
  240. * @param n prefix length, must be non-negative and <=length()
  241. * @stable ICU 4.2
  242. */
  243. void remove_prefix(int32_t n) {
  244. if (n >= 0) {
  245. if (n > length_) {
  246. n = length_;
  247. }
  248. ptr_ += n;
  249. length_ -= n;
  250. }
  251. }
  252. /**
  253. * Removes the last n string units.
  254. * @param n suffix length, must be non-negative and <=length()
  255. * @stable ICU 4.2
  256. */
  257. void remove_suffix(int32_t n) {
  258. if (n >= 0) {
  259. if (n <= length_) {
  260. length_ -= n;
  261. } else {
  262. length_ = 0;
  263. }
  264. }
  265. }
  266. #ifndef U_HIDE_DRAFT_API
  267. /**
  268. * Searches the StringPiece for the given search string (needle);
  269. * @param needle The string for which to search.
  270. * @param offset Where to start searching within this string (haystack).
  271. * @return The offset of needle in haystack, or -1 if not found.
  272. * @draft ICU 67
  273. */
  274. int32_t find(StringPiece needle, int32_t offset);
  275. /**
  276. * Compares this StringPiece with the other StringPiece, with semantics
  277. * similar to std::string::compare().
  278. * @param other The string to compare to.
  279. * @return below zero if this < other; above zero if this > other; 0 if this == other.
  280. * @draft ICU 67
  281. */
  282. int32_t compare(StringPiece other);
  283. #endif // U_HIDE_DRAFT_API
  284. /**
  285. * Maximum integer, used as a default value for substring methods.
  286. * @stable ICU 4.2
  287. */
  288. static const int32_t npos; // = 0x7fffffff;
  289. /**
  290. * Returns a substring of this StringPiece.
  291. * @param pos start position; must be non-negative and <= length().
  292. * @param len length of the substring;
  293. * must be non-negative and will be pinned to at most length() - pos.
  294. * @return the substring StringPiece
  295. * @stable ICU 4.2
  296. */
  297. StringPiece substr(int32_t pos, int32_t len = npos) const {
  298. return StringPiece(*this, pos, len);
  299. }
  300. };
  301. /**
  302. * Global operator == for StringPiece
  303. * @param x The first StringPiece to compare.
  304. * @param y The second StringPiece to compare.
  305. * @return TRUE if the string data is equal
  306. * @stable ICU 4.8
  307. */
  308. U_EXPORT UBool U_EXPORT2
  309. operator==(const StringPiece& x, const StringPiece& y);
  310. /**
  311. * Global operator != for StringPiece
  312. * @param x The first StringPiece to compare.
  313. * @param y The second StringPiece to compare.
  314. * @return TRUE if the string data is not equal
  315. * @stable ICU 4.8
  316. */
  317. inline UBool operator!=(const StringPiece& x, const StringPiece& y) {
  318. return !(x == y);
  319. }
  320. U_NAMESPACE_END
  321. #endif /* U_SHOW_CPLUSPLUS_API */
  322. #endif // __STRINGPIECE_H__