convert.hpp 3.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. //
  2. // Copyright (c) 2012 Artyom Beilis (Tonkikh)
  3. // Copyright (c) 2020 Alexander Grund
  4. //
  5. // Distributed under the Boost Software License, Version 1.0. (See
  6. // accompanying file LICENSE or copy at
  7. // http://www.boost.org/LICENSE_1_0.txt)
  8. //
  9. #ifndef BOOST_NOWIDE_UTF_CONVERT_HPP_INCLUDED
  10. #define BOOST_NOWIDE_UTF_CONVERT_HPP_INCLUDED
  11. #include <boost/nowide/detail/is_string_container.hpp>
  12. #include <boost/nowide/replacement.hpp>
  13. #include <boost/nowide/utf/utf.hpp>
  14. #include <iterator>
  15. #include <string>
  16. namespace boost {
  17. namespace nowide {
  18. namespace utf {
  /// Count the code units of a zero-terminated string.
  ///
  /// Works like `std::strlen(s)` but accepts any character type, so it can also
  /// be used for wide, UTF-16 and UTF-32 strings.
  ///
  /// \param s Pointer to the first code unit; the sequence must contain a terminating zero element
  /// \return Number of elements of type \a Char that precede the first zero element
  template<typename Char>
  size_t strlen(const Char* s)
  {
      size_t length = 0;
      // Walk forward until the terminating zero element is reached
      for(; s[length]; ++length)
      {
      }
      return length;
  }
  30. /// Convert a buffer of UTF sequences in the range [source_begin, source_end)
  31. /// from \a CharIn to \a CharOut to the output \a buffer of size \a buffer_size.
  32. ///
  33. /// \return original buffer containing the NULL terminated string or NULL
  34. ///
  35. /// If there is not enough room in the buffer NULL is returned, and the content of the buffer is undefined.
  36. /// Any illegal sequences are replaced with the replacement character, see #BOOST_NOWIDE_REPLACEMENT_CHARACTER
  37. template<typename CharOut, typename CharIn>
  38. CharOut*
  39. convert_buffer(CharOut* buffer, size_t buffer_size, const CharIn* source_begin, const CharIn* source_end)
  40. {
  41. CharOut* rv = buffer;
  42. if(buffer_size == 0)
  43. return nullptr;
  44. buffer_size--;
  45. while(source_begin != source_end)
  46. {
  47. code_point c = utf_traits<CharIn>::decode(source_begin, source_end);
  48. if(c == illegal || c == incomplete)
  49. {
  50. c = BOOST_NOWIDE_REPLACEMENT_CHARACTER;
  51. }
  52. size_t width = utf_traits<CharOut>::width(c);
  53. if(buffer_size < width)
  54. {
  55. rv = NULL;
  56. break;
  57. }
  58. buffer = utf_traits<CharOut>::encode(c, buffer);
  59. buffer_size -= width;
  60. }
  61. *buffer++ = 0;
  62. return rv;
  63. }
  64. /// Convert the UTF sequences in range [begin, end) from \a CharIn to \a CharOut
  65. /// and return it as a string
  66. ///
  67. /// Any illegal sequences are replaced with the replacement character, see #BOOST_NOWIDE_REPLACEMENT_CHARACTER
  68. /// \tparam CharOut Output character type
  69. template<typename CharOut, typename CharIn>
  70. std::basic_string<CharOut> convert_string(const CharIn* begin, const CharIn* end)
  71. {
  72. std::basic_string<CharOut> result;
  73. result.reserve(end - begin);
  74. using inserter_type = std::back_insert_iterator<std::basic_string<CharOut>>;
  75. inserter_type inserter(result);
  76. code_point c;
  77. while(begin != end)
  78. {
  79. c = utf_traits<CharIn>::decode(begin, end);
  80. if(c == illegal || c == incomplete)
  81. {
  82. c = BOOST_NOWIDE_REPLACEMENT_CHARACTER;
  83. }
  84. utf_traits<CharOut>::encode(c, inserter);
  85. }
  86. return result;
  87. }
  88. } // namespace utf
  89. } // namespace nowide
  90. } // namespace boost
  91. #endif