utf8.hpp 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. /*=============================================================================
  2. Copyright (c) 2001-2011 Joel de Guzman
  3. Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. ==============================================================================*/
  6. #if !defined(BOOST_SPIRIT_UC_TYPES_NOVEMBER_23_2008_0840PM)
  7. #define BOOST_SPIRIT_UC_TYPES_NOVEMBER_23_2008_0840PM
  8. #if defined(_MSC_VER)
  9. #pragma once
  10. #endif
  11. #include <boost/cstdint.hpp>
  12. #include <boost/foreach.hpp>
  13. #include <boost/regex/pending/unicode_iterator.hpp>
  14. #include <boost/type_traits/make_unsigned.hpp>
  15. #include <string>
  16. namespace boost { namespace spirit
  17. {
  // A single UCS-4 character, held in a 32-bit unsigned integer.
  18. typedef ::boost::uint32_t ucs4_char;
  // A single UTF-8 code unit, held in a plain char.
  19. typedef char utf8_char;
  // String types built from the two character types above.
  20. typedef std::basic_string<ucs4_char> ucs4_string;
  21. typedef std::basic_string<utf8_char> utf8_string;
  22. template <typename Char>
  23. inline utf8_string to_utf8(Char value)
  24. {
  25. // always store as UTF8
  26. utf8_string result;
  27. typedef std::back_insert_iterator<utf8_string> insert_iter;
  28. insert_iter out_iter(result);
  29. utf8_output_iterator<insert_iter> utf8_iter(out_iter);
  30. typedef typename make_unsigned<Char>::type UChar;
  31. *utf8_iter = (UChar)value;
  32. return result;
  33. }
  34. template <typename Char>
  35. inline utf8_string to_utf8(Char const* str)
  36. {
  37. // always store as UTF8
  38. utf8_string result;
  39. typedef std::back_insert_iterator<utf8_string> insert_iter;
  40. insert_iter out_iter(result);
  41. utf8_output_iterator<insert_iter> utf8_iter(out_iter);
  42. typedef typename make_unsigned<Char>::type UChar;
  43. while (*str)
  44. *utf8_iter++ = (UChar)*str++;
  45. return result;
  46. }
  47. template <typename Char, typename Traits, typename Allocator>
  48. inline utf8_string
  49. to_utf8(std::basic_string<Char, Traits, Allocator> const& str)
  50. {
  51. // always store as UTF8
  52. utf8_string result;
  53. typedef std::back_insert_iterator<utf8_string> insert_iter;
  54. insert_iter out_iter(result);
  55. utf8_output_iterator<insert_iter> utf8_iter(out_iter);
  56. typedef typename make_unsigned<Char>::type UChar;
  57. BOOST_FOREACH(Char ch, str)
  58. {
  59. *utf8_iter++ = (UChar)ch;
  60. }
  61. return result;
  62. }
  63. // Assume wchar_t content is UTF-16 on MSVC, or mingw/wineg++ with -fshort-wchar
  64. #if defined(_MSC_VER) || defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 2
  65. inline utf8_string to_utf8(wchar_t value)
  66. {
  67. utf8_string result;
  68. typedef std::back_insert_iterator<utf8_string> insert_iter;
  69. insert_iter out_iter(result);
  70. utf8_output_iterator<insert_iter> utf8_iter(out_iter);
  71. u16_to_u32_iterator<wchar_t const*, ucs4_char> ucs4_iter(&value);
  72. *utf8_iter++ = *ucs4_iter;
  73. return result;
  74. }
  75. inline utf8_string to_utf8(wchar_t const* str)
  76. {
  77. utf8_string result;
  78. typedef std::back_insert_iterator<utf8_string> insert_iter;
  79. insert_iter out_iter(result);
  80. utf8_output_iterator<insert_iter> utf8_iter(out_iter);
  81. u16_to_u32_iterator<wchar_t const*, ucs4_char> ucs4_iter(str);
  82. for (ucs4_char c; (c = *ucs4_iter) != ucs4_char(); ++ucs4_iter) {
  83. *utf8_iter++ = c;
  84. }
  85. return result;
  86. }
  87. template <typename Traits, typename Allocator>
  88. inline utf8_string
  89. to_utf8(std::basic_string<wchar_t, Traits, Allocator> const& str)
  90. {
  91. return to_utf8(str.c_str());
  92. }
  93. #endif
  94. }}
  95. #endif