c_regex_traits.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE c_regex_traits.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares regular expression traits class that wraps the global C locale.
  16. */
  17. #ifndef BOOST_C_REGEX_TRAITS_HPP_INCLUDED
  18. #define BOOST_C_REGEX_TRAITS_HPP_INCLUDED
  19. #include <boost/regex/config.hpp>
  20. #include <boost/regex/v5/regex_workaround.hpp>
  21. #include <cctype>
  22. namespace boost{
  23. namespace BOOST_REGEX_DETAIL_NS {
  24. enum
  25. {
  26. char_class_space = 1 << 0,
  27. char_class_print = 1 << 1,
  28. char_class_cntrl = 1 << 2,
  29. char_class_upper = 1 << 3,
  30. char_class_lower = 1 << 4,
  31. char_class_alpha = 1 << 5,
  32. char_class_digit = 1 << 6,
  33. char_class_punct = 1 << 7,
  34. char_class_xdigit = 1 << 8,
  35. char_class_alnum = char_class_alpha | char_class_digit,
  36. char_class_graph = char_class_alnum | char_class_punct,
  37. char_class_blank = 1 << 9,
  38. char_class_word = 1 << 10,
  39. char_class_unicode = 1 << 11,
  40. char_class_horizontal = 1 << 12,
  41. char_class_vertical = 1 << 13
  42. };
  43. }
  44. template <class charT>
  45. struct c_regex_traits;
  46. template<>
  47. struct c_regex_traits<char>
  48. {
  49. c_regex_traits(){}
  50. typedef char char_type;
  51. typedef std::size_t size_type;
  52. typedef std::string string_type;
  53. struct locale_type{};
  54. typedef std::uint32_t char_class_type;
  55. static size_type length(const char_type* p)
  56. {
  57. return (std::strlen)(p);
  58. }
  59. char translate(char c) const
  60. {
  61. return c;
  62. }
  63. char translate_nocase(char c) const
  64. {
  65. return static_cast<char>((std::tolower)(static_cast<unsigned char>(c)));
  66. }
  67. static string_type transform(const char* p1, const char* p2);
  68. static string_type transform_primary(const char* p1, const char* p2);
  69. static char_class_type lookup_classname(const char* p1, const char* p2);
  70. static string_type lookup_collatename(const char* p1, const char* p2);
  71. static bool isctype(char, char_class_type);
  72. static int value(char, int);
  73. locale_type imbue(locale_type l)
  74. { return l; }
  75. locale_type getloc()const
  76. { return locale_type(); }
  77. private:
  78. // this type is not copyable:
  79. c_regex_traits(const c_regex_traits&);
  80. c_regex_traits& operator=(const c_regex_traits&);
  81. };
  82. #ifndef BOOST_NO_WREGEX
  83. template<>
  84. struct c_regex_traits<wchar_t>
  85. {
  86. c_regex_traits(){}
  87. typedef wchar_t char_type;
  88. typedef std::size_t size_type;
  89. typedef std::wstring string_type;
  90. struct locale_type{};
  91. typedef std::uint32_t char_class_type;
  92. static size_type length(const char_type* p)
  93. {
  94. return (std::wcslen)(p);
  95. }
  96. wchar_t translate(wchar_t c) const
  97. {
  98. return c;
  99. }
  100. wchar_t translate_nocase(wchar_t c) const
  101. {
  102. return (std::towlower)(c);
  103. }
  104. static string_type transform(const wchar_t* p1, const wchar_t* p2);
  105. static string_type transform_primary(const wchar_t* p1, const wchar_t* p2);
  106. static char_class_type lookup_classname(const wchar_t* p1, const wchar_t* p2);
  107. static string_type lookup_collatename(const wchar_t* p1, const wchar_t* p2);
  108. static bool isctype(wchar_t, char_class_type);
  109. static int value(wchar_t, int);
  110. locale_type imbue(locale_type l)
  111. { return l; }
  112. locale_type getloc()const
  113. { return locale_type(); }
  114. private:
  115. // this type is not copyable:
  116. c_regex_traits(const c_regex_traits&);
  117. c_regex_traits& operator=(const c_regex_traits&);
  118. };
  119. #endif // BOOST_NO_WREGEX
  120. inline c_regex_traits<char>::string_type c_regex_traits<char>::transform(const char* p1, const char* p2)
  121. {
  122. std::string result(10, ' ');
  123. std::size_t s = result.size();
  124. std::size_t r;
  125. std::string src(p1, p2);
  126. while (s < (r = std::strxfrm(&*result.begin(), src.c_str(), s)))
  127. {
  128. #if defined(_CPPLIB_VER)
  129. //
  130. // A bug in VC11 and 12 causes the program to hang if we pass a null-string
  131. // to std::strxfrm, but only for certain locales :-(
  132. // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
  133. //
  134. if (r == INT_MAX)
  135. {
  136. result.erase();
  137. result.insert(result.begin(), static_cast<char>(0));
  138. return result;
  139. }
  140. #endif
  141. result.append(r - s + 3, ' ');
  142. s = result.size();
  143. }
  144. result.erase(r);
  145. return result;
  146. }
  147. inline c_regex_traits<char>::string_type c_regex_traits<char>::transform_primary(const char* p1, const char* p2)
  148. {
  149. static char s_delim;
  150. static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<c_regex_traits<char>*>(0), &s_delim);
  151. std::string result;
  152. //
  153. // What we do here depends upon the format of the sort key returned by
  154. // sort key returned by this->transform:
  155. //
  156. switch (s_collate_type)
  157. {
  158. case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
  159. case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
  160. // the best we can do is translate to lower case, then get a regular sort key:
  161. {
  162. result.assign(p1, p2);
  163. for (std::string::size_type i = 0; i < result.size(); ++i)
  164. result[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(result[i])));
  165. result = transform(&*result.begin(), &*result.begin() + result.size());
  166. break;
  167. }
  168. case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
  169. {
  170. // get a regular sort key, and then truncate it:
  171. result = transform(p1, p2);
  172. result.erase(s_delim);
  173. break;
  174. }
  175. case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
  176. // get a regular sort key, and then truncate everything after the delim:
  177. result = transform(p1, p2);
  178. if ((!result.empty()) && (result[0] == s_delim))
  179. break;
  180. std::size_t i;
  181. for (i = 0; i < result.size(); ++i)
  182. {
  183. if (result[i] == s_delim)
  184. break;
  185. }
  186. result.erase(i);
  187. break;
  188. }
  189. if (result.empty())
  190. result = std::string(1, char(0));
  191. return result;
  192. }
  193. inline c_regex_traits<char>::char_class_type c_regex_traits<char>::lookup_classname(const char* p1, const char* p2)
  194. {
  195. using namespace BOOST_REGEX_DETAIL_NS;
  196. static const char_class_type masks[] =
  197. {
  198. 0,
  199. char_class_alnum,
  200. char_class_alpha,
  201. char_class_blank,
  202. char_class_cntrl,
  203. char_class_digit,
  204. char_class_digit,
  205. char_class_graph,
  206. char_class_horizontal,
  207. char_class_lower,
  208. char_class_lower,
  209. char_class_print,
  210. char_class_punct,
  211. char_class_space,
  212. char_class_space,
  213. char_class_upper,
  214. char_class_unicode,
  215. char_class_upper,
  216. char_class_vertical,
  217. char_class_alnum | char_class_word,
  218. char_class_alnum | char_class_word,
  219. char_class_xdigit,
  220. };
  221. int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
  222. if (idx < 0)
  223. {
  224. std::string s(p1, p2);
  225. for (std::string::size_type i = 0; i < s.size(); ++i)
  226. s[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(s[i])));
  227. idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
  228. }
  229. BOOST_REGEX_ASSERT(std::size_t(idx) + 1u < sizeof(masks) / sizeof(masks[0]));
  230. return masks[idx + 1];
  231. }
  232. inline bool c_regex_traits<char>::isctype(char c, char_class_type mask)
  233. {
  234. using namespace BOOST_REGEX_DETAIL_NS;
  235. return
  236. ((mask & char_class_space) && (std::isspace)(static_cast<unsigned char>(c)))
  237. || ((mask & char_class_print) && (std::isprint)(static_cast<unsigned char>(c)))
  238. || ((mask & char_class_cntrl) && (std::iscntrl)(static_cast<unsigned char>(c)))
  239. || ((mask & char_class_upper) && (std::isupper)(static_cast<unsigned char>(c)))
  240. || ((mask & char_class_lower) && (std::islower)(static_cast<unsigned char>(c)))
  241. || ((mask & char_class_alpha) && (std::isalpha)(static_cast<unsigned char>(c)))
  242. || ((mask & char_class_digit) && (std::isdigit)(static_cast<unsigned char>(c)))
  243. || ((mask & char_class_punct) && (std::ispunct)(static_cast<unsigned char>(c)))
  244. || ((mask & char_class_xdigit) && (std::isxdigit)(static_cast<unsigned char>(c)))
  245. || ((mask & char_class_blank) && (std::isspace)(static_cast<unsigned char>(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
  246. || ((mask & char_class_word) && (c == '_'))
  247. || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v')))
  248. || ((mask & char_class_horizontal) && (std::isspace)(static_cast<unsigned char>(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != '\v'));
  249. }
  250. inline c_regex_traits<char>::string_type c_regex_traits<char>::lookup_collatename(const char* p1, const char* p2)
  251. {
  252. std::string s(p1, p2);
  253. s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s);
  254. if (s.empty() && (p2 - p1 == 1))
  255. s.append(1, *p1);
  256. return s;
  257. }
  258. inline int c_regex_traits<char>::value(char c, int radix)
  259. {
  260. char b[2] = { c, '\0', };
  261. char* ep;
  262. int result = std::strtol(b, &ep, radix);
  263. if (ep == b)
  264. return -1;
  265. return result;
  266. }
  267. #ifndef BOOST_NO_WREGEX
  268. inline c_regex_traits<wchar_t>::string_type c_regex_traits<wchar_t>::transform(const wchar_t* p1, const wchar_t* p2)
  269. {
  270. std::size_t r;
  271. std::size_t s = 10;
  272. std::wstring src(p1, p2);
  273. std::wstring result(s, L' ');
  274. while (s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s)))
  275. {
  276. #if defined(_CPPLIB_VER)
  277. //
  278. // A bug in VC11 and 12 causes the program to hang if we pass a null-string
  279. // to std::strxfrm, but only for certain locales :-(
  280. // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
  281. //
  282. if (r == INT_MAX)
  283. {
  284. result.erase();
  285. result.insert(result.begin(), static_cast<wchar_t>(0));
  286. return result;
  287. }
  288. #endif
  289. result.append(r - s + 3, L' ');
  290. s = result.size();
  291. }
  292. result.erase(r);
  293. return result;
  294. }
  295. inline c_regex_traits<wchar_t>::string_type c_regex_traits<wchar_t>::transform_primary(const wchar_t* p1, const wchar_t* p2)
  296. {
  297. static wchar_t s_delim;
  298. static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<const c_regex_traits<wchar_t>*>(0), &s_delim);
  299. std::wstring result;
  300. //
  301. // What we do here depends upon the format of the sort key returned by
  302. // sort key returned by this->transform:
  303. //
  304. switch (s_collate_type)
  305. {
  306. case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
  307. case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
  308. // the best we can do is translate to lower case, then get a regular sort key:
  309. {
  310. result.assign(p1, p2);
  311. for (std::wstring::size_type i = 0; i < result.size(); ++i)
  312. result[i] = (std::towlower)(result[i]);
  313. result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
  314. break;
  315. }
  316. case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
  317. {
  318. // get a regular sort key, and then truncate it:
  319. result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
  320. result.erase(s_delim);
  321. break;
  322. }
  323. case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
  324. // get a regular sort key, and then truncate everything after the delim:
  325. result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
  326. if ((!result.empty()) && (result[0] == s_delim))
  327. break;
  328. std::size_t i;
  329. for (i = 0; i < result.size(); ++i)
  330. {
  331. if (result[i] == s_delim)
  332. break;
  333. }
  334. result.erase(i);
  335. break;
  336. }
  337. if (result.empty())
  338. result = std::wstring(1, char(0));
  339. return result;
  340. }
  341. inline c_regex_traits<wchar_t>::char_class_type c_regex_traits<wchar_t>::lookup_classname(const wchar_t* p1, const wchar_t* p2)
  342. {
  343. using namespace BOOST_REGEX_DETAIL_NS;
  344. static const char_class_type masks[] =
  345. {
  346. 0,
  347. char_class_alnum,
  348. char_class_alpha,
  349. char_class_blank,
  350. char_class_cntrl,
  351. char_class_digit,
  352. char_class_digit,
  353. char_class_graph,
  354. char_class_horizontal,
  355. char_class_lower,
  356. char_class_lower,
  357. char_class_print,
  358. char_class_punct,
  359. char_class_space,
  360. char_class_space,
  361. char_class_upper,
  362. char_class_unicode,
  363. char_class_upper,
  364. char_class_vertical,
  365. char_class_alnum | char_class_word,
  366. char_class_alnum | char_class_word,
  367. char_class_xdigit,
  368. };
  369. int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
  370. if (idx < 0)
  371. {
  372. std::wstring s(p1, p2);
  373. for (std::wstring::size_type i = 0; i < s.size(); ++i)
  374. s[i] = (std::towlower)(s[i]);
  375. idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
  376. }
  377. BOOST_REGEX_ASSERT(idx + 1 < static_cast<int>(sizeof(masks) / sizeof(masks[0])));
  378. return masks[idx + 1];
  379. }
  380. inline bool c_regex_traits<wchar_t>::isctype(wchar_t c, char_class_type mask)
  381. {
  382. using namespace BOOST_REGEX_DETAIL_NS;
  383. return
  384. ((mask & char_class_space) && (std::iswspace)(c))
  385. || ((mask & char_class_print) && (std::iswprint)(c))
  386. || ((mask & char_class_cntrl) && (std::iswcntrl)(c))
  387. || ((mask & char_class_upper) && (std::iswupper)(c))
  388. || ((mask & char_class_lower) && (std::iswlower)(c))
  389. || ((mask & char_class_alpha) && (std::iswalpha)(c))
  390. || ((mask & char_class_digit) && (std::iswdigit)(c))
  391. || ((mask & char_class_punct) && (std::iswpunct)(c))
  392. || ((mask & char_class_xdigit) && (std::iswxdigit)(c))
  393. || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
  394. || ((mask & char_class_word) && (c == '_'))
  395. || ((mask & char_class_unicode) && (c & ~static_cast<wchar_t>(0xff)))
  396. || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == L'\v')))
  397. || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != L'\v'));
  398. }
  399. inline c_regex_traits<wchar_t>::string_type c_regex_traits<wchar_t>::lookup_collatename(const wchar_t* p1, const wchar_t* p2)
  400. {
  401. std::string name;
  402. // Usual msvc warning suppression does not work here with std::string template constructor.... use a workaround instead:
  403. for (const wchar_t* pos = p1; pos != p2; ++pos)
  404. name.push_back((char)*pos);
  405. name = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(name);
  406. if (!name.empty())
  407. return string_type(name.begin(), name.end());
  408. if (p2 - p1 == 1)
  409. return string_type(1, *p1);
  410. return string_type();
  411. }
  412. inline int c_regex_traits<wchar_t>::value(wchar_t c, int radix)
  413. {
  414. #ifdef BOOST_BORLANDC
  415. // workaround for broken wcstol:
  416. if ((std::iswxdigit)(c) == 0)
  417. return -1;
  418. #endif
  419. wchar_t b[2] = { c, '\0', };
  420. wchar_t* ep;
  421. int result = std::wcstol(b, &ep, radix);
  422. if (ep == b)
  423. return -1;
  424. return result;
  425. }
  426. #endif
  427. }
  428. #endif