c_regex_traits.hpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE c_regex_traits.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares regular expression traits class that wraps the global C locale.
  16. */
  17. #ifndef BOOST_C_REGEX_TRAITS_HPP_INCLUDED
  18. #define BOOST_C_REGEX_TRAITS_HPP_INCLUDED
  19. #ifndef BOOST_REGEX_CONFIG_HPP
  20. #include <boost/regex/config.hpp>
  21. #endif
  22. #ifndef BOOST_REGEX_WORKAROUND_HPP
  23. #include <boost/regex/v4/regex_workaround.hpp>
  24. #endif
  25. #include <cctype>
  26. #ifdef BOOST_NO_STDC_NAMESPACE
  27. namespace std{
  28. using ::strlen; using ::tolower;
  29. }
  30. #endif
  31. #ifdef BOOST_MSVC
  32. #pragma warning(push)
  33. #pragma warning(disable: 4103 4244)
  34. #endif
  35. #ifdef BOOST_HAS_ABI_HEADERS
  36. # include BOOST_ABI_PREFIX
  37. #endif
  38. #ifdef BOOST_MSVC
  39. #pragma warning(pop)
  40. #endif
  41. namespace boost{
  42. namespace BOOST_REGEX_DETAIL_NS {
  //
  // Bit flags naming the character classes used by the c_regex_traits
  // classification functions.  Each primitive class owns one bit; the
  // composite classes at the end are unions of primitive bits.
  //
  enum
  {
     char_class_space      = 0x0001,
     char_class_print      = 0x0002,
     char_class_cntrl      = 0x0004,
     char_class_upper      = 0x0008,
     char_class_lower      = 0x0010,
     char_class_alpha      = 0x0020,
     char_class_digit      = 0x0040,
     char_class_punct      = 0x0080,
     char_class_xdigit     = 0x0100,
     char_class_blank      = 0x0200,
     char_class_word       = 0x0400,
     char_class_unicode    = 0x0800,
     char_class_horizontal = 0x1000,
     char_class_vertical   = 0x2000,
     // composite classes:
     char_class_alnum      = char_class_alpha | char_class_digit,
     char_class_graph      = char_class_alnum | char_class_punct
  };
  62. }
  // Primary template: declaration only, no generic definition is given here.
  template <typename charT>
  struct c_regex_traits;
  65. template<>
  66. struct c_regex_traits<char>
  67. {
  68. c_regex_traits(){}
  69. typedef char char_type;
  70. typedef std::size_t size_type;
  71. typedef std::string string_type;
  72. struct locale_type{};
  73. typedef boost::uint32_t char_class_type;
  74. static size_type length(const char_type* p)
  75. {
  76. return (std::strlen)(p);
  77. }
  78. char translate(char c) const
  79. {
  80. return c;
  81. }
  82. char translate_nocase(char c) const
  83. {
  84. return static_cast<char>((std::tolower)(static_cast<unsigned char>(c)));
  85. }
  86. static string_type BOOST_REGEX_CALL transform(const char* p1, const char* p2);
  87. static string_type BOOST_REGEX_CALL transform_primary(const char* p1, const char* p2);
  88. static char_class_type BOOST_REGEX_CALL lookup_classname(const char* p1, const char* p2);
  89. static string_type BOOST_REGEX_CALL lookup_collatename(const char* p1, const char* p2);
  90. static bool BOOST_REGEX_CALL isctype(char, char_class_type);
  91. static int BOOST_REGEX_CALL value(char, int);
  92. locale_type imbue(locale_type l)
  93. { return l; }
  94. locale_type getloc()const
  95. { return locale_type(); }
  96. private:
  97. // this type is not copyable:
  98. c_regex_traits(const c_regex_traits&);
  99. c_regex_traits& operator=(const c_regex_traits&);
  100. };
  101. #ifndef BOOST_NO_WREGEX
  102. template<>
  103. struct c_regex_traits<wchar_t>
  104. {
  105. c_regex_traits(){}
  106. typedef wchar_t char_type;
  107. typedef std::size_t size_type;
  108. typedef std::wstring string_type;
  109. struct locale_type{};
  110. typedef boost::uint32_t char_class_type;
  111. static size_type length(const char_type* p)
  112. {
  113. return (std::wcslen)(p);
  114. }
  115. wchar_t translate(wchar_t c) const
  116. {
  117. return c;
  118. }
  119. wchar_t translate_nocase(wchar_t c) const
  120. {
  121. return (std::towlower)(c);
  122. }
  123. static string_type BOOST_REGEX_CALL transform(const wchar_t* p1, const wchar_t* p2);
  124. static string_type BOOST_REGEX_CALL transform_primary(const wchar_t* p1, const wchar_t* p2);
  125. static char_class_type BOOST_REGEX_CALL lookup_classname(const wchar_t* p1, const wchar_t* p2);
  126. static string_type BOOST_REGEX_CALL lookup_collatename(const wchar_t* p1, const wchar_t* p2);
  127. static bool BOOST_REGEX_CALL isctype(wchar_t, char_class_type);
  128. static int BOOST_REGEX_CALL value(wchar_t, int);
  129. locale_type imbue(locale_type l)
  130. { return l; }
  131. locale_type getloc()const
  132. { return locale_type(); }
  133. private:
  134. // this type is not copyable:
  135. c_regex_traits(const c_regex_traits&);
  136. c_regex_traits& operator=(const c_regex_traits&);
  137. };
  138. #endif // BOOST_NO_WREGEX
  139. inline c_regex_traits<char>::string_type BOOST_REGEX_CALL c_regex_traits<char>::transform(const char* p1, const char* p2)
  140. {
  141. std::string result(10, ' ');
  142. std::size_t s = result.size();
  143. std::size_t r;
  144. std::string src(p1, p2);
  145. while (s < (r = std::strxfrm(&*result.begin(), src.c_str(), s)))
  146. {
  147. #if defined(_CPPLIB_VER)
  148. //
  149. // A bug in VC11 and 12 causes the program to hang if we pass a null-string
  150. // to std::strxfrm, but only for certain locales :-(
  151. // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
  152. //
  153. if (r == INT_MAX)
  154. {
  155. result.erase();
  156. result.insert(result.begin(), static_cast<char>(0));
  157. return result;
  158. }
  159. #endif
  160. result.append(r - s + 3, ' ');
  161. s = result.size();
  162. }
  163. result.erase(r);
  164. return result;
  165. }
  166. inline c_regex_traits<char>::string_type BOOST_REGEX_CALL c_regex_traits<char>::transform_primary(const char* p1, const char* p2)
  167. {
  168. static char s_delim;
  169. static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<c_regex_traits<char>*>(0), &s_delim);
  170. std::string result;
  171. //
  172. // What we do here depends upon the format of the sort key returned by
  173. // sort key returned by this->transform:
  174. //
  175. switch (s_collate_type)
  176. {
  177. case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
  178. case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
  179. // the best we can do is translate to lower case, then get a regular sort key:
  180. {
  181. result.assign(p1, p2);
  182. for (std::string::size_type i = 0; i < result.size(); ++i)
  183. result[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(result[i])));
  184. result = transform(&*result.begin(), &*result.begin() + result.size());
  185. break;
  186. }
  187. case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
  188. {
  189. // get a regular sort key, and then truncate it:
  190. result = transform(p1, p2);
  191. result.erase(s_delim);
  192. break;
  193. }
  194. case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
  195. // get a regular sort key, and then truncate everything after the delim:
  196. result = transform(p1, p2);
  197. if ((!result.empty()) && (result[0] == s_delim))
  198. break;
  199. std::size_t i;
  200. for (i = 0; i < result.size(); ++i)
  201. {
  202. if (result[i] == s_delim)
  203. break;
  204. }
  205. result.erase(i);
  206. break;
  207. }
  208. if (result.empty())
  209. result = std::string(1, char(0));
  210. return result;
  211. }
  212. inline c_regex_traits<char>::char_class_type BOOST_REGEX_CALL c_regex_traits<char>::lookup_classname(const char* p1, const char* p2)
  213. {
  214. using namespace BOOST_REGEX_DETAIL_NS;
  215. static const char_class_type masks[] =
  216. {
  217. 0,
  218. char_class_alnum,
  219. char_class_alpha,
  220. char_class_blank,
  221. char_class_cntrl,
  222. char_class_digit,
  223. char_class_digit,
  224. char_class_graph,
  225. char_class_horizontal,
  226. char_class_lower,
  227. char_class_lower,
  228. char_class_print,
  229. char_class_punct,
  230. char_class_space,
  231. char_class_space,
  232. char_class_upper,
  233. char_class_unicode,
  234. char_class_upper,
  235. char_class_vertical,
  236. char_class_alnum | char_class_word,
  237. char_class_alnum | char_class_word,
  238. char_class_xdigit,
  239. };
  240. int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
  241. if (idx < 0)
  242. {
  243. std::string s(p1, p2);
  244. for (std::string::size_type i = 0; i < s.size(); ++i)
  245. s[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(s[i])));
  246. idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
  247. }
  248. BOOST_REGEX_ASSERT(std::size_t(idx) + 1u < sizeof(masks) / sizeof(masks[0]));
  249. return masks[idx + 1];
  250. }
  251. inline bool BOOST_REGEX_CALL c_regex_traits<char>::isctype(char c, char_class_type mask)
  252. {
  253. using namespace BOOST_REGEX_DETAIL_NS;
  254. return
  255. ((mask & char_class_space) && (std::isspace)(static_cast<unsigned char>(c)))
  256. || ((mask & char_class_print) && (std::isprint)(static_cast<unsigned char>(c)))
  257. || ((mask & char_class_cntrl) && (std::iscntrl)(static_cast<unsigned char>(c)))
  258. || ((mask & char_class_upper) && (std::isupper)(static_cast<unsigned char>(c)))
  259. || ((mask & char_class_lower) && (std::islower)(static_cast<unsigned char>(c)))
  260. || ((mask & char_class_alpha) && (std::isalpha)(static_cast<unsigned char>(c)))
  261. || ((mask & char_class_digit) && (std::isdigit)(static_cast<unsigned char>(c)))
  262. || ((mask & char_class_punct) && (std::ispunct)(static_cast<unsigned char>(c)))
  263. || ((mask & char_class_xdigit) && (std::isxdigit)(static_cast<unsigned char>(c)))
  264. || ((mask & char_class_blank) && (std::isspace)(static_cast<unsigned char>(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
  265. || ((mask & char_class_word) && (c == '_'))
  266. || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v')))
  267. || ((mask & char_class_horizontal) && (std::isspace)(static_cast<unsigned char>(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != '\v'));
  268. }
  269. inline c_regex_traits<char>::string_type BOOST_REGEX_CALL c_regex_traits<char>::lookup_collatename(const char* p1, const char* p2)
  270. {
  271. std::string s(p1, p2);
  272. s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s);
  273. if (s.empty() && (p2 - p1 == 1))
  274. s.append(1, *p1);
  275. return s;
  276. }
  277. inline int BOOST_REGEX_CALL c_regex_traits<char>::value(char c, int radix)
  278. {
  279. char b[2] = { c, '\0', };
  280. char* ep;
  281. int result = std::strtol(b, &ep, radix);
  282. if (ep == b)
  283. return -1;
  284. return result;
  285. }
  286. #ifndef BOOST_NO_WREGEX
  287. inline c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform(const wchar_t* p1, const wchar_t* p2)
  288. {
  289. std::size_t r;
  290. std::size_t s = 10;
  291. std::wstring src(p1, p2);
  292. std::wstring result(s, L' ');
  293. while (s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s)))
  294. {
  295. #if defined(_CPPLIB_VER)
  296. //
  297. // A bug in VC11 and 12 causes the program to hang if we pass a null-string
  298. // to std::strxfrm, but only for certain locales :-(
  299. // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
  300. //
  301. if (r == INT_MAX)
  302. {
  303. result.erase();
  304. result.insert(result.begin(), static_cast<wchar_t>(0));
  305. return result;
  306. }
  307. #endif
  308. result.append(r - s + 3, L' ');
  309. s = result.size();
  310. }
  311. result.erase(r);
  312. return result;
  313. }
  314. inline c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform_primary(const wchar_t* p1, const wchar_t* p2)
  315. {
  316. static wchar_t s_delim;
  317. static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<const c_regex_traits<wchar_t>*>(0), &s_delim);
  318. std::wstring result;
  319. //
  320. // What we do here depends upon the format of the sort key returned by
  321. // sort key returned by this->transform:
  322. //
  323. switch (s_collate_type)
  324. {
  325. case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
  326. case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
  327. // the best we can do is translate to lower case, then get a regular sort key:
  328. {
  329. result.assign(p1, p2);
  330. for (std::wstring::size_type i = 0; i < result.size(); ++i)
  331. result[i] = (std::towlower)(result[i]);
  332. result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
  333. break;
  334. }
  335. case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
  336. {
  337. // get a regular sort key, and then truncate it:
  338. result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
  339. result.erase(s_delim);
  340. break;
  341. }
  342. case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
  343. // get a regular sort key, and then truncate everything after the delim:
  344. result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
  345. if ((!result.empty()) && (result[0] == s_delim))
  346. break;
  347. std::size_t i;
  348. for (i = 0; i < result.size(); ++i)
  349. {
  350. if (result[i] == s_delim)
  351. break;
  352. }
  353. result.erase(i);
  354. break;
  355. }
  356. if (result.empty())
  357. result = std::wstring(1, char(0));
  358. return result;
  359. }
  360. inline c_regex_traits<wchar_t>::char_class_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_classname(const wchar_t* p1, const wchar_t* p2)
  361. {
  362. using namespace BOOST_REGEX_DETAIL_NS;
  363. static const char_class_type masks[] =
  364. {
  365. 0,
  366. char_class_alnum,
  367. char_class_alpha,
  368. char_class_blank,
  369. char_class_cntrl,
  370. char_class_digit,
  371. char_class_digit,
  372. char_class_graph,
  373. char_class_horizontal,
  374. char_class_lower,
  375. char_class_lower,
  376. char_class_print,
  377. char_class_punct,
  378. char_class_space,
  379. char_class_space,
  380. char_class_upper,
  381. char_class_unicode,
  382. char_class_upper,
  383. char_class_vertical,
  384. char_class_alnum | char_class_word,
  385. char_class_alnum | char_class_word,
  386. char_class_xdigit,
  387. };
  388. int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
  389. if (idx < 0)
  390. {
  391. std::wstring s(p1, p2);
  392. for (std::wstring::size_type i = 0; i < s.size(); ++i)
  393. s[i] = (std::towlower)(s[i]);
  394. idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
  395. }
  396. BOOST_REGEX_ASSERT(idx + 1 < static_cast<int>(sizeof(masks) / sizeof(masks[0])));
  397. return masks[idx + 1];
  398. }
  399. inline bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::isctype(wchar_t c, char_class_type mask)
  400. {
  401. using namespace BOOST_REGEX_DETAIL_NS;
  402. return
  403. ((mask & char_class_space) && (std::iswspace)(c))
  404. || ((mask & char_class_print) && (std::iswprint)(c))
  405. || ((mask & char_class_cntrl) && (std::iswcntrl)(c))
  406. || ((mask & char_class_upper) && (std::iswupper)(c))
  407. || ((mask & char_class_lower) && (std::iswlower)(c))
  408. || ((mask & char_class_alpha) && (std::iswalpha)(c))
  409. || ((mask & char_class_digit) && (std::iswdigit)(c))
  410. || ((mask & char_class_punct) && (std::iswpunct)(c))
  411. || ((mask & char_class_xdigit) && (std::iswxdigit)(c))
  412. || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
  413. || ((mask & char_class_word) && (c == '_'))
  414. || ((mask & char_class_unicode) && (c & ~static_cast<wchar_t>(0xff)))
  415. || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == L'\v')))
  416. || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != L'\v'));
  417. }
  418. inline c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_collatename(const wchar_t* p1, const wchar_t* p2)
  419. {
  420. #ifdef BOOST_MSVC
  421. #pragma warning(push)
  422. #pragma warning(disable: 4244)
  423. #endif
  424. std::string name(p1, p2);
  425. #ifdef BOOST_MSVC
  426. #pragma warning(pop)
  427. #endif
  428. name = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(name);
  429. if (!name.empty())
  430. return string_type(name.begin(), name.end());
  431. if (p2 - p1 == 1)
  432. return string_type(1, *p1);
  433. return string_type();
  434. }
  435. inline int BOOST_REGEX_CALL c_regex_traits<wchar_t>::value(wchar_t c, int radix)
  436. {
  437. #ifdef BOOST_BORLANDC
  438. // workaround for broken wcstol:
  439. if ((std::iswxdigit)(c) == 0)
  440. return -1;
  441. #endif
  442. wchar_t b[2] = { c, '\0', };
  443. wchar_t* ep;
  444. int result = std::wcstol(b, &ep, radix);
  445. if (ep == b)
  446. return -1;
  447. return result;
  448. }
  449. #endif
  450. }
  451. #ifdef BOOST_MSVC
  452. #pragma warning(push)
  453. #pragma warning(disable: 4103)
  454. #endif
  455. #ifdef BOOST_HAS_ABI_HEADERS
  456. # include BOOST_ABI_SUFFIX
  457. #endif
  458. #ifdef BOOST_MSVC
  459. #pragma warning(pop)
  460. #endif
  461. #endif