w32_regex_traits.hpp 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE w32_regex_traits.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares regular expression traits class w32_regex_traits.
  16. */
  17. #ifndef BOOST_W32_REGEX_TRAITS_HPP_INCLUDED
  18. #define BOOST_W32_REGEX_TRAITS_HPP_INCLUDED
  19. #ifndef BOOST_REGEX_NO_WIN32_LOCALE
  20. #include <boost/regex/pattern_except.hpp>
  21. #include <boost/regex/v5/regex_traits_defaults.hpp>
  22. #ifdef BOOST_HAS_THREADS
  23. #include <mutex>
  24. #endif
  25. #include <boost/regex/v5/primary_transform.hpp>
  26. #include <boost/regex/v5/object_cache.hpp>
  27. #define VC_EXTRALEAN
  28. #define WIN32_LEAN_AND_MEAN
  29. #include <windows.h>
  30. #if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(UNDER_CE)
  31. #pragma comment(lib, "user32.lib")
  32. #endif
  33. #ifdef BOOST_REGEX_MSVC
  34. #pragma warning(push)
  35. #pragma warning(disable:4786)
  36. #if BOOST_REGEX_MSVC < 1910
  37. #pragma warning(disable:4800)
  38. #endif
  39. #endif
  40. namespace boost{
  41. //
  42. // forward declaration is needed by some compilers:
  43. //
  // Declared ahead of its definition so the detail-layer classes below can name it.
  template <class charT> class w32_regex_traits;
  46. namespace BOOST_REGEX_DETAIL_NS{
  47. //
  48. // start by typedeffing the types we'll need:
  49. //
  using lcid_type = std::uint32_t;         // stands in for the Win32 LCID type
  using cat_type = std::shared_ptr<void>;  // stands in for a DLL module handle
  52. //
  53. // then add wrappers around the actual Win32 API's (ie implementation hiding):
  54. //
  55. lcid_type w32_get_default_locale();
  56. bool w32_is_lower(char, lcid_type);
  57. #ifndef BOOST_NO_WREGEX
  58. bool w32_is_lower(wchar_t, lcid_type);
  59. #endif
  60. bool w32_is_upper(char, lcid_type);
  61. #ifndef BOOST_NO_WREGEX
  62. bool w32_is_upper(wchar_t, lcid_type);
  63. #endif
  64. cat_type w32_cat_open(const std::string& name);
  65. std::string w32_cat_get(const cat_type& cat, lcid_type state_id, int i, const std::string& def);
  66. #ifndef BOOST_NO_WREGEX
  67. std::wstring w32_cat_get(const cat_type& cat, lcid_type state_id, int i, const std::wstring& def);
  68. #endif
  69. std::string w32_transform(lcid_type state_id, const char* p1, const char* p2);
  70. #ifndef BOOST_NO_WREGEX
  71. std::wstring w32_transform(lcid_type state_id, const wchar_t* p1, const wchar_t* p2);
  72. #endif
  73. char w32_tolower(char c, lcid_type);
  74. #ifndef BOOST_NO_WREGEX
  75. wchar_t w32_tolower(wchar_t c, lcid_type);
  76. #endif
  77. char w32_toupper(char c, lcid_type);
  78. #ifndef BOOST_NO_WREGEX
  79. wchar_t w32_toupper(wchar_t c, lcid_type);
  80. #endif
  81. bool w32_is(lcid_type, std::uint32_t mask, char c);
  82. #ifndef BOOST_NO_WREGEX
  83. bool w32_is(lcid_type, std::uint32_t mask, wchar_t c);
  84. #endif
  85. //
  86. // class w32_regex_traits_base:
  87. // acts as a container for locale and the facets we are using.
  88. //
  89. template <class charT>
  90. struct w32_regex_traits_base
  91. {
  92. w32_regex_traits_base(lcid_type l)
  93. { imbue(l); }
  94. lcid_type imbue(lcid_type l);
  95. lcid_type m_locale;
  96. };
  97. template <class charT>
  98. inline lcid_type w32_regex_traits_base<charT>::imbue(lcid_type l)
  99. {
  100. lcid_type result(m_locale);
  101. m_locale = l;
  102. return result;
  103. }
  104. //
  105. // class w32_regex_traits_char_layer:
  106. // implements methods that require specialisation for narrow characters:
  107. //
  108. template <class charT>
  109. class w32_regex_traits_char_layer : public w32_regex_traits_base<charT>
  110. {
  111. typedef std::basic_string<charT> string_type;
  112. typedef std::map<charT, regex_constants::syntax_type> map_type;
  113. typedef typename map_type::const_iterator map_iterator_type;
  114. public:
  115. w32_regex_traits_char_layer(const lcid_type l);
  116. regex_constants::syntax_type syntax_type(charT c)const
  117. {
  118. map_iterator_type i = m_char_map.find(c);
  119. return ((i == m_char_map.end()) ? 0 : i->second);
  120. }
  121. regex_constants::escape_syntax_type escape_syntax_type(charT c) const
  122. {
  123. map_iterator_type i = m_char_map.find(c);
  124. if(i == m_char_map.end())
  125. {
  126. if(::boost::BOOST_REGEX_DETAIL_NS::w32_is_lower(c, this->m_locale)) return regex_constants::escape_type_class;
  127. if(::boost::BOOST_REGEX_DETAIL_NS::w32_is_upper(c, this->m_locale)) return regex_constants::escape_type_not_class;
  128. return 0;
  129. }
  130. return i->second;
  131. }
  132. charT tolower(charT c)const
  133. {
  134. return ::boost::BOOST_REGEX_DETAIL_NS::w32_tolower(c, this->m_locale);
  135. }
  136. bool isctype(std::uint32_t mask, charT c)const
  137. {
  138. return ::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, mask, c);
  139. }
  140. private:
  141. string_type get_default_message(regex_constants::syntax_type);
  142. // TODO: use a hash table when available!
  143. map_type m_char_map;
  144. };
  145. template <class charT>
  146. w32_regex_traits_char_layer<charT>::w32_regex_traits_char_layer(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l)
  147. : w32_regex_traits_base<charT>(l)
  148. {
  149. // we need to start by initialising our syntax map so we know which
  150. // character is used for which purpose:
  151. cat_type cat;
  152. std::string cat_name(w32_regex_traits<charT>::get_catalog_name());
  153. if(cat_name.size())
  154. {
  155. cat = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_open(cat_name);
  156. if(!cat)
  157. {
  158. std::string m("Unable to open message catalog: ");
  159. std::runtime_error err(m + cat_name);
  160. boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err);
  161. }
  162. }
  163. //
  164. // if we have a valid catalog then load our messages:
  165. //
  166. if(cat)
  167. {
  168. for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
  169. {
  170. string_type mss = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, i, get_default_message(i));
  171. for(typename string_type::size_type j = 0; j < mss.size(); ++j)
  172. {
  173. this->m_char_map[mss[j]] = i;
  174. }
  175. }
  176. }
  177. else
  178. {
  179. for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
  180. {
  181. const char* ptr = get_default_syntax(i);
  182. while(ptr && *ptr)
  183. {
  184. this->m_char_map[static_cast<charT>(*ptr)] = i;
  185. ++ptr;
  186. }
  187. }
  188. }
  189. }
  190. template <class charT>
  191. typename w32_regex_traits_char_layer<charT>::string_type
  192. w32_regex_traits_char_layer<charT>::get_default_message(regex_constants::syntax_type i)
  193. {
  194. const char* ptr = get_default_syntax(i);
  195. string_type result;
  196. while(ptr && *ptr)
  197. {
  198. result.append(1, static_cast<charT>(*ptr));
  199. ++ptr;
  200. }
  201. return result;
  202. }
  203. //
  204. // specialised version for narrow characters:
  205. //
  206. template <>
  207. class w32_regex_traits_char_layer<char> : public w32_regex_traits_base<char>
  208. {
  209. typedef std::string string_type;
  210. public:
  211. w32_regex_traits_char_layer(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l)
  212. : w32_regex_traits_base<char>(l)
  213. {
  214. init<char>();
  215. }
  216. regex_constants::syntax_type syntax_type(char c)const
  217. {
  218. return m_char_map[static_cast<unsigned char>(c)];
  219. }
  220. regex_constants::escape_syntax_type escape_syntax_type(char c) const
  221. {
  222. return m_char_map[static_cast<unsigned char>(c)];
  223. }
  224. char tolower(char c)const
  225. {
  226. return m_lower_map[static_cast<unsigned char>(c)];
  227. }
  228. bool isctype(std::uint32_t mask, char c)const
  229. {
  230. return m_type_map[static_cast<unsigned char>(c)] & mask;
  231. }
  232. private:
  233. regex_constants::syntax_type m_char_map[1u << CHAR_BIT];
  234. char m_lower_map[1u << CHAR_BIT];
  235. std::uint16_t m_type_map[1u << CHAR_BIT];
  236. template <class U>
  237. void init();
  238. };
  239. //
  240. // class w32_regex_traits_implementation:
  241. // provides pimpl implementation for w32_regex_traits.
  242. //
  243. template <class charT>
  244. class w32_regex_traits_implementation : public w32_regex_traits_char_layer<charT>
  245. {
  246. public:
  247. typedef typename w32_regex_traits<charT>::char_class_type char_class_type;
  248. static const char_class_type mask_word = 0x0400; // must be C1_DEFINED << 1
  249. static const char_class_type mask_unicode = 0x0800; // must be C1_DEFINED << 2
  250. static const char_class_type mask_horizontal = 0x1000; // must be C1_DEFINED << 3
  251. static const char_class_type mask_vertical = 0x2000; // must be C1_DEFINED << 4
  252. static const char_class_type mask_base = 0x3ff; // all the masks used by the CT_CTYPE1 group
  253. typedef std::basic_string<charT> string_type;
  254. typedef charT char_type;
  255. w32_regex_traits_implementation(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l);
  256. std::string error_string(regex_constants::error_type n) const
  257. {
  258. if(!m_error_strings.empty())
  259. {
  260. std::map<int, std::string>::const_iterator p = m_error_strings.find(n);
  261. return (p == m_error_strings.end()) ? std::string(get_default_error_string(n)) : p->second;
  262. }
  263. return get_default_error_string(n);
  264. }
  265. char_class_type lookup_classname(const charT* p1, const charT* p2) const
  266. {
  267. char_class_type result = lookup_classname_imp(p1, p2);
  268. if(result == 0)
  269. {
  270. typedef typename string_type::size_type size_type;
  271. string_type temp(p1, p2);
  272. for(size_type i = 0; i < temp.size(); ++i)
  273. temp[i] = this->tolower(temp[i]);
  274. result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size());
  275. }
  276. return result;
  277. }
  278. string_type lookup_collatename(const charT* p1, const charT* p2) const;
  279. string_type transform_primary(const charT* p1, const charT* p2) const;
  280. string_type transform(const charT* p1, const charT* p2) const
  281. {
  282. return ::boost::BOOST_REGEX_DETAIL_NS::w32_transform(this->m_locale, p1, p2);
  283. }
  284. private:
  285. std::map<int, std::string> m_error_strings; // error messages indexed by numberic ID
  286. std::map<string_type, char_class_type> m_custom_class_names; // character class names
  287. std::map<string_type, string_type> m_custom_collate_names; // collating element names
  288. unsigned m_collate_type; // the form of the collation string
  289. charT m_collate_delim; // the collation group delimiter
  290. //
  291. // helpers:
  292. //
  293. char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const;
  294. };
  295. template <class charT>
  296. typename w32_regex_traits_implementation<charT>::string_type
  297. w32_regex_traits_implementation<charT>::transform_primary(const charT* p1, const charT* p2) const
  298. {
  299. string_type result;
  300. //
  301. // What we do here depends upon the format of the sort key returned by
  302. // sort key returned by this->transform:
  303. //
  304. switch(m_collate_type)
  305. {
  306. case sort_C:
  307. case sort_unknown:
  308. // the best we can do is translate to lower case, then get a regular sort key:
  309. {
  310. result.assign(p1, p2);
  311. typedef typename string_type::size_type size_type;
  312. for(size_type i = 0; i < result.size(); ++i)
  313. result[i] = this->tolower(result[i]);
  314. result = this->transform(&*result.begin(), &*result.begin() + result.size());
  315. break;
  316. }
  317. case sort_fixed:
  318. {
  319. // get a regular sort key, and then truncate it:
  320. result.assign(this->transform(p1, p2));
  321. result.erase(this->m_collate_delim);
  322. break;
  323. }
  324. case sort_delim:
  325. // get a regular sort key, and then truncate everything after the delim:
  326. result.assign(this->transform(p1, p2));
  327. std::size_t i;
  328. for(i = 0; i < result.size(); ++i)
  329. {
  330. if(result[i] == m_collate_delim)
  331. break;
  332. }
  333. result.erase(i);
  334. break;
  335. }
  336. if(result.empty())
  337. result = string_type(1, charT(0));
  338. return result;
  339. }
  340. template <class charT>
  341. typename w32_regex_traits_implementation<charT>::string_type
  342. w32_regex_traits_implementation<charT>::lookup_collatename(const charT* p1, const charT* p2) const
  343. {
  344. typedef typename std::map<string_type, string_type>::const_iterator iter_type;
  345. if(m_custom_collate_names.size())
  346. {
  347. iter_type pos = m_custom_collate_names.find(string_type(p1, p2));
  348. if(pos != m_custom_collate_names.end())
  349. return pos->second;
  350. }
  351. std::string name(p1, p2);
  352. name = lookup_default_collate_name(name);
  353. if(name.size())
  354. return string_type(name.begin(), name.end());
  355. if(p2 - p1 == 1)
  356. return string_type(1, *p1);
  357. return string_type();
  358. }
  359. template <class charT>
  360. w32_regex_traits_implementation<charT>::w32_regex_traits_implementation(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l)
  361. : w32_regex_traits_char_layer<charT>(l)
  362. {
  363. cat_type cat;
  364. std::string cat_name(w32_regex_traits<charT>::get_catalog_name());
  365. if(cat_name.size())
  366. {
  367. cat = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_open(cat_name);
  368. if(!cat)
  369. {
  370. std::string m("Unable to open message catalog: ");
  371. std::runtime_error err(m + cat_name);
  372. boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err);
  373. }
  374. }
  375. //
  376. // if we have a valid catalog then load our messages:
  377. //
  378. if(cat)
  379. {
  380. //
  381. // Error messages:
  382. //
  383. for(boost::regex_constants::error_type i = static_cast<boost::regex_constants::error_type>(0);
  384. i <= boost::regex_constants::error_unknown;
  385. i = static_cast<boost::regex_constants::error_type>(i + 1))
  386. {
  387. const char* p = get_default_error_string(i);
  388. string_type default_message;
  389. while(*p)
  390. {
  391. default_message.append(1, static_cast<charT>(*p));
  392. ++p;
  393. }
  394. string_type s = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, i+200, default_message);
  395. std::string result;
  396. for(std::string::size_type j = 0; j < s.size(); ++j)
  397. {
  398. result.append(1, static_cast<char>(s[j]));
  399. }
  400. m_error_strings[i] = result;
  401. }
  402. //
  403. // Custom class names:
  404. //
  405. static const char_class_type masks[14] =
  406. {
  407. 0x0104u, // C1_ALPHA | C1_DIGIT
  408. 0x0100u, // C1_ALPHA
  409. 0x0020u, // C1_CNTRL
  410. 0x0004u, // C1_DIGIT
  411. (~(0x0020u|0x0008u) & 0x01ffu) | 0x0400u, // not C1_CNTRL or C1_SPACE
  412. 0x0002u, // C1_LOWER
  413. (~0x0020u & 0x01ffu) | 0x0400, // not C1_CNTRL
  414. 0x0010u, // C1_PUNCT
  415. 0x0008u, // C1_SPACE
  416. 0x0001u, // C1_UPPER
  417. 0x0080u, // C1_XDIGIT
  418. 0x0040u, // C1_BLANK
  419. w32_regex_traits_implementation<charT>::mask_word,
  420. w32_regex_traits_implementation<charT>::mask_unicode,
  421. };
  422. static const string_type null_string;
  423. for(unsigned int j = 0; j <= 13; ++j)
  424. {
  425. string_type s(::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, j+300, null_string));
  426. if(s.size())
  427. this->m_custom_class_names[s] = masks[j];
  428. }
  429. }
  430. //
  431. // get the collation format used by m_pcollate:
  432. //
  433. m_collate_type = BOOST_REGEX_DETAIL_NS::find_sort_syntax(this, &m_collate_delim);
  434. }
  435. template <class charT>
  436. typename w32_regex_traits_implementation<charT>::char_class_type
  437. w32_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const
  438. {
  439. static const char_class_type masks[22] =
  440. {
  441. 0,
  442. 0x0104u, // C1_ALPHA | C1_DIGIT
  443. 0x0100u, // C1_ALPHA
  444. 0x0040u, // C1_BLANK
  445. 0x0020u, // C1_CNTRL
  446. 0x0004u, // C1_DIGIT
  447. 0x0004u, // C1_DIGIT
  448. (~(0x0020u|0x0008u|0x0040) & 0x01ffu) | 0x0400u, // not C1_CNTRL or C1_SPACE or C1_BLANK
  449. w32_regex_traits_implementation<charT>::mask_horizontal,
  450. 0x0002u, // C1_LOWER
  451. 0x0002u, // C1_LOWER
  452. (~0x0020u & 0x01ffu) | 0x0400, // not C1_CNTRL
  453. 0x0010u, // C1_PUNCT
  454. 0x0008u, // C1_SPACE
  455. 0x0008u, // C1_SPACE
  456. 0x0001u, // C1_UPPER
  457. w32_regex_traits_implementation<charT>::mask_unicode,
  458. 0x0001u, // C1_UPPER
  459. w32_regex_traits_implementation<charT>::mask_vertical,
  460. 0x0104u | w32_regex_traits_implementation<charT>::mask_word,
  461. 0x0104u | w32_regex_traits_implementation<charT>::mask_word,
  462. 0x0080u, // C1_XDIGIT
  463. };
  464. if(m_custom_class_names.size())
  465. {
  466. typedef typename std::map<std::basic_string<charT>, char_class_type>::const_iterator map_iter;
  467. map_iter pos = m_custom_class_names.find(string_type(p1, p2));
  468. if(pos != m_custom_class_names.end())
  469. return pos->second;
  470. }
  471. std::size_t state_id = 1u + (std::size_t)BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
  472. if(state_id < sizeof(masks) / sizeof(masks[0]))
  473. return masks[state_id];
  474. return masks[0];
  475. }
  476. template <class charT>
  477. std::shared_ptr<const w32_regex_traits_implementation<charT> > create_w32_regex_traits(::boost::BOOST_REGEX_DETAIL_NS::lcid_type l)
  478. {
  479. // TODO: create a cache for previously constructed objects.
  480. return boost::object_cache< ::boost::BOOST_REGEX_DETAIL_NS::lcid_type, w32_regex_traits_implementation<charT> >::get(l, 5);
  481. }
  482. } // BOOST_REGEX_DETAIL_NS
  483. template <class charT>
  484. class w32_regex_traits
  485. {
  486. public:
  487. typedef charT char_type;
  488. typedef std::size_t size_type;
  489. typedef std::basic_string<char_type> string_type;
  490. typedef ::boost::BOOST_REGEX_DETAIL_NS::lcid_type locale_type;
  491. typedef std::uint_least32_t char_class_type;
  492. struct boost_extensions_tag{};
  493. w32_regex_traits()
  494. : m_pimpl(BOOST_REGEX_DETAIL_NS::create_w32_regex_traits<charT>(::boost::BOOST_REGEX_DETAIL_NS::w32_get_default_locale()))
  495. { }
  496. static size_type length(const char_type* p)
  497. {
  498. return std::char_traits<charT>::length(p);
  499. }
  500. regex_constants::syntax_type syntax_type(charT c)const
  501. {
  502. return m_pimpl->syntax_type(c);
  503. }
  504. regex_constants::escape_syntax_type escape_syntax_type(charT c) const
  505. {
  506. return m_pimpl->escape_syntax_type(c);
  507. }
  508. charT translate(charT c) const
  509. {
  510. return c;
  511. }
  512. charT translate_nocase(charT c) const
  513. {
  514. return this->m_pimpl->tolower(c);
  515. }
  516. charT translate(charT c, bool icase) const
  517. {
  518. return icase ? this->m_pimpl->tolower(c) : c;
  519. }
  520. charT tolower(charT c) const
  521. {
  522. return this->m_pimpl->tolower(c);
  523. }
  524. charT toupper(charT c) const
  525. {
  526. return ::boost::BOOST_REGEX_DETAIL_NS::w32_toupper(c, this->m_pimpl->m_locale);
  527. }
  528. string_type transform(const charT* p1, const charT* p2) const
  529. {
  530. return ::boost::BOOST_REGEX_DETAIL_NS::w32_transform(this->m_pimpl->m_locale, p1, p2);
  531. }
  532. string_type transform_primary(const charT* p1, const charT* p2) const
  533. {
  534. return m_pimpl->transform_primary(p1, p2);
  535. }
  536. char_class_type lookup_classname(const charT* p1, const charT* p2) const
  537. {
  538. return m_pimpl->lookup_classname(p1, p2);
  539. }
  540. string_type lookup_collatename(const charT* p1, const charT* p2) const
  541. {
  542. return m_pimpl->lookup_collatename(p1, p2);
  543. }
  544. bool isctype(charT c, char_class_type f) const
  545. {
  546. if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation<charT>::mask_base)
  547. && (this->m_pimpl->isctype(f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation<charT>::mask_base, c)))
  548. return true;
  549. else if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation<charT>::mask_unicode) && BOOST_REGEX_DETAIL_NS::is_extended(c))
  550. return true;
  551. else if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation<charT>::mask_word) && (c == '_'))
  552. return true;
  553. else if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation<charT>::mask_vertical)
  554. && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v')))
  555. return true;
  556. else if((f & BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation<charT>::mask_horizontal)
  557. && this->isctype(c, 0x0008u) && !this->isctype(c, BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation<charT>::mask_vertical))
  558. return true;
  559. return false;
  560. }
  561. std::intmax_t toi(const charT*& p1, const charT* p2, int radix)const
  562. {
  563. return ::boost::BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
  564. }
  565. int value(charT c, int radix)const
  566. {
  567. int result = (int)::boost::BOOST_REGEX_DETAIL_NS::global_value(c);
  568. return result < radix ? result : -1;
  569. }
  570. locale_type imbue(locale_type l)
  571. {
  572. ::boost::BOOST_REGEX_DETAIL_NS::lcid_type result(getloc());
  573. m_pimpl = BOOST_REGEX_DETAIL_NS::create_w32_regex_traits<charT>(l);
  574. return result;
  575. }
  576. locale_type getloc()const
  577. {
  578. return m_pimpl->m_locale;
  579. }
  580. std::string error_string(regex_constants::error_type n) const
  581. {
  582. return m_pimpl->error_string(n);
  583. }
  584. //
  585. // extension:
  586. // set the name of the message catalog in use (defaults to "boost_regex").
  587. //
  588. static std::string catalog_name(const std::string& name);
  589. static std::string get_catalog_name();
  590. private:
  591. std::shared_ptr<const BOOST_REGEX_DETAIL_NS::w32_regex_traits_implementation<charT> > m_pimpl;
  592. //
  593. // catalog name handler:
  594. //
  595. static std::string& get_catalog_name_inst();
  596. #ifdef BOOST_HAS_THREADS
  597. static std::mutex& get_mutex_inst();
  598. #endif
  599. };
  600. template <class charT>
  601. std::string w32_regex_traits<charT>::catalog_name(const std::string& name)
  602. {
  603. #ifdef BOOST_HAS_THREADS
  604. std::lock_guard<std::mutex> lk(get_mutex_inst());
  605. #endif
  606. std::string result(get_catalog_name_inst());
  607. get_catalog_name_inst() = name;
  608. return result;
  609. }
  610. template <class charT>
  611. std::string& w32_regex_traits<charT>::get_catalog_name_inst()
  612. {
  613. static std::string s_name;
  614. return s_name;
  615. }
  616. template <class charT>
  617. std::string w32_regex_traits<charT>::get_catalog_name()
  618. {
  619. #ifdef BOOST_HAS_THREADS
  620. std::lock_guard<std::mutex> lk(get_mutex_inst());
  621. #endif
  622. std::string result(get_catalog_name_inst());
  623. return result;
  624. }
  625. #ifdef BOOST_HAS_THREADS
  626. template <class charT>
  627. std::mutex& w32_regex_traits<charT>::get_mutex_inst()
  628. {
  629. static std::mutex s_mutex;
  630. return s_mutex;
  631. }
  632. #endif
  633. namespace BOOST_REGEX_DETAIL_NS {
  634. #ifdef BOOST_NO_ANSI_APIS
  635. inline UINT get_code_page_for_locale_id(lcid_type idx)
  636. {
  637. WCHAR code_page_string[7];
  638. if (::GetLocaleInfoW(idx, LOCALE_IDEFAULTANSICODEPAGE, code_page_string, 7) == 0)
  639. return 0;
  640. return static_cast<UINT>(_wtol(code_page_string));
  641. }
  642. #endif
  643. template <class U>
  644. inline void w32_regex_traits_char_layer<char>::init()
  645. {
  646. // we need to start by initialising our syntax map so we know which
  647. // character is used for which purpose:
  648. std::memset(m_char_map, 0, sizeof(m_char_map));
  649. cat_type cat;
  650. std::string cat_name(w32_regex_traits<char>::get_catalog_name());
  651. if (cat_name.size())
  652. {
  653. cat = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_open(cat_name);
  654. if (!cat)
  655. {
  656. std::string m("Unable to open message catalog: ");
  657. std::runtime_error err(m + cat_name);
  658. ::boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err);
  659. }
  660. }
  661. //
  662. // if we have a valid catalog then load our messages:
  663. //
  664. if (cat)
  665. {
  666. for (regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
  667. {
  668. string_type mss = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, i, get_default_syntax(i));
  669. for (string_type::size_type j = 0; j < mss.size(); ++j)
  670. {
  671. m_char_map[static_cast<unsigned char>(mss[j])] = i;
  672. }
  673. }
  674. }
  675. else
  676. {
  677. for (regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
  678. {
  679. const char* ptr = get_default_syntax(i);
  680. while (ptr && *ptr)
  681. {
  682. m_char_map[static_cast<unsigned char>(*ptr)] = i;
  683. ++ptr;
  684. }
  685. }
  686. }
  687. //
  688. // finish off by calculating our escape types:
  689. //
  690. unsigned char i = 'A';
  691. do
  692. {
  693. if (m_char_map[i] == 0)
  694. {
  695. if (::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0002u, (char)i))
  696. m_char_map[i] = regex_constants::escape_type_class;
  697. else if (::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0001u, (char)i))
  698. m_char_map[i] = regex_constants::escape_type_not_class;
  699. }
  700. } while (0xFF != i++);
  701. //
  702. // fill in lower case map:
  703. //
  704. char char_map[1 << CHAR_BIT];
  705. for (int ii = 0; ii < (1 << CHAR_BIT); ++ii)
  706. char_map[ii] = static_cast<char>(ii);
  707. #ifndef BOOST_NO_ANSI_APIS
  708. int r = ::LCMapStringA(this->m_locale, LCMAP_LOWERCASE, char_map, 1 << CHAR_BIT, this->m_lower_map, 1 << CHAR_BIT);
  709. BOOST_REGEX_ASSERT(r != 0);
  710. #else
  711. UINT code_page = get_code_page_for_locale_id(this->m_locale);
  712. BOOST_REGEX_ASSERT(code_page != 0);
  713. WCHAR wide_char_map[1 << CHAR_BIT];
  714. int conv_r = ::MultiByteToWideChar(code_page, 0, char_map, 1 << CHAR_BIT, wide_char_map, 1 << CHAR_BIT);
  715. BOOST_REGEX_ASSERT(conv_r != 0);
  716. WCHAR wide_lower_map[1 << CHAR_BIT];
  717. int r = ::LCMapStringW(this->m_locale, LCMAP_LOWERCASE, wide_char_map, 1 << CHAR_BIT, wide_lower_map, 1 << CHAR_BIT);
  718. BOOST_REGEX_ASSERT(r != 0);
  719. conv_r = ::WideCharToMultiByte(code_page, 0, wide_lower_map, r, this->m_lower_map, 1 << CHAR_BIT, NULL, NULL);
  720. BOOST_REGEX_ASSERT(conv_r != 0);
  721. #endif
  722. if (r < (1 << CHAR_BIT))
  723. {
  724. // if we have multibyte characters then not all may have been given
  725. // a lower case mapping:
  726. for (int jj = r; jj < (1 << CHAR_BIT); ++jj)
  727. this->m_lower_map[jj] = static_cast<char>(jj);
  728. }
  729. #ifndef BOOST_NO_ANSI_APIS
  730. r = ::GetStringTypeExA(this->m_locale, CT_CTYPE1, char_map, 1 << CHAR_BIT, this->m_type_map);
  731. #else
  732. r = ::GetStringTypeExW(this->m_locale, CT_CTYPE1, wide_char_map, 1 << CHAR_BIT, this->m_type_map);
  733. #endif
  734. BOOST_REGEX_ASSERT(0 != r);
  735. }
  736. inline lcid_type w32_get_default_locale()
  737. {
  738. return ::GetUserDefaultLCID();
  739. }
  740. inline bool w32_is_lower(char c, lcid_type idx)
  741. {
  742. #ifndef BOOST_NO_ANSI_APIS
  743. WORD mask;
  744. if (::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER))
  745. return true;
  746. return false;
  747. #else
  748. UINT code_page = get_code_page_for_locale_id(idx);
  749. if (code_page == 0)
  750. return false;
  751. WCHAR wide_c;
  752. if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
  753. return false;
  754. WORD mask;
  755. if (::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_LOWER))
  756. return true;
  757. return false;
  758. #endif
  759. }
  760. inline bool w32_is_lower(wchar_t c, lcid_type idx)
  761. {
  762. WORD mask;
  763. if (::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER))
  764. return true;
  765. return false;
  766. }
  767. inline bool w32_is_upper(char c, lcid_type idx)
  768. {
  769. #ifndef BOOST_NO_ANSI_APIS
  770. WORD mask;
  771. if (::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER))
  772. return true;
  773. return false;
  774. #else
  775. UINT code_page = get_code_page_for_locale_id(idx);
  776. if (code_page == 0)
  777. return false;
  778. WCHAR wide_c;
  779. if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
  780. return false;
  781. WORD mask;
  782. if (::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_UPPER))
  783. return true;
  784. return false;
  785. #endif
  786. }
  787. inline bool w32_is_upper(wchar_t c, lcid_type idx)
  788. {
  789. WORD mask;
  790. if (::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER))
  791. return true;
  792. return false;
  793. }
  794. inline void free_module(void* mod)
  795. {
  796. ::FreeLibrary(static_cast<HMODULE>(mod));
  797. }
  798. inline cat_type w32_cat_open(const std::string& name)
  799. {
  800. #ifndef BOOST_NO_ANSI_APIS
  801. cat_type result(::LoadLibraryA(name.c_str()), &free_module);
  802. return result;
  803. #else
  804. LPWSTR wide_name = (LPWSTR)_alloca((name.size() + 1) * sizeof(WCHAR));
  805. if (::MultiByteToWideChar(CP_ACP, 0, name.c_str(), name.size(), wide_name, name.size() + 1) == 0)
  806. return cat_type();
  807. cat_type result(::LoadLibraryW(wide_name), &free_module);
  808. return result;
  809. #endif
  810. }
  811. inline std::string w32_cat_get(const cat_type& cat, lcid_type, int i, const std::string& def)
  812. {
  813. #ifndef BOOST_NO_ANSI_APIS
  814. char buf[256];
  815. if (0 == ::LoadStringA(
  816. static_cast<HMODULE>(cat.get()),
  817. i,
  818. buf,
  819. 256
  820. ))
  821. {
  822. return def;
  823. }
  824. #else
  825. WCHAR wbuf[256];
  826. int r = ::LoadStringW(
  827. static_cast<HMODULE>(cat.get()),
  828. i,
  829. wbuf,
  830. 256
  831. );
  832. if (r == 0)
  833. return def;
  834. int buf_size = 1 + ::WideCharToMultiByte(CP_ACP, 0, wbuf, r, NULL, 0, NULL, NULL);
  835. LPSTR buf = (LPSTR)_alloca(buf_size);
  836. if (::WideCharToMultiByte(CP_ACP, 0, wbuf, r, buf, buf_size, NULL, NULL) == 0)
  837. return def; // failed conversion.
  838. #endif
  839. return std::string(buf);
  840. }
  841. #ifndef BOOST_NO_WREGEX
  842. inline std::wstring w32_cat_get(const cat_type& cat, lcid_type, int i, const std::wstring& def)
  843. {
  844. wchar_t buf[256];
  845. if (0 == ::LoadStringW(
  846. static_cast<HMODULE>(cat.get()),
  847. i,
  848. buf,
  849. 256
  850. ))
  851. {
  852. return def;
  853. }
  854. return std::wstring(buf);
  855. }
  856. #endif
  857. inline std::string w32_transform(lcid_type idx, const char* p1, const char* p2)
  858. {
  859. #ifndef BOOST_NO_ANSI_APIS
  860. int bytes = ::LCMapStringA(
  861. idx, // locale identifier
  862. LCMAP_SORTKEY, // mapping transformation type
  863. p1, // source string
  864. static_cast<int>(p2 - p1), // number of characters in source string
  865. 0, // destination buffer
  866. 0 // size of destination buffer
  867. );
  868. if (!bytes)
  869. return std::string(p1, p2);
  870. std::string result(++bytes, '\0');
  871. bytes = ::LCMapStringA(
  872. idx, // locale identifier
  873. LCMAP_SORTKEY, // mapping transformation type
  874. p1, // source string
  875. static_cast<int>(p2 - p1), // number of characters in source string
  876. &*result.begin(), // destination buffer
  877. bytes // size of destination buffer
  878. );
  879. #else
  880. UINT code_page = get_code_page_for_locale_id(idx);
  881. if (code_page == 0)
  882. return std::string(p1, p2);
  883. int src_len = static_cast<int>(p2 - p1);
  884. LPWSTR wide_p1 = (LPWSTR)_alloca((src_len + 1) * 2);
  885. if (::MultiByteToWideChar(code_page, 0, p1, src_len, wide_p1, src_len + 1) == 0)
  886. return std::string(p1, p2);
  887. int bytes = ::LCMapStringW(
  888. idx, // locale identifier
  889. LCMAP_SORTKEY, // mapping transformation type
  890. wide_p1, // source string
  891. src_len, // number of characters in source string
  892. 0, // destination buffer
  893. 0 // size of destination buffer
  894. );
  895. if (!bytes)
  896. return std::string(p1, p2);
  897. std::string result(++bytes, '\0');
  898. bytes = ::LCMapStringW(
  899. idx, // locale identifier
  900. LCMAP_SORTKEY, // mapping transformation type
  901. wide_p1, // source string
  902. src_len, // number of characters in source string
  903. (LPWSTR) & *result.begin(), // destination buffer
  904. bytes // size of destination buffer
  905. );
  906. #endif
  907. if (bytes > static_cast<int>(result.size()))
  908. return std::string(p1, p2);
  909. while (result.size() && result[result.size() - 1] == '\0')
  910. {
  911. result.erase(result.size() - 1);
  912. }
  913. return result;
  914. }
  915. #ifndef BOOST_NO_WREGEX
  916. inline std::wstring w32_transform(lcid_type idx, const wchar_t* p1, const wchar_t* p2)
  917. {
  918. int bytes = ::LCMapStringW(
  919. idx, // locale identifier
  920. LCMAP_SORTKEY, // mapping transformation type
  921. p1, // source string
  922. static_cast<int>(p2 - p1), // number of characters in source string
  923. 0, // destination buffer
  924. 0 // size of destination buffer
  925. );
  926. if (!bytes)
  927. return std::wstring(p1, p2);
  928. std::string result(++bytes, '\0');
  929. bytes = ::LCMapStringW(
  930. idx, // locale identifier
  931. LCMAP_SORTKEY, // mapping transformation type
  932. p1, // source string
  933. static_cast<int>(p2 - p1), // number of characters in source string
  934. reinterpret_cast<wchar_t*>(&*result.begin()), // destination buffer *of bytes*
  935. bytes // size of destination buffer
  936. );
  937. if (bytes > static_cast<int>(result.size()))
  938. return std::wstring(p1, p2);
  939. while (result.size() && result[result.size() - 1] == L'\0')
  940. {
  941. result.erase(result.size() - 1);
  942. }
  943. std::wstring r2;
  944. for (std::string::size_type i = 0; i < result.size(); ++i)
  945. r2.append(1, static_cast<wchar_t>(static_cast<unsigned char>(result[i])));
  946. return r2;
  947. }
  948. #endif
  949. inline char w32_tolower(char c, lcid_type idx)
  950. {
  951. char result[2];
  952. #ifndef BOOST_NO_ANSI_APIS
  953. int b = ::LCMapStringA(
  954. idx, // locale identifier
  955. LCMAP_LOWERCASE, // mapping transformation type
  956. &c, // source string
  957. 1, // number of characters in source string
  958. result, // destination buffer
  959. 1); // size of destination buffer
  960. if (b == 0)
  961. return c;
  962. #else
  963. UINT code_page = get_code_page_for_locale_id(idx);
  964. if (code_page == 0)
  965. return c;
  966. WCHAR wide_c;
  967. if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
  968. return c;
  969. WCHAR wide_result;
  970. int b = ::LCMapStringW(
  971. idx, // locale identifier
  972. LCMAP_LOWERCASE, // mapping transformation type
  973. &wide_c, // source string
  974. 1, // number of characters in source string
  975. &wide_result, // destination buffer
  976. 1); // size of destination buffer
  977. if (b == 0)
  978. return c;
  979. if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0)
  980. return c; // No single byte lower case equivalent available
  981. #endif
  982. return result[0];
  983. }
  984. #ifndef BOOST_NO_WREGEX
  985. inline wchar_t w32_tolower(wchar_t c, lcid_type idx)
  986. {
  987. wchar_t result[2];
  988. int b = ::LCMapStringW(
  989. idx, // locale identifier
  990. LCMAP_LOWERCASE, // mapping transformation type
  991. &c, // source string
  992. 1, // number of characters in source string
  993. result, // destination buffer
  994. 1); // size of destination buffer
  995. if (b == 0)
  996. return c;
  997. return result[0];
  998. }
  999. #endif
  1000. inline char w32_toupper(char c, lcid_type idx)
  1001. {
  1002. char result[2];
  1003. #ifndef BOOST_NO_ANSI_APIS
  1004. int b = ::LCMapStringA(
  1005. idx, // locale identifier
  1006. LCMAP_UPPERCASE, // mapping transformation type
  1007. &c, // source string
  1008. 1, // number of characters in source string
  1009. result, // destination buffer
  1010. 1); // size of destination buffer
  1011. if (b == 0)
  1012. return c;
  1013. #else
  1014. UINT code_page = get_code_page_for_locale_id(idx);
  1015. if (code_page == 0)
  1016. return c;
  1017. WCHAR wide_c;
  1018. if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
  1019. return c;
  1020. WCHAR wide_result;
  1021. int b = ::LCMapStringW(
  1022. idx, // locale identifier
  1023. LCMAP_UPPERCASE, // mapping transformation type
  1024. &wide_c, // source string
  1025. 1, // number of characters in source string
  1026. &wide_result, // destination buffer
  1027. 1); // size of destination buffer
  1028. if (b == 0)
  1029. return c;
  1030. if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0)
  1031. return c; // No single byte upper case equivalent available.
  1032. #endif
  1033. return result[0];
  1034. }
  1035. #ifndef BOOST_NO_WREGEX
  1036. inline wchar_t w32_toupper(wchar_t c, lcid_type idx)
  1037. {
  1038. wchar_t result[2];
  1039. int b = ::LCMapStringW(
  1040. idx, // locale identifier
  1041. LCMAP_UPPERCASE, // mapping transformation type
  1042. &c, // source string
  1043. 1, // number of characters in source string
  1044. result, // destination buffer
  1045. 1); // size of destination buffer
  1046. if (b == 0)
  1047. return c;
  1048. return result[0];
  1049. }
  1050. #endif
  1051. inline bool w32_is(lcid_type idx, std::uint32_t m, char c)
  1052. {
  1053. WORD mask;
  1054. #ifndef BOOST_NO_ANSI_APIS
  1055. if (::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation<char>::mask_base))
  1056. return true;
  1057. #else
  1058. UINT code_page = get_code_page_for_locale_id(idx);
  1059. if (code_page == 0)
  1060. return false;
  1061. WCHAR wide_c;
  1062. if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
  1063. return false;
  1064. if (::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & m & w32_regex_traits_implementation<char>::mask_base))
  1065. return true;
  1066. #endif
  1067. if ((m & w32_regex_traits_implementation<char>::mask_word) && (c == '_'))
  1068. return true;
  1069. return false;
  1070. }
  1071. #ifndef BOOST_NO_WREGEX
  1072. inline bool w32_is(lcid_type idx, std::uint32_t m, wchar_t c)
  1073. {
  1074. WORD mask;
  1075. if (::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation<wchar_t>::mask_base))
  1076. return true;
  1077. if ((m & w32_regex_traits_implementation<wchar_t>::mask_word) && (c == '_'))
  1078. return true;
  1079. if ((m & w32_regex_traits_implementation<wchar_t>::mask_unicode) && (c > 0xff))
  1080. return true;
  1081. return false;
  1082. }
  1083. #endif
  1084. } // BOOST_REGEX_DETAIL_NS
  1085. } // boost
  1086. #ifdef BOOST_REGEX_MSVC
  1087. #pragma warning(pop)
  1088. #endif
  1089. #endif // BOOST_REGEX_NO_WIN32_LOCALE
  1090. #endif