cpp_regex_traits.hpp 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040
  1. /*
  2. *
  3. * Copyright (c) 2004 John Maddock
  4. * Copyright 2011 Garmin Ltd. or its subsidiaries
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE cpp_regex_traits.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares regular expression traits class cpp_regex_traits.
  16. */
  17. #ifndef BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED
  18. #define BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED
#include <boost/regex/config.hpp>
#include <climits>
#include <cstdint>
#include <functional>
#include <ios>
#include <istream>
#include <locale>
#ifdef BOOST_HAS_THREADS
#include <mutex>
#endif
#include <type_traits>
#include <boost/regex/pattern_except.hpp>
#include <boost/regex/v5/regex_traits_defaults.hpp>
#include <boost/regex/v5/primary_transform.hpp>
#include <boost/regex/v5/object_cache.hpp>
  33. #ifdef BOOST_REGEX_MSVC
  34. #pragma warning(push)
  35. #pragma warning(disable:4786 4251)
  36. #endif
  37. namespace boost{
  38. //
  39. // forward declaration is needed by some compilers:
  40. //
  41. template <class charT>
  42. class cpp_regex_traits;
  43. namespace BOOST_REGEX_DETAIL_NS{
  44. //
  45. // class parser_buf:
  46. // acts as a stream buffer which wraps around a pair of pointers:
  47. //
  48. template <class charT,
  49. class traits = ::std::char_traits<charT> >
  50. class parser_buf : public ::std::basic_streambuf<charT, traits>
  51. {
  52. typedef ::std::basic_streambuf<charT, traits> base_type;
  53. typedef typename base_type::int_type int_type;
  54. typedef typename base_type::char_type char_type;
  55. typedef typename base_type::pos_type pos_type;
  56. typedef ::std::streamsize streamsize;
  57. typedef typename base_type::off_type off_type;
  58. public:
  59. parser_buf() : base_type() { setbuf(0, 0); }
  60. const charT* getnext() { return this->gptr(); }
  61. protected:
  62. std::basic_streambuf<charT, traits>* setbuf(char_type* s, streamsize n) override;
  63. typename parser_buf<charT, traits>::pos_type seekpos(pos_type sp, ::std::ios_base::openmode which) override;
  64. typename parser_buf<charT, traits>::pos_type seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which) override;
  65. private:
  66. parser_buf& operator=(const parser_buf&);
  67. parser_buf(const parser_buf&);
  68. };
  69. template<class charT, class traits>
  70. std::basic_streambuf<charT, traits>*
  71. parser_buf<charT, traits>::setbuf(char_type* s, streamsize n)
  72. {
  73. this->setg(s, s, s + n);
  74. return this;
  75. }
  76. template<class charT, class traits>
  77. typename parser_buf<charT, traits>::pos_type
  78. parser_buf<charT, traits>::seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which)
  79. {
  80. if(which & ::std::ios_base::out)
  81. return pos_type(off_type(-1));
  82. std::ptrdiff_t size = this->egptr() - this->eback();
  83. std::ptrdiff_t pos = this->gptr() - this->eback();
  84. charT* g = this->eback();
  85. switch(static_cast<std::intmax_t>(way))
  86. {
  87. case ::std::ios_base::beg:
  88. if((off < 0) || (off > size))
  89. return pos_type(off_type(-1));
  90. else
  91. this->setg(g, g + off, g + size);
  92. break;
  93. case ::std::ios_base::end:
  94. if((off < 0) || (off > size))
  95. return pos_type(off_type(-1));
  96. else
  97. this->setg(g, g + size - off, g + size);
  98. break;
  99. case ::std::ios_base::cur:
  100. {
  101. std::ptrdiff_t newpos = static_cast<std::ptrdiff_t>(pos + off);
  102. if((newpos < 0) || (newpos > size))
  103. return pos_type(off_type(-1));
  104. else
  105. this->setg(g, g + newpos, g + size);
  106. break;
  107. }
  108. default: ;
  109. }
  110. #ifdef BOOST_REGEX_MSVC
  111. #pragma warning(push)
  112. #pragma warning(disable:4244)
  113. #endif
  114. return static_cast<pos_type>(this->gptr() - this->eback());
  115. #ifdef BOOST_REGEX_MSVC
  116. #pragma warning(pop)
  117. #endif
  118. }
  119. template<class charT, class traits>
  120. typename parser_buf<charT, traits>::pos_type
  121. parser_buf<charT, traits>::seekpos(pos_type sp, ::std::ios_base::openmode which)
  122. {
  123. if(which & ::std::ios_base::out)
  124. return pos_type(off_type(-1));
  125. off_type size = static_cast<off_type>(this->egptr() - this->eback());
  126. charT* g = this->eback();
  127. if(off_type(sp) <= size)
  128. {
  129. this->setg(g, g + off_type(sp), g + size);
  130. }
  131. return pos_type(off_type(-1));
  132. }
  133. //
  134. // class cpp_regex_traits_base:
  135. // acts as a container for locale and the facets we are using.
  136. //
  137. template <class charT>
  138. struct cpp_regex_traits_base
  139. {
  140. cpp_regex_traits_base(const std::locale& l)
  141. { (void)imbue(l); }
  142. std::locale imbue(const std::locale& l);
  143. std::locale m_locale;
  144. std::ctype<charT> const* m_pctype;
  145. std::messages<charT> const* m_pmessages;
  146. std::collate<charT> const* m_pcollate;
  147. bool operator<(const cpp_regex_traits_base& b)const
  148. {
  149. if(m_pctype == b.m_pctype)
  150. {
  151. if(m_pmessages == b.m_pmessages)
  152. {
  153. return m_pcollate < b.m_pcollate;
  154. }
  155. return m_pmessages < b.m_pmessages;
  156. }
  157. return m_pctype < b.m_pctype;
  158. }
  159. bool operator==(const cpp_regex_traits_base& b)const
  160. {
  161. return (m_pctype == b.m_pctype)
  162. && (m_pmessages == b.m_pmessages)
  163. && (m_pcollate == b.m_pcollate);
  164. }
  165. };
  166. template <class charT>
  167. std::locale cpp_regex_traits_base<charT>::imbue(const std::locale& l)
  168. {
  169. std::locale result(m_locale);
  170. m_locale = l;
  171. m_pctype = &std::use_facet<std::ctype<charT>>(l);
  172. m_pmessages = std::has_facet<std::messages<charT> >(l) ? &std::use_facet<std::messages<charT> >(l) : 0;
  173. m_pcollate = &std::use_facet<std::collate<charT> >(l);
  174. return result;
  175. }
  176. //
  177. // class cpp_regex_traits_char_layer:
  178. // implements methods that require specialization for narrow characters:
  179. //
  180. template <class charT>
  181. class cpp_regex_traits_char_layer : public cpp_regex_traits_base<charT>
  182. {
  183. typedef std::basic_string<charT> string_type;
  184. typedef std::map<charT, regex_constants::syntax_type> map_type;
  185. typedef typename map_type::const_iterator map_iterator_type;
  186. public:
  187. cpp_regex_traits_char_layer(const std::locale& l)
  188. : cpp_regex_traits_base<charT>(l)
  189. {
  190. init();
  191. }
  192. cpp_regex_traits_char_layer(const cpp_regex_traits_base<charT>& b)
  193. : cpp_regex_traits_base<charT>(b)
  194. {
  195. init();
  196. }
  197. void init();
  198. regex_constants::syntax_type syntax_type(charT c)const
  199. {
  200. map_iterator_type i = m_char_map.find(c);
  201. return ((i == m_char_map.end()) ? 0 : i->second);
  202. }
  203. regex_constants::escape_syntax_type escape_syntax_type(charT c) const
  204. {
  205. map_iterator_type i = m_char_map.find(c);
  206. if(i == m_char_map.end())
  207. {
  208. if(this->m_pctype->is(std::ctype_base::lower, c)) return regex_constants::escape_type_class;
  209. if(this->m_pctype->is(std::ctype_base::upper, c)) return regex_constants::escape_type_not_class;
  210. return 0;
  211. }
  212. return i->second;
  213. }
  214. private:
  215. string_type get_default_message(regex_constants::syntax_type);
  216. // TODO: use a hash table when available!
  217. map_type m_char_map;
  218. };
  219. template <class charT>
  220. void cpp_regex_traits_char_layer<charT>::init()
  221. {
  222. // we need to start by initialising our syntax map so we know which
  223. // character is used for which purpose:
  224. #ifndef __IBMCPP__
  225. typename std::messages<charT>::catalog cat = static_cast<std::messages<char>::catalog>(-1);
  226. #else
  227. typename std::messages<charT>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1);
  228. #endif
  229. std::string cat_name(cpp_regex_traits<charT>::get_catalog_name());
  230. if((!cat_name.empty()) && (this->m_pmessages != 0))
  231. {
  232. cat = this->m_pmessages->open(
  233. cat_name,
  234. this->m_locale);
  235. if((int)cat < 0)
  236. {
  237. std::string m("Unable to open message catalog: ");
  238. std::runtime_error err(m + cat_name);
  239. boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err);
  240. }
  241. }
  242. //
  243. // if we have a valid catalog then load our messages:
  244. //
  245. if((int)cat >= 0)
  246. {
  247. #ifndef BOOST_NO_EXCEPTIONS
  248. try{
  249. #endif
  250. for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
  251. {
  252. string_type mss = this->m_pmessages->get(cat, 0, i, get_default_message(i));
  253. for(typename string_type::size_type j = 0; j < mss.size(); ++j)
  254. {
  255. m_char_map[mss[j]] = i;
  256. }
  257. }
  258. this->m_pmessages->close(cat);
  259. #ifndef BOOST_NO_EXCEPTIONS
  260. }
  261. catch(...)
  262. {
  263. if(this->m_pmessages)
  264. this->m_pmessages->close(cat);
  265. throw;
  266. }
  267. #endif
  268. }
  269. else
  270. {
  271. for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
  272. {
  273. const char* ptr = get_default_syntax(i);
  274. while(ptr && *ptr)
  275. {
  276. m_char_map[this->m_pctype->widen(*ptr)] = i;
  277. ++ptr;
  278. }
  279. }
  280. }
  281. }
  282. template <class charT>
  283. typename cpp_regex_traits_char_layer<charT>::string_type
  284. cpp_regex_traits_char_layer<charT>::get_default_message(regex_constants::syntax_type i)
  285. {
  286. const char* ptr = get_default_syntax(i);
  287. string_type result;
  288. while(ptr && *ptr)
  289. {
  290. result.append(1, this->m_pctype->widen(*ptr));
  291. ++ptr;
  292. }
  293. return result;
  294. }
  295. //
  296. // specialized version for narrow characters:
  297. //
  298. template <>
  299. class cpp_regex_traits_char_layer<char> : public cpp_regex_traits_base<char>
  300. {
  301. typedef std::string string_type;
  302. public:
  303. cpp_regex_traits_char_layer(const std::locale& l)
  304. : cpp_regex_traits_base<char>(l)
  305. {
  306. init();
  307. }
  308. cpp_regex_traits_char_layer(const cpp_regex_traits_base<char>& l)
  309. : cpp_regex_traits_base<char>(l)
  310. {
  311. init();
  312. }
  313. regex_constants::syntax_type syntax_type(char c)const
  314. {
  315. return m_char_map[static_cast<unsigned char>(c)];
  316. }
  317. regex_constants::escape_syntax_type escape_syntax_type(char c) const
  318. {
  319. return m_char_map[static_cast<unsigned char>(c)];
  320. }
  321. private:
  322. regex_constants::syntax_type m_char_map[1u << CHAR_BIT];
  323. void init();
  324. };
  325. //
  326. // class cpp_regex_traits_implementation:
  327. // provides pimpl implementation for cpp_regex_traits.
  328. //
  329. template <class charT>
  330. class cpp_regex_traits_implementation : public cpp_regex_traits_char_layer<charT>
  331. {
  332. public:
  333. typedef typename cpp_regex_traits<charT>::char_class_type char_class_type;
  334. typedef typename std::ctype<charT>::mask native_mask_type;
  335. typedef typename std::make_unsigned<native_mask_type>::type unsigned_native_mask_type;
  336. static const char_class_type mask_blank = 1u << 24;
  337. static const char_class_type mask_word = 1u << 25;
  338. static const char_class_type mask_unicode = 1u << 26;
  339. static const char_class_type mask_horizontal = 1u << 27;
  340. static const char_class_type mask_vertical = 1u << 28;
  341. typedef std::basic_string<charT> string_type;
  342. typedef charT char_type;
  343. //cpp_regex_traits_implementation();
  344. cpp_regex_traits_implementation(const std::locale& l)
  345. : cpp_regex_traits_char_layer<charT>(l)
  346. {
  347. init();
  348. }
  349. cpp_regex_traits_implementation(const cpp_regex_traits_base<charT>& l)
  350. : cpp_regex_traits_char_layer<charT>(l)
  351. {
  352. init();
  353. }
  354. std::string error_string(regex_constants::error_type n) const
  355. {
  356. if(!m_error_strings.empty())
  357. {
  358. std::map<int, std::string>::const_iterator p = m_error_strings.find(n);
  359. return (p == m_error_strings.end()) ? std::string(get_default_error_string(n)) : p->second;
  360. }
  361. return get_default_error_string(n);
  362. }
  363. char_class_type lookup_classname(const charT* p1, const charT* p2) const
  364. {
  365. char_class_type result = lookup_classname_imp(p1, p2);
  366. if(result == 0)
  367. {
  368. string_type temp(p1, p2);
  369. this->m_pctype->tolower(&*temp.begin(), &*temp.begin() + temp.size());
  370. result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size());
  371. }
  372. return result;
  373. }
  374. string_type lookup_collatename(const charT* p1, const charT* p2) const;
  375. string_type transform_primary(const charT* p1, const charT* p2) const;
  376. string_type transform(const charT* p1, const charT* p2) const;
  377. private:
  378. std::map<int, std::string> m_error_strings; // error messages indexed by numberic ID
  379. std::map<string_type, char_class_type> m_custom_class_names; // character class names
  380. std::map<string_type, string_type> m_custom_collate_names; // collating element names
  381. unsigned m_collate_type; // the form of the collation string
  382. charT m_collate_delim; // the collation group delimiter
  383. //
  384. // helpers:
  385. //
  386. char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const;
  387. void init();
  388. };
  389. template <class charT>
  390. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_blank;
  391. template <class charT>
  392. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_word;
  393. template <class charT>
  394. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_unicode;
  395. template <class charT>
  396. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_vertical;
  397. template <class charT>
  398. typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_horizontal;
  399. template <class charT>
  400. typename cpp_regex_traits_implementation<charT>::string_type
  401. cpp_regex_traits_implementation<charT>::transform_primary(const charT* p1, const charT* p2) const
  402. {
  403. //
  404. // PRECONDITIONS:
  405. //
  406. // A bug in gcc 3.2 (and maybe other versions as well) treats
  407. // p1 as a null terminated string, for efficiency reasons
  408. // we work around this elsewhere, but just assert here that
  409. // we adhere to gcc's (buggy) preconditions...
  410. //
  411. BOOST_REGEX_ASSERT(*p2 == 0);
  412. string_type result;
  413. #if defined(_CPPLIB_VER)
  414. //
  415. // A bug in VC11 and 12 causes the program to hang if we pass a null-string
  416. // to std::collate::transform, but only for certain locales :-(
  417. // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
  418. //
  419. if(*p1 == 0)
  420. {
  421. return string_type(1, charT(0));
  422. }
  423. #endif
  424. //
  425. // swallowing all exceptions here is a bad idea
  426. // however at least one std lib will always throw
  427. // std::bad_alloc for certain arguments...
  428. //
  429. #ifndef BOOST_NO_EXCEPTIONS
  430. try{
  431. #endif
  432. //
  433. // What we do here depends upon the format of the sort key returned by
  434. // sort key returned by this->transform:
  435. //
  436. switch(m_collate_type)
  437. {
  438. case sort_C:
  439. case sort_unknown:
  440. // the best we can do is translate to lower case, then get a regular sort key:
  441. {
  442. result.assign(p1, p2);
  443. this->m_pctype->tolower(&*result.begin(), &*result.begin() + result.size());
  444. result = this->m_pcollate->transform(&*result.begin(), &*result.begin() + result.size());
  445. break;
  446. }
  447. case sort_fixed:
  448. {
  449. // get a regular sort key, and then truncate it:
  450. result.assign(this->m_pcollate->transform(p1, p2));
  451. result.erase(this->m_collate_delim);
  452. break;
  453. }
  454. case sort_delim:
  455. // get a regular sort key, and then truncate everything after the delim:
  456. result.assign(this->m_pcollate->transform(p1, p2));
  457. std::size_t i;
  458. for(i = 0; i < result.size(); ++i)
  459. {
  460. if(result[i] == m_collate_delim)
  461. break;
  462. }
  463. result.erase(i);
  464. break;
  465. }
  466. #ifndef BOOST_NO_EXCEPTIONS
  467. }catch(...){}
  468. #endif
  469. while((!result.empty()) && (charT(0) == *result.rbegin()))
  470. result.erase(result.size() - 1);
  471. if(result.empty())
  472. {
  473. // character is ignorable at the primary level:
  474. result = string_type(1, charT(0));
  475. }
  476. return result;
  477. }
  478. template <class charT>
  479. typename cpp_regex_traits_implementation<charT>::string_type
  480. cpp_regex_traits_implementation<charT>::transform(const charT* p1, const charT* p2) const
  481. {
  482. //
  483. // PRECONDITIONS:
  484. //
  485. // A bug in gcc 3.2 (and maybe other versions as well) treats
  486. // p1 as a null terminated string, for efficiency reasons
  487. // we work around this elsewhere, but just assert here that
  488. // we adhere to gcc's (buggy) preconditions...
  489. //
  490. BOOST_REGEX_ASSERT(*p2 == 0);
  491. //
  492. // swallowing all exceptions here is a bad idea
  493. // however at least one std lib will always throw
  494. // std::bad_alloc for certain arguments...
  495. //
  496. string_type result, result2;
  497. #if defined(_CPPLIB_VER)
  498. //
  499. // A bug in VC11 and 12 causes the program to hang if we pass a null-string
  500. // to std::collate::transform, but only for certain locales :-(
  501. // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
  502. //
  503. if(*p1 == 0)
  504. {
  505. return result;
  506. }
  507. #endif
  508. #ifndef BOOST_NO_EXCEPTIONS
  509. try{
  510. #endif
  511. result = this->m_pcollate->transform(p1, p2);
  512. //
  513. // some implementations (Dinkumware) append unnecessary trailing \0's:
  514. while((!result.empty()) && (charT(0) == *result.rbegin()))
  515. result.erase(result.size() - 1);
  516. //
  517. // We may have NULL's used as separators between sections of the collate string,
  518. // an example would be Boost.Locale. We have no way to detect this case via
  519. // #defines since this can be used with any compiler/platform combination.
  520. // Unfortunately our state machine (which was devised when all implementations
  521. // used underlying C language API's) can't cope with that case. One workaround
  522. // is to replace each character with 2, fortunately this code isn't used that
  523. // much as this is now slower than before :-(
  524. //
  525. typedef typename std::make_unsigned<charT>::type uchar_type;
  526. result2.reserve(result.size() * 2 + 2);
  527. for(unsigned i = 0; i < result.size(); ++i)
  528. {
  529. if(static_cast<uchar_type>(result[i]) == (std::numeric_limits<uchar_type>::max)())
  530. {
  531. result2.append(1, charT((std::numeric_limits<uchar_type>::max)())).append(1, charT('b'));
  532. }
  533. else
  534. {
  535. result2.append(1, static_cast<charT>(1 + static_cast<uchar_type>(result[i]))).append(1, charT('b') - 1);
  536. }
  537. }
  538. BOOST_REGEX_ASSERT(std::find(result2.begin(), result2.end(), charT(0)) == result2.end());
  539. #ifndef BOOST_NO_EXCEPTIONS
  540. }
  541. catch(...)
  542. {
  543. }
  544. #endif
  545. return result2;
  546. }
  547. template <class charT>
  548. typename cpp_regex_traits_implementation<charT>::string_type
  549. cpp_regex_traits_implementation<charT>::lookup_collatename(const charT* p1, const charT* p2) const
  550. {
  551. typedef typename std::map<string_type, string_type>::const_iterator iter_type;
  552. if(!m_custom_collate_names.empty())
  553. {
  554. iter_type pos = m_custom_collate_names.find(string_type(p1, p2));
  555. if(pos != m_custom_collate_names.end())
  556. return pos->second;
  557. }
  558. std::string name(p1, p2);
  559. name = lookup_default_collate_name(name);
  560. if(!name.empty())
  561. return string_type(name.begin(), name.end());
  562. if(p2 - p1 == 1)
  563. return string_type(1, *p1);
  564. return string_type();
  565. }
  566. template <class charT>
  567. void cpp_regex_traits_implementation<charT>::init()
  568. {
  569. #ifndef __IBMCPP__
  570. typename std::messages<charT>::catalog cat = static_cast<std::messages<char>::catalog>(-1);
  571. #else
  572. typename std::messages<charT>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1);
  573. #endif
  574. std::string cat_name(cpp_regex_traits<charT>::get_catalog_name());
  575. if((!cat_name.empty()) && (this->m_pmessages != 0))
  576. {
  577. cat = this->m_pmessages->open(
  578. cat_name,
  579. this->m_locale);
  580. if((int)cat < 0)
  581. {
  582. std::string m("Unable to open message catalog: ");
  583. std::runtime_error err(m + cat_name);
  584. boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err);
  585. }
  586. }
  587. //
  588. // if we have a valid catalog then load our messages:
  589. //
  590. if((int)cat >= 0)
  591. {
  592. //
  593. // Error messages:
  594. //
  595. for(boost::regex_constants::error_type i = static_cast<boost::regex_constants::error_type>(0);
  596. i <= boost::regex_constants::error_unknown;
  597. i = static_cast<boost::regex_constants::error_type>(i + 1))
  598. {
  599. const char* p = get_default_error_string(i);
  600. string_type default_message;
  601. while(*p)
  602. {
  603. default_message.append(1, this->m_pctype->widen(*p));
  604. ++p;
  605. }
  606. string_type s = this->m_pmessages->get(cat, 0, i+200, default_message);
  607. std::string result;
  608. for(std::string::size_type j = 0; j < s.size(); ++j)
  609. {
  610. result.append(1, this->m_pctype->narrow(s[j], 0));
  611. }
  612. m_error_strings[i] = result;
  613. }
  614. //
  615. // Custom class names:
  616. //
  617. static const char_class_type masks[16] =
  618. {
  619. static_cast<unsigned_native_mask_type>(std::ctype<charT>::alnum),
  620. static_cast<unsigned_native_mask_type>(std::ctype<charT>::alpha),
  621. static_cast<unsigned_native_mask_type>(std::ctype<charT>::cntrl),
  622. static_cast<unsigned_native_mask_type>(std::ctype<charT>::digit),
  623. static_cast<unsigned_native_mask_type>(std::ctype<charT>::graph),
  624. cpp_regex_traits_implementation<charT>::mask_horizontal,
  625. static_cast<unsigned_native_mask_type>(std::ctype<charT>::lower),
  626. static_cast<unsigned_native_mask_type>(std::ctype<charT>::print),
  627. static_cast<unsigned_native_mask_type>(std::ctype<charT>::punct),
  628. static_cast<unsigned_native_mask_type>(std::ctype<charT>::space),
  629. static_cast<unsigned_native_mask_type>(std::ctype<charT>::upper),
  630. cpp_regex_traits_implementation<charT>::mask_vertical,
  631. static_cast<unsigned_native_mask_type>(std::ctype<charT>::xdigit),
  632. cpp_regex_traits_implementation<charT>::mask_blank,
  633. cpp_regex_traits_implementation<charT>::mask_word,
  634. cpp_regex_traits_implementation<charT>::mask_unicode,
  635. };
  636. static const string_type null_string;
  637. for(unsigned int j = 0; j <= 13; ++j)
  638. {
  639. string_type s(this->m_pmessages->get(cat, 0, j+300, null_string));
  640. if(!s.empty())
  641. this->m_custom_class_names[s] = masks[j];
  642. }
  643. }
  644. //
  645. // get the collation format used by m_pcollate:
  646. //
  647. m_collate_type = BOOST_REGEX_DETAIL_NS::find_sort_syntax(this, &m_collate_delim);
  648. }
  649. template <class charT>
  650. typename cpp_regex_traits_implementation<charT>::char_class_type
  651. cpp_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const
  652. {
  653. static const char_class_type masks[22] =
  654. {
  655. 0,
  656. static_cast<unsigned_native_mask_type>(std::ctype<char>::alnum),
  657. static_cast<unsigned_native_mask_type>(std::ctype<char>::alpha),
  658. cpp_regex_traits_implementation<charT>::mask_blank,
  659. static_cast<unsigned_native_mask_type>(std::ctype<char>::cntrl),
  660. static_cast<unsigned_native_mask_type>(std::ctype<char>::digit),
  661. static_cast<unsigned_native_mask_type>(std::ctype<char>::digit),
  662. static_cast<unsigned_native_mask_type>(std::ctype<char>::graph),
  663. cpp_regex_traits_implementation<charT>::mask_horizontal,
  664. static_cast<unsigned_native_mask_type>(std::ctype<char>::lower),
  665. static_cast<unsigned_native_mask_type>(std::ctype<char>::lower),
  666. static_cast<unsigned_native_mask_type>(std::ctype<char>::print),
  667. static_cast<unsigned_native_mask_type>(std::ctype<char>::punct),
  668. static_cast<unsigned_native_mask_type>(std::ctype<char>::space),
  669. static_cast<unsigned_native_mask_type>(std::ctype<char>::space),
  670. static_cast<unsigned_native_mask_type>(std::ctype<char>::upper),
  671. cpp_regex_traits_implementation<charT>::mask_unicode,
  672. static_cast<unsigned_native_mask_type>(std::ctype<char>::upper),
  673. cpp_regex_traits_implementation<charT>::mask_vertical,
  674. static_cast<unsigned_native_mask_type>(std::ctype<char>::alnum) | cpp_regex_traits_implementation<charT>::mask_word,
  675. static_cast<unsigned_native_mask_type>(std::ctype<char>::alnum) | cpp_regex_traits_implementation<charT>::mask_word,
  676. static_cast<unsigned_native_mask_type>(std::ctype<char>::xdigit),
  677. };
  678. if(!m_custom_class_names.empty())
  679. {
  680. typedef typename std::map<std::basic_string<charT>, char_class_type>::const_iterator map_iter;
  681. map_iter pos = m_custom_class_names.find(string_type(p1, p2));
  682. if(pos != m_custom_class_names.end())
  683. return pos->second;
  684. }
  685. std::size_t state_id = 1 + BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
  686. BOOST_REGEX_ASSERT(state_id < sizeof(masks) / sizeof(masks[0]));
  687. return masks[state_id];
  688. }
  689. template <class charT>
  690. inline std::shared_ptr<const cpp_regex_traits_implementation<charT> > create_cpp_regex_traits(const std::locale& l)
  691. {
  692. cpp_regex_traits_base<charT> key(l);
  693. return ::boost::object_cache<cpp_regex_traits_base<charT>, cpp_regex_traits_implementation<charT> >::get(key, 5);
  694. }
  695. } // BOOST_REGEX_DETAIL_NS
  696. template <class charT>
  697. class cpp_regex_traits
  698. {
  699. private:
  700. typedef std::ctype<charT> ctype_type;
  701. public:
  702. typedef charT char_type;
  703. typedef std::size_t size_type;
  704. typedef std::basic_string<char_type> string_type;
  705. typedef std::locale locale_type;
  706. typedef std::uint_least32_t char_class_type;
  707. struct boost_extensions_tag{};
  708. cpp_regex_traits()
  709. : m_pimpl(BOOST_REGEX_DETAIL_NS::create_cpp_regex_traits<charT>(std::locale()))
  710. { }
  711. static size_type length(const char_type* p)
  712. {
  713. return std::char_traits<charT>::length(p);
  714. }
  715. regex_constants::syntax_type syntax_type(charT c)const
  716. {
  717. return m_pimpl->syntax_type(c);
  718. }
  719. regex_constants::escape_syntax_type escape_syntax_type(charT c) const
  720. {
  721. return m_pimpl->escape_syntax_type(c);
  722. }
  723. charT translate(charT c) const
  724. {
  725. return c;
  726. }
  727. charT translate_nocase(charT c) const
  728. {
  729. return m_pimpl->m_pctype->tolower(c);
  730. }
  731. charT translate(charT c, bool icase) const
  732. {
  733. return icase ? m_pimpl->m_pctype->tolower(c) : c;
  734. }
  735. charT tolower(charT c) const
  736. {
  737. return m_pimpl->m_pctype->tolower(c);
  738. }
  739. charT toupper(charT c) const
  740. {
  741. return m_pimpl->m_pctype->toupper(c);
  742. }
  743. string_type transform(const charT* p1, const charT* p2) const
  744. {
  745. return m_pimpl->transform(p1, p2);
  746. }
  747. string_type transform_primary(const charT* p1, const charT* p2) const
  748. {
  749. return m_pimpl->transform_primary(p1, p2);
  750. }
  751. char_class_type lookup_classname(const charT* p1, const charT* p2) const
  752. {
  753. return m_pimpl->lookup_classname(p1, p2);
  754. }
  755. string_type lookup_collatename(const charT* p1, const charT* p2) const
  756. {
  757. return m_pimpl->lookup_collatename(p1, p2);
  758. }
  759. bool isctype(charT c, char_class_type f) const
  760. {
  761. typedef typename std::ctype<charT>::mask ctype_mask;
  762. static const ctype_mask mask_base =
  763. static_cast<ctype_mask>(
  764. std::ctype<charT>::alnum
  765. | std::ctype<charT>::alpha
  766. | std::ctype<charT>::cntrl
  767. | std::ctype<charT>::digit
  768. | std::ctype<charT>::graph
  769. | std::ctype<charT>::lower
  770. | std::ctype<charT>::print
  771. | std::ctype<charT>::punct
  772. | std::ctype<charT>::space
  773. | std::ctype<charT>::upper
  774. | std::ctype<charT>::xdigit);
  775. if((f & mask_base)
  776. && (m_pimpl->m_pctype->is(
  777. static_cast<ctype_mask>(f & mask_base), c)))
  778. return true;
  779. else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation<charT>::mask_unicode) && BOOST_REGEX_DETAIL_NS::is_extended(c))
  780. return true;
  781. else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation<charT>::mask_word) && (c == '_'))
  782. return true;
  783. else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation<charT>::mask_blank)
  784. && m_pimpl->m_pctype->is(std::ctype<charT>::space, c)
  785. && !BOOST_REGEX_DETAIL_NS::is_separator(c))
  786. return true;
  787. else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation<charT>::mask_vertical)
  788. && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v')))
  789. return true;
  790. else if((f & BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation<charT>::mask_horizontal)
  791. && this->isctype(c, std::ctype<charT>::space) && !this->isctype(c, BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation<charT>::mask_vertical))
  792. return true;
  793. #ifdef __CYGWIN__
  794. //
  795. // Cygwin has a buggy ctype facet, see https://www.cygwin.com/ml/cygwin/2012-08/msg00178.html:
  796. //
  797. else if((f & std::ctype<charT>::xdigit) == std::ctype<charT>::xdigit)
  798. {
  799. if((c >= 'a') && (c <= 'f'))
  800. return true;
  801. if((c >= 'A') && (c <= 'F'))
  802. return true;
  803. }
  804. #endif
  805. return false;
  806. }
  807. std::intmax_t toi(const charT*& p1, const charT* p2, int radix)const;
  808. int value(charT c, int radix)const
  809. {
  810. const charT* pc = &c;
  811. return (int)toi(pc, pc + 1, radix);
  812. }
  813. locale_type imbue(locale_type l)
  814. {
  815. std::locale result(getloc());
  816. m_pimpl = BOOST_REGEX_DETAIL_NS::create_cpp_regex_traits<charT>(l);
  817. return result;
  818. }
  819. locale_type getloc()const
  820. {
  821. return m_pimpl->m_locale;
  822. }
  823. std::string error_string(regex_constants::error_type n) const
  824. {
  825. return m_pimpl->error_string(n);
  826. }
  827. //
  828. // extension:
  829. // set the name of the message catalog in use (defaults to "boost_regex").
  830. //
  831. static std::string catalog_name(const std::string& name);
  832. static std::string get_catalog_name();
  833. private:
  834. std::shared_ptr<const BOOST_REGEX_DETAIL_NS::cpp_regex_traits_implementation<charT> > m_pimpl;
  835. //
  836. // catalog name handler:
  837. //
  838. static std::string& get_catalog_name_inst();
  839. #ifdef BOOST_HAS_THREADS
  840. static std::mutex& get_mutex_inst();
  841. #endif
  842. };
  843. template <class charT>
  844. std::intmax_t cpp_regex_traits<charT>::toi(const charT*& first, const charT* last, int radix)const
  845. {
  846. BOOST_REGEX_DETAIL_NS::parser_buf<charT> sbuf; // buffer for parsing numbers.
  847. std::basic_istream<charT> is(&sbuf); // stream for parsing numbers.
  848. // we do NOT want to parse any thousands separators inside the stream:
  849. last = std::find(first, last, std::use_facet<std::numpunct<charT>>(is.getloc()).thousands_sep());
  850. sbuf.pubsetbuf(const_cast<charT*>(static_cast<const charT*>(first)), static_cast<std::streamsize>(last-first));
  851. is.clear();
  852. if(std::abs(radix) == 16) is >> std::hex;
  853. else if(std::abs(radix) == 8) is >> std::oct;
  854. else is >> std::dec;
  855. std::intmax_t val;
  856. if(is >> val)
  857. {
  858. first = first + ((last - first) - sbuf.in_avail());
  859. return val;
  860. }
  861. else
  862. return -1;
  863. }
  864. template <class charT>
  865. std::string cpp_regex_traits<charT>::catalog_name(const std::string& name)
  866. {
  867. #ifdef BOOST_HAS_THREADS
  868. std::lock_guard<std::mutex> lk(get_mutex_inst());
  869. #endif
  870. std::string result(get_catalog_name_inst());
  871. get_catalog_name_inst() = name;
  872. return result;
  873. }
  874. template <class charT>
  875. std::string& cpp_regex_traits<charT>::get_catalog_name_inst()
  876. {
  877. static std::string s_name;
  878. return s_name;
  879. }
  880. template <class charT>
  881. std::string cpp_regex_traits<charT>::get_catalog_name()
  882. {
  883. #ifdef BOOST_HAS_THREADS
  884. std::lock_guard<std::mutex> lk(get_mutex_inst());
  885. #endif
  886. std::string result(get_catalog_name_inst());
  887. return result;
  888. }
  #ifdef BOOST_HAS_THREADS
  //
  // The mutex used by catalog_name() and get_catalog_name(): a
  // function-local static handed out by reference.
  //
  template <class charT>
  std::mutex& cpp_regex_traits<charT>::get_mutex_inst()
  {
     static std::mutex catalog_mutex;
     return catalog_mutex;
  }
  #endif
  897. namespace BOOST_REGEX_DETAIL_NS {
  898. inline void cpp_regex_traits_char_layer<char>::init()
  899. {
  900. // we need to start by initialising our syntax map so we know which
  901. // character is used for which purpose:
  902. std::memset(m_char_map, 0, sizeof(m_char_map));
  903. #ifndef __IBMCPP__
  904. std::messages<char>::catalog cat = static_cast<std::messages<char>::catalog>(-1);
  905. #else
  906. std::messages<char>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1);
  907. #endif
  908. std::string cat_name(cpp_regex_traits<char>::get_catalog_name());
  909. if ((!cat_name.empty()) && (m_pmessages != 0))
  910. {
  911. cat = this->m_pmessages->open(
  912. cat_name,
  913. this->m_locale);
  914. if ((int)cat < 0)
  915. {
  916. std::string m("Unable to open message catalog: ");
  917. std::runtime_error err(m + cat_name);
  918. boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err);
  919. }
  920. }
  921. //
  922. // if we have a valid catalog then load our messages:
  923. //
  924. if ((int)cat >= 0)
  925. {
  926. #ifndef BOOST_NO_EXCEPTIONS
  927. try {
  928. #endif
  929. for (regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
  930. {
  931. string_type mss = this->m_pmessages->get(cat, 0, i, get_default_syntax(i));
  932. for (string_type::size_type j = 0; j < mss.size(); ++j)
  933. {
  934. m_char_map[static_cast<unsigned char>(mss[j])] = i;
  935. }
  936. }
  937. this->m_pmessages->close(cat);
  938. #ifndef BOOST_NO_EXCEPTIONS
  939. }
  940. catch (...)
  941. {
  942. this->m_pmessages->close(cat);
  943. throw;
  944. }
  945. #endif
  946. }
  947. else
  948. {
  949. for (regex_constants::syntax_type j = 1; j < regex_constants::syntax_max; ++j)
  950. {
  951. const char* ptr = get_default_syntax(j);
  952. while (ptr && *ptr)
  953. {
  954. m_char_map[static_cast<unsigned char>(*ptr)] = j;
  955. ++ptr;
  956. }
  957. }
  958. }
  959. //
  960. // finish off by calculating our escape types:
  961. //
  962. unsigned char i = 'A';
  963. do
  964. {
  965. if (m_char_map[i] == 0)
  966. {
  967. if (this->m_pctype->is(std::ctype_base::lower, i))
  968. m_char_map[i] = regex_constants::escape_type_class;
  969. else if (this->m_pctype->is(std::ctype_base::upper, i))
  970. m_char_map[i] = regex_constants::escape_type_not_class;
  971. }
  972. } while (0xFF != i++);
  973. }
  974. } // namespace detail
  975. } // boost
  976. #ifdef BOOST_REGEX_MSVC
  977. #pragma warning(pop)
  978. #endif
  979. #endif