icu.hpp 64 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE icu.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Unicode regular expressions on top of the ICU Library.
  16. */
  17. #ifndef BOOST_REGEX_ICU_V4_HPP
  18. #define BOOST_REGEX_ICU_V4_HPP
  19. #include <boost/config.hpp>
  20. #include <unicode/utypes.h>
  21. #include <unicode/uchar.h>
  22. #include <unicode/coll.h>
  23. #include <boost/regex.hpp>
  24. #include <boost/regex/v4/unicode_iterator.hpp>
  25. #include <boost/mpl/int_fwd.hpp>
  26. #include <boost/static_assert.hpp>
  27. #include <bitset>
  28. #ifdef BOOST_MSVC
  29. #pragma warning (push)
  30. #pragma warning (disable: 4251)
  31. #endif
  32. namespace boost {
  33. namespace BOOST_REGEX_DETAIL_NS {
  34. //
  35. // Implementation details:
  36. //
  37. class icu_regex_traits_implementation
  38. {
  39. typedef UChar32 char_type;
  40. typedef std::size_t size_type;
  41. typedef std::vector<char_type> string_type;
  42. typedef U_NAMESPACE_QUALIFIER Locale locale_type;
  43. typedef boost::uint_least32_t char_class_type;
  44. public:
  45. icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& l)
  46. : m_locale(l)
  47. {
  48. UErrorCode success = U_ZERO_ERROR;
  49. m_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success));
  50. if (U_SUCCESS(success) == 0)
  51. init_error();
  52. m_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::IDENTICAL);
  53. success = U_ZERO_ERROR;
  54. m_primary_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success));
  55. if (U_SUCCESS(success) == 0)
  56. init_error();
  57. m_primary_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::PRIMARY);
  58. }
  59. U_NAMESPACE_QUALIFIER Locale getloc()const
  60. {
  61. return m_locale;
  62. }
  63. string_type do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const
  64. {
  65. // TODO make thread safe!!!! :
  66. typedef u32_to_u16_iterator<const char_type*, ::UChar> itt;
  67. itt i(p1), j(p2);
  68. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  69. std::vector< ::UChar> t(i, j);
  70. #else
  71. std::vector< ::UChar> t;
  72. while (i != j)
  73. t.push_back(*i++);
  74. #endif
  75. ::uint8_t result[100];
  76. ::int32_t len;
  77. if (!t.empty())
  78. len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), result, sizeof(result));
  79. else
  80. len = pcoll->getSortKey(static_cast<UChar const*>(0), static_cast< ::int32_t>(0), result, sizeof(result));
  81. if (std::size_t(len) > sizeof(result))
  82. {
  83. scoped_array< ::uint8_t> presult(new ::uint8_t[len + 1]);
  84. if (!t.empty())
  85. len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), presult.get(), len + 1);
  86. else
  87. len = pcoll->getSortKey(static_cast<UChar const*>(0), static_cast< ::int32_t>(0), presult.get(), len + 1);
  88. if ((0 == presult[len - 1]) && (len > 1))
  89. --len;
  90. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  91. return string_type(presult.get(), presult.get() + len);
  92. #else
  93. string_type sresult;
  94. ::uint8_t const* ia = presult.get();
  95. ::uint8_t const* ib = presult.get() + len;
  96. while (ia != ib)
  97. sresult.push_back(*ia++);
  98. return sresult;
  99. #endif
  100. }
  101. if ((0 == result[len - 1]) && (len > 1))
  102. --len;
  103. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  104. return string_type(result, result + len);
  105. #else
  106. string_type sresult;
  107. ::uint8_t const* ia = result;
  108. ::uint8_t const* ib = result + len;
  109. while (ia != ib)
  110. sresult.push_back(*ia++);
  111. return sresult;
  112. #endif
  113. }
  114. string_type transform(const char_type* p1, const char_type* p2) const
  115. {
  116. return do_transform(p1, p2, m_collator.get());
  117. }
  118. string_type transform_primary(const char_type* p1, const char_type* p2) const
  119. {
  120. return do_transform(p1, p2, m_primary_collator.get());
  121. }
  122. private:
  123. void init_error()
  124. {
  125. std::runtime_error e("Could not initialize ICU resources");
  126. boost::throw_exception(e);
  127. }
  128. U_NAMESPACE_QUALIFIER Locale m_locale; // The ICU locale that we're using
  129. boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_collator; // The full collation object
  130. boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_primary_collator; // The primary collation object
  131. };
  132. inline boost::shared_ptr<icu_regex_traits_implementation> get_icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& loc)
  133. {
  134. return boost::shared_ptr<icu_regex_traits_implementation>(new icu_regex_traits_implementation(loc));
  135. }
  136. }
  137. class icu_regex_traits
  138. {
  139. public:
  140. typedef UChar32 char_type;
  141. typedef std::size_t size_type;
  142. typedef std::vector<char_type> string_type;
  143. typedef U_NAMESPACE_QUALIFIER Locale locale_type;
  144. #ifdef BOOST_NO_INT64_T
  145. typedef std::bitset<64> char_class_type;
  146. #else
  147. typedef boost::uint64_t char_class_type;
  148. #endif
  149. struct boost_extensions_tag {};
  150. icu_regex_traits()
  151. : m_pimpl(BOOST_REGEX_DETAIL_NS::get_icu_regex_traits_implementation(U_NAMESPACE_QUALIFIER Locale()))
  152. {
  153. }
  154. static size_type length(const char_type* p)
  155. {
  156. size_type result = 0;
  157. while (*p)
  158. {
  159. ++p;
  160. ++result;
  161. }
  162. return result;
  163. }
  164. ::boost::regex_constants::syntax_type syntax_type(char_type c)const
  165. {
  166. return ((c < 0x7f) && (c > 0)) ? BOOST_REGEX_DETAIL_NS::get_default_syntax_type(static_cast<char>(c)) : regex_constants::syntax_char;
  167. }
  168. ::boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c) const
  169. {
  170. return ((c < 0x7f) && (c > 0)) ? BOOST_REGEX_DETAIL_NS::get_default_escape_syntax_type(static_cast<char>(c)) : regex_constants::syntax_char;
  171. }
  172. char_type translate(char_type c) const
  173. {
  174. return c;
  175. }
  176. char_type translate_nocase(char_type c) const
  177. {
  178. return ::u_tolower(c);
  179. }
  180. char_type translate(char_type c, bool icase) const
  181. {
  182. return icase ? translate_nocase(c) : translate(c);
  183. }
  184. char_type tolower(char_type c) const
  185. {
  186. return ::u_tolower(c);
  187. }
  188. char_type toupper(char_type c) const
  189. {
  190. return ::u_toupper(c);
  191. }
  192. string_type transform(const char_type* p1, const char_type* p2) const
  193. {
  194. return m_pimpl->transform(p1, p2);
  195. }
  196. string_type transform_primary(const char_type* p1, const char_type* p2) const
  197. {
  198. return m_pimpl->transform_primary(p1, p2);
  199. }
  200. char_class_type lookup_classname(const char_type* p1, const char_type* p2) const
  201. {
  202. static const char_class_type mask_blank = char_class_type(1) << offset_blank;
  203. static const char_class_type mask_space = char_class_type(1) << offset_space;
  204. static const char_class_type mask_xdigit = char_class_type(1) << offset_xdigit;
  205. static const char_class_type mask_underscore = char_class_type(1) << offset_underscore;
  206. static const char_class_type mask_unicode = char_class_type(1) << offset_unicode;
  207. static const char_class_type mask_any = char_class_type(1) << offset_any;
  208. static const char_class_type mask_ascii = char_class_type(1) << offset_ascii;
  209. static const char_class_type mask_horizontal = char_class_type(1) << offset_horizontal;
  210. static const char_class_type mask_vertical = char_class_type(1) << offset_vertical;
  211. static const char_class_type masks[] =
  212. {
  213. 0,
  214. U_GC_L_MASK | U_GC_ND_MASK,
  215. U_GC_L_MASK,
  216. mask_blank,
  217. U_GC_CC_MASK | U_GC_CF_MASK | U_GC_ZL_MASK | U_GC_ZP_MASK,
  218. U_GC_ND_MASK,
  219. U_GC_ND_MASK,
  220. (0x3FFFFFFFu) & ~(U_GC_CC_MASK | U_GC_CF_MASK | U_GC_CS_MASK | U_GC_CN_MASK | U_GC_Z_MASK),
  221. mask_horizontal,
  222. U_GC_LL_MASK,
  223. U_GC_LL_MASK,
  224. ~(U_GC_C_MASK),
  225. U_GC_P_MASK,
  226. char_class_type(U_GC_Z_MASK) | mask_space,
  227. char_class_type(U_GC_Z_MASK) | mask_space,
  228. U_GC_LU_MASK,
  229. mask_unicode,
  230. U_GC_LU_MASK,
  231. mask_vertical,
  232. char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore,
  233. char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore,
  234. char_class_type(U_GC_ND_MASK) | mask_xdigit,
  235. };
  236. int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
  237. if (idx >= 0)
  238. return masks[idx + 1];
  239. char_class_type result = lookup_icu_mask(p1, p2);
  240. if (result != 0)
  241. return result;
  242. if (idx < 0)
  243. {
  244. string_type s(p1, p2);
  245. string_type::size_type i = 0;
  246. while (i < s.size())
  247. {
  248. s[i] = static_cast<char>((::u_tolower)(s[i]));
  249. if (::u_isspace(s[i]) || (s[i] == '-') || (s[i] == '_'))
  250. s.erase(s.begin() + i, s.begin() + i + 1);
  251. else
  252. {
  253. s[i] = static_cast<char>((::u_tolower)(s[i]));
  254. ++i;
  255. }
  256. }
  257. if (!s.empty())
  258. idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
  259. if (idx >= 0)
  260. return masks[idx + 1];
  261. if (!s.empty())
  262. result = lookup_icu_mask(&*s.begin(), &*s.begin() + s.size());
  263. if (result != 0)
  264. return result;
  265. }
  266. BOOST_ASSERT(std::size_t(idx + 1) < sizeof(masks) / sizeof(masks[0]));
  267. return masks[idx + 1];
  268. }
  269. string_type lookup_collatename(const char_type* p1, const char_type* p2) const
  270. {
  271. string_type result;
  272. #ifdef BOOST_NO_CXX98_BINDERS
  273. if (std::find_if(p1, p2, std::bind(std::greater< ::UChar32>(), std::placeholders::_1, 0x7f)) == p2)
  274. #else
  275. if (std::find_if(p1, p2, std::bind2nd(std::greater< ::UChar32>(), 0x7f)) == p2)
  276. #endif
  277. {
  278. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  279. std::string s(p1, p2);
  280. #else
  281. std::string s;
  282. const char_type* p3 = p1;
  283. while (p3 != p2)
  284. s.append(1, *p3++);
  285. #endif
  286. // Try Unicode name:
  287. UErrorCode err = U_ZERO_ERROR;
  288. UChar32 c = ::u_charFromName(U_UNICODE_CHAR_NAME, s.c_str(), &err);
  289. if (U_SUCCESS(err))
  290. {
  291. result.push_back(c);
  292. return result;
  293. }
  294. // Try Unicode-extended name:
  295. err = U_ZERO_ERROR;
  296. c = ::u_charFromName(U_EXTENDED_CHAR_NAME, s.c_str(), &err);
  297. if (U_SUCCESS(err))
  298. {
  299. result.push_back(c);
  300. return result;
  301. }
  302. // try POSIX name:
  303. s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s);
  304. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  305. result.assign(s.begin(), s.end());
  306. #else
  307. result.clear();
  308. std::string::const_iterator si, sj;
  309. si = s.begin();
  310. sj = s.end();
  311. while (si != sj)
  312. result.push_back(*si++);
  313. #endif
  314. }
  315. if (result.empty() && (p2 - p1 == 1))
  316. result.push_back(*p1);
  317. return result;
  318. }
  319. bool isctype(char_type c, char_class_type f) const
  320. {
  321. static const char_class_type mask_blank = char_class_type(1) << offset_blank;
  322. static const char_class_type mask_space = char_class_type(1) << offset_space;
  323. static const char_class_type mask_xdigit = char_class_type(1) << offset_xdigit;
  324. static const char_class_type mask_underscore = char_class_type(1) << offset_underscore;
  325. static const char_class_type mask_unicode = char_class_type(1) << offset_unicode;
  326. static const char_class_type mask_any = char_class_type(1) << offset_any;
  327. static const char_class_type mask_ascii = char_class_type(1) << offset_ascii;
  328. static const char_class_type mask_horizontal = char_class_type(1) << offset_horizontal;
  329. static const char_class_type mask_vertical = char_class_type(1) << offset_vertical;
  330. // check for standard catagories first:
  331. char_class_type m = char_class_type(static_cast<char_class_type>(1) << u_charType(c));
  332. if ((m & f) != 0)
  333. return true;
  334. // now check for special cases:
  335. if (((f & mask_blank) != 0) && u_isblank(c))
  336. return true;
  337. if (((f & mask_space) != 0) && u_isspace(c))
  338. return true;
  339. if (((f & mask_xdigit) != 0) && (u_digit(c, 16) >= 0))
  340. return true;
  341. if (((f & mask_unicode) != 0) && (c >= 0x100))
  342. return true;
  343. if (((f & mask_underscore) != 0) && (c == '_'))
  344. return true;
  345. if (((f & mask_any) != 0) && (c <= 0x10FFFF))
  346. return true;
  347. if (((f & mask_ascii) != 0) && (c <= 0x7F))
  348. return true;
  349. if (((f & mask_vertical) != 0) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == static_cast<char_type>('\v')) || (m == U_GC_ZL_MASK) || (m == U_GC_ZP_MASK)))
  350. return true;
  351. if (((f & mask_horizontal) != 0) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && u_isspace(c) && (c != static_cast<char_type>('\v')))
  352. return true;
  353. return false;
  354. }
  355. boost::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const
  356. {
  357. return BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
  358. }
  359. int value(char_type c, int radix)const
  360. {
  361. return u_digit(c, static_cast< ::int8_t>(radix));
  362. }
  363. locale_type imbue(locale_type l)
  364. {
  365. locale_type result(m_pimpl->getloc());
  366. m_pimpl = BOOST_REGEX_DETAIL_NS::get_icu_regex_traits_implementation(l);
  367. return result;
  368. }
  369. locale_type getloc()const
  370. {
  371. return locale_type();
  372. }
  373. std::string error_string(::boost::regex_constants::error_type n) const
  374. {
  375. return BOOST_REGEX_DETAIL_NS::get_default_error_string(n);
  376. }
  377. private:
  378. icu_regex_traits(const icu_regex_traits&);
  379. icu_regex_traits& operator=(const icu_regex_traits&);
  380. //
  381. // define the bitmasks offsets we need for additional character properties:
  382. //
  383. enum {
  384. offset_blank = U_CHAR_CATEGORY_COUNT,
  385. offset_space = U_CHAR_CATEGORY_COUNT + 1,
  386. offset_xdigit = U_CHAR_CATEGORY_COUNT + 2,
  387. offset_underscore = U_CHAR_CATEGORY_COUNT + 3,
  388. offset_unicode = U_CHAR_CATEGORY_COUNT + 4,
  389. offset_any = U_CHAR_CATEGORY_COUNT + 5,
  390. offset_ascii = U_CHAR_CATEGORY_COUNT + 6,
  391. offset_horizontal = U_CHAR_CATEGORY_COUNT + 7,
  392. offset_vertical = U_CHAR_CATEGORY_COUNT + 8
  393. };
  394. static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2)
  395. {
  396. static const char_class_type mask_blank = char_class_type(1) << offset_blank;
  397. static const char_class_type mask_space = char_class_type(1) << offset_space;
  398. static const char_class_type mask_xdigit = char_class_type(1) << offset_xdigit;
  399. static const char_class_type mask_underscore = char_class_type(1) << offset_underscore;
  400. static const char_class_type mask_unicode = char_class_type(1) << offset_unicode;
  401. static const char_class_type mask_any = char_class_type(1) << offset_any;
  402. static const char_class_type mask_ascii = char_class_type(1) << offset_ascii;
  403. static const char_class_type mask_horizontal = char_class_type(1) << offset_horizontal;
  404. static const char_class_type mask_vertical = char_class_type(1) << offset_vertical;
  405. static const ::UChar32 prop_name_table[] = {
  406. /* any */ 'a', 'n', 'y',
  407. /* ascii */ 'a', 's', 'c', 'i', 'i',
  408. /* assigned */ 'a', 's', 's', 'i', 'g', 'n', 'e', 'd',
  409. /* c* */ 'c', '*',
  410. /* cc */ 'c', 'c',
  411. /* cf */ 'c', 'f',
  412. /* closepunctuation */ 'c', 'l', 'o', 's', 'e', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
  413. /* cn */ 'c', 'n',
  414. /* co */ 'c', 'o',
  415. /* connectorpunctuation */ 'c', 'o', 'n', 'n', 'e', 'c', 't', 'o', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
  416. /* control */ 'c', 'o', 'n', 't', 'r', 'o', 'l',
  417. /* cs */ 'c', 's',
  418. /* currencysymbol */ 'c', 'u', 'r', 'r', 'e', 'n', 'c', 'y', 's', 'y', 'm', 'b', 'o', 'l',
  419. /* dashpunctuation */ 'd', 'a', 's', 'h', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
  420. /* decimaldigitnumber */ 'd', 'e', 'c', 'i', 'm', 'a', 'l', 'd', 'i', 'g', 'i', 't', 'n', 'u', 'm', 'b', 'e', 'r',
  421. /* enclosingmark */ 'e', 'n', 'c', 'l', 'o', 's', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
  422. /* finalpunctuation */ 'f', 'i', 'n', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
  423. /* format */ 'f', 'o', 'r', 'm', 'a', 't',
  424. /* initialpunctuation */ 'i', 'n', 'i', 't', 'i', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
  425. /* l* */ 'l', '*',
  426. /* letter */ 'l', 'e', 't', 't', 'e', 'r',
  427. /* letternumber */ 'l', 'e', 't', 't', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r',
  428. /* lineseparator */ 'l', 'i', 'n', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
  429. /* ll */ 'l', 'l',
  430. /* lm */ 'l', 'm',
  431. /* lo */ 'l', 'o',
  432. /* lowercaseletter */ 'l', 'o', 'w', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
  433. /* lt */ 'l', 't',
  434. /* lu */ 'l', 'u',
  435. /* m* */ 'm', '*',
  436. /* mark */ 'm', 'a', 'r', 'k',
  437. /* mathsymbol */ 'm', 'a', 't', 'h', 's', 'y', 'm', 'b', 'o', 'l',
  438. /* mc */ 'm', 'c',
  439. /* me */ 'm', 'e',
  440. /* mn */ 'm', 'n',
  441. /* modifierletter */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r',
  442. /* modifiersymbol */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l',
  443. /* n* */ 'n', '*',
  444. /* nd */ 'n', 'd',
  445. /* nl */ 'n', 'l',
  446. /* no */ 'n', 'o',
  447. /* nonspacingmark */ 'n', 'o', 'n', 's', 'p', 'a', 'c', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
  448. /* notassigned */ 'n', 'o', 't', 'a', 's', 's', 'i', 'g', 'n', 'e', 'd',
  449. /* number */ 'n', 'u', 'm', 'b', 'e', 'r',
  450. /* openpunctuation */ 'o', 'p', 'e', 'n', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
  451. /* other */ 'o', 't', 'h', 'e', 'r',
  452. /* otherletter */ 'o', 't', 'h', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r',
  453. /* othernumber */ 'o', 't', 'h', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r',
  454. /* otherpunctuation */ 'o', 't', 'h', 'e', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
  455. /* othersymbol */ 'o', 't', 'h', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l',
  456. /* p* */ 'p', '*',
  457. /* paragraphseparator */ 'p', 'a', 'r', 'a', 'g', 'r', 'a', 'p', 'h', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
  458. /* pc */ 'p', 'c',
  459. /* pd */ 'p', 'd',
  460. /* pe */ 'p', 'e',
  461. /* pf */ 'p', 'f',
  462. /* pi */ 'p', 'i',
  463. /* po */ 'p', 'o',
  464. /* privateuse */ 'p', 'r', 'i', 'v', 'a', 't', 'e', 'u', 's', 'e',
  465. /* ps */ 'p', 's',
  466. /* punctuation */ 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
  467. /* s* */ 's', '*',
  468. /* sc */ 's', 'c',
  469. /* separator */ 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
  470. /* sk */ 's', 'k',
  471. /* sm */ 's', 'm',
  472. /* so */ 's', 'o',
  473. /* spaceseparator */ 's', 'p', 'a', 'c', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
  474. /* spacingcombiningmark */ 's', 'p', 'a', 'c', 'i', 'n', 'g', 'c', 'o', 'm', 'b', 'i', 'n', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
  475. /* surrogate */ 's', 'u', 'r', 'r', 'o', 'g', 'a', 't', 'e',
  476. /* symbol */ 's', 'y', 'm', 'b', 'o', 'l',
  477. /* titlecase */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e',
  478. /* titlecaseletter */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
  479. /* uppercaseletter */ 'u', 'p', 'p', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
  480. /* z* */ 'z', '*',
  481. /* zl */ 'z', 'l',
  482. /* zp */ 'z', 'p',
  483. /* zs */ 'z', 's',
  484. };
  485. static const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32> range_data[] = {
  486. { prop_name_table + 0, prop_name_table + 3, }, // any
  487. { prop_name_table + 3, prop_name_table + 8, }, // ascii
  488. { prop_name_table + 8, prop_name_table + 16, }, // assigned
  489. { prop_name_table + 16, prop_name_table + 18, }, // c*
  490. { prop_name_table + 18, prop_name_table + 20, }, // cc
  491. { prop_name_table + 20, prop_name_table + 22, }, // cf
  492. { prop_name_table + 22, prop_name_table + 38, }, // closepunctuation
  493. { prop_name_table + 38, prop_name_table + 40, }, // cn
  494. { prop_name_table + 40, prop_name_table + 42, }, // co
  495. { prop_name_table + 42, prop_name_table + 62, }, // connectorpunctuation
  496. { prop_name_table + 62, prop_name_table + 69, }, // control
  497. { prop_name_table + 69, prop_name_table + 71, }, // cs
  498. { prop_name_table + 71, prop_name_table + 85, }, // currencysymbol
  499. { prop_name_table + 85, prop_name_table + 100, }, // dashpunctuation
  500. { prop_name_table + 100, prop_name_table + 118, }, // decimaldigitnumber
  501. { prop_name_table + 118, prop_name_table + 131, }, // enclosingmark
  502. { prop_name_table + 131, prop_name_table + 147, }, // finalpunctuation
  503. { prop_name_table + 147, prop_name_table + 153, }, // format
  504. { prop_name_table + 153, prop_name_table + 171, }, // initialpunctuation
  505. { prop_name_table + 171, prop_name_table + 173, }, // l*
  506. { prop_name_table + 173, prop_name_table + 179, }, // letter
  507. { prop_name_table + 179, prop_name_table + 191, }, // letternumber
  508. { prop_name_table + 191, prop_name_table + 204, }, // lineseparator
  509. { prop_name_table + 204, prop_name_table + 206, }, // ll
  510. { prop_name_table + 206, prop_name_table + 208, }, // lm
  511. { prop_name_table + 208, prop_name_table + 210, }, // lo
  512. { prop_name_table + 210, prop_name_table + 225, }, // lowercaseletter
  513. { prop_name_table + 225, prop_name_table + 227, }, // lt
  514. { prop_name_table + 227, prop_name_table + 229, }, // lu
  515. { prop_name_table + 229, prop_name_table + 231, }, // m*
  516. { prop_name_table + 231, prop_name_table + 235, }, // mark
  517. { prop_name_table + 235, prop_name_table + 245, }, // mathsymbol
  518. { prop_name_table + 245, prop_name_table + 247, }, // mc
  519. { prop_name_table + 247, prop_name_table + 249, }, // me
  520. { prop_name_table + 249, prop_name_table + 251, }, // mn
  521. { prop_name_table + 251, prop_name_table + 265, }, // modifierletter
  522. { prop_name_table + 265, prop_name_table + 279, }, // modifiersymbol
  523. { prop_name_table + 279, prop_name_table + 281, }, // n*
  524. { prop_name_table + 281, prop_name_table + 283, }, // nd
  525. { prop_name_table + 283, prop_name_table + 285, }, // nl
  526. { prop_name_table + 285, prop_name_table + 287, }, // no
  527. { prop_name_table + 287, prop_name_table + 301, }, // nonspacingmark
  528. { prop_name_table + 301, prop_name_table + 312, }, // notassigned
  529. { prop_name_table + 312, prop_name_table + 318, }, // number
  530. { prop_name_table + 318, prop_name_table + 333, }, // openpunctuation
  531. { prop_name_table + 333, prop_name_table + 338, }, // other
  532. { prop_name_table + 338, prop_name_table + 349, }, // otherletter
  533. { prop_name_table + 349, prop_name_table + 360, }, // othernumber
  534. { prop_name_table + 360, prop_name_table + 376, }, // otherpunctuation
  535. { prop_name_table + 376, prop_name_table + 387, }, // othersymbol
  536. { prop_name_table + 387, prop_name_table + 389, }, // p*
  537. { prop_name_table + 389, prop_name_table + 407, }, // paragraphseparator
  538. { prop_name_table + 407, prop_name_table + 409, }, // pc
  539. { prop_name_table + 409, prop_name_table + 411, }, // pd
  540. { prop_name_table + 411, prop_name_table + 413, }, // pe
  541. { prop_name_table + 413, prop_name_table + 415, }, // pf
  542. { prop_name_table + 415, prop_name_table + 417, }, // pi
  543. { prop_name_table + 417, prop_name_table + 419, }, // po
  544. { prop_name_table + 419, prop_name_table + 429, }, // privateuse
  545. { prop_name_table + 429, prop_name_table + 431, }, // ps
  546. { prop_name_table + 431, prop_name_table + 442, }, // punctuation
  547. { prop_name_table + 442, prop_name_table + 444, }, // s*
  548. { prop_name_table + 444, prop_name_table + 446, }, // sc
  549. { prop_name_table + 446, prop_name_table + 455, }, // separator
  550. { prop_name_table + 455, prop_name_table + 457, }, // sk
  551. { prop_name_table + 457, prop_name_table + 459, }, // sm
  552. { prop_name_table + 459, prop_name_table + 461, }, // so
  553. { prop_name_table + 461, prop_name_table + 475, }, // spaceseparator
  554. { prop_name_table + 475, prop_name_table + 495, }, // spacingcombiningmark
  555. { prop_name_table + 495, prop_name_table + 504, }, // surrogate
  556. { prop_name_table + 504, prop_name_table + 510, }, // symbol
  557. { prop_name_table + 510, prop_name_table + 519, }, // titlecase
  558. { prop_name_table + 519, prop_name_table + 534, }, // titlecaseletter
  559. { prop_name_table + 534, prop_name_table + 549, }, // uppercaseletter
  560. { prop_name_table + 549, prop_name_table + 551, }, // z*
  561. { prop_name_table + 551, prop_name_table + 553, }, // zl
  562. { prop_name_table + 553, prop_name_table + 555, }, // zp
  563. { prop_name_table + 555, prop_name_table + 557, }, // zs
  564. };
  565. static const icu_regex_traits::char_class_type icu_class_map[] = {
  566. mask_any, // any
  567. mask_ascii, // ascii
  568. (0x3FFFFFFFu) & ~(U_GC_CN_MASK), // assigned
  569. U_GC_C_MASK, // c*
  570. U_GC_CC_MASK, // cc
  571. U_GC_CF_MASK, // cf
  572. U_GC_PE_MASK, // closepunctuation
  573. U_GC_CN_MASK, // cn
  574. U_GC_CO_MASK, // co
  575. U_GC_PC_MASK, // connectorpunctuation
  576. U_GC_CC_MASK, // control
  577. U_GC_CS_MASK, // cs
  578. U_GC_SC_MASK, // currencysymbol
  579. U_GC_PD_MASK, // dashpunctuation
  580. U_GC_ND_MASK, // decimaldigitnumber
  581. U_GC_ME_MASK, // enclosingmark
  582. U_GC_PF_MASK, // finalpunctuation
  583. U_GC_CF_MASK, // format
  584. U_GC_PI_MASK, // initialpunctuation
  585. U_GC_L_MASK, // l*
  586. U_GC_L_MASK, // letter
  587. U_GC_NL_MASK, // letternumber
  588. U_GC_ZL_MASK, // lineseparator
  589. U_GC_LL_MASK, // ll
  590. U_GC_LM_MASK, // lm
  591. U_GC_LO_MASK, // lo
  592. U_GC_LL_MASK, // lowercaseletter
  593. U_GC_LT_MASK, // lt
  594. U_GC_LU_MASK, // lu
  595. U_GC_M_MASK, // m*
  596. U_GC_M_MASK, // mark
  597. U_GC_SM_MASK, // mathsymbol
  598. U_GC_MC_MASK, // mc
  599. U_GC_ME_MASK, // me
  600. U_GC_MN_MASK, // mn
  601. U_GC_LM_MASK, // modifierletter
  602. U_GC_SK_MASK, // modifiersymbol
  603. U_GC_N_MASK, // n*
  604. U_GC_ND_MASK, // nd
  605. U_GC_NL_MASK, // nl
  606. U_GC_NO_MASK, // no
  607. U_GC_MN_MASK, // nonspacingmark
  608. U_GC_CN_MASK, // notassigned
  609. U_GC_N_MASK, // number
  610. U_GC_PS_MASK, // openpunctuation
  611. U_GC_C_MASK, // other
  612. U_GC_LO_MASK, // otherletter
  613. U_GC_NO_MASK, // othernumber
  614. U_GC_PO_MASK, // otherpunctuation
  615. U_GC_SO_MASK, // othersymbol
  616. U_GC_P_MASK, // p*
  617. U_GC_ZP_MASK, // paragraphseparator
  618. U_GC_PC_MASK, // pc
  619. U_GC_PD_MASK, // pd
  620. U_GC_PE_MASK, // pe
  621. U_GC_PF_MASK, // pf
  622. U_GC_PI_MASK, // pi
  623. U_GC_PO_MASK, // po
  624. U_GC_CO_MASK, // privateuse
  625. U_GC_PS_MASK, // ps
  626. U_GC_P_MASK, // punctuation
  627. U_GC_S_MASK, // s*
  628. U_GC_SC_MASK, // sc
  629. U_GC_Z_MASK, // separator
  630. U_GC_SK_MASK, // sk
  631. U_GC_SM_MASK, // sm
  632. U_GC_SO_MASK, // so
  633. U_GC_ZS_MASK, // spaceseparator
  634. U_GC_MC_MASK, // spacingcombiningmark
  635. U_GC_CS_MASK, // surrogate
  636. U_GC_S_MASK, // symbol
  637. U_GC_LT_MASK, // titlecase
  638. U_GC_LT_MASK, // titlecaseletter
  639. U_GC_LU_MASK, // uppercaseletter
  640. U_GC_Z_MASK, // z*
  641. U_GC_ZL_MASK, // zl
  642. U_GC_ZP_MASK, // zp
  643. U_GC_ZS_MASK, // zs
  644. };
  645. const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* ranges_begin = range_data;
  646. const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* ranges_end = range_data + (sizeof(range_data) / sizeof(range_data[0]));
  647. BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32> t = { p1, p2, };
  648. const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* p = std::lower_bound(ranges_begin, ranges_end, t);
  649. if ((p != ranges_end) && (t == *p))
  650. return icu_class_map[p - ranges_begin];
  651. return 0;
  652. }
  653. boost::shared_ptr< ::boost::BOOST_REGEX_DETAIL_NS::icu_regex_traits_implementation> m_pimpl;
  654. };
  655. } // namespace boost
  656. namespace boost {
  657. // types:
  658. typedef basic_regex< ::UChar32, icu_regex_traits> u32regex;
  659. typedef match_results<const ::UChar32*> u32match;
  660. typedef match_results<const ::UChar*> u16match;
  661. //
  662. // Construction of 32-bit regex types from UTF-8 and UTF-16 primitives:
  663. //
  664. namespace BOOST_REGEX_DETAIL_NS {
  665. #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__)
  666. template <class InputIterator>
  667. inline u32regex do_make_u32regex(InputIterator i,
  668. InputIterator j,
  669. boost::regex_constants::syntax_option_type opt,
  670. const boost::mpl::int_<1>*)
  671. {
  672. typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type;
  673. return u32regex(conv_type(i, i, j), conv_type(j, i, j), opt);
  674. }
  675. template <class InputIterator>
  676. inline u32regex do_make_u32regex(InputIterator i,
  677. InputIterator j,
  678. boost::regex_constants::syntax_option_type opt,
  679. const boost::mpl::int_<2>*)
  680. {
  681. typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type;
  682. return u32regex(conv_type(i, i, j), conv_type(j, i, j), opt);
  683. }
  684. template <class InputIterator>
  685. inline u32regex do_make_u32regex(InputIterator i,
  686. InputIterator j,
  687. boost::regex_constants::syntax_option_type opt,
  688. const boost::mpl::int_<4>*)
  689. {
  690. return u32regex(i, j, opt);
  691. }
  692. #else
  693. template <class InputIterator>
  694. inline u32regex do_make_u32regex(InputIterator i,
  695. InputIterator j,
  696. boost::regex_constants::syntax_option_type opt,
  697. const boost::mpl::int_<1>*)
  698. {
  699. typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type;
  700. typedef std::vector<UChar32> vector_type;
  701. vector_type v;
  702. conv_type a(i, i, j), b(j, i, j);
  703. while (a != b)
  704. {
  705. v.push_back(*a);
  706. ++a;
  707. }
  708. if (v.size())
  709. return u32regex(&*v.begin(), v.size(), opt);
  710. return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
  711. }
  712. template <class InputIterator>
  713. inline u32regex do_make_u32regex(InputIterator i,
  714. InputIterator j,
  715. boost::regex_constants::syntax_option_type opt,
  716. const boost::mpl::int_<2>*)
  717. {
  718. typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type;
  719. typedef std::vector<UChar32> vector_type;
  720. vector_type v;
  721. conv_type a(i, i, j), b(j, i, j);
  722. while (a != b)
  723. {
  724. v.push_back(*a);
  725. ++a;
  726. }
  727. if (v.size())
  728. return u32regex(&*v.begin(), v.size(), opt);
  729. return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
  730. }
  731. template <class InputIterator>
  732. inline u32regex do_make_u32regex(InputIterator i,
  733. InputIterator j,
  734. boost::regex_constants::syntax_option_type opt,
  735. const boost::mpl::int_<4>*)
  736. {
  737. typedef std::vector<UChar32> vector_type;
  738. vector_type v;
  739. while (i != j)
  740. {
  741. v.push_back((UChar32)(*i));
  742. ++i;
  743. }
  744. if (v.size())
  745. return u32regex(&*v.begin(), v.size(), opt);
  746. return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
  747. }
  748. #endif
  749. }
  750. // BOOST_REGEX_UCHAR_IS_WCHAR_T
  751. //
  752. // Source inspection of unicode/umachine.h in ICU version 59 indicates that:
  753. //
  754. // On version 59, UChar is always char16_t in C++ mode (and uint16_t in C mode)
  755. //
  756. // On earlier versions, the logic is
  757. //
  758. // #if U_SIZEOF_WCHAR_T==2
  759. // typedef wchar_t OldUChar;
  760. // #elif defined(__CHAR16_TYPE__)
  761. // typedef __CHAR16_TYPE__ OldUChar;
  762. // #else
  763. // typedef uint16_t OldUChar;
  764. // #endif
  765. //
  766. // That is, UChar is wchar_t only on versions below 59, when U_SIZEOF_WCHAR_T==2
  767. //
  768. // Hence,
  769. #define BOOST_REGEX_UCHAR_IS_WCHAR_T (U_ICU_VERSION_MAJOR_NUM < 59 && U_SIZEOF_WCHAR_T == 2)
  770. #if BOOST_REGEX_UCHAR_IS_WCHAR_T
  771. BOOST_STATIC_ASSERT((boost::is_same<UChar, wchar_t>::value));
  772. #else
  773. BOOST_STATIC_ASSERT(!(boost::is_same<UChar, wchar_t>::value));
  774. #endif
  775. //
  776. // Construction from an iterator pair:
  777. //
  778. template <class InputIterator>
  779. inline u32regex make_u32regex(InputIterator i,
  780. InputIterator j,
  781. boost::regex_constants::syntax_option_type opt)
  782. {
  783. return BOOST_REGEX_DETAIL_NS::do_make_u32regex(i, j, opt, static_cast<boost::mpl::int_<sizeof(*i)> const*>(0));
  784. }
  785. //
  786. // construction from UTF-8 nul-terminated strings:
  787. //
  788. inline u32regex make_u32regex(const char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
  789. {
  790. return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::strlen(p), opt, static_cast<boost::mpl::int_<1> const*>(0));
  791. }
  792. inline u32regex make_u32regex(const unsigned char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
  793. {
  794. return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::strlen(reinterpret_cast<const char*>(p)), opt, static_cast<boost::mpl::int_<1> const*>(0));
  795. }
  796. //
  797. // construction from UTF-16 nul-terminated strings:
  798. //
  799. #ifndef BOOST_NO_WREGEX
  800. inline u32regex make_u32regex(const wchar_t* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
  801. {
  802. return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + std::wcslen(p), opt, static_cast<boost::mpl::int_<sizeof(wchar_t)> const*>(0));
  803. }
  804. #endif
  805. #if !BOOST_REGEX_UCHAR_IS_WCHAR_T
  806. inline u32regex make_u32regex(const UChar* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
  807. {
  808. return BOOST_REGEX_DETAIL_NS::do_make_u32regex(p, p + u_strlen(p), opt, static_cast<boost::mpl::int_<2> const*>(0));
  809. }
  810. #endif
  811. //
  812. // construction from basic_string class-template:
  813. //
  814. template<class C, class T, class A>
  815. inline u32regex make_u32regex(const std::basic_string<C, T, A>& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
  816. {
  817. return BOOST_REGEX_DETAIL_NS::do_make_u32regex(s.begin(), s.end(), opt, static_cast<boost::mpl::int_<sizeof(C)> const*>(0));
  818. }
  819. //
  820. // Construction from ICU string type:
  821. //
  822. inline u32regex make_u32regex(const U_NAMESPACE_QUALIFIER UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
  823. {
  824. return BOOST_REGEX_DETAIL_NS::do_make_u32regex(s.getBuffer(), s.getBuffer() + s.length(), opt, static_cast<boost::mpl::int_<2> const*>(0));
  825. }
  826. //
  827. // regex_match overloads that widen the character type as appropriate:
  828. //
  829. namespace BOOST_REGEX_DETAIL_NS {
  830. template<class MR1, class MR2, class NSubs>
  831. void copy_results(MR1& out, MR2 const& in, NSubs named_subs)
  832. {
  833. // copy results from an adapted MR2 match_results:
  834. out.set_size(in.size(), in.prefix().first.base(), in.suffix().second.base());
  835. out.set_base(in.base().base());
  836. out.set_named_subs(named_subs);
  837. for (int i = 0; i < (int)in.size(); ++i)
  838. {
  839. if (in[i].matched || !i)
  840. {
  841. out.set_first(in[i].first.base(), i);
  842. out.set_second(in[i].second.base(), i, in[i].matched);
  843. }
  844. }
  845. #ifdef BOOST_REGEX_MATCH_EXTRA
  846. // Copy full capture info as well:
  847. for (int i = 0; i < (int)in.size(); ++i)
  848. {
  849. if (in[i].captures().size())
  850. {
  851. out[i].get_captures().assign(in[i].captures().size(), typename MR1::value_type());
  852. for (int j = 0; j < (int)out[i].captures().size(); ++j)
  853. {
  854. out[i].get_captures()[j].first = in[i].captures()[j].first.base();
  855. out[i].get_captures()[j].second = in[i].captures()[j].second.base();
  856. out[i].get_captures()[j].matched = in[i].captures()[j].matched;
  857. }
  858. }
  859. }
  860. #endif
  861. }
  862. template <class BidiIterator, class Allocator>
  863. inline bool do_regex_match(BidiIterator first, BidiIterator last,
  864. match_results<BidiIterator, Allocator>& m,
  865. const u32regex& e,
  866. match_flag_type flags,
  867. boost::mpl::int_<4> const*)
  868. {
  869. return ::boost::regex_match(first, last, m, e, flags);
  870. }
  871. template <class BidiIterator, class Allocator>
  872. bool do_regex_match(BidiIterator first, BidiIterator last,
  873. match_results<BidiIterator, Allocator>& m,
  874. const u32regex& e,
  875. match_flag_type flags,
  876. boost::mpl::int_<2> const*)
  877. {
  878. typedef u16_to_u32_iterator<BidiIterator, UChar32> conv_type;
  879. typedef match_results<conv_type> match_type;
  880. //typedef typename match_type::allocator_type alloc_type;
  881. match_type what;
  882. bool result = ::boost::regex_match(conv_type(first, first, last), conv_type(last, first, last), what, e, flags);
  883. // copy results across to m:
  884. if (result) copy_results(m, what, e.get_named_subs());
  885. return result;
  886. }
  887. template <class BidiIterator, class Allocator>
  888. bool do_regex_match(BidiIterator first, BidiIterator last,
  889. match_results<BidiIterator, Allocator>& m,
  890. const u32regex& e,
  891. match_flag_type flags,
  892. boost::mpl::int_<1> const*)
  893. {
  894. typedef u8_to_u32_iterator<BidiIterator, UChar32> conv_type;
  895. typedef match_results<conv_type> match_type;
  896. //typedef typename match_type::allocator_type alloc_type;
  897. match_type what;
  898. bool result = ::boost::regex_match(conv_type(first, first, last), conv_type(last, first, last), what, e, flags);
  899. // copy results across to m:
  900. if (result) copy_results(m, what, e.get_named_subs());
  901. return result;
  902. }
  903. } // namespace BOOST_REGEX_DETAIL_NS
  904. template <class BidiIterator, class Allocator>
  905. inline bool u32regex_match(BidiIterator first, BidiIterator last,
  906. match_results<BidiIterator, Allocator>& m,
  907. const u32regex& e,
  908. match_flag_type flags = match_default)
  909. {
  910. return BOOST_REGEX_DETAIL_NS::do_regex_match(first, last, m, e, flags, static_cast<mpl::int_<sizeof(*first)> const*>(0));
  911. }
  912. inline bool u32regex_match(const UChar* p,
  913. match_results<const UChar*>& m,
  914. const u32regex& e,
  915. match_flag_type flags = match_default)
  916. {
  917. return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + u_strlen(p), m, e, flags, static_cast<mpl::int_<2> const*>(0));
  918. }
  919. #if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX)
  920. inline bool u32regex_match(const wchar_t* p,
  921. match_results<const wchar_t*>& m,
  922. const u32regex& e,
  923. match_flag_type flags = match_default)
  924. {
  925. return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + std::wcslen(p), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  926. }
  927. #endif
  928. inline bool u32regex_match(const char* p,
  929. match_results<const char*>& m,
  930. const u32regex& e,
  931. match_flag_type flags = match_default)
  932. {
  933. return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + std::strlen(p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
  934. }
  935. inline bool u32regex_match(const unsigned char* p,
  936. match_results<const unsigned char*>& m,
  937. const u32regex& e,
  938. match_flag_type flags = match_default)
  939. {
  940. return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + std::strlen((const char*)p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
  941. }
  942. inline bool u32regex_match(const std::string& s,
  943. match_results<std::string::const_iterator>& m,
  944. const u32regex& e,
  945. match_flag_type flags = match_default)
  946. {
  947. return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<1> const*>(0));
  948. }
  949. #ifndef BOOST_NO_STD_WSTRING
  950. inline bool u32regex_match(const std::wstring& s,
  951. match_results<std::wstring::const_iterator>& m,
  952. const u32regex& e,
  953. match_flag_type flags = match_default)
  954. {
  955. return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  956. }
  957. #endif
  958. inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s,
  959. match_results<const UChar*>& m,
  960. const u32regex& e,
  961. match_flag_type flags = match_default)
  962. {
  963. return BOOST_REGEX_DETAIL_NS::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast<mpl::int_<2> const*>(0));
  964. }
  965. //
  966. // regex_match overloads that do not return what matched:
  967. //
  968. template <class BidiIterator>
  969. inline bool u32regex_match(BidiIterator first, BidiIterator last,
  970. const u32regex& e,
  971. match_flag_type flags = match_default)
  972. {
  973. match_results<BidiIterator> m;
  974. return BOOST_REGEX_DETAIL_NS::do_regex_match(first, last, m, e, flags, static_cast<mpl::int_<sizeof(*first)> const*>(0));
  975. }
  976. inline bool u32regex_match(const UChar* p,
  977. const u32regex& e,
  978. match_flag_type flags = match_default)
  979. {
  980. match_results<const UChar*> m;
  981. return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + u_strlen(p), m, e, flags, static_cast<mpl::int_<2> const*>(0));
  982. }
  983. #if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX)
  984. inline bool u32regex_match(const wchar_t* p,
  985. const u32regex& e,
  986. match_flag_type flags = match_default)
  987. {
  988. match_results<const wchar_t*> m;
  989. return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + std::wcslen(p), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  990. }
  991. #endif
  992. inline bool u32regex_match(const char* p,
  993. const u32regex& e,
  994. match_flag_type flags = match_default)
  995. {
  996. match_results<const char*> m;
  997. return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + std::strlen(p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
  998. }
  999. inline bool u32regex_match(const unsigned char* p,
  1000. const u32regex& e,
  1001. match_flag_type flags = match_default)
  1002. {
  1003. match_results<const unsigned char*> m;
  1004. return BOOST_REGEX_DETAIL_NS::do_regex_match(p, p + std::strlen((const char*)p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
  1005. }
  1006. inline bool u32regex_match(const std::string& s,
  1007. const u32regex& e,
  1008. match_flag_type flags = match_default)
  1009. {
  1010. match_results<std::string::const_iterator> m;
  1011. return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<1> const*>(0));
  1012. }
  1013. #ifndef BOOST_NO_STD_WSTRING
  1014. inline bool u32regex_match(const std::wstring& s,
  1015. const u32regex& e,
  1016. match_flag_type flags = match_default)
  1017. {
  1018. match_results<std::wstring::const_iterator> m;
  1019. return BOOST_REGEX_DETAIL_NS::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  1020. }
  1021. #endif
  1022. inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s,
  1023. const u32regex& e,
  1024. match_flag_type flags = match_default)
  1025. {
  1026. match_results<const UChar*> m;
  1027. return BOOST_REGEX_DETAIL_NS::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast<mpl::int_<2> const*>(0));
  1028. }
  1029. //
  1030. // regex_search overloads that widen the character type as appropriate:
  1031. //
  1032. namespace BOOST_REGEX_DETAIL_NS {
  1033. template <class BidiIterator, class Allocator>
  1034. inline bool do_regex_search(BidiIterator first, BidiIterator last,
  1035. match_results<BidiIterator, Allocator>& m,
  1036. const u32regex& e,
  1037. match_flag_type flags,
  1038. BidiIterator base,
  1039. boost::mpl::int_<4> const*)
  1040. {
  1041. return ::boost::regex_search(first, last, m, e, flags, base);
  1042. }
  1043. template <class BidiIterator, class Allocator>
  1044. bool do_regex_search(BidiIterator first, BidiIterator last,
  1045. match_results<BidiIterator, Allocator>& m,
  1046. const u32regex& e,
  1047. match_flag_type flags,
  1048. BidiIterator base,
  1049. boost::mpl::int_<2> const*)
  1050. {
  1051. typedef u16_to_u32_iterator<BidiIterator, UChar32> conv_type;
  1052. typedef match_results<conv_type> match_type;
  1053. //typedef typename match_type::allocator_type alloc_type;
  1054. match_type what;
  1055. bool result = ::boost::regex_search(conv_type(first, first, last), conv_type(last, first, last), what, e, flags, conv_type(base));
  1056. // copy results across to m:
  1057. if (result) copy_results(m, what, e.get_named_subs());
  1058. return result;
  1059. }
  1060. template <class BidiIterator, class Allocator>
  1061. bool do_regex_search(BidiIterator first, BidiIterator last,
  1062. match_results<BidiIterator, Allocator>& m,
  1063. const u32regex& e,
  1064. match_flag_type flags,
  1065. BidiIterator base,
  1066. boost::mpl::int_<1> const*)
  1067. {
  1068. typedef u8_to_u32_iterator<BidiIterator, UChar32> conv_type;
  1069. typedef match_results<conv_type> match_type;
  1070. //typedef typename match_type::allocator_type alloc_type;
  1071. match_type what;
  1072. bool result = ::boost::regex_search(conv_type(first, first, last), conv_type(last, first, last), what, e, flags, conv_type(base));
  1073. // copy results across to m:
  1074. if (result) copy_results(m, what, e.get_named_subs());
  1075. return result;
  1076. }
  1077. }
  1078. template <class BidiIterator, class Allocator>
  1079. inline bool u32regex_search(BidiIterator first, BidiIterator last,
  1080. match_results<BidiIterator, Allocator>& m,
  1081. const u32regex& e,
  1082. match_flag_type flags = match_default)
  1083. {
  1084. return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, first, static_cast<mpl::int_<sizeof(*first)> const*>(0));
  1085. }
  1086. template <class BidiIterator, class Allocator>
  1087. inline bool u32regex_search(BidiIterator first, BidiIterator last,
  1088. match_results<BidiIterator, Allocator>& m,
  1089. const u32regex& e,
  1090. match_flag_type flags,
  1091. BidiIterator base)
  1092. {
  1093. return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, base, static_cast<mpl::int_<sizeof(*first)> const*>(0));
  1094. }
  1095. inline bool u32regex_search(const UChar* p,
  1096. match_results<const UChar*>& m,
  1097. const u32regex& e,
  1098. match_flag_type flags = match_default)
  1099. {
  1100. return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + u_strlen(p), m, e, flags, p, static_cast<mpl::int_<2> const*>(0));
  1101. }
  1102. #if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX)
  1103. inline bool u32regex_search(const wchar_t* p,
  1104. match_results<const wchar_t*>& m,
  1105. const u32regex& e,
  1106. match_flag_type flags = match_default)
  1107. {
  1108. return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + std::wcslen(p), m, e, flags, p, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  1109. }
  1110. #endif
  1111. inline bool u32regex_search(const char* p,
  1112. match_results<const char*>& m,
  1113. const u32regex& e,
  1114. match_flag_type flags = match_default)
  1115. {
  1116. return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + std::strlen(p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
  1117. }
  1118. inline bool u32regex_search(const unsigned char* p,
  1119. match_results<const unsigned char*>& m,
  1120. const u32regex& e,
  1121. match_flag_type flags = match_default)
  1122. {
  1123. return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + std::strlen((const char*)p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
  1124. }
  1125. inline bool u32regex_search(const std::string& s,
  1126. match_results<std::string::const_iterator>& m,
  1127. const u32regex& e,
  1128. match_flag_type flags = match_default)
  1129. {
  1130. return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<1> const*>(0));
  1131. }
  1132. #ifndef BOOST_NO_STD_WSTRING
  1133. inline bool u32regex_search(const std::wstring& s,
  1134. match_results<std::wstring::const_iterator>& m,
  1135. const u32regex& e,
  1136. match_flag_type flags = match_default)
  1137. {
  1138. return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  1139. }
  1140. #endif
  1141. inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s,
  1142. match_results<const UChar*>& m,
  1143. const u32regex& e,
  1144. match_flag_type flags = match_default)
  1145. {
  1146. return BOOST_REGEX_DETAIL_NS::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast<mpl::int_<2> const*>(0));
  1147. }
  1148. template <class BidiIterator>
  1149. inline bool u32regex_search(BidiIterator first, BidiIterator last,
  1150. const u32regex& e,
  1151. match_flag_type flags = match_default)
  1152. {
  1153. match_results<BidiIterator> m;
  1154. return BOOST_REGEX_DETAIL_NS::do_regex_search(first, last, m, e, flags, first, static_cast<mpl::int_<sizeof(*first)> const*>(0));
  1155. }
  1156. inline bool u32regex_search(const UChar* p,
  1157. const u32regex& e,
  1158. match_flag_type flags = match_default)
  1159. {
  1160. match_results<const UChar*> m;
  1161. return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + u_strlen(p), m, e, flags, p, static_cast<mpl::int_<2> const*>(0));
  1162. }
  1163. #if !BOOST_REGEX_UCHAR_IS_WCHAR_T && !defined(BOOST_NO_WREGEX)
  1164. inline bool u32regex_search(const wchar_t* p,
  1165. const u32regex& e,
  1166. match_flag_type flags = match_default)
  1167. {
  1168. match_results<const wchar_t*> m;
  1169. return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + std::wcslen(p), m, e, flags, p, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  1170. }
  1171. #endif
  1172. inline bool u32regex_search(const char* p,
  1173. const u32regex& e,
  1174. match_flag_type flags = match_default)
  1175. {
  1176. match_results<const char*> m;
  1177. return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + std::strlen(p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
  1178. }
  1179. inline bool u32regex_search(const unsigned char* p,
  1180. const u32regex& e,
  1181. match_flag_type flags = match_default)
  1182. {
  1183. match_results<const unsigned char*> m;
  1184. return BOOST_REGEX_DETAIL_NS::do_regex_search(p, p + std::strlen((const char*)p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
  1185. }
  1186. inline bool u32regex_search(const std::string& s,
  1187. const u32regex& e,
  1188. match_flag_type flags = match_default)
  1189. {
  1190. match_results<std::string::const_iterator> m;
  1191. return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<1> const*>(0));
  1192. }
  1193. #ifndef BOOST_NO_STD_WSTRING
  1194. inline bool u32regex_search(const std::wstring& s,
  1195. const u32regex& e,
  1196. match_flag_type flags = match_default)
  1197. {
  1198. match_results<std::wstring::const_iterator> m;
  1199. return BOOST_REGEX_DETAIL_NS::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
  1200. }
  1201. #endif
  1202. inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s,
  1203. const u32regex& e,
  1204. match_flag_type flags = match_default)
  1205. {
  1206. match_results<const UChar*> m;
  1207. return BOOST_REGEX_DETAIL_NS::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast<mpl::int_<2> const*>(0));
  1208. }
  1209. //
  1210. // overloads for regex_replace with utf-8 and utf-16 data types:
  1211. //
  1212. namespace BOOST_REGEX_DETAIL_NS {
  1213. template <class I>
  1214. inline std::pair< boost::u8_to_u32_iterator<I>, boost::u8_to_u32_iterator<I> >
  1215. make_utf32_seq(I i, I j, mpl::int_<1> const*)
  1216. {
  1217. return std::pair< boost::u8_to_u32_iterator<I>, boost::u8_to_u32_iterator<I> >(boost::u8_to_u32_iterator<I>(i, i, j), boost::u8_to_u32_iterator<I>(j, i, j));
  1218. }
  1219. template <class I>
  1220. inline std::pair< boost::u16_to_u32_iterator<I>, boost::u16_to_u32_iterator<I> >
  1221. make_utf32_seq(I i, I j, mpl::int_<2> const*)
  1222. {
  1223. return std::pair< boost::u16_to_u32_iterator<I>, boost::u16_to_u32_iterator<I> >(boost::u16_to_u32_iterator<I>(i, i, j), boost::u16_to_u32_iterator<I>(j, i, j));
  1224. }
  1225. template <class I>
  1226. inline std::pair< I, I >
  1227. make_utf32_seq(I i, I j, mpl::int_<4> const*)
  1228. {
  1229. return std::pair< I, I >(i, j);
  1230. }
  1231. template <class charT>
  1232. inline std::pair< boost::u8_to_u32_iterator<const charT*>, boost::u8_to_u32_iterator<const charT*> >
  1233. make_utf32_seq(const charT* p, mpl::int_<1> const*)
  1234. {
  1235. std::size_t len = std::strlen((const char*)p);
  1236. return std::pair< boost::u8_to_u32_iterator<const charT*>, boost::u8_to_u32_iterator<const charT*> >(boost::u8_to_u32_iterator<const charT*>(p, p, p + len), boost::u8_to_u32_iterator<const charT*>(p + len, p, p + len));
  1237. }
  1238. template <class charT>
  1239. inline std::pair< boost::u16_to_u32_iterator<const charT*>, boost::u16_to_u32_iterator<const charT*> >
  1240. make_utf32_seq(const charT* p, mpl::int_<2> const*)
  1241. {
  1242. std::size_t len = u_strlen((const UChar*)p);
  1243. return std::pair< boost::u16_to_u32_iterator<const charT*>, boost::u16_to_u32_iterator<const charT*> >(boost::u16_to_u32_iterator<const charT*>(p, p, p + len), boost::u16_to_u32_iterator<const charT*>(p + len, p, p + len));
  1244. }
  1245. template <class charT>
  1246. inline std::pair< const charT*, const charT* >
  1247. make_utf32_seq(const charT* p, mpl::int_<4> const*)
  1248. {
  1249. return std::pair< const charT*, const charT* >(p, p + icu_regex_traits::length((UChar32 const*)p));
  1250. }
  1251. template <class OutputIterator>
  1252. inline OutputIterator make_utf32_out(OutputIterator o, mpl::int_<4> const*)
  1253. {
  1254. return o;
  1255. }
  1256. template <class OutputIterator>
  1257. inline utf16_output_iterator<OutputIterator> make_utf32_out(OutputIterator o, mpl::int_<2> const*)
  1258. {
  1259. return o;
  1260. }
  1261. template <class OutputIterator>
  1262. inline utf8_output_iterator<OutputIterator> make_utf32_out(OutputIterator o, mpl::int_<1> const*)
  1263. {
  1264. return o;
  1265. }
  1266. template <class OutputIterator, class I1, class I2>
  1267. OutputIterator do_regex_replace(OutputIterator out,
  1268. std::pair<I1, I1> const& in,
  1269. const u32regex& e,
  1270. const std::pair<I2, I2>& fmt,
  1271. match_flag_type flags
  1272. )
  1273. {
  1274. // unfortunately we have to copy the format string in order to pass in onward:
  1275. std::vector<UChar32> f;
  1276. #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
  1277. f.assign(fmt.first, fmt.second);
  1278. #else
  1279. f.clear();
  1280. I2 pos = fmt.first;
  1281. while (pos != fmt.second)
  1282. f.push_back(*pos++);
  1283. #endif
  1284. regex_iterator<I1, UChar32, icu_regex_traits> i(in.first, in.second, e, flags);
  1285. regex_iterator<I1, UChar32, icu_regex_traits> j;
  1286. if (i == j)
  1287. {
  1288. if (!(flags & regex_constants::format_no_copy))
  1289. out = BOOST_REGEX_DETAIL_NS::copy(in.first, in.second, out);
  1290. }
  1291. else
  1292. {
  1293. I1 last_m = in.first;
  1294. while (i != j)
  1295. {
  1296. if (!(flags & regex_constants::format_no_copy))
  1297. out = BOOST_REGEX_DETAIL_NS::copy(i->prefix().first, i->prefix().second, out);
  1298. if (!f.empty())
  1299. out = ::boost::BOOST_REGEX_DETAIL_NS::regex_format_imp(out, *i, &*f.begin(), &*f.begin() + f.size(), flags, e.get_traits());
  1300. else
  1301. out = ::boost::BOOST_REGEX_DETAIL_NS::regex_format_imp(out, *i, static_cast<UChar32 const*>(0), static_cast<UChar32 const*>(0), flags, e.get_traits());
  1302. last_m = (*i)[0].second;
  1303. if (flags & regex_constants::format_first_only)
  1304. break;
  1305. ++i;
  1306. }
  1307. if (!(flags & regex_constants::format_no_copy))
  1308. out = BOOST_REGEX_DETAIL_NS::copy(last_m, in.second, out);
  1309. }
  1310. return out;
  1311. }
  1312. template <class BaseIterator>
  1313. inline const BaseIterator& extract_output_base(const BaseIterator& b)
  1314. {
  1315. return b;
  1316. }
  1317. template <class BaseIterator>
  1318. inline BaseIterator extract_output_base(const utf8_output_iterator<BaseIterator>& b)
  1319. {
  1320. return b.base();
  1321. }
  1322. template <class BaseIterator>
  1323. inline BaseIterator extract_output_base(const utf16_output_iterator<BaseIterator>& b)
  1324. {
  1325. return b.base();
  1326. }
  1327. } // BOOST_REGEX_DETAIL_NS
  1328. template <class OutputIterator, class BidirectionalIterator, class charT>
  1329. inline OutputIterator u32regex_replace(OutputIterator out,
  1330. BidirectionalIterator first,
  1331. BidirectionalIterator last,
  1332. const u32regex& e,
  1333. const charT* fmt,
  1334. match_flag_type flags = match_default)
  1335. {
  1336. return BOOST_REGEX_DETAIL_NS::extract_output_base
  1337. (
  1338. BOOST_REGEX_DETAIL_NS::do_regex_replace(
  1339. BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
  1340. BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
  1341. e,
  1342. BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt, static_cast<mpl::int_<sizeof(*fmt)> const*>(0)),
  1343. flags)
  1344. );
  1345. }
  1346. template <class OutputIterator, class Iterator, class charT>
  1347. inline OutputIterator u32regex_replace(OutputIterator out,
  1348. Iterator first,
  1349. Iterator last,
  1350. const u32regex& e,
  1351. const std::basic_string<charT>& fmt,
  1352. match_flag_type flags = match_default)
  1353. {
  1354. return BOOST_REGEX_DETAIL_NS::extract_output_base
  1355. (
  1356. BOOST_REGEX_DETAIL_NS::do_regex_replace(
  1357. BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
  1358. BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
  1359. e,
  1360. BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.begin(), fmt.end(), static_cast<mpl::int_<sizeof(charT)> const*>(0)),
  1361. flags)
  1362. );
  1363. }
  1364. template <class OutputIterator, class Iterator>
  1365. inline OutputIterator u32regex_replace(OutputIterator out,
  1366. Iterator first,
  1367. Iterator last,
  1368. const u32regex& e,
  1369. const U_NAMESPACE_QUALIFIER UnicodeString& fmt,
  1370. match_flag_type flags = match_default)
  1371. {
  1372. return BOOST_REGEX_DETAIL_NS::extract_output_base
  1373. (
  1374. BOOST_REGEX_DETAIL_NS::do_regex_replace(
  1375. BOOST_REGEX_DETAIL_NS::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
  1376. BOOST_REGEX_DETAIL_NS::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
  1377. e,
  1378. BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast<mpl::int_<2> const*>(0)),
  1379. flags)
  1380. );
  1381. }
  1382. template <class charT>
  1383. std::basic_string<charT> u32regex_replace(const std::basic_string<charT>& s,
  1384. const u32regex& e,
  1385. const charT* fmt,
  1386. match_flag_type flags = match_default)
  1387. {
  1388. std::basic_string<charT> result;
  1389. BOOST_REGEX_DETAIL_NS::string_out_iterator<std::basic_string<charT> > i(result);
  1390. u32regex_replace(i, s.begin(), s.end(), e, fmt, flags);
  1391. return result;
  1392. }
  1393. template <class charT>
  1394. std::basic_string<charT> u32regex_replace(const std::basic_string<charT>& s,
  1395. const u32regex& e,
  1396. const std::basic_string<charT>& fmt,
  1397. match_flag_type flags = match_default)
  1398. {
  1399. std::basic_string<charT> result;
  1400. BOOST_REGEX_DETAIL_NS::string_out_iterator<std::basic_string<charT> > i(result);
  1401. u32regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags);
  1402. return result;
  1403. }
  1404. namespace BOOST_REGEX_DETAIL_NS {
  1405. class unicode_string_out_iterator
  1406. {
  1407. U_NAMESPACE_QUALIFIER UnicodeString* out;
  1408. public:
  1409. unicode_string_out_iterator(U_NAMESPACE_QUALIFIER UnicodeString& s) : out(&s) {}
  1410. unicode_string_out_iterator& operator++() { return *this; }
  1411. unicode_string_out_iterator& operator++(int) { return *this; }
  1412. unicode_string_out_iterator& operator*() { return *this; }
  1413. unicode_string_out_iterator& operator=(UChar v)
  1414. {
  1415. *out += v;
  1416. return *this;
  1417. }
  1418. typedef std::ptrdiff_t difference_type;
  1419. typedef UChar value_type;
  1420. typedef value_type* pointer;
  1421. typedef value_type& reference;
  1422. typedef std::output_iterator_tag iterator_category;
  1423. };
  1424. }
  1425. inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s,
  1426. const u32regex& e,
  1427. const UChar* fmt,
  1428. match_flag_type flags = match_default)
  1429. {
  1430. U_NAMESPACE_QUALIFIER UnicodeString result;
  1431. BOOST_REGEX_DETAIL_NS::unicode_string_out_iterator i(result);
  1432. u32regex_replace(i, s.getBuffer(), s.getBuffer() + s.length(), e, fmt, flags);
  1433. return result;
  1434. }
  1435. inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s,
  1436. const u32regex& e,
  1437. const U_NAMESPACE_QUALIFIER UnicodeString& fmt,
  1438. match_flag_type flags = match_default)
  1439. {
  1440. U_NAMESPACE_QUALIFIER UnicodeString result;
  1441. BOOST_REGEX_DETAIL_NS::unicode_string_out_iterator i(result);
  1442. BOOST_REGEX_DETAIL_NS::do_regex_replace(
  1443. BOOST_REGEX_DETAIL_NS::make_utf32_out(i, static_cast<mpl::int_<2> const*>(0)),
  1444. BOOST_REGEX_DETAIL_NS::make_utf32_seq(s.getBuffer(), s.getBuffer() + s.length(), static_cast<mpl::int_<2> const*>(0)),
  1445. e,
  1446. BOOST_REGEX_DETAIL_NS::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast<mpl::int_<2> const*>(0)),
  1447. flags);
  1448. return result;
  1449. }
  1450. } // namespace boost.
  1451. #ifdef BOOST_MSVC
  1452. #pragma warning (pop)
  1453. #endif
  1454. #include <boost/regex/v4/u32regex_iterator.hpp>
  1455. #include <boost/regex/v4/u32regex_token_iterator.hpp>
  1456. #endif