123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474 |
- /*
- *
- * Copyright (c) 2004
- * John Maddock
- *
- * Use, modification and distribution are subject to the
- * Boost Software License, Version 1.0. (See accompanying file
- * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- *
- */
-
- /*
- * LOCATION: see http://www.boost.org for most recent version.
- * FILE c_regex_traits.hpp
- * VERSION see <boost/version.hpp>
- * DESCRIPTION: Declares regular expression traits class that wraps the global C locale.
- */
- #ifndef BOOST_C_REGEX_TRAITS_HPP_INCLUDED
- #define BOOST_C_REGEX_TRAITS_HPP_INCLUDED
- #include <boost/regex/config.hpp>
- #include <boost/regex/v5/regex_workaround.hpp>
- #include <cctype>
- namespace boost{
- namespace BOOST_REGEX_DETAIL_NS {
//
// Bit flags naming the character classes tested by c_regex_traits::isctype.
// Every basic class owns exactly one bit, so masks may be combined with
// bitwise OR and tested with bitwise AND.
//
enum
{
   // single bit classes:
   char_class_space      = 0x0001,
   char_class_print      = 0x0002,
   char_class_cntrl      = 0x0004,
   char_class_upper      = 0x0008,
   char_class_lower      = 0x0010,
   char_class_alpha      = 0x0020,
   char_class_digit      = 0x0040,
   char_class_punct      = 0x0080,
   char_class_xdigit     = 0x0100,
   char_class_blank      = 0x0200,
   char_class_word       = 0x0400,
   char_class_unicode    = 0x0800,
   char_class_horizontal = 0x1000,
   char_class_vertical   = 0x2000,
   // composite classes assembled from the bits above:
   char_class_alnum = char_class_alpha | char_class_digit,
   char_class_graph = char_class_alnum | char_class_punct
};
- }
// Primary template: declared only, the supported character types are
// provided as explicit specialisations.
template <class charT>
struct c_regex_traits;

//
// Regular expression traits for narrow characters, implemented on top of
// the C library (and therefore the global C locale).
//
template<>
struct c_regex_traits<char>
{
   c_regex_traits(){}

   // this type is not copyable; deleting the operations (rather than
   // declaring them private and leaving them undefined) turns any
   // accidental copy into a compile time error:
   c_regex_traits(const c_regex_traits&) = delete;
   c_regex_traits& operator=(const c_regex_traits&) = delete;

   typedef char char_type;
   typedef std::size_t size_type;
   typedef std::string string_type;
   // the C locale is global, so the locale object carries no state:
   struct locale_type{};
   typedef std::uint32_t char_class_type;

   // Length of the null terminated string p.
   static size_type length(const char_type* p)
   {
      return (std::strlen)(p);
   }

   // Case sensitive translation: the identity.
   char translate(char c) const
   {
      return c;
   }
   // Case insensitive translation: fold to lower case. The cast to
   // unsigned char keeps negative char values out of std::tolower.
   char translate_nocase(char c) const
   {
      return static_cast<char>((std::tolower)(static_cast<unsigned char>(c)));
   }

   // The remaining operations are defined out of line:
   static string_type transform(const char* p1, const char* p2);
   static string_type transform_primary(const char* p1, const char* p2);

   static char_class_type lookup_classname(const char* p1, const char* p2);
   static string_type lookup_collatename(const char* p1, const char* p2);

   static bool isctype(char, char_class_type);
   static int value(char, int);

   // Imbuing a locale has no effect: the argument is handed straight back.
   locale_type imbue(locale_type l)
   { return l; }
   locale_type getloc()const
   { return locale_type(); }
};
- #ifndef BOOST_NO_WREGEX
- template<>
- struct c_regex_traits<wchar_t>
- {
- c_regex_traits(){}
- typedef wchar_t char_type;
- typedef std::size_t size_type;
- typedef std::wstring string_type;
- struct locale_type{};
- typedef std::uint32_t char_class_type;
- static size_type length(const char_type* p)
- {
- return (std::wcslen)(p);
- }
- wchar_t translate(wchar_t c) const
- {
- return c;
- }
- wchar_t translate_nocase(wchar_t c) const
- {
- return (std::towlower)(c);
- }
- static string_type transform(const wchar_t* p1, const wchar_t* p2);
- static string_type transform_primary(const wchar_t* p1, const wchar_t* p2);
- static char_class_type lookup_classname(const wchar_t* p1, const wchar_t* p2);
- static string_type lookup_collatename(const wchar_t* p1, const wchar_t* p2);
- static bool isctype(wchar_t, char_class_type);
- static int value(wchar_t, int);
- locale_type imbue(locale_type l)
- { return l; }
- locale_type getloc()const
- { return locale_type(); }
- private:
- // this type is not copyable:
- c_regex_traits(const c_regex_traits&);
- c_regex_traits& operator=(const c_regex_traits&);
- };
- #endif // BOOST_NO_WREGEX
- inline c_regex_traits<char>::string_type c_regex_traits<char>::transform(const char* p1, const char* p2)
- {
- std::string result(10, ' ');
- std::size_t s = result.size();
- std::size_t r;
- std::string src(p1, p2);
- while (s < (r = std::strxfrm(&*result.begin(), src.c_str(), s)))
- {
- #if defined(_CPPLIB_VER)
- //
- // A bug in VC11 and 12 causes the program to hang if we pass a null-string
- // to std::strxfrm, but only for certain locales :-(
- // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
- //
- if (r == INT_MAX)
- {
- result.erase();
- result.insert(result.begin(), static_cast<char>(0));
- return result;
- }
- #endif
- result.append(r - s + 3, ' ');
- s = result.size();
- }
- result.erase(r);
- return result;
- }
- inline c_regex_traits<char>::string_type c_regex_traits<char>::transform_primary(const char* p1, const char* p2)
- {
- static char s_delim;
- static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<c_regex_traits<char>*>(0), &s_delim);
- std::string result;
- //
- // What we do here depends upon the format of the sort key returned by
- // sort key returned by this->transform:
- //
- switch (s_collate_type)
- {
- case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
- case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
- // the best we can do is translate to lower case, then get a regular sort key:
- {
- result.assign(p1, p2);
- for (std::string::size_type i = 0; i < result.size(); ++i)
- result[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(result[i])));
- result = transform(&*result.begin(), &*result.begin() + result.size());
- break;
- }
- case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
- {
- // get a regular sort key, and then truncate it:
- result = transform(p1, p2);
- result.erase(s_delim);
- break;
- }
- case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
- // get a regular sort key, and then truncate everything after the delim:
- result = transform(p1, p2);
- if ((!result.empty()) && (result[0] == s_delim))
- break;
- std::size_t i;
- for (i = 0; i < result.size(); ++i)
- {
- if (result[i] == s_delim)
- break;
- }
- result.erase(i);
- break;
- }
- if (result.empty())
- result = std::string(1, char(0));
- return result;
- }
- inline c_regex_traits<char>::char_class_type c_regex_traits<char>::lookup_classname(const char* p1, const char* p2)
- {
- using namespace BOOST_REGEX_DETAIL_NS;
- static const char_class_type masks[] =
- {
- 0,
- char_class_alnum,
- char_class_alpha,
- char_class_blank,
- char_class_cntrl,
- char_class_digit,
- char_class_digit,
- char_class_graph,
- char_class_horizontal,
- char_class_lower,
- char_class_lower,
- char_class_print,
- char_class_punct,
- char_class_space,
- char_class_space,
- char_class_upper,
- char_class_unicode,
- char_class_upper,
- char_class_vertical,
- char_class_alnum | char_class_word,
- char_class_alnum | char_class_word,
- char_class_xdigit,
- };
- int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
- if (idx < 0)
- {
- std::string s(p1, p2);
- for (std::string::size_type i = 0; i < s.size(); ++i)
- s[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(s[i])));
- idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
- }
- BOOST_REGEX_ASSERT(std::size_t(idx) + 1u < sizeof(masks) / sizeof(masks[0]));
- return masks[idx + 1];
- }
- inline bool c_regex_traits<char>::isctype(char c, char_class_type mask)
- {
- using namespace BOOST_REGEX_DETAIL_NS;
- return
- ((mask & char_class_space) && (std::isspace)(static_cast<unsigned char>(c)))
- || ((mask & char_class_print) && (std::isprint)(static_cast<unsigned char>(c)))
- || ((mask & char_class_cntrl) && (std::iscntrl)(static_cast<unsigned char>(c)))
- || ((mask & char_class_upper) && (std::isupper)(static_cast<unsigned char>(c)))
- || ((mask & char_class_lower) && (std::islower)(static_cast<unsigned char>(c)))
- || ((mask & char_class_alpha) && (std::isalpha)(static_cast<unsigned char>(c)))
- || ((mask & char_class_digit) && (std::isdigit)(static_cast<unsigned char>(c)))
- || ((mask & char_class_punct) && (std::ispunct)(static_cast<unsigned char>(c)))
- || ((mask & char_class_xdigit) && (std::isxdigit)(static_cast<unsigned char>(c)))
- || ((mask & char_class_blank) && (std::isspace)(static_cast<unsigned char>(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
- || ((mask & char_class_word) && (c == '_'))
- || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v')))
- || ((mask & char_class_horizontal) && (std::isspace)(static_cast<unsigned char>(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != '\v'));
- }
- inline c_regex_traits<char>::string_type c_regex_traits<char>::lookup_collatename(const char* p1, const char* p2)
- {
- std::string s(p1, p2);
- s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s);
- if (s.empty() && (p2 - p1 == 1))
- s.append(1, *p1);
- return s;
- }
- inline int c_regex_traits<char>::value(char c, int radix)
- {
- char b[2] = { c, '\0', };
- char* ep;
- int result = std::strtol(b, &ep, radix);
- if (ep == b)
- return -1;
- return result;
- }
- #ifndef BOOST_NO_WREGEX
- inline c_regex_traits<wchar_t>::string_type c_regex_traits<wchar_t>::transform(const wchar_t* p1, const wchar_t* p2)
- {
- std::size_t r;
- std::size_t s = 10;
- std::wstring src(p1, p2);
- std::wstring result(s, L' ');
- while (s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s)))
- {
- #if defined(_CPPLIB_VER)
- //
- // A bug in VC11 and 12 causes the program to hang if we pass a null-string
- // to std::strxfrm, but only for certain locales :-(
- // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
- //
- if (r == INT_MAX)
- {
- result.erase();
- result.insert(result.begin(), static_cast<wchar_t>(0));
- return result;
- }
- #endif
- result.append(r - s + 3, L' ');
- s = result.size();
- }
- result.erase(r);
- return result;
- }
- inline c_regex_traits<wchar_t>::string_type c_regex_traits<wchar_t>::transform_primary(const wchar_t* p1, const wchar_t* p2)
- {
- static wchar_t s_delim;
- static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<const c_regex_traits<wchar_t>*>(0), &s_delim);
- std::wstring result;
- //
- // What we do here depends upon the format of the sort key returned by
- // sort key returned by this->transform:
- //
- switch (s_collate_type)
- {
- case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
- case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
- // the best we can do is translate to lower case, then get a regular sort key:
- {
- result.assign(p1, p2);
- for (std::wstring::size_type i = 0; i < result.size(); ++i)
- result[i] = (std::towlower)(result[i]);
- result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
- break;
- }
- case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
- {
- // get a regular sort key, and then truncate it:
- result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
- result.erase(s_delim);
- break;
- }
- case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
- // get a regular sort key, and then truncate everything after the delim:
- result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
- if ((!result.empty()) && (result[0] == s_delim))
- break;
- std::size_t i;
- for (i = 0; i < result.size(); ++i)
- {
- if (result[i] == s_delim)
- break;
- }
- result.erase(i);
- break;
- }
- if (result.empty())
- result = std::wstring(1, char(0));
- return result;
- }
- inline c_regex_traits<wchar_t>::char_class_type c_regex_traits<wchar_t>::lookup_classname(const wchar_t* p1, const wchar_t* p2)
- {
- using namespace BOOST_REGEX_DETAIL_NS;
- static const char_class_type masks[] =
- {
- 0,
- char_class_alnum,
- char_class_alpha,
- char_class_blank,
- char_class_cntrl,
- char_class_digit,
- char_class_digit,
- char_class_graph,
- char_class_horizontal,
- char_class_lower,
- char_class_lower,
- char_class_print,
- char_class_punct,
- char_class_space,
- char_class_space,
- char_class_upper,
- char_class_unicode,
- char_class_upper,
- char_class_vertical,
- char_class_alnum | char_class_word,
- char_class_alnum | char_class_word,
- char_class_xdigit,
- };
- int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
- if (idx < 0)
- {
- std::wstring s(p1, p2);
- for (std::wstring::size_type i = 0; i < s.size(); ++i)
- s[i] = (std::towlower)(s[i]);
- idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
- }
- BOOST_REGEX_ASSERT(idx + 1 < static_cast<int>(sizeof(masks) / sizeof(masks[0])));
- return masks[idx + 1];
- }
- inline bool c_regex_traits<wchar_t>::isctype(wchar_t c, char_class_type mask)
- {
- using namespace BOOST_REGEX_DETAIL_NS;
- return
- ((mask & char_class_space) && (std::iswspace)(c))
- || ((mask & char_class_print) && (std::iswprint)(c))
- || ((mask & char_class_cntrl) && (std::iswcntrl)(c))
- || ((mask & char_class_upper) && (std::iswupper)(c))
- || ((mask & char_class_lower) && (std::iswlower)(c))
- || ((mask & char_class_alpha) && (std::iswalpha)(c))
- || ((mask & char_class_digit) && (std::iswdigit)(c))
- || ((mask & char_class_punct) && (std::iswpunct)(c))
- || ((mask & char_class_xdigit) && (std::iswxdigit)(c))
- || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
- || ((mask & char_class_word) && (c == '_'))
- || ((mask & char_class_unicode) && (c & ~static_cast<wchar_t>(0xff)))
- || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == L'\v')))
- || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != L'\v'));
- }
- inline c_regex_traits<wchar_t>::string_type c_regex_traits<wchar_t>::lookup_collatename(const wchar_t* p1, const wchar_t* p2)
- {
- std::string name;
- // Usual msvc warning suppression does not work here with std::string template constructor.... use a workaround instead:
- for (const wchar_t* pos = p1; pos != p2; ++pos)
- name.push_back((char)*pos);
- name = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(name);
- if (!name.empty())
- return string_type(name.begin(), name.end());
- if (p2 - p1 == 1)
- return string_type(1, *p1);
- return string_type();
- }
- inline int c_regex_traits<wchar_t>::value(wchar_t c, int radix)
- {
- #ifdef BOOST_BORLANDC
- // workaround for broken wcstol:
- if ((std::iswxdigit)(c) == 0)
- return -1;
- #endif
- wchar_t b[2] = { c, '\0', };
- wchar_t* ep;
- int result = std::wcstol(b, &ep, radix);
- if (ep == b)
- return -1;
- return result;
- }
- #endif
- }
- #endif
|