//
// Overview of this section (sorting/hash helpers, named_subexpressions, regex_data):
//
//  * bubble_down_one(first, last): when the range is not empty, repeatedly swaps
//    the last element with its predecessor (through the element's member swap)
//    for as long as it compares less than that predecessor. This is a single
//    insertion step: it restores order after one element has been appended to
//    a range that was already sorted.
//  * hash_value_from_capture_name(i, j): folds every element of [i, j) into a
//    std::size_t with r ^= *i + 0x9e3779b9 + (r << 6) + (r >> 2), reduces the
//    result modulo a std::numeric_limits maximum, and ORs in hash_value_mask, so
//    the mask bit is set in every value it returns.
//  * named_subexpressions: stores (index, hash) records in m_sub_names, which
//    set_name() keeps ordered by hash (push_back followed by bubble_down_one).
//    get_id() returns the index of the first record whose hash matches, or -1
//    when there is none; equal_range() returns all records with that hash.
//    Records are ordered and compared by hash alone and the characters of the
//    name are not stored, so two names that hash alike cannot be told apart here.
//  * regex_data: derives from named_subexpressions and holds the state described
//    by the per-member comments. Both constructors set the scalar members to
//    0/false and m_startmap to all zeros; the one-argument form shares the traits
//    wrapper it is given, the default form allocates a new one. m_startmap has
//    1 << CHAR_BIT entries, i.e. one per possible unsigned char value.
//
// NOTE(review): the template parameter/argument lists and the #include target
// are absent from this copy (e.g. "template class basic_regex_parser;",
// "std::numeric_limits::digits", "std::vector m_sub_names;") - presumably lost
// when the text was extracted. Restore them from the upstream header before
// compiling - TODO confirm.
//
/* * * Copyright (c) 1998-2004 John Maddock * Copyright 2011 Garmin Ltd. or its subsidiaries * * Distributed under the Boost Software License, Version 1.0. * (See accompanying file LICENSE_1_0.txt or copy at * http://www.boost.org/LICENSE_1_0.txt) * */ /* * LOCATION: see http://www.boost.org/ for most recent version. * FILE basic_regex.cpp * VERSION see * DESCRIPTION: Declares template class basic_regex. */ #ifndef BOOST_REGEX_V5_BASIC_REGEX_HPP #define BOOST_REGEX_V5_BASIC_REGEX_HPP #include namespace boost{ #ifdef BOOST_REGEX_MSVC #pragma warning(push) #pragma warning(disable : 4251) #if BOOST_REGEX_MSVC < 1700 # pragma warning(disable : 4231) #endif #if BOOST_REGEX_MSVC < 1600 #pragma warning(disable : 4660) #endif #if BOOST_REGEX_MSVC < 1910 #pragma warning(disable:4800) #endif #endif namespace BOOST_REGEX_DETAIL_NS{ // // forward declaration, we will need this one later: // template class basic_regex_parser; template void bubble_down_one(I first, I last) { if(first != last) { I next = last - 1; while((next != first) && (*next < *(next-1))) { (next-1)->swap(*next); --next; } } } static const int hash_value_mask = 1 << (std::numeric_limits::digits - 1); template inline int hash_value_from_capture_name(Iterator i, Iterator j) { std::size_t r = 0; while (i != j) { r ^= *i + 0x9e3779b9 + (r << 6) + (r >> 2); ++i; } r %= ((std::numeric_limits::max)()); return static_cast(r) | hash_value_mask; } class named_subexpressions { public: struct name { template name(const charT* i, const charT* j, int idx) : index(idx) { hash = hash_value_from_capture_name(i, j); } name(int h, int idx) : index(idx), hash(h) { } int index; int hash; bool operator < (const name& other)const { return hash < other.hash; } bool operator == (const name& other)const { return hash == other.hash; } void swap(name& other) { std::swap(index, other.index); std::swap(hash, other.hash); } }; typedef std::vector::const_iterator const_iterator; typedef std::pair range_type; named_subexpressions(){} 
template void set_name(const charT* i, const charT* j, int index) { m_sub_names.push_back(name(i, j, index)); bubble_down_one(m_sub_names.begin(), m_sub_names.end()); } template int get_id(const charT* i, const charT* j)const { name t(i, j, 0); typename std::vector::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); if((pos != m_sub_names.end()) && (*pos == t)) { return pos->index; } return -1; } template range_type equal_range(const charT* i, const charT* j)const { name t(i, j, 0); return std::equal_range(m_sub_names.begin(), m_sub_names.end(), t); } int get_id(int h)const { name t(h, 0); std::vector::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); if((pos != m_sub_names.end()) && (*pos == t)) { return pos->index; } return -1; } range_type equal_range(int h)const { name t(h, 0); return std::equal_range(m_sub_names.begin(), m_sub_names.end(), t); } private: std::vector m_sub_names; }; // // class regex_data: // represents the data we wish to expose to the matching algorithms. // template struct regex_data : public named_subexpressions { typedef regex_constants::syntax_option_type flag_type; typedef std::size_t size_type; regex_data(const ::std::shared_ptr< ::boost::regex_traits_wrapper >& t) : m_ptraits(t), m_flags(0), m_status(0), m_expression(0), m_expression_len(0), m_mark_count(0), m_first_state(0), m_restart_type(0), m_startmap{ 0 }, m_can_be_null(0), m_word_mask(0), m_has_recursions(false), m_disable_match_any(false) {} regex_data() : m_ptraits(new ::boost::regex_traits_wrapper()), m_flags(0), m_status(0), m_expression(0), m_expression_len(0), m_mark_count(0), m_first_state(0), m_restart_type(0), m_startmap{ 0 }, m_can_be_null(0), m_word_mask(0), m_has_recursions(false), m_disable_match_any(false) {} ::std::shared_ptr< ::boost::regex_traits_wrapper > m_ptraits; // traits class instance flag_type m_flags; // flags with which we were compiled int m_status; // error code (0 implies OK). 
const charT* m_expression; // the original expression std::ptrdiff_t m_expression_len; // the length of the original expression size_type m_mark_count; // the number of marked sub-expressions BOOST_REGEX_DETAIL_NS::re_syntax_base* m_first_state; // the first state of the machine unsigned m_restart_type; // search optimisation type unsigned char m_startmap[1 << CHAR_BIT]; // which characters can start a match unsigned int m_can_be_null; // whether we can match a null string BOOST_REGEX_DETAIL_NS::raw_storage m_data; // the buffer in which our states are constructed typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character std::vector< std::pair< std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*. bool m_has_recursions; // whether we have recursive expressions; bool m_disable_match_any; // when set we need to disable the match_any flag as it causes different/buggy behaviour. }; // // class basic_regex_implementation // pimpl implementation class for basic_regex. 
//
// basic_regex_implementation adds the parsing entry point and read-only
// accessors on top of regex_data:
//  * assign() constructs a basic_regex_parser over this object's regex_data
//    and calls parse(arg_first, arg_last, f) on it.
//  * str() returns the stored expression only while m_status == 0, otherwise
//    an empty string; begin() and end() return 0 when m_status is non-zero.
//  * subexpression(n) goes through m_subs.at(n), so an out-of-range n throws
//    std::out_of_range instead of reading past the end of the vector.
//  * mark_count() reports m_mark_count - 1.
//
// basic_regex is a handle that owns a shared_ptr (m_pimpl) to such an
// implementation object:
//  * copy construction and assign(const basic_regex&) copy the pointer, so
//    both objects then refer to the same implementation object.
//  * while m_pimpl is null the accessors fall back to fixed values: flags() 0,
//    str() empty, begin()/end() 0, size() 0, mark_count() 0, empty() true,
//    status() regex_constants::error_empty, getloc() locale_type();
//    subexpression() throws std::logic_error in that state.
//  * max_size() always returns UINT_MAX.
//  * compare() returns 0 when both objects hold the same implementation,
//    orders an object without an implementation before one that has one, and
//    otherwise orders by status difference, then flags difference, then
//    str().compare().
//  * set_expression() adds regex_constants::no_except to the flags and
//    returns status() after assigning.
//  * the "private access methods" assert through BOOST_REGEX_ASSERT that an
//    implementation exists before forwarding to it.
//  * do_assign() builds a new implementation (reusing the current traits
//    object when there is one), parses into it and only then swaps it into
//    m_pimpl, so m_pimpl is untouched if parsing throws.
//  * imbue() swaps in a newly constructed implementation after imbuing it with
//    l; the implementation held before the call is released by this object, so
//    an expression assigned earlier is no longer reachable through it.
//
// NOTE(review): swap() is declared with throw(), a dynamic exception
// specification; that syntax is deprecated and is not part of the language from
// C++20 on, where noexcept is the replacement - revisit if newer standards must
// be supported.
// NOTE(review): the template parameter/argument lists are absent from this copy
// (e.g. "template class basic_regex_implementation : public regex_data",
// "std::shared_ptr > m_pimpl;") - presumably lost when the text was extracted.
// Restore them from the upstream header before compiling - TODO confirm.
//
// template class basic_regex_implementation : public regex_data { public: typedef regex_constants::syntax_option_type flag_type; typedef std::ptrdiff_t difference_type; typedef std::size_t size_type; typedef typename traits::locale_type locale_type; typedef const charT* const_iterator; basic_regex_implementation(){} basic_regex_implementation(const ::std::shared_ptr< ::boost::regex_traits_wrapper >& t) : regex_data(t) {} void assign(const charT* arg_first, const charT* arg_last, flag_type f) { regex_data* pdat = this; basic_regex_parser parser(pdat); parser.parse(arg_first, arg_last, f); } locale_type imbue(locale_type l) { return this->m_ptraits->imbue(l); } locale_type getloc()const { return this->m_ptraits->getloc(); } std::basic_string str()const { std::basic_string result; if(this->m_status == 0) result = std::basic_string(this->m_expression, this->m_expression_len); return result; } const_iterator expression()const { return this->m_expression; } std::pair subexpression(std::size_t n)const { const std::pair& pi = this->m_subs.at(n); std::pair p(expression() + pi.first, expression() + pi.second); return p; } // // begin, end: const_iterator begin()const { return (this->m_status ? 0 : this->m_expression); } const_iterator end()const { return (this->m_status ? 
0 : this->m_expression + this->m_expression_len); } flag_type flags()const { return this->m_flags; } size_type size()const { return this->m_expression_len; } int status()const { return this->m_status; } size_type mark_count()const { return this->m_mark_count - 1; } const BOOST_REGEX_DETAIL_NS::re_syntax_base* get_first_state()const { return this->m_first_state; } unsigned get_restart_type()const { return this->m_restart_type; } const unsigned char* get_map()const { return this->m_startmap; } const ::boost::regex_traits_wrapper& get_traits()const { return *(this->m_ptraits); } bool can_be_null()const { return this->m_can_be_null; } const regex_data& get_data()const { basic_regex_implementation const* p = this; return *static_cast*>(p); } }; } // namespace BOOST_REGEX_DETAIL_NS // // class basic_regex: // represents the compiled // regular expression: // #ifdef BOOST_REGEX_NO_FWD template > #else template #endif class basic_regex : public regbase { public: // typedefs: typedef std::size_t traits_size_type; typedef typename traits::string_type traits_string_type; typedef charT char_type; typedef traits traits_type; typedef charT value_type; typedef charT& reference; typedef const charT& const_reference; typedef const charT* const_iterator; typedef const_iterator iterator; typedef std::ptrdiff_t difference_type; typedef std::size_t size_type; typedef regex_constants::syntax_option_type flag_type; // locale_type // placeholder for actual locale type used by the // traits class to localise *this. 
typedef typename traits::locale_type locale_type; public: explicit basic_regex(){} explicit basic_regex(const charT* p, flag_type f = regex_constants::normal) { assign(p, f); } basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) { assign(p1, p2, f); } basic_regex(const charT* p, size_type len, flag_type f) { assign(p, len, f); } basic_regex(const basic_regex& that) : m_pimpl(that.m_pimpl) {} ~basic_regex(){} basic_regex& operator=(const basic_regex& that) { return assign(that); } basic_regex& operator=(const charT* ptr) { return assign(ptr); } // // assign: basic_regex& assign(const basic_regex& that) { m_pimpl = that.m_pimpl; return *this; } basic_regex& assign(const charT* p, flag_type f = regex_constants::normal) { return assign(p, p + traits::length(p), f); } basic_regex& assign(const charT* p, size_type len, flag_type f) { return assign(p, p + len, f); } private: basic_regex& do_assign(const charT* p1, const charT* p2, flag_type f); public: basic_regex& assign(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) { return do_assign(p1, p2, f); } template unsigned int set_expression(const std::basic_string& p, flag_type f = regex_constants::normal) { return set_expression(p.data(), p.data() + p.size(), f); } template explicit basic_regex(const std::basic_string& p, flag_type f = regex_constants::normal) { assign(p, f); } template basic_regex(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal) { typedef typename traits::string_type seq_type; seq_type a(arg_first, arg_last); if(!a.empty()) assign(static_cast(&*a.begin()), static_cast(&*a.begin() + a.size()), f); else assign(static_cast(0), static_cast(0), f); } template basic_regex& operator=(const std::basic_string& p) { return assign(p.data(), p.data() + p.size(), regex_constants::normal); } template basic_regex& assign( const std::basic_string& s, flag_type f = regex_constants::normal) { return assign(s.data(), s.data() + 
s.size(), f); } template basic_regex& assign(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal) { typedef typename traits::string_type seq_type; seq_type a(arg_first, arg_last); if(a.size()) { const charT* p1 = &*a.begin(); const charT* p2 = &*a.begin() + a.size(); return assign(p1, p2, f); } return assign(static_cast(0), static_cast(0), f); } // // locale: locale_type imbue(locale_type l); locale_type getloc()const { return m_pimpl.get() ? m_pimpl->getloc() : locale_type(); } // // getflags: // retained for backwards compatibility only, "flags" // is now the preferred name: flag_type getflags()const { return flags(); } flag_type flags()const { return m_pimpl.get() ? m_pimpl->flags() : 0; } // // str: std::basic_string str()const { return m_pimpl.get() ? m_pimpl->str() : std::basic_string(); } // // begin, end, subexpression: std::pair subexpression(std::size_t n)const { #ifdef BOOST_REGEX_STANDALONE if (!m_pimpl.get()) throw std::logic_error("Can't access subexpressions in an invalid regex."); #else if(!m_pimpl.get()) boost::throw_exception(std::logic_error("Can't access subexpressions in an invalid regex.")); #endif return m_pimpl->subexpression(n); } const_iterator begin()const { return (m_pimpl.get() ? m_pimpl->begin() : 0); } const_iterator end()const { return (m_pimpl.get() ? m_pimpl->end() : 0); } // // swap: void swap(basic_regex& that)throw() { m_pimpl.swap(that.m_pimpl); } // // size: size_type size()const { return (m_pimpl.get() ? m_pimpl->size() : 0); } // // max_size: size_type max_size()const { return UINT_MAX; } // // empty: bool empty()const { return (m_pimpl.get() ? 0 != m_pimpl->status() : true); } size_type mark_count()const { return (m_pimpl.get() ? m_pimpl->mark_count() : 0); } int status()const { return (m_pimpl.get() ? 
m_pimpl->status() : regex_constants::error_empty); } int compare(const basic_regex& that) const { if(m_pimpl.get() == that.m_pimpl.get()) return 0; if(!m_pimpl.get()) return -1; if(!that.m_pimpl.get()) return 1; if(status() != that.status()) return status() - that.status(); if(flags() != that.flags()) return flags() - that.flags(); return str().compare(that.str()); } bool operator==(const basic_regex& e)const { return compare(e) == 0; } bool operator != (const basic_regex& e)const { return compare(e) != 0; } bool operator<(const basic_regex& e)const { return compare(e) < 0; } bool operator>(const basic_regex& e)const { return compare(e) > 0; } bool operator<=(const basic_regex& e)const { return compare(e) <= 0; } bool operator>=(const basic_regex& e)const { return compare(e) >= 0; } // // The following are deprecated as public interfaces // but are available for compatibility with earlier versions. const charT* expression()const { return (m_pimpl.get() && !m_pimpl->status() ? m_pimpl->expression() : 0); } unsigned int set_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) { assign(p1, p2, f | regex_constants::no_except); return status(); } unsigned int set_expression(const charT* p, flag_type f = regex_constants::normal) { assign(p, f | regex_constants::no_except); return status(); } unsigned int error_code()const { return status(); } // // private access methods: // const BOOST_REGEX_DETAIL_NS::re_syntax_base* get_first_state()const { BOOST_REGEX_ASSERT(0 != m_pimpl.get()); return m_pimpl->get_first_state(); } unsigned get_restart_type()const { BOOST_REGEX_ASSERT(0 != m_pimpl.get()); return m_pimpl->get_restart_type(); } const unsigned char* get_map()const { BOOST_REGEX_ASSERT(0 != m_pimpl.get()); return m_pimpl->get_map(); } const ::boost::regex_traits_wrapper& get_traits()const { BOOST_REGEX_ASSERT(0 != m_pimpl.get()); return m_pimpl->get_traits(); } bool can_be_null()const { BOOST_REGEX_ASSERT(0 != m_pimpl.get()); return 
m_pimpl->can_be_null(); } const BOOST_REGEX_DETAIL_NS::regex_data& get_data()const { BOOST_REGEX_ASSERT(0 != m_pimpl.get()); return m_pimpl->get_data(); } std::shared_ptr get_named_subs()const { return m_pimpl; } private: std::shared_ptr > m_pimpl; }; // // out of line members; // these are the only members that mutate the basic_regex object, // and are designed to provide the strong exception guarantee // (in the event of a throw, the state of the object remains unchanged). // template basic_regex& basic_regex::do_assign(const charT* p1, const charT* p2, flag_type f) { std::shared_ptr > temp; if(!m_pimpl.get()) { temp = std::shared_ptr >(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation()); } else { temp = std::shared_ptr >(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation(m_pimpl->m_ptraits)); } temp->assign(p1, p2, f); temp.swap(m_pimpl); return *this; } template typename basic_regex::locale_type basic_regex::imbue(locale_type l) { std::shared_ptr > temp(new BOOST_REGEX_DETAIL_NS::basic_regex_implementation()); locale_type result = temp->imbue(l); temp.swap(m_pimpl); return result; } // // non-members: // template void swap(basic_regex& e1, basic_regex& e2) { e1.swap(e2); } template std::basic_ostream& operator << (std::basic_ostream& os, const basic_regex& e) { return (os << e.str()); } // // class reg_expression: // this is provided for backwards compatibility only, // it is deprecated, do not use! 
//
// reg_expression derives from basic_regex and adds no state of its own: every
// constructor forwards its arguments to the basic_regex constructor with the
// same parameters, and both assignment operators delegate to assign() and
// return *this.
//
// operator=(const reg_expression&) used to be written "return this->assign(that);".
// basic_regex declares assign(const basic_regex&) as returning basic_regex&, and
// a reference to the base class does not convert implicitly to the
// reg_expression& this operator is declared to return, so the statement could
// not compile once the operator was instantiated. It now assigns and returns
// *this, exactly like the templated operator= further down.
//
// NOTE(review): the template parameter/argument lists are absent from this copy
// (e.g. "template > #else template #endif class reg_expression : public
// basic_regex") - presumably lost when the text was extracted. Restore them from
// the upstream header before compiling - TODO confirm.
//
// #ifdef BOOST_REGEX_NO_FWD template > #else template #endif class reg_expression : public basic_regex { public: typedef typename basic_regex::flag_type flag_type; typedef typename basic_regex::size_type size_type; explicit reg_expression(){} explicit reg_expression(const charT* p, flag_type f = regex_constants::normal) : basic_regex(p, f){} reg_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) : basic_regex(p1, p2, f){} reg_expression(const charT* p, size_type len, flag_type f) : basic_regex(p, len, f){} reg_expression(const reg_expression& that) : basic_regex(that) {} ~reg_expression(){} reg_expression& operator=(const reg_expression& that) { this->assign(that); return *this; } template explicit reg_expression(const std::basic_string& p, flag_type f = regex_constants::normal) : basic_regex(p, f) { } template reg_expression(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal) : basic_regex(arg_first, arg_last, f) { } template reg_expression& operator=(const std::basic_string& p) { this->assign(p); return *this; } }; #ifdef BOOST_REGEX_MSVC #pragma warning (pop) #endif } // namespace boost #endif