123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475 |
- ///////////////////////////////////////////////////////////////////////////////
- // detail/dynamic/parser_traits.hpp
- //
- // Copyright 2008 Eric Niebler. Distributed under the Boost
- // Software License, Version 1.0. (See accompanying file
- // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- #ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005
- #define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005
- // MS compatible compilers support #pragma once
- #if defined(_MSC_VER)
- # pragma once
- #endif
- #include <string>
- #include <climits>
- #include <boost/config.hpp>
- #include <boost/assert.hpp>
- #include <boost/throw_exception.hpp>
- #include <boost/xpressive/regex_error.hpp>
- #include <boost/xpressive/regex_traits.hpp>
- #include <boost/xpressive/detail/detail_fwd.hpp>
- #include <boost/xpressive/detail/dynamic/matchable.hpp>
- #include <boost/xpressive/detail/dynamic/parser_enum.hpp>
- #include <boost/xpressive/detail/utility/literals.hpp>
- #include <boost/xpressive/detail/utility/algorithm.hpp>
- namespace boost { namespace xpressive
- {
- ///////////////////////////////////////////////////////////////////////////////
- // compiler_traits
- // this works for char and wchar_t. it must be specialized for anything else.
- //
- template<typename RegexTraits>
- struct compiler_traits
- {
- typedef RegexTraits regex_traits;
- typedef typename regex_traits::char_type char_type;
- typedef typename regex_traits::string_type string_type;
- typedef typename regex_traits::locale_type locale_type;
- ///////////////////////////////////////////////////////////////////////////////
- // constructor
- explicit compiler_traits(RegexTraits const &traits = RegexTraits())
- : traits_(traits)
- , flags_(regex_constants::ECMAScript)
- , space_(lookup_classname(traits_, "space"))
- , alnum_(lookup_classname(traits_, "alnum"))
- {
- }
- ///////////////////////////////////////////////////////////////////////////////
- // flags
- regex_constants::syntax_option_type flags() const
- {
- return this->flags_;
- }
- ///////////////////////////////////////////////////////////////////////////////
- // flags
- void flags(regex_constants::syntax_option_type flags)
- {
- this->flags_ = flags;
- }
- ///////////////////////////////////////////////////////////////////////////////
- // traits
- regex_traits &traits()
- {
- return this->traits_;
- }
- regex_traits const &traits() const
- {
- return this->traits_;
- }
- ///////////////////////////////////////////////////////////////////////////////
- // imbue
- locale_type imbue(locale_type const &loc)
- {
- locale_type oldloc = this->traits().imbue(loc);
- this->space_ = lookup_classname(this->traits(), "space");
- this->alnum_ = lookup_classname(this->traits(), "alnum");
- return oldloc;
- }
- ///////////////////////////////////////////////////////////////////////////////
- // getloc
- locale_type getloc() const
- {
- return this->traits().getloc();
- }
- ///////////////////////////////////////////////////////////////////////////////
- // get_token
- // get a token and advance the iterator
- template<typename FwdIter>
- regex_constants::compiler_token_type get_token(FwdIter &begin, FwdIter end)
- {
- using namespace regex_constants;
- if(this->eat_ws_(begin, end) == end)
- {
- return regex_constants::token_end_of_pattern;
- }
- switch(*begin)
- {
- case BOOST_XPR_CHAR_(char_type, '\\'): return this->get_escape_token(++begin, end);
- case BOOST_XPR_CHAR_(char_type, '.'): ++begin; return token_any;
- case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_assert_begin_line;
- case BOOST_XPR_CHAR_(char_type, '$'): ++begin; return token_assert_end_line;
- case BOOST_XPR_CHAR_(char_type, '('): ++begin; return token_group_begin;
- case BOOST_XPR_CHAR_(char_type, ')'): ++begin; return token_group_end;
- case BOOST_XPR_CHAR_(char_type, '|'): ++begin; return token_alternate;
- case BOOST_XPR_CHAR_(char_type, '['): ++begin; return token_charset_begin;
- case BOOST_XPR_CHAR_(char_type, '*'):
- case BOOST_XPR_CHAR_(char_type, '+'):
- case BOOST_XPR_CHAR_(char_type, '?'):
- return token_invalid_quantifier;
- case BOOST_XPR_CHAR_(char_type, ']'):
- case BOOST_XPR_CHAR_(char_type, '{'):
- default:
- return token_literal;
- }
- }
- ///////////////////////////////////////////////////////////////////////////////
- // get_quant_spec
- template<typename FwdIter>
- bool get_quant_spec(FwdIter &begin, FwdIter end, detail::quant_spec &spec)
- {
- using namespace regex_constants;
- FwdIter old_begin;
- if(this->eat_ws_(begin, end) == end)
- {
- return false;
- }
- switch(*begin)
- {
- case BOOST_XPR_CHAR_(char_type, '*'):
- spec.min_ = 0;
- spec.max_ = (std::numeric_limits<unsigned int>::max)();
- break;
- case BOOST_XPR_CHAR_(char_type, '+'):
- spec.min_ = 1;
- spec.max_ = (std::numeric_limits<unsigned int>::max)();
- break;
- case BOOST_XPR_CHAR_(char_type, '?'):
- spec.min_ = 0;
- spec.max_ = 1;
- break;
- case BOOST_XPR_CHAR_(char_type, '{'):
- old_begin = this->eat_ws_(++begin, end);
- spec.min_ = spec.max_ = detail::toi(begin, end, this->traits());
- BOOST_XPR_ENSURE_
- (
- begin != old_begin && begin != end, error_brace, "invalid quantifier"
- );
- if(*begin == BOOST_XPR_CHAR_(char_type, ','))
- {
- old_begin = this->eat_ws_(++begin, end);
- spec.max_ = detail::toi(begin, end, this->traits());
- BOOST_XPR_ENSURE_
- (
- begin != end && BOOST_XPR_CHAR_(char_type, '}') == *begin
- , error_brace, "invalid quantifier"
- );
- if(begin == old_begin)
- {
- spec.max_ = (std::numeric_limits<unsigned int>::max)();
- }
- else
- {
- BOOST_XPR_ENSURE_
- (
- spec.min_ <= spec.max_, error_badbrace, "invalid quantification range"
- );
- }
- }
- else
- {
- BOOST_XPR_ENSURE_
- (
- BOOST_XPR_CHAR_(char_type, '}') == *begin, error_brace, "invalid quantifier"
- );
- }
- break;
- default:
- return false;
- }
- spec.greedy_ = true;
- if(this->eat_ws_(++begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin)
- {
- ++begin;
- spec.greedy_ = false;
- }
- return true;
- }
- ///////////////////////////////////////////////////////////////////////////
- // get_group_type
- template<typename FwdIter>
- regex_constants::compiler_token_type get_group_type(FwdIter &begin, FwdIter end, string_type &name)
- {
- using namespace regex_constants;
- if(this->eat_ws_(begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin)
- {
- this->eat_ws_(++begin, end);
- BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
- switch(*begin)
- {
- case BOOST_XPR_CHAR_(char_type, ':'): ++begin; return token_no_mark;
- case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_independent_sub_expression;
- case BOOST_XPR_CHAR_(char_type, '#'): ++begin; return token_comment;
- case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookahead;
- case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookahead;
- case BOOST_XPR_CHAR_(char_type, 'R'): ++begin; return token_recurse;
- case BOOST_XPR_CHAR_(char_type, '$'):
- this->get_name_(++begin, end, name);
- BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
- if(BOOST_XPR_CHAR_(char_type, '=') == *begin)
- {
- ++begin;
- return token_rule_assign;
- }
- return token_rule_ref;
- case BOOST_XPR_CHAR_(char_type, '<'):
- this->eat_ws_(++begin, end);
- BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
- switch(*begin)
- {
- case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookbehind;
- case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookbehind;
- default:
- BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
- }
- case BOOST_XPR_CHAR_(char_type, 'P'):
- this->eat_ws_(++begin, end);
- BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
- switch(*begin)
- {
- case BOOST_XPR_CHAR_(char_type, '<'):
- this->get_name_(++begin, end, name);
- BOOST_XPR_ENSURE_(begin != end && BOOST_XPR_CHAR_(char_type, '>') == *begin++, error_paren, "incomplete extension");
- return token_named_mark;
- case BOOST_XPR_CHAR_(char_type, '='):
- this->get_name_(++begin, end, name);
- BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
- return token_named_mark_ref;
- default:
- BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
- }
- case BOOST_XPR_CHAR_(char_type, 'i'):
- case BOOST_XPR_CHAR_(char_type, 'm'):
- case BOOST_XPR_CHAR_(char_type, 's'):
- case BOOST_XPR_CHAR_(char_type, 'x'):
- case BOOST_XPR_CHAR_(char_type, '-'):
- return this->parse_mods_(begin, end);
- default:
- BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
- }
- }
- return token_literal;
- }
- //////////////////////////////////////////////////////////////////////////
- // get_charset_token
- // NOTE: white-space is *never* ignored in a charset.
- template<typename FwdIter>
- regex_constants::compiler_token_type get_charset_token(FwdIter &begin, FwdIter end)
- {
- using namespace regex_constants;
- BOOST_ASSERT(begin != end);
- switch(*begin)
- {
- case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_charset_invert;
- case BOOST_XPR_CHAR_(char_type, '-'): ++begin; return token_charset_hyphen;
- case BOOST_XPR_CHAR_(char_type, ']'): ++begin; return token_charset_end;
- case BOOST_XPR_CHAR_(char_type, '['):
- {
- FwdIter next = begin; ++next;
- if(next != end)
- {
- BOOST_XPR_ENSURE_(
- *next != BOOST_XPR_CHAR_(char_type, '=')
- , error_collate
- , "equivalence classes are not yet supported"
- );
- BOOST_XPR_ENSURE_(
- *next != BOOST_XPR_CHAR_(char_type, '.')
- , error_collate
- , "collation sequences are not yet supported"
- );
- if(*next == BOOST_XPR_CHAR_(char_type, ':'))
- {
- begin = ++next;
- return token_posix_charset_begin;
- }
- }
- }
- break;
- case BOOST_XPR_CHAR_(char_type, ':'):
- {
- FwdIter next = begin; ++next;
- if(next != end && *next == BOOST_XPR_CHAR_(char_type, ']'))
- {
- begin = ++next;
- return token_posix_charset_end;
- }
- }
- break;
- case BOOST_XPR_CHAR_(char_type, '\\'):
- if(++begin != end)
- {
- switch(*begin)
- {
- case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_charset_backspace;
- default:;
- }
- }
- return token_escape;
- default:;
- }
- return token_literal;
- }
- //////////////////////////////////////////////////////////////////////////
- // get_escape_token
- template<typename FwdIter>
- regex_constants::compiler_token_type get_escape_token(FwdIter &begin, FwdIter end)
- {
- using namespace regex_constants;
- if(begin != end)
- {
- switch(*begin)
- {
- //case BOOST_XPR_CHAR_(char_type, 'a'): ++begin; return token_escape_bell;
- //case BOOST_XPR_CHAR_(char_type, 'c'): ++begin; return token_escape_control;
- //case BOOST_XPR_CHAR_(char_type, 'e'): ++begin; return token_escape_escape;
- //case BOOST_XPR_CHAR_(char_type, 'f'): ++begin; return token_escape_formfeed;
- //case BOOST_XPR_CHAR_(char_type, 'n'): ++begin; return token_escape_newline;
- //case BOOST_XPR_CHAR_(char_type, 't'): ++begin; return token_escape_horizontal_tab;
- //case BOOST_XPR_CHAR_(char_type, 'v'): ++begin; return token_escape_vertical_tab;
- case BOOST_XPR_CHAR_(char_type, 'A'): ++begin; return token_assert_begin_sequence;
- case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_assert_word_boundary;
- case BOOST_XPR_CHAR_(char_type, 'B'): ++begin; return token_assert_not_word_boundary;
- case BOOST_XPR_CHAR_(char_type, 'E'): ++begin; return token_quote_meta_end;
- case BOOST_XPR_CHAR_(char_type, 'Q'): ++begin; return token_quote_meta_begin;
- case BOOST_XPR_CHAR_(char_type, 'Z'): ++begin; return token_assert_end_sequence;
- // Non-standard extension to ECMAScript syntax
- case BOOST_XPR_CHAR_(char_type, '<'): ++begin; return token_assert_word_begin;
- case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_assert_word_end;
- default:; // fall-through
- }
- }
- return token_escape;
- }
- private:
- //////////////////////////////////////////////////////////////////////////
- // parse_mods_
- template<typename FwdIter>
- regex_constants::compiler_token_type parse_mods_(FwdIter &begin, FwdIter end)
- {
- using namespace regex_constants;
- bool set = true;
- do switch(*begin)
- {
- case BOOST_XPR_CHAR_(char_type, 'i'): this->flag_(set, icase_); break;
- case BOOST_XPR_CHAR_(char_type, 'm'): this->flag_(!set, single_line); break;
- case BOOST_XPR_CHAR_(char_type, 's'): this->flag_(!set, not_dot_newline); break;
- case BOOST_XPR_CHAR_(char_type, 'x'): this->flag_(set, ignore_white_space); break;
- case BOOST_XPR_CHAR_(char_type, ':'): ++begin; BOOST_FALLTHROUGH;
- case BOOST_XPR_CHAR_(char_type, ')'): return token_no_mark;
- case BOOST_XPR_CHAR_(char_type, '-'): if(false == (set = !set)) break; BOOST_FALLTHROUGH;
- default: BOOST_THROW_EXCEPTION(regex_error(error_paren, "unknown pattern modifier"));
- }
- while(BOOST_XPR_ENSURE_(++begin != end, error_paren, "incomplete extension"));
- // this return is technically unreachable, but this must
- // be here to work around a bug in gcc 4.0
- return token_no_mark;
- }
- ///////////////////////////////////////////////////////////////////////////////
- // flag_
- void flag_(bool set, regex_constants::syntax_option_type flag)
- {
- this->flags_ = set ? (this->flags_ | flag) : (this->flags_ & ~flag);
- }
- ///////////////////////////////////////////////////////////////////////////
- // is_space_
- bool is_space_(char_type ch) const
- {
- return 0 != this->space_ && this->traits().isctype(ch, this->space_);
- }
- ///////////////////////////////////////////////////////////////////////////
- // is_alnum_
- bool is_alnum_(char_type ch) const
- {
- return 0 != this->alnum_ && this->traits().isctype(ch, this->alnum_);
- }
- ///////////////////////////////////////////////////////////////////////////
- // get_name_
- template<typename FwdIter>
- void get_name_(FwdIter &begin, FwdIter end, string_type &name)
- {
- this->eat_ws_(begin, end);
- for(name.clear(); begin != end && this->is_alnum_(*begin); ++begin)
- {
- name.push_back(*begin);
- }
- this->eat_ws_(begin, end);
- BOOST_XPR_ENSURE_(!name.empty(), regex_constants::error_paren, "incomplete extension");
- }
- ///////////////////////////////////////////////////////////////////////////////
- // eat_ws_
- template<typename FwdIter>
- FwdIter &eat_ws_(FwdIter &begin, FwdIter end)
- {
- if(0 != (regex_constants::ignore_white_space & this->flags()))
- {
- while(end != begin && (BOOST_XPR_CHAR_(char_type, '#') == *begin || this->is_space_(*begin)))
- {
- if(BOOST_XPR_CHAR_(char_type, '#') == *begin++)
- {
- while(end != begin && BOOST_XPR_CHAR_(char_type, '\n') != *begin++) {}
- }
- else
- {
- for(; end != begin && this->is_space_(*begin); ++begin) {}
- }
- }
- }
- return begin;
- }
- regex_traits traits_;
- regex_constants::syntax_option_type flags_;
- typename regex_traits::char_class_type space_;
- typename regex_traits::char_class_type alnum_;
- };
- }} // namespace boost::xpressive
- #endif
|