// ---------------------------------------------------------------------------- // parsing.hpp : implementation of the parsing member functions // ( parse, parse_printf_directive) // ---------------------------------------------------------------------------- // Copyright Samuel Krempp 2003. Use, modification, and distribution are // subject to the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // see http://www.boost.org/libs/format for library home page // ---------------------------------------------------------------------------- #ifndef BOOST_FORMAT_PARSING_HPP #define BOOST_FORMAT_PARSING_HPP #include #include #include #include #include #include namespace boost { namespace io { namespace detail { #if defined(BOOST_NO_STD_LOCALE) // streams will be used for narrow / widen. but these methods are not const template T& const_or_not(const T& x) { return const_cast (x); } #else template const T& const_or_not(const T& x) { return x; } #endif template inline char wrap_narrow(const Facet& fac, Ch c, char deflt) { return const_or_not(fac).narrow(c, deflt); } template inline bool wrap_isdigit(const Facet& fac, Ch c) { #if ! defined( BOOST_NO_LOCALE_ISDIGIT ) return fac.is(std::ctype::digit, c); # else ignore_unused(fac); using namespace std; return isdigit(c) != 0; #endif } template Iter wrap_scan_notdigit(const Facet & fac, Iter beg, Iter end) { using namespace std; for( ; beg!=end && wrap_isdigit(fac, *beg); ++beg) ; return beg; } // Input : [start, last) iterators range and a // a Facet to use its widen/narrow member function // Effects : read sequence and convert digits into integral n, of type Res // Returns : n template Iter str2int (const Iter & start, const Iter & last, Res & res, const Facet& fac) { using namespace std; Iter it; res=0; for(it=start; it != last && wrap_isdigit(fac, *it); ++it ) { char cur_ch = wrap_narrow(fac, *it, 0); // cant fail. res *= 10; res += cur_ch - '0'; // 22.2.1.1.2.13 of the C++ standard } return it; } // auxiliary func called by parse_printf_directive // for centralising error handling // it either throws if user sets the corresponding flag, or does nothing. inline void maybe_throw_exception(unsigned char exceptions, std::size_t pos, std::size_t size) { if(exceptions & io::bad_format_string_bit) boost::throw_exception(io::bad_format_string(pos, size) ); } // Input: the position of a printf-directive in the format-string // a basic_ios& merely to use its widen/narrow member function // a bitset'exceptions' telling whether to throw exceptions on errors. // Returns: // true if parse succeeded (ignore some errors if exceptions disabled) // false if it failed so bad that the directive should be printed verbatim // Effects: // start is incremented so that *start is the first char after // this directive // *fpar is set with the parameters read in the directive template bool parse_printf_directive(Iter & start, const Iter& last, detail::format_item * fpar, const Facet& fac, std::size_t offset, unsigned char exceptions) { typedef typename basic_format::format_item_t format_item_t; fpar->argN_ = format_item_t::argN_no_posit; // if no positional-directive bool precision_set = false; bool in_brackets=false; Iter start0 = start; std::size_t fstring_size = last-start0+offset; char mssiz = 0; if(start>= last) { // empty directive : this is a trailing % maybe_throw_exception(exceptions, start-start0 + offset, fstring_size); return false; } if(*start== const_or_not(fac).widen( '|')) { in_brackets=true; if( ++start >= last ) { maybe_throw_exception(exceptions, start-start0 + offset, fstring_size); return false; } } // the flag '0' would be picked as a digit for argument order, but here it's a flag : if(*start== const_or_not(fac).widen( '0')) goto parse_flags; // handle argument order (%2$d) or possibly width specification: %2d if(wrap_isdigit(fac, *start)) { int n; start = str2int(start, last, n, fac); if( start >= last ) { maybe_throw_exception(exceptions, start-start0+offset, fstring_size); return false; } // %N% case : this is already the end of the directive if( *start == const_or_not(fac).widen( '%') ) { fpar->argN_ = n-1; ++start; if( in_brackets) maybe_throw_exception(exceptions, start-start0+offset, fstring_size); return true; } if ( *start== const_or_not(fac).widen( '$') ) { fpar->argN_ = n-1; ++start; } else { // non-positional directive fpar->fmtstate_.width_ = n; fpar->argN_ = format_item_t::argN_no_posit; goto parse_precision; } } parse_flags: // handle flags while (start != last) { // as long as char is one of + - = _ # 0 or ' ' switch ( wrap_narrow(fac, *start, 0)) { case '\'': break; // no effect yet. (painful to implement) case '-': fpar->fmtstate_.flags_ |= std::ios_base::left; break; case '=': fpar->pad_scheme_ |= format_item_t::centered; break; case '_': fpar->fmtstate_.flags_ |= std::ios_base::internal; break; case ' ': fpar->pad_scheme_ |= format_item_t::spacepad; break; case '+': fpar->fmtstate_.flags_ |= std::ios_base::showpos; break; case '0': fpar->pad_scheme_ |= format_item_t::zeropad; // need to know alignment before really setting flags, // so just add 'zeropad' flag for now, it will be processed later. break; case '#': fpar->fmtstate_.flags_ |= std::ios_base::showpoint | std::ios_base::showbase; break; default: goto parse_width; } ++start; } // loop on flag. if( start>=last) { maybe_throw_exception(exceptions, start-start0+offset, fstring_size); return true; } // first skip 'asterisk fields' : * or num (length) parse_width: if(*start == const_or_not(fac).widen( '*') ) ++start; else if(start!=last && wrap_isdigit(fac, *start)) start = str2int(start, last, fpar->fmtstate_.width_, fac); parse_precision: if( start>= last) { maybe_throw_exception(exceptions, start-start0+offset, fstring_size); return true; } // handle precision spec if (*start== const_or_not(fac).widen( '.')) { ++start; if(start != last && *start == const_or_not(fac).widen( '*') ) ++start; else if(start != last && wrap_isdigit(fac, *start)) { start = str2int(start, last, fpar->fmtstate_.precision_, fac); precision_set = true; } else fpar->fmtstate_.precision_ =0; } // argument type modifiers while (start != last) { switch (wrap_narrow(fac, *start, 0)) { case 'h': case 'l': case 'j': case 'z': case 'L': // boost::format ignores argument type modifiers as it relies on // the type of the argument fed into it by operator % break; // Note that the ptrdiff_t argument type 't' from C++11 is not honored // because it was already in use as the tabulation specifier in boost::format // case 't': // Microsoft extensions: // https://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx case 'w': break; case 'I': mssiz = 'I'; break; case '3': if (mssiz != 'I') { maybe_throw_exception(exceptions, start - start0 + offset, fstring_size); return true; } mssiz = '3'; break; case '2': if (mssiz != '3') { maybe_throw_exception(exceptions, start - start0 + offset, fstring_size); return true; } mssiz = 0x00; break; case '6': if (mssiz != 'I') { maybe_throw_exception(exceptions, start - start0 + offset, fstring_size); return true; } mssiz = '6'; break; case '4': if (mssiz != '6') { maybe_throw_exception(exceptions, start - start0 + offset, fstring_size); return true; } mssiz = 0x00; break; default: if (mssiz && mssiz == 'I') { mssiz = 0; } goto parse_conversion_specification; } ++start; } // loop on argument type modifiers to pick up 'hh', 'll', and the more complex microsoft ones parse_conversion_specification: if (start >= last || mssiz) { maybe_throw_exception(exceptions, start - start0 + offset, fstring_size); return true; } if( in_brackets && *start== const_or_not(fac).widen( '|') ) { ++start; return true; } // The default flags are "dec" and "skipws" // so if changing the base, need to unset basefield first switch (wrap_narrow(fac, *start, 0)) { // Boolean case 'b': fpar->fmtstate_.flags_ |= std::ios_base::boolalpha; break; // Decimal case 'u': case 'd': case 'i': // Defaults are sufficient break; // Hex case 'X': fpar->fmtstate_.flags_ |= std::ios_base::uppercase; BOOST_FALLTHROUGH; case 'x': case 'p': // pointer => set hex. fpar->fmtstate_.flags_ &= ~std::ios_base::basefield; fpar->fmtstate_.flags_ |= std::ios_base::hex; break; // Octal case 'o': fpar->fmtstate_.flags_ &= ~std::ios_base::basefield; fpar->fmtstate_.flags_ |= std::ios_base::oct; break; // Floating case 'A': fpar->fmtstate_.flags_ |= std::ios_base::uppercase; BOOST_FALLTHROUGH; case 'a': fpar->fmtstate_.flags_ &= ~std::ios_base::basefield; fpar->fmtstate_.flags_ |= std::ios_base::fixed; fpar->fmtstate_.flags_ |= std::ios_base::scientific; break; case 'E': fpar->fmtstate_.flags_ |= std::ios_base::uppercase; BOOST_FALLTHROUGH; case 'e': fpar->fmtstate_.flags_ |= std::ios_base::scientific; break; case 'F': fpar->fmtstate_.flags_ |= std::ios_base::uppercase; BOOST_FALLTHROUGH; case 'f': fpar->fmtstate_.flags_ |= std::ios_base::fixed; break; case 'G': fpar->fmtstate_.flags_ |= std::ios_base::uppercase; BOOST_FALLTHROUGH; case 'g': // default flags are correct here break; // Tabulation (a boost::format extension) case 'T': ++start; if( start >= last) { maybe_throw_exception(exceptions, start-start0+offset, fstring_size); return false; } else { fpar->fmtstate_.fill_ = *start; } fpar->pad_scheme_ |= format_item_t::tabulation; fpar->argN_ = format_item_t::argN_tabulation; break; case 't': fpar->fmtstate_.fill_ = const_or_not(fac).widen( ' '); fpar->pad_scheme_ |= format_item_t::tabulation; fpar->argN_ = format_item_t::argN_tabulation; break; // Character case 'C': case 'c': fpar->truncate_ = 1; break; // String case 'S': case 's': if(precision_set) // handle truncation manually, with own parameter. fpar->truncate_ = fpar->fmtstate_.precision_; fpar->fmtstate_.precision_ = 6; // default stream precision. break; // %n is insecure and ignored by boost::format case 'n' : fpar->argN_ = format_item_t::argN_ignored; break; default: maybe_throw_exception(exceptions, start-start0+offset, fstring_size); } ++start; if( in_brackets ) { if( start != last && *start== const_or_not(fac).widen( '|') ) { ++start; return true; } else maybe_throw_exception(exceptions, start-start0+offset, fstring_size); } return true; } // -end parse_printf_directive() template int upper_bound_from_fstring(const String& buf, const typename String::value_type arg_mark, const Facet& fac, unsigned char exceptions) { // quick-parsing of the format-string to count arguments mark (arg_mark, '%') // returns : upper bound on the number of format items in the format strings using namespace boost::io; typename String::size_type i1=0; int num_items=0; while( (i1=buf.find(arg_mark,i1)) != String::npos ) { if( i1+1 >= buf.size() ) { if(exceptions & bad_format_string_bit) boost::throw_exception(bad_format_string(i1, buf.size() )); // must not end in ".. %" else { ++num_items; break; } } if(buf[i1+1] == buf[i1] ) {// escaped "%%" i1+=2; continue; } ++i1; // in case of %N% directives, dont count it double (wastes allocations..) : i1 = detail::wrap_scan_notdigit(fac, buf.begin()+i1, buf.end()) - buf.begin(); if( i1 < buf.size() && buf[i1] == arg_mark ) ++i1; ++num_items; } return num_items; } template inline void append_string(String& dst, const String& src, const typename String::size_type beg, const typename String::size_type end) { dst.append(src.begin()+beg, src.begin()+end); } } // detail namespace } // io namespace // ----------------------------------------------- // format :: parse(..) template basic_format& basic_format:: parse (const string_type& buf) { // parse the format-string using namespace std; #if !defined(BOOST_NO_STD_LOCALE) const std::ctype & fac = BOOST_USE_FACET( std::ctype, getloc()); #else io::basic_oaltstringstream fac; //has widen and narrow even on compilers without locale #endif const Ch arg_mark = io::detail::const_or_not(fac).widen( '%'); bool ordered_args=true; int max_argN=-1; // A: find upper_bound on num_items and allocates arrays int num_items = io::detail::upper_bound_from_fstring(buf, arg_mark, fac, exceptions()); make_or_reuse_data(num_items); // B: Now the real parsing of the format string : num_items=0; typename string_type::size_type i0=0, i1=0; typename string_type::const_iterator it; bool special_things=false; int cur_item=0; while( (i1=buf.find(arg_mark,i1)) != string_type::npos ) { string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_; if( buf[i1+1] == buf[i1] ) { // escaped mark, '%%' io::detail::append_string(piece, buf, i0, i1+1); i1+=2; i0=i1; continue; } BOOST_ASSERT( static_cast(cur_item) < items_.size() || cur_item==0); if(i1!=i0) { io::detail::append_string(piece, buf, i0, i1); i0=i1; } ++i1; it = buf.begin()+i1; bool parse_ok = io::detail::parse_printf_directive( it, buf.end(), &items_[cur_item], fac, i1, exceptions()); i1 = it - buf.begin(); if( ! parse_ok ) // the directive will be printed verbatim continue; i0=i1; items_[cur_item].compute_states(); // process complex options, like zeropad, into params int argN=items_[cur_item].argN_; if(argN == format_item_t::argN_ignored) continue; if(argN ==format_item_t::argN_no_posit) ordered_args=false; else if(argN == format_item_t::argN_tabulation) special_things=true; else if(argN > max_argN) max_argN = argN; ++num_items; ++cur_item; } // loop on %'s BOOST_ASSERT(cur_item == num_items); // store the final piece of string { string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_; io::detail::append_string(piece, buf, i0, buf.size()); } if( !ordered_args) { if(max_argN >= 0 ) { // dont mix positional with non-positionnal directives if(exceptions() & io::bad_format_string_bit) boost::throw_exception( io::bad_format_string(static_cast(max_argN), 0)); // else do nothing. => positionnal arguments are processed as non-positionnal } // set things like it would have been with positional directives : int non_ordered_items = 0; for(int i=0; i< num_items; ++i) if(items_[i].argN_ == format_item_t::argN_no_posit) { items_[i].argN_ = non_ordered_items; ++non_ordered_items; } max_argN = non_ordered_items-1; } // C: set some member data : items_.resize(num_items, format_item_t(io::detail::const_or_not(fac).widen( ' ')) ); if(special_things) style_ |= special_needs; num_args_ = max_argN + 1; if(ordered_args) style_ |= ordered; else style_ &= ~ordered; return *this; } } // namespace boost #endif // BOOST_FORMAT_PARSING_HPP