123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303 |
- #ifndef BASE_STRINGS_STRING_TOKENIZER_H_
- #define BASE_STRINGS_STRING_TOKENIZER_H_
- #include <algorithm>
- #include <string>
- #include "base/strings/string_piece.h"
- namespace base {
- template <class str, class const_iterator>
- class StringTokenizerT {
- public:
- typedef typename str::value_type char_type;
-
- enum {
-
- RETURN_DELIMS = 1 << 0,
-
-
-
- RETURN_EMPTY_TOKENS = 1 << 1,
- };
-
-
-
-
- StringTokenizerT(const str& string,
- const str& delims) {
- Init(string.begin(), string.end(), delims);
- }
-
-
- StringTokenizerT(str&&, const str& delims) = delete;
- StringTokenizerT(const_iterator string_begin,
- const_iterator string_end,
- const str& delims) {
- Init(string_begin, string_end, delims);
- }
-
- void set_options(int options) { options_ = options; }
-
-
-
-
-
- void set_quote_chars(const str& quotes) { quotes_ = quotes; }
-
-
-
- bool GetNext() {
- if (quotes_.empty() && options_ == 0)
- return QuickGetNext();
- else
- return FullGetNext();
- }
-
- void Reset() {
- token_end_ = start_pos_;
- }
-
-
-
-
- bool token_is_delim() const { return token_is_delim_; }
-
-
- const_iterator token_begin() const { return token_begin_; }
- const_iterator token_end() const { return token_end_; }
- str token() const { return str(token_begin_, token_end_); }
- BasicStringPiece<str> token_piece() const {
- return BasicStringPiece<str>(&*token_begin_,
- std::distance(token_begin_, token_end_));
- }
- private:
- void Init(const_iterator string_begin,
- const_iterator string_end,
- const str& delims) {
- start_pos_ = string_begin;
- token_begin_ = string_begin;
- token_end_ = string_begin;
- end_ = string_end;
- delims_ = delims;
- options_ = 0;
- token_is_delim_ = true;
- }
-
-
-
- bool QuickGetNext() {
- token_is_delim_ = false;
- for (;;) {
- token_begin_ = token_end_;
- if (token_end_ == end_) {
- token_is_delim_ = true;
- return false;
- }
- ++token_end_;
- if (delims_.find(*token_begin_) == str::npos)
- break;
-
- }
- while (token_end_ != end_ && delims_.find(*token_end_) == str::npos)
- ++token_end_;
- return true;
- }
-
- bool FullGetNext() {
- AdvanceState state;
- for (;;) {
- if (token_is_delim_) {
-
-
-
-
-
-
-
-
-
-
-
- token_is_delim_ = false;
- token_begin_ = token_end_;
-
- while (token_end_ != end_ && AdvanceOne(&state, *token_end_)) {
- ++token_end_;
- }
-
- if (token_begin_ != token_end_ || (options_ & RETURN_EMPTY_TOKENS))
- return true;
- }
- DCHECK(!token_is_delim_);
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- token_is_delim_ = true;
- token_begin_ = token_end_;
- if (token_end_ == end_)
- return false;
-
- ++token_end_;
- if (options_ & RETURN_DELIMS)
- return true;
- }
- return false;
- }
- bool IsDelim(char_type c) const {
- return delims_.find(c) != str::npos;
- }
- bool IsQuote(char_type c) const {
- return quotes_.find(c) != str::npos;
- }
- struct AdvanceState {
- bool in_quote;
- bool in_escape;
- char_type quote_char;
- AdvanceState() : in_quote(false), in_escape(false), quote_char('\0') {}
- };
-
- bool AdvanceOne(AdvanceState* state, char_type c) {
- if (state->in_quote) {
- if (state->in_escape) {
- state->in_escape = false;
- } else if (c == '\\') {
- state->in_escape = true;
- } else if (c == state->quote_char) {
- state->in_quote = false;
- }
- } else {
- if (IsDelim(c))
- return false;
- state->in_quote = IsQuote(state->quote_char = c);
- }
- return true;
- }
- const_iterator start_pos_;
- const_iterator token_begin_;
- const_iterator token_end_;
- const_iterator end_;
- str delims_;
- str quotes_;
- int options_;
- bool token_is_delim_;
- };
- typedef StringTokenizerT<std::string, std::string::const_iterator>
- StringTokenizer;
- typedef StringTokenizerT<string16, string16::const_iterator> String16Tokenizer;
- typedef StringTokenizerT<std::string, const char*> CStringTokenizer;
- }
- #endif
|