stringtok.hpp 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. // (C) Copyright Jeremy Siek 2004
  2. // Distributed under the Boost Software License, Version 1.0. (See
  3. // accompanying file LICENSE_1_0.txt or copy at
  4. // http://www.boost.org/LICENSE_1_0.txt)
  5. #ifndef BOOST_STRINGTOK_HPP
  6. #define BOOST_STRINGTOK_HPP
  7. /*
  8. * stringtok.hpp -- Breaks a string into tokens. This is an example for lib3.
  9. *
  10. * Template function looks like this:
  11. *
  12. * template <typename Container>
  13. * void stringtok (Container &l,
  14. * string const &s,
  15. * char const * const ws = " \t\n");
  16. *
  17. * A nondestructive version of strtok() that handles its own memory and can
  18. * be broken up by any character(s). Does all the work at once rather than
  19. * in an invocation loop like strtok() requires.
  20. *
  21. * Container is any type that supports push_back(a_string), although using
  22. * list<string> and deque<string> are indicated due to their O(1) push_back.
  23. * (I prefer deque<> because op[]/at() is available as well.) The first
  24. * parameter references an existing Container.
  25. *
  26. * s is the string to be tokenized. From the parameter declaration, it can
  27. * be seen that s is not affected. Since references-to-const may refer to
  28. * temporaries, you could use stringtok(some_container, readline("")) when
  29. * using the GNU readline library.
  30. *
  31. * The final parameter is an array of characters that serve as whitespace.
  32. * Whitespace characters default to one or more of tab, space, and newline,
  33. * in any combination.
  34. *
  35. * 'l' need not be empty on entry. On return, 'l' will have the token
  36. * strings appended.
  37. *
  38. *
  39. * [Example:
  40. * list<string> ls;
  41. * stringtok (ls, " this \t is\t\n a test ");
  42. * for (list<string>::const_iterator i = ls.begin();
  43. * i != ls.end(); ++i)
  44. * {
  45. * cerr << ':' << (*i) << ":\n";
  46. * }
  47. *
  48. * would print
  49. * :this:
  50. * :is:
  51. * :a:
  52. * :test:
  53. * -end example]
  54. *
  55. * pedwards@jaj.com May 1999
  56. */
  57. #include <string>
  58. #include <cstring> // for strchr
  59. /*****************************************************************
  60. * This is the only part of the implementation that I don't like.
  61. * It can probably be improved upon by the reader...
  62. */
  63. inline bool isws(char c, char const* const wstr)
  64. {
  65. using namespace std;
  66. return (strchr(wstr, c) != NULL);
  67. }
  68. namespace boost
  69. {
  70. /*****************************************************************
  71. * Simplistic and quite Standard, but a bit slow. This should be
  72. * templatized on basic_string instead, or on a more generic StringT
  73. * that just happens to support ::size_type, .substr(), and so on.
  74. * I had hoped that "whitespace" would be a trait, but it isn't, so
  75. * the user must supply it. Enh, this lets them break up strings on
  76. * different things easier than traits would anyhow.
  77. */
  78. template < typename Container >
  79. void stringtok(
  80. Container& l, std::string const& s, char const* const ws = " \t\n")
  81. {
  82. typedef std::string::size_type size_type;
  83. const size_type S = s.size();
  84. size_type i = 0;
  85. while (i < S)
  86. {
  87. // eat leading whitespace
  88. while ((i < S) && (isws(s[i], ws)))
  89. ++i;
  90. if (i == S)
  91. return; // nothing left but WS
  92. // find end of word
  93. size_type j = i + 1;
  94. while ((j < S) && (!isws(s[j], ws)))
  95. ++j;
  96. // add word
  97. l.push_back(s.substr(i, j - i));
  98. // set up for next loop
  99. i = j + 1;
  100. }
  101. }
  102. } // namespace boost
  103. #endif // BOOST_STRINGTOK_HPP