utf_offset_string_conversions.h 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. // Copyright (c) 2011 The Chromium Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file.
  4. #ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
  5. #define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
  6. #include <stddef.h>
  7. #include <string>
  8. #include <vector>
  9. #include "base/base_export.h"
  10. #include "base/strings/string16.h"
  11. #include "base/strings/string_piece.h"
  12. namespace base {
  13. // A helper class and associated data structures to adjust offsets into a
  14. // string in response to various adjustments one might do to that string
  15. // (e.g., eliminating a range). For details on offsets, see the comments by
  16. // the AdjustOffsets() function below.
  17. class BASE_EXPORT OffsetAdjuster {
  18. public:
  19. struct BASE_EXPORT Adjustment {
  20. Adjustment(size_t original_offset,
  21. size_t original_length,
  22. size_t output_length);
  23. size_t original_offset;
  24. size_t original_length;
  25. size_t output_length;
  26. };
  27. typedef std::vector<Adjustment> Adjustments;
  28. // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments
  29. // recorded in |adjustments|. Adjusted offsets greater than |limit| will be
  30. // set to string16::npos.
  31. //
  32. // Offsets represents insertion/selection points between characters: if |src|
  33. // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the
  34. // end of the string. Valid input offsets range from 0 to |src_len|. On
  35. // exit, each offset will have been modified to point at the same logical
  36. // position in the output string. If an offset cannot be successfully
  37. // adjusted (e.g., because it points into the middle of a multibyte sequence),
  38. // it will be set to string16::npos.
  39. static void AdjustOffsets(const Adjustments& adjustments,
  40. std::vector<size_t>* offsets_for_adjustment,
  41. size_t limit = string16::npos);
  42. // Adjusts the single |offset| to reflect the adjustments recorded in
  43. // |adjustments|.
  44. static void AdjustOffset(const Adjustments& adjustments,
  45. size_t* offset,
  46. size_t limit = string16::npos);
  47. // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse
  48. // of the adjustments recorded in |adjustments|. In other words, the offsets
  49. // provided represent offsets into an adjusted string and the caller wants
  50. // to know the offsets they correspond to in the original string. If an
  51. // offset cannot be successfully unadjusted (e.g., because it points into
  52. // the middle of a multibyte sequence), it will be set to string16::npos.
  53. static void UnadjustOffsets(const Adjustments& adjustments,
  54. std::vector<size_t>* offsets_for_unadjustment);
  55. // Adjusts the single |offset| to reflect the reverse of the adjustments
  56. // recorded in |adjustments|.
  57. static void UnadjustOffset(const Adjustments& adjustments,
  58. size_t* offset);
  59. // Combines two sequential sets of adjustments, storing the combined revised
  60. // adjustments in |adjustments_on_adjusted_string|. That is, suppose a
  61. // string was altered in some way, with the alterations recorded as
  62. // adjustments in |first_adjustments|. Then suppose the resulting string is
  63. // further altered, with the alterations recorded as adjustments scored in
  64. // |adjustments_on_adjusted_string|, with the offsets recorded in these
  65. // adjustments being with respect to the intermediate string. This function
  66. // combines the two sets of adjustments into one, storing the result in
  67. // |adjustments_on_adjusted_string|, whose offsets are correct with respect
  68. // to the original string.
  69. //
  70. // Assumes both parameters are sorted by increasing offset.
  71. //
  72. // WARNING: Only supports |first_adjustments| that involve collapsing ranges
  73. // of text, not expanding ranges.
  74. static void MergeSequentialAdjustments(
  75. const Adjustments& first_adjustments,
  76. Adjustments* adjustments_on_adjusted_string);
  77. };
  78. // Like the conversions in utf_string_conversions.h, but also fills in an
  79. // |adjustments| parameter that reflects the alterations done to the string.
  80. // It may be NULL.
  81. BASE_EXPORT bool UTF8ToUTF16WithAdjustments(
  82. const char* src,
  83. size_t src_len,
  84. string16* output,
  85. base::OffsetAdjuster::Adjustments* adjustments);
  86. BASE_EXPORT string16 UTF8ToUTF16WithAdjustments(
  87. const base::StringPiece& utf8,
  88. base::OffsetAdjuster::Adjustments* adjustments) WARN_UNUSED_RESULT;
  89. // As above, but instead internally examines the adjustments and applies them
  90. // to |offsets_for_adjustment|. Input offsets greater than the length of the
  91. // input string will be set to string16::npos. See comments by AdjustOffsets().
  92. BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets(
  93. const base::StringPiece& utf8,
  94. std::vector<size_t>* offsets_for_adjustment);
  95. BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets(
  96. const base::StringPiece16& utf16,
  97. std::vector<size_t>* offsets_for_adjustment);
  98. } // namespace base
  99. #endif // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_