// rbbinode.h
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /********************************************************************
  4. * COPYRIGHT:
  5. * Copyright (c) 2001-2016, International Business Machines Corporation and
  6. * others. All Rights Reserved.
  7. ********************************************************************/
  8. #ifndef RBBINODE_H
  9. #define RBBINODE_H
  10. #include "unicode/utypes.h"
  11. #include "unicode/unistr.h"
  12. #include "unicode/uobject.h"
  13. //
  14. // class RBBINode
  15. //
  16. // Represents a node in the parse tree generated when reading
  17. // a rule file.
  18. //
  19. U_NAMESPACE_BEGIN
  20. class UnicodeSet;
  21. class UVector;
  22. class RBBINode : public UMemory {
  23. public:
  24. enum NodeType {
  25. setRef,
  26. uset,
  27. varRef,
  28. leafChar,
  29. lookAhead,
  30. tag,
  31. endMark,
  32. opStart,
  33. opCat,
  34. opOr,
  35. opStar,
  36. opPlus,
  37. opQuestion,
  38. opBreak,
  39. opReverse,
  40. opLParen
  41. };
  42. enum OpPrecedence {
  43. precZero,
  44. precStart,
  45. precLParen,
  46. precOpOr,
  47. precOpCat
  48. };
  49. NodeType fType;
  50. RBBINode *fParent;
  51. RBBINode *fLeftChild;
  52. RBBINode *fRightChild;
  53. UnicodeSet *fInputSet; // For uset nodes only.
  54. OpPrecedence fPrecedence; // For binary ops only.
  55. UnicodeString fText; // Text corresponding to this node.
  56. // May be lazily evaluated when (if) needed
  57. // for some node types.
  58. int fFirstPos; // Position in the rule source string of the
  59. // first text associated with the node.
  60. // If there's a left child, this will be the same
  61. // as that child's left pos.
  62. int fLastPos; // Last position in the rule source string
  63. // of any text associated with this node.
  64. // If there's a right child, this will be the same
  65. // as that child's last postion.
  66. UBool fNullable; // See Aho.
  67. int32_t fVal; // For leafChar nodes, the value.
  68. // Values are the character category,
  69. // corresponds to columns in the final
  70. // state transition table.
  71. UBool fLookAheadEnd; // For endMark nodes, set TRUE if
  72. // marking the end of a look-ahead rule.
  73. UBool fRuleRoot; // True if this node is the root of a rule.
  74. UBool fChainIn; // True if chaining into this rule is allowed
  75. // (no '^' present).
  76. UVector *fFirstPosSet;
  77. UVector *fLastPosSet; // TODO: rename fFirstPos & fLastPos to avoid confusion.
  78. UVector *fFollowPos;
  79. RBBINode(NodeType t);
  80. RBBINode(const RBBINode &other);
  81. ~RBBINode();
  82. RBBINode *cloneTree();
  83. RBBINode *flattenVariables();
  84. void flattenSets();
  85. void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status);
  86. #ifdef RBBI_DEBUG
  87. static void printNodeHeader();
  88. static void printNode(const RBBINode *n);
  89. static void printTree(const RBBINode *n, UBool withHeading);
  90. #endif
  91. private:
  92. RBBINode &operator = (const RBBINode &other); // No defs.
  93. UBool operator == (const RBBINode &other); // Private, so these functions won't accidently be used.
  94. #ifdef RBBI_DEBUG
  95. public:
  96. int fSerialNum; // Debugging aids.
  97. #endif
  98. };
  #ifdef RBBI_DEBUG
  // Only declared when RBBI_DEBUG is defined. minWidth defaults to 0.
  // NOTE(review): presumably prints `s` padded to at least `minWidth`
  // columns -- confirm against the definition.
  U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString& s, int minWidth = 0);
  #endif
  103. U_NAMESPACE_END
  104. #endif