uspoof_conf.h 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ******************************************************************************
  5. *
  6. * Copyright (C) 2008-2016, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. ******************************************************************************
  10. * file name: uspoof_conf.h
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2009Jan05
  16. * created by: Andy Heninger
  17. *
  18. * Internal classes for compiling confusable data into its binary (runtime) form.
  19. */
  20. #ifndef __USPOOF_BUILDCONF_H__
  21. #define __USPOOF_BUILDCONF_H__
  22. #include "unicode/utypes.h"
  23. #if !UCONFIG_NO_NORMALIZATION
  24. #if !UCONFIG_NO_REGULAR_EXPRESSIONS
  25. #include "unicode/uregex.h"
  26. #include "uhash.h"
  27. #include "uspoof_impl.h"
  28. U_NAMESPACE_BEGIN
  29. // SPUString
  30. // Holds a string that is the result of one of the mappings defined
  31. // by the confusable mapping data (confusables.txt from Unicode.org)
  32. // Instances of SPUString exist during the compilation process only.
  33. struct SPUString : public UMemory {
  34. UnicodeString *fStr; // The actual string.
  35. int32_t fCharOrStrTableIndex; // Index into the final runtime data for this
  36. // string (or, for length 1, the single string char
  37. // itself, there being no string table entry for it.)
  38. SPUString(UnicodeString *s);
  39. ~SPUString();
  40. };
  41. // String Pool A utility class for holding the strings that are the result of
  42. // the spoof mappings. These strings will utimately end up in the
  43. // run-time String Table.
  44. // This is sort of like a sorted set of strings, except that ICU's anemic
  45. // built-in collections don't support those, so it is implemented with a
  46. // combination of a uhash and a UVector.
  47. class SPUStringPool : public UMemory {
  48. public:
  49. SPUStringPool(UErrorCode &status);
  50. ~SPUStringPool();
  51. // Add a string. Return the string from the table.
  52. // If the input parameter string is already in the table, delete the
  53. // input parameter and return the existing string.
  54. SPUString *addString(UnicodeString *src, UErrorCode &status);
  55. // Get the n-th string in the collection.
  56. SPUString *getByIndex(int32_t i);
  57. // Sort the contents; affects the ordering of getByIndex().
  58. void sort(UErrorCode &status);
  59. int32_t size();
  60. private:
  61. UVector *fVec; // Elements are SPUString *
  62. UHashtable *fHash; // Key: UnicodeString Value: SPUString
  63. };
  64. // class ConfusabledataBuilder
  65. // An instance of this class exists while the confusable data is being built from source.
  66. // It encapsulates the intermediate data structures that are used for building.
  67. // It exports one static function, to do a confusable data build.
  68. class ConfusabledataBuilder : public UMemory {
  69. private:
  70. SpoofImpl *fSpoofImpl;
  71. UChar *fInput;
  72. UHashtable *fTable;
  73. UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables.
  74. // The binary data is first assembled into the following four collections, then
  75. // copied to its final raw-memory destination.
  76. UVector *fKeyVec;
  77. UVector *fValueVec;
  78. UnicodeString *fStringTable;
  79. SPUStringPool *stringPool;
  80. URegularExpression *fParseLine;
  81. URegularExpression *fParseHexNum;
  82. int32_t fLineNum;
  83. ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
  84. ~ConfusabledataBuilder();
  85. void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
  86. // Add an entry to the key and value tables being built
  87. // input: data from SLTable, MATable, etc.
  88. // outut: entry added to fKeyVec and fValueVec
  89. void addKeyEntry(UChar32 keyChar, // The key character
  90. UHashtable *table, // The table, one of SATable, MATable, etc.
  91. int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc.
  92. UErrorCode &status);
  93. // From an index into fKeyVec & fValueVec
  94. // get a UnicodeString with the corresponding mapping.
  95. UnicodeString getMapping(int32_t index);
  96. // Populate the final binary output data array with the compiled data.
  97. void outputData(UErrorCode &status);
  98. public:
  99. static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
  100. int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
  101. };
  102. U_NAMESPACE_END
  103. #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
  104. #endif // !UCONFIG_NO_NORMALIZATION
  105. #endif // __USPOOF_BUILDCONF_H__