transreg.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (c) 2001-2014, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. * Date Name Description
  9. * 08/10/2001 aliu Creation.
  10. **********************************************************************
  11. */
  12. #ifndef _TRANSREG_H
  13. #define _TRANSREG_H
  14. #include "unicode/utypes.h"
  15. #if !UCONFIG_NO_TRANSLITERATION
  16. #include "unicode/uobject.h"
  17. #include "unicode/translit.h"
  18. #include "hash.h"
  19. #include "uvector.h"
  20. U_NAMESPACE_BEGIN
  21. class TransliteratorEntry;
  22. class TransliteratorSpec;
  23. class UnicodeString;
  24. //------------------------------------------------------------------
  25. // TransliteratorAlias
  26. //------------------------------------------------------------------
  27. /**
  28. * A TransliteratorAlias object is returned by get() if the given ID
  29. * actually translates into something else. The caller then invokes
  30. * the create() method on the alias to create the actual
  31. * transliterator, and deletes the alias.
  32. *
  33. * Why all the shenanigans? To prevent circular calls between
  34. * the registry code and the transliterator code, which would deadlock.
  35. */
  36. class TransliteratorAlias : public UMemory {
  37. public:
  38. /**
  39. * Construct a simple alias (type == SIMPLE)
  40. * @param aliasID the given id.
      * @param compoundFilter compound filter pointer. The compoundFilter
      * member is documented below as an alias that is not owned;
      * presumably this argument is stored there -- confirm in the
      * implementation.
  41. */
  42. TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
  43. /**
  44. * Construct a compound RBT alias (type == COMPOUND)
      * @param ID the ID of the alias.
      * @param idBlocks the ID block string (see the notes on the private
      * members below).
      * @param adoptedTransliterators vector of contained transliterators.
      * The parameter name and the "owned" note on the transes member
      * indicate that ownership passes to this object -- confirm in the
      * implementation.
      * @param compoundFilter the compound filter; it is not owned.
  45. */
  46. TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
  47. UVector* adoptedTransliterators,
  48. const UnicodeSet* compoundFilter);
  49. /**
  50. * Construct a rules alias (type == RULES)
      * @param theID the ID of the alias.
      * @param rules the rules string.
      * @param dir the UTransDirection associated with the rules.
  51. */
  52. TransliteratorAlias(const UnicodeString& theID,
  53. const UnicodeString& rules,
  54. UTransDirection dir);
  55. ~TransliteratorAlias();
  56. /**
  57. * The whole point of create() is that the caller must invoke
  58. * it when the registry mutex is NOT held, to prevent deadlock.
  59. * It may only be called once.
  60. *
  61. * Note: Only call create() if isRuleBased() returns FALSE.
  62. *
  63. * This method must be called *outside* of the TransliteratorRegistry
  64. * mutex.
      *
      * The two parameters are unnamed in this declaration: a UParseError
      * and a UErrorCode, both passed by non-const reference.
      * @return the actual transliterator this alias stands for (see the
      * class description). NOTE(review): ownership of the result is not
      * stated in this header -- confirm in the implementation.
  65. */
  66. Transliterator* create(UParseError&, UErrorCode&);
  67. /**
  68. * Return TRUE if this alias is rule-based. If so, the caller
  69. * must call parse() on it, then call TransliteratorRegistry::reget().
  70. */
  71. UBool isRuleBased() const;
  72. /**
  73. * If isRuleBased() returns TRUE, then the caller must call this
  74. * method, followed by TransliteratorRegistry::reget(). The latter
  75. * method must be called inside the TransliteratorRegistry mutex.
  76. *
  77. * Note: Only call parse() if isRuleBased() returns TRUE.
  78. *
  79. * This method must be called *outside* of the TransliteratorRegistry
  80. * mutex, because it can instantiate Transliterators embedded in
  81. * the rules via the "&Latin-Arabic()" syntax.
      *
      * @param parser the parser to use; TransliteratorRegistry::reget()
      * also takes a TransliteratorParser (presumably this same object --
      * confirm against the caller).
      * @param pe parse error struct passed by non-const reference.
      * @param ec error code passed by non-const reference.
  82. */
  83. void parse(TransliteratorParser& parser,
  84. UParseError& pe, UErrorCode& ec) const;
  85. private:
  86. // We actually come in three flavors:
  87. // 1. Simple alias
  88. // Here aliasesOrRules is the alias string. Everything else is
  89. // null, zero, empty.
  90. // 2. CompoundRBT
  91. // Here ID is the ID, aliasesOrRules is the idBlock, transes is the
  92. // vector of contained transliterators (owned), and compoundFilter
  93. // is the compound filter, which is _not_ owned.
  94. // (No split-point offset is declared among the members below.)
  95. // 3. Rules
  96. // Here ID is the ID, aliasesOrRules is the rules string.
  97. // direction is the UTransDirection.
      // NOTE(review): the field names above were aligned with the members
      // actually declared below; the field-to-flavor mapping should be
      // confirmed against the constructors. In particular the SIMPLE
      // constructor also accepts a compoundFilter -- TODO confirm whether
      // it is stored.
  98. UnicodeString ID; // ID of the alias (COMPOUND and RULES flavors, per the notes above)
  99. UnicodeString aliasesOrRules; // alias string, idBlock, or rules string, depending on type
  100. UVector* transes; // owned
  101. const UnicodeSet* compoundFilter; // alias
  102. UTransDirection direction; // described above for the RULES flavor
  103. enum { SIMPLE, COMPOUND, RULES } type; // which of the three flavors this object is
  104. TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
  105. TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
  106. };
  107. /**
  108. * A registry of system transliterators. This is the data structure
  109. * that implements the mapping between transliterator IDs and the data
  110. * or function pointers used to create the corresponding
  111. * transliterators. There is one instance of the registry that is
  112. * created statically.
  113. *
  114. * The registry consists of a dynamic component -- a hashtable -- and
  115. * a static component -- locale resource bundles. The dynamic store
  116. * is semantically overlaid on the static store, so the static mapping
  117. * can be dynamically overridden.
  118. *
  119. * This is an internal class that is only used by Transliterator.
  120. * Transliterator maintains one static instance of this class and
  121. * delegates all registry-related operations to it.
  122. *
  123. * @author Alan Liu
  124. */
  125. class TransliteratorRegistry : public UMemory {
  126. public:
  127. /**
  128. * Constructor
  129. * @param status Output param set to success/failure code.
  130. */
  131. TransliteratorRegistry(UErrorCode& status);
  132. /**
  133. * Nonvirtual destructor -- this class is not subclassable.
  134. */
  135. ~TransliteratorRegistry();
  136. //------------------------------------------------------------------
  137. // Basic public API
  138. //------------------------------------------------------------------
  139. /**
  140. * Given a simple ID (forward direction, no inline filter, not
  141. * compound) attempt to instantiate it from the registry. Return
  142. * 0 on failure.
  143. *
  144. * Return a non-NULL aliasReturn value if the ID points to an alias.
  145. * We cannot instantiate it ourselves because the alias may contain
  146. * filters or compounds, which we do not understand. Caller should
  147. * make aliasReturn NULL before calling.
  148. * @param ID the given ID
  149. * @param aliasReturn output param to receive TransliteratorAlias;
  150. * should be NULL on entry
  151. * @param status Output param set to success/failure code.
  152. * @return the instantiated transliterator, or 0 on failure.
  153. * NOTE(review): a parseError param was formerly listed here, but this
  154. * declaration takes none. */
  155. Transliterator* get(const UnicodeString& ID,
  156. TransliteratorAlias*& aliasReturn,
  157. UErrorCode& status);
  158. /**
  159. * The caller must call this after calling get(), if [a] calling get()
  160. * returns an alias, and [b] the alias is rule based. In that
  161. * situation the caller must call alias->parse() to do the parsing
  162. * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
  163. * instantiating the transliterator.
  164. *
  165. * Note: Another alias might be returned by this method.
  166. *
  167. * This method (like all public methods of this class) must be called
  168. * from within the TransliteratorRegistry mutex.
  169. *
        * @param ID the ID to retry (presumably the one passed to get() --
        * confirm against the caller)
        * @param parser a TransliteratorParser; presumably the one handed to
        * alias->parse() beforehand -- confirm against the caller
  170. * @param aliasReturn output param to receive TransliteratorAlias;
  171. * should be NULL on entry
        * @param status error code passed by non-const reference
        * @return the transliterator, if the retry instantiated one
  172. */
  173. Transliterator* reget(const UnicodeString& ID,
  174. TransliteratorParser& parser,
  175. TransliteratorAlias*& aliasReturn,
  176. UErrorCode& status);
  177. /**
  178. * Register a prototype (adopted). This adds an entry to the
  179. * dynamic store, or replaces an existing entry. Any entry in the
  180. * underlying static locale resource store is masked.
        * @param adoptedProto the prototype; adopted, as stated above.
        * @param visible NOTE(review): presumably selects whether the entry
        * is listed in availableIDs and specDAG (see the note on the
        * registry member) -- confirm in the implementation.
        * @param ec error code passed by non-const reference.
  181. */
  182. void put(Transliterator* adoptedProto,
  183. UBool visible,
  184. UErrorCode& ec);
  185. /**
  186. * Register an ID and a factory function pointer. This adds an
  187. * entry to the dynamic store, or replaces an existing entry. Any
  188. * entry in the underlying static locale resource store is masked.
        * @param ID the ID to register.
        * @param factory the factory function pointer.
        * @param context a Transliterator::Token; presumably handed back to
        * the factory -- confirm in the implementation.
        * @param visible see the put() overload above.
        * @param ec error code passed by non-const reference.
  189. */
  190. void put(const UnicodeString& ID,
  191. Transliterator::Factory factory,
  192. Transliterator::Token context,
  193. UBool visible,
  194. UErrorCode& ec);
  195. /**
  196. * Register an ID and a resource name. This adds an entry to the
  197. * dynamic store, or replaces an existing entry. Any entry in the
  198. * underlying static locale resource store is masked.
        * @param ID the ID to register.
        * @param resourceName the resource name.
        * @param dir a UTransDirection for the entry.
        * @param readonlyResourceAlias NOTE(review): meaning not visible in
        * this header; the name suggests resourceName may be aliased
        * read-only rather than copied -- confirm in the implementation.
        * @param visible see the first put() overload.
        * @param ec error code passed by non-const reference.
  199. */
  200. void put(const UnicodeString& ID,
  201. const UnicodeString& resourceName,
  202. UTransDirection dir,
  203. UBool readonlyResourceAlias,
  204. UBool visible,
  205. UErrorCode& ec);
  206. /**
  207. * Register an ID and an alias ID. This adds an entry to the
  208. * dynamic store, or replaces an existing entry. Any entry in the
  209. * underlying static locale resource store is masked.
        * @param ID the ID to register.
        * @param alias the alias ID.
        * @param readonlyAliasAlias NOTE(review): meaning not visible in this
        * header; the name suggests alias may be aliased read-only rather
        * than copied -- confirm in the implementation.
        * @param visible see the first put() overload.
        * @param ec error code passed by non-const reference.
  210. */
  211. void put(const UnicodeString& ID,
  212. const UnicodeString& alias,
  213. UBool readonlyAliasAlias,
  214. UBool visible,
  215. UErrorCode& ec);
  216. /**
  217. * Unregister an ID. This removes an entry from the dynamic store
  218. * if there is one. The static locale resource store is
  219. * unaffected.
  220. * @param ID the given ID.
  221. */
  222. void remove(const UnicodeString& ID);
  223. //------------------------------------------------------------------
  224. // Public ID and spec management
  225. //------------------------------------------------------------------
  226. /**
  227. * Return a StringEnumeration over the IDs currently registered
  228. * with the system.
  229. * @internal
  230. */
  231. StringEnumeration* getAvailableIDs() const;
  232. /**
  233. * == OBSOLETE - remove in ICU 3.4 ==
  234. * Return the number of IDs currently registered with the system.
  235. * To retrieve the actual IDs, call getAvailableID(i) with
  236. * i from 0 to countAvailableIDs() - 1.
  237. * @return the number of IDs currently registered with the system.
  238. * @internal
  239. */
  240. int32_t countAvailableIDs(void) const;
  241. /**
  242. * == OBSOLETE - remove in ICU 3.4 ==
  243. * Return the index-th available ID. index must be between 0
  244. * and countAvailableIDs() - 1, inclusive. If index is out of
  245. * range, the result of getAvailableID(0) is returned.
  246. * @param index the given index.
  247. * @return the index-th available ID. index must be between 0
  248. * and countAvailableIDs() - 1, inclusive. If index is out of
  249. * range, the result of getAvailableID(0) is returned.
  250. * @internal
  251. */
  252. const UnicodeString& getAvailableID(int32_t index) const;
  253. /**
  254. * Return the number of registered source specifiers.
  255. * @return the number of registered source specifiers.
  256. */
  257. int32_t countAvailableSources(void) const;
  258. /**
  259. * Return a registered source specifier.
  260. * @param index which specifier to return, from 0 to n-1, where
  261. * n = countAvailableSources()
  262. * @param result fill-in parameter to receive the source specifier.
  263. * If index is out of range, result will be empty.
  264. * @return reference to result
  265. */
  266. UnicodeString& getAvailableSource(int32_t index,
  267. UnicodeString& result) const;
  268. /**
  269. * Return the number of registered target specifiers for a given
  270. * source specifier.
  271. * @param source the given source specifier.
  272. * @return the number of registered target specifiers for a given
  273. * source specifier.
  274. */
  275. int32_t countAvailableTargets(const UnicodeString& source) const;
  276. /**
  277. * Return a registered target specifier for a given source.
  278. * @param index which specifier to return, from 0 to n-1, where
  279. * n = countAvailableTargets(source)
  280. * @param source the source specifier
  281. * @param result fill-in parameter to receive the target specifier.
  282. * If source is invalid or if index is out of range, result will
  283. * be empty.
  284. * @return reference to result
  285. */
  286. UnicodeString& getAvailableTarget(int32_t index,
  287. const UnicodeString& source,
  288. UnicodeString& result) const;
  289. /**
  290. * Return the number of registered variant specifiers for a given
  291. * source-target pair. There is always at least one variant: If
  292. * just source-target is registered, then the single variant
  293. * NO_VARIANT is returned. If source-target/variant is registered
  294. * then that variant is returned.
  295. * @param source the source specifier
  296. * @param target the target specifier
  297. * @return the number of registered variant specifiers for a given
  298. * source-target pair.
  299. */
  300. int32_t countAvailableVariants(const UnicodeString& source,
  301. const UnicodeString& target) const;
  302. /**
  303. * Return a registered variant specifier for a given source-target
  304. * pair. If NO_VARIANT is one of the variants, then it will be
  305. * at index 0.
  306. * @param index which specifier to return, from 0 to n-1, where
  307. * n = countAvailableVariants(source, target)
  308. * @param source the source specifier
  309. * @param target the target specifier
  310. * @param result fill-in parameter to receive the variant
  311. * specifier. If source is invalid or if target is invalid or if
  312. * index is out of range, result will be empty.
  313. * @return reference to result
  314. */
  315. UnicodeString& getAvailableVariant(int32_t index,
  316. const UnicodeString& source,
  317. const UnicodeString& target,
  318. UnicodeString& result) const;
  319. private:
  320. //----------------------------------------------------------------
  321. // Private implementation
  322. //----------------------------------------------------------------
       // Entry lookup helpers. NOTE(review): findInDynamicStore and
       // findInStaticStore presumably search the two stores named in the
       // class comment (the hashtable and the locale resource bundles);
       // their exact fallback order is defined in the implementation.
       // The three-argument find() takes its specs by non-const reference,
       // so it may modify them -- confirm in the implementation.
  323. TransliteratorEntry* find(const UnicodeString& ID);
  324. TransliteratorEntry* find(UnicodeString& source,
  325. UnicodeString& target,
  326. UnicodeString& variant);
  327. TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
  328. const TransliteratorSpec& trg,
  329. const UnicodeString& variant) const;
  330. TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
  331. const TransliteratorSpec& trg,
  332. const UnicodeString& variant);
  333. static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
  334. const TransliteratorSpec& specToFind,
  335. const UnicodeString& variant,
  336. UTransDirection direction);
       // Registration helpers. The TransliteratorEntry parameter is named
       // "adopted" in every overload; presumably ownership passes to the
       // registry -- confirm in the implementation.
  337. void registerEntry(const UnicodeString& source,
  338. const UnicodeString& target,
  339. const UnicodeString& variant,
  340. TransliteratorEntry* adopted,
  341. UBool visible);
  342. void registerEntry(const UnicodeString& ID,
  343. TransliteratorEntry* adopted,
  344. UBool visible);
  345. void registerEntry(const UnicodeString& ID,
  346. const UnicodeString& source,
  347. const UnicodeString& target,
  348. const UnicodeString& variant,
  349. TransliteratorEntry* adopted,
  350. UBool visible);
       // NOTE(review): registerSTV/removeSTV presumably add and remove a
       // source-target-variant triple in specDAG -- confirm in the
       // implementation.
  351. void registerSTV(const UnicodeString& source,
  352. const UnicodeString& target,
  353. const UnicodeString& variant);
  354. void removeSTV(const UnicodeString& source,
  355. const UnicodeString& target,
  356. const UnicodeString& variant);
       // Takes the same ID/aliasReturn/status parameters as get(), plus the
       // entry to instantiate.
  357. Transliterator* instantiateEntry(const UnicodeString& ID,
  358. TransliteratorEntry *entry,
  359. TransliteratorAlias*& aliasReturn,
  360. UErrorCode& status);
  361. /**
  362. * A StringEnumeration over the registered IDs in this object.
  363. */
  364. class Enumeration : public StringEnumeration {
  365. public:
  366. Enumeration(const TransliteratorRegistry& reg);
  367. virtual ~Enumeration();
  368. virtual int32_t count(UErrorCode& status) const;
  369. virtual const UnicodeString* snext(UErrorCode& status);
  370. virtual void reset(UErrorCode& status);
  371. static UClassID U_EXPORT2 getStaticClassID();
  372. virtual UClassID getDynamicClassID() const;
  373. private:
  374. int32_t index; // NOTE(review): presumably the current enumeration position -- confirm
  375. const TransliteratorRegistry& reg; // held by reference, so the registry must outlive this object
  376. };
  377. friend class Enumeration;
  378. private:
  379. /**
  380. * Dynamic registry mapping full IDs to Entry objects. This
  381. * contains both public and internal entities. The visibility is
  382. * controlled by whether an entry is listed in availableIDs and
  383. * specDAG or not.
  384. */
  385. Hashtable registry;
  386. /**
  387. * DAG of visible IDs by spec. Hashtable: source => (Hashtable:
  388. * target => variant bitmask)
  389. */
  390. Hashtable specDAG;
  391. /**
  392. * Vector of all variant names
  393. */
  394. UVector variantList;
  395. /**
  396. * Vector of public full IDs.
  397. */
  398. UVector availableIDs;
  399. TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
  400. TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
  401. };
  402. U_NAMESPACE_END
  // Cleanup entry point for the transliterator module, declared with U_CFUNC
  // outside the ICU C++ namespace and defined elsewhere.
  // NOTE(review): presumably releases the static registry described above and
  // returns a success flag -- confirm in the implementation.
  403. U_CFUNC UBool utrans_transliterator_cleanup(void);
  404. #endif /* #if !UCONFIG_NO_TRANSLITERATION */
  405. #endif
  406. //eof