123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- **********************************************************************
- * Copyright (c) 2001-2014, International Business Machines
- * Corporation and others. All Rights Reserved.
- **********************************************************************
- * Date Name Description
- * 08/10/2001 aliu Creation.
- **********************************************************************
- */
- #ifndef _TRANSREG_H
- #define _TRANSREG_H
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_TRANSLITERATION
- #include "unicode/uobject.h"
- #include "unicode/translit.h"
- #include "hash.h"
- #include "uvector.h"
- U_NAMESPACE_BEGIN
- class TransliteratorEntry;
- class TransliteratorSpec;
- class UnicodeString;
- //------------------------------------------------------------------
- // TransliteratorAlias
- //------------------------------------------------------------------
- /**
- * A TransliteratorAlias object is returned by get() if the given ID
- * actually translates into something else. The caller then invokes
- * the create() method on the alias to create the actual
- * transliterator, and deletes the alias.
- *
- * Why all the shenanigans? To prevent circular calls between
- * the registry code and the transliterator code, which would deadlock.
- */
- class TransliteratorAlias : public UMemory {
- public:
- /**
- * Construct a simple alias (type == SIMPLE)
- * @param aliasID the alias string.
- * @param compoundFilter the compound filter. The compoundFilter
- * member below is documented as a non-owned alias pointer.
- */
- TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
- /**
- * Construct a compound RBT alias (type == COMPOUND)
- * @param ID the ID.
- * @param idBlocks the ID blocks string.
- * @param adoptedTransliterators vector of the contained
- * transliterators; adopted (the transes member below is owned).
- * @param compoundFilter the compound filter; it is _not_ owned.
- */
- TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
- UVector* adoptedTransliterators,
- const UnicodeSet* compoundFilter);
- /**
- * Construct a rules alias (type == RULES)
- * @param theID the ID.
- * @param rules the rules string.
- * @param dir the UTransDirection of the rules.
- */
- TransliteratorAlias(const UnicodeString& theID,
- const UnicodeString& rules,
- UTransDirection dir);
- ~TransliteratorAlias();
- /**
- * The whole point of create() is that the caller must invoke
- * it when the registry mutex is NOT held, to prevent deadlock.
- * It may only be called once.
- *
- * Note: Only call create() if isRuleBased() returns FALSE.
- *
- * This method must be called *outside* of the TransliteratorRegistry
- * mutex.
- * @return the actual transliterator that this alias stands for.
- */
- Transliterator* create(UParseError&, UErrorCode&);
- /**
- * Return TRUE if this alias is rule-based. If so, the caller
- * must call parse() on it, then call TransliteratorRegistry::reget().
- * @return TRUE if this alias is rule-based.
- */
- UBool isRuleBased() const;
- /**
- * If isRuleBased() returns TRUE, then the caller must call this
- * method, followed by TransliteratorRegistry::reget(). The latter
- * method must be called inside the TransliteratorRegistry mutex.
- *
- * Note: Only call parse() if isRuleBased() returns TRUE.
- *
- * This method must be called *outside* of the TransliteratorRegistry
- * mutex, because it can instantiate Transliterators embedded in
- * the rules via the "&Latin-Arabic()" syntax.
- * @param parser the parser to use; the same parser is later handed
- * to TransliteratorRegistry::reget().
- * @param pe struct to receive information on the position of a
- * parse error -- NOTE(review): presumed from the type; confirm.
- * @param ec error code.
- */
- void parse(TransliteratorParser& parser,
- UParseError& pe, UErrorCode& ec) const;
- private:
- // An alias comes in three flavors, selected by 'type':
- // 1. SIMPLE
- // aliasesOrRules is the alias string. Everything else is
- // null, zero, empty.
- // 2. COMPOUND (compound RBT)
- // ID is the ID, aliasesOrRules is the idBlocks string, and
- // transes holds the contained (adopted) transliterators.
- // compoundFilter is the compound filter, and it is _not_ owned.
- // 3. RULES
- // ID is the ID, aliasesOrRules is the rules string, and
- // direction is the UTransDirection.
- // NOTE(review): field mapping inferred from member/constructor names; confirm against the implementation.
- UnicodeString ID; // the ID (COMPOUND and RULES flavors)
- UnicodeString aliasesOrRules; // alias string, idBlocks, or rules, depending on type
- UVector* transes; // owned
- const UnicodeSet* compoundFilter; // alias
- UTransDirection direction; // used by the RULES flavor
- enum { SIMPLE, COMPOUND, RULES } type; // which of the three flavors this object is
- TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
- TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
- };
- /**
- * A registry of system transliterators. This is the data structure
- * that implements the mapping between transliterator IDs and the data
- * or function pointers used to create the corresponding
- * transliterators. There is one instance of the registry that is
- * created statically.
- *
- * The registry consists of a dynamic component -- a hashtable -- and
- * a static component -- locale resource bundles. The dynamic store
- * is semantically overlaid on the static store, so the static mapping
- * can be dynamically overridden.
- *
- * This is an internal class that is only used by Transliterator.
- * Transliterator maintains one static instance of this class and
- * delegates all registry-related operations to it.
- *
- * @author Alan Liu
- */
- class TransliteratorRegistry : public UMemory {
- public:
- /**
- * Constructor
- * @param status Output param set to success/failure code.
- */
- TransliteratorRegistry(UErrorCode& status);
- /**
- * Nonvirtual destructor -- this class is not subclassable.
- */
- ~TransliteratorRegistry();
- //------------------------------------------------------------------
- // Basic public API
- //------------------------------------------------------------------
- /**
- * Given a simple ID (forward direction, no inline filter, not
- * compound) attempt to instantiate it from the registry. Return
- * 0 on failure.
- *
- * Return a non-NULL aliasReturn value if the ID points to an alias.
- * We cannot instantiate it ourselves because the alias may contain
- * filters or compounds, which we do not understand. Caller should
- * make aliasReturn NULL before calling.
- *
- * This method (like all public methods of this class) must be called
- * from within the TransliteratorRegistry mutex.
- * @param ID the given ID
- * @param aliasReturn output param to receive TransliteratorAlias;
- * should be NULL on entry
- * @param status Output param set to success/failure code.
- * @return the instantiated transliterator, or 0 on failure.
- */
- Transliterator* get(const UnicodeString& ID,
- TransliteratorAlias*& aliasReturn,
- UErrorCode& status);
- /**
- * The caller must call this after calling get(), if [a] calling get()
- * returns an alias, and [b] the alias is rule based. In that
- * situation the caller must call alias->parse() to do the parsing
- * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
- * instantiating the transliterator.
- *
- * Note: Another alias might be returned by this method.
- *
- * This method (like all public methods of this class) must be called
- * from within the TransliteratorRegistry mutex.
- *
- * @param ID the ID that was passed to get()
- * @param parser the parser that alias->parse() was called with
- * @param aliasReturn output param to receive TransliteratorAlias;
- * should be NULL on entry
- * @param status Output param set to success/failure code.
- */
- Transliterator* reget(const UnicodeString& ID,
- TransliteratorParser& parser,
- TransliteratorAlias*& aliasReturn,
- UErrorCode& status);
- /**
- * Register a prototype (adopted). This adds an entry to the
- * dynamic store, or replaces an existing entry. Any entry in the
- * underlying static locale resource store is masked.
- * @param adoptedProto the prototype transliterator; adopted.
- * @param visible NOTE(review): presumably whether the ID is listed
- * publicly (see availableIDs and specDAG below) -- confirm.
- * @param ec error code.
- */
- void put(Transliterator* adoptedProto,
- UBool visible,
- UErrorCode& ec);
- /**
- * Register an ID and a factory function pointer. This adds an
- * entry to the dynamic store, or replaces an existing entry. Any
- * entry in the underlying static locale resource store is masked.
- * @param ID the ID to register.
- * @param factory the factory function pointer.
- * @param context token registered together with the factory --
- * NOTE(review): presumably handed back to the factory; confirm.
- * @param visible NOTE(review): presumably whether the ID is listed
- * publicly (see availableIDs and specDAG below) -- confirm.
- * @param ec error code.
- */
- void put(const UnicodeString& ID,
- Transliterator::Factory factory,
- Transliterator::Token context,
- UBool visible,
- UErrorCode& ec);
- /**
- * Register an ID and a resource name. This adds an entry to the
- * dynamic store, or replaces an existing entry. Any entry in the
- * underlying static locale resource store is masked.
- * @param ID the ID to register.
- * @param resourceName the resource name.
- * @param dir the direction associated with the resource.
- * @param readonlyResourceAlias NOTE(review): semantics not visible
- * in this header; presumably lets resourceName be aliased read-only
- * instead of copied -- confirm.
- * @param visible NOTE(review): presumably whether the ID is listed
- * publicly (see availableIDs and specDAG below) -- confirm.
- * @param ec error code.
- */
- void put(const UnicodeString& ID,
- const UnicodeString& resourceName,
- UTransDirection dir,
- UBool readonlyResourceAlias,
- UBool visible,
- UErrorCode& ec);
- /**
- * Register an ID and an alias ID. This adds an entry to the
- * dynamic store, or replaces an existing entry. Any entry in the
- * underlying static locale resource store is masked.
- * @param ID the ID to register.
- * @param alias the alias ID.
- * @param readonlyAliasAlias NOTE(review): semantics not visible
- * in this header; presumably lets alias be aliased read-only
- * instead of copied -- confirm.
- * @param visible NOTE(review): presumably whether the ID is listed
- * publicly (see availableIDs and specDAG below) -- confirm.
- * @param ec error code.
- */
- void put(const UnicodeString& ID,
- const UnicodeString& alias,
- UBool readonlyAliasAlias,
- UBool visible,
- UErrorCode& ec);
- /**
- * Unregister an ID. This removes an entry from the dynamic store
- * if there is one. The static locale resource store is
- * unaffected.
- * @param ID the given ID.
- */
- void remove(const UnicodeString& ID);
- //------------------------------------------------------------------
- // Public ID and spec management
- //------------------------------------------------------------------
- /**
- * Return a StringEnumeration over the IDs currently registered
- * with the system.
- * @return a StringEnumeration over the registered IDs.
- * @internal
- */
- StringEnumeration* getAvailableIDs() const;
- /**
- * == OBSOLETE - remove in ICU 3.4 ==
- * Return the number of IDs currently registered with the system.
- * To retrieve the actual IDs, call getAvailableID(i) with
- * i from 0 to countAvailableIDs() - 1.
- * @return the number of IDs currently registered with the system.
- * @internal
- */
- int32_t countAvailableIDs(void) const;
- /**
- * == OBSOLETE - remove in ICU 3.4 ==
- * Return the index-th available ID. index must be between 0
- * and countAvailableIDs() - 1, inclusive. If index is out of
- * range, the result of getAvailableID(0) is returned.
- * @param index the given index.
- * @return the index-th available ID. index must be between 0
- * and countAvailableIDs() - 1, inclusive. If index is out of
- * range, the result of getAvailableID(0) is returned.
- * @internal
- */
- const UnicodeString& getAvailableID(int32_t index) const;
- /**
- * Return the number of registered source specifiers.
- * @return the number of registered source specifiers.
- */
- int32_t countAvailableSources(void) const;
- /**
- * Return a registered source specifier.
- * @param index which specifier to return, from 0 to n-1, where
- * n = countAvailableSources()
- * @param result fill-in parameter to receive the source specifier.
- * If index is out of range, result will be empty.
- * @return reference to result
- */
- UnicodeString& getAvailableSource(int32_t index,
- UnicodeString& result) const;
- /**
- * Return the number of registered target specifiers for a given
- * source specifier.
- * @param source the given source specifier.
- * @return the number of registered target specifiers for a given
- * source specifier.
- */
- int32_t countAvailableTargets(const UnicodeString& source) const;
- /**
- * Return a registered target specifier for a given source.
- * @param index which specifier to return, from 0 to n-1, where
- * n = countAvailableTargets(source)
- * @param source the source specifier
- * @param result fill-in parameter to receive the target specifier.
- * If source is invalid or if index is out of range, result will
- * be empty.
- * @return reference to result
- */
- UnicodeString& getAvailableTarget(int32_t index,
- const UnicodeString& source,
- UnicodeString& result) const;
- /**
- * Return the number of registered variant specifiers for a given
- * source-target pair. There is always at least one variant: If
- * just source-target is registered, then the single variant
- * NO_VARIANT is returned. If source-target/variant is registered
- * then that variant is returned.
- * @param source the source specifiers
- * @param target the target specifiers
- * @return the number of registered variant specifiers for a given
- * source-target pair.
- */
- int32_t countAvailableVariants(const UnicodeString& source,
- const UnicodeString& target) const;
- /**
- * Return a registered variant specifier for a given source-target
- * pair. If NO_VARIANT is one of the variants, then it will be
- * at index 0.
- * @param index which specifier to return, from 0 to n-1, where
- * n = countAvailableVariants(source, target)
- * @param source the source specifier
- * @param target the target specifier
- * @param result fill-in parameter to receive the variant
- * specifier. If source is invalid or if target is invalid or if
- * index is out of range, result will be empty.
- * @return reference to result
- */
- UnicodeString& getAvailableVariant(int32_t index,
- const UnicodeString& source,
- const UnicodeString& target,
- UnicodeString& result) const;
- private:
- //----------------------------------------------------------------
- // Private implementation
- //----------------------------------------------------------------
- TransliteratorEntry* find(const UnicodeString& ID); // lookup by full ID
- TransliteratorEntry* find(UnicodeString& source, // lookup by source/target/variant; takes non-const references
- UnicodeString& target,
- UnicodeString& variant);
- TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src, // dynamic store: the hashtable component (see class comment)
- const TransliteratorSpec& trg,
- const UnicodeString& variant) const;
- TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src, // static store: locale resource bundles (see class comment)
- const TransliteratorSpec& trg,
- const UnicodeString& variant);
- static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
- const TransliteratorSpec& specToFind,
- const UnicodeString& variant,
- UTransDirection direction);
- void registerEntry(const UnicodeString& source, // the entry is passed as 'adopted' in all three overloads
- const UnicodeString& target,
- const UnicodeString& variant,
- TransliteratorEntry* adopted,
- UBool visible);
- void registerEntry(const UnicodeString& ID,
- TransliteratorEntry* adopted,
- UBool visible);
- void registerEntry(const UnicodeString& ID,
- const UnicodeString& source,
- const UnicodeString& target,
- const UnicodeString& variant,
- TransliteratorEntry* adopted,
- UBool visible);
- void registerSTV(const UnicodeString& source, // STV = source, target, variant
- const UnicodeString& target,
- const UnicodeString& variant);
- void removeSTV(const UnicodeString& source,
- const UnicodeString& target,
- const UnicodeString& variant);
- Transliterator* instantiateEntry(const UnicodeString& ID, // same aliasReturn/status outputs as get() and reget()
- TransliteratorEntry *entry,
- TransliteratorAlias*& aliasReturn,
- UErrorCode& status);
- /**
- * A StringEnumeration over the registered IDs in this object.
- * It keeps a reference to the registry (not a copy) together
- * with an index.
- */
- class Enumeration : public StringEnumeration {
- public:
- Enumeration(const TransliteratorRegistry& reg);
- virtual ~Enumeration();
- virtual int32_t count(UErrorCode& status) const;
- virtual const UnicodeString* snext(UErrorCode& status);
- virtual void reset(UErrorCode& status);
- static UClassID U_EXPORT2 getStaticClassID();
- virtual UClassID getDynamicClassID() const;
- private:
- int32_t index; // NOTE(review): presumably the position of the next ID to return -- confirm
- const TransliteratorRegistry& reg; // the registry being enumerated; held by reference
- };
- friend class Enumeration;
- private:
- /**
- * Dynamic registry mapping full IDs to Entry objects. This
- * contains both public and internal entities. The visibility is
- * controlled by whether an entry is listed in availableIDs and
- * specDAG or not.
- */
- Hashtable registry;
- /**
- * DAG of visible IDs by spec. Hashtable: source => (Hashtable:
- * target => variant bitmask)
- */
- Hashtable specDAG;
- /**
- * Vector of all variant names
- */
- UVector variantList;
- /**
- * Vector of public full IDs.
- */
- UVector availableIDs;
- TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
- TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
- };
- U_NAMESPACE_END
- U_CFUNC UBool utrans_transliterator_cleanup(void); // declared here, defined elsewhere. NOTE(review): presumably the library-cleanup hook that tears down the static registry -- confirm
- #endif /* #if !UCONFIG_NO_TRANSLITERATION */
- #endif
- //eof
|