123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
- // © 2019 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html#License
- // locdistance.h
- // created: 2019may08 Markus W. Scherer
- #ifndef __LOCDISTANCE_H__
- #define __LOCDISTANCE_H__
- #include "unicode/utypes.h"
- #include "unicode/bytestrie.h"
- #include "unicode/localematcher.h"
- #include "unicode/locid.h"
- #include "unicode/uobject.h"
- #include "lsr.h"
- U_NAMESPACE_BEGIN
- struct LocaleDistanceData;
- /**
- * Offline-built data for LocaleMatcher.
- * Mostly but not only the data for mapping locales to their maximized forms.
- */
- class LocaleDistance final : public UMemory {
- public:
- static const LocaleDistance *getSingleton(UErrorCode &errorCode);
- static int32_t shiftDistance(int32_t distance) {
- return distance << DISTANCE_SHIFT;
- }
- static int32_t getShiftedDistance(int32_t indexAndDistance) {
- return indexAndDistance & DISTANCE_MASK;
- }
- static double getDistanceDouble(int32_t indexAndDistance) {
- double shiftedDistance = getShiftedDistance(indexAndDistance);
- return shiftedDistance / (1 << DISTANCE_SHIFT);
- }
- static int32_t getIndex(int32_t indexAndDistance) {
- // assert indexAndDistance >= 0;
- return indexAndDistance >> INDEX_SHIFT;
- }
- /**
- * Finds the supported LSR with the smallest distance from the desired one.
- * Equivalent LSR subtags must be normalized into a canonical form.
- *
- * <p>Returns the index of the lowest-distance supported LSR in the high bits
- * (negative if none has a distance below the threshold),
- * and its distance (0..ABOVE_THRESHOLD) in the low bits.
- */
- int32_t getBestIndexAndDistance(const LSR &desired,
- const LSR **supportedLSRs, int32_t supportedLSRsLength,
- int32_t shiftedThreshold,
- ULocMatchFavorSubtag favorSubtag,
- ULocMatchDirection direction) const;
- UBool isParadigmLSR(const LSR &lsr) const;
- int32_t getDefaultScriptDistance() const {
- return defaultScriptDistance;
- }
- int32_t getDefaultDemotionPerDesiredLocale() const {
- return defaultDemotionPerDesiredLocale;
- }
- private:
- // The distance is shifted left to gain some fraction bits.
- static constexpr int32_t DISTANCE_SHIFT = 3;
- static constexpr int32_t DISTANCE_FRACTION_MASK = 7;
- // 7 bits for 0..100
- static constexpr int32_t DISTANCE_INT_SHIFT = 7;
- static constexpr int32_t INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT;
- static constexpr int32_t DISTANCE_MASK = 0x3ff;
- // tic constexpr int32_t MAX_INDEX = 0x1fffff; // avoids sign bit
- static constexpr int32_t INDEX_NEG_1 = 0xfffffc00;
- static int32_t getDistanceFloor(int32_t indexAndDistance) {
- return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
- }
- LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely);
- LocaleDistance(const LocaleDistance &other) = delete;
- LocaleDistance &operator=(const LocaleDistance &other) = delete;
- static void initLocaleDistance(UErrorCode &errorCode);
- UBool isMatch(const LSR &desired, const LSR &supported,
- int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
- const LSR *pSupp = &supported;
- return getBestIndexAndDistance(
- desired, &pSupp, 1,
- shiftedThreshold, favorSubtag, ULOCMATCH_DIRECTION_WITH_ONE_WAY) >= 0;
- }
- static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
- const char *desired, const char *supported);
- static int32_t getRegionPartitionsDistance(
- BytesTrie &iter, uint64_t startState,
- const char *desiredPartitions, const char *supportedPartitions,
- int32_t threshold);
- static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
- static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
- const char *partitionsForRegion(const LSR &lsr) const {
- // ill-formed region -> one non-matching string
- int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
- return partitionArrays[pIndex];
- }
- int32_t getDefaultRegionDistance() const {
- return defaultRegionDistance;
- }
- const XLikelySubtags &likelySubtags;
- // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
- // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
- // There is also a trie value for each subsequence of whole subtags.
- // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
- BytesTrie trie;
- /**
- * Maps each region to zero or more single-character partitions.
- */
- const uint8_t *regionToPartitionsIndex;
- const char **partitionArrays;
- /**
- * Used to get the paradigm region for a cluster, if there is one.
- */
- const LSR *paradigmLSRs;
- int32_t paradigmLSRsLength;
- int32_t defaultLanguageDistance;
- int32_t defaultScriptDistance;
- int32_t defaultRegionDistance;
- int32_t minRegionDistance;
- int32_t defaultDemotionPerDesiredLocale;
- };
- U_NAMESPACE_END
- #endif // __LOCDISTANCE_H__
|