123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155 |
- // Copyright (c) 2011 The Chromium Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file.
- #ifndef THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_READER_H_
- #define THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_READER_H_
- #include <stddef.h>
- #include <string>
- #include <vector>
- #include "base/macros.h"
- #include "third_party/hunspell/google/bdict.h"
- namespace hunspell {
- class BDictReader;
- class NodeReader;
- // Iterators -------------------------------------------------------------------
- // Iterates through all words in the dictionary, filling each word into a
- // caller-specified buffer (see Advance). Copyable; created by BDictReader.
- class WordIterator {
- public:
- WordIterator(const WordIterator& other);
- ~WordIterator();
- // This must be explicitly declared and implemented in the .cc file so it will
- // compile without knowing the size of NodeInfo (only forward-declared here).
- WordIterator& operator=(const WordIterator&);
- // Fills |output_buffer| (length |output_len|) with the next word and fills
- // the affixes for it into |affix_ids|. Returns the number of affixes. A
- // return value of 0 means there are no more words.
- int Advance(char* output_buffer, size_t output_len,
- int affix_ids[BDict::MAX_AFFIXES_PER_WORD]);
- private:
- friend class BDictReader;  // Lets BDictReader use the private constructor.
- struct NodeInfo;  // Defined in the .cc file.
- WordIterator(const NodeReader& reader);
- // Called by Advance when a leaf is found to generate the word, the affix
- // list, and the value that Advance returns.
- int FoundLeaf(const NodeReader& node, char cur_char,
- char* output_buffer, size_t output_len,
- int affix_ids[BDict::MAX_AFFIXES_PER_WORD]);
- std::vector<NodeInfo> stack_;  // NOTE(review): presumably iteration state; see .cc.
- };
- // Iterates over a list of lines separated by NULLs within the BDict data.
- class LineIterator {
- public:
- // Returns the next word in the sequence or NULL if there are no more.
- const char* Advance();
- // Advances to the next word in the sequence and copies it into the given
- // buffer, of the given length. If it doesn't fit, it will be truncated.
- // Returns true on success.
- bool AdvanceAndCopy(char* buf, size_t buf_len);
- // Returns true when all data has been read. We're done when we reach a
- // double-NULL or the end of the input (shouldn't happen).
- bool IsDone() const;
- protected:
- friend class BDictReader;  // Lets BDictReader use the protected constructor.
- LineIterator(const unsigned char* bdict_data, size_t bdict_length,
- size_t first_offset);
- const unsigned char* bdict_data_;
- size_t bdict_length_;
- // Current offset within bdict_data_ of the next string to read.
- size_t cur_offset_;
- };
- // Created by BDictReader::GetReplacementIterator to iterate over all replacement pairs.
- class ReplacementIterator : public LineIterator {
- public:
- // Fills pointers to NULL-terminated strings into the given output params.
- // Returns false if there are no more pairs and nothing was filled in.
- bool GetNext(const char** first, const char** second);
- private:
- friend class BDictReader;  // Lets BDictReader use the private constructor.
- ReplacementIterator(const unsigned char* bdict_data, size_t bdict_length,
- size_t first_offset)
- : LineIterator(bdict_data, bdict_length, first_offset) {  // Forwards all arguments to the base class.
- }
- };
- // Reads a BDict file mapped into memory.
- class BDictReader {
- public:
- // You must call Init and it must succeed before calling any other functions.
- BDictReader();
- // Initializes the reader with the given data. Ownership of the data is not
- // transferred, and the caller must keep it valid until the reader is destroyed.
- // Returns true on success.
- bool Init(const unsigned char* bdic_data, size_t bdic_length);
- // Returns true if Init() succeeded and other functions can be called.
- bool IsValid() const { return !!bdict_data_; }
- // Locates the given word in the dictionary. There may be multiple matches if
- // the word is listed multiple times in the dictionary with different affix
- // rules.
- //
- // The number of matches is returned, and that number of corresponding affix
- // group IDs are filled into |affix_indices|. These IDs may be 0 to indicate
- // there is no affix for that particular match. A return value of 0 means that
- // there are no matches.
- int FindWord(const char* word,
- int affix_indices[BDict::MAX_AFFIXES_PER_WORD]) const;
- // Returns an iterator that will go over all AF lines ("affix groups").
- LineIterator GetAfLineIterator() const;
- // Returns an iterator that will go over all SFX/PFX lines ("affix rules").
- LineIterator GetAffixLineIterator() const;
- // Returns an iterator that will go over all "other" lines.
- LineIterator GetOtherLineIterator() const;
- // Returns an iterator that can be used to iterate all replacements.
- ReplacementIterator GetReplacementIterator() const;
- // Used for testing, returns an iterator for all words in the dictionary.
- WordIterator GetAllWordIterator() const;
- private:
- // Non-NULL indicates Init succeeded (this is what IsValid() checks).
- const unsigned char* bdict_data_;
- size_t bdict_length_;
- // Pointer not owned by this class. It will point into the data. It will be
- // NULL if the data is invalid.
- const BDict::Header* header_;
- const BDict::AffHeader* aff_header_;
- DISALLOW_COPY_AND_ASSIGN(BDictReader);
- };
- } // namespace hunspell
- #endif // THIRD_PARTY_HUNSPELL_GOOGLE_BDICT_READER_H_
|