zip_reader.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. // Copyright (c) 2011 The Chromium Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file.
  4. #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
  5. #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
  6. #include <stddef.h>
  7. #include <stdint.h>
  8. #include <memory>
  9. #include <string>
  10. #include "base/callback.h"
  11. #include "base/files/file.h"
  12. #include "base/files/file_path.h"
  13. #include "base/files/file_util.h"
  14. #include "base/macros.h"
  15. #include "base/memory/weak_ptr.h"
  16. #include "base/time/time.h"
  17. #if defined(USE_SYSTEM_MINIZIP)
  18. #include <minizip/unzip.h>
  19. #else
  20. #include "third_party/zlib/contrib/minizip/unzip.h"
  21. #endif
  22. namespace zip {
  23. // A delegate interface used to stream out an entry; see
  24. // ZipReader::ExtractCurrentEntry.
  25. class WriterDelegate {
  26. public:
  27. virtual ~WriterDelegate() {}
  28. // Invoked once before any data is streamed out to pave the way (e.g., to open
  29. // the output file). Return false on failure to cancel extraction.
  30. virtual bool PrepareOutput() = 0;
  31. // Invoked to write the next chunk of data. Return false on failure to cancel
  32. // extraction.
  33. virtual bool WriteBytes(const char* data, int num_bytes) = 0;
  34. // Sets the last-modified time of the data.
  35. virtual void SetTimeModified(const base::Time& time) = 0;
  36. };
  37. // This class is used for reading zip files. A typical use case of this
  38. // class is to scan entries in a zip file and extract them. The code will
  39. // look like:
  40. //
  41. // ZipReader reader;
  42. // reader.Open(zip_file_path);
  43. // while (reader.HasMore()) {
  44. // reader.OpenCurrentEntryInZip();
  45. // const base::FilePath& entry_path =
  46. // reader.current_entry_info()->file_path();
  47. // auto writer = CreateFilePathWriterDelegate(extract_dir, entry_path);
  48. // reader.ExtractCurrentEntry(writer, std::numeric_limits<uint64_t>::max());
  49. // reader.AdvanceToNextEntry();
  50. // }
  51. //
  52. // For simplicity, error checking is omitted in the example code above. The
  53. // production code should check return values from all of these functions.
  54. //
  55. class ZipReader {
  56. public:
  57. // A callback that is called when the operation is successful.
  58. using SuccessCallback = base::OnceClosure;
  59. // A callback that is called when the operation fails.
  60. using FailureCallback = base::OnceClosure;
  61. // A callback that is called periodically during the operation with the number
  62. // of bytes that have been processed so far.
  63. using ProgressCallback = base::RepeatingCallback<void(int64_t)>;
  64. // This class represents information of an entry (file or directory) in
  65. // a zip file.
  66. class EntryInfo {
  67. public:
  68. EntryInfo(const std::string& filename_in_zip,
  69. const unz_file_info& raw_file_info);
  70. // Returns the file path. The path is usually relative like
  71. // "foo/bar.txt", but if it's absolute, is_unsafe() returns true.
  72. const base::FilePath& file_path() const { return file_path_; }
  73. // Returns the size of the original file (i.e. after uncompressed).
  74. // Returns 0 if the entry is a directory.
  75. // Note: this value should not be trusted, because it is stored as metadata
  76. // in the zip archive and can be different from the real uncompressed size.
  77. int64_t original_size() const { return original_size_; }
  78. // Returns the last modified time. If the time stored in the zip file was
  79. // not valid, the unix epoch will be returned.
  80. //
  81. // The time stored in the zip archive uses the MS-DOS date and time format.
  82. // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx
  83. // As such the following limitations apply:
  84. // * only years from 1980 to 2107 can be represented.
  85. // * the time stamp has a 2 second resolution.
  86. // * there's no timezone information, so the time is interpreted as local.
  87. base::Time last_modified() const { return last_modified_; }
  88. // Returns true if the entry is a directory.
  89. bool is_directory() const { return is_directory_; }
  90. // Returns true if the entry is unsafe, like having ".." or invalid
  91. // UTF-8 characters in its file name, or the file path is absolute.
  92. bool is_unsafe() const { return is_unsafe_; }
  93. // Returns true if the entry is encrypted.
  94. bool is_encrypted() const { return is_encrypted_; }
  95. private:
  96. const base::FilePath file_path_;
  97. int64_t original_size_;
  98. base::Time last_modified_;
  99. bool is_directory_;
  100. bool is_unsafe_;
  101. bool is_encrypted_;
  102. DISALLOW_COPY_AND_ASSIGN(EntryInfo);
  103. };
  104. ZipReader();
  105. ~ZipReader();
  106. // Opens the zip file specified by |zip_file_path|. Returns true on
  107. // success.
  108. bool Open(const base::FilePath& zip_file_path);
  109. // Opens the zip file referred to by the platform file |zip_fd|, without
  110. // taking ownership of |zip_fd|. Returns true on success.
  111. bool OpenFromPlatformFile(base::PlatformFile zip_fd);
  112. // Opens the zip data stored in |data|. This class uses a weak reference to
  113. // the given sring while extracting files, i.e. the caller should keep the
  114. // string until it finishes extracting files.
  115. bool OpenFromString(const std::string& data);
  116. // Closes the currently opened zip file. This function is called in the
  117. // destructor of the class, so you usually don't need to call this.
  118. void Close();
  119. // Returns true if there is at least one entry to read. This function is
  120. // used to scan entries with AdvanceToNextEntry(), like:
  121. //
  122. // while (reader.HasMore()) {
  123. // // Do something with the current file here.
  124. // reader.AdvanceToNextEntry();
  125. // }
  126. bool HasMore();
  127. // Advances the next entry. Returns true on success.
  128. bool AdvanceToNextEntry();
  129. // Opens the current entry in the zip file. On success, returns true and
  130. // updates the the current entry state (i.e. current_entry_info() is
  131. // updated). This function should be called before operations over the
  132. // current entry like ExtractCurrentEntryToFile().
  133. //
  134. // Note that there is no CloseCurrentEntryInZip(). The the current entry
  135. // state is reset automatically as needed.
  136. bool OpenCurrentEntryInZip();
  137. // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|,
  138. // starting from the beginning of the entry. Return value specifies whether
  139. // the entire file was extracted.
  140. bool ExtractCurrentEntry(WriterDelegate* delegate,
  141. uint64_t num_bytes_to_extract) const;
  142. // Asynchronously extracts the current entry to the given output file path.
  143. // If the current entry is a directory it just creates the directory
  144. // synchronously instead. OpenCurrentEntryInZip() must be called beforehand.
  145. // success_callback will be called on success and failure_callback will be
  146. // called on failure. progress_callback will be called at least once.
  147. // Callbacks will be posted to the current MessageLoop in-order.
  148. void ExtractCurrentEntryToFilePathAsync(
  149. const base::FilePath& output_file_path,
  150. SuccessCallback success_callback,
  151. FailureCallback failure_callback,
  152. const ProgressCallback& progress_callback);
  153. // Extracts the current entry into memory. If the current entry is a
  154. // directory, the |output| parameter is set to the empty string. If the
  155. // current entry is a file, the |output| parameter is filled with its
  156. // contents. OpenCurrentEntryInZip() must be called beforehand. Note: the
  157. // |output| parameter can be filled with a big amount of data, avoid passing
  158. // it around by value, but by reference or pointer. Note: the value returned
  159. // by EntryInfo::original_size() cannot be trusted, so the real size of the
  160. // uncompressed contents can be different. |max_read_bytes| limits the ammount
  161. // of memory used to carry the entry. Returns true if the entire content is
  162. // read. If the entry is bigger than |max_read_bytes|, returns false and
  163. // |output| is filled with |max_read_bytes| of data. If an error occurs,
  164. // returns false, and |output| is set to the empty string.
  165. bool ExtractCurrentEntryToString(uint64_t max_read_bytes,
  166. std::string* output) const;
  167. // Returns the current entry info. Returns NULL if the current entry is
  168. // not yet opened. OpenCurrentEntryInZip() must be called beforehand.
  169. EntryInfo* current_entry_info() const {
  170. return current_entry_info_.get();
  171. }
  172. // Returns the number of entries in the zip file.
  173. // Open() must be called beforehand.
  174. int num_entries() const { return num_entries_; }
  175. private:
  176. // Common code used both in Open and OpenFromFd.
  177. bool OpenInternal();
  178. // Resets the internal state.
  179. void Reset();
  180. // Extracts a chunk of the file to the target. Will post a task for the next
  181. // chunk and success/failure/progress callbacks as necessary.
  182. void ExtractChunk(base::File target_file,
  183. SuccessCallback success_callback,
  184. FailureCallback failure_callback,
  185. const ProgressCallback& progress_callback,
  186. const int64_t offset);
  187. unzFile zip_file_;
  188. int num_entries_;
  189. bool reached_end_;
  190. std::unique_ptr<EntryInfo> current_entry_info_;
  191. base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this};
  192. DISALLOW_COPY_AND_ASSIGN(ZipReader);
  193. };
  194. // A writer delegate that writes to a given File.
  195. class FileWriterDelegate : public WriterDelegate {
  196. public:
  197. // Constructs a FileWriterDelegate that manipulates |file|. The delegate will
  198. // not own |file|, therefore the caller must guarantee |file| will outlive the
  199. // delegate.
  200. explicit FileWriterDelegate(base::File* file);
  201. // Constructs a FileWriterDelegate that takes ownership of |file|.
  202. explicit FileWriterDelegate(std::unique_ptr<base::File> file);
  203. // Truncates the file to the number of bytes written.
  204. ~FileWriterDelegate() override;
  205. // WriterDelegate methods:
  206. // Seeks to the beginning of the file, returning false if the seek fails.
  207. bool PrepareOutput() override;
  208. // Writes |num_bytes| bytes of |data| to the file, returning false on error or
  209. // if not all bytes could be written.
  210. bool WriteBytes(const char* data, int num_bytes) override;
  211. // Sets the last-modified time of the data.
  212. void SetTimeModified(const base::Time& time) override;
  213. // Return the actual size of the file.
  214. int64_t file_length() { return file_length_; }
  215. private:
  216. // The file the delegate modifies.
  217. base::File* file_;
  218. // The delegate can optionally own the file it modifies, in which case
  219. // owned_file_ is set and file_ is an alias for owned_file_.
  220. std::unique_ptr<base::File> owned_file_;
  221. int64_t file_length_ = 0;
  222. DISALLOW_COPY_AND_ASSIGN(FileWriterDelegate);
  223. };
  224. // A writer delegate that writes a file at a given path.
  225. class FilePathWriterDelegate : public WriterDelegate {
  226. public:
  227. explicit FilePathWriterDelegate(const base::FilePath& output_file_path);
  228. ~FilePathWriterDelegate() override;
  229. // WriterDelegate methods:
  230. // Creates the output file and any necessary intermediate directories.
  231. bool PrepareOutput() override;
  232. // Writes |num_bytes| bytes of |data| to the file, returning false if not all
  233. // bytes could be written.
  234. bool WriteBytes(const char* data, int num_bytes) override;
  235. // Sets the last-modified time of the data.
  236. void SetTimeModified(const base::Time& time) override;
  237. private:
  238. base::FilePath output_file_path_;
  239. base::File file_;
  240. DISALLOW_COPY_AND_ASSIGN(FilePathWriterDelegate);
  241. };
  242. } // namespace zip
  243. #endif // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_