| From 98c02858983d8e54e0169aa8f12f6bed6a1061e9 Mon Sep 17 00:00:00 2001 |
| From: =?UTF-8?q?Fran=C3=A7ois=20Degros?= <fdegros@chromium.org> |
| Date: Thu, 18 Jun 2020 23:35:55 +1000 |
| Subject: [PATCH] Rolling cache |
| |
| Use a rolling cache of uncompressed data for each file being read. |
| |
| This allows to accomodate out-of-order read operations. |
| --- |
| lib/dataNode.cpp | 113 ++---------------- |
| lib/dataNode.h | 30 +---- |
| lib/fileNode.cpp | 25 +--- |
| lib/fileNode.h | 25 +--- |
| lib/fuse-zip.cpp | 84 ++----------- |
| lib/fuse-zip.h | 8 -- |
| lib/fuseZipData.cpp | 52 -------- |
| lib/fuseZipData.h | 5 - |
| lib/reader.cpp | 282 ++++++++++++++++++++++++++++++++++++++++++++ |
| lib/reader.h | 162 +++++++++++++++++++++++++ |
| main.cpp | 4 - |
| 11 files changed, 469 insertions(+), 321 deletions(-) |
| create mode 100644 lib/reader.cpp |
| create mode 100644 lib/reader.h |
| |
| diff --git a/lib/dataNode.cpp b/lib/dataNode.cpp |
| index 0efd7bc..893e3d1 100644 |
| --- a/lib/dataNode.cpp |
| +++ b/lib/dataNode.cpp |
| @@ -47,8 +47,6 @@ std::shared_ptr<DataNode> DataNode::createNew(mode_t mode, uid_t uid, gid_t gid, |
| std::shared_ptr<DataNode> n(new DataNode(FAKE_ID, mode, uid, gid, dev)); |
| |
| n->_state = NodeState::NEW; |
| - n->_buffer.reset(new BigBuffer()); |
| - |
| n->_has_btime = true; |
| n->_metadataChanged = true; |
| n->_mtime = n->_atime = n->_ctime = n->_btime = currentTime(); |
| @@ -60,8 +58,6 @@ std::shared_ptr<DataNode> DataNode::createTmpDir(mode_t mode, uid_t uid, gid_t g |
| std::shared_ptr<DataNode> n(new DataNode(FAKE_ID, mode, uid, gid, dev)); |
| |
| n->_state = NodeState::NEW; |
| - n->_buffer.reset(new BigBuffer()); |
| - |
| n->_has_btime = true; |
| n->_metadataChanged = false; |
| n->_mtime = n->_atime = n->_ctime = n->_btime = currentTime(); |
| @@ -102,94 +98,24 @@ std::shared_ptr<DataNode> DataNode::createExisting(struct zip *zip, zip_uint64_t |
| return n; |
| } |
| |
| -int DataNode::open(struct zip *zip) { |
| - if (_state == NodeState::NEW || _state == NodeState::VIRTUAL_SYMLINK) { |
| - return 0; |
| - } |
| - if (_state == NodeState::OPENED) { |
| - if (_open_count == INT_MAX) { |
| - return -EMFILE; |
| - } else { |
| - ++_open_count; |
| - } |
| - } |
| - if (_state == NodeState::CLOSED) { |
| - _open_count = 1; |
| - assert(zip != NULL); |
| - if (_size > std::numeric_limits<size_t>::max()) { |
| - return -ENOMEM; |
| - } |
| - assert(_id != FAKE_ID); |
| - _buffer.reset(new BigBuffer(zip, _id, static_cast<size_t>(_size))); |
| - _state = NodeState::OPENED; |
| - } |
| - return 0; |
| -} |
| +ReaderPtr DataNode::open(struct zip *zip) { |
| + struct zip_stat st = {}; |
| + if (zip_stat_index(zip, _id, 0, &st) < 0) |
| + throw ZipError("Cannot stat file", zip); |
| |
| -int DataNode::read(char *buf, size_t sz, size_t offset) { |
| _atime = currentTime(); |
| - return _buffer->read(buf, sz, offset); |
| -} |
| |
| -int DataNode::write(const char *buf, size_t sz, size_t offset) { |
| - assert(_state != NodeState::VIRTUAL_SYMLINK); |
| - if (_state == NodeState::OPENED) { |
| - _state = NodeState::CHANGED; |
| + if ((st.valid & ZIP_STAT_COMP_METHOD) != 0 && |
| + st.comp_method == ZIP_CM_STORE) { |
| + // The file is stored without compression. |
| + return ReaderPtr(new UnbufferedReader(zip, _id, st.size)); |
| } |
| - _mtime = currentTime(); |
| - _metadataChanged = true; |
| - return _buffer->write(buf, sz, offset); |
| -} |
| |
| -int DataNode::close() { |
| - _size = _buffer->len; |
| - if (_state == NodeState::OPENED && --_open_count == 0) { |
| - _buffer.reset(); |
| - _state = NodeState::CLOSED; |
| - } |
| - return 0; |
| + // The file is compressed. |
| + return ReaderPtr(new BufferedReader(zip, _id, st.size)); |
| } |
| |
| -int DataNode::save(struct zip *zip, const char *full_name, zip_int64_t &index) { |
| - assert(zip != NULL); |
| - assert(full_name != NULL); |
| - return _buffer->saveToZip(_mtime.tv_sec, zip, full_name, |
| - _state == NodeState::NEW, index); |
| -} |
| - |
| -//int DataNode::saveMetadata(bool force_precise_time) const { |
| -// assert(zip != NULL); |
| -// assert(_id >= 0); |
| -// |
| -// int res = updateExtraFields(force_precise_time); |
| -// if (res != 0) |
| -// return res; |
| -// return updateExternalAttributes(); |
| -//} |
| - |
| -int DataNode::truncate(size_t offset) { |
| - assert(_state != NodeState::VIRTUAL_SYMLINK); |
| - if (_state != NodeState::CLOSED) { |
| - if (_state != NodeState::NEW) { |
| - _state = NodeState::CHANGED; |
| - } |
| - _buffer->truncate(offset); |
| - _mtime = currentTime(); |
| - _metadataChanged = true; |
| - return 0; |
| - } else { |
| - return EBADF; |
| - } |
| -} |
| - |
| -zip_uint64_t DataNode::size() const { |
| - if (_state == NodeState::NEW || _state == NodeState::OPENED || _state == NodeState::CHANGED || |
| - _state == NodeState::VIRTUAL_SYMLINK) { |
| - return _buffer->len; |
| - } else { |
| - return _size; |
| - } |
| -} |
| +zip_uint64_t DataNode::size() const { return _size; } |
| |
| /** |
| * Get timestamp information from extra fields. |
| @@ -355,23 +281,6 @@ void DataNode::processPkWareUnixField(zip_uint16_t type, zip_uint16_t len, const |
| _atime.tv_nsec = 0; |
| } |
| _device = dev; |
| - // use PKWARE link target only if link target in Info-ZIP format is not |
| - // specified (empty file content) |
| - if (S_ISLNK(_mode) && _size == 0 && link_len > 0) { |
| - assert(_state == NodeState::CLOSED || _state == NodeState::VIRTUAL_SYMLINK); |
| - if (_state == NodeState::VIRTUAL_SYMLINK) |
| - { |
| - _state = NodeState::CLOSED; |
| - _buffer.reset(); |
| - } |
| - _buffer.reset(new BigBuffer()); |
| - if (!_buffer) |
| - return; |
| - assert(link != NULL); |
| - _buffer->write(link, link_len, 0); |
| - _state = NodeState::VIRTUAL_SYMLINK; |
| - } |
| - // hardlinks are handled in FuseZipData::build_tree |
| } |
| |
| void DataNode::chmod (mode_t mode) { |
| diff --git a/lib/dataNode.h b/lib/dataNode.h |
| index 7489ec9..e1552f5 100644 |
| --- a/lib/dataNode.h |
| +++ b/lib/dataNode.h |
| @@ -27,8 +27,8 @@ |
| #include <sys/stat.h> |
| #include <unistd.h> |
| |
| +#include "reader.h" |
| #include "types.h" |
| -#include "bigBuffer.h" |
| |
| class DataNode { |
| private: |
| @@ -46,7 +46,6 @@ private: |
| }; |
| |
| zip_uint64_t _id; |
| - std::unique_ptr<BigBuffer> _buffer; |
| int _open_count; |
| NodeState _state; |
| |
| @@ -72,21 +71,7 @@ public: |
| static std::shared_ptr<DataNode> createTmpDir(mode_t mode, uid_t uid, gid_t gid, dev_t dev); |
| static std::shared_ptr<DataNode> createExisting(struct zip *zip, zip_uint64_t id, mode_t mode); |
| |
| - int open(struct zip *zip); |
| - int read(char *buf, size_t size, size_t offset); |
| - int write(const char *buf, size_t size, size_t offset); |
| - int close(); |
| - |
| - /** |
| - * Invoke zip_file_add() or zip_file_replace() for file to save it. |
| - * Should be called only if item is needed to ba saved into zip file. |
| - * |
| - * @param zip zip structure pointer |
| - * @param full_name full file name |
| - * @param index file node index (updated if state is NEW) |
| - * @return 0 if success, != 0 on error |
| - */ |
| - int save(struct zip *zip, const char *full_name, zip_int64_t &index); |
| + ReaderPtr open(struct zip *zip); |
| |
| /** |
| * Save file metadata to ZIP |
| @@ -95,17 +80,6 @@ public: |
| */ |
| int saveMetadata (bool force_precise_time) const; |
| |
| - /** |
| - * Truncate file. |
| - * |
| - * @return |
| - * 0 If successful |
| - * EBADF If file is currently closed |
| - * EIO If insufficient memory available (because ENOMEM not |
| - * listed in truncate() error codes) |
| - */ |
| - int truncate(size_t offset); |
| - |
| inline bool isChanged() const { |
| return _state == NodeState::CHANGED |
| || _state == NodeState::NEW |
| diff --git a/lib/fileNode.cpp b/lib/fileNode.cpp |
| index cedaaa6..e0acd87 100644 |
| --- a/lib/fileNode.cpp |
| +++ b/lib/fileNode.cpp |
| @@ -170,28 +170,10 @@ void FileNode::rename(const char *new_name) { |
| parse_name(); |
| } |
| |
| -int FileNode::open() { |
| +ReaderPtr FileNode::open() { |
| return _data->open(zip); |
| } |
| |
| -int FileNode::read(char *buf, size_t sz, size_t offset) { |
| - return _data->read(buf, sz, offset); |
| -} |
| - |
| -int FileNode::write(const char *buf, size_t sz, size_t offset) { |
| - return _data->write(buf, sz, offset); |
| -} |
| - |
| -int FileNode::close() { |
| - return _data->close(); |
| -} |
| - |
| -int FileNode::save() { |
| - assert (!is_dir()); |
| - // index is modified if state == NEW |
| - return _data->save(zip, full_name.c_str(), _id); |
| -} |
| - |
| int FileNode::saveMetadata(bool force_precise_time) const { |
| assert(zip != NULL); |
| assert(_id >= 0); |
| @@ -209,11 +191,6 @@ int FileNode::saveComment() const { |
| return zip_file_set_comment(zip, id(), m_comment, m_commentLen, 0); |
| } |
| |
| -int FileNode::truncate(size_t offset) { |
| - assert(_data); |
| - return _data->truncate(offset); |
| -} |
| - |
| zip_uint64_t FileNode::size() const { |
| return _data->size(); |
| } |
| diff --git a/lib/fileNode.h b/lib/fileNode.h |
| index 0d18b01..08dc42c 100644 |
| --- a/lib/fileNode.h |
| +++ b/lib/fileNode.h |
| @@ -27,7 +27,6 @@ |
| #include <sys/stat.h> |
| |
| #include "types.h" |
| -#include "bigBuffer.h" |
| #include "dataNode.h" |
| |
| class FileNode { |
| @@ -110,18 +109,7 @@ public: |
| */ |
| void rename (const char *new_name); |
| |
| - int open(); |
| - int read(char *buf, size_t size, size_t offset); |
| - int write(const char *buf, size_t size, size_t offset); |
| - int close(); |
| - |
| - /** |
| - * Invoke zip_file_add() or zip_file_replace() for file to save it. |
| - * Should be called only if item is needed to ba saved into zip file. |
| - * |
| - * @return 0 if success, != 0 on error |
| - */ |
| - int save(); |
| + ReaderPtr open(); |
| |
| /** |
| * Save file metadata to ZIP |
| @@ -136,17 +124,6 @@ public: |
| */ |
| int saveComment() const; |
| |
| - /** |
| - * Truncate file. |
| - * |
| - * @return |
| - * 0 If successful |
| - * EBADF If file is currently closed |
| - * EIO If insufficient memory available (because ENOMEM not |
| - * listed in truncate() error codes) |
| - */ |
| - int truncate(size_t offset); |
| - |
| inline bool isChanged() const { |
| return _data->isChanged(); |
| } |
| diff --git a/lib/fuse-zip.cpp b/lib/fuse-zip.cpp |
| index f69e60a..a1c2ea0 100644 |
| --- a/lib/fuse-zip.cpp |
| +++ b/lib/fuse-zip.cpp |
| @@ -118,7 +118,6 @@ inline struct zip *get_zip() { |
| |
| void fusezip_destroy(void *data) { |
| FuseZipData *d = (FuseZipData*)data; |
| - d->save (); |
| delete d; |
| } |
| |
| @@ -212,35 +211,14 @@ int fusezip_open(const char *path, struct fuse_file_info *fi) try { |
| if (node->is_dir()) { |
| return -EISDIR; |
| } |
| - fi->fh = (uint64_t)node; |
| |
| - return node->open(); |
| + ReaderPtr reader = node->open(); |
| + fi->fh = reinterpret_cast<uint64_t>(reader.release()); |
| + return 0; |
| } catch (...) { |
| return exceptionToError("open file", path); |
| } |
| |
| -int fusezip_create(const char *path, mode_t mode, |
| - struct fuse_file_info *fi) try { |
| - if (*path == '\0') { |
| - return -EACCES; |
| - } |
| - FileNode *node = get_file_node(path + 1); |
| - if (node != NULL) { |
| - return -EEXIST; |
| - } |
| - node = FileNode::createFile(get_zip(), path + 1, fuse_get_context()->uid, |
| - fuse_get_context()->gid, mode); |
| - if (node == NULL) { |
| - return -ENOMEM; |
| - } |
| - get_data()->insertNode(node); |
| - fi->fh = (uint64_t)node; |
| - |
| - return node->open(); |
| -} catch (...) { |
| - return exceptionToError("create file", path); |
| -} |
| - |
| int fusezip_mknod(const char *path, mode_t mode, dev_t dev) try { |
| if (*path == '\0') { |
| return -EACCES; |
| @@ -265,64 +243,22 @@ int fusezip_read(const char *path, char *buf, size_t size, off_t offset, |
| struct fuse_file_info *fi) try { |
| if (offset < 0) |
| return -EINVAL; |
| - return reinterpret_cast<FileNode *>(fi->fh)->read( |
| - buf, size, static_cast<size_t>(offset)); |
| + return static_cast<int>( |
| + reinterpret_cast<Reader *>(fi->fh)->Read( |
| + buf, buf + std::min<size_t>(size, std::numeric_limits<int>::max()), |
| + offset) - |
| + buf); |
| } catch (...) { |
| return exceptionToError("read file", path); |
| } |
| |
| -int fusezip_write(const char *path, const char *buf, size_t size, off_t offset, |
| - struct fuse_file_info *fi) try { |
| - if (offset < 0) |
| - return -EINVAL; |
| - return reinterpret_cast<FileNode *>(fi->fh)->write( |
| - buf, size, static_cast<size_t>(offset)); |
| -} catch (...) { |
| - return exceptionToError("write file", path); |
| -} |
| - |
| int fusezip_release(const char *path, struct fuse_file_info *fi) try { |
| - return reinterpret_cast<FileNode *>(fi->fh)->close(); |
| + const ReaderPtr p(reinterpret_cast<Reader *>(fi->fh)); |
| + return 0; |
| } catch (...) { |
| return exceptionToError("close file", path); |
| } |
| |
| -int fusezip_ftruncate(const char *path, off_t offset, |
| - struct fuse_file_info *fi) try { |
| - if (offset < 0) |
| - return -EINVAL; |
| - return -reinterpret_cast<FileNode *>(fi->fh)->truncate( |
| - static_cast<size_t>(offset)); |
| -} catch (...) { |
| - return exceptionToError("truncate file", path); |
| -} |
| - |
| -int fusezip_truncate(const char *path, off_t offset) try { |
| - if (*path == '\0') { |
| - return -EACCES; |
| - } |
| - if (offset < 0) |
| - return -EINVAL; |
| - FileNode *node = get_file_node(path + 1); |
| - if (node == NULL) { |
| - return -ENOENT; |
| - } |
| - if (node->is_dir()) { |
| - return -EISDIR; |
| - } |
| - int res; |
| - if ((res = node->open()) != 0) { |
| - return res; |
| - } |
| - if ((res = node->truncate(static_cast<size_t>(offset))) != 0) { |
| - node->close(); |
| - return -res; |
| - } |
| - return node->close(); |
| -} catch (...) { |
| - return exceptionToError("truncate file", path); |
| -} |
| - |
| int fusezip_unlink(const char *path) { |
| if (*path == '\0') { |
| return -ENOENT; |
| diff --git a/lib/fuse-zip.h b/lib/fuse-zip.h |
| index 74990db..b40270c 100644 |
| --- a/lib/fuse-zip.h |
| +++ b/lib/fuse-zip.h |
| @@ -63,20 +63,12 @@ int fusezip_statfs(const char *path, struct statvfs *buf); |
| |
| int fusezip_open(const char *path, struct fuse_file_info *fi); |
| |
| -int fusezip_create(const char *path, mode_t mode, struct fuse_file_info *fi); |
| - |
| int fusezip_mknod(const char *path, mode_t mode, dev_t dev); |
| |
| int fusezip_read(const char *path, char *buf, size_t size, off_t offset, struct fuse_file_info *fi); |
| |
| -int fusezip_write(const char *path, const char *buf, size_t size, off_t offset, struct fuse_file_info *fi); |
| - |
| int fusezip_release (const char *path, struct fuse_file_info *fi); |
| |
| -int fusezip_ftruncate(const char *path, off_t offset, struct fuse_file_info *fi); |
| - |
| -int fusezip_truncate(const char *path, off_t offset); |
| - |
| int fusezip_unlink(const char *path); |
| |
| int fusezip_rmdir(const char *path); |
| diff --git a/lib/fuseZipData.cpp b/lib/fuseZipData.cpp |
| index 29ff2c4..f8b4cbe 100644 |
| --- a/lib/fuseZipData.cpp |
| +++ b/lib/fuseZipData.cpp |
| @@ -706,55 +706,3 @@ FileNode *FuseZipData::find (const char *fname) const { |
| return i->second; |
| } |
| } |
| - |
| -void FuseZipData::save () { |
| - for (filemap_t::const_iterator i = files.begin(); i != files.end(); ++i) { |
| - FileNode *node = i->second; |
| - if (node == m_root) { |
| - if (node->isCommentChanged()) { |
| - int res = node->saveComment(); |
| - if (res != 0) { |
| - syslog(LOG_ERR, "Error while saving archive comment: %d", res); |
| - } |
| - } |
| - continue; |
| - } |
| - assert(node != NULL); |
| - bool saveMetadata = node->isMetadataChanged(); |
| - if (node->isChanged() && !node->is_dir()) { |
| - saveMetadata = true; |
| - int res = node->save(); |
| - if (res != 0) { |
| - saveMetadata = false; |
| - syslog(LOG_ERR, "Error while saving file %s in ZIP archive: %d", |
| - node->full_name.c_str(), res); |
| - } |
| - } |
| - if (saveMetadata) { |
| - if (node->isTemporaryDir()) { |
| - // persist temporary directory |
| - zip_int64_t idx = zip_dir_add(m_zip, |
| - node->full_name.c_str(), ZIP_FL_ENC_UTF_8); |
| - if (idx < 0) { |
| - syslog(LOG_ERR, "Unable to save directory %s in ZIP archive", |
| - node->full_name.c_str()); |
| - continue; |
| - } |
| - node->set_id(idx); |
| - } |
| - int res = node->saveMetadata(m_force_precise_time); |
| - if (res != 0) { |
| - syslog(LOG_ERR, "Error while saving metadata for file %s in ZIP archive: %d", |
| - node->full_name.c_str(), res); |
| - } |
| - } |
| - if (node->isCommentChanged()) { |
| - int res = node->saveComment(); |
| - if (res != 0) { |
| - syslog(LOG_ERR, "Error while saving comment for file %s in ZIP archive: %d", |
| - node->full_name.c_str(), res); |
| - } |
| - } |
| - } |
| -} |
| - |
| diff --git a/lib/fuseZipData.h b/lib/fuseZipData.h |
| index 92d4191..17275e7 100644 |
| --- a/lib/fuseZipData.h |
| +++ b/lib/fuseZipData.h |
| @@ -140,11 +140,6 @@ public: |
| size_t numFiles () const { |
| return files.size() - 1; |
| } |
| - |
| - /** |
| - * Save archive |
| - */ |
| - void save (); |
| }; |
| |
| #endif |
| diff --git a/lib/reader.cpp b/lib/reader.cpp |
| new file mode 100644 |
| index 0000000..45555d0 |
| --- /dev/null |
| +++ b/lib/reader.cpp |
| @@ -0,0 +1,282 @@ |
| +//////////////////////////////////////////////////////////////////////////// |
| +// Copyright 2021 Google LLC // |
| +// // |
| +// This program is free software: you can redistribute it and/or modify // |
| +// it under the terms of the GNU General Public License as published by // |
| +// the Free Software Foundation, either version 3 of the License, or // |
| +// (at your option) any later version. // |
| +// // |
| +// This program is distributed in the hope that it will be useful, // |
| +// but WITHOUT ANY WARRANTY; without even the implied warranty of // |
| +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // |
| +// GNU General Public License for more details. // |
| +// // |
| +// You should have received a copy of the GNU General Public License // |
| +// along with this program. If not, see <https://www.gnu.org/licenses/>.// |
| +//////////////////////////////////////////////////////////////////////////// |
| + |
| +#include "reader.h" |
| + |
| +#include <cassert> |
| +#include <limits> |
| +#include <stdexcept> |
| + |
| +#include <syslog.h> |
| + |
| +#include "util.h" |
| + |
| +// Flag for debug logs. |
| +static const bool debug_log = false; |
| + |
| +zip_uint64_t Reader::reader_count_ = 0; |
| + |
| +static void LimitSize(ssize_t *const a, off_t b) { |
| + if (*a > b) |
| + *a = static_cast<ssize_t>(b); |
| +} |
| + |
| +Reader::~Reader() { |
| + if (debug_log) |
| + syslog(LOG_INFO, "Reader %" PRIu64 ": Close", reader_id_); |
| +} |
| + |
| +Reader::Reader(struct zip *const archive, const zip_uint64_t file_id, |
| + const off_t expected_size) |
| + : file_id_(file_id), expected_size_(expected_size), |
| + file_(Open(archive, file_id)) { |
| + if (debug_log) |
| + syslog(LOG_INFO, "Reader %" PRIu64 ": Open file %" PRIu64, reader_id_, |
| + file_id_); |
| +} |
| + |
| +ZipFile Reader::Open(struct zip *const archive, const zip_uint64_t file_id) { |
| + ZipFile file(zip_fopen_index(archive, file_id, 0)); |
| + if (!file) |
| + throw ZipError("Cannot open file", archive); |
| + return file; |
| +} |
| + |
| +ssize_t Reader::ReadAtCurrentPosition(char *dest, ssize_t size) { |
| + assert(size >= 0); |
| + |
| + if (pos_ >= expected_size_) |
| + return 0; |
| + |
| + // Avoid reading bytes past the expected end of file. |
| + // https://github.com/nih-at/libzip/issues/261 |
| + LimitSize(&size, expected_size_ - pos_); |
| + |
| + if (size == 0) |
| + return 0; |
| + |
| + const ssize_t n = static_cast<ssize_t>(zip_fread(file_.get(), dest, size)); |
| + |
| + if (false && debug_log) |
| + syslog(LOG_INFO, "Reader %" PRIu64 ": zip_fread(%zd) returned %zd", |
| + reader_id_, size, n); |
| + |
| + if (n < 0) |
| + throw ZipError("Cannot read file", file_.get()); |
| + |
| + pos_ += n; |
| + return n; |
| +} |
| + |
| +char *UnbufferedReader::Read(char *dest, char *dest_end, off_t offset) { |
| + if (pos_ != offset) { |
| + if (debug_log) |
| + syslog(LOG_INFO, |
| + "Reader %" PRIu64 ": Jump %+" PRIi64 " from %" PRIu64 |
| + " to %" PRIu64, |
| + reader_id_, offset - pos_, pos_, offset); |
| + |
| + if (zip_fseek(file_.get(), offset, SEEK_SET) < 0) |
| + throw ZipError("Cannot fseek file", file_.get()); |
| + |
| + pos_ = offset; |
| + } |
| + |
| + assert(pos_ == offset); |
| + |
| + while (const ssize_t n = ReadAtCurrentPosition(dest, dest_end - dest)) { |
| + dest += n; |
| + } |
| + |
| + return dest; |
| +} |
| + |
| +void BufferedReader::AllocateBuffer(ssize_t buffer_size) { |
| + LimitSize(&buffer_size, expected_size_); |
| + |
| + if (buffer_size == 0) |
| + buffer_size = 1; |
| + |
| + if (buffer_size == buffer_size_) { |
| + assert(buffer_); |
| + // Already got a buffer of the right size. |
| + return; |
| + } |
| + |
| + buffer_.reset(); |
| + buffer_size_ = 0; |
| + |
| + while (true) { |
| + // Try to allocate buffer. |
| + try { |
| + buffer_.reset(new char[buffer_size]); |
| + buffer_size_ = buffer_size; |
| + if (debug_log) |
| + syslog(LOG_INFO, "Reader %" PRIu64 ": Allocate %zd KB buffer", |
| + reader_id_, buffer_size_ >> 10); |
| + return; |
| + } catch (const std::bad_alloc &error) { |
| + // Probably too big. |
| + if (debug_log) |
| + syslog(LOG_ERR, |
| + "Reader %" PRIu64 ": Cannot allocate %zd KB buffer: %s", |
| + reader_id_, buffer_size >> 10, error.what()); |
| + |
| + // If we couldn't even allocate 1KB, we ran out of memory or of |
| + // addressable space. Simply propagate the error. |
| + if (buffer_size <= 1024) |
| + throw; |
| + |
| + // Try a smaller buffer. |
| + buffer_size >>= 1; |
| + } |
| + } |
| +} |
| + |
| +void BufferedReader::Restart() { |
| + if (debug_log) |
| + syslog(LOG_INFO, "Reader %" PRIu64 ": Rewind", reader_id_); |
| + |
| + // Restart from the file beginning. |
| + file_ = Open(archive_, file_id_); |
| + pos_ = 0; |
| + buffer_start_ = 0; |
| + |
| + // Allocate a possibly bigger buffer. We have to be careful on 32-bit |
| + // devices, since they have a limited addressable space. |
| + AllocateBuffer((std::numeric_limits<ssize_t>::max() >> 1) + 1); |
| +} |
| + |
| +void BufferedReader::Advance(off_t jump) { |
| + assert(jump >= 0); |
| + |
| + if (jump <= 0) |
| + return; |
| + |
| + if (debug_log) |
| + syslog(LOG_INFO, |
| + "Reader %" PRIu64 ": Skip %" PRIi64 " bytes from %" PRIu64 |
| + " to %" PRIu64, |
| + reader_id_, jump, pos_, pos_ + jump); |
| + |
| + do { |
| + ssize_t count = buffer_size_ - buffer_start_; |
| + LimitSize(&count, jump); |
| + |
| + assert(count > 0); |
| + count = ReadAtCurrentPosition(&buffer_[buffer_start_], count); |
| + if (count == 0) |
| + return; |
| + |
| + buffer_start_ += count; |
| + if (buffer_start_ >= buffer_size_) { |
| + assert(buffer_start_ == buffer_size_); |
| + buffer_start_ = 0; |
| + } |
| + |
| + jump -= count; |
| + } while (jump > 0); |
| + |
| + assert(jump == 0); |
| +} |
| + |
| +char *BufferedReader::ReadFromBufferAndAdvance(char *dest, char *const dest_end, |
| + const off_t offset) { |
| + const off_t jump = offset - pos_; |
| + |
| + if (jump >= 0) { |
| + // Jump forwards. |
| + Advance(jump); |
| + return dest; |
| + } |
| + |
| + // Jump backwards. |
| + assert(jump < 0); |
| + |
| + if (jump + buffer_size_ < 0) { |
| + // The backwards jump is too big and falls outside the buffer. |
| + Restart(); |
| + Advance(offset); |
| + return dest; |
| + } |
| + |
| + // The backwards jump is small enough to fall inside the buffer. |
| + assert(-jump <= buffer_size_); |
| + |
| + // Read data from the buffer. |
| + ssize_t i = buffer_start_ + jump; |
| + |
| + do { |
| + ssize_t size = -i; |
| + ssize_t start = i; |
| + if (i < 0) { |
| + if (debug_log) |
| + syslog(LOG_INFO, "Reader %" PRIu64 ": Negative part of buffer", |
| + reader_id_); |
| + start += buffer_size_; |
| + } else { |
| + size += buffer_start_; |
| + } |
| + |
| + LimitSize(&size, dest_end - dest); |
| + assert(size > 0); |
| + |
| + if (debug_log) |
| + syslog(LOG_INFO, |
| + "Reader %" PRIu64 ": Read %zd bytes from cache position %zd", |
| + reader_id_, size, i - buffer_start_); |
| + |
| + std::memcpy(dest, &buffer_[start], size); |
| + dest += size; |
| + i += size; |
| + } while (i < buffer_start_ && dest < dest_end); |
| + |
| + return dest; |
| +} |
| + |
| +char *BufferedReader::Read(char *dest, char *const dest_end, |
| + const off_t offset) { |
| + if (offset >= expected_size_) |
| + return dest; |
| + |
| + if (dest == dest_end) |
| + return dest; |
| + |
| + // If we don't have a buffer, then we don't have enough memory. |
| + if (!buffer_) |
| + throw std::bad_alloc(); |
| + |
| + assert(buffer_); |
| + assert(buffer_size_ > 0); |
| + |
| + // Read data from buffer if possible. |
| + dest = ReadFromBufferAndAdvance(dest, dest_end, offset); |
| + |
| + // Read data from file while keeping the rolling buffer up to date. |
| + while ( |
| + const ssize_t size = ReadAtCurrentPosition( |
| + &buffer_[buffer_start_], |
| + std::min<ssize_t>(dest_end - dest, buffer_size_ - buffer_start_))) { |
| + memcpy(dest, &buffer_[buffer_start_], size); |
| + dest += size; |
| + buffer_start_ += size; |
| + if (buffer_start_ == buffer_size_) |
| + buffer_start_ = 0; |
| + } |
| + |
| + return dest; |
| +} |
| diff --git a/lib/reader.h b/lib/reader.h |
| new file mode 100644 |
| index 0000000..3ca195e |
| --- /dev/null |
| +++ b/lib/reader.h |
| @@ -0,0 +1,162 @@ |
| +//////////////////////////////////////////////////////////////////////////// |
| +// Copyright 2021 Google LLC // |
| +// // |
| +// This program is free software: you can redistribute it and/or modify // |
| +// it under the terms of the GNU General Public License as published by // |
| +// the Free Software Foundation, either version 3 of the License, or // |
| +// (at your option) any later version. // |
| +// // |
| +// This program is distributed in the hope that it will be useful, // |
| +// but WITHOUT ANY WARRANTY; without even the implied warranty of // |
| +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // |
| +// GNU General Public License for more details. // |
| +// // |
| +// You should have received a copy of the GNU General Public License // |
| +// along with this program. If not, see <https://www.gnu.org/licenses/>.// |
| +//////////////////////////////////////////////////////////////////////////// |
| + |
| +#ifndef READER_H |
| +#define READER_H |
| + |
| +#include <memory> |
| + |
| +#include <zip.h> |
| + |
| +#include "types.h" |
| + |
| +struct ZipClose { |
| + void operator()(zip_file_t *const file) const { zip_fclose(file); } |
| +}; |
| + |
| +using ZipFile = std::unique_ptr<zip_file_t, ZipClose>; |
| + |
| +// Base abstract class for Reader objects that reads and return bytes from a |
| +// file stored or compressed in a ZIP archive. |
| +class Reader { |
| + public: |
| + virtual ~Reader(); |
| + |
| + Reader(struct zip *archive, zip_uint64_t file_id, off_t expected_size); |
| + |
| + Reader(const Reader &) = delete; |
| + Reader &operator=(const Reader &) = delete; |
| + |
| + // Reads |dest_end - dest| bytes at the given file |offset| and stores them |
| + // into |dest|. Tries to fill the |dest| buffer, and only returns a "short |
| + // read" with fewer than |dest_end - dest| bytes if the end of the file is |
| + // reached. Returns a pointer past the last byte written in |dest|, which |
| + // should be |dest_end| if the end of the file has not been reached. Throws |
| + // ZipError in case of error |
| + virtual char *Read(char *dest, char *dest_end, off_t offset) = 0; |
| + |
| + protected: |
| + // Opens the file at index |file_id|. Throws ZipError in case of error. |
| + static ZipFile Open(struct zip *archive, zip_uint64_t file_id); |
| + |
| + // Reads up to |size| bytes at the current position pos_ and stores them |
| + // into |dest|. Returns the number of bytes actually read, which could be |
| + // less than |size|. Returns 0 if |size| is 0. Returns 0 if the end of file |
| + // has been reached, and there is nothing left to be read. Updates the |
| + // current position pos_. Throws ZipError in case of error |
| + ssize_t ReadAtCurrentPosition(char *dest, ssize_t size); |
| + |
| + // Number of created Reader objects. |
| + static zip_uint64_t reader_count_; |
| + |
| + // ID of this Reader (for debug traces). |
| + const zip_uint64_t reader_id_ = ++reader_count_; |
| + |
| + // ID of the file being read. |
| + const zip_uint64_t file_id_; |
| + |
| + // Expected size of the file being read. |
| + const off_t expected_size_; |
| + |
| + // File being read. |
| + ZipFile file_; |
| + |
| + // Current position of the file being read. |
| + off_t pos_ = 0; |
| +}; |
| + |
| +using ReaderPtr = std::unique_ptr<Reader>; |
| + |
| +// Reader used for uncompressed files, ie files that are simply stored without |
| +// compression in the ZIP archive. These files can be accessed in random order, |
| +// and don't require any buffering. |
| +class UnbufferedReader : public Reader { |
| + public: |
| + using Reader::Reader; |
| + |
| + char *Read(char *dest, char *dest_end, off_t offset) override; |
| +}; |
| + |
| +// Reader used for compressed files. It features a decompression engine and a |
| +// rolling buffer holding the latest decompressed bytes. |
| +// |
| +// During the first decompression pass, the rolling buffer contains 500KB (or |
| +// less if the |expected_size| is smaller). This is usually enough to |
| +// accommodate the possible out-of-order read operations due to the kernel's |
| +// readahead optimization. |
| +// |
| +// If a read operation starts at an offset located before the start of the |
| +// rolling buffer, then this BufferedReader restarts decompressing the file from |
| +// the beginning, but for this second pass it will use a rolling buffer as big |
| +// as possible (as big as |expected_size| if there is enough addressable space). |
| +class BufferedReader : public Reader { |
| + public: |
| + BufferedReader(struct zip *const archive, const zip_uint64_t file_id, |
| + const off_t expected_size) |
| + : Reader(archive, file_id, expected_size), archive_(archive) { |
| + AllocateBuffer(500 << 10); // 500KB |
| + } |
| + |
| + char *Read(char *dest, char *dest_end, off_t offset) override; |
| + |
| + protected: |
| + // Allocates a rolling buffer up to |buffer_size| or the |expected_size| |
| + // passed to the constructor, whichever is smaller. |
| + // Throws std::bad_alloc in case of memory allocation error. |
| + void AllocateBuffer(ssize_t buffer_size); |
| + |
| + // Allocates a bigger buffer and restarts decompressing from the beginning. |
| + // Throws std::bad_alloc in case of memory allocation error. |
| + // Throws a ZipError in case of error. |
| + void Restart(); |
| + |
| + // Advances the position of the decompression engine by |jump| bytes. |
| + // Throws a ZipError in case of error. |
| + // Precondition: the buffer is allocated. |
| + // Precondition: |jump >= 0| |
| + void Advance(off_t jump); |
| + |
| + // Reads as many bytes as possible (up to |dest_end - dest| bytes) from the |
| + // rolling buffer and stores them in |dest|. If the start |offset| is not in |
| + // the rolling buffer, then advances the position of the decompression |
| + // engine (while keeping the rolling buffer up to date) to the position |
| + // |offset| or the end of the file, whichever comes first. Returns a pointer |
| + // past the last byte written in |dest|. Throws a ZipError in case of error. |
| + // Precondition: the buffer is allocated. |
| + char *ReadFromBufferAndAdvance(char *dest, char *dest_end, off_t offset); |
| + |
| + // Pointer to the ZIP structure. Used when starting a second decompression |
| + // pass. |
| + struct zip *const archive_; |
| + |
| + // Index of the rolling buffer where the oldest byte is currently stored |
| + // (and where the next decompressed byte at the file position |pos_| will be |
| + // stored). |
| + // Invariant: 0 <= buffer_start_ < buffer_size_ once the buffer is |
| + // allocated. |
| + ssize_t buffer_start_ = 0; |
| + |
| + // Size of the rolling buffer. |
| + // Invariant: 0 < buffer_size_ once the buffer is allocated. |
| + ssize_t buffer_size_ = 0; |
| + |
| + // Rolling buffer. |
| + // Invariant: buffer_ != nullptr once the buffer is allocated. |
| + std::unique_ptr<char[]> buffer_; |
| +}; |
| + |
| +#endif |
| diff --git a/main.cpp b/main.cpp |
| index 63cc2c5..6f18201 100644 |
| --- a/main.cpp |
| +++ b/main.cpp |
| @@ -249,13 +249,11 @@ int main(int argc, char *argv[]) try { |
| fusezip_oper.statfs = fusezip_statfs; |
| fusezip_oper.open = fusezip_open; |
| fusezip_oper.read = fusezip_read; |
| - fusezip_oper.write = fusezip_write; |
| fusezip_oper.release = fusezip_release; |
| fusezip_oper.unlink = fusezip_unlink; |
| fusezip_oper.rmdir = fusezip_rmdir; |
| fusezip_oper.mkdir = fusezip_mkdir; |
| fusezip_oper.rename = fusezip_rename; |
| - fusezip_oper.create = fusezip_create; |
| fusezip_oper.mknod = fusezip_mknod; |
| fusezip_oper.chmod = fusezip_chmod; |
| fusezip_oper.chown = fusezip_chown; |
| @@ -266,8 +264,6 @@ int main(int argc, char *argv[]) try { |
| fusezip_oper.releasedir = fusezip_releasedir; |
| fusezip_oper.access = fusezip_access; |
| fusezip_oper.utimens = fusezip_utimens; |
| - fusezip_oper.ftruncate = fusezip_ftruncate; |
| - fusezip_oper.truncate = fusezip_truncate; |
| fusezip_oper.setxattr = fusezip_setxattr; |
| fusezip_oper.getxattr = fusezip_getxattr; |
| fusezip_oper.listxattr = fusezip_listxattr; |
| -- |
| 2.33.0.464.g1972c5931b-goog |
| |