blob: 8cd7277339b572c2b526cce7b2f1252e0157834f [file] [log] [blame]
From 98c02858983d8e54e0169aa8f12f6bed6a1061e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Degros?= <fdegros@chromium.org>
Date: Thu, 18 Jun 2020 23:35:55 +1000
Subject: [PATCH] Rolling cache
Use a rolling cache of uncompressed data for each file being read.
This makes it possible to accommodate out-of-order read operations.
---
lib/dataNode.cpp | 113 ++----------------
lib/dataNode.h | 30 +----
lib/fileNode.cpp | 25 +---
lib/fileNode.h | 25 +---
lib/fuse-zip.cpp | 84 ++-----------
lib/fuse-zip.h | 8 --
lib/fuseZipData.cpp | 52 --------
lib/fuseZipData.h | 5 -
lib/reader.cpp | 282 ++++++++++++++++++++++++++++++++++++++++++++
lib/reader.h | 162 +++++++++++++++++++++++++
main.cpp | 4 -
11 files changed, 469 insertions(+), 321 deletions(-)
create mode 100644 lib/reader.cpp
create mode 100644 lib/reader.h
diff --git a/lib/dataNode.cpp b/lib/dataNode.cpp
index 0efd7bc..893e3d1 100644
--- a/lib/dataNode.cpp
+++ b/lib/dataNode.cpp
@@ -47,8 +47,6 @@ std::shared_ptr<DataNode> DataNode::createNew(mode_t mode, uid_t uid, gid_t gid,
std::shared_ptr<DataNode> n(new DataNode(FAKE_ID, mode, uid, gid, dev));
n->_state = NodeState::NEW;
- n->_buffer.reset(new BigBuffer());
-
n->_has_btime = true;
n->_metadataChanged = true;
n->_mtime = n->_atime = n->_ctime = n->_btime = currentTime();
@@ -60,8 +58,6 @@ std::shared_ptr<DataNode> DataNode::createTmpDir(mode_t mode, uid_t uid, gid_t g
std::shared_ptr<DataNode> n(new DataNode(FAKE_ID, mode, uid, gid, dev));
n->_state = NodeState::NEW;
- n->_buffer.reset(new BigBuffer());
-
n->_has_btime = true;
n->_metadataChanged = false;
n->_mtime = n->_atime = n->_ctime = n->_btime = currentTime();
@@ -102,94 +98,24 @@ std::shared_ptr<DataNode> DataNode::createExisting(struct zip *zip, zip_uint64_t
return n;
}
-int DataNode::open(struct zip *zip) {
- if (_state == NodeState::NEW || _state == NodeState::VIRTUAL_SYMLINK) {
- return 0;
- }
- if (_state == NodeState::OPENED) {
- if (_open_count == INT_MAX) {
- return -EMFILE;
- } else {
- ++_open_count;
- }
- }
- if (_state == NodeState::CLOSED) {
- _open_count = 1;
- assert(zip != NULL);
- if (_size > std::numeric_limits<size_t>::max()) {
- return -ENOMEM;
- }
- assert(_id != FAKE_ID);
- _buffer.reset(new BigBuffer(zip, _id, static_cast<size_t>(_size)));
- _state = NodeState::OPENED;
- }
- return 0;
-}
+ReaderPtr DataNode::open(struct zip *zip) {
+ struct zip_stat st = {};
+ if (zip_stat_index(zip, _id, 0, &st) < 0)
+ throw ZipError("Cannot stat file", zip);
-int DataNode::read(char *buf, size_t sz, size_t offset) {
_atime = currentTime();
- return _buffer->read(buf, sz, offset);
-}
-int DataNode::write(const char *buf, size_t sz, size_t offset) {
- assert(_state != NodeState::VIRTUAL_SYMLINK);
- if (_state == NodeState::OPENED) {
- _state = NodeState::CHANGED;
+ if ((st.valid & ZIP_STAT_COMP_METHOD) != 0 &&
+ st.comp_method == ZIP_CM_STORE) {
+ // The file is stored without compression.
+ return ReaderPtr(new UnbufferedReader(zip, _id, st.size));
}
- _mtime = currentTime();
- _metadataChanged = true;
- return _buffer->write(buf, sz, offset);
-}
-int DataNode::close() {
- _size = _buffer->len;
- if (_state == NodeState::OPENED && --_open_count == 0) {
- _buffer.reset();
- _state = NodeState::CLOSED;
- }
- return 0;
+ // The file is compressed.
+ return ReaderPtr(new BufferedReader(zip, _id, st.size));
}
-int DataNode::save(struct zip *zip, const char *full_name, zip_int64_t &index) {
- assert(zip != NULL);
- assert(full_name != NULL);
- return _buffer->saveToZip(_mtime.tv_sec, zip, full_name,
- _state == NodeState::NEW, index);
-}
-
-//int DataNode::saveMetadata(bool force_precise_time) const {
-// assert(zip != NULL);
-// assert(_id >= 0);
-//
-// int res = updateExtraFields(force_precise_time);
-// if (res != 0)
-// return res;
-// return updateExternalAttributes();
-//}
-
-int DataNode::truncate(size_t offset) {
- assert(_state != NodeState::VIRTUAL_SYMLINK);
- if (_state != NodeState::CLOSED) {
- if (_state != NodeState::NEW) {
- _state = NodeState::CHANGED;
- }
- _buffer->truncate(offset);
- _mtime = currentTime();
- _metadataChanged = true;
- return 0;
- } else {
- return EBADF;
- }
-}
-
-zip_uint64_t DataNode::size() const {
- if (_state == NodeState::NEW || _state == NodeState::OPENED || _state == NodeState::CHANGED ||
- _state == NodeState::VIRTUAL_SYMLINK) {
- return _buffer->len;
- } else {
- return _size;
- }
-}
+zip_uint64_t DataNode::size() const { return _size; }
/**
* Get timestamp information from extra fields.
@@ -355,23 +281,6 @@ void DataNode::processPkWareUnixField(zip_uint16_t type, zip_uint16_t len, const
_atime.tv_nsec = 0;
}
_device = dev;
- // use PKWARE link target only if link target in Info-ZIP format is not
- // specified (empty file content)
- if (S_ISLNK(_mode) && _size == 0 && link_len > 0) {
- assert(_state == NodeState::CLOSED || _state == NodeState::VIRTUAL_SYMLINK);
- if (_state == NodeState::VIRTUAL_SYMLINK)
- {
- _state = NodeState::CLOSED;
- _buffer.reset();
- }
- _buffer.reset(new BigBuffer());
- if (!_buffer)
- return;
- assert(link != NULL);
- _buffer->write(link, link_len, 0);
- _state = NodeState::VIRTUAL_SYMLINK;
- }
- // hardlinks are handled in FuseZipData::build_tree
}
void DataNode::chmod (mode_t mode) {
diff --git a/lib/dataNode.h b/lib/dataNode.h
index 7489ec9..e1552f5 100644
--- a/lib/dataNode.h
+++ b/lib/dataNode.h
@@ -27,8 +27,8 @@
#include <sys/stat.h>
#include <unistd.h>
+#include "reader.h"
#include "types.h"
-#include "bigBuffer.h"
class DataNode {
private:
@@ -46,7 +46,6 @@ private:
};
zip_uint64_t _id;
- std::unique_ptr<BigBuffer> _buffer;
int _open_count;
NodeState _state;
@@ -72,21 +71,7 @@ public:
static std::shared_ptr<DataNode> createTmpDir(mode_t mode, uid_t uid, gid_t gid, dev_t dev);
static std::shared_ptr<DataNode> createExisting(struct zip *zip, zip_uint64_t id, mode_t mode);
- int open(struct zip *zip);
- int read(char *buf, size_t size, size_t offset);
- int write(const char *buf, size_t size, size_t offset);
- int close();
-
- /**
- * Invoke zip_file_add() or zip_file_replace() for file to save it.
- * Should be called only if item is needed to ba saved into zip file.
- *
- * @param zip zip structure pointer
- * @param full_name full file name
- * @param index file node index (updated if state is NEW)
- * @return 0 if success, != 0 on error
- */
- int save(struct zip *zip, const char *full_name, zip_int64_t &index);
+ ReaderPtr open(struct zip *zip);
/**
* Save file metadata to ZIP
@@ -95,17 +80,6 @@ public:
*/
int saveMetadata (bool force_precise_time) const;
- /**
- * Truncate file.
- *
- * @return
- * 0 If successful
- * EBADF If file is currently closed
- * EIO If insufficient memory available (because ENOMEM not
- * listed in truncate() error codes)
- */
- int truncate(size_t offset);
-
inline bool isChanged() const {
return _state == NodeState::CHANGED
|| _state == NodeState::NEW
diff --git a/lib/fileNode.cpp b/lib/fileNode.cpp
index cedaaa6..e0acd87 100644
--- a/lib/fileNode.cpp
+++ b/lib/fileNode.cpp
@@ -170,28 +170,10 @@ void FileNode::rename(const char *new_name) {
parse_name();
}
-int FileNode::open() {
+ReaderPtr FileNode::open() {
return _data->open(zip);
}
-int FileNode::read(char *buf, size_t sz, size_t offset) {
- return _data->read(buf, sz, offset);
-}
-
-int FileNode::write(const char *buf, size_t sz, size_t offset) {
- return _data->write(buf, sz, offset);
-}
-
-int FileNode::close() {
- return _data->close();
-}
-
-int FileNode::save() {
- assert (!is_dir());
- // index is modified if state == NEW
- return _data->save(zip, full_name.c_str(), _id);
-}
-
int FileNode::saveMetadata(bool force_precise_time) const {
assert(zip != NULL);
assert(_id >= 0);
@@ -209,11 +191,6 @@ int FileNode::saveComment() const {
return zip_file_set_comment(zip, id(), m_comment, m_commentLen, 0);
}
-int FileNode::truncate(size_t offset) {
- assert(_data);
- return _data->truncate(offset);
-}
-
zip_uint64_t FileNode::size() const {
return _data->size();
}
diff --git a/lib/fileNode.h b/lib/fileNode.h
index 0d18b01..08dc42c 100644
--- a/lib/fileNode.h
+++ b/lib/fileNode.h
@@ -27,7 +27,6 @@
#include <sys/stat.h>
#include "types.h"
-#include "bigBuffer.h"
#include "dataNode.h"
class FileNode {
@@ -110,18 +109,7 @@ public:
*/
void rename (const char *new_name);
- int open();
- int read(char *buf, size_t size, size_t offset);
- int write(const char *buf, size_t size, size_t offset);
- int close();
-
- /**
- * Invoke zip_file_add() or zip_file_replace() for file to save it.
- * Should be called only if item is needed to ba saved into zip file.
- *
- * @return 0 if success, != 0 on error
- */
- int save();
+ ReaderPtr open();
/**
* Save file metadata to ZIP
@@ -136,17 +124,6 @@ public:
*/
int saveComment() const;
- /**
- * Truncate file.
- *
- * @return
- * 0 If successful
- * EBADF If file is currently closed
- * EIO If insufficient memory available (because ENOMEM not
- * listed in truncate() error codes)
- */
- int truncate(size_t offset);
-
inline bool isChanged() const {
return _data->isChanged();
}
diff --git a/lib/fuse-zip.cpp b/lib/fuse-zip.cpp
index f69e60a..a1c2ea0 100644
--- a/lib/fuse-zip.cpp
+++ b/lib/fuse-zip.cpp
@@ -118,7 +118,6 @@ inline struct zip *get_zip() {
void fusezip_destroy(void *data) {
FuseZipData *d = (FuseZipData*)data;
- d->save ();
delete d;
}
@@ -212,35 +211,14 @@ int fusezip_open(const char *path, struct fuse_file_info *fi) try {
if (node->is_dir()) {
return -EISDIR;
}
- fi->fh = (uint64_t)node;
- return node->open();
+ ReaderPtr reader = node->open();
+ fi->fh = reinterpret_cast<uint64_t>(reader.release());
+ return 0;
} catch (...) {
return exceptionToError("open file", path);
}
-int fusezip_create(const char *path, mode_t mode,
- struct fuse_file_info *fi) try {
- if (*path == '\0') {
- return -EACCES;
- }
- FileNode *node = get_file_node(path + 1);
- if (node != NULL) {
- return -EEXIST;
- }
- node = FileNode::createFile(get_zip(), path + 1, fuse_get_context()->uid,
- fuse_get_context()->gid, mode);
- if (node == NULL) {
- return -ENOMEM;
- }
- get_data()->insertNode(node);
- fi->fh = (uint64_t)node;
-
- return node->open();
-} catch (...) {
- return exceptionToError("create file", path);
-}
-
int fusezip_mknod(const char *path, mode_t mode, dev_t dev) try {
if (*path == '\0') {
return -EACCES;
@@ -265,64 +243,22 @@ int fusezip_read(const char *path, char *buf, size_t size, off_t offset,
struct fuse_file_info *fi) try {
if (offset < 0)
return -EINVAL;
- return reinterpret_cast<FileNode *>(fi->fh)->read(
- buf, size, static_cast<size_t>(offset));
+ return static_cast<int>(
+ reinterpret_cast<Reader *>(fi->fh)->Read(
+ buf, buf + std::min<size_t>(size, std::numeric_limits<int>::max()),
+ offset) -
+ buf);
} catch (...) {
return exceptionToError("read file", path);
}
-int fusezip_write(const char *path, const char *buf, size_t size, off_t offset,
- struct fuse_file_info *fi) try {
- if (offset < 0)
- return -EINVAL;
- return reinterpret_cast<FileNode *>(fi->fh)->write(
- buf, size, static_cast<size_t>(offset));
-} catch (...) {
- return exceptionToError("write file", path);
-}
-
int fusezip_release(const char *path, struct fuse_file_info *fi) try {
- return reinterpret_cast<FileNode *>(fi->fh)->close();
+ const ReaderPtr p(reinterpret_cast<Reader *>(fi->fh));
+ return 0;
} catch (...) {
return exceptionToError("close file", path);
}
-int fusezip_ftruncate(const char *path, off_t offset,
- struct fuse_file_info *fi) try {
- if (offset < 0)
- return -EINVAL;
- return -reinterpret_cast<FileNode *>(fi->fh)->truncate(
- static_cast<size_t>(offset));
-} catch (...) {
- return exceptionToError("truncate file", path);
-}
-
-int fusezip_truncate(const char *path, off_t offset) try {
- if (*path == '\0') {
- return -EACCES;
- }
- if (offset < 0)
- return -EINVAL;
- FileNode *node = get_file_node(path + 1);
- if (node == NULL) {
- return -ENOENT;
- }
- if (node->is_dir()) {
- return -EISDIR;
- }
- int res;
- if ((res = node->open()) != 0) {
- return res;
- }
- if ((res = node->truncate(static_cast<size_t>(offset))) != 0) {
- node->close();
- return -res;
- }
- return node->close();
-} catch (...) {
- return exceptionToError("truncate file", path);
-}
-
int fusezip_unlink(const char *path) {
if (*path == '\0') {
return -ENOENT;
diff --git a/lib/fuse-zip.h b/lib/fuse-zip.h
index 74990db..b40270c 100644
--- a/lib/fuse-zip.h
+++ b/lib/fuse-zip.h
@@ -63,20 +63,12 @@ int fusezip_statfs(const char *path, struct statvfs *buf);
int fusezip_open(const char *path, struct fuse_file_info *fi);
-int fusezip_create(const char *path, mode_t mode, struct fuse_file_info *fi);
-
int fusezip_mknod(const char *path, mode_t mode, dev_t dev);
int fusezip_read(const char *path, char *buf, size_t size, off_t offset, struct fuse_file_info *fi);
-int fusezip_write(const char *path, const char *buf, size_t size, off_t offset, struct fuse_file_info *fi);
-
int fusezip_release (const char *path, struct fuse_file_info *fi);
-int fusezip_ftruncate(const char *path, off_t offset, struct fuse_file_info *fi);
-
-int fusezip_truncate(const char *path, off_t offset);
-
int fusezip_unlink(const char *path);
int fusezip_rmdir(const char *path);
diff --git a/lib/fuseZipData.cpp b/lib/fuseZipData.cpp
index 29ff2c4..f8b4cbe 100644
--- a/lib/fuseZipData.cpp
+++ b/lib/fuseZipData.cpp
@@ -706,55 +706,3 @@ FileNode *FuseZipData::find (const char *fname) const {
return i->second;
}
}
-
-void FuseZipData::save () {
- for (filemap_t::const_iterator i = files.begin(); i != files.end(); ++i) {
- FileNode *node = i->second;
- if (node == m_root) {
- if (node->isCommentChanged()) {
- int res = node->saveComment();
- if (res != 0) {
- syslog(LOG_ERR, "Error while saving archive comment: %d", res);
- }
- }
- continue;
- }
- assert(node != NULL);
- bool saveMetadata = node->isMetadataChanged();
- if (node->isChanged() && !node->is_dir()) {
- saveMetadata = true;
- int res = node->save();
- if (res != 0) {
- saveMetadata = false;
- syslog(LOG_ERR, "Error while saving file %s in ZIP archive: %d",
- node->full_name.c_str(), res);
- }
- }
- if (saveMetadata) {
- if (node->isTemporaryDir()) {
- // persist temporary directory
- zip_int64_t idx = zip_dir_add(m_zip,
- node->full_name.c_str(), ZIP_FL_ENC_UTF_8);
- if (idx < 0) {
- syslog(LOG_ERR, "Unable to save directory %s in ZIP archive",
- node->full_name.c_str());
- continue;
- }
- node->set_id(idx);
- }
- int res = node->saveMetadata(m_force_precise_time);
- if (res != 0) {
- syslog(LOG_ERR, "Error while saving metadata for file %s in ZIP archive: %d",
- node->full_name.c_str(), res);
- }
- }
- if (node->isCommentChanged()) {
- int res = node->saveComment();
- if (res != 0) {
- syslog(LOG_ERR, "Error while saving comment for file %s in ZIP archive: %d",
- node->full_name.c_str(), res);
- }
- }
- }
-}
-
diff --git a/lib/fuseZipData.h b/lib/fuseZipData.h
index 92d4191..17275e7 100644
--- a/lib/fuseZipData.h
+++ b/lib/fuseZipData.h
@@ -140,11 +140,6 @@ public:
size_t numFiles () const {
return files.size() - 1;
}
-
- /**
- * Save archive
- */
- void save ();
};
#endif
diff --git a/lib/reader.cpp b/lib/reader.cpp
new file mode 100644
index 0000000..45555d0
--- /dev/null
+++ b/lib/reader.cpp
@@ -0,0 +1,282 @@
+////////////////////////////////////////////////////////////////////////////
+// Copyright 2021 Google LLC //
+// //
+// This program is free software: you can redistribute it and/or modify //
+// it under the terms of the GNU General Public License as published by //
+// the Free Software Foundation, either version 3 of the License, or //
+// (at your option) any later version. //
+// //
+// This program is distributed in the hope that it will be useful, //
+// but WITHOUT ANY WARRANTY; without even the implied warranty of //
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
+// GNU General Public License for more details. //
+// //
+// You should have received a copy of the GNU General Public License //
+// along with this program. If not, see <https://www.gnu.org/licenses/>.//
+////////////////////////////////////////////////////////////////////////////
+
+#include "reader.h"
+
+#include <cassert>
+#include <limits>
+#include <stdexcept>
+
+#include <syslog.h>
+
+#include "util.h"
+
+// Set to true to enable the syslog traces guarded by debug_log in this file.
+static const bool debug_log = false;
+
+zip_uint64_t Reader::reader_count_ = 0;  // Incremented to derive each Reader's reader_id_.
+
+static void LimitSize(ssize_t *const a, off_t b) {  // Lowers |*a| to |b| if |*a| is larger; otherwise leaves it alone.
+ if (*a > b)
+ *a = static_cast<ssize_t>(b);
+}
+
+Reader::~Reader() {  // Only logs; file_ is closed by its ZipClose deleter.
+ if (debug_log)
+ syslog(LOG_INFO, "Reader %" PRIu64 ": Close", reader_id_);
+}
+
+Reader::Reader(struct zip *const archive, const zip_uint64_t file_id,
+ const off_t expected_size)
+ : file_id_(file_id), expected_size_(expected_size),
+ file_(Open(archive, file_id)) {  // Open() throws ZipError if the entry cannot be opened.
+ if (debug_log)
+ syslog(LOG_INFO, "Reader %" PRIu64 ": Open file %" PRIu64, reader_id_,
+ file_id_);
+}
+
+ZipFile Reader::Open(struct zip *const archive, const zip_uint64_t file_id) {
+ ZipFile file(zip_fopen_index(archive, file_id, 0));  // ZipFile owns the handle and closes it with zip_fclose().
+ if (!file)  // zip_fopen_index() returned a null handle.
+ throw ZipError("Cannot open file", archive);
+ return file;
+}
+
+ssize_t Reader::ReadAtCurrentPosition(char *dest, ssize_t size) {
+ assert(size >= 0);
+
+ if (pos_ >= expected_size_)  // Nothing left before the expected end of file.
+ return 0;
+
+ // Avoid reading bytes past the expected end of file.
+ // https://github.com/nih-at/libzip/issues/261
+ LimitSize(&size, expected_size_ - pos_);
+
+ if (size == 0)  // The caller asked for no bytes.
+ return 0;
+
+ const ssize_t n = static_cast<ssize_t>(zip_fread(file_.get(), dest, size));
+
+ if (false && debug_log)  // NOTE(review): trace is hard-disabled by `false &&`; presumably too noisy -- confirm.
+ syslog(LOG_INFO, "Reader %" PRIu64 ": zip_fread(%zd) returned %zd",
+ reader_id_, size, n);
+
+ if (n < 0)  // zip_fread() reported an error.
+ throw ZipError("Cannot read file", file_.get());
+
+ pos_ += n;  // n may be smaller than |size| (short read).
+ return n;
+}
+
+char *UnbufferedReader::Read(char *dest, char *dest_end, off_t offset) {
+ if (pos_ != offset) {  // Non-sequential access: reposition the file first.
+ if (debug_log)
+ syslog(LOG_INFO,
+ "Reader %" PRIu64 ": Jump %+" PRIi64 " from %" PRIu64
+ " to %" PRIu64,
+ reader_id_, offset - pos_, pos_, offset);
+
+ if (zip_fseek(file_.get(), offset, SEEK_SET) < 0)
+ throw ZipError("Cannot fseek file", file_.get());
+
+ pos_ = offset;  // Keep the tracked position in sync with the seek.
+ }
+
+ assert(pos_ == offset);
+
+ while (const ssize_t n = ReadAtCurrentPosition(dest, dest_end - dest)) {  // Stops on 0: |dest| is full or no more data.
+ dest += n;
+ }
+
+ return dest;
+}
+
+void BufferedReader::AllocateBuffer(ssize_t buffer_size) {
+ LimitSize(&buffer_size, expected_size_);  // Never ask for more than the expected file size.
+
+ if (buffer_size == 0)  // Keeps buffer_size_ > 0 even when expected_size_ is 0.
+ buffer_size = 1;
+
+ if (buffer_size == buffer_size_) {
+ assert(buffer_);
+ // Already got a buffer of the right size.
+ return;
+ }
+
+ buffer_.reset();  // Release the old buffer first; buffer_ stays null if every attempt below fails.
+ buffer_size_ = 0;
+
+ while (true) {
+ // Try to allocate buffer.
+ try {
+ buffer_.reset(new char[buffer_size]);
+ buffer_size_ = buffer_size;
+ if (debug_log)
+ syslog(LOG_INFO, "Reader %" PRIu64 ": Allocate %zd KB buffer",
+ reader_id_, buffer_size_ >> 10);
+ return;
+ } catch (const std::bad_alloc &error) {
+ // Probably too big.
+ if (debug_log)
+ syslog(LOG_ERR,
+ "Reader %" PRIu64 ": Cannot allocate %zd KB buffer: %s",
+ reader_id_, buffer_size >> 10, error.what());
+
+ // If we couldn't even allocate 1KB, we ran out of memory or of
+ // addressable space. Simply propagate the error.
+ if (buffer_size <= 1024)
+ throw;
+
+ // Try a smaller buffer.
+ buffer_size >>= 1;  // Halve the request and retry.
+ }
+ }
+}
+
+void BufferedReader::Restart() {
+ if (debug_log)
+ syslog(LOG_INFO, "Reader %" PRIu64 ": Rewind", reader_id_);
+
+ // Restart from the file beginning.
+ file_ = Open(archive_, file_id_);  // The previous zip_file_t is closed by the ZipFile deleter on assignment.
+ pos_ = 0;
+ buffer_start_ = 0;
+
+ // Allocate a possibly bigger buffer. We have to be careful on 32-bit
+ // devices, since they have a limited addressable space.
+ AllocateBuffer((std::numeric_limits<ssize_t>::max() >> 1) + 1);  // Upper bound only; AllocateBuffer clamps it to expected_size_.
+}
+
+void BufferedReader::Advance(off_t jump) {
+ assert(jump >= 0);
+
+ if (jump <= 0)  // Nothing to skip.
+ return;
+
+ if (debug_log)
+ syslog(LOG_INFO,
+ "Reader %" PRIu64 ": Skip %" PRIi64 " bytes from %" PRIu64
+ " to %" PRIu64,
+ reader_id_, jump, pos_, pos_ + jump);
+
+ do {
+ ssize_t count = buffer_size_ - buffer_start_;  // Contiguous room before the physical end of the buffer.
+ LimitSize(&count, jump);
+
+ assert(count > 0);
+ count = ReadAtCurrentPosition(&buffer_[buffer_start_], count);  // Skipped bytes are still stored in the rolling buffer.
+ if (count == 0)  // No more data before the whole jump was consumed.
+ return;
+
+ buffer_start_ += count;
+ if (buffer_start_ >= buffer_size_) {
+ assert(buffer_start_ == buffer_size_);
+ buffer_start_ = 0;  // Wrap around to the beginning of the buffer.
+ }
+
+ jump -= count;
+ } while (jump > 0);
+
+ assert(jump == 0);
+}
+
+char *BufferedReader::ReadFromBufferAndAdvance(char *dest, char *const dest_end,
+ const off_t offset) {
+ const off_t jump = offset - pos_;  // Signed distance from the current position to |offset|.
+
+ if (jump >= 0) {
+ // Jump forwards (or no jump at all): nothing is copied from the buffer.
+ Advance(jump);
+ return dest;
+ }
+
+ // Jump backwards.
+ assert(jump < 0);
+
+ if (jump + buffer_size_ < 0) {
+ // The backwards jump is too big and falls outside the buffer.
+ Restart();
+ Advance(offset);  // After Restart() pos_ is 0, so this skips forward to |offset|.
+ return dest;
+ }
+
+ // The backwards jump is small enough to fall inside the buffer.
+ assert(-jump <= buffer_size_);
+
+ // Read data from the buffer.
+ ssize_t i = buffer_start_ + jump;  // Buffer index of the first wanted byte; negative if it lies in the wrapped tail.
+
+ do {
+ ssize_t size = -i;  // If i < 0: bytes from the wanted position to the physical end of the buffer.
+ ssize_t start = i;
+ if (i < 0) {
+ if (debug_log)
+ syslog(LOG_INFO, "Reader %" PRIu64 ": Negative part of buffer",
+ reader_id_);
+ start += buffer_size_;  // Map the negative index onto the tail of the buffer.
+ } else {
+ size += buffer_start_;  // If i >= 0: bytes from i up to buffer_start_.
+ }
+
+ LimitSize(&size, dest_end - dest);  // Do not write past the end of |dest|.
+ assert(size > 0);
+
+ if (debug_log)
+ syslog(LOG_INFO,
+ "Reader %" PRIu64 ": Read %zd bytes from cache position %zd",
+ reader_id_, size, i - buffer_start_);
+
+ std::memcpy(dest, &buffer_[start], size);
+ dest += size;
+ i += size;
+ } while (i < buffer_start_ && dest < dest_end);  // Until the buffered data is exhausted or |dest| is full.
+
+ return dest;
+}
+
+char *BufferedReader::Read(char *dest, char *const dest_end,
+ const off_t offset) {
+ if (offset >= expected_size_)  // Reading at or past the expected end of file yields no bytes.
+ return dest;
+
+ if (dest == dest_end)  // Empty destination: nothing to do.
+ return dest;
+
+ // If we don't have a buffer, then we don't have enough memory.
+ if (!buffer_)
+ throw std::bad_alloc();
+
+ assert(buffer_);
+ assert(buffer_size_ > 0);
+
+ // Read data from buffer if possible.
+ dest = ReadFromBufferAndAdvance(dest, dest_end, offset);
+
+ // Read data from file while keeping the rolling buffer up to date.
+ while (
+ const ssize_t size = ReadAtCurrentPosition(
+ &buffer_[buffer_start_],
+ std::min<ssize_t>(dest_end - dest, buffer_size_ - buffer_start_))) {  // Bounded by room left in |dest| and contiguous room in the buffer.
+ memcpy(dest, &buffer_[buffer_start_], size);  // Data lands in the rolling buffer first, then is copied out.
+ dest += size;
+ buffer_start_ += size;
+ if (buffer_start_ == buffer_size_)
+ buffer_start_ = 0;  // Wrap around to the beginning of the buffer.
+ }
+
+ return dest;
+}
diff --git a/lib/reader.h b/lib/reader.h
new file mode 100644
index 0000000..3ca195e
--- /dev/null
+++ b/lib/reader.h
@@ -0,0 +1,162 @@
+////////////////////////////////////////////////////////////////////////////
+// Copyright 2021 Google LLC //
+// //
+// This program is free software: you can redistribute it and/or modify //
+// it under the terms of the GNU General Public License as published by //
+// the Free Software Foundation, either version 3 of the License, or //
+// (at your option) any later version. //
+// //
+// This program is distributed in the hope that it will be useful, //
+// but WITHOUT ANY WARRANTY; without even the implied warranty of //
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
+// GNU General Public License for more details. //
+// //
+// You should have received a copy of the GNU General Public License //
+// along with this program. If not, see <https://www.gnu.org/licenses/>.//
+////////////////////////////////////////////////////////////////////////////
+
+#ifndef READER_H
+#define READER_H
+
+#include <memory>
+
+#include <zip.h>
+
+#include "types.h"
+
+struct ZipClose {  // Deleter used by ZipFile: closes the handle with zip_fclose().
+ void operator()(zip_file_t *const file) const { zip_fclose(file); }
+};
+
+using ZipFile = std::unique_ptr<zip_file_t, ZipClose>;  // Owning handle to an open zip_file_t.
+
+// Abstract base class for Reader objects that read and return bytes from a
+// file stored or compressed in a ZIP archive.
+class Reader {
+ public:
+ virtual ~Reader();
+
+ Reader(struct zip *archive, zip_uint64_t file_id, off_t expected_size);  // Opens the file; throws ZipError on failure.
+
+ Reader(const Reader &) = delete;
+ Reader &operator=(const Reader &) = delete;
+
+ // Reads |dest_end - dest| bytes at the given file |offset| and stores them
+ // into |dest|. Tries to fill the |dest| buffer, and only returns a "short
+ // read" with fewer than |dest_end - dest| bytes if the end of the file is
+ // reached. Returns a pointer past the last byte written in |dest|, which
+ // should be |dest_end| if the end of the file has not been reached. Throws
+ // ZipError in case of error.
+ virtual char *Read(char *dest, char *dest_end, off_t offset) = 0;
+
+ protected:
+ // Opens the file at index |file_id|. Throws ZipError in case of error.
+ static ZipFile Open(struct zip *archive, zip_uint64_t file_id);
+
+ // Reads up to |size| bytes at the current position pos_ and stores them
+ // into |dest|. Returns the number of bytes actually read, which could be
+ // less than |size|. Returns 0 if |size| is 0. Returns 0 if the end of file
+ // has been reached, and there is nothing left to be read. Updates the
+ // current position pos_. Throws ZipError in case of error.
+ ssize_t ReadAtCurrentPosition(char *dest, ssize_t size);
+
+ // Number of created Reader objects.
+ static zip_uint64_t reader_count_;
+
+ // ID of this Reader (for debug traces).
+ const zip_uint64_t reader_id_ = ++reader_count_;
+
+ // ID of the file being read.
+ const zip_uint64_t file_id_;
+
+ // Expected size of the file being read. Reads never go past this size.
+ const off_t expected_size_;
+
+ // File being read.
+ ZipFile file_;
+
+ // Current position of the file being read.
+ off_t pos_ = 0;
+};
+
+using ReaderPtr = std::unique_ptr<Reader>;
+
+// Reader used for uncompressed files, i.e. files that are simply stored
+// without compression in the ZIP archive. These files can be accessed in
+// random order (Read() seeks with zip_fseek), and don't require any buffering.
+class UnbufferedReader : public Reader {
+ public:
+ using Reader::Reader;  // Inherits Reader's constructor.
+
+ char *Read(char *dest, char *dest_end, off_t offset) override;
+};
+
+// Reader used for compressed files. It features a decompression engine and a
+// rolling buffer holding the latest decompressed bytes.
+//
+// During the first decompression pass, the rolling buffer contains 500KB (or
+// less if the |expected_size| is smaller). This is usually enough to
+// accommodate the possible out-of-order read operations due to the kernel's
+// readahead optimization.
+//
+// If a read operation starts at an offset located before the start of the
+// rolling buffer, then this BufferedReader restarts decompressing the file from
+// the beginning, but for this second pass it will use a rolling buffer as big
+// as possible (as big as |expected_size| if there is enough addressable space).
+class BufferedReader : public Reader {
+ public:
+ BufferedReader(struct zip *const archive, const zip_uint64_t file_id,
+ const off_t expected_size)
+ : Reader(archive, file_id, expected_size), archive_(archive) {
+ AllocateBuffer(500 << 10); // 500KB
+ }
+
+ char *Read(char *dest, char *dest_end, off_t offset) override;
+
+ protected:
+ // Allocates a rolling buffer of up to |buffer_size| bytes or |expected_size|
+ // bytes, whichever is smaller, but never less than 1 byte.
+ // Throws std::bad_alloc in case of memory allocation error.
+ void AllocateBuffer(ssize_t buffer_size);
+
+ // Reopens the file, rewinds to position 0, and allocates a bigger buffer.
+ // Throws std::bad_alloc in case of memory allocation error.
+ // Throws a ZipError in case of error.
+ void Restart();
+
+ // Advances the position of the decompression engine by |jump| bytes.
+ // Throws a ZipError in case of error.
+ // Precondition: the buffer is allocated.
+ // Precondition: |jump >= 0|
+ void Advance(off_t jump);
+
+ // Reads as many bytes as possible (up to |dest_end - dest| bytes) from the
+ // rolling buffer and stores them in |dest|. If the start |offset| is not in
+ // the rolling buffer, then advances the position of the decompression
+ // engine (while keeping the rolling buffer up to date) to the position
+ // |offset| or the end of the file, whichever comes first. Returns a pointer
+ // past the last byte written in |dest|. Throws a ZipError in case of error.
+ // Precondition: the buffer is allocated.
+ char *ReadFromBufferAndAdvance(char *dest, char *dest_end, off_t offset);
+
+ // Pointer to the ZIP structure. Used when starting a second decompression
+ // pass.
+ struct zip *const archive_;
+
+ // Index of the rolling buffer where the oldest byte is currently stored
+ // (and where the next decompressed byte at the file position |pos_| will be
+ // stored).
+ // Invariant: 0 <= buffer_start_ < buffer_size_ once the buffer is
+ // allocated.
+ ssize_t buffer_start_ = 0;
+
+ // Size of the rolling buffer.
+ // Invariant: 0 < buffer_size_ once the buffer is allocated.
+ ssize_t buffer_size_ = 0;
+
+ // Rolling buffer.
+ // Invariant: buffer_ != nullptr once the buffer is allocated.
+ std::unique_ptr<char[]> buffer_;
+};
+
+#endif
diff --git a/main.cpp b/main.cpp
index 63cc2c5..6f18201 100644
--- a/main.cpp
+++ b/main.cpp
@@ -249,13 +249,11 @@ int main(int argc, char *argv[]) try {
fusezip_oper.statfs = fusezip_statfs;
fusezip_oper.open = fusezip_open;
fusezip_oper.read = fusezip_read;
- fusezip_oper.write = fusezip_write;
fusezip_oper.release = fusezip_release;
fusezip_oper.unlink = fusezip_unlink;
fusezip_oper.rmdir = fusezip_rmdir;
fusezip_oper.mkdir = fusezip_mkdir;
fusezip_oper.rename = fusezip_rename;
- fusezip_oper.create = fusezip_create;
fusezip_oper.mknod = fusezip_mknod;
fusezip_oper.chmod = fusezip_chmod;
fusezip_oper.chown = fusezip_chown;
@@ -266,8 +264,6 @@ int main(int argc, char *argv[]) try {
fusezip_oper.releasedir = fusezip_releasedir;
fusezip_oper.access = fusezip_access;
fusezip_oper.utimens = fusezip_utimens;
- fusezip_oper.ftruncate = fusezip_ftruncate;
- fusezip_oper.truncate = fusezip_truncate;
fusezip_oper.setxattr = fusezip_setxattr;
fusezip_oper.getxattr = fusezip_getxattr;
fusezip_oper.listxattr = fusezip_listxattr;
--
2.33.0.464.g1972c5931b-goog