From 0aa0b64712f42a3bdf0878f898b51bcaa05f7c20 Mon Sep 17 00:00:00 2001 From: Sebastian Messmer Date: Sun, 26 May 2019 02:34:26 -0700 Subject: [PATCH] Refactor traversal in cryfs-stats --- src/stats/CMakeLists.txt | 1 + src/stats/main.cpp | 106 ++++++++++++++++++++------------------- src/stats/traversal.cpp | 65 ++++++++++++++++++++++++ src/stats/traversal.h | 24 +++++++++ 4 files changed, 144 insertions(+), 52 deletions(-) create mode 100644 src/stats/traversal.cpp create mode 100644 src/stats/traversal.h diff --git a/src/stats/CMakeLists.txt b/src/stats/CMakeLists.txt index c29f44ea..f049820f 100644 --- a/src/stats/CMakeLists.txt +++ b/src/stats/CMakeLists.txt @@ -2,6 +2,7 @@ project (stats) set(SOURCES main.cpp + traversal.cpp ) add_executable(${PROJECT_NAME} ${SOURCES}) diff --git a/src/stats/main.cpp b/src/stats/main.cpp index 4e8dfaa6..c534b808 100644 --- a/src/stats/main.cpp +++ b/src/stats/main.cpp @@ -15,6 +15,7 @@ #include #include #include +#include "traversal.h" #include @@ -31,6 +32,8 @@ using namespace blobstore::onblocks; using namespace blobstore::onblocks::datanodestore; using namespace cryfs::fsblobstore; +using namespace cryfs_stats; + void printNode(unique_ref node) { std::cout << "BlockId: " << node->blockId().ToString() << ", Depth: " << static_cast(node->depth()) << " "; auto innerNode = dynamic_pointer_move(node); @@ -45,39 +48,6 @@ void printNode(unique_ref node) { } } -void _forEachBlob(FsBlobStore* blobStore, const BlockId& rootId, std::function callback) { - callback(rootId); - auto rootBlob = blobStore->load(rootId); - ASSERT(rootBlob != boost::none, "Blob not found but referenced from directory entry"); - - auto rootDir = dynamic_pointer_move(*rootBlob); - if (rootDir != boost::none) { - std::vector children; - children.reserve((*rootDir)->NumChildren()); - (*rootDir)->AppendChildrenTo(&children); - - for (const auto& child : children) { - auto childEntry = (*rootDir)->GetChild(child.name); - ASSERT(childEntry != boost::none, "We just 
got this from the entry list, it must exist."); - auto childId = childEntry->blockId(); - _forEachBlob(blobStore, childId, callback); - } - } -} - -void _forEachBlockInBlob(DataNodeStore* nodeStore, const BlockId& rootId, std::function callback) { - callback(rootId); - - auto node = nodeStore->load(rootId); - auto innerNode = dynamic_pointer_move(*node); - if (innerNode != boost::none) { - for (uint32_t childIndex = 0; childIndex < (*innerNode)->numChildren(); ++childIndex) { - auto childId = (*innerNode)->readChild(childIndex).blockId(); - _forEachBlockInBlob(nodeStore, childId, callback); - } - } -} - unique_ref makeBlockStore(const path& basedir, const CryConfigLoader::ConfigLoadResult& config, LocalStateDir& localStateDir) { auto onDiskBlockStore = make_unique_ref(basedir); auto encryptedBlockStore = CryCiphers::find(config.configFile->config()->Cipher()).createEncryptedBlockstore(std::move(onDiskBlockStore), config.configFile->config()->EncryptionKey()); @@ -90,18 +60,52 @@ unique_ref makeBlockStore(const path& basedir, const CryConfigLoader return make_unique_ref(std::move(integrityBlockStore)); } +struct AccumulateBlockIds final { /* Gathers every BlockId handed to callback(); the lambda captures this, so keep the object alive while the callback is in use. */ +public: + auto callback() { + return [this] (const BlockId& id) { + _blockIds.push_back(id); + }; + } + + const std::vector& blockIds() const { + return _blockIds; + } + + void reserve(size_t size) { + _blockIds.reserve(size); + } + +private: + std::vector _blockIds; +}; + +class ProgressBar final { /* Prints "\r<current>/<numBlocks>" each time callback() fires; the lambda captures this, so keep the object alive while the callback is in use. */ +public: + ProgressBar(size_t numBlocks): _currentBlock(0), _numBlocks(numBlocks) {} + + auto callback() { + return [this] (const BlockId&) { + cout << "\r" << (++_currentBlock) << "/" << _numBlocks << flush; + }; + } +private: + size_t _currentBlock; + size_t _numBlocks; +}; + std::vector _getKnownBlobIds(const path& basedir, const CryConfigLoader::ConfigLoadResult& config, LocalStateDir& localStateDir) { auto blockStore = makeBlockStore(basedir, config, localStateDir); auto fsBlobStore = 
make_unique_ref(make_unique_ref(std::move(blockStore), config.configFile->config()->BlocksizeBytes())); std::vector result; + AccumulateBlockIds knownBlobIds; cout << "Listing all file system entities (i.e. blobs)..." << flush; auto rootId = BlockId::FromString(config.configFile->config()->RootBlob()); - _forEachBlob(fsBlobStore.get(), rootId, [&result] (const BlockId& blockId) { - result.push_back(blockId); - }); + forEachReachableBlob(fsBlobStore.get(), rootId, {knownBlobIds.callback()}); cout << "done" << endl; - return result; + + return knownBlobIds.blockIds(); } std::vector _getKnownBlockIds(const path& basedir, const CryConfigLoader::ConfigLoadResult& config, LocalStateDir& localStateDir) { @@ -109,31 +113,29 @@ std::vector _getKnownBlockIds(const path& basedir, const CryConfigLoade auto blockStore = makeBlockStore(basedir, config, localStateDir); auto nodeStore = make_unique_ref(std::move(blockStore), config.configFile->config()->BlocksizeBytes()); - std::vector result; + AccumulateBlockIds knownBlockIds; const uint32_t numNodes = nodeStore->numNodes(); - result.reserve(numNodes); + knownBlockIds.reserve(numNodes); - uint32_t i = 0; + ProgressBar progressBar(numNodes); /* one bar for the whole listing: a fresh ProgressBar per blob would restart the count at 1 for every blob */ cout << "Listing all blocks used by these file system entities..." 
<< endl; for (const auto& blobId : knownBlobIds) { - _forEachBlockInBlob(nodeStore.get(), blobId, [&result, &i, numNodes] (const BlockId& blockId) { - cout << "\r" << (++i) << "/" << numNodes << flush; - result.push_back(blockId); + forEachReachableBlockInBlob(nodeStore.get(), blobId, { + progressBar.callback(), + knownBlockIds.callback() }); } std::cout << "...done" << endl; - return result; + return knownBlockIds.blockIds(); } set _getAllBlockIds(const path& basedir, const CryConfigLoader::ConfigLoadResult& config, LocalStateDir& localStateDir) { - auto blockStore= makeBlockStore(basedir, config, localStateDir); - set result; - blockStore->forEachBlock([&result] (const BlockId& blockId) { - result.insert(blockId); - }); - return result; + auto blockStore = makeBlockStore(basedir, config, localStateDir); + AccumulateBlockIds allBlockIds; + allBlockIds.reserve(blockStore->numBlocks()); + forEachBlock(blockStore.get(), {allBlockIds.callback()}); + return set(allBlockIds.blockIds().begin(), allBlockIds.blockIds().end()); } - int main(int argc, char* argv[]) { if (argc != 2) { std::cerr << "Usage: cryfs-stats [basedir]" << std::endl; @@ -167,7 +169,7 @@ int main(int argc, char* argv[]) { const auto& config_ = config->configFile->config(); std::cout << "Loading filesystem of version " << config_->Version() << std::endl; #ifndef CRYFS_NO_COMPATIBILITY - const bool is_correct_format = config_->Version() == CryConfig::FilesystemFormatVersion && !config_->HasParentPointers() && !config_->HasVersionNumbers(); + const bool is_correct_format = config_->Version() == CryConfig::FilesystemFormatVersion && config_->HasParentPointers() && config_->HasVersionNumbers(); #else const bool is_correct_format = config_->Version() == CryConfig::FilesystemFormatVersion; #endif diff --git a/src/stats/traversal.cpp b/src/stats/traversal.cpp new file mode 100644 index 00000000..a33dc363 --- /dev/null +++ b/src/stats/traversal.cpp @@ -0,0 +1,65 @@ +#include "traversal.h" + +#include 
+ +using blockstore::BlockId; +using blockstore::BlockStore; +using cryfs::fsblobstore::FsBlobStore; +using cryfs::fsblobstore::DirBlob; +using blobstore::onblocks::datanodestore::DataNodeStore; +using blobstore::onblocks::datanodestore::DataInnerNode; +using cpputils::dynamic_pointer_move; + +using std::vector; +using std::function; +using boost::none; + +namespace cryfs_stats { + +void forEachBlock(BlockStore* blockStore, const vector>& callbacks) { + blockStore->forEachBlock([&callbacks] (const BlockId& blockId) { + for(const auto& callback : callbacks) { + callback(blockId); + } + }); +} + +void forEachReachableBlob(FsBlobStore* blobStore, const BlockId& rootId, const vector>& callbacks) { + for (const auto& callback : callbacks) { + callback(rootId); + } + + auto rootBlob = blobStore->load(rootId); + ASSERT(rootBlob != none, "Blob not found but referenced from directory entry"); + + auto rootDir = dynamic_pointer_move(*rootBlob); + if (rootDir != none) { + vector children; + children.reserve((*rootDir)->NumChildren()); + (*rootDir)->AppendChildrenTo(&children); + + for (const auto& child : children) { + auto childEntry = (*rootDir)->GetChild(child.name); + ASSERT(childEntry != none, "We just got this from the entry list, it must exist."); + auto childId = childEntry->blockId(); + forEachReachableBlob(blobStore, childId, callbacks); + } + } +} + +void forEachReachableBlockInBlob(DataNodeStore* nodeStore, const BlockId& rootId, const vector>& callbacks) { + for (const auto& callback : callbacks) { + callback(rootId); + } + + auto node = nodeStore->load(rootId); + auto innerNode = dynamic_pointer_move(*node); + if (innerNode != none) { + for (uint32_t childIndex = 0; childIndex < (*innerNode)->numChildren(); ++childIndex) { + auto childId = (*innerNode)->readChild(childIndex).blockId(); + forEachReachableBlockInBlob(nodeStore, childId, callbacks); + } + } +} + +} diff --git a/src/stats/traversal.h b/src/stats/traversal.h new file mode 100644 index 
00000000..082a2d4a --- /dev/null +++ b/src/stats/traversal.h @@ -0,0 +1,24 @@ +#pragma once +#ifndef CRYFS_STATS_TRAVERSAL_H +#define CRYFS_STATS_TRAVERSAL_H + +#include +#include +#include +#include +#include + +namespace cryfs_stats { + + // Call the callbacks on each existing block, whether it is connected or orphaned + void forEachBlock(blockstore::BlockStore* blockStore, const std::vector>& callbacks); + + // Call the callbacks on each existing blob that is reachable from the root blob, i.e. not orphaned + void forEachReachableBlob(cryfs::fsblobstore::FsBlobStore* blobStore, const blockstore::BlockId& rootId, const std::vector>& callbacks); + + // Call the callbacks on each block that is reachable from the given blob root, i.e. belongs to this blob. + void forEachReachableBlockInBlob(blobstore::onblocks::datanodestore::DataNodeStore* nodeStore, const blockstore::BlockId& rootId, const std::vector>& callbacks); + +} + +#endif