Refactor traversal in cryfs-stats

This commit is contained in:
Sebastian Messmer 2019-05-26 02:34:26 -07:00
parent fd52381ecd
commit 0aa0b64712
4 changed files with 143 additions and 51 deletions

View File

@ -2,6 +2,7 @@ project (stats)
set(SOURCES
main.cpp
traversal.cpp
)
add_executable(${PROJECT_NAME} ${SOURCES})

View File

@ -15,6 +15,7 @@
#include <cryfs/impl/filesystem/CryDevice.h>
#include <cpp-utils/io/IOStreamConsole.h>
#include <cpp-utils/system/homedir.h>
#include "traversal.h"
#include <set>
@ -31,6 +32,8 @@ using namespace blobstore::onblocks;
using namespace blobstore::onblocks::datanodestore;
using namespace cryfs::fsblobstore;
using namespace cryfs_stats;
void printNode(unique_ref<DataNode> node) {
std::cout << "BlockId: " << node->blockId().ToString() << ", Depth: " << static_cast<int>(node->depth()) << " ";
auto innerNode = dynamic_pointer_move<DataInnerNode>(node);
@ -45,39 +48,6 @@ void printNode(unique_ref<DataNode> node) {
}
}
// Walks the blob tree starting at rootId in pre-order: the callback receives rootId
// first, then (if the loaded blob can be cast to a DirBlob) the ids of everything
// below each of its entries, recursively.
void _forEachBlob(FsBlobStore* blobStore, const BlockId& rootId, std::function<void (const BlockId& blobId)> callback) {
    // The id is reported before the blob is loaded.
    callback(rootId);

    auto rootBlob = blobStore->load(rootId);
    ASSERT(rootBlob != boost::none, "Blob not found but referenced from directory entry");

    auto maybeDir = dynamic_pointer_move<DirBlob>(*rootBlob);
    if (maybeDir == boost::none) {
        // The cast to DirBlob did not succeed, so there are no entries to descend into.
        return;
    }
    auto& dir = *maybeDir;

    std::vector<fspp::Dir::Entry> entries;
    entries.reserve(dir->NumChildren());
    dir->AppendChildrenTo(&entries);

    for (const auto& entry : entries) {
        // The listing only carries names; look the entry up again to get its block id.
        auto childEntry = dir->GetChild(entry.name);
        ASSERT(childEntry != boost::none, "We just got this from the entry list, it must exist.");
        const auto childId = childEntry->blockId();
        _forEachBlob(blobStore, childId, callback);
    }
}
// Walks the node tree of a single blob in pre-order: the callback receives rootId first,
// then, if the loaded node is a DataInnerNode, the ids of all nodes below each of its
// children, recursively.
void _forEachBlockInBlob(DataNodeStore* nodeStore, const BlockId& rootId, std::function<void (const BlockId& blockId)> callback) {
    // The id is reported before the node is loaded.
    callback(rootId);

    auto node = nodeStore->load(rootId);
    // Fail with a clear message instead of dereferencing an empty optional when a
    // referenced block is missing (same policy as _forEachBlob).
    ASSERT(node != boost::none, "Block not found but referenced as blob root or inner node child");

    auto innerNode = dynamic_pointer_move<DataInnerNode>(*node);
    if (innerNode != boost::none) {
        for (uint32_t childIndex = 0; childIndex < (*innerNode)->numChildren(); ++childIndex) {
            auto childId = (*innerNode)->readChild(childIndex).blockId();
            _forEachBlockInBlob(nodeStore, childId, callback);
        }
    }
}
unique_ref<BlockStore> makeBlockStore(const path& basedir, const CryConfigLoader::ConfigLoadResult& config, LocalStateDir& localStateDir) {
auto onDiskBlockStore = make_unique_ref<OnDiskBlockStore2>(basedir);
auto encryptedBlockStore = CryCiphers::find(config.configFile->config()->Cipher()).createEncryptedBlockstore(std::move(onDiskBlockStore), config.configFile->config()->EncryptionKey());
@ -90,18 +60,52 @@ unique_ref<BlockStore> makeBlockStore(const path& basedir, const CryConfigLoader
return make_unique_ref<LowToHighLevelBlockStore>(std::move(integrityBlockStore));
}
// Collects every block id passed to its callback, in the order the calls arrive.
struct AccumulateBlockIds final {
public:
    // Pre-allocates room for `size` ids in the underlying vector.
    void reserve(size_t size) {
        _blockIds.reserve(size);
    }

    // Returns a callable that appends the id it is given to the collection.
    // The callable captures `this`, so it must not be invoked after this object is gone.
    auto callback() {
        return [this] (const BlockId& blockId) {
            this->_blockIds.push_back(blockId);
        };
    }

    // Read-only view of everything collected so far.
    const std::vector<BlockId>& blockIds() const {
        return _blockIds;
    }

private:
    std::vector<BlockId> _blockIds;
};
// Prints a "<current>/<total>" counter to cout, rewriting the same console line
// (leading "\r") every time its callback is invoked.
class ProgressBar final {
public:
    // numBlocks is the total shown after the slash. It is only displayed; the counter
    // is not clamped to it.
    // explicit: a plain number should never silently convert into a ProgressBar.
    explicit ProgressBar(size_t numBlocks): _currentBlock(0), _numBlocks(numBlocks) {}

    // Returns a callable that advances the counter by one and reprints the progress line.
    // The block id argument is ignored. The callable captures `this`, so it must not be
    // invoked after this ProgressBar has been destroyed.
    auto callback() {
        return [this] (const BlockId&) {
            cout << "\r" << (++_currentBlock) << "/" << _numBlocks << flush;
        };
    }

private:
    size_t _currentBlock;
    size_t _numBlocks;
};
// Returns the ids of all blobs reachable from the root blob named in the config file.
// Opens its own block store / blob store stack on top of basedir for the traversal.
std::vector<BlockId> _getKnownBlobIds(const path& basedir, const CryConfigLoader::ConfigLoadResult& config, LocalStateDir& localStateDir) {
    auto blockStore = makeBlockStore(basedir, config, localStateDir);
    auto fsBlobStore = make_unique_ref<FsBlobStore>(make_unique_ref<BlobStoreOnBlocks>(std::move(blockStore), config.configFile->config()->BlocksizeBytes()));

    // Single traversal through the shared helper; the accumulator gathers the ids.
    AccumulateBlockIds knownBlobIds;
    cout << "Listing all file system entities (i.e. blobs)..." << flush;
    auto rootId = BlockId::FromString(config.configFile->config()->RootBlob());
    forEachReachableBlob(fsBlobStore.get(), rootId, {knownBlobIds.callback()});
    cout << "done" << endl;

    // Returns a copy; the accumulator itself goes out of scope here.
    return knownBlobIds.blockIds();
}
std::vector<BlockId> _getKnownBlockIds(const path& basedir, const CryConfigLoader::ConfigLoadResult& config, LocalStateDir& localStateDir) {
@ -109,31 +113,29 @@ std::vector<BlockId> _getKnownBlockIds(const path& basedir, const CryConfigLoade
auto blockStore = makeBlockStore(basedir, config, localStateDir);
auto nodeStore = make_unique_ref<DataNodeStore>(std::move(blockStore), config.configFile->config()->BlocksizeBytes());
std::vector<BlockId> result;
AccumulateBlockIds knownBlockIds;
const uint32_t numNodes = nodeStore->numNodes();
result.reserve(numNodes);
knownBlockIds.reserve(numNodes);
uint32_t i = 0;
cout << "Listing all blocks used by these file system entities..." << endl;
for (const auto& blobId : knownBlobIds) {
_forEachBlockInBlob(nodeStore.get(), blobId, [&result, &i, numNodes] (const BlockId& blockId) {
cout << "\r" << (++i) << "/" << numNodes << flush;
result.push_back(blockId);
forEachReachableBlockInBlob(nodeStore.get(), blobId, {
ProgressBar(numNodes).callback(),
knownBlockIds.callback()
});
}
std::cout << "...done" << endl;
return result;
return knownBlockIds.blockIds();
}
// Returns the ids of all blocks the block store enumerates, as an ordered set.
// Opens its own block store on top of basedir for the enumeration.
set<BlockId> _getAllBlockIds(const path& basedir, const CryConfigLoader::ConfigLoadResult& config, LocalStateDir& localStateDir) {
    auto blockStore = makeBlockStore(basedir, config, localStateDir);

    AccumulateBlockIds allBlockIds;
    allBlockIds.reserve(blockStore->numBlocks());
    forEachBlock(blockStore.get(), {allBlockIds.callback()});

    // Build the set from the collected ids in one go.
    const auto& ids = allBlockIds.blockIds();
    return set<BlockId>(ids.begin(), ids.end());
}
int main(int argc, char* argv[]) {
if (argc != 2) {
std::cerr << "Usage: cryfs-stats [basedir]" << std::endl;
@ -167,7 +169,7 @@ int main(int argc, char* argv[]) {
const auto& config_ = config->configFile->config();
std::cout << "Loading filesystem of version " << config_->Version() << std::endl;
#ifndef CRYFS_NO_COMPATIBILITY
const bool is_correct_format = config_->Version() == CryConfig::FilesystemFormatVersion && !config_->HasParentPointers() && !config_->HasVersionNumbers();
const bool is_correct_format = config_->Version() == CryConfig::FilesystemFormatVersion && config_->HasParentPointers() && config_->HasVersionNumbers();
#else
const bool is_correct_format = config_->Version() == CryConfig::FilesystemFormatVersion;
#endif

65
src/stats/traversal.cpp Normal file
View File

@ -0,0 +1,65 @@
#include "traversal.h"
#include <blobstore/implementations/onblocks/datanodestore/DataInnerNode.h>
using blockstore::BlockId;
using blockstore::BlockStore;
using cryfs::fsblobstore::FsBlobStore;
using cryfs::fsblobstore::DirBlob;
using blobstore::onblocks::datanodestore::DataNodeStore;
using blobstore::onblocks::datanodestore::DataInnerNode;
using cpputils::dynamic_pointer_move;
using std::vector;
using std::function;
using boost::none;
namespace cryfs_stats {
// Hands every block id enumerated by the block store to each callback, in the order
// the callbacks appear in `callbacks`.
void forEachBlock(BlockStore* blockStore, const vector<function<void (const BlockId& blobId)>>& callbacks) {
    const auto notifyAll = [&callbacks] (const BlockId& currentId) {
        for (const auto& notify : callbacks) {
            notify(currentId);
        }
    };
    blockStore->forEachBlock(notifyAll);
}
// Pre-order walk over the blob tree below rootId: all callbacks receive rootId first,
// then (if the loaded blob can be cast to a DirBlob) the walk recurses into every entry.
void forEachReachableBlob(FsBlobStore* blobStore, const BlockId& rootId, const vector<function<void (const BlockId& blobId)>>& callbacks) {
    // Callbacks run before the blob is loaded.
    for (const auto& notify : callbacks) {
        notify(rootId);
    }

    auto rootBlob = blobStore->load(rootId);
    ASSERT(rootBlob != none, "Blob not found but referenced from directory entry");

    auto maybeDir = dynamic_pointer_move<DirBlob>(*rootBlob);
    if (maybeDir == none) {
        // The cast to DirBlob did not succeed, so there are no entries to descend into.
        return;
    }
    auto& dir = *maybeDir;

    vector<fspp::Dir::Entry> entries;
    entries.reserve(dir->NumChildren());
    dir->AppendChildrenTo(&entries);

    for (const auto& entry : entries) {
        // The listing only carries names; look the entry up again to get its block id.
        auto childEntry = dir->GetChild(entry.name);
        ASSERT(childEntry != none, "We just got this from the entry list, it must exist.");
        const auto childId = childEntry->blockId();
        forEachReachableBlob(blobStore, childId, callbacks);
    }
}
// Pre-order walk over the node tree of one blob: all callbacks receive rootId first,
// then, if the loaded node is a DataInnerNode, the walk recurses into each of its children.
void forEachReachableBlockInBlob(DataNodeStore* nodeStore, const BlockId& rootId, const vector<function<void (const BlockId& blockId)>>& callbacks) {
    // Callbacks run before the node is loaded.
    for (const auto& callback : callbacks) {
        callback(rootId);
    }

    auto node = nodeStore->load(rootId);
    // Fail with a clear message instead of dereferencing an empty optional when a
    // referenced block is missing (same policy as forEachReachableBlob).
    ASSERT(node != none, "Block not found but referenced as blob root or inner node child");

    auto innerNode = dynamic_pointer_move<DataInnerNode>(*node);
    if (innerNode != none) {
        for (uint32_t childIndex = 0; childIndex < (*innerNode)->numChildren(); ++childIndex) {
            auto childId = (*innerNode)->readChild(childIndex).blockId();
            forEachReachableBlockInBlob(nodeStore, childId, callbacks);
        }
    }
}
}

24
src/stats/traversal.h Normal file
View File

@ -0,0 +1,24 @@
#pragma once
#ifndef CRYFS_STATS_TRAVERSAL_H
#define CRYFS_STATS_TRAVERSAL_H
#include <vector>
#include <functional>
#include <blockstore/interface/Block.h>
#include <blobstore/implementations/onblocks/datanodestore/DataNodeStore.h>
#include <cryfs/impl/filesystem/fsblobstore/FsBlobStore.h>
// Traversal helpers for cryfs-stats. Each helper takes a list of callbacks and invokes
// all of them, in list order, for every id it visits.
namespace cryfs_stats {
// Call the callbacks on each existing block, whether it is connected or orphaned.
// The ids come from blockStore->forEachBlock().
void forEachBlock(blockstore::BlockStore* blockStore, const std::vector<std::function<void (const blockstore::BlockId& blobId)>>& callbacks);
// Call the callbacks on each existing blob that is reachable from the root blob, i.e. not orphaned.
// rootId itself is reported first; blobs that are directories are then descended into recursively.
// The traversal keeps no record of visited ids.
void forEachReachableBlob(cryfs::fsblobstore::FsBlobStore* blobStore, const blockstore::BlockId& rootId, const std::vector<std::function<void (const blockstore::BlockId& blobId)>>& callbacks);
// Call the callbacks on each block that is reachable from the given blob root, i.e. belongs to this blob.
// rootId itself is reported first; inner nodes are then descended into child by child, recursively.
void forEachReachableBlockInBlob(blobstore::onblocks::datanodestore::DataNodeStore* nodeStore, const blockstore::BlockId& rootId, const std::vector<std::function<void (const blockstore::BlockId& blockId)>>& callbacks);
}
#endif