From e85019e95ba9d9b2aa43d33af21f332654cbaab8 Mon Sep 17 00:00:00 2001 From: Sebastian Messmer Date: Tue, 5 Jul 2016 23:56:44 -0700 Subject: [PATCH] Cache value of DataTree.numLeaves(). This should make read()/write() faster. --- .../onblocks/datatreestore/DataTree.cpp | 34 ++++++++++++++----- .../onblocks/datatreestore/DataTree.h | 7 +++- .../DataTreeTest_ResizeByTraversing.cpp | 10 +++++- .../DataTreeTest_ResizeNumBytes.cpp | 9 +++++ 4 files changed, 50 insertions(+), 10 deletions(-) diff --git a/src/blobstore/implementations/onblocks/datatreestore/DataTree.cpp b/src/blobstore/implementations/onblocks/datatreestore/DataTree.cpp index e74e4edd..c32a3536 100644 --- a/src/blobstore/implementations/onblocks/datatreestore/DataTree.cpp +++ b/src/blobstore/implementations/onblocks/datatreestore/DataTree.cpp @@ -38,7 +38,7 @@ namespace onblocks { namespace datatreestore { DataTree::DataTree(DataNodeStore *nodeStore, unique_ref rootNode) - : _mutex(), _nodeStore(nodeStore), _rootNode(std::move(rootNode)) { + : _mutex(), _nodeStore(nodeStore), _rootNode(std::move(rootNode)), _numLeavesCache(none) { } DataTree::~DataTree() { @@ -138,11 +138,23 @@ unique_ref DataTree::releaseRootNode() { //TODO Test numLeaves(), for example also two configurations with same number of bytes but different number of leaves (last leaf has 0 bytes) uint32_t DataTree::numLeaves() const { - shared_lock lock(_mutex); - return _numLeaves(*_rootNode); + shared_lock lock(_mutex); + return _numLeaves(); } -uint32_t DataTree::_numLeaves(const DataNode &node) const { +uint32_t DataTree::_numLeaves() const { + if (_numLeavesCache == none) { + _numLeavesCache = _computeNumLeaves(*_rootNode); + } + return *_numLeavesCache; +} + +uint32_t DataTree::_forceComputeNumLeaves() const { + _numLeavesCache = _computeNumLeaves(*_rootNode); + return *_numLeavesCache; +} + +uint32_t DataTree::_computeNumLeaves(const DataNode &node) const { const DataLeafNode *leaf = dynamic_cast(&node); if (leaf != nullptr) { return 1; @@ -152,7 +164,7 @@ uint32_t DataTree::_numLeaves(const DataNode &node) const { uint64_t numLeavesInLeftChildren = (inner.numChildren()-1) * leavesPerFullChild(inner); auto lastChild = _nodeStore->load(inner.LastChild()->key()); ASSERT(lastChild != none, "Couldn't load last child"); - uint64_t numLeavesInRightChild = _numLeaves(**lastChild); + uint64_t numLeavesInRightChild = _computeNumLeaves(**lastChild); return numLeavesInLeftChildren + numLeavesInRightChild; } @@ -168,7 +180,7 @@ void DataTree::traverseLeaves(uint32_t beginIndex, uint32_t endIndex, functionlayout().maxChildrenPerInnerNode(), (uint64_t)endIndex); - uint32_t numLeaves = this->_numLeaves(*_rootNode); // TODO Querying the size causes a tree traversal down to the leaves. Possible without querying the size? + uint32_t numLeaves = this->_numLeaves(); // TODO Querying the size could cause a tree traversal down to the leaves. Possible without querying the size? If yes, we probably don't need _numLeavesCache anymore, because its only meaning is to keep the value when a lot of parallel write()/read() syscalls happen. if (_rootNode->depth() < neededTreeDepth) { //TODO Test cases that actually increase it here by 0 level / 1 level / more than 1 level increaseTreeDepth(neededTreeDepth - _rootNode->depth()); @@ -177,7 +189,7 @@ void DataTree::traverseLeaves(uint32_t beginIndex, uint32_t endIndex, function= beginIndex // There is a gap between the current size and the begin of the traversal - return _traverseLeaves(_rootNode.get(), 0, numLeaves-1, endIndex, [beginIndex, numLeaves, &func, this](DataLeafNode* node, uint32_t index) { + _traverseLeaves(_rootNode.get(), 0, numLeaves-1, endIndex, [beginIndex, numLeaves, &func, this](DataLeafNode* node, uint32_t index) { if (index >= beginIndex) { func(node, index); } else if (index == numLeaves - 1) { @@ -185,15 +197,19 @@ void DataTree::traverseLeaves(uint32_t beginIndex, uint32_t endIndex, functionresize(_nodeStore->layout().maxBytesPerLeaf()); } }); + ASSERT(endIndex >= _numLeavesCache.value(), "We should be outside of the valid region, i.e. outside of the old size"); + _numLeavesCache = endIndex; } else if (numLeaves < endIndex) { // We are starting traversal in the valid region, but traverse until after it (we grow new leaves) - return _traverseLeaves(_rootNode.get(), 0, beginIndex, endIndex, [numLeaves, &func, this] (DataLeafNode *node, uint32_t index) { + _traverseLeaves(_rootNode.get(), 0, beginIndex, endIndex, [numLeaves, &func, this] (DataLeafNode *node, uint32_t index) { if (index == numLeaves - 1) { // It is the old last leaf - resize it to maximum node->resize(_nodeStore->layout().maxBytesPerLeaf()); } func(node, index); }); + ASSERT(endIndex >= _numLeavesCache.value(), "We should be outside of the valid region, i.e. outside of the old size"); + _numLeavesCache = endIndex; } else { //We are traversing entirely inside the valid region exclusiveLock.reset(); // we can allow parallel traverses, if all are entirely inside the valid region. @@ -299,6 +315,8 @@ void DataTree::resizeNumBytes(uint64_t newNumBytes) { } uint32_t newLastLeafSize = newNumBytes - (newNumLeaves-1)*_nodeStore->layout().maxBytesPerLeaf(); LastLeaf(_rootNode.get())->resize(newLastLeafSize); + + _numLeavesCache = newNumLeaves; } ASSERT(newNumBytes == _numStoredBytes(), "We resized to the wrong number of bytes ("+std::to_string(numStoredBytes())+" instead of "+std::to_string(newNumBytes)+")"); } diff --git a/src/blobstore/implementations/onblocks/datatreestore/DataTree.h b/src/blobstore/implementations/onblocks/datatreestore/DataTree.h index ae7e6c43..8fe74b62 100644 --- a/src/blobstore/implementations/onblocks/datatreestore/DataTree.h +++ b/src/blobstore/implementations/onblocks/datatreestore/DataTree.h @@ -36,12 +36,16 @@ public: uint32_t numLeaves() const; uint64_t numStoredBytes() const; + // only used by test cases + uint32_t _forceComputeNumLeaves() const; + void flush() const; private: mutable boost::shared_mutex _mutex; datanodestore::DataNodeStore *_nodeStore; cpputils::unique_ref _rootNode; + mutable boost::optional _numLeavesCache; cpputils::unique_ref addDataLeaf(); void removeLastDataLeaf(); @@ -62,7 +66,8 @@ private: uint32_t leavesPerFullChild(const datanodestore::DataInnerNode &root) const; uint64_t _numStoredBytes() const; uint64_t _numStoredBytes(const datanodestore::DataNode &root) const; - uint32_t _numLeaves(const datanodestore::DataNode &node) const; + uint32_t _numLeaves() const; + uint32_t _computeNumLeaves(const datanodestore::DataNode &node) const; cpputils::optional_ownership_ptr LastLeaf(datanodestore::DataNode *root); cpputils::unique_ref LastLeaf(cpputils::unique_ref root); datanodestore::DataInnerNode* increaseTreeDepth(unsigned int levels); diff --git a/test/blobstore/implementations/onblocks/datatreestore/DataTreeTest_ResizeByTraversing.cpp b/test/blobstore/implementations/onblocks/datatreestore/DataTreeTest_ResizeByTraversing.cpp index 18c98849..59c3c57c 100644 --- a/test/blobstore/implementations/onblocks/datatreestore/DataTreeTest_ResizeByTraversing.cpp +++ b/test/blobstore/implementations/onblocks/datatreestore/DataTreeTest_ResizeByTraversing.cpp @@ -168,13 +168,21 @@ TEST_P(DataTreeTest_ResizeByTraversing_P, StructureIsValid) { EXPECT_IS_LEFTMAXDATA_TREE(tree->key()); } -TEST_P(DataTreeTest_ResizeByTraversing_P, NumLeavesIsCorrect) { +TEST_P(DataTreeTest_ResizeByTraversing_P, NumLeavesIsCorrect_FromCache) { + tree->numLeaves(); // fill cache with old value GrowTree(tree.get(), numberOfLeavesToAdd); // tree->numLeaves() only goes down the right border nodes and expects the tree to be a left max data tree. // This is what the StructureIsValid test case is for. EXPECT_EQ(newNumberOfLeaves, tree->numLeaves()); } +TEST_P(DataTreeTest_ResizeByTraversing_P, NumLeavesIsCorrect) { + GrowTree(tree.get(), numberOfLeavesToAdd); + // tree->_forceComputeNumLeaves() only goes down the right border nodes and expects the tree to be a left max data tree. + // This is what the StructureIsValid test case is for. + EXPECT_EQ(newNumberOfLeaves, tree->_forceComputeNumLeaves()); +} + TEST_P(DataTreeTest_ResizeByTraversing_P, DepthFlagsAreCorrect) { GrowTree(tree.get(), numberOfLeavesToAdd); uint32_t depth = ceil(log(newNumberOfLeaves)/log(DataTreeTest_ResizeByTraversing::LAYOUT.maxChildrenPerInnerNode())); diff --git a/test/blobstore/implementations/onblocks/datatreestore/DataTreeTest_ResizeNumBytes.cpp b/test/blobstore/implementations/onblocks/datatreestore/DataTreeTest_ResizeNumBytes.cpp index 520752a9..7d58468a 100644 --- a/test/blobstore/implementations/onblocks/datatreestore/DataTreeTest_ResizeNumBytes.cpp +++ b/test/blobstore/implementations/onblocks/datatreestore/DataTreeTest_ResizeNumBytes.cpp @@ -177,6 +177,15 @@ TEST_P(DataTreeTest_ResizeNumBytes_P, NumBytesIsCorrect) { } TEST_P(DataTreeTest_ResizeNumBytes_P, NumLeavesIsCorrect) { + tree->resizeNumBytes(newSize); + tree->flush(); + // tree->numLeaves() only goes down the right border nodes and expects the tree to be a left max data tree. + // This is what the StructureIsValid test case is for. + EXPECT_EQ(newNumberOfLeaves, tree->_forceComputeNumLeaves()); +} + +TEST_P(DataTreeTest_ResizeNumBytes_P, NumLeavesIsCorrect_FromCache) { + tree->numLeaves(); // fill cache with old value tree->resizeNumBytes(newSize); tree->flush(); // tree->numLeaves() only goes down the right border nodes and expects the tree to be a left max data tree.