From 7a687575993f36f8051e9c29b92badf93d41b6cb Mon Sep 17 00:00:00 2001 From: Sebastian Messmer Date: Wed, 13 Jul 2016 07:08:53 +0200 Subject: [PATCH] Remove unneeded subtrees when shrinking tree --- .../onblocks/datanodestore/DataInnerNode.cpp | 11 +--------- .../onblocks/datanodestore/DataInnerNode.h | 1 - .../onblocks/datanodestore/DataNodeStore.cpp | 14 +++++++++++++ .../onblocks/datanodestore/DataNodeStore.h | 1 + .../onblocks/datatreestore/DataTree.cpp | 11 ++++++++-- .../onblocks/datatreestore/DataTreeStore.cpp | 1 + .../DataTreeTest_ResizeNumBytes.cpp | 20 +++++++++++++++++++ 7 files changed, 46 insertions(+), 13 deletions(-) diff --git a/src/blobstore/implementations/onblocks/datanodestore/DataInnerNode.cpp b/src/blobstore/implementations/onblocks/datanodestore/DataInnerNode.cpp index 81e0d431..c8ea3026 100644 --- a/src/blobstore/implementations/onblocks/datanodestore/DataInnerNode.cpp +++ b/src/blobstore/implementations/onblocks/datanodestore/DataInnerNode.cpp @@ -77,18 +77,9 @@ void DataInnerNode::addChild(const DataNode &child) { LastChild()->setKey(child.key()); } -void DataInnerNode::reduceNumChildren(uint32_t newNumChildren) { - ASSERT(node().Size() >= newNumChildren, "New num children given to reduceNumChildren() is larger than old num children."); - if (node().Size() != newNumChildren) { - for (auto entry = ChildrenBegin() + newNumChildren; entry != ChildrenEnd(); ++entry) { - entry->setKey(Key::Null()); - } - node().setSize(newNumChildren); - } -} - void DataInnerNode::removeLastChild() { ASSERT(node().Size() > 1, "There is no child to remove"); + LastChild()->setKey(Key::Null()); node().setSize(node().Size()-1); } diff --git a/src/blobstore/implementations/onblocks/datanodestore/DataInnerNode.h b/src/blobstore/implementations/onblocks/datanodestore/DataInnerNode.h index 462123a0..967d6746 100644 --- a/src/blobstore/implementations/onblocks/datanodestore/DataInnerNode.h +++ b/src/blobstore/implementations/onblocks/datanodestore/DataInnerNode.h @@ -26,7 +26,6 @@ public: uint32_t numChildren() const; void addChild(const DataNode &child_key); - void reduceNumChildren(uint32_t newNumChildren); void removeLastChild(); diff --git a/src/blobstore/implementations/onblocks/datanodestore/DataNodeStore.cpp b/src/blobstore/implementations/onblocks/datanodestore/DataNodeStore.cpp index f89907a1..11ab6e6a 100644 --- a/src/blobstore/implementations/onblocks/datanodestore/DataNodeStore.cpp +++ b/src/blobstore/implementations/onblocks/datanodestore/DataNodeStore.cpp @@ -88,6 +88,20 @@ void DataNodeStore::remove(unique_ref node) { _blockstore->remove(std::move(block)); } + +void DataNodeStore::removeSubtree(unique_ref node) { + //TODO Make this faster by not loading the leaves but just deleting them. Can be recognized, because of the depth of their parents. + DataInnerNode *inner = dynamic_cast(node.get()); + if (inner != nullptr) { + for (uint32_t i = 0; i < inner->numChildren(); ++i) { + auto child = load(inner->getChild(i)->key()); + ASSERT(child != none, "Couldn't load child node"); + removeSubtree(std::move(*child)); + } + } + remove(std::move(node)); +} + uint64_t DataNodeStore::numNodes() const { return _blockstore->numBlocks(); } diff --git a/src/blobstore/implementations/onblocks/datanodestore/DataNodeStore.h b/src/blobstore/implementations/onblocks/datanodestore/DataNodeStore.h index 1f60afae..36acf56c 100644 --- a/src/blobstore/implementations/onblocks/datanodestore/DataNodeStore.h +++ b/src/blobstore/implementations/onblocks/datanodestore/DataNodeStore.h @@ -38,6 +38,7 @@ public: cpputils::unique_ref overwriteNodeWith(cpputils::unique_ref target, const DataNode &source); void remove(cpputils::unique_ref node); + void removeSubtree(cpputils::unique_ref node); //TODO Test blocksizeBytes/numBlocks/estimateSpaceForNumBlocksLeft uint64_t virtualBlocksizeBytes() const; diff --git a/src/blobstore/implementations/onblocks/datatreestore/DataTree.cpp b/src/blobstore/implementations/onblocks/datatreestore/DataTree.cpp index 14920677..a6e9e6ce 100644 --- a/src/blobstore/implementations/onblocks/datatreestore/DataTree.cpp +++ b/src/blobstore/implementations/onblocks/datatreestore/DataTree.cpp @@ -168,14 +168,21 @@ void DataTree::resizeNumBytes(uint64_t newNumBytes) { // This is only called, if the new last leaf was not existing yet return Data(newLastLeafSize).FillWithZeroes(); }; - auto onBacktrackFromSubtree = [newNumLeaves, maxChildrenPerInnerNode] (DataInnerNode* node) { + auto onBacktrackFromSubtree = [this, newNumLeaves, maxChildrenPerInnerNode] (DataInnerNode* node) { // This is only called for the right border nodes of the new tree. // When growing size, the following is a no-op. When shrinking, we're deleting the children that aren't needed anymore. uint32_t maxLeavesPerChild = utils::intPow((uint64_t)maxChildrenPerInnerNode, ((uint64_t)node->depth()-1)); uint32_t neededNodesOnChildLevel = utils::ceilDivision(newNumLeaves, maxLeavesPerChild); uint32_t neededSiblings = utils::ceilDivision(neededNodesOnChildLevel, maxChildrenPerInnerNode); uint32_t neededChildrenForRightBorderNode = neededNodesOnChildLevel - (neededSiblings-1) * maxChildrenPerInnerNode; - node->reduceNumChildren(neededChildrenForRightBorderNode); + ASSERT(neededChildrenForRightBorderNode <= node->numChildren(), "Node has too few children"); + // All children to the right of the new right-border-node are removed including their subtree. + while(node->numChildren() > neededChildrenForRightBorderNode) { + // TODO removeSubtree() should get the key, I shouldn't load the block here. + // TODO removeSubtree() needs perf optimization: Don't load leaves. + _nodeStore->removeSubtree(_nodeStore->load(node->LastChild()->key()).value()); + node->removeLastChild(); + } }; _traverseLeaves(newNumLeaves - 1, newNumLeaves, onExistingLeaf, onCreateLeaf, onBacktrackFromSubtree); diff --git a/src/blobstore/implementations/onblocks/datatreestore/DataTreeStore.cpp b/src/blobstore/implementations/onblocks/datatreestore/DataTreeStore.cpp index 37e9617e..f5581816 100644 --- a/src/blobstore/implementations/onblocks/datatreestore/DataTreeStore.cpp +++ b/src/blobstore/implementations/onblocks/datatreestore/DataTreeStore.cpp @@ -37,6 +37,7 @@ unique_ref DataTreeStore::createNewTree() { void DataTreeStore::remove(unique_ref tree) { // Remove all nodes except for the root, which will be a leaf. tree->resizeNumBytes(0); + // Then remove the root node _nodeStore->remove(tree->releaseRootNode()); } diff --git a/test/blobstore/implementations/onblocks/datatreestore/DataTreeTest_ResizeNumBytes.cpp b/test/blobstore/implementations/onblocks/datatreestore/DataTreeTest_ResizeNumBytes.cpp index 7d58468a..d3f86c0d 100644 --- a/test/blobstore/implementations/onblocks/datatreestore/DataTreeTest_ResizeNumBytes.cpp +++ b/test/blobstore/implementations/onblocks/datatreestore/DataTreeTest_ResizeNumBytes.cpp @@ -223,6 +223,19 @@ TEST_P(DataTreeTest_ResizeNumBytes_P, DataStaysIntact) { } } +TEST_P(DataTreeTest_ResizeNumBytes_P, UnneededBlocksGetDeletedWhenShrinking) { + tree->resizeNumBytes(newSize); + tree->flush(); + + uint64_t expectedNumNodes = 1; // 1 for the root node + uint64_t nodesOnCurrentLevel = newNumberOfLeaves; + while (nodesOnCurrentLevel > 1) { + expectedNumNodes += nodesOnCurrentLevel; + nodesOnCurrentLevel = ceilDivision(nodesOnCurrentLevel, nodeStore->layout().maxChildrenPerInnerNode()); + } + EXPECT_EQ(expectedNumNodes, nodeStore->numNodes()); +} + //Resize to zero is not caught in the parametrized test above, in the following, we test it separately. TEST_F(DataTreeTest_ResizeNumBytes, ResizeToZero_NumBytesIsCorrect) { @@ -241,3 +254,10 @@ TEST_F(DataTreeTest_ResizeNumBytes, ResizeToZero_KeyDoesntChange) { tree->flush(); EXPECT_EQ(key, tree->key()); } + +TEST_F(DataTreeTest_ResizeNumBytes, ResizeToZero_UnneededBlocksGetDeletedWhenShrinking) { + auto tree = CreateThreeLevelTreeWithThreeChildrenAndLastLeafSize(10u); + tree->resizeNumBytes(0); + tree->flush(); + EXPECT_EQ(1u, nodeStore->numNodes()); +}