Remove unneeded subtrees when shrinking tree

This commit is contained in:
Sebastian Messmer 2016-07-13 07:08:53 +02:00
parent cf38eb0eb3
commit 7a68757599
7 changed files with 46 additions and 13 deletions

View File

@ -77,18 +77,9 @@ void DataInnerNode::addChild(const DataNode &child) {
LastChild()->setKey(child.key());
}
void DataInnerNode::reduceNumChildren(uint32_t newNumChildren) {
    // Truncates this inner node to its first newNumChildren child entries.
    // The keys of all dropped entries are reset to Key::Null() before the
    // stored size is lowered. Growing is not supported and trips the assertion.
    ASSERT(node().Size() >= newNumChildren, "New num children given to reduceNumChildren() is larger than old num children.");
    if (node().Size() == newNumChildren) {
        // Nothing to drop; leave the node untouched.
        return;
    }
    auto droppedEntry = ChildrenBegin() + newNumChildren;
    while (droppedEntry != ChildrenEnd()) {
        droppedEntry->setKey(Key::Null());
        ++droppedEntry;
    }
    node().setSize(newNumChildren);
}
void DataInnerNode::removeLastChild() {
    // Drops the last child entry: its key is reset to Key::Null() and the
    // stored size is decremented by one. The assertion requires more than one
    // child, so the only remaining child can never be removed this way.
    ASSERT(node().Size() > 1, "There is no child to remove");
    const auto sizeAfterRemoval = node().Size() - 1;
    LastChild()->setKey(Key::Null());
    node().setSize(sizeAfterRemoval);
}

View File

@ -26,7 +26,6 @@ public:
// NOTE(review): presumably the number of child entries currently stored in this
// inner node; callers use it as the upper bound when indexing children. Confirm
// against the definition, which is not visible here.
uint32_t numChildren() const;
// Adds a child entry for the given node; the definition ends by storing
// child.key() in the last child entry. Note the parameter is a node, not a key,
// despite its name in this declaration.
void addChild(const DataNode &child_key);
// Truncates the node to its first newNumChildren child entries, resetting the
// keys of the dropped entries to Key::Null(). Asserts that newNumChildren is
// not larger than the current size; equal size is a no-op.
void reduceNumChildren(uint32_t newNumChildren);
// Resets the last child entry's key to Key::Null() and decrements the size by
// one. Asserts that the node has more than one child.
void removeLastChild();

View File

@ -88,6 +88,20 @@ void DataNodeStore::remove(unique_ref<DataNode> node) {
_blockstore->remove(std::move(block));
}
void DataNodeStore::removeSubtree(unique_ref<DataNode> node) {
    // Removes the given node together with everything below it. If the node is
    // an inner node, each child is loaded and its subtree removed first; the
    // node itself is removed last. A child that cannot be loaded trips the
    // assertion.
    //TODO Make this faster by not loading the leaves but just deleting them. Can be recognized, because of the depth of their parents.
    if (auto *innerNode = dynamic_cast<DataInnerNode*>(node.get())) {
        uint32_t childIndex = 0;
        while (childIndex < innerNode->numChildren()) {
            auto child = load(innerNode->getChild(childIndex)->key());
            ASSERT(child != none, "Couldn't load child node");
            removeSubtree(std::move(*child));
            ++childIndex;
        }
    }
    remove(std::move(node));
}
uint64_t DataNodeStore::numNodes() const {
    // The node count is reported as the number of blocks in the underlying
    // block store.
    const uint64_t storedBlocks = _blockstore->numBlocks();
    return storedBlocks;
}

View File

@ -38,6 +38,7 @@ public:
// NOTE(review): presumably overwrites target with the contents of source and
// hands back the resulting node — the definition is not visible here; confirm.
cpputils::unique_ref<DataNode> overwriteNodeWith(cpputils::unique_ref<DataNode> target, const DataNode &source);
// Removes a single node (takes ownership of it); the definition ends by
// removing the node's block from the underlying block store.
void remove(cpputils::unique_ref<DataNode> node);
// Removes the given node and, if it is an inner node, recursively every node
// below it. Children are loaded and removed before the node itself.
void removeSubtree(cpputils::unique_ref<DataNode> node);
//TODO Test blocksizeBytes/numBlocks/estimateSpaceForNumBlocksLeft
uint64_t virtualBlocksizeBytes() const;

View File

@ -168,14 +168,21 @@ void DataTree::resizeNumBytes(uint64_t newNumBytes) {
// This is only called if the new last leaf did not exist yet
return Data(newLastLeafSize).FillWithZeroes();
};
auto onBacktrackFromSubtree = [newNumLeaves, maxChildrenPerInnerNode] (DataInnerNode* node) {
auto onBacktrackFromSubtree = [this, newNumLeaves, maxChildrenPerInnerNode] (DataInnerNode* node) {
// This is only called for the right border nodes of the new tree.
// When growing size, the following is a no-op. When shrinking, we're deleting the children that aren't needed anymore.
uint32_t maxLeavesPerChild = utils::intPow((uint64_t)maxChildrenPerInnerNode, ((uint64_t)node->depth()-1));
uint32_t neededNodesOnChildLevel = utils::ceilDivision(newNumLeaves, maxLeavesPerChild);
uint32_t neededSiblings = utils::ceilDivision(neededNodesOnChildLevel, maxChildrenPerInnerNode);
uint32_t neededChildrenForRightBorderNode = neededNodesOnChildLevel - (neededSiblings-1) * maxChildrenPerInnerNode;
node->reduceNumChildren(neededChildrenForRightBorderNode);
ASSERT(neededChildrenForRightBorderNode <= node->numChildren(), "Node has too few children");
// All children to the right of the new right-border-node are removed including their subtree.
while(node->numChildren() > neededChildrenForRightBorderNode) {
// TODO removeSubtree() should get the key, I shouldn't load the block here.
// TODO removeSubtree() needs perf optimization: Don't load leaves.
_nodeStore->removeSubtree(_nodeStore->load(node->LastChild()->key()).value());
node->removeLastChild();
}
};
_traverseLeaves(newNumLeaves - 1, newNumLeaves, onExistingLeaf, onCreateLeaf, onBacktrackFromSubtree);

View File

@ -37,6 +37,7 @@ unique_ref<DataTree> DataTreeStore::createNewTree() {
void DataTreeStore::remove(unique_ref<DataTree> tree) {
    // Deletes a whole tree in two steps.
    // Step 1: shrink the tree to zero bytes, which removes every node except
    // the root (the root will then be a leaf).
    tree->resizeNumBytes(0);
    // Step 2: take the root node out of the tree and remove it from the node store.
    auto rootNode = tree->releaseRootNode();
    _nodeStore->remove(std::move(rootNode));
}

View File

@ -223,6 +223,19 @@ TEST_P(DataTreeTest_ResizeNumBytes_P, DataStaysIntact) {
}
}
TEST_P(DataTreeTest_ResizeNumBytes_P, UnneededBlocksGetDeletedWhenShrinking) {
    // After resizing, the node store must contain exactly the nodes of the
    // resized tree and nothing left over from the old one.
    tree->resizeNumBytes(newSize);
    tree->flush();

    // Count the expected nodes level by level, starting at the leaves. Every
    // level with more than one node is added; the single topmost node (the
    // root) is accounted for by the initial value.
    uint64_t expectedNumNodes = 1; // 1 for the root node
    for (uint64_t levelWidth = newNumberOfLeaves;
         levelWidth > 1;
         levelWidth = ceilDivision(levelWidth, nodeStore->layout().maxChildrenPerInnerNode())) {
        expectedNumNodes += levelWidth;
    }

    EXPECT_EQ(expectedNumNodes, nodeStore->numNodes());
}
// Resizing to zero is not covered by the parametrized test above, so it is tested separately below.
TEST_F(DataTreeTest_ResizeNumBytes, ResizeToZero_NumBytesIsCorrect) {
@ -241,3 +254,10 @@ TEST_F(DataTreeTest_ResizeNumBytes, ResizeToZero_KeyDoesntChange) {
tree->flush();
EXPECT_EQ(key, tree->key());
}
TEST_F(DataTreeTest_ResizeNumBytes, ResizeToZero_UnneededBlocksGetDeletedWhenShrinking) {
    // Start from a three-level tree, shrink it to zero bytes, and verify that
    // only a single node is left in the node store afterwards.
    auto threeLevelTree = CreateThreeLevelTreeWithThreeChildrenAndLastLeafSize(10u);

    threeLevelTree->resizeNumBytes(0);
    threeLevelTree->flush();

    EXPECT_EQ(1u, nodeStore->numNodes());
}