Cache value of DataTree.numLeaves(). This should make read()/write() faster.

This commit is contained in:
Sebastian Messmer 2016-07-05 23:56:44 -07:00
parent 2f8e8d8157
commit e85019e95b
4 changed files with 50 additions and 10 deletions

View File

@ -38,7 +38,7 @@ namespace onblocks {
namespace datatreestore {
DataTree::DataTree(DataNodeStore *nodeStore, unique_ref<DataNode> rootNode)
: _mutex(), _nodeStore(nodeStore), _rootNode(std::move(rootNode)) {
: _mutex(), _nodeStore(nodeStore), _rootNode(std::move(rootNode)), _numLeavesCache(none) {
}
DataTree::~DataTree() {
@ -138,11 +138,23 @@ unique_ref<DataNode> DataTree::releaseRootNode() {
//TODO Test numLeaves(), for example also two configurations with same number of bytes but different number of leaves (last leaf has 0 bytes)
uint32_t DataTree::numLeaves() const {
shared_lock<shared_mutex> lock(_mutex);
return _numLeaves(*_rootNode);
shared_lock<shared_mutex> lock(_mutex);
return _numLeaves();
}
uint32_t DataTree::_numLeaves(const DataNode &node) const {
uint32_t DataTree::_numLeaves() const {
if (_numLeavesCache == none) {
_numLeavesCache = _computeNumLeaves(*_rootNode);
}
return *_numLeavesCache;
}
uint32_t DataTree::_forceComputeNumLeaves() const {
_numLeavesCache = _computeNumLeaves(*_rootNode);
return *_numLeavesCache;
}
uint32_t DataTree::_computeNumLeaves(const DataNode &node) const {
const DataLeafNode *leaf = dynamic_cast<const DataLeafNode*>(&node);
if (leaf != nullptr) {
return 1;
@ -152,7 +164,7 @@ uint32_t DataTree::_numLeaves(const DataNode &node) const {
uint64_t numLeavesInLeftChildren = (inner.numChildren()-1) * leavesPerFullChild(inner);
auto lastChild = _nodeStore->load(inner.LastChild()->key());
ASSERT(lastChild != none, "Couldn't load last child");
uint64_t numLeavesInRightChild = _numLeaves(**lastChild);
uint64_t numLeavesInRightChild = _computeNumLeaves(**lastChild);
return numLeavesInLeftChildren + numLeavesInRightChild;
}
@ -168,7 +180,7 @@ void DataTree::traverseLeaves(uint32_t beginIndex, uint32_t endIndex, function<v
}
uint8_t neededTreeDepth = utils::ceilLog(_nodeStore->layout().maxChildrenPerInnerNode(), (uint64_t)endIndex);
uint32_t numLeaves = this->_numLeaves(*_rootNode); // TODO Querying the size causes a tree traversal down to the leaves. Possible without querying the size?
uint32_t numLeaves = this->_numLeaves(); // TODO Querying the size could cause a tree traversal down to the leaves. Possible without querying the size? If yes, we probably don't need _numLeavesCache anymore, because its only meaning is to keep the value when a lot of parallel write()/read() syscalls happen.
if (_rootNode->depth() < neededTreeDepth) {
//TODO Test cases that actually increase it here by 0 level / 1 level / more than 1 level
increaseTreeDepth(neededTreeDepth - _rootNode->depth());
@ -177,7 +189,7 @@ void DataTree::traverseLeaves(uint32_t beginIndex, uint32_t endIndex, function<v
if (numLeaves <= beginIndex) {
//TODO Test cases with numLeaves < / >= beginIndex
// There is a gap between the current size and the begin of the traversal
return _traverseLeaves(_rootNode.get(), 0, numLeaves-1, endIndex, [beginIndex, numLeaves, &func, this](DataLeafNode* node, uint32_t index) {
_traverseLeaves(_rootNode.get(), 0, numLeaves-1, endIndex, [beginIndex, numLeaves, &func, this](DataLeafNode* node, uint32_t index) {
if (index >= beginIndex) {
func(node, index);
} else if (index == numLeaves - 1) {
@ -185,15 +197,19 @@ void DataTree::traverseLeaves(uint32_t beginIndex, uint32_t endIndex, function<v
node->resize(_nodeStore->layout().maxBytesPerLeaf());
}
});
ASSERT(endIndex >= _numLeavesCache.value(), "We should be outside of the valid region, i.e. outside of the old size");
_numLeavesCache = endIndex;
} else if (numLeaves < endIndex) {
// We are starting traversal in the valid region, but traverse until after it (we grow new leaves)
return _traverseLeaves(_rootNode.get(), 0, beginIndex, endIndex, [numLeaves, &func, this] (DataLeafNode *node, uint32_t index) {
_traverseLeaves(_rootNode.get(), 0, beginIndex, endIndex, [numLeaves, &func, this] (DataLeafNode *node, uint32_t index) {
if (index == numLeaves - 1) {
// It is the old last leaf - resize it to maximum
node->resize(_nodeStore->layout().maxBytesPerLeaf());
}
func(node, index);
});
ASSERT(endIndex >= _numLeavesCache.value(), "We should be outside of the valid region, i.e. outside of the old size");
_numLeavesCache = endIndex;
} else {
//We are traversing entirely inside the valid region
exclusiveLock.reset(); // we can allow parallel traverses, if all are entirely inside the valid region.
@ -299,6 +315,8 @@ void DataTree::resizeNumBytes(uint64_t newNumBytes) {
}
uint32_t newLastLeafSize = newNumBytes - (newNumLeaves-1)*_nodeStore->layout().maxBytesPerLeaf();
LastLeaf(_rootNode.get())->resize(newLastLeafSize);
_numLeavesCache = newNumLeaves;
}
ASSERT(newNumBytes == _numStoredBytes(), "We resized to the wrong number of bytes ("+std::to_string(numStoredBytes())+" instead of "+std::to_string(newNumBytes)+")");
}

View File

@ -36,12 +36,16 @@ public:
uint32_t numLeaves() const;
uint64_t numStoredBytes() const;
// only used by test cases
uint32_t _forceComputeNumLeaves() const;
void flush() const;
private:
mutable boost::shared_mutex _mutex;
datanodestore::DataNodeStore *_nodeStore;
cpputils::unique_ref<datanodestore::DataNode> _rootNode;
mutable boost::optional<uint32_t> _numLeavesCache;
cpputils::unique_ref<datanodestore::DataLeafNode> addDataLeaf();
void removeLastDataLeaf();
@ -62,7 +66,8 @@ private:
uint32_t leavesPerFullChild(const datanodestore::DataInnerNode &root) const;
uint64_t _numStoredBytes() const;
uint64_t _numStoredBytes(const datanodestore::DataNode &root) const;
uint32_t _numLeaves(const datanodestore::DataNode &node) const;
uint32_t _numLeaves() const;
uint32_t _computeNumLeaves(const datanodestore::DataNode &node) const;
cpputils::optional_ownership_ptr<datanodestore::DataLeafNode> LastLeaf(datanodestore::DataNode *root);
cpputils::unique_ref<datanodestore::DataLeafNode> LastLeaf(cpputils::unique_ref<datanodestore::DataNode> root);
datanodestore::DataInnerNode* increaseTreeDepth(unsigned int levels);

View File

@ -168,13 +168,21 @@ TEST_P(DataTreeTest_ResizeByTraversing_P, StructureIsValid) {
EXPECT_IS_LEFTMAXDATA_TREE(tree->key());
}
TEST_P(DataTreeTest_ResizeByTraversing_P, NumLeavesIsCorrect) {
TEST_P(DataTreeTest_ResizeByTraversing_P, NumLeavesIsCorrect_FromCache) {
tree->numLeaves(); // fill cache with old value
GrowTree(tree.get(), numberOfLeavesToAdd);
// tree->numLeaves() only goes down the right border nodes and expects the tree to be a left max data tree.
// This is what the StructureIsValid test case is for.
EXPECT_EQ(newNumberOfLeaves, tree->numLeaves());
}
TEST_P(DataTreeTest_ResizeByTraversing_P, NumLeavesIsCorrect) {
GrowTree(tree.get(), numberOfLeavesToAdd);
// tree->_forceComputeNumLeaves() only goes down the right border nodes and expects the tree to be a left max data tree.
// This is what the StructureIsValid test case is for.
EXPECT_EQ(newNumberOfLeaves, tree->_forceComputeNumLeaves());
}
TEST_P(DataTreeTest_ResizeByTraversing_P, DepthFlagsAreCorrect) {
GrowTree(tree.get(), numberOfLeavesToAdd);
uint32_t depth = ceil(log(newNumberOfLeaves)/log(DataTreeTest_ResizeByTraversing::LAYOUT.maxChildrenPerInnerNode()));

View File

@ -177,6 +177,15 @@ TEST_P(DataTreeTest_ResizeNumBytes_P, NumBytesIsCorrect) {
}
TEST_P(DataTreeTest_ResizeNumBytes_P, NumLeavesIsCorrect) {
tree->resizeNumBytes(newSize);
tree->flush();
// tree->numLeaves() only goes down the right border nodes and expects the tree to be a left max data tree.
// This is what the StructureIsValid test case is for.
EXPECT_EQ(newNumberOfLeaves, tree->_forceComputeNumLeaves());
}
TEST_P(DataTreeTest_ResizeNumBytes_P, NumLeavesIsCorrect_FromCache) {
tree->numLeaves(); // fill cache with old value
tree->resizeNumBytes(newSize);
tree->flush();
// tree->numLeaves() only goes down the right border nodes and expects the tree to be a left max data tree.