Improve traverseLeaves
This commit is contained in:
parent
0d860fa4f0
commit
dde89da556
@ -32,6 +32,7 @@ using cpputils::optional_ownership_ptr;
|
|||||||
using cpputils::WithOwnership;
|
using cpputils::WithOwnership;
|
||||||
using cpputils::WithoutOwnership;
|
using cpputils::WithoutOwnership;
|
||||||
using cpputils::unique_ref;
|
using cpputils::unique_ref;
|
||||||
|
using cpputils::Data;
|
||||||
|
|
||||||
namespace blobstore {
|
namespace blobstore {
|
||||||
namespace onblocks {
|
namespace onblocks {
|
||||||
@ -169,7 +170,7 @@ uint32_t DataTree::_computeNumLeaves(const DataNode &node) const {
|
|||||||
return numLeavesInLeftChildren + numLeavesInRightChild;
|
return numLeavesInLeftChildren + numLeavesInRightChild;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DataTree::traverseLeaves(uint32_t beginIndex, uint32_t endIndex, function<void (DataLeafNode*, uint32_t)> func) {
|
void DataTree::traverseLeaves(uint32_t beginIndex, uint32_t endIndex, std::function<void (uint32_t index, datanodestore::DataLeafNode* leaf)> onExistingLeaf, std::function<cpputils::Data (uint32_t index)> onCreateLeaf) {
|
||||||
//TODO Can we traverse in parallel?
|
//TODO Can we traverse in parallel?
|
||||||
boost::upgrade_lock<shared_mutex> lock(_mutex); //TODO Rethink locking here. We probably need locking when the traverse resizes the blob. Otherwise, parallel traverse should be possible. We already allow it below by freeing the upgrade_lock, but we currently only allow it if ALL traverses are entirely inside the valid region. Can we allow more parallelity?
|
boost::upgrade_lock<shared_mutex> lock(_mutex); //TODO Rethink locking here. We probably need locking when the traverse resizes the blob. Otherwise, parallel traverse should be possible. We already allow it below by freeing the upgrade_lock, but we currently only allow it if ALL traverses are entirely inside the valid region. Can we allow more parallelity?
|
||||||
auto exclusiveLock = std::make_unique<boost::upgrade_to_unique_lock<shared_mutex>>(lock);
|
auto exclusiveLock = std::make_unique<boost::upgrade_to_unique_lock<shared_mutex>>(lock);
|
||||||
@ -189,40 +190,49 @@ void DataTree::traverseLeaves(uint32_t beginIndex, uint32_t endIndex, function<v
|
|||||||
if (numLeaves <= beginIndex) {
|
if (numLeaves <= beginIndex) {
|
||||||
//TODO Test cases with numLeaves < / >= beginIndex
|
//TODO Test cases with numLeaves < / >= beginIndex
|
||||||
// There is a gap between the current size and the begin of the traversal
|
// There is a gap between the current size and the begin of the traversal
|
||||||
_traverseLeaves(_rootNode.get(), 0, numLeaves-1, endIndex, [beginIndex, numLeaves, &func, this](DataLeafNode* node, uint32_t index) {
|
auto _onExistingLeaf = [numLeaves, &onExistingLeaf, this](uint32_t index, DataLeafNode* node) {
|
||||||
if (index >= beginIndex) {
|
if (index == numLeaves - 1) {
|
||||||
func(node, index);
|
// It is the old last leaf - resize it to maximum
|
||||||
} else if (index == numLeaves - 1) {
|
node->resize(_nodeStore->layout().maxBytesPerLeaf());
|
||||||
// It is the old last leaf - resize it to maximum
|
}
|
||||||
node->resize(_nodeStore->layout().maxBytesPerLeaf());
|
onExistingLeaf(index, node);
|
||||||
}
|
};
|
||||||
});
|
auto _onCreateLeaf = [beginIndex, &onCreateLeaf, this](uint32_t index) {
|
||||||
|
if (index < beginIndex) {
|
||||||
|
// Create empty leaves in the gap
|
||||||
|
return Data(_nodeStore->layout().maxBytesPerLeaf()).FillWithZeroes();
|
||||||
|
} else {
|
||||||
|
return onCreateLeaf(index);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
_traverseLeaves(_rootNode.get(), 0, numLeaves-1, endIndex, _onExistingLeaf, _onCreateLeaf);
|
||||||
ASSERT(endIndex >= _numLeavesCache.value(), "We should be outside of the valid region, i.e. outside of the old size");
|
ASSERT(endIndex >= _numLeavesCache.value(), "We should be outside of the valid region, i.e. outside of the old size");
|
||||||
_numLeavesCache = endIndex;
|
_numLeavesCache = endIndex;
|
||||||
} else if (numLeaves < endIndex) {
|
} else if (numLeaves < endIndex) {
|
||||||
// We are starting traversal in the valid region, but traverse until after it (we grow new leaves)
|
// We are starting traversal in the valid region, but traverse until after it (we grow new leaves)
|
||||||
_traverseLeaves(_rootNode.get(), 0, beginIndex, endIndex, [numLeaves, &func, this] (DataLeafNode *node, uint32_t index) {
|
auto _onExistingLeaf = [numLeaves, &onExistingLeaf, this] (uint32_t index, DataLeafNode *node) {
|
||||||
if (index == numLeaves - 1) {
|
if (index == numLeaves - 1) {
|
||||||
// It is the old last leaf - resize it to maximum
|
// It is the old last leaf - resize it to maximum
|
||||||
node->resize(_nodeStore->layout().maxBytesPerLeaf());
|
node->resize(_nodeStore->layout().maxBytesPerLeaf());
|
||||||
}
|
}
|
||||||
func(node, index);
|
onExistingLeaf(index, node);
|
||||||
});
|
};
|
||||||
|
_traverseLeaves(_rootNode.get(), 0, beginIndex, endIndex, _onExistingLeaf, onCreateLeaf);
|
||||||
ASSERT(endIndex >= _numLeavesCache.value(), "We should be outside of the valid region, i.e. outside of the old size");
|
ASSERT(endIndex >= _numLeavesCache.value(), "We should be outside of the valid region, i.e. outside of the old size");
|
||||||
_numLeavesCache = endIndex;
|
_numLeavesCache = endIndex;
|
||||||
} else {
|
} else {
|
||||||
//We are traversing entirely inside the valid region
|
//We are traversing entirely inside the valid region
|
||||||
exclusiveLock.reset(); // we can allow parallel traverses, if all are entirely inside the valid region.
|
exclusiveLock.reset(); // we can allow parallel traverses, if all are entirely inside the valid region.
|
||||||
_traverseLeaves(_rootNode.get(), 0, beginIndex, endIndex, func);
|
_traverseLeaves(_rootNode.get(), 0, beginIndex, endIndex, onExistingLeaf, onCreateLeaf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DataTree::_traverseLeaves(DataNode *root, uint32_t leafOffset, uint32_t beginIndex, uint32_t endIndex, function<void (DataLeafNode*, uint32_t)> func) {
|
void DataTree::_traverseLeaves(DataNode *root, uint32_t leafOffset, uint32_t beginIndex, uint32_t endIndex, std::function<void (uint32_t index, datanodestore::DataLeafNode* leaf)> onExistingLeaf, std::function<cpputils::Data (uint32_t index)> onCreateLeaf) {
|
||||||
DataLeafNode *leaf = dynamic_cast<DataLeafNode*>(root);
|
DataLeafNode *leaf = dynamic_cast<DataLeafNode*>(root);
|
||||||
if (leaf != nullptr) {
|
if (leaf != nullptr) {
|
||||||
ASSERT(beginIndex <= 1 && endIndex <= 1, "If root node is a leaf, the (sub)tree has only one leaf - access indices must be 0 or 1.");
|
ASSERT(beginIndex <= 1 && endIndex <= 1, "If root node is a leaf, the (sub)tree has only one leaf - access indices must be 0 or 1.");
|
||||||
if (beginIndex == 0 && endIndex == 1) {
|
if (beginIndex == 0 && endIndex == 1) {
|
||||||
func(leaf, leafOffset);
|
onExistingLeaf(leafOffset, leaf);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -231,32 +241,41 @@ void DataTree::_traverseLeaves(DataNode *root, uint32_t leafOffset, uint32_t beg
|
|||||||
uint32_t leavesPerChild = leavesPerFullChild(*inner);
|
uint32_t leavesPerChild = leavesPerFullChild(*inner);
|
||||||
uint32_t beginChild = beginIndex/leavesPerChild;
|
uint32_t beginChild = beginIndex/leavesPerChild;
|
||||||
uint32_t endChild = utils::ceilDivision(endIndex, leavesPerChild);
|
uint32_t endChild = utils::ceilDivision(endIndex, leavesPerChild);
|
||||||
vector<unique_ref<DataNode>> children = getOrCreateChildren(inner, beginChild, endChild);
|
|
||||||
|
|
||||||
for (uint32_t childIndex = beginChild; childIndex < endChild; ++childIndex) {
|
for (uint32_t childIndex = beginChild; childIndex < std::min(inner->numChildren(), endChild); ++childIndex) {
|
||||||
|
auto child = _nodeStore->load(inner->getChild(childIndex)->key());
|
||||||
|
ASSERT(child != none, "Couldn't load child node");
|
||||||
uint32_t childOffset = childIndex * leavesPerChild;
|
uint32_t childOffset = childIndex * leavesPerChild;
|
||||||
uint32_t localBeginIndex = utils::maxZeroSubtraction(beginIndex, childOffset);
|
uint32_t localBeginIndex = utils::maxZeroSubtraction(beginIndex, childOffset);
|
||||||
uint32_t localEndIndex = std::min(leavesPerChild, endIndex - childOffset);
|
uint32_t localEndIndex = std::min(leavesPerChild, endIndex - childOffset);
|
||||||
auto child = std::move(children[childIndex-beginChild]);
|
_traverseLeaves(child->get(), leafOffset + childOffset, localBeginIndex, localEndIndex, onExistingLeaf, onCreateLeaf);
|
||||||
_traverseLeaves(child.get(), leafOffset + childOffset, localBeginIndex, localEndIndex, func);
|
}
|
||||||
|
for (uint32_t childIndex = inner->numChildren(); childIndex < endChild; ++childIndex) {
|
||||||
|
uint32_t childOffset = childIndex * leavesPerChild;
|
||||||
|
uint32_t localEndIndex = std::min(leavesPerChild, endIndex - childOffset);
|
||||||
|
auto child = _createSubtree(leafOffset, localEndIndex, onCreateLeaf);
|
||||||
|
inner->addChild(child.key());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<unique_ref<DataNode>> DataTree::getOrCreateChildren(DataInnerNode *node, uint32_t begin, uint32_t end) {
|
unique_ref<DataNode> DataTree::_createSubtree(uint32_t leafOffset, uint32_t numLeaves, std::function<cpputils::Data (uint32_t index)> onCreateLeaf) {
|
||||||
vector<unique_ref<DataNode>> children;
|
if (numLeaves == 1) {
|
||||||
children.reserve(end-begin);
|
auto data = onCreateLeaf(leafOffset);
|
||||||
for (uint32_t childIndex = begin; childIndex < std::min(node->numChildren(), end); ++childIndex) {
|
ASSERT(data.size() <= _nodeStore->layout().maxBytesPerLeaf(), "Too much data for a leaf");
|
||||||
auto child = _nodeStore->load(node->getChild(childIndex)->key());
|
//TODO More efficient by _nodeStore->createNewLeafNode(data);
|
||||||
ASSERT(child != none, "Couldn't load child node");
|
auto leaf = _nodeStore->createNewLeafNode();
|
||||||
children.emplace_back(std::move(*child));
|
leaf->resize(data.size());
|
||||||
|
leaf->write(data.data(), 0, data.size());
|
||||||
|
return leaf;
|
||||||
|
} else {
|
||||||
|
uint32_t numLeafGroups = utils::ceilDivision(numLeaves, _nodeStore->layout().maxChildrenPerInnerNode());
|
||||||
|
vector<unique_ref<DataNode>> children;
|
||||||
|
children.reserve(numLeafGroups);
|
||||||
|
for (uint32_t i = 0; i < numLeafGroups; ++i) {
|
||||||
|
children.push_back(_createSubtree())
|
||||||
|
...
|
||||||
|
}
|
||||||
}
|
}
|
||||||
for (uint32_t childIndex = node->numChildren(); childIndex < end; ++childIndex) {
|
|
||||||
//TODO This creates each child with one chain to one leaf only, and then on the next lower level it
|
|
||||||
// has to create the children for the child. Would be faster to directly create full trees if necessary.
|
|
||||||
children.emplace_back(addChildTo(node));
|
|
||||||
}
|
|
||||||
ASSERT(children.size() == end-begin, "Number of children in the result is wrong");
|
|
||||||
return children;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unique_ref<DataNode> DataTree::addChildTo(DataInnerNode *node) {
|
unique_ref<DataNode> DataTree::addChildTo(DataInnerNode *node) {
|
||||||
|
@ -30,7 +30,7 @@ public:
|
|||||||
//Returning uint64_t, because calculations handling this probably need to be done in 64bit to support >4GB blobs.
|
//Returning uint64_t, because calculations handling this probably need to be done in 64bit to support >4GB blobs.
|
||||||
uint64_t maxBytesPerLeaf() const;
|
uint64_t maxBytesPerLeaf() const;
|
||||||
|
|
||||||
void traverseLeaves(uint32_t beginIndex, uint32_t endIndex, std::function<void (datanodestore::DataLeafNode*, uint32_t)> func);
|
void traverseLeaves(uint32_t beginIndex, uint32_t endIndex, std::function<void (uint32_t index, datanodestore::DataLeafNode* leaf)> onExistingLeaf, std::function<cpputils::Data (uint32_t index)> onCreateLeaf);
|
||||||
void resizeNumBytes(uint64_t newNumBytes);
|
void resizeNumBytes(uint64_t newNumBytes);
|
||||||
|
|
||||||
uint32_t numLeaves() const;
|
uint32_t numLeaves() const;
|
||||||
@ -62,7 +62,7 @@ private:
|
|||||||
void ifRootHasOnlyOneChildReplaceRootWithItsChild();
|
void ifRootHasOnlyOneChildReplaceRootWithItsChild();
|
||||||
|
|
||||||
//TODO Use underscore for private methods
|
//TODO Use underscore for private methods
|
||||||
void _traverseLeaves(datanodestore::DataNode *root, uint32_t leafOffset, uint32_t beginIndex, uint32_t endIndex, std::function<void (datanodestore::DataLeafNode*, uint32_t)> func);
|
void _traverseLeaves(datanodestore::DataNode *root, uint32_t leafOffset, uint32_t beginIndex, uint32_t endIndex, std::function<void (uint32_t index, datanodestore::DataLeafNode* leaf)> onExistingLeaf, std::function<cpputils::Data (uint32_t index)> onCreateLeaf);
|
||||||
uint32_t leavesPerFullChild(const datanodestore::DataInnerNode &root) const;
|
uint32_t leavesPerFullChild(const datanodestore::DataInnerNode &root) const;
|
||||||
uint64_t _numStoredBytes() const;
|
uint64_t _numStoredBytes() const;
|
||||||
uint64_t _numStoredBytes(const datanodestore::DataNode &root) const;
|
uint64_t _numStoredBytes(const datanodestore::DataNode &root) const;
|
||||||
@ -71,7 +71,7 @@ private:
|
|||||||
cpputils::optional_ownership_ptr<datanodestore::DataLeafNode> LastLeaf(datanodestore::DataNode *root);
|
cpputils::optional_ownership_ptr<datanodestore::DataLeafNode> LastLeaf(datanodestore::DataNode *root);
|
||||||
cpputils::unique_ref<datanodestore::DataLeafNode> LastLeaf(cpputils::unique_ref<datanodestore::DataNode> root);
|
cpputils::unique_ref<datanodestore::DataLeafNode> LastLeaf(cpputils::unique_ref<datanodestore::DataNode> root);
|
||||||
datanodestore::DataInnerNode* increaseTreeDepth(unsigned int levels);
|
datanodestore::DataInnerNode* increaseTreeDepth(unsigned int levels);
|
||||||
std::vector<cpputils::unique_ref<datanodestore::DataNode>> getOrCreateChildren(datanodestore::DataInnerNode *node, uint32_t begin, uint32_t end);
|
cpputils::unique_ref<datanodestore::DataNode> _createSubtree(uint32_t leafOffset, uint32_t numLeaves, std::function<cpputils::Data (uint32_t index)> onCreateLeaf);
|
||||||
cpputils::unique_ref<datanodestore::DataNode> addChildTo(datanodestore::DataInnerNode *node);
|
cpputils::unique_ref<datanodestore::DataNode> addChildTo(datanodestore::DataInnerNode *node);
|
||||||
|
|
||||||
DISALLOW_COPY_AND_ASSIGN(DataTree);
|
DISALLOW_COPY_AND_ASSIGN(DataTree);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user