Fix traversal

Sebastian Messmer 2016-07-12 01:04:33 +02:00
parent 98b85ea8b6
commit 582c9c1a4c
6 changed files with 82 additions and 53 deletions

View File

@@ -37,26 +37,36 @@ void BlobOnBlocks::resize(uint64_t numBytes) {
_sizeCache = numBytes;
}
void BlobOnBlocks::traverseLeaves(uint64_t beginByte, uint64_t sizeBytes, function<void (uint64_t leafOffset, DataLeafNode *leaf, uint32_t begin, uint32_t count)> onExistingLeaf, function<Data (uint64_t leafOffset, uint32_t count)> onCreateLeaf) const {
void BlobOnBlocks::traverseLeaves(uint64_t beginByte, uint64_t sizeBytes, function<void (uint64_t leafOffset, DataLeafNode *leaf, uint32_t begin, uint32_t count)> onExistingLeaf, function<Data (uint64_t leafOffset, uint32_t begin, uint32_t count)> onCreateLeaf) const {
uint64_t endByte = beginByte + sizeBytes;
uint32_t firstLeaf = beginByte / _datatree->maxBytesPerLeaf();
uint32_t endLeaf = utils::ceilDivision(endByte, _datatree->maxBytesPerLeaf());
bool writingOutside = size() < endByte; // TODO Calling size() is slow because it has to traverse the tree. Instead: recognize situation by looking at current leaf size in lambda below
uint64_t maxBytesPerLeaf = _datatree->maxBytesPerLeaf();
uint32_t firstLeaf = beginByte / maxBytesPerLeaf;
uint32_t endLeaf = utils::ceilDivision(endByte, maxBytesPerLeaf);
bool writingOutside = size() < endByte; // TODO Calling size() is slow because it has to traverse the tree. Instead: recognize situation by looking at current leaf size in lambda below
auto _onExistingLeaf = [&onExistingLeaf, beginByte, endByte, endLeaf, writingOutside, maxBytesPerLeaf] (uint32_t leafIndex, DataLeafNode *leaf) {
uint64_t indexOfFirstLeafByte = leafIndex * maxBytesPerLeaf;
ASSERT(endByte > indexOfFirstLeafByte, "Traversal went too far right");
uint32_t dataBegin = utils::maxZeroSubtraction(beginByte, indexOfFirstLeafByte);
uint32_t dataEnd = std::min(maxBytesPerLeaf, endByte - indexOfFirstLeafByte);
if (leafIndex == endLeaf-1 && writingOutside) {
// If we are traversing an area that didn't exist before, then the last leaf was just created with a wrong size. We have to fix it.
// If we are traversing an area that didn't exist before (i.e. in the area of the last leaf that wasn't used before), then the last leaf might have a wrong size. We have to fix it.
leaf->resize(dataEnd);
}
onExistingLeaf(indexOfFirstLeafByte, leaf, dataBegin, dataEnd-dataBegin);
};
auto _onCreateLeaf = [&onCreateLeaf, maxBytesPerLeaf, endByte] (uint32_t leafIndex) -> Data {
auto _onCreateLeaf = [&onCreateLeaf, maxBytesPerLeaf, beginByte, endByte] (uint32_t leafIndex) -> Data {
uint64_t indexOfFirstLeafByte = leafIndex * maxBytesPerLeaf;
ASSERT(endByte > indexOfFirstLeafByte, "Traversal went too far right");
uint32_t dataBegin = utils::maxZeroSubtraction(beginByte, indexOfFirstLeafByte);
uint32_t dataEnd = std::min(maxBytesPerLeaf, endByte - indexOfFirstLeafByte);
auto data = onCreateLeaf(indexOfFirstLeafByte, dataEnd);
Data data = onCreateLeaf(indexOfFirstLeafByte, dataBegin, dataEnd-dataBegin);
// If this leaf is created but only partly in the traversed region (i.e. dataBegin > 0), we have to fill the data before the traversed region with zeroes.
if (dataBegin != 0) {
Data actualData(dataBegin + data.size());
std::memset(actualData.data(), 0, dataBegin);
std::memcpy(actualData.dataOffset(dataBegin), data.data(), data.size());
data = std::move(actualData);
}
ASSERT(data.size() == dataEnd, "Returned leaf data with wrong size");
return data;
};
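To make the new per-leaf arithmetic easier to follow, here is a small standalone sketch of what the two lambdas compute. The concrete numbers (maxBytesPerLeaf = 1024, a traversal of bytes 1500..3500) are hypothetical and only for illustration; utils::ceilDivision and utils::maxZeroSubtraction are replaced by plain-C++ stand-ins.

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
    const uint64_t maxBytesPerLeaf = 1024;              // hypothetical leaf size
    const uint64_t beginByte = 1500, sizeBytes = 2000;  // traversed region: bytes 1500..3499
    const uint64_t endByte = beginByte + sizeBytes;     // 3500
    const uint32_t firstLeaf = beginByte / maxBytesPerLeaf;                      // 1
    const uint32_t endLeaf = (endByte + maxBytesPerLeaf - 1) / maxBytesPerLeaf;  // ceilDivision -> 4
    for (uint32_t leafIndex = firstLeaf; leafIndex < endLeaf; ++leafIndex) {
        const uint64_t indexOfFirstLeafByte = leafIndex * maxBytesPerLeaf;
        // maxZeroSubtraction(beginByte, indexOfFirstLeafByte)
        const uint64_t dataBegin = beginByte > indexOfFirstLeafByte ? beginByte - indexOfFirstLeafByte : 0;
        const uint64_t dataEnd = std::min(maxBytesPerLeaf, endByte - indexOfFirstLeafByte);
        // Prints: leaf 1: begin=476 count=548 / leaf 2: begin=0 count=1024 / leaf 3: begin=0 count=428
        std::printf("leaf %u: begin=%llu count=%llu\n", (unsigned)leafIndex,
                    (unsigned long long)dataBegin, (unsigned long long)(dataEnd - dataBegin));
    }
    return 0;
}

For a created leaf that lies only partly inside the traversed region (leaf 1 above, if it didn't exist yet), onCreateLeaf returns count = dataEnd - dataBegin bytes and the wrapper prepends dataBegin zero bytes, so the stored leaf has exactly dataEnd bytes, which is what the ASSERT at the end checks.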
@@ -93,7 +103,7 @@ void BlobOnBlocks::_read(void *target, uint64_t offset, uint64_t count) const {
//TODO Simplify formula, make it easier to understand
leaf->read((uint8_t*)target + indexOfFirstLeafByte - offset + leafDataOffset, leafDataOffset, leafDataSize);
};
auto onCreateLeaf = [] (uint64_t /*indexOfFirstLeafByte*/, uint32_t /*count*/) -> Data {
auto onCreateLeaf = [] (uint64_t /*indexOfFirstLeafByte*/, uint32_t /*begin*/, uint32_t /*count*/) -> Data {
ASSERT(false, "Reading shouldn't create new leaves.");
};
traverseLeaves(offset, count, onExistingLeaf, onCreateLeaf);
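The pointer formula in onExistingLeaf reads more easily as "absolute blob position minus read offset": indexOfFirstLeafByte + leafDataOffset is where this chunk starts inside the blob, and subtracting offset maps that position into the caller's buffer. Worked out with the same hypothetical numbers as in the sketch above (offset = 1500, maxBytesPerLeaf = 1024):

// leaf 1: indexOfFirstLeafByte = 1024, leafDataOffset = 476, leafDataSize = 548
//   target + 1024 - 1500 + 476 == target + 0      (blob bytes 1500..2047 -> buffer[0..547])
// leaf 2: indexOfFirstLeafByte = 2048, leafDataOffset = 0, leafDataSize = 1024
//   target + 2048 - 1500 + 0   == target + 548    (blob bytes 2048..3071 -> buffer[548..1571])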
@@ -104,9 +114,10 @@ void BlobOnBlocks::write(const void *source, uint64_t offset, uint64_t count) {
//TODO Simplify formula, make it easier to understand
leaf->write((uint8_t*)source + indexOfFirstLeafByte - offset + leafDataOffset, leafDataOffset, leafDataSize);
};
auto onCreateLeaf = [source, offset] (uint64_t indexOfFirstLeafByte, uint32_t count) -> Data {
auto onCreateLeaf = [source, offset] (uint64_t indexOfFirstLeafByte, uint32_t leafDataOffset, uint32_t count) -> Data {
Data result(count);
std::memcpy(result.data(), (uint8_t*)source + indexOfFirstLeafByte - offset, count);
//TODO Simplify formula, make it easier to understand
std::memcpy(result.data(), (uint8_t*)source + indexOfFirstLeafByte - offset + leafDataOffset, count);
return result;
};
traverseLeaves(offset, count, onExistingLeaf, onCreateLeaf);
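The same mapping is the reason for the new "+ leafDataOffset" in the write path: onCreateLeaf can now start in the middle of a newly created leaf (leafDataOffset > 0), so the source pointer has to skip those bytes as well. Continuing the hypothetical numbers from above (offset = 1500):

// a newly created leaf at indexOfFirstLeafByte = 1024 with leafDataOffset = 476 and count = 548:
//   source + 1024 - 1500 + 476 == source + 0      (the first 548 source bytes fill leaf bytes 476..1023;
//                                                   bytes 0..475 are zero-filled by the wrapper in traverseLeaves)
// Without the "+ leafDataOffset" term the copy would start 476 bytes before source.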

View File

@@ -38,7 +38,7 @@ public:
private:
void _read(void *target, uint64_t offset, uint64_t count) const;
void traverseLeaves(uint64_t offsetBytes, uint64_t sizeBytes, std::function<void (uint64_t leafOffset, datanodestore::DataLeafNode *leaf, uint32_t begin, uint32_t count)> onExistingLeaf, std::function<cpputils::Data (uint64_t leafOffset, uint32_t count)> onCreateLeaf) const;
void traverseLeaves(uint64_t offsetBytes, uint64_t sizeBytes, std::function<void (uint64_t leafOffset, datanodestore::DataLeafNode *leaf, uint32_t begin, uint32_t count)> onExistingLeaf, std::function<cpputils::Data (uint64_t leafOffset, uint32_t begin, uint32_t count)> onCreateLeaf) const;
cpputils::unique_ref<parallelaccessdatatreestore::DataTreeRef> _datatree;
mutable boost::optional<uint64_t> _sizeCache;

View File

@@ -262,6 +262,7 @@ uint64_t DataTree::_numStoredBytes(const DataNode &root) const {
}
void DataTree::resizeNumBytes(uint64_t newNumBytes) {
//TODO Use LeafTraverser here. For growing, that should be trivial. Maybe there is even a way to make it for shrinking? Maybe a callback for inner nodes?
//TODO Can we resize in parallel? Especially creating new blocks (i.e. encrypting them) is expensive and should be done in parallel.
boost::upgrade_lock<shared_mutex> lock(_mutex);
{
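A rough illustration of the first TODO: growing could presumably delegate to LeafTraverser::traverse (signature shown further down in this commit) by traversing only the new last leaf and creating zero-filled max-size leaves on the way. This is a hypothetical sketch, not part of this commit; the constructor argument and how root, newNumLeaves and maxBytesPerLeaf are obtained are assumptions.

// Hypothetical sketch only:
// LeafTraverser(_nodeStore).traverse(root, newNumLeaves - 1, newNumLeaves,
//     [] (uint32_t /*index*/, DataLeafNode* /*leaf*/) {},  // nothing to do for leaves that already exist
//     [maxBytesPerLeaf] (uint32_t /*index*/) -> Data { return Data(maxBytesPerLeaf).FillWithZeroes(); });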

View File

@@ -26,19 +26,22 @@ namespace blobstore {
void LeafTraverser::traverse(DataNode *root, uint32_t beginIndex, uint32_t endIndex, function<void (uint32_t index, DataLeafNode* leaf)> onExistingLeaf, function<Data (uint32_t index)> onCreateLeaf) {
ASSERT(beginIndex <= endIndex, "Invalid parameters");
//TODO Test cases with numLeaves < / >= beginIndex, ideally test all configurations:
// beginIndex<endIndex<numLeaves, beginIndex=endIndex<numLeaves, beginIndex<endIndex=numLeaves, beginIndex=endIndex=numLeaves
// beginIndex<numLeaves<endIndex, beginIndex=numLeaves<endIndex,
// numLeaves<beginIndex<endIndex, numLeaves<beginIndex=endIndex
_traverseExistingSubtree(root, beginIndex, endIndex, 0, false, onExistingLeaf, onCreateLeaf);
}
void LeafTraverser::_traverseExistingSubtree(DataNode *root, uint32_t beginIndex, uint32_t endIndex, uint32_t leafOffset, bool growLastExistingLeaf, function<void (uint32_t index, DataLeafNode* leaf)> onExistingLeaf, function<Data (uint32_t index)> onCreateLeaf) {
void LeafTraverser::_traverseExistingSubtree(DataNode *root, uint32_t beginIndex, uint32_t endIndex, uint32_t leafOffset, bool growLastLeaf, function<void (uint32_t index, DataLeafNode* leaf)> onExistingLeaf, function<Data (uint32_t index)> onCreateLeaf) {
ASSERT(beginIndex <= endIndex, "Invalid parameters");
//TODO Test cases with numLeaves < / >= beginIndex
DataLeafNode *leaf = dynamic_cast<DataLeafNode*>(root);
if (leaf != nullptr) {
ASSERT(beginIndex <= 1 && endIndex <= 1, "If root node is a leaf, the (sub)tree has only one leaf - access indices must be 0 or 1.");
if (beginIndex == 0 && endIndex == 1) {
if (growLastExistingLeaf) {
if (growLastLeaf) {
leaf->resize(_nodeStore->layout().maxBytesPerLeaf());
}
onExistingLeaf(leafOffset, leaf);
@@ -52,19 +55,23 @@ namespace blobstore {
uint32_t beginChild = beginIndex/leavesPerChild;
uint32_t endChild = utils::ceilDivision(endIndex, leavesPerChild);
uint32_t numChildren = inner->numChildren();
bool shouldGrowLastExistingLeaf = growLastExistingLeaf || endChild > numChildren;
//ASSERT(!growLastLeaf || endChild == numChildren, "Can only grow last leaf when it is existing");
bool shouldGrowLastExistingLeaf = growLastLeaf || endChild > numChildren;
// If we traverse outside of the valid region, we still have to descend into the valid region to grow the last leaf
if (beginChild >= numChildren && shouldGrowLastExistingLeaf) {
ASSERT(beginChild > 0, "This can only happen for numChildren==0.");
auto childKey = inner->getChild(beginChild-1)->key();
// If we traverse directly outside of the valid region (i.e. usually would only traverse to new leaves and not to the last leaf),
// we still have to descend to the last leaf to grow it.
if (beginChild >= numChildren) {
ASSERT(numChildren > 0, "Node doesn't have children.");
auto childKey = inner->getChild(numChildren-1)->key();
auto childNode = _nodeStore->load(childKey);
if (childNode == none) {
throw std::runtime_error("Couldn't find child node "+childKey.ToString());
}
_traverseExistingSubtree(childNode->get(), leavesPerChild-1, leavesPerChild, leafOffset - 1, true,
//TODO this causes a negative leafOffset. Better: Make leafOffset generally absolute, i.e. += beginIndex?
uint32_t negativeChildOffset = (numChildren-1) * leavesPerChild;
_traverseExistingSubtree(childNode->get(), leavesPerChild-1, leavesPerChild, leafOffset - negativeChildOffset, true,
[] (uint32_t /*index*/, DataLeafNode* /*leaf*/) {},
[] (uint32_t /*index*/) -> Data {ASSERT(false, "We only want to grow the last leaf. We shouldn't create leaves.");});
_createMaxSizeLeaf());
}
// Traverse existing children
@@ -81,51 +88,52 @@ namespace blobstore {
_traverseExistingSubtree(childNode->get(), localBeginIndex, localEndIndex, leafOffset + childOffset, shouldGrowLastExistingLeaf && isLastChild, onExistingLeaf, onCreateLeaf);
}
// Traverse gap children (children after currently last leaf that are not traversed, i.e. before the first traversed leaf)
// Traverse new children (including gap children, i.e. children that are created but not traversed because they're to the right of the current size, but to the left of the traversal region)
for (uint32_t childIndex = numChildren; childIndex < endChild; ++childIndex) {
uint32_t childOffset = childIndex * leavesPerChild;
uint32_t localBeginIndex = std::min(leavesPerChild, utils::maxZeroSubtraction(beginIndex, childOffset));
uint32_t localEndIndex = std::min(leavesPerChild, endIndex - childOffset);
ASSERT(beginIndex <= childOffset, "Range for creating new children has to contain their first leaf.");
uint64_t maxBytesPerLeaf = _nodeStore->layout().maxBytesPerLeaf();
auto child = _createNewSubtree(localEndIndex, leafOffset + childOffset, inner->depth()-1, [maxBytesPerLeaf] (uint32_t /*index*/) {
return Data(maxBytesPerLeaf).FillWithZeroes();
});
inner->addChild(*child);
}
// Traverse new children (children after currently last leaf that are traversed)
for (uint32_t childIndex = std::max(beginChild, numChildren); childIndex < endChild; ++childIndex) {
uint32_t childOffset = childIndex * leavesPerChild;
uint32_t localEndIndex = std::min(leavesPerChild, endIndex - childOffset);
ASSERT(beginIndex <= childOffset, "Range for creating new children has to contain their first leaf.");
auto child = _createNewSubtree(localEndIndex, leafOffset + childOffset, inner->depth()-1, onCreateLeaf);
auto leafCreator = (childIndex >= beginChild) ? onCreateLeaf : _createMaxSizeLeaf();
auto child = _createNewSubtree(localBeginIndex, localEndIndex, leafOffset + childOffset, inner->depth() - 1, leafCreator);
inner->addChild(*child);
}
}
unique_ref<DataNode> LeafTraverser::_createNewSubtree(uint32_t numLeaves, uint32_t leafOffset, uint8_t depth, function<Data (uint32_t index)> onCreateLeaf) {
ASSERT(depth > 0, "Wrong depth given");
if (1 == depth) {
ASSERT(numLeaves == 1, "With depth 1, we can only create one leaf.");
auto data = onCreateLeaf(leafOffset);
// TODO Performance: Directly create with data.
unique_ref<DataNode> LeafTraverser::_createNewSubtree(uint32_t beginIndex, uint32_t endIndex, uint32_t leafOffset, uint8_t depth, function<Data (uint32_t index)> onCreateLeaf) {
ASSERT(beginIndex <= endIndex, "Invalid parameters");
if (0 == depth) {
ASSERT(beginIndex <= 1 && endIndex == 1, "With depth 0, we can only traverse one or zero leaves (i.e. traverse one leaf or traverse a gap leaf).");
auto leafCreator = (beginIndex == 0) ? onCreateLeaf : _createMaxSizeLeaf();
auto data = leafCreator(leafOffset);
// TODO Performance: Directly create leaf node with data.
auto node = _nodeStore->createNewLeafNode();
node->resize(data.size());
node->write(data.data(), 0, data.size());
return node;
}
uint8_t minNeededDepth = utils::ceilLog(_nodeStore->layout().maxChildrenPerInnerNode(), (uint64_t)numLeaves);
uint8_t minNeededDepth = utils::ceilLog(_nodeStore->layout().maxChildrenPerInnerNode(), (uint64_t)endIndex);
ASSERT(depth >= minNeededDepth, "Given tree depth doesn't fit given number of leaves to create.");
uint32_t leavesPerChild = _maxLeavesForTreeDepth(depth-1);
uint32_t endChild = utils::ceilDivision(numLeaves, leavesPerChild);
uint32_t beginChild = beginIndex/leavesPerChild;
uint32_t endChild = utils::ceilDivision(endIndex, leavesPerChild);
vector<unique_ref<DataNode>> children;
children.reserve(endChild);
for(uint32_t childIndex = 0; childIndex < endChild; ++childIndex) {
// TODO Remove redundancy of following two for loops by using min/max for calculating the parameters of the recursive call.
// Create gap children (i.e. children before the traversal but after the current size)
for (uint32_t childIndex = 0; childIndex < beginChild; ++childIndex) {
uint32_t childOffset = childIndex * leavesPerChild;
uint32_t localNumLeaves = std::min(leavesPerChild, numLeaves - childOffset);
auto child = _createNewSubtree(localNumLeaves, leafOffset + childOffset, depth - 1, onCreateLeaf);
auto child = _createNewSubtree(leavesPerChild, leavesPerChild, leafOffset + childOffset, depth - 1,
[] (uint32_t /*index*/)->Data {ASSERT(false, "We're only creating gap leaves here, not traversing any.");});
children.push_back(std::move(child));
}
// Create new children that are traversed
for(uint32_t childIndex = beginChild; childIndex < endChild; ++childIndex) {
uint32_t childOffset = childIndex * leavesPerChild;
uint32_t localBeginIndex = utils::maxZeroSubtraction(beginIndex, childOffset);
uint32_t localEndIndex = std::min(leavesPerChild, endIndex - childOffset);
auto child = _createNewSubtree(localBeginIndex, localEndIndex, leafOffset + childOffset, depth - 1, onCreateLeaf);
children.push_back(std::move(child));
}
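The child-index arithmetic in _traverseExistingSubtree and _createNewSubtree follows the same pattern; here is a small worked instance with a hypothetical fan-out of maxChildrenPerInnerNode = 4 and a depth-2 root (leavesPerChild = 4):

// Traverse beginIndex = 5, endIndex = 11 (leaves 5..10):
//   beginChild = 5 / 4 = 1            endChild = ceilDivision(11, 4) = 3
//   child 1 (childOffset = 4): localBeginIndex = min(4, max(0, 5-4)) = 1, localEndIndex = min(4, 11-4) = 4  -> leaves 5..7
//   child 2 (childOffset = 8): localBeginIndex = min(4, max(0, 5-8)) = 0, localEndIndex = min(4, 11-8) = 3  -> leaves 8..10
// If, say, only child 0 existed so far (numChildren = 1), children 1 and 2 would be newly created,
// and the untraversed leaf 4 (local index 0 of child 1) would become a zero-filled gap leaf from _createMaxSizeLeaf.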
@@ -138,10 +146,17 @@ namespace blobstore {
return newNode;
}
uint32_t LeafTraverser::_maxLeavesForTreeDepth(uint8_t depth) {
uint32_t LeafTraverser::_maxLeavesForTreeDepth(uint8_t depth) const {
return utils::intPow(_nodeStore->layout().maxChildrenPerInnerNode(), (uint64_t)depth);
}
function<Data (uint32_t index)> LeafTraverser::_createMaxSizeLeaf() const {
uint64_t maxBytesPerLeaf = _nodeStore->layout().maxBytesPerLeaf();
return [maxBytesPerLeaf] (uint32_t /*index*/) -> Data {
return Data(maxBytesPerLeaf).FillWithZeroes();
};
}
}
}
}
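For reference, _maxLeavesForTreeDepth is simply the fan-out raised to the given depth; with the hypothetical fan-out of 4 from the example above:

// _maxLeavesForTreeDepth(depth) = intPow(maxChildrenPerInnerNode, depth)
// fan-out 4: depth 0 -> 1 leaf, depth 1 -> 4 leaves, depth 2 -> 16 leaves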

View File

@@ -30,9 +30,10 @@ namespace blobstore {
private:
datanodestore::DataNodeStore *_nodeStore;
void _traverseExistingSubtree(datanodestore::DataNode *root, uint32_t beginIndex, uint32_t endIndex, uint32_t leafOffset, bool growLastExistingLeaf, std::function<void (uint32_t index, datanodestore::DataLeafNode* leaf)> onExistingLeaf, std::function<cpputils::Data (uint32_t index)> onCreateLeaf);
cpputils::unique_ref<datanodestore::DataNode> _createNewSubtree(uint32_t numLeaves, uint32_t leafOffset, uint8_t depth, std::function<cpputils::Data (uint32_t index)> onCreateLeaf);
uint32_t _maxLeavesForTreeDepth(uint8_t depth);
void _traverseExistingSubtree(datanodestore::DataNode *root, uint32_t beginIndex, uint32_t endIndex, uint32_t leafOffset, bool growLastLeaf, std::function<void (uint32_t index, datanodestore::DataLeafNode* leaf)> onExistingLeaf, std::function<cpputils::Data (uint32_t index)> onCreateLeaf);
cpputils::unique_ref<datanodestore::DataNode> _createNewSubtree(uint32_t beginIndex, uint32_t endIndex, uint32_t leafOffset, uint8_t depth, std::function<cpputils::Data (uint32_t index)> onCreateLeaf);
uint32_t _maxLeavesForTreeDepth(uint8_t depth) const;
std::function<cpputils::Data (uint32_t index)> _createMaxSizeLeaf() const;
DISALLOW_COPY_AND_ASSIGN(LeafTraverser);
};

View File

@@ -111,7 +111,8 @@ public:
uint32_t oldNumLeaves = tree->numLeaves();
uint32_t newNumLeaves = oldNumLeaves + numLeavesToAdd;
//TODO Test cases where beginIndex is inside of the existing leaves
tree->traverseLeaves(newNumLeaves-1, newNumLeaves, [] (uint32_t, DataLeafNode*){}, [] (uint32_t count) -> Data { return Data(count).FillWithZeroes();});
uint64_t maxBytesPerLeaf = tree->maxBytesPerLeaf();
tree->traverseLeaves(newNumLeaves-1, newNumLeaves, [] (uint32_t, DataLeafNode*){}, [maxBytesPerLeaf] (uint32_t) -> Data { return Data(maxBytesPerLeaf).FillWithZeroes();});
tree->flush();
}