Support blob sizes >4GB and add test cases for it

This commit is contained in:
Sebastian Messmer 2015-12-11 00:18:17 +01:00
parent 6dc03a50cb
commit 38c56f6764
17 changed files with 207 additions and 54 deletions

View File

@ -45,7 +45,7 @@ void BlobOnBlocks::traverseLeaves(uint64_t beginByte, uint64_t sizeBytes, functi
_datatree->traverseLeaves(firstLeaf, endLeaf, [&func, beginByte, endByte, endLeaf, writingOutside](DataLeafNode *leaf, uint32_t leafIndex) {
uint64_t indexOfFirstLeafByte = leafIndex * leaf->maxStoreableBytes();
uint32_t dataBegin = utils::maxZeroSubtraction(beginByte, indexOfFirstLeafByte);
uint32_t dataEnd = std::min((uint64_t)leaf->maxStoreableBytes(), endByte - indexOfFirstLeafByte);
uint32_t dataEnd = std::min(leaf->maxStoreableBytes(), endByte - indexOfFirstLeafByte);
if (leafIndex == endLeaf-1 && writingOutside) {
// If we are traversing an area that didn't exist before, then the last leaf was just created with a wrong size. We have to fix it.
leaf->resize(dataEnd);

View File

@ -58,7 +58,7 @@ void DataLeafNode::fillDataWithZeroesFromTo(off_t begin, off_t end) {
node().write(ZEROES.data(), begin, end-begin);
}
uint32_t DataLeafNode::maxStoreableBytes() const {
uint64_t DataLeafNode::maxStoreableBytes() const {
return node().layout().maxBytesPerLeaf();
}

View File

@ -16,7 +16,8 @@ public:
DataLeafNode(DataNodeView block);
~DataLeafNode();
uint32_t maxStoreableBytes() const;
//Returning uint64_t, because calculations handling this probably need to be done in 64bit to support >4GB blobs.
uint64_t maxStoreableBytes() const;
void read(void *target, uint64_t offset, uint64_t size) const;
void write(const void *source, uint64_t offset, uint64_t size);

View File

@ -50,7 +50,8 @@ public:
}
//Maximum number of bytes a leaf can store
constexpr uint32_t maxBytesPerLeaf() const {
//We are returning uint64_t here, because calculations involving maxBytesPerLeaf most probably should use 64bit integers to support blobs >4GB.
constexpr uint64_t maxBytesPerLeaf() const {
return datasizeBytes();
}
private:

View File

@ -245,7 +245,7 @@ unique_ref<DataNode> DataTree::addChildTo(DataInnerNode *node) {
}
uint32_t DataTree::leavesPerFullChild(const DataInnerNode &root) const {
return utils::intPow(_nodeStore->layout().maxChildrenPerInnerNode(), root.depth()-1);
return utils::intPow(_nodeStore->layout().maxChildrenPerInnerNode(), (uint32_t)root.depth()-1);
}
uint64_t DataTree::numStoredBytes() const {
@ -281,7 +281,7 @@ void DataTree::resizeNumBytes(uint64_t newNumBytes) {
uint64_t currentNumBytes = _numStoredBytes();
ASSERT(currentNumBytes % _nodeStore->layout().maxBytesPerLeaf() == 0, "The last leaf is not a max data leaf, although we just resized it to be one.");
uint32_t currentNumLeaves = currentNumBytes / _nodeStore->layout().maxBytesPerLeaf();
uint32_t newNumLeaves = std::max(1u, utils::ceilDivision(newNumBytes, _nodeStore->layout().maxBytesPerLeaf()));
uint32_t newNumLeaves = std::max(UINT64_C(1), utils::ceilDivision(newNumBytes, _nodeStore->layout().maxBytesPerLeaf()));
for(uint32_t i = currentNumLeaves; i < newNumLeaves; ++i) {
addDataLeaf()->resize(_nodeStore->layout().maxBytesPerLeaf());
@ -292,7 +292,7 @@ void DataTree::resizeNumBytes(uint64_t newNumBytes) {
uint32_t newLastLeafSize = newNumBytes - (newNumLeaves-1)*_nodeStore->layout().maxBytesPerLeaf();
LastLeaf(_rootNode.get())->resize(newLastLeafSize);
}
ASSERT(newNumBytes == numStoredBytes(), "We resized to the wrong number of bytes");
ASSERT(newNumBytes == numStoredBytes(), "We resized to the wrong number of bytes ("+std::to_string(numStoredBytes())+" instead of "+std::to_string(newNumBytes)+")");
}
optional_ownership_ptr<DataLeafNode> DataTree::LastLeaf(DataNode *root) {
@ -319,7 +319,7 @@ unique_ref<DataLeafNode> DataTree::LastLeaf(unique_ref<DataNode> root) {
return LastLeaf(std::move(*child));
}
uint32_t DataTree::maxBytesPerLeaf() const {
uint64_t DataTree::maxBytesPerLeaf() const {
return _nodeStore->layout().maxBytesPerLeaf();
}

View File

@ -27,7 +27,8 @@ public:
~DataTree();
const blockstore::Key &key() const;
uint32_t maxBytesPerLeaf() const;
//Returning uint64_t, because calculations handling this probably need to be done in 64bit to support >4GB blobs.
uint64_t maxBytesPerLeaf() const;
void traverseLeaves(uint32_t beginIndex, uint32_t endIndex, std::function<void (datanodestore::DataLeafNode*, uint32_t)> func);
void resizeNumBytes(uint64_t newNumBytes);

View File

@ -17,7 +17,7 @@ public:
return _baseTree->key();
}
uint32_t maxBytesPerLeaf() const {
uint64_t maxBytesPerLeaf() const {
return _baseTree->maxBytesPerLeaf();
}

View File

@ -1,34 +1 @@
#include "Math.h"
#include <cmath>
namespace blobstore {
namespace onblocks {
namespace utils {
// Raises `base` to the power `exponent` by repeated multiplication.
// An exponent of 0 yields 1. The result wraps modulo 2^32 on overflow.
uint32_t intPow(uint32_t base, uint32_t exponent) {
  uint32_t power = 1;
  uint32_t remaining = exponent;
  while (remaining != 0) {
    power *= base;
    --remaining;
  }
  return power;
}
// Returns dividend / divisor, rounded up to the next integer.
// `divisor` must be non-zero.
//
// The result is built from quotient and remainder instead of the classic
// (dividend + divisor - 1) / divisor, because that sum wraps around for
// dividends close to the maximum of uint32_t and then yields a wrong result.
uint32_t ceilDivision(uint32_t dividend, uint32_t divisor) {
  const uint32_t quotient = dividend / divisor;
  return (dividend % divisor == 0) ? quotient : quotient + 1;
}
// Subtracts `subtrahend` from `minuend`, clamping the result at zero
// instead of wrapping around when the subtrahend is the larger value.
uint32_t maxZeroSubtraction(uint32_t minuend, uint32_t subtrahend) {
  const bool wouldUnderflow = minuend < subtrahend;
  return wouldUnderflow ? 0u : minuend - subtrahend;
}
// Returns the smallest exponent e such that base^e >= value, i.e. the
// logarithm of `value` to the given `base`, rounded up.
//
// This is computed with integer arithmetic only. The previous floating-point
// formulation ceil(log(value)/log(base)) is inexact: for exact powers the
// quotient can land slightly above the true integer (e.g. base 5, value 125)
// and is then rounded up one step too far.
//
// Values of 0 and 1 yield 0. A base below 2 has no meaningful logarithm;
// 0 is returned in that case instead of looping forever.
uint32_t ceilLog(uint32_t base, uint32_t value) {
  uint32_t exponent = 0;
  if (base < 2) {
    return exponent;
  }
  // Repeated ceil-division by base: ceil(ceil(v/b)/b) == ceil(v/b^2), so the
  // number of steps needed to reach 1 is exactly the rounded-up logarithm.
  for (uint32_t remaining = value; remaining > 1; ++exponent) {
    remaining = remaining / base + ((remaining % base != 0) ? 1u : 0u);
  }
  return exponent;
}
}
}
}

View File

@ -3,19 +3,41 @@
#define MESSMER_BLOBSTORE_IMPLEMENTATIONS_ONBLOCKS_UTILS_MATH_H_
#include <cstdint>
#include <cmath>
namespace blobstore {
namespace onblocks {
namespace utils {
uint32_t intPow(uint32_t base, uint32_t exponent);
uint32_t ceilDivision(uint32_t dividend, uint32_t divisor);
uint32_t maxZeroSubtraction(uint32_t minuend, uint32_t subtrahend);
uint32_t ceilLog(uint32_t base, uint32_t value);
// Raises `base` to the power `exponent` by repeated multiplication, using
// the arithmetic of INT_TYPE throughout. An exponent of 0 (or a negative
// exponent for signed types) yields 1.
template<typename INT_TYPE>
inline INT_TYPE intPow(INT_TYPE base, INT_TYPE exponent) {
  INT_TYPE power = 1;
  for (INT_TYPE remaining = exponent; remaining > 0; --remaining) {
    power *= base;
  }
  return power;
}
// Returns dividend / divisor, rounded up to the next integer.
// Intended for a non-negative dividend and a positive (non-zero) divisor.
//
// The result is built from quotient and remainder instead of the classic
// (dividend + divisor - 1) / divisor, because that sum overflows for
// dividends close to the maximum of INT_TYPE and then yields a wrong result.
template<typename INT_TYPE>
inline INT_TYPE ceilDivision(INT_TYPE dividend, INT_TYPE divisor) {
  const INT_TYPE quotient = dividend / divisor;
  const INT_TYPE remainder = dividend % divisor;
  // Integer division truncates, so only a positive remainder means the
  // quotient was rounded down and has to be bumped.
  return (remainder > 0) ? static_cast<INT_TYPE>(quotient + 1) : quotient;
}
// Subtracts `subtrahend` from `minuend`, clamping the result at zero
// whenever the subtrahend is the larger of the two.
template<typename INT_TYPE>
inline INT_TYPE maxZeroSubtraction(INT_TYPE minuend, INT_TYPE subtrahend) {
  const bool wouldUnderflow = minuend < subtrahend;
  return wouldUnderflow ? static_cast<INT_TYPE>(0)
                        : static_cast<INT_TYPE>(minuend - subtrahend);
}
// Returns the smallest exponent e such that base^e >= value, i.e. the
// logarithm of `value` to the given `base`, rounded up.
//
// This is computed with integer arithmetic only. The floating-point
// formulation ceil(log(value)/log(base)) is inexact in two ways: for exact
// powers the quotient can land slightly above the true integer (e.g. base 5,
// value 125) and gets rounded up one step too far, and 64-bit values above
// 2^53 lose precision when converted to a floating-point type.
//
// Values <= 1 yield 0. A base below 2 has no meaningful logarithm; 0 is
// returned in that case instead of looping forever.
template<typename INT_TYPE>
inline INT_TYPE ceilLog(INT_TYPE base, INT_TYPE value) {
  INT_TYPE exponent = 0;
  if (base < 2) {
    return exponent;
  }
  // Repeated ceil-division by base: ceil(ceil(v/b)/b) == ceil(v/b^2), so the
  // number of steps needed to reach 1 is exactly the rounded-up logarithm.
  for (INT_TYPE remaining = value; remaining > 1; ++exponent) {
    remaining = static_cast<INT_TYPE>(remaining / base + ((remaining % base != 0) ? 1 : 0));
  }
  return exponent;
}
}
}
}
#endif

View File

@ -0,0 +1,131 @@
#include <gtest/gtest.h>
#include <messmer/blockstore/implementations/inmemory/InMemoryBlockStore.h>
#include <messmer/blockstore/implementations/inmemory/InMemoryBlock.h>
#include <messmer/cpp-utils/data/DataFixture.h>
#include <messmer/cpp-utils/data/Data.h>
#include "../../../implementations/onblocks/BlobStoreOnBlocks.h"
#include "../../../implementations/onblocks/BlobOnBlocks.h"
using namespace blobstore;
using namespace blobstore::onblocks;
using cpputils::unique_ref;
using cpputils::make_unique_ref;
using cpputils::DataFixture;
using cpputils::Data;
using blockstore::inmemory::InMemoryBlockStore;
// Test cases, ensuring that big blobs (>4G) work (i.e. testing that we don't use any 32bit variables for blob size, etc.)
// Fixture for test cases ensuring that big blobs (>4GB) work, i.e. that no
// 32bit variables are used for blob sizes and offsets.
// Each test gets a fresh blob store backed by an in-memory block store and
// one newly created blob in it.
class BigBlobsTest : public ::testing::Test {
public:
  static constexpr size_t BLOCKSIZE = 32 * 1024;
  // Sizes are spelled in pure integer arithmetic (multiples of 512MB) so no
  // floating-point value has to be converted to uint64_t.
  static constexpr uint64_t SMALL_BLOB_SIZE = UINT64_C(7)*512*1024*1024; // 3.5 GB (<4GB)
  static constexpr uint64_t LARGE_BLOB_SIZE = UINT64_C(9)*512*1024*1024; // 4.5 GB (>4GB)

  static constexpr uint64_t max_uint_32 = std::numeric_limits<uint32_t>::max();
  static_assert(SMALL_BLOB_SIZE < max_uint_32, "SMALL_BLOB_SIZE should fit into 32bit or the test case is moot");
  static_assert(LARGE_BLOB_SIZE > max_uint_32, "LARGE_BLOB_SIZE should need 64bit or the test case is moot");

  unique_ref<BlobStore> blobStore = make_unique_ref<BlobStoreOnBlocks>(make_unique_ref<InMemoryBlockStore>(), BLOCKSIZE);
  unique_ref<Blob> blob = blobStore->create();
};

// Out-of-class definitions for the constants that get odr-used (e.g. bound
// by reference inside EXPECT_EQ). The type has to match the in-class
// declaration exactly; size_t and uint64_t are different types on some
// platforms, so the blob sizes must be defined as uint64_t.
constexpr size_t BigBlobsTest::BLOCKSIZE;
constexpr uint64_t BigBlobsTest::SMALL_BLOB_SIZE;
constexpr uint64_t BigBlobsTest::LARGE_BLOB_SIZE;
// Walks one blob through a sequence of resizes around the 4GB border and
// then flushes, reloads and removes it.
TEST_F(BigBlobsTest, Resize) {
  //All steps live in one test case and not in many small ones, because it takes quite long to create a >4GB blob.
  const auto resizeAndVerify = [this](uint64_t targetSize) {
    blob->resize(targetSize);
    EXPECT_EQ(targetSize, blob->size());
  };

  resizeAndVerify(LARGE_BLOB_SIZE);         //Resize to >4GB
  resizeAndVerify(LARGE_BLOB_SIZE + 1024);  //Grow while >4GB
  resizeAndVerify(LARGE_BLOB_SIZE);         //Shrink while >4GB
  resizeAndVerify(SMALL_BLOB_SIZE);         //Shrink to <4GB
  resizeAndVerify(LARGE_BLOB_SIZE);         //Grow to >4GB

  //Flush >4GB blob
  blob->flush();

  //Destruct >4GB blob, remembering its key so it can be loaded again
  auto blobKey = blob->key();
  cpputils::destruct(std::move(blob));

  //Load >4GB blob
  blob = blobStore->load(blobKey).value();

  //Remove >4GB blob
  blobStore->remove(std::move(blob));
}
// Writes into an empty blob a region that starts below 4GB and ends above
// it, then checks the resulting size and reads the region back.
TEST_F(BigBlobsTest, GrowByWriting_Crossing4GBBorder) {
  const uint64_t regionBegin = SMALL_BLOB_SIZE;
  Data written = DataFixture::generate(2*(LARGE_BLOB_SIZE-SMALL_BLOB_SIZE));
  blob->write(written.data(), regionBegin, written.size());

  const uint64_t expectedSize = LARGE_BLOB_SIZE+(LARGE_BLOB_SIZE-SMALL_BLOB_SIZE);
  EXPECT_EQ(expectedSize, blob->size());

  Data readBack(written.size());
  blob->read(readBack.data(), regionBegin, readBack.size());
  EXPECT_EQ(0, std::memcmp(readBack.data(), written.data(), readBack.size()));
}
// Writes 1024 bytes at an offset beyond 4GB into an empty blob, then checks
// the resulting size and reads the bytes back.
TEST_F(BigBlobsTest, GrowByWriting_Outside4GBBorder_StartingSizeZero) {
  const uint64_t regionBegin = LARGE_BLOB_SIZE;
  Data written = DataFixture::generate(1024);
  blob->write(written.data(), regionBegin, written.size());

  const uint64_t expectedSize = LARGE_BLOB_SIZE+1024;
  EXPECT_EQ(expectedSize, blob->size());

  Data readBack(written.size());
  blob->read(readBack.data(), regionBegin, readBack.size());
  EXPECT_EQ(0, std::memcmp(readBack.data(), written.data(), readBack.size()));
}
// Starts with a blob that is already larger than 4GB and writes 1024 bytes
// at an offset 1024 bytes past its end, then checks the resulting size and
// reads the bytes back.
TEST_F(BigBlobsTest, GrowByWriting_Outside4GBBorder_StartingSizeOutside4GBBorder) {
  blob->resize(LARGE_BLOB_SIZE);

  const uint64_t regionBegin = LARGE_BLOB_SIZE+1024;
  Data written = DataFixture::generate(1024);
  blob->write(written.data(), regionBegin, written.size());

  const uint64_t expectedSize = LARGE_BLOB_SIZE+2048;
  EXPECT_EQ(expectedSize, blob->size());

  Data readBack(written.size());
  blob->read(readBack.data(), regionBegin, readBack.size());
  EXPECT_EQ(0, std::memcmp(readBack.data(), written.data(), readBack.size()));
}
// Resizes the blob first so the written region (which crosses the 4GB
// border) lies fully inside it; the write must not change the blob size.
TEST_F(BigBlobsTest, ReadWriteAfterGrown_Crossing4GBBorder) {
  const uint64_t blobSize = LARGE_BLOB_SIZE+(LARGE_BLOB_SIZE-SMALL_BLOB_SIZE)+1024;
  blob->resize(blobSize);

  const uint64_t regionBegin = SMALL_BLOB_SIZE;
  Data written = DataFixture::generate(2*(LARGE_BLOB_SIZE-SMALL_BLOB_SIZE));
  blob->write(written.data(), regionBegin, written.size());

  EXPECT_EQ(blobSize, blob->size());

  Data readBack(written.size());
  blob->read(readBack.data(), regionBegin, readBack.size());
  EXPECT_EQ(0, std::memcmp(readBack.data(), written.data(), readBack.size()));
}
// Resizes the blob first so the written region (which lies entirely beyond
// 4GB) is inside it; the write must not change the blob size.
TEST_F(BigBlobsTest, ReadWriteAfterGrown_Outside4GBBorder) {
  const uint64_t blobSize = LARGE_BLOB_SIZE+2048;
  blob->resize(blobSize);

  const uint64_t regionBegin = LARGE_BLOB_SIZE;
  Data written = DataFixture::generate(1024);
  blob->write(written.data(), regionBegin, written.size());

  EXPECT_EQ(blobSize, blob->size());

  Data readBack(written.size());
  blob->read(readBack.data(), regionBegin, readBack.size());
  EXPECT_EQ(0, std::memcmp(readBack.data(), written.data(), readBack.size()));
}
//TODO Test Blob::readAll (only on 64bit systems)

View File

@ -24,7 +24,7 @@ TEST_F(DataTreeTest_NumStoredBytes, CreatedTreeIsEmpty) {
class DataTreeTest_NumStoredBytes_P: public DataTreeTest_NumStoredBytes, public WithParamInterface<uint32_t> {};
INSTANTIATE_TEST_CASE_P(EmptyLastLeaf, DataTreeTest_NumStoredBytes_P, Values(0u));
INSTANTIATE_TEST_CASE_P(HalfFullLastLeaf, DataTreeTest_NumStoredBytes_P, Values(5u, 10u));
INSTANTIATE_TEST_CASE_P(FullLastLeaf, DataTreeTest_NumStoredBytes_P, Values(DataNodeLayout(DataTreeTest_NumStoredBytes::BLOCKSIZE_BYTES).maxBytesPerLeaf()));
INSTANTIATE_TEST_CASE_P(FullLastLeaf, DataTreeTest_NumStoredBytes_P, Values((uint32_t)DataNodeLayout(DataTreeTest_NumStoredBytes::BLOCKSIZE_BYTES).maxBytesPerLeaf()));
TEST_P(DataTreeTest_NumStoredBytes_P, SingleLeaf) {
Key key = CreateLeafWithSize(GetParam())->key();

View File

@ -189,7 +189,7 @@ TEST_P(DataTreeTest_ResizeByTraversing_P, KeyDoesntChange) {
}
TEST_P(DataTreeTest_ResizeByTraversing_P, DataStaysIntact) {
uint32_t oldNumberOfLeaves = std::max(1u, ceilDivision(tree->numStoredBytes(), nodeStore->layout().maxBytesPerLeaf()));
uint32_t oldNumberOfLeaves = std::max(UINT64_C(1), ceilDivision(tree->numStoredBytes(), (uint64_t)nodeStore->layout().maxBytesPerLeaf()));
TwoLevelDataFixture data(nodeStore, TwoLevelDataFixture::SizePolicy::Unchanged);
Key key = tree->key();
cpputils::destruct(std::move(tree));

View File

@ -191,7 +191,7 @@ TEST_P(DataTreeTest_ResizeNumBytes_P, KeyDoesntChange) {
}
TEST_P(DataTreeTest_ResizeNumBytes_P, DataStaysIntact) {
uint32_t oldNumberOfLeaves = std::max(1u, ceilDivision(tree->numStoredBytes(), nodeStore->layout().maxBytesPerLeaf()));
uint32_t oldNumberOfLeaves = std::max(UINT64_C(1), ceilDivision(tree->numStoredBytes(), (uint64_t)nodeStore->layout().maxBytesPerLeaf()));
TwoLevelDataFixture data(nodeStore, TwoLevelDataFixture::SizePolicy::Unchanged);
Key key = tree->key();
cpputils::destruct(std::move(tree));

View File

@ -80,3 +80,9 @@ TEST_F(CeilDivisionTest, Divide5_5) {
TEST_F(CeilDivisionTest, DivideLargeByItself) {
EXPECT_EQ(1, ceilDivision(183495303, 183495303));
}
// ceilDivision must work on operands that do not fit into 32 bits.
TEST_F(CeilDivisionTest, 64bit) {
  const uint64_t dividend = UINT64_C(1024)*1024*1024*1024;
  const uint64_t divisor = 1024;
  // Guard: the dividend really needs more than 32 bits.
  EXPECT_GT(dividend, std::numeric_limits<uint32_t>::max());
  EXPECT_EQ(dividend/divisor, ceilDivision(dividend, divisor));
}

View File

@ -29,4 +29,11 @@ TEST_F(CeilLogTest, Log3_4) {
EXPECT_EQ(2, ceilLog(3, 4));
}
// ceilLog must work on values that do not fit into 32 bits.
TEST_F(CeilLogTest, 64bit) {
  const uint64_t base = 1024;
  const uint64_t fourthPower = base*base*base*base;
  // Guard: the value really needs more than 32 bits.
  EXPECT_GT(fourthPower, std::numeric_limits<uint32_t>::max());
  EXPECT_EQ(4, ceilLog(base, fourthPower));
}
//TODO ...

View File

@ -70,3 +70,8 @@ TEST_F(IntPowTest, ArbitraryNumbers3) {
EXPECT_EQ(282475249, intPow(7, 10));
}
// intPow must produce results that do not fit into 32 bits.
//
// The base is chosen so that its cube (2^60) still fits into uint64_t.
// Cubing 2^40 would give 2^120, which wraps to 0 in 64-bit arithmetic, so
// the test would only compare 0 with 0 and pass even for a broken intPow.
TEST_F(IntPowTest, 64bit) {
  uint64_t base = UINT64_C(1024)*1024;
  uint64_t expected = base*base*base;
  // Guard: the expected result really needs more than 32 bits.
  EXPECT_GT(expected, std::numeric_limits<uint32_t>::max());
  EXPECT_EQ(expected, intPow(base, (uint64_t)3));
}

View File

@ -38,7 +38,7 @@ TEST_F(MaxZeroSubtractionTest, SubtractPositive2) {
}
TEST_F(MaxZeroSubtractionTest, SubtractPositive3) {
EXPECT_EQ(numeric_limits<uint32_t>::max()-1, maxZeroSubtraction(numeric_limits<uint32_t>::max(), 1));
EXPECT_EQ(numeric_limits<uint32_t>::max()-1, maxZeroSubtraction(numeric_limits<uint32_t>::max(), UINT32_C(1)));
}
TEST_F(MaxZeroSubtractionTest, SubtractPositive4) {
@ -62,7 +62,7 @@ TEST_F(MaxZeroSubtractionTest, SubtractNegative4) {
}
TEST_F(MaxZeroSubtractionTest, SubtractNegative5) {
EXPECT_EQ(0, maxZeroSubtraction(5, numeric_limits<uint32_t>::max()));
EXPECT_EQ(0, maxZeroSubtraction(UINT32_C(5), numeric_limits<uint32_t>::max()));
}
TEST_F(MaxZeroSubtractionTest, SubtractFromZero1) {
@ -74,5 +74,17 @@ TEST_F(MaxZeroSubtractionTest, SubtractFromZero2) {
}
TEST_F(MaxZeroSubtractionTest, SubtractFromZero3) {
EXPECT_EQ(0, maxZeroSubtraction(0, numeric_limits<uint32_t>::max()));
EXPECT_EQ(0, maxZeroSubtraction(UINT32_C(0), numeric_limits<uint32_t>::max()));
}
// With 64-bit operands and minuend > subtrahend, the plain difference is
// returned.
TEST_F(MaxZeroSubtractionTest, 64bit_valid) {
  const uint64_t subtrahend = UINT64_C(1024)*1024*1024*1024;
  const uint64_t minuend = subtrahend*1024;
  // Guard: the operands really need more than 32 bits.
  EXPECT_GT(subtrahend, std::numeric_limits<uint32_t>::max());
  EXPECT_EQ(minuend-subtrahend, maxZeroSubtraction(minuend, subtrahend));
}
// With 64-bit operands and minuend < subtrahend, the result is clamped to 0.
TEST_F(MaxZeroSubtractionTest, 64bit_zero) {
  const uint64_t minuend = UINT64_C(1024)*1024*1024*1024;
  const uint64_t subtrahend = minuend*1024;
  // Guard: the operands really need more than 32 bits.
  EXPECT_GT(minuend, std::numeric_limits<uint32_t>::max());
  EXPECT_EQ(0, maxZeroSubtraction(minuend, subtrahend));
}