Fixed Cache race condition and added test for it

This commit is contained in:
Sebastian Messmer 2015-10-01 01:18:05 +02:00
parent c9f07762a4
commit 90471ea6a3
4 changed files with 81 additions and 9 deletions

View File

@ -6,7 +6,8 @@
#include "QueueMap.h"
#include "PeriodicTask.h"
#include <memory>
#include <mutex>
//TODO Replace with C++14 once std::shared_mutex is supported
#include <boost/thread/shared_mutex.hpp>
#include <boost/optional.hpp>
#include <future>
#include <messmer/cpp-utils/assert/assert.h>
@ -32,10 +33,10 @@ public:
private:
void _popOldEntriesParallel();
void _popOldEntries();
boost::optional<Value> _popOldEntry();
boost::optional<Value> _popOldEntry(boost::upgrade_lock<boost::shared_mutex> *lock);
static void _destructElementsInParallel(std::vector<CacheEntry<Key, Value>> *list);
mutable std::mutex _mutex;
mutable boost::shared_mutex _mutex;
QueueMap<Key, CacheEntry<Key, Value>> _cachedBlocks;
std::unique_ptr<PeriodicTask> _timeoutFlusher;
};
@ -58,7 +59,7 @@ Cache<Key, Value>::~Cache() {
template<class Key, class Value>
boost::optional<Value> Cache<Key, Value>::pop(const Key &key) {
std::lock_guard<std::mutex> lock(_mutex);
boost::unique_lock<boost::shared_mutex> lock(_mutex);
auto found = _cachedBlocks.pop(key);
if (!found) {
return boost::none;
@ -68,7 +69,7 @@ boost::optional<Value> Cache<Key, Value>::pop(const Key &key) {
template<class Key, class Value>
void Cache<Key, Value>::push(const Key &key, Value value) {
std::lock_guard<std::mutex> lock(_mutex);
boost::unique_lock<boost::shared_mutex> lock(_mutex);
ASSERT(_cachedBlocks.size() <= MAX_ENTRIES, "Cache too full");
if (_cachedBlocks.size() == MAX_ENTRIES) {
_cachedBlocks.pop();
@ -96,15 +97,25 @@ void Cache<Key, Value>::_popOldEntries() {
// This function can be called in parallel by multiple threads and will then cause the Value destructors
// to be called in parallel. The call to _popOldEntry() is synchronized to avoid race conditions,
// but the Value destructor is called in this function which is not synchronized.
boost::optional<Value> oldEntry = _popOldEntry();
// The shared upgrade_lock in here takes care that no push() or pop() operation is running while old entries are deleted.
// This would cause race conditions because pop() could return none before the destructor of the deleted element
// has finished running. Since the destructor of a cached newly created block creates it in the base block store,
// there would be a state where the block could be found neither by a pop() in the cache nor in the base store,
// so CachingBlockStore would wrongly report that the block doesn't exist.
// The shared lock is then upgraded to a unique lock in _popOldEntry, so that only one thread can work on the
// _cachedBlocks vector at the same time.
// There is a regression test case for this: CacheTest_RaceCondition.PopBlocksWhileRequestedElementIsThrownOut.
boost::upgrade_lock<boost::shared_mutex> lock(_mutex);
boost::optional<Value> oldEntry = _popOldEntry(&lock);
while (oldEntry != boost::none) {
oldEntry = _popOldEntry();
oldEntry = _popOldEntry(&lock);
}
}
template<class Key, class Value>
boost::optional<Value> Cache<Key, Value>::_popOldEntry() {
std::lock_guard<std::mutex> lock(_mutex);
boost::optional<Value> Cache<Key, Value>::_popOldEntry(boost::upgrade_lock<boost::shared_mutex> *lock) {
boost::upgrade_to_unique_lock<boost::shared_mutex> exclusiveLock(*lock);
if (_cachedBlocks.size() > 0 && _cachedBlocks.peek()->ageSeconds() > PURGE_LIFETIME_SEC) {
return _cachedBlocks.pop()->releaseValue();
} else {

View File

@ -0,0 +1,44 @@
#include "testutils/CacheTest.h"
#include <chrono>
#include <thread>
#include <messmer/cpp-utils/pointer/unique_ref.h>
#include <messmer/cpp-utils/lock/ConditionBarrier.h>
using namespace std::chrono_literals;
using namespace blockstore::caching;
using std::string;
using cpputils::unique_ref;
using cpputils::make_unique_ref;
using cpputils::ConditionBarrier;
// Test helper whose destructor signals (via the barrier) that it has started and then
// deliberately takes a long time to finish, so tests can provoke races with a
// concurrently running destructor. *destructorFinished is set only once the
// destructor has fully completed.
class ObjectWithLongDestructor {
public:
    ObjectWithLongDestructor(ConditionBarrier *onDestructorStarted, bool *destructorFinished)
            : _onDestructorStarted(onDestructorStarted), _destructorFinished(destructorFinished) {}
    // The destructor has observable side effects (releases the barrier, writes the flag),
    // so copying/moving an instance would trigger those side effects more than once.
    // Forbid it (rule of five).
    ObjectWithLongDestructor(const ObjectWithLongDestructor &) = delete;
    ObjectWithLongDestructor &operator=(const ObjectWithLongDestructor &) = delete;
    ~ObjectWithLongDestructor() {
        _onDestructorStarted->release(); // tell the test that destruction is in progress
        std::this_thread::sleep_for(std::chrono::seconds(1)); // keep the destructor running long enough for the race window
        *_destructorFinished = true;
    }
private:
    ConditionBarrier *_onDestructorStarted; // not owned
    bool *_destructorFinished;              // not owned; written exactly once
};
// Regression test for a race condition.
// An element could be in the process of being thrown out of the cache and while the destructor is running, another
// thread calls pop() for the element and gets none returned. But since the destructor isn't finished yet, the data from
// the cache element also isn't completely written back yet and an application loading it runs into a race condition.
TEST(CacheTest_RaceCondition, PopBlocksWhileRequestedElementIsThrownOut) {
    ConditionBarrier destructorStarted;
    // Bugfix: this flag was read by EXPECT_FALSE below while uninitialized, which is
    // undefined behavior and made the regression test flaky. Initialize it explicitly.
    bool destructorFinished = false;
    auto obj = make_unique_ref<ObjectWithLongDestructor>(&destructorStarted, &destructorFinished);
    Cache<int, unique_ref<ObjectWithLongDestructor>> cache;
    cache.push(2, std::move(obj));
    // Wait until the cache's timeout flusher has started destructing the entry in the background.
    destructorStarted.wait();
    EXPECT_FALSE(destructorFinished);
    // pop() must block until the destructor has finished; otherwise the caller could
    // observe "not in cache" before the element's data was written back.
    cache.pop(2);
    EXPECT_TRUE(destructorFinished);
}

View File

@ -9,9 +9,19 @@ public:
// Copy constructor: delegates to the value constructor and records the copy
// in numCopyConstructorCalled so tests can count copy operations.
CopyableMovableValueType(const CopyableMovableValueType &rhs): CopyableMovableValueType(rhs._value) {
++numCopyConstructorCalled;
}
// Copy assignment: copies the value and increments the same counter as the copy
// constructor — presumably the counter tracks copy operations in general, not
// only construction (NOTE(review): confirm the intended counter semantics).
CopyableMovableValueType &operator=(const CopyableMovableValueType &rhs) {
_value = rhs._value;
++numCopyConstructorCalled;
return *this;
}
// Move constructor: delegates to the value constructor. Moves must not be
// counted as copies, so the counter is intentionally left untouched.
CopyableMovableValueType(CopyableMovableValueType &&rhs): CopyableMovableValueType(rhs._value) {
//Don't increase numCopyConstructorCalled
}
// Move assignment: takes over the value. Like the move constructor, it must not
// increment the copy counter, so tests can distinguish copies from moves.
CopyableMovableValueType &operator=(CopyableMovableValueType &&rhs) {
//Don't increase numCopyConstructorCalled
_value = rhs._value;
return *this;
}
// Returns the stored value.
int value() const {
return _value;
}

View File

@ -19,6 +19,13 @@ public:
rhs._isMoved = true;
}
// Move assignment: takes over rhs's value and marks rhs as moved-from.
MinimalValueType &operator=(MinimalValueType &&rhs) {
  // Guard against self-move: without it, the object would read its own value and
  // then mark itself moved-from via the rhs._isMoved = true below.
  if (this != &rhs) {
    _value = rhs.value();
    _isMoved = false;
    rhs._isMoved = true;
  }
  return *this;
}
~MinimalValueType() {
ASSERT(!_isDestructed, "Object was already destructed before");
--instances;