472 lines
17 KiB
Python
Raw Normal View History

2022-06-24 17:14:37 +02:00
import asyncio
import contextlib
from concurrent.futures import ALL_COMPLETED
from async_timeout import timeout as async_timeout
from ..log import sentinel_logger
from ..pubsub import Receiver
from ..pool import create_pool, ConnectionsPool
from ..errors import (
MasterNotFoundError,
SlaveNotFoundError,
PoolClosedError,
RedisError,
MasterReplyError,
SlaveReplyError,
)
from ..util import CloseEvent
# Address marker for discovery
_NON_DISCOVERED = object()
_logger = sentinel_logger.getChild('monitor')
async def create_sentinel_pool(sentinels, *, db=None, password=None,
                               encoding=None, minsize=1, maxsize=10,
                               ssl=None, parser=None, timeout=0.2, loop=None):
    """Build a :class:`SentinelPool` and run initial discovery on it.

    ``sentinels`` must be a list or tuple of sentinel addresses; all other
    keyword arguments are forwarded to the pool constructor.  Returns the
    pool only after ``discover()`` has completed.
    """
    # FIXME: revise default timeout value
    assert isinstance(sentinels, (list, tuple)), sentinels
    # TODO: deprecation note
    # if loop is None:
    #     loop = asyncio.get_event_loop()
    pool = SentinelPool(
        sentinels,
        db=db,
        password=password,
        ssl=ssl,
        encoding=encoding,
        parser=parser,
        minsize=minsize,
        maxsize=maxsize,
        timeout=timeout,
        loop=loop,
    )
    await pool.discover()
    return pool
class SentinelPool:
    """Sentinel connections pool.

    Holds connection pools to known and discovered (TBD) Sentinels
    as well as lazily-created pools for the monitored services
    (masters/slaves).  A background task subscribed to all sentinel
    pub/sub channels watches for ``+odown`` events and marks the
    affected service for rediscovery.
    """

    def __init__(self, sentinels, *, db=None, password=None, ssl=None,
                 encoding=None, parser=None, minsize, maxsize, timeout,
                 loop=None):
        # TODO: deprecation note
        # if loop is None:
        #     loop = asyncio.get_event_loop()
        # TODO: add connection/discover timeouts;
        #   and what to do if no master is found:
        #   (raise error or try forever or try until timeout)
        # XXX: _sentinels is unordered
        self._sentinels = set(sentinels)
        # Single timeout value reused for every discovery-related call.
        self._timeout = timeout
        self._pools = []    # list of sentinel connection pools
        self._masters = {}  # service name -> ManagedPool (master)
        self._slaves = {}   # service name -> ManagedPool (slave)
        # Redis connection options replicated onto every ManagedPool.
        self._parser_class = parser
        self._redis_db = db
        self._redis_password = password
        self._redis_ssl = ssl
        self._redis_encoding = encoding
        self._redis_minsize = minsize
        self._redis_maxsize = maxsize
        self._close_state = CloseEvent(self._do_close)
        self._close_waiter = None
        # Receiver collects pub/sub messages from all sentinel pools;
        # discover() psubscribes it to pattern '*'.
        self._monitor = monitor = Receiver()

        async def echo_events():
            # Background consumer of sentinel pub/sub events.  On an
            # '+odown' (objectively down) event for a master, flag the
            # service so its pools rediscover their address.
            try:
                while await monitor.wait_message():
                    _, (ev, data) = await monitor.get(encoding='utf-8')
                    # NOTE(review): channel name appears to arrive as bytes
                    # even with encoding set — decoded here; confirm against
                    # Receiver.get semantics.
                    ev = ev.decode('utf-8')
                    _logger.debug("%s: %s", ev, data)
                    if ev in ('+odown',):
                        typ, name, *tail = data.split(' ')
                        if typ == 'master':
                            self._need_rediscover(name)
                # TODO: parse messages;
                #   watch +new-epoch which signals `failover in progres`
                #       freeze reconnection
                #       wait / discover new master (find proper way)
                #       unfreeze reconnection
                #
                #   discover master in default way
                #       get-master-addr...
                #       connnect
                #       role
                #       etc...
            except asyncio.CancelledError:
                # Cancelled by _do_close(); exit quietly.
                pass
        self._monitor_task = asyncio.ensure_future(echo_events())

    @property
    def discover_timeout(self):
        """Timeout (seconds) for Redis/Sentinel command calls during
        master/slave address discovery.
        """
        return self._timeout

    def master_for(self, service):
        """Returns wrapper to master's pool for requested service.

        The ManagedPool is created lazily on first request and cached.
        """
        # TODO: make it coroutine and connect minsize connections
        if service not in self._masters:
            self._masters[service] = ManagedPool(
                self, service, is_master=True,
                db=self._redis_db,
                password=self._redis_password,
                encoding=self._redis_encoding,
                minsize=self._redis_minsize,
                maxsize=self._redis_maxsize,
                ssl=self._redis_ssl,
                parser=self._parser_class,
                )
        return self._masters[service]

    def slave_for(self, service):
        """Returns wrapper to slave's pool for requested service.

        The ManagedPool is created lazily on first request and cached.
        """
        # TODO: make it coroutine and connect minsize connections
        if service not in self._slaves:
            self._slaves[service] = ManagedPool(
                self, service, is_master=False,
                db=self._redis_db,
                password=self._redis_password,
                encoding=self._redis_encoding,
                minsize=self._redis_minsize,
                maxsize=self._redis_maxsize,
                ssl=self._redis_ssl,
                parser=self._parser_class,
                )
        return self._slaves[service]

    def execute(self, command, *args, **kwargs):
        """Execute sentinel command.

        Raises PoolClosedError if the pool is closed.

        NOTE(review): the loop returns on the first pool, so only the
        first sentinel pool is ever used; no failover to other pools.
        """
        # TODO: choose pool
        #   kwargs can be used to control which sentinel to use
        if self.closed:
            raise PoolClosedError("Sentinel pool is closed")
        for pool in self._pools:
            return pool.execute(command, *args, **kwargs)
        # how to handle errors and pick other pool?
        #   is the only way to make it coroutine?

    @property
    def closed(self):
        """True if pool is closed or closing."""
        return self._close_state.is_set()

    def close(self):
        """Close all controlled connections (both sentinel and redis)."""
        if not self._close_state.is_set():
            self._close_state.set()

    async def _do_close(self):
        # Cancels the monitor task and closes every sentinel / master /
        # slave pool, waiting for all of them to finish closing.
        # TODO: lock
        tasks = []
        task, self._monitor_task = self._monitor_task, None
        task.cancel()
        tasks.append(task)
        while self._pools:
            pool = self._pools.pop(0)
            pool.close()
            tasks.append(pool.wait_closed())
        while self._masters:
            _, pool = self._masters.popitem()
            pool.close()
            tasks.append(pool.wait_closed())
        while self._slaves:
            _, pool = self._slaves.popitem()
            pool.close()
            tasks.append(pool.wait_closed())
        await asyncio.gather(*tasks)

    async def wait_closed(self):
        """Wait until pool gets closed."""
        await self._close_state.wait()

    async def discover(self, timeout=None):    # TODO: better name?
        """Discover sentinels and all monitored services within given timeout.

        If no sentinels discovered within timeout: TimeoutError is raised.
        If some sentinels were discovered but not all it is ok.
        If not all monitored services (masters/slaves) discovered
        (or connections established) it is ok.
        TBD: what if some sentinels/services unreachable;
        """
        # TODO: check not closed
        # TODO: discovery must be done with some customizable timeout.
        if timeout is None:
            timeout = self.discover_timeout
        tasks = []
        pools = []
        for addr in self._sentinels:    # iterate over unordered set
            tasks.append(self._connect_sentinel(addr, timeout, pools))
        done, pending = await asyncio.wait(tasks,
                                           return_when=ALL_COMPLETED)
        assert not pending, ("Expected all tasks to complete", done, pending)
        # NOTE(review): this loop inspects results but does nothing with
        # them; connection errors are effectively ignored here (FIXME).
        for task in done:
            result = task.result()
            if isinstance(result, Exception):
                continue    # FIXME
        if not pools:
            raise Exception("Could not connect to any sentinel")
        # Swap in the freshly connected pools; `pools` now holds the
        # previous generation, which gets closed below.
        pools, self._pools[:] = self._pools[:], pools
        # TODO: close current connections
        for pool in pools:
            pool.close()
            await pool.wait_closed()
        # TODO: discover peer sentinels
        # Subscribe the shared monitor Receiver to every channel on each
        # new sentinel pool so echo_events() sees failover notifications.
        for pool in self._pools:
            await pool.execute_pubsub(
                b'psubscribe', self._monitor.pattern('*'))

    async def _connect_sentinel(self, address, timeout, pools):
        """Try to connect to specified Sentinel returning either
        connections pool or exception.

        On success the new pool is also appended to ``pools``.
        """
        try:
            with async_timeout(timeout):
                pool = await create_pool(
                    address, minsize=1, maxsize=2,
                    parser=self._parser_class,
                    )
            pools.append(pool)
            return pool
        except asyncio.TimeoutError as err:
            sentinel_logger.debug(
                "Failed to connect to Sentinel(%r) within %ss timeout",
                address, timeout)
            return err
        except Exception as err:
            sentinel_logger.debug(
                "Error connecting to Sentinel(%r): %r", address, err)
            return err

    async def discover_master(self, service, timeout):
        """Perform Master discovery for specified service.

        Iterates over the known sentinels, asks each for the master's
        address, opens a connection and verifies its role; returns the
        first verified connection.  Raises MasterNotFoundError when no
        sentinel yields a usable master, or MasterReplyError on a Redis
        error from the discovered server.
        """
        # TODO: get lock
        idle_timeout = timeout
        # FIXME: single timeout used 4 times;
        #   meaning discovery can take up to:
        #   3 * timeout * (sentinels count)
        #
        #   having one global timeout also can leed to
        #   a problem when not all sentinels are checked.

        # use a copy, cause pools can change
        pools = self._pools[:]
        for sentinel in pools:
            try:
                with async_timeout(timeout):
                    address = await self._get_masters_address(
                        sentinel, service)

                pool = self._masters[service]
                # ExitStack closes the new connection if role
                # verification fails; pop_all() disarms it on success.
                with async_timeout(timeout), \
                        contextlib.ExitStack() as stack:
                    conn = await pool._create_new_connection(address)
                    stack.callback(conn.close)
                    await self._verify_service_role(conn, 'master')
                    stack.pop_all()

                return conn
            except asyncio.CancelledError:
                # we must correctly handle CancelledError(s):
                #   application may be stopped or function can be cancelled
                #   by outer timeout, so we must stop the look up.
                raise
            except asyncio.TimeoutError:
                continue
            except DiscoverError as err:
                sentinel_logger.debug("DiscoverError(%r, %s): %r",
                                      sentinel, service, err)
                # Back off before trying the next sentinel.
                await asyncio.sleep(idle_timeout)
                continue
            except RedisError as err:
                raise MasterReplyError("Service {} error".format(service), err)
            except Exception:
                # TODO: clear (drop) connections to schedule reconnect
                await asyncio.sleep(idle_timeout)
                continue
        # Otherwise
        raise MasterNotFoundError("No master found for {}".format(service))

    async def discover_slave(self, service, timeout, **kwargs):
        """Perform Slave discovery for specified service.

        Same shape as discover_master() but picks a reachable slave;
        raises SlaveNotFoundError / SlaveReplyError.
        """
        # TODO: use kwargs to change how slaves are picked up
        #   (eg: round-robin, priority, random, etc)
        idle_timeout = timeout
        # use a copy, cause pools can change
        pools = self._pools[:]
        for sentinel in pools:
            try:
                with async_timeout(timeout):
                    address = await self._get_slave_address(
                        sentinel, service)  # add **kwargs
                pool = self._slaves[service]
                # Close the new connection unless role verification passes.
                with async_timeout(timeout), \
                        contextlib.ExitStack() as stack:
                    conn = await pool._create_new_connection(address)
                    stack.callback(conn.close)
                    await self._verify_service_role(conn, 'slave')
                    stack.pop_all()
                return conn
            except asyncio.CancelledError:
                raise
            except asyncio.TimeoutError:
                continue
            except DiscoverError:
                await asyncio.sleep(idle_timeout)
                continue
            except RedisError as err:
                raise SlaveReplyError("Service {} error".format(service), err)
            except Exception:
                await asyncio.sleep(idle_timeout)
                continue
        raise SlaveNotFoundError("No slave found for {}".format(service))

    async def _get_masters_address(self, sentinel, service):
        # Ask one sentinel for the master's (ip, port); raises
        # UnknownService if the sentinel does not monitor the service
        # and BadState if the master is flagged down/disconnected.
        # NOTE: we don't use `get-master-addr-by-name`
        #   as it can provide stale data so we repeat
        #   after redis-py and check service flags.
        state = await sentinel.execute(b'sentinel', b'master',
                                       service, encoding='utf-8')
        if not state:
            raise UnknownService()
        state = make_dict(state)
        address = state['ip'], int(state['port'])
        flags = set(state['flags'].split(','))
        if {'s_down', 'o_down', 'disconnected'} & flags:
            raise BadState(state)
        return address

    async def _get_slave_address(self, sentinel, service):
        # Find and return single slave address: the first slave whose
        # flags do not mark it as down or disconnected.
        slaves = await sentinel.execute(b'sentinel', b'slaves',
                                        service, encoding='utf-8')
        if not slaves:
            raise UnknownService()
        for state in map(make_dict, slaves):
            address = state['ip'], int(state['port'])
            flags = set(state['flags'].split(','))
            if {'s_down', 'o_down', 'disconnected'} & flags:
                continue
            return address
        raise BadState()    # XXX: only last state

    async def _verify_service_role(self, conn, role):
        # Issue ROLE on the new connection and compare the first reply
        # element against the expected role string.
        res = await conn.execute(b'role', encoding='utf-8')
        if res[0] != role:
            raise RoleMismatch(res)

    def _need_rediscover(self, service):
        # Reset cached addresses on both master and slave pools of the
        # service so the next acquire triggers fresh discovery.
        sentinel_logger.debug("Must redisover service %s", service)
        pool = self._masters.get(service)
        if pool:
            pool.need_rediscover()
        pool = self._slaves.get(service)
        if pool:
            pool.need_rediscover()
class ManagedPool(ConnectionsPool):
    """Connections pool whose address is resolved through Sentinel.

    Starts with the _NON_DISCOVERED address marker; the first connection
    attempt triggers master/slave discovery via the owning SentinelPool,
    and dropped/closed connections flag the service for rediscovery.
    """

    def __init__(self, sentinel, service, is_master,
                 db=None, password=None, encoding=None, parser=None,
                 *, minsize, maxsize, ssl=None, loop=None):
        # Pass the sentinel marker as the address; real address is filled
        # in by _create_new_connection() after discovery succeeds.
        super().__init__(_NON_DISCOVERED,
                         db=db, password=password, encoding=encoding,
                         minsize=minsize, maxsize=maxsize, ssl=ssl,
                         parser=parser, loop=loop)
        assert self._address is _NON_DISCOVERED
        self._sentinel = sentinel       # owning SentinelPool
        self._service = service         # monitored service name
        self._is_master = is_master     # True -> master pool, False -> slave
        # self._discover_timeout = .2

    @property
    def address(self):
        # None while the service address has not been discovered yet.
        if self._address is _NON_DISCOVERED:
            return
        return self._address

    def get_connection(self, command, args=()):
        # While undiscovered, report "no free connection" and hand back
        # the marker so the caller falls through to connection creation.
        if self._address is _NON_DISCOVERED:
            return None, _NON_DISCOVERED
        return super().get_connection(command, args)

    async def _create_new_connection(self, address):
        if address is _NON_DISCOVERED:
            # Perform service discovery.
            # Returns Connection or raises error if no service can be found.
            await self._do_clear()  # make `clear` blocking

            if self._is_master:
                conn = await self._sentinel.discover_master(
                    self._service, timeout=self._sentinel.discover_timeout)
            else:
                conn = await self._sentinel.discover_slave(
                    self._service, timeout=self._sentinel.discover_timeout)
            # Cache the discovered address for subsequent connections.
            self._address = conn.address
            sentinel_logger.debug("Discoverred new address %r for %s",
                                  conn.address, self._service)
            return conn
        return await super()._create_new_connection(address)

    def _drop_closed(self):
        # Detect whether the superclass actually removed closed
        # connections from the free pool by comparing sizes.
        diff = len(self._pool)
        super()._drop_closed()
        diff -= len(self._pool)
        if diff:
            # closed connections were in pool:
            # * reset address;
            # * notify sentinel pool
            sentinel_logger.debug(
                "Dropped %d closed connnection(s); must rediscover", diff)
            self._sentinel._need_rediscover(self._service)

    async def acquire(self, command=None, args=()):
        # Drop stale connections before acquiring when the address was
        # invalidated by need_rediscover().
        if self._address is _NON_DISCOVERED:
            await self.clear()
        return await super().acquire(command, args)

    def release(self, conn):
        was_closed = conn.closed
        super().release(conn)
        # if connection was closed while used and not by release()
        if was_closed:
            sentinel_logger.debug(
                "Released closed connection; must rediscover")
            self._sentinel._need_rediscover(self._service)

    def need_rediscover(self):
        # Invalidate the cached address; next acquire rediscovers.
        self._address = _NON_DISCOVERED
def make_dict(plain_list):
    """Build a dict from a flat ``[k1, v1, k2, v2, ...]`` sequence.

    A trailing unpaired element is silently dropped.
    """
    items = list(plain_list)
    return dict(zip(items[::2], items[1::2]))
class DiscoverError(Exception):
    """Base class for internal master/slave discovery errors."""


class BadState(DiscoverError):
    """Sentinel reported the master/slave in a bad state
    (e.g. flagged s_down / o_down / disconnected)."""


class UnknownService(DiscoverError):
    """The queried sentinel does not monitor the requested service."""


class RoleMismatch(DiscoverError):
    """Discovered server reported a role other than the expected one."""