2021-07-05 20:49:49 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
2022-11-09 16:01:28 +01:00
|
|
|
"""A simple module to get the RDAP server for a given domain name, IP
|
|
|
|
prefix or object, from the IANA databases specified in RFC 9224/8521.
|
2021-07-05 20:49:49 +02:00
|
|
|
"""
|
|
|
|
|
|
|
|
# http://python-requests.org/ for easier HTTPS retrieval
|
|
|
|
import requests
|
|
|
|
|
|
|
|
import datetime
|
|
|
|
import json
|
|
|
|
import os
|
|
|
|
import sys
|
2021-07-07 15:23:26 +02:00
|
|
|
import time
|
2021-07-07 11:13:36 +02:00
|
|
|
import fcntl
|
2021-07-07 15:50:47 +02:00
|
|
|
import pickle
|
2022-10-08 16:59:11 +02:00
|
|
|
import pathlib
|
2021-07-05 20:49:49 +02:00
|
|
|
|
2022-11-09 16:01:28 +01:00
|
|
|
# URL of the IANA bootstrap registry for each supported category name.
IANABASES = {
    "domains": "https://data.iana.org/rdap/dns.json",
    "v4prefixes": "https://data.iana.org/rdap/ipv4.json",
    "v6prefixes": "https://data.iana.org/rdap/ipv6.json",
    "as": "https://data.iana.org/rdap/asn.json",
    "objects": "https://data.iana.org/rdap/object-tags.json",
}

# Default cache directory: under $XDG_CACHE_HOME when it is set, otherwise
# under $HOME, otherwise no cache at all (None).
if "XDG_CACHE_HOME" in os.environ:
    CACHE = os.environ["XDG_CACHE_HOME"] + "/ianardap"
elif "HOME" in os.environ:
    CACHE = os.environ["HOME"] + "/.ianardapcaches"
else:
    CACHE = None

# Hours. Used only if the server no longer gives the information.
MAXAGE = 24
# Seconds
IANATIMEOUT = 10
# Maximum attempts to get the database
MAXTESTS = 3

# Don't touch
HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S %Z"
|
|
|
|
|
|
|
|
# RFC 9111, section 5.2
|
|
|
|
def parse_cachecontrol(h):
    """Parse the value of an HTTP Cache-Control header (RFC 9111,
    section 5.2).

    h is the raw header value, a comma-separated list of directives.

    Returns a dictionary mapping each directive name, lowercased, to its
    argument as a string, or to None when the directive has no argument.
    Surrounding double quotes of a quoted-string argument are removed, so
    max-age=60 and max-age="60" give the same result. Empty list members
    (empty header, trailing comma) are ignored.

    Limitation: the header is split on every comma, so a quoted argument
    which itself contains a comma is not handled.
    """
    result = {}
    for directive in h.split(","):
        directive = directive.strip()
        if not directive:
            # Empty list member: nothing to record.
            continue
        # Split on the first "=" only: the argument may itself contain "=".
        key, separator, value = directive.partition("=")
        if separator:
            value = value.strip()
            if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
                value = value[1:-1]
        else:
            value = None
        result[key.strip().lower()] = value
    return result
|
|
|
|
|
|
|
|
def parse_expires(h):
    """Parse the value of an HTTP Expires header.

    h is the raw header value, which must match HTTP_DATE_FORMAT.

    Returns the datetime.datetime built by strptime(). Raises ValueError
    (from strptime) if h does not match the format.
    """
    return datetime.datetime.strptime(h, HTTP_DATE_FORMAT)
|
|
|
|
|
2021-07-05 20:49:49 +02:00
|
|
|
class IanaRDAPDatabase():
    """One IANA RDAP bootstrap registry, loaded in memory, with an optional
    cache on disk.

    After construction, the following attributes are available:
    category (the requested category), description, publication and
    version (copied from the registry), retrieved (datetime of the
    download, or modification time of the cache file when the cache was
    used) and services (dictionary mapping a lowercased TLD, or an
    uppercased RFC 8521 registry tag, to the list of RDAP server URLs).
    """

    def __init__(self, category="domains", maxage=None, cachedir=CACHE,
                 pickleformat=False):
        """Retrieves the IANA database, if not already cached. maxage is in
        hours. The cachedir is a directory (it will be created if not already
        existent); a false value disables the cache. pickleformat is not the
        default because it is not really faster *and* it introduces security
        risks if someone can write in the file (see the documentation of the
        module).

        Raises Exception if the database cannot be obtained after MAXTESTS
        attempts or if the category is not supported, and KeyError if a
        download is needed for a category unknown in IANABASES.
        """
        cache_valid = False
        self.category = category
        self.cachefile = None
        self.lockname = None
        self.expirationfile = None
        if cachedir:
            # makedirs also creates missing parent directories and does not
            # fail if another process creates the directory at the same time.
            os.makedirs(cachedir, exist_ok=True)
            cachefile = os.path.join(cachedir, category)
            if pickleformat:
                self.cachefile = cachefile + ".pickle"
            else:
                self.cachefile = cachefile + ".json"
            self.lockname = self.cachefile + ".lock"
            self.expirationfile = self.cachefile + ".expires"
        if maxage is not None:
            # NOTE(review): this moves the expiration of an *existing* cache
            # to now + maxage at every construction; confirm it is intended.
            self.expirationtime = time.time() + maxage * 3600
            self._touch_expirationfile()
        loaded = False
        tests = 0
        errmsg = "No error"
        while not loaded and tests < MAXTESTS:
            self.lock()
            try:
                content = self._read_fresh_cache()
            finally:
                self.unlock()
            if content is not None:
                if pickleformat:
                    try:
                        # SECURITY: unpickling can execute arbitrary code. Use
                        # pickleformat only if nobody else can write in the
                        # cache directory.
                        database = pickle.loads(content)
                    except (pickle.UnpicklingError, EOFError):
                        tests += 1
                        errmsg = "Invalid pickle content in %s" % self.cachefile
                        self._remove_cachefile()
                        continue
                else:
                    try:
                        database = json.loads(content)
                    except json.decoder.JSONDecodeError:
                        tests += 1
                        errmsg = "Invalid JSON content in %s" % self.cachefile
                        self._remove_cachefile()
                        continue
                loaded = True
                self.retrieved = datetime.datetime.fromtimestamp(os.path.getmtime(self.cachefile))
                cache_valid = True
            else:
                url = IANABASES[category]
                response = requests.get(url, timeout=IANATIMEOUT)
                # Check the status first: the headers and body of an error
                # response say nothing about the database.
                if response.status_code != 200:
                    time.sleep(2)
                    tests += 1
                    errmsg = "Invalid HTTPS return code when trying to get %s: %s" % (url, response.status_code)
                    continue
                content = response.content
                try:
                    database = json.loads(content)
                except (json.decoder.JSONDecodeError, UnicodeDecodeError):
                    tests += 1
                    errmsg = "Invalid JSON retrieved from %s" % url
                    continue
                # Only now is the download known to be usable.
                loaded = True
                self.retrieved = datetime.datetime.now()
                self.expirationtime = self._expiration_from_headers(response.headers)
                self._touch_expirationfile()
        if not loaded:
            raise Exception("Cannot read IANA database: %s" % errmsg)
        self.description = database["description"]
        self.publication = database["publication"]
        self.version = database["version"]
        self.services = self._index_services(database)
        if self.cachefile and not cache_valid:
            self.lock()
            try:
                with open(self.cachefile, "wb") as cache:
                    if pickleformat:
                        cache.write(pickle.dumps(database))
                    else:
                        cache.write(content)
            finally:
                self.unlock()

    def _touch_expirationfile(self):
        """Create or truncate the expiration file, if there is one, and set
        its access and modification times to self.expirationtime."""
        if self.expirationfile:
            with open(self.expirationfile, 'w'):
                pass
            os.utime(self.expirationfile,
                     times=(self.expirationtime, self.expirationtime))

    def _read_fresh_cache(self):
        """Return the content of the cache file, as bytes, or None if there
        is no cache file or if it has expired. The expiration date is the
        modification time of the expiration file. The caller is expected to
        hold the lock."""
        if not self.cachefile or not os.path.exists(self.cachefile):
            return None
        if not os.path.exists(self.expirationfile):
            return None
        if os.path.getmtime(self.expirationfile) <= time.time():
            return None
        with open(self.cachefile, "rb") as cache:
            return cache.read()

    def _remove_cachefile(self):
        """Delete an unusable cache file, without mercy. It is not an error
        if it has already disappeared."""
        try:
            os.remove(self.cachefile)
        except FileNotFoundError:
            pass

    def _expiration_from_headers(self, headers):
        """Return the expiration date of a response, in seconds since the
        epoch, from its HTTP headers: Cache-Control max-age first, then
        Expires, then MAXAGE hours from now if the server gives nothing."""
        now = time.time()
        if "cache-control" in headers:
            directives = parse_cachecontrol(headers["cache-control"])
            if "max-age" in directives:
                try:
                    return now + int(directives["max-age"])
                except (TypeError, ValueError):
                    # Missing or non-numeric argument: try the other sources.
                    pass
        if "expires" in headers:
            try:
                expires = parse_expires(headers["expires"])
            except ValueError:
                # RFC 9111, section 5.3: an invalid date (typically "0")
                # means "already expired".
                return now
            # HTTP dates are expressed in GMT, and strptime() returns a naive
            # datetime, so it must not be interpreted as local time.
            return expires.replace(tzinfo=datetime.timezone.utc).timestamp()
        return now + MAXAGE * 3600

    def _index_services(self, database):
        """Build and return the lookup dictionary from the "services" member
        of the registry. Raises Exception for unsupported categories."""
        services = {}
        if self.category == "domains":
            for service in database["services"]:
                for tld in service[0]:
                    # A server is a URL, so case-sensitive: stored unchanged.
                    services.setdefault(tld.lower(), []).extend(service[1])
        elif self.category == "objects":
            for service in database["services"]:
                # service[0] is the maintainer, not needed for lookups.
                for registry in service[1]:
                    services.setdefault(registry.upper(), []).extend(service[2])
        else:
            # IP addresses will be complicated, because of the longest
            # prefix rule.
            raise Exception("Unsupported category %s" % self.category)
        return services

    def lock(self):
        """Take an exclusive lock on the lock file. Does nothing when there
        is no cache."""
        if self.lockname:
            self.lockhandle = open(self.lockname, 'w')
            fcntl.lockf(self.lockhandle, fcntl.LOCK_EX)

    def unlock(self):
        """Release the lock taken by lock() and close the lock file. Does
        nothing when there is no cache."""
        if self.lockname:
            try:
                fcntl.lockf(self.lockhandle, fcntl.LOCK_UN)
            finally:
                self.lockhandle.close()

    def find(self, id):
        """Get the RDAP server(s), as an array, for a given identifier. None
        if there is none. For the "domains" category, the identifier is a
        domain name (a trailing dot is accepted); for the "objects" category,
        it is a handle ending with "-" and the registry tag (RFC 8521).
        Raises Exception if the identifier has no tag or if the category is
        not supported."""
        if self.category == "domains":
            domain = id
            if domain.endswith("."):
                domain = domain[:-1]
            tld = domain.lower().split(".")[-1]
            return self.services.get(tld)
        elif self.category == "objects":
            try:
                (handle, registry) = id.rsplit("-", maxsplit=1)
            except ValueError:
                raise Exception("Not a valid RFC 8521 identifier: \"%s\"" % id)
            return self.services.get(registry.upper())
        else:
            raise Exception("Unsupported category %s" % self.category)
|
2021-07-05 20:49:49 +02:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line use: print a summary of the domains database, then the
    # RDAP server(s) of every domain name given as argument.
    rdap = IanaRDAPDatabase(maxage=1)
    summary = "Database \"%s\", version %s published on %s, retrieved on %s, %i services"
    fields = (rdap.description, rdap.version, rdap.publication,
              rdap.retrieved, len(rdap.services))
    print(summary % fields)
    for name in sys.argv[1:]:
        servers = rdap.find(name)
        print("%s -> %s" % (name, servers))
|
|
|
|
|