diff --git a/.gitignore b/.gitignore index 60baa9c..d639981 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ data/* +lib/__pycache__/ +lib/test diff --git a/download.py b/download.py index 77be64d..61af5d1 100644 --- a/download.py +++ b/download.py @@ -4,6 +4,7 @@ import os import asyncio import argparse from datetime import datetime +from lib.exif_write import ExifEdit def parse_args(argv =None): parser = argparse.ArgumentParser() @@ -18,12 +19,48 @@ def background(f): return asyncio.get_event_loop().run_in_executor(None, f, *args, **kwargs) return wrapped -@background -def download(url, fn): +#@background +def download(url, fn, metadata=None): r = requests.get(url, stream=True) with open(str(fn), "wb") as f: f.write(r.content) + write_exif(fn, metadata) +def write_exif(filename, data): + ''' + Write exif metadata + ''' + #{'thumb_original_url': 'https://scontent-cdg4-2.xx.fbcdn.net/m1/v/t6/An9Zy2SrH9vXJIF01QkBODyUbg7XSKfwL48UwHyvihSwvECGjVbG0vSw9uhxe2-Dq-k2eUcigb83buO6zo-7eVbykfp5aQIe1kgd-MJr66nU_H-o_mwBLZXgVbj5I_5WX-C9c6FxJruHkV962F228O0?ccb=10-5&oh=00_AfDOKD869DxL-4ZNCbVo8Rn29vsc0JyjMAU2ctx4aAFVMQ&oe=65256C25&_nc_sid=201bca', + # 'captured_at': 1603459736644, 'geometry': {'type': 'Point', 'coordinates': [2.5174596904057, 48.777089857534]}, 'id': '485924785946693'} + lat = data['geometry']['coordinates'][1] + long = data['geometry']['coordinates'][0] + altitude = data['altitude'] + bearing = data['compass_angle'] + timestamp=datetime.utcfromtimestamp(int(data['captured_at'])/1000) + metadata = metadata = ExifEdit(filename) + + #metadata.read() + + try: + + # add to exif + #metadata["Exif.GPSInfo.GPSLatitude"] = exiv_lat + #metadata["Exif.GPSInfo.GPSLatitudeRef"] = coordinates[3] + #metadata["Exif.GPSInfo.GPSLongitude"] = exiv_lon + #metadata["Exif.GPSInfo.GPSLongitudeRef"] = coordinates[7] + #metadata["Exif.GPSInfo.GPSMapDatum"] = "WGS-84" + #metadata["Exif.GPSInfo.GPSVersionID"] = '2 0 0 0' + #metadata["Exif.GPSInfo.GPSImgDirection"] = exiv_bearing + 
#metadata["Exif.GPSInfo.GPSImgDirectionRef"] = "T" + + metadata.add_lat_lon(lat, long) + metadata.add_altitude(altitude) + metadata.add_date_time_original(timestamp) + metadata.add_direction(bearing) + metadata.write() + print("Added geodata to: {0}".format(filename)) + except ValueError as e: + print("Skipping {0}: {1}".format(filename, e)) if __name__ == '__main__': parse_args() @@ -62,10 +99,10 @@ if __name__ == '__main__': r = requests.get(req_url, headers=header) data = r.json() print('getting url {} of {}'.format(x, img_num)) + #print(data['geometry']['coordinates'][1], data['geometry']['coordinates'][0]) urls.append(data) print('downloading.. this process will take a while. please wait') for i,url in enumerate(urls): path = 'data/{}/{}.jpg'.format(sequence_id, datetime.utcfromtimestamp(int(url['captured_at'])/1000).strftime('%Y-%m-%d_%HH%Mmn%S.%f')) - print(path) - download(url['thumb_original_url'],path) + download(url['thumb_original_url'],path, url) diff --git a/lib/__init__.py b/lib/__init__.py new file mode 100644 index 0000000..9e66ef6 --- /dev/null +++ b/lib/__init__.py @@ -0,0 +1,17 @@ + +#from .geo import * +#from .exif_aux import * +#from .exif_read import * +#from .exif_write import * +#from .gps_parser import * +#from .gpmf import * + +#import geo +#import exif_aux +#import exif_read +#import exif_write +#import gps_parser +#import gpmf + + +VERSION = "0.0.2" \ No newline at end of file diff --git a/lib/exif.py b/lib/exif.py new file mode 100644 index 0000000..dedca25 --- /dev/null +++ b/lib/exif.py @@ -0,0 +1,385 @@ +#!/usr/bin/env python + +import os +import sys +import exifread +import datetime +from lib.geo import normalize_bearing +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + +def eval_frac(value): + return float(value.num) / float(value.den) + +def exif_gps_fields(): + ''' + GPS fields in EXIF + ''' + return [ + ["GPS GPSLongitude", "EXIF GPS GPSLongitude"], + ["GPS GPSLatitude", "EXIF GPS 
GPSLatitude"] + ] + + +def exif_datetime_fields(): + ''' + Date time fields in EXIF + ''' + return [["EXIF DateTimeOriginal", + "Image DateTimeOriginal", + "EXIF DateTimeDigitized", + "Image DateTimeDigitized", + "EXIF DateTime" + "Image DateTime", + "GPS GPSDate", + "EXIF GPS GPSDate", + "EXIF DateTimeModified"]] + +def format_time(time_string): + ''' + Format time string with invalid time elements in hours/minutes/seconds + Format for the timestring needs to be "%Y_%m_%d_%H_%M_%S" + + e.g. 2014_03_31_24_10_11 => 2014_04_01_00_10_11 + ''' + data = time_string.split("_") + hours, minutes, seconds = int(data[3]), int(data[4]), int(data[5]) + date = datetime.datetime.strptime("_".join(data[:3]), "%Y_%m_%d") + date_time = date + datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds) + return date_time + +def gps_to_decimal(values, reference): + sign = 1 if reference in 'NE' else -1 + degrees = eval_frac(values[0]) + minutes = eval_frac(values[1]) + seconds = eval_frac(values[2]) + return sign * (degrees + minutes / 60 + seconds / 3600) + + +def get_float_tag(tags, key): + if key in tags: + return float(tags[key].values[0]) + else: + return None + + +def get_frac_tag(tags, key): + if key in tags: + return eval_frac(tags[key].values[0]) + else: + return None + + +def extract_exif_from_file(fileobj): + if isinstance(fileobj, (str, unicode)): + with open(fileobj) as f: + exif_data = EXIF(f) + else: + exif_data = EXIF(fileobj) + + d = exif_data.extract_exif() + return d + +def required_fields(): + return exif_gps_fields() + exif_datetime_fields() + + +def verify_exif(filename): + ''' + Check that image file has the required EXIF fields. + Incompatible files will be ignored server side. 
+ ''' + # required tags in IFD name convention + required_exif = required_fields() + exif = EXIF(filename) + required_exif_exist = exif.fields_exist(required_exif) + return required_exif_exist + + +def verify_mapillary_tag(filename): + ''' + Check that image file has the required Mapillary tag + ''' + return EXIF(filename).mapillary_tag_exists() + + +def is_image(filename): + return filename.lower().endswith(('jpg', 'jpeg', 'png', 'tif', 'tiff', 'pgm', 'pnm', 'gif')) + + +class EXIF: + ''' + EXIF class for reading exif from an image + ''' + def __init__(self, filename, details=False): + ''' + Initialize EXIF object with FILE as filename or fileobj + ''' + self.filename = filename + if type(filename) == str: + with open(filename, 'rb') as fileobj: + self.tags = exifread.process_file(fileobj, details=details) + else: + self.tags = exifread.process_file(filename, details=details) + + + def _extract_alternative_fields(self, fields, default=None, field_type=float): + ''' + Extract a value for a list of ordered fields. 
+ Return the value of the first existed field in the list + ''' + for field in fields: + if field in self.tags: + if field_type is float: + value = eval_frac(self.tags[field].values[0]) + if field_type is str: + value = str(self.tags[field].values) + if field_type is int: + value = int(self.tags[field].values[0]) + return value, field + return default, None + + + def exif_name(self): + ''' + Name of file in the form {lat}_{lon}_{ca}_{datetime}_{filename} + ''' + lon, lat = self.extract_lon_lat() + ca = self.extract_direction() + if ca is None: + ca = 0 + ca = int(ca) + date_time = self.extract_capture_time() + date_time = date_time.strftime("%Y-%m-%d-%H-%M-%S-%f") + date_time = date_time[:-3] + filename = '{}_{}_{}_{}_{}'.format(lat, lon, ca, date_time, os.path.basename(self.filename)) + return filename + + + def extract_altitude(self): + ''' + Extract altitude + ''' + fields = ['GPS GPSAltitude', 'EXIF GPS GPSAltitude'] + altitude, _ = self._extract_alternative_fields(fields, 0, float) + return altitude + + + def extract_capture_time(self): + ''' + Extract capture time from EXIF + return a datetime object + TODO: handle GPS DateTime + ''' + time_string = exif_datetime_fields()[0] + capture_time, time_field = self._extract_alternative_fields(time_string, 0, str) + + # if "GPSDate" in time_field: + # return self.extract_gps_time() + + if capture_time is 0: + # try interpret the filename + try: + capture_time = datetime.datetime.strptime(os.path.basename(self.filename)[:-4]+'000', '%Y_%m_%d_%H_%M_%S_%f') + except: + pass + else: + capture_time = capture_time.replace(" ", "_") + capture_time = capture_time.replace(":", "_") + capture_time = "_".join(["{0:02d}".format(int(ts)) for ts in capture_time.split("_") if ts.isdigit()]) + capture_time = format_time(capture_time) + sub_sec = self.extract_subsec() + capture_time = capture_time + datetime.timedelta(seconds=float(sub_sec)/10**len(str(sub_sec))) + + return capture_time + + + def extract_direction(self): + ''' + 
Extract image direction (i.e. compass, heading, bearing) + ''' + fields = ['GPS GPSImgDirection', + 'EXIF GPS GPSImgDirection', + 'GPS GPSTrack', + 'EXIF GPS GPSTrack'] + direction, _ = self._extract_alternative_fields(fields) + + if direction is not None: + direction = normalize_bearing(direction, check_hex=True) + return direction + + + def extract_dop(self): + ''' + Extract dilution of precision + ''' + fields = ['GPS GPSDOP', 'EXIF GPS GPSDOP'] + dop, _ = self._extract_alternative_fields(fields) + return dop + + + def extract_geo(self): + ''' + Extract geo-related information from exif + ''' + altitude = self.extract_altitude() + dop = self.extract_dop() + lon, lat = self.extract_lon_lat() + d = {} + if lon is not None and lat is not None: + d['latitude'] = lat + d['longitude'] = lon + if altitude is not None: + d['altitude'] = altitude + if dop is not None: + d['dop'] = dop + return d + + def extract_gps_time(self): + ''' + Extract timestamp from GPS field. + ''' + gps_date_field = "GPS GPSDate" + gps_time_field = "GPS GPSTimeStamp" + gps_time = 0 + if gps_date_field in self.tags and gps_time_field in self.tags: + date = str(self.tags[gps_date_field].values).split(":") + t = self.tags[gps_time_field] + gps_time = datetime.datetime( + year=int(date[0]), + month=int(date[1]), + day=int(date[2]), + hour=int(eval_frac(t.values[0])), + minute=int(eval_frac(t.values[1])), + second=int(eval_frac(t.values[2])), + ) + microseconds = datetime.timedelta(microseconds=int( (eval_frac(t.values[2])%1) *1e6)) + gps_time += microseconds + return gps_time + + def extract_exif(self): + ''' + Extract a list of exif infos + ''' + width, height = self.extract_image_size() + make, model = self.extract_make(), self.extract_model() + orientation = self.extract_orientation() + geo = self.extract_geo() + capture = self.extract_capture_time() + direction = self.extract_direction() + d = { + 'width': width, + 'height': height, + 'orientation': orientation, + 'direction': direction, + 
'make': make, + 'model': model, + 'capture_time': capture + } + d['gps'] = geo + return d + + + def extract_image_size(self): + ''' + Extract image height and width + ''' + width, _ = self._extract_alternative_fields(['Image ImageWidth', 'EXIF ExifImageWidth'], -1, int) + height, _ = self._extract_alternative_fields(['Image ImageLength', 'EXIF ExifImageLength'], -1, int) + return width, height + + + def extract_image_description(self): + ''' + Extract image description + ''' + description, _ = self._extract_alternative_fields(['Image ImageDescription'], "{}", str) + return description + + + def extract_lon_lat(self): + if 'GPS GPSLatitude' in self.tags and 'GPS GPSLatitude' in self.tags: + lat = gps_to_decimal(self.tags['GPS GPSLatitude'].values, + self.tags['GPS GPSLatitudeRef'].values) + lon = gps_to_decimal(self.tags['GPS GPSLongitude'].values, + self.tags['GPS GPSLongitudeRef'].values) + elif 'EXIF GPS GPSLatitude' in self.tags and 'EXIF GPS GPSLatitude' in self.tags: + lat = gps_to_decimal(self.tags['EXIF GPS GPSLatitude'].values, + self.tags['EXIF GPS GPSLatitudeRef'].values) + lon = gps_to_decimal(self.tags['EXIF GPS GPSLongitude'].values, + self.tags['EXIF GPS GPSLongitudeRef'].values) + else: + lon, lat = None, None + return lon, lat + + + def extract_make(self): + ''' + Extract camera make + ''' + fields = ['EXIF LensMake', 'Image Make'] + make, _ = self._extract_alternative_fields(fields, default='none', field_type=str) + return make + + + def extract_model(self): + ''' + Extract camera model + ''' + fields = ['EXIF LensModel', 'Image Model'] + model, _ = self._extract_alternative_fields(fields, default='none', field_type=str) + return model + + + def extract_orientation(self): + ''' + Extract image orientation + ''' + fields = ['Image Orientation'] + orientation, _ = self._extract_alternative_fields(fields, default=1, field_type=int) + if orientation not in [1, 3, 6, 8]: + return 1 + return orientation + + + def extract_subsec(self): + ''' + Extract 
microseconds + ''' + fields = [ + 'Image SubSecTimeOriginal', + 'EXIF SubSecTimeOriginal', + 'Image SubSecTimeDigitized', + 'EXIF SubSecTimeDigitized', + 'Image SubSecTime', + 'EXIF SubSecTime' + ] + sub_sec, _ = self._extract_alternative_fields(fields, default=0, field_type=str) + sub_sec = int(sub_sec) + return sub_sec + + + def fields_exist(self, fields): + ''' + Check existence of a list fields in exif + ''' + for rexif in fields: + vflag = False + for subrexif in rexif: + if subrexif in self.tags: + vflag = True + if not vflag: + print("Missing required EXIF tag: {0} for image {1}".format(rexif[0], self.filename)) + return False + return True + + + def mapillary_tag_exists(self): + ''' + Check existence of Mapillary tag + ''' + description_tag = "Image ImageDescription" + if description_tag in self.tags: + if "MAPSequenceUUID" in self.tags[description_tag].values: + return True + return False + diff --git a/lib/exif_pil.py b/lib/exif_pil.py new file mode 100644 index 0000000..ded561b --- /dev/null +++ b/lib/exif_pil.py @@ -0,0 +1,227 @@ +import datetime +import struct # Only to catch struct.error due to error in PIL / Pillow. +from PIL import Image +from PIL.ExifTags import TAGS, GPSTAGS + +# Original: https://gist.github.com/erans/983821 +# License: MIT +# Credits: https://gist.github.com/erans + + +class ExifException(Exception): + def __init__(self, message): + self._message = message + + def __str__(self): + return self._message + + +class PILExifReader: + def __init__(self, filepath): + self._filepath = filepath + image = Image.open(filepath) + self._exif = self.get_exif_data(image) + image.close() + + def get_exif_data(self, image): + """Returns a dictionary from the exif data of an PIL Image + item. Also converts the GPS Tags""" + exif_data = {} + try: + info = image._getexif() + except OverflowError, e: + if e.message == "cannot fit 'long' into an index-sized integer": + # Error in PIL when exif data is corrupt. 
+ return None + else: + raise e + except struct.error as e: + if e.message == "unpack requires a string argument of length 2": + # Error in PIL when exif data is corrupt. + return None + else: + raise e + if info: + for tag, value in info.items(): + decoded = TAGS.get(tag, tag) + if decoded == "GPSInfo": + gps_data = {} + for t in value: + sub_decoded = GPSTAGS.get(t, t) + gps_data[sub_decoded] = value[t] + exif_data[decoded] = gps_data + else: + exif_data[decoded] = value + return exif_data + + def read_capture_time(self): + time_tag = "DateTimeOriginal" + + # read and format capture time + if self._exif == None: + print "Exif is none." + if time_tag in self._exif: + capture_time = self._exif[time_tag] + capture_time = capture_time.replace(" ","_") + capture_time = capture_time.replace(":","_") + else: + print "No time tag in "+self._filepath + capture_time = 0 + + # return as datetime object + return datetime.datetime.strptime(capture_time, '%Y_%m_%d_%H_%M_%S') + + def _get_if_exist(self, data, key): + if key in data: + return data[key] + else: + return None + + def _convert_to_degress(self, value): + """Helper function to convert the GPS coordinates stored in + the EXIF to degrees in float format.""" + d0 = value[0][0] + d1 = value[0][1] + d = float(d0) / float(d1) + + m0 = value[1][0] + m1 = value[1][1] + m = float(m0) / float(m1) + + s0 = value[2][0] + s1 = value[2][1] + s = float(s0) / float(s1) + + return d + (m / 60.0) + (s / 3600.0) + + def get_lat_lon(self): + """Returns the latitude and longitude, if available, from the + provided exif_data (obtained through get_exif_data above).""" + lat = None + lon = None + + gps_info = self.get_gps_info() + if gps_info is None: + return None + + gps_latitude = self._get_if_exist(gps_info, "GPSLatitude") + gps_latitude_ref = self._get_if_exist(gps_info, 'GPSLatitudeRef') + gps_longitude = self._get_if_exist(gps_info, 'GPSLongitude') + gps_longitude_ref = self._get_if_exist(gps_info, 'GPSLongitudeRef') + + if 
(gps_latitude and gps_latitude_ref + and gps_longitude and gps_longitude_ref): + lat = self._convert_to_degress(gps_latitude) + if gps_latitude_ref != "N": + lat = 0 - lat + + lon = self._convert_to_degress(gps_longitude) + if gps_longitude_ref != "E": + lon = 0 - lon + + if isinstance(lat, float) and isinstance(lon, float): + return lat, lon + else: + return None + + def calc_tuple(self, tup): + if tup is None or len(tup) != 2 or tup[1] == 0: + return None + return int(tup[0]) / int(tup[1]) + + def get_gps_info(self): + if self._exif is None or not "GPSInfo" in self._exif: + return None + else: + return self._exif["GPSInfo"] + + def get_rotation(self): + """Returns the direction of the GPS receiver in degrees.""" + gps_info = self.get_gps_info() + if gps_info is None: + return None + + for tag in ('GPSImgDirection', 'GPSTrack'): + gps_direction = self._get_if_exist(gps_info, tag) + direction = self.calc_tuple(gps_direction) + if direction == None: + continue + else: + return direction + return None + + def get_speed(self): + """Returns the GPS speed in km/h or None if it does not exists.""" + gps_info = self.get_gps_info() + if gps_info is None: + return None + + if not "GPSSpeed" in gps_info or not "GPSSpeedRef" in gps_info: + return None + speed_frac = gps_info["GPSSpeed"] + speed_ref = gps_info["GPSSpeedRef"] + + speed = self.calc_tuple(speed_frac) + if speed is None or speed_ref is None: + return None + + speed_ref = speed_ref.lower() + if speed_ref == "k": + pass # km/h - we are happy. + elif speed_ref == "m": + #Miles pr. hour => km/h + speed *= 1.609344 + elif speed_ref == "n": + # Knots => km/h + speed *= 1.852 + else: + print "Warning: Unknown format for GPS speed '%s' in '%s'." % ( + speed_ref, self._filepath) + print "Please file a bug and attache the image." 
+ return None + return speed + + def is_ok_num(self, val, minVal, maxVal): + try: + num = int(val) + except ValueError: + return False + if num < minVal or num > maxVal: + return False + return True + + def get_time(self): + # Example data + # GPSTimeStamp': ((9, 1), (14, 1), (9000, 1000)) + # 'GPSDateStamp': u'2015:05:17' + gps_info = self.get_gps_info() + if gps_info is None: + return None + + if not 'GPSTimeStamp' in gps_info or not 'GPSDateStamp' in gps_info: + return None + timestamp = gps_info['GPSTimeStamp'] + datestamp = gps_info['GPSDateStamp'] + + if len(timestamp) != 3: + raise ExifException("Timestamp does not have length 3: %s" % + len(timestamp)) + (timeH, timeM, timeS) = timestamp + h = self.calc_tuple(timeH) + m = self.calc_tuple(timeM) + s = self.calc_tuple(timeS) + if None in (h, m, s): + raise ExifException( + "Hour, minute or second is not valid: '%s':'%s':'%s'." % + (timeH, timeM, timeS)) + + if datestamp.count(':') != 2: + raise ExifException("Datestamp does not contain 2 colons: '%s'" % + datestamp) + (y, mon, d) = [int(str) for str in datestamp.split(':')] + if not self.is_ok_num(y, 1970, 2100) or not self.is_ok_num( + mon, 1, 12) or not self.is_ok_num(d, 1, 31): + raise ExifException( + "Date parsed from the following is not OK: '%s'" % datestamp) + + return datetime.datetime(y, mon, d, h, m, s) diff --git a/lib/exif_read.py b/lib/exif_read.py new file mode 100644 index 0000000..6b503f8 --- /dev/null +++ b/lib/exif_read.py @@ -0,0 +1,370 @@ +# coding: utf8 +#!/usr/bin/env python + +#source is exif_read.py from mapillary_tools : +#https://github.com/mapillary/mapillary_tools/blob/master/mapillary_tools/exif_read.py +import os +import sys +import exifread +import datetime +from geo import normalize_bearing +import uuid +sys.path.insert(0, os.path.abspath( + os.path.join(os.path.dirname(__file__), ".."))) +#import jsonfrom + + +def eval_frac(value): + if value.den == 0: + return -1.0 + return float(value.num) / float(value.den) + + +def 
format_time(time_string): + ''' + Format time string with invalid time elements in hours/minutes/seconds + Format for the timestring needs to be "%Y_%m_%d_%H_%M_%S" + + e.g. 2014_03_31_24_10_11 => 2014_04_01_00_10_11 + ''' + subseconds = False + data = time_string.split("_") + hours, minutes, seconds = int(data[3]), int(data[4]), int(data[5]) + date = datetime.datetime.strptime("_".join(data[:3]), "%Y_%m_%d") + subsec = 0.0 + if len(data) == 7: + if float(data[6]) != 0: + subsec = float(data[6]) / 10**len(data[6]) + subseconds = True + date_time = date + \ + datetime.timedelta(hours=hours, minutes=minutes, + seconds=seconds + subsec) + return date_time, subseconds + + +def gps_to_decimal(values, reference): + sign = 1 if reference in 'NE' else -1 + degrees = eval_frac(values[0]) + minutes = eval_frac(values[1]) + seconds = eval_frac(values[2]) + return sign * (degrees + minutes / 60 + seconds / 3600) + + +def exif_datetime_fields(): + ''' + Date time fields in EXIF + ''' + return [["EXIF DateTimeOriginal", + "Image DateTimeOriginal", + "EXIF DateTimeDigitized", + "Image DateTimeDigitized", + "EXIF DateTime", + "Image DateTime", + "GPS GPSDate", + "EXIF GPS GPSDate", + "EXIF DateTimeModified"]] + + +def exif_gps_date_fields(): + ''' + Date fields in EXIF GPS + ''' + return [["GPS GPSDate", + "EXIF GPS GPSDate"]] + + +class ExifRead: + ''' + EXIF class for reading exif from an image + ''' + + def __init__(self, filename, details=False): + ''' + Initialize EXIF object with FILE as filename or fileobj + ''' + self.filename = filename + if type(filename) == str: + with open(filename, 'rb') as fileobj: + self.tags = exifread.process_file(fileobj, details=details) + else: + self.tags = exifread.process_file(filename, details=details) + + def _extract_alternative_fields(self, fields, default=None, field_type=float): + ''' + Extract a value for a list of ordered fields. 
+ Return the value of the first existed field in the list + ''' + for field in fields: + if field in self.tags: + if field_type is float: + value = eval_frac(self.tags[field].values[0]) + if field_type is str: + value = str(self.tags[field].values) + if field_type is int: + value = int(self.tags[field].values[0]) + return value, field + return default, None + + def exif_name(self): + ''' + Name of file in the form {lat}_{lon}_{ca}_{datetime}_{filename}_{hash} + ''' + mapillary_description = json.loads(self.extract_image_description()) + + lat = None + lon = None + ca = None + date_time = None + + if "MAPLatitude" in mapillary_description: + lat = mapillary_description["MAPLatitude"] + if "MAPLongitude" in mapillary_description: + lon = mapillary_description["MAPLongitude"] + if "MAPCompassHeading" in mapillary_description: + if 'TrueHeading' in mapillary_description["MAPCompassHeading"]: + ca = mapillary_description["MAPCompassHeading"]['TrueHeading'] + if "MAPCaptureTime" in mapillary_description: + date_time = datetime.datetime.strptime( + mapillary_description["MAPCaptureTime"], "%Y_%m_%d_%H_%M_%S_%f").strftime("%Y-%m-%d-%H-%M-%S-%f")[:-3] + + filename = '{}_{}_{}_{}_{}'.format( + lat, lon, ca, date_time, uuid.uuid4()) + return filename + + def extract_image_history(self): + field = ['Image Tag 0x9213'] + user_comment, _ = self._extract_alternative_fields(field, '{}', str) + return user_comment + + def extract_altitude(self): + ''' + Extract altitude + ''' + fields = ['GPS GPSAltitude', 'EXIF GPS GPSAltitude'] + altitude, _ = self._extract_alternative_fields(fields, 0, float) + return altitude + + def extract_capture_time(self): + ''' + Extract capture time from EXIF + return a datetime object + TODO: handle GPS DateTime + ''' + time_string = exif_datetime_fields()[0] + capture_time, time_field = self._extract_alternative_fields( + time_string, 0, str) + if time_field in exif_gps_date_fields()[0]: + capture_time = self.extract_gps_time() + return capture_time + 
if capture_time is 0: + # try interpret the filename + try: + capture_time = datetime.datetime.strptime(os.path.basename( + self.filename)[:-4] + '000', '%Y_%m_%d_%H_%M_%S_%f') + except: + return None + else: + capture_time = capture_time.replace(" ", "_") + capture_time = capture_time.replace(":", "_") + capture_time = capture_time.replace(".", "_") + capture_time = capture_time.replace("-", "_") + capture_time = "_".join( + [ts for ts in capture_time.split("_") if ts.isdigit()]) + capture_time, subseconds = format_time(capture_time) + sub_sec = "0" + if not subseconds: + sub_sec = self.extract_subsec() + + capture_time = capture_time + \ + datetime.timedelta(seconds=float("0." + sub_sec)) + + return capture_time + + def extract_direction(self): + ''' + Extract image direction (i.e. compass, heading, bearing) + ''' + fields = ['GPS GPSImgDirection', + 'EXIF GPS GPSImgDirection', + 'GPS GPSTrack', + 'EXIF GPS GPSTrack'] + direction, _ = self._extract_alternative_fields(fields) + + if direction is not None: + direction = normalize_bearing(direction, check_hex=True) + return direction + + def extract_dop(self): + ''' + Extract dilution of precision + ''' + fields = ['GPS GPSDOP', 'EXIF GPS GPSDOP'] + dop, _ = self._extract_alternative_fields(fields) + return dop + + def extract_geo(self): + ''' + Extract geo-related information from exif + ''' + altitude = self.extract_altitude() + dop = self.extract_dop() + lon, lat = self.extract_lon_lat() + d = {} + if lon is not None and lat is not None: + d['latitude'] = lat + d['longitude'] = lon + if altitude is not None: + d['altitude'] = altitude + if dop is not None: + d['dop'] = dop + return d + + def extract_gps_time(self): + ''' + Extract timestamp from GPS field. 
+ ''' + gps_date_field = "GPS GPSDate" + gps_time_field = "GPS GPSTimeStamp" + gps_time = 0 + if gps_date_field in self.tags and gps_time_field in self.tags: + date = str(self.tags[gps_date_field].values).split(":") + t = self.tags[gps_time_field] + gps_time = datetime.datetime( + year=int(date[0]), + month=int(date[1]), + day=int(date[2]), + hour=int(eval_frac(t.values[0])), + minute=int(eval_frac(t.values[1])), + second=int(eval_frac(t.values[2])), + ) + microseconds = datetime.timedelta( + microseconds=int((eval_frac(t.values[2]) % 1) * 1e6)) + gps_time += microseconds + return gps_time + + def extract_exif(self): + ''' + Extract a list of exif infos + ''' + width, height = self.extract_image_size() + make, model = self.extract_make(), self.extract_model() + orientation = self.extract_orientation() + geo = self.extract_geo() + capture = self.extract_capture_time() + direction = self.extract_direction() + d = { + 'width': width, + 'height': height, + 'orientation': orientation, + 'direction': direction, + 'make': make, + 'model': model, + 'capture_time': capture + } + d['gps'] = geo + return d + + def extract_image_size(self): + ''' + Extract image height and width + ''' + width, _ = self._extract_alternative_fields( + ['Image ImageWidth', 'EXIF ExifImageWidth'], -1, int) + height, _ = self._extract_alternative_fields( + ['Image ImageLength', 'EXIF ExifImageLength'], -1, int) + return width, height + + def extract_image_description(self): + ''' + Extract image description + ''' + description, _ = self._extract_alternative_fields( + ['Image ImageDescription'], "{}", str) + return description + + def extract_lon_lat(self): + if 'GPS GPSLatitude' in self.tags and 'GPS GPSLatitude' in self.tags: + lat = gps_to_decimal(self.tags['GPS GPSLatitude'].values, + self.tags['GPS GPSLatitudeRef'].values) + lon = gps_to_decimal(self.tags['GPS GPSLongitude'].values, + self.tags['GPS GPSLongitudeRef'].values) + elif 'EXIF GPS GPSLatitude' in self.tags and 'EXIF GPS GPSLatitude' 
in self.tags: + lat = gps_to_decimal(self.tags['EXIF GPS GPSLatitude'].values, + self.tags['EXIF GPS GPSLatitudeRef'].values) + lon = gps_to_decimal(self.tags['EXIF GPS GPSLongitude'].values, + self.tags['EXIF GPS GPSLongitudeRef'].values) + else: + lon, lat = None, None + return lon, lat + + def extract_make(self): + ''' + Extract camera make + ''' + fields = ['EXIF LensMake', 'Image Make'] + make, _ = self._extract_alternative_fields( + fields, default='none', field_type=str) + return make + + def extract_model(self): + ''' + Extract camera model + ''' + fields = ['EXIF LensModel', 'Image Model'] + model, _ = self._extract_alternative_fields( + fields, default='none', field_type=str) + return model + + def extract_orientation(self): + ''' + Extract image orientation + ''' + fields = ['Image Orientation'] + orientation, _ = self._extract_alternative_fields( + fields, default=1, field_type=int) + if orientation not in range(1, 9): + return 1 + return orientation + + def extract_subsec(self): + ''' + Extract microseconds + ''' + fields = [ + 'Image SubSecTimeOriginal', + 'EXIF SubSecTimeOriginal', + 'Image SubSecTimeDigitized', + 'EXIF SubSecTimeDigitized', + 'Image SubSecTime', + 'EXIF SubSecTime' + ] + sub_sec, _ = self._extract_alternative_fields( + fields, default='', field_type=str) + return sub_sec.strip() + + def fields_exist(self, fields): + ''' + Check existence of a list fields in exif + ''' + for rexif in fields: + vflag = False + for subrexif in rexif: + if subrexif in self.tags: + vflag = True + if not vflag: + print("Missing required EXIF tag: {0} for image {1}".format( + rexif[0], self.filename)) + return False + return True + + def mapillary_tag_exists(self): + ''' + Check existence of required Mapillary tags + ''' + description_tag = "Image ImageDescription" + if description_tag not in self.tags: + return False + for requirement in ["MAPSequenceUUID", "MAPSettingsUserKey", "MAPCaptureTime", "MAPLongitude", "MAPLatitude"]: + if requirement not in 
class ExifEdit(object):
    """Edit the EXIF metadata of an image file through ``piexif``.

    The structure returned by ``piexif.load`` is kept in ``self._ef``. The
    ``add_*`` methods change that structure in memory only; ``write`` dumps
    it and inserts it into the image.
    """

    def __init__(self, filename):
        """Load the EXIF data of *filename*.

        When the file cannot be opened or parsed the error is reported on
        stderr and ``self._ef`` stays ``None``.
        """
        self._filename = filename
        self._ef = None
        try:
            self._ef = piexif.load(filename)
        except (IOError, ValueError) as error:
            # sys.stderr.write works on Python 2 and 3 alike; the
            # ``print >> sys.stderr`` statement form raises TypeError on
            # Python 3, which would hide the real error.
            sys.stderr.write("Error opening file: {0}\n".format(error))

    def add_image_description(self, dict):
        """Store *dict*, serialized as JSON, in the ImageDescription tag."""
        if self._ef is not None:
            self._ef['0th'][piexif.ImageIFD.ImageDescription] = json.dumps(
                dict)

    def add_orientation(self, orientation):
        """Set the Orientation tag; values outside 1..8 fall back to 1."""
        if orientation not in range(1, 9):
            print(
                "Error value for orientation, value must be in range(1,9), setting to default 1")
            orientation = 1
        self._ef['0th'][piexif.ImageIFD.Orientation] = orientation

    def add_date_time_original(self, date_time):
        """Set DateTimeOriginal (and SubSecTimeOriginal when non-zero).

        *date_time* is expected to offer ``strftime`` and ``microsecond``
        (e.g. a ``datetime.datetime``). Failures are reported, not raised.
        """
        try:
            formatted = date_time.strftime('%Y:%m:%d %H:%M:%S')
            self._ef['Exif'][piexif.ExifIFD.DateTimeOriginal] = formatted
        except (AttributeError, KeyError, TypeError, ValueError) as e:
            print("Error writing DateTimeOriginal, due to " + str(e))
            # Nothing was written, so do not touch the sub-second tag either.
            return

        if date_time.microsecond != 0:
            self.add_subsectimeoriginal(date_time.microsecond)

    def add_subsectimeoriginal(self, subsec_value):
        """Set SubSecTimeOriginal to *subsec_value* zero-padded to 6 digits."""
        try:
            subsec = str(subsec_value).zfill(6)
            self._ef['Exif'][piexif.ExifIFD.SubSecTimeOriginal] = subsec
        except (KeyError, TypeError) as e:
            print("Error writing SubSecTimeOriginal, due to " + str(e))

    def add_lat_lon(self, lat, lon, precision=1e7):
        """Set GPS latitude/longitude from decimal degrees (floats).

        *precision* is the denominator handed to ``decimal_to_dms`` for the
        seconds value.
        """
        # A coordinate of exactly 0 is labelled N / E rather than S / W.
        self._ef["GPS"][piexif.GPSIFD.GPSLatitudeRef] = "N" if lat >= 0 else "S"
        self._ef["GPS"][piexif.GPSIFD.GPSLongitudeRef] = "E" if lon >= 0 else "W"
        self._ef["GPS"][piexif.GPSIFD.GPSLongitude] = decimal_to_dms(
            abs(lon), int(precision))
        self._ef["GPS"][piexif.GPSIFD.GPSLatitude] = decimal_to_dms(
            abs(lat), int(precision))

    def add_image_history(self, data):
        """Store *data*, serialized as JSON, in the ImageHistory tag."""
        self._ef['0th'][piexif.ImageIFD.ImageHistory] = json.dumps(data)

    def add_camera_make_model(self, make, model):
        """Set the camera Make and Model tags."""
        self._ef['0th'][piexif.ImageIFD.Make] = make
        self._ef['0th'][piexif.ImageIFD.Model] = model

    def add_dop(self, dop, precision=100):
        """Set GPSDOP from a float, stored as (value * precision, precision)."""
        self._ef["GPS"][piexif.GPSIFD.GPSDOP] = (
            int(abs(dop) * precision), precision)

    def add_altitude(self, altitude, precision=100):
        """Set GPSAltitude and GPSAltitudeRef.

        The magnitude is stored as (value * precision, precision); the
        reference is 0 for altitudes >= 0 and 1 for negative altitudes.
        """
        # An altitude of exactly 0 must not be flagged as below sea level.
        ref = 0 if altitude >= 0 else 1
        self._ef["GPS"][piexif.GPSIFD.GPSAltitude] = (
            int(abs(altitude) * precision), precision)
        self._ef["GPS"][piexif.GPSIFD.GPSAltitudeRef] = ref

    def add_direction(self, direction, ref="T", precision=100):
        """Set GPSImgDirection (normalized into [0, 360)) and its reference."""
        direction = direction % 360.0
        self._ef["GPS"][piexif.GPSIFD.GPSImgDirection] = (
            int(abs(direction) * precision), precision)
        self._ef["GPS"][piexif.GPSIFD.GPSImgDirectionRef] = ref

    def add_firmware(self, firmware_string):
        """Store the camera firmware version in the Software tag."""
        self._ef['0th'][piexif.ImageIFD.Software] = firmware_string

    def add_custom_tag(self, value, main_key, tag_key):
        """Set an arbitrary tag ``self._ef[main_key][tag_key] = value``.

        A missing IFD (or EXIF data that failed to load) is reported instead
        of raised.
        """
        try:
            self._ef[main_key][tag_key] = value
        except (KeyError, TypeError):
            print("could not set tag {} under {} with value {}".format(
                tag_key, main_key, value))

    def write(self, filename=None):
        """Save the EXIF data into the image.

        The image bytes are always read from the file given at construction;
        the result is written to *filename*, or back to that same file when
        *filename* is None. I/O errors while saving are reported on stderr.
        """
        if filename is None:
            filename = self._filename

        exif_bytes = piexif.dump(self._ef)

        with open(self._filename, "rb") as fin:
            img = fin.read()
        try:
            piexif.insert(exif_bytes, img, filename)
        except IOError as error:
            sys.stderr.write("Error saving file: {0}\n".format(error))
Will take precedence over any other time tags in the EXIF + mapillary_description["MAPCaptureTime"] = datetime.datetime.strftime(timestamp, "%Y_%m_%d_%H_%M_%S_%f")[:-3] + + # EXIF orientation of the image + mapillary_description["MAPOrientation"] = orientation + heading = exif.extract_direction() + + if heading is None: + heading = 0.0 + heading = normalize_bearing(interpolated_heading + offset_angle) if interpolated_heading is not None else normalize_bearing(heading + offset_angle) + + # bearing of the image + mapillary_description["MAPCompassHeading"] = {"TrueHeading": heading, "MagneticHeading": heading} + + # authentication + assert(email is not None or userkey is not None) + if email is not None: + mapillary_description["MAPSettingsEmail"] = email + if username is not None: + mapillary_description["MAPSettingsUsername"] = username + + # use this if available, and omit MAPSettingsUsername and MAPSettingsEmail for privacy reasons + if userkey is not None: + mapillary_description["MAPSettingsUserKey"] = userkey + if upload_hash is not None: + settings_upload_hash = hashlib.sha256("%s%s%s" % (upload_hash, email, base64.b64encode(filename))).hexdigest() + # this is not checked in the backend right now, will likely be changed to have user_key instead of email as part + # of the hash + mapillary_description['MAPSettingsUploadHash'] = settings_upload_hash + + # a unique photo ID to check for duplicates in the backend in case the image gets uploaded more than once + mapillary_description['MAPPhotoUUID'] = str(uuid.uuid4()) + # a sequene ID to make the images go together (order by MAPCaptureTime) + mapillary_description['MAPSequenceUUID'] = str(sequence_uuid) + + # The device model + mapillary_description['MAPDeviceModel'] = exif.extract_model() + + # The device manufacturer + mapillary_description['MAPDeviceMake'] = exif.extract_make() + if upload_hash is None and secret_hash is not None: + mapillary_description['MAPVideoSecure'] = secret_hash + + 
mapillary_description["MAPSettingsProject"] = project + + # external properties (optional) + if external_properties is not None: + # externl proerties can be saved and searched in Mapillary later on + mapillary_description['MAPExternalProperties'] = external_properties + + # write to file + if verbose: + print("tag: {0}".format(mapillary_description)) + metadata = ExifEdit(filename) + metadata.add_image_description(mapillary_description) + metadata.add_orientation(orientation) + metadata.add_direction(heading) + metadata.write() + +def add_mapillary_description(filename, username, email, + project, upload_hash, image_description, + output_file=None): + """Add Mapillary description tags directly with user info.""" + + if username is not None: + # write the mapillary tag + image_description["MAPSettingsUploadHash"] = upload_hash + image_description["MAPSettingsEmail"] = email + image_description["MAPSettingsUsername"] = username + settings_upload_hash = hashlib.sha256("%s%s%s" % (upload_hash, email, base64.b64encode(filename))).hexdigest() + + image_description['MAPSettingsUploadHash'] = settings_upload_hash + + # if this image is part of a projet, the project UUID + image_description["MAPSettingsProject"] = project + + assert("MAPSequenceUUID" in image_description) + + if output_file is not None: + shutil.copy(filename, output_file) + filename = output_file + + # write to file + json_desc = json.dumps(image_description) + metadata = ExifEdit(filename) + metadata.add_image_description(json_desc) + metadata.add_orientation(image_description.get("MAPOrientation", 1)) + metadata.add_direction(image_description["MAPCompassHeading"]["TrueHeading"]) + metadata.add_lat_lon(image_description["MAPLatitude"], image_description["MAPLongitude"]) + date_time = datetime.datetime.strptime(image_description["MAPCaptureTime"]+"000", "%Y_%m_%d_%H_%M_%S_%f") + metadata.add_date_time_original(date_time) + metadata.write() + + +def add_exif_data(filename, data, output_file=None): + 
"""Add minimal exif data to an image""" + if output_file is not None: + shutil.copy(filename, output_file) + filename = output_file + metadata = ExifEdit(filename) + metadata.add_orientation(data.get("orientation", 1)) + metadata.add_direction(data.get("bearing", 0)) + metadata.add_lat_lon(data["lat"], data["lon"]) + metadata.add_date_time_original(data["capture_time"]) + metadata.add_camera_make_model(data["make"], data["model"]) + metadata.write() + +class ExifEdit(object): + + def __init__(self, filename): + """Initialize the object""" + self.filename = filename + self.ef = None + + if (type(filename) is str) or (type(filename) is unicode): + self.ef = JpegFile.fromFile(filename) + else: + filename.seek(0) + self.ef = JpegFile.fromString(filename.getvalue()) + try: + if (type(filename) is str) or (type(filename) is unicode): + self.ef = JpegFile.fromFile(filename) + else: + filename.seek(0) + self.ef = JpegFile.fromString(filename.getvalue()) + except IOError: + etype, value, traceback = sys.exc_info() + print >> sys.stderr, "Error opening file:", value + except JpegFile.InvalidFile: + etype, value, traceback = sys.exc_info() + print >> sys.stderr, "Error opening file:", value + + def add_image_description(self, dict): + """Add a dict to image description.""" + if self.ef is not None: + self.ef.exif.primary.ImageDescription = json.dumps(dict) + + def add_orientation(self, orientation): + """Add image orientation to image.""" + self.ef.exif.primary.Orientation = [orientation] + + def add_date_time_original(self, date_time): + """Add date time original.""" + self.ef.exif.primary.ExtendedEXIF.DateTimeOriginal = date_time.strftime('%Y:%m:%d %H:%M:%S') + """Add subsecond if the value exists""" + if date_time.microsecond: + subsec = str(date_time.microsecond).zfill(6) + self.add_subsec_time_original(subsec) + #if date_time.microsecond: + # self.ef.exif.primary.ExtendedEXIF.SubSecTimeOriginal = str(date_time.microsecond).zfill(6) + + def add_subsec_time_original(self, 
subsec): + """Add subsecond.""" + self.ef.exif.primary.ExtendedEXIF.SubSecTimeOriginal = subsec + + def add_lat_lon(self, lat, lon): + """Add lat, lon to gps (lat, lon in float).""" + self.ef.set_geo(float(lat), float(lon)) + + def add_camera_make_model(self, make, model): + ''' Add camera make and model.''' + self.ef.exif.primary.Make = make + self.ef.exif.primary.Model = model + + def add_dop(self, dop, perc=100): + """Add GPSDOP (float).""" + self.ef.exif.primary.GPS.GPSDOP = [Rational(abs(dop * perc), perc)] + + def add_altitude(self, altitude, precision=100): + """Add altitude (pre is the precision).""" + ref = '\x00' if altitude > 0 else '\x01' + self.ef.exif.primary.GPS.GPSAltitude = [Rational(abs(altitude * precision), precision)] + self.ef.exif.primary.GPS.GPSAltitudeRef = [ref] + + def add_direction(self, direction, ref="T", precision=100): + """Add image direction.""" + self.ef.exif.primary.GPS.GPSImgDirection = [Rational(abs(direction * precision), precision)] + self.ef.exif.primary.GPS.GPSImgDirectionRef = ref + + def write(self, filename=None): + """Save exif data to file.""" + try: + if filename is None: + filename = self.filename + self.ef.writeFile(filename) + except IOError: + type, value, traceback = sys.exc_info() + print >> sys.stderr, "Error saving file:", value + + def write_to_string(self): + """Save exif data to StringIO object.""" + return self.ef.writeString() + + def write_to_file_object(self): + """Save exif data to file object.""" + return self.ef.writeFd() + diff --git a/lib/ffprobe.py b/lib/ffprobe.py new file mode 100644 index 0000000..0071ce9 --- /dev/null +++ b/lib/ffprobe.py @@ -0,0 +1,222 @@ +#!/usr/bin/python +# Filename: ffprobe.py +""" +Based on Python wrapper for ffprobe command line tool. ffprobe must exist in the path. 
+Author: Simon Hargreaves + +""" + +version='0.5' + +import subprocess +import re +import sys +import os +import platform + +class FFProbe: + """ + FFProbe wraps the ffprobe command and pulls the data into an object form:: + metadata=FFProbe('multimedia-file.mov') + """ + def __init__(self,video_file): + self.video_file=video_file + try: + with open(os.devnull, 'w') as tempf: + subprocess.check_call(["ffprobe","-h"],stdout=tempf,stderr=tempf) + except: + raise IOError('ffprobe not found.') + if os.path.isfile(video_file): + video_file = self.video_file.replace(" ", "\ ") + + if str(platform.system())=='Windows': + cmd=["ffprobe", "-show_streams", video_file] + else: + cmd=["ffprobe -show_streams " + video_file] + + p = subprocess.Popen(cmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE,shell=True) + self.format=None + self.created=None + self.duration=None + self.start=None + self.bitrate=None + self.creation_time=None + self.streams=[] + self.video=[] + self.audio=[] + datalines=[] + + for a in iter(p.stdout.readline, b''): + + if re.match('\[STREAM\]',a): + datalines=[] + elif re.match('\[\/STREAM\]',a): + self.streams.append(FFStream(datalines)) + datalines=[] + else: + datalines.append(a) + for a in iter(p.stderr.readline, b''): + if re.match('\[STREAM\]',a): + datalines=[] + elif re.match('\[\/STREAM\]',a): + self.streams.append(FFStream(datalines)) + datalines=[] + else: + datalines.append(a) + p.stdout.close() + p.stderr.close() + for a in self.streams: + if a.isAudio(): + self.audio.append(a) + if a.isVideo(): + self.video.append(a) + else: + raise IOError('No such media file ' + video_file) + + +class FFStream: + """ + An object representation of an individual stream in a multimedia file. 
+ """ + def __init__(self,datalines): + for a in datalines: + if re.match(r'^.+=.+$', a) is None: + print "Warning: detected incorrect stream metadata line format: %s" % a + else: + (key,val)=a.strip().split('=') + key = key.lstrip("TAG:") + self.__dict__[key]=val + + def isAudio(self): + """ + Is this stream labelled as an audio stream? + """ + val=False + if self.__dict__['codec_type']: + if str(self.__dict__['codec_type']) == 'audio': + val=True + return val + + def isVideo(self): + """ + Is the stream labelled as a video stream. + """ + val=False + if self.__dict__['codec_type']: + if self.codec_type == 'video': + val=True + return val + + def isSubtitle(self): + """ + Is the stream labelled as a subtitle stream. + """ + val=False + if self.__dict__['codec_type']: + if str(self.codec_type)=='subtitle': + val=True + return val + + def frameSize(self): + """ + Returns the pixel frame size as an integer tuple (width,height) if the stream is a video stream. + Returns None if it is not a video stream. + """ + size=None + if self.isVideo(): + if self.__dict__['width'] and self.__dict__['height']: + try: + size=(int(self.__dict__['width']),int(self.__dict__['height'])) + except Exception as e: + print "None integer size %s:%s" %(str(self.__dict__['width']),str(+self.__dict__['height'])) + size=(0,0) + return size + + def pixelFormat(self): + """ + Returns a string representing the pixel format of the video stream. e.g. yuv420p. + Returns none is it is not a video stream. + """ + f=None + if self.isVideo(): + if self.__dict__['pix_fmt']: + f=self.__dict__['pix_fmt'] + return f + + def frames(self): + """ + Returns the length of a video stream in frames. Returns 0 if not a video stream. 
+ """ + f=0 + if self.isVideo() or self.isAudio(): + if self.__dict__['nb_frames']: + try: + f=int(self.__dict__['nb_frames']) + except Exception as e: + print "None integer frame count" + return f + + def durationSeconds(self): + """ + Returns the runtime duration of the video stream as a floating point number of seconds. + Returns 0.0 if not a video stream. + """ + f=0.0 + if self.isVideo() or self.isAudio(): + if self.__dict__['duration']: + try: + f=float(self.__dict__['duration']) + except Exception as e: + print "None numeric duration" + return f + + def language(self): + """ + Returns language tag of stream. e.g. eng + """ + lang=None + if self.__dict__['TAG:language']: + lang=self.__dict__['TAG:language'] + return lang + + def codec(self): + """ + Returns a string representation of the stream codec. + """ + codec_name=None + if self.__dict__['codec_name']: + codec_name=self.__dict__['codec_name'] + return codec_name + + def codecDescription(self): + """ + Returns a long representation of the stream codec. + """ + codec_d=None + if self.__dict__['codec_long_name']: + codec_d=self.__dict__['codec_long_name'] + return codec_d + + def codecTag(self): + """ + Returns a short representative tag of the stream codec. + """ + codec_t=None + if self.__dict__['codec_tag_string']: + codec_t=self.__dict__['codec_tag_string'] + return codec_t + + def bitrate(self): + """ + Returns bitrate as an integer in bps + """ + b=0 + if self.__dict__['bit_rate']: + try: + b=int(self.__dict__['bit_rate']) + except Exception as e: + print "None integer bitrate" + return b + +if __name__ == '__main__': + print "Module ffprobe" diff --git a/lib/geo.py b/lib/geo.py new file mode 100644 index 0000000..c9399bd --- /dev/null +++ b/lib/geo.py @@ -0,0 +1,198 @@ +# -*- coding: utf-8 -*- + +import datetime +import math + +WGS84_a = 6378137.0 +WGS84_b = 6356752.314245 + + +def ecef_from_lla(lat, lon, alt): + ''' + Compute ECEF XYZ from latitude, longitude and altitude. 
import datetime
import math

WGS84_a = 6378137.0
WGS84_b = 6356752.314245


def ecef_from_lla(lat, lon, alt):
    '''
    Compute ECEF XYZ from latitude, longitude and altitude.

    All using the WGS84 model.
    Altitude is the distance to the WGS84 ellipsoid.
    Check results here http://www.oc.nps.edu/oc2902w/coord/llhxyz.htm

    lat and lon are given in degrees.
    '''
    a2 = WGS84_a**2
    b2 = WGS84_b**2
    lat = math.radians(lat)
    lon = math.radians(lon)
    L = 1.0 / math.sqrt(a2 * math.cos(lat)**2 + b2 * math.sin(lat)**2)
    x = (a2 * L + alt) * math.cos(lat) * math.cos(lon)
    y = (a2 * L + alt) * math.cos(lat) * math.sin(lon)
    z = (b2 * L + alt) * math.sin(lat)
    return x, y, z


def gps_distance(latlon_1, latlon_2):
    '''
    Straight-line (ECEF) distance between two (lat,lon) pairs.

    >>> p1 = (42.1, -11.1)
    >>> p2 = (42.2, -11.3)
    >>> 19000 < gps_distance(p1, p2) < 20000
    True
    '''
    x1, y1, z1 = ecef_from_lla(latlon_1[0], latlon_1[1], 0.)
    x2, y2, z2 = ecef_from_lla(latlon_2[0], latlon_2[1], 0.)

    return math.sqrt((x1-x2)**2 + (y1-y2)**2 + (z1-z2)**2)


def dms_to_decimal(degrees, minutes, seconds, hemisphere):
    '''
    Convert from degrees, minutes, seconds to decimal degrees.

    The result is negative when hemisphere is "W" or "S" (either case).
    @author: mprins
    '''
    dms = float(degrees) + float(minutes) / 60 + float(seconds) / 3600
    # Compare against the individual letters: a substring test such as
    # `hemisphere in "WwSs"` is also true for "" and for "Ww".
    if hemisphere in ("W", "w", "S", "s"):
        dms = -1 * dms

    return dms


def decimal_to_dms(value, precision):
    '''
    Convert decimal position to degrees, minutes, seconds

    Returns ((deg, 1), (min, 1), (sec * precision, precision)) with integer
    numerators.
    '''
    deg = int(math.floor(value))
    minutes = int(math.floor((value - deg) * 60))
    sec = int(math.floor((value - deg - minutes / 60.0) * 3600 * precision))
    # Floating point rounding must never produce a negative numerator.
    sec = max(sec, 0)

    return (deg, 1), (minutes, 1), (sec, precision)


def gpgga_to_dms(gpgga):
    '''
    Convert GPS coordinate in GPGGA format (ddmm.mmmm) to decimal degrees

    Reference: http://us.cactii.net/~bb/gps.py
    '''
    deg_min, dmin = gpgga.split('.')
    degrees = int(deg_min[:-2])
    minutes = float('%s.%s' % (deg_min[-2:], dmin))
    decimal = degrees + (minutes/60)
    return decimal


def utc_to_localtime(utc_time):
    '''
    Shift a naive UTC datetime by the machine's current UTC offset.
    '''
    offset = datetime.datetime.utcnow() - datetime.datetime.now()
    # The two clock reads happen at slightly different instants; rounding to
    # whole seconds removes that jitter from the result.
    offset = datetime.timedelta(seconds=round(offset.total_seconds()))
    return utc_time - offset


def compute_bearing(start_lat, start_lon, end_lat, end_lon):
    '''
    Get the compass bearing from start to end, in degrees within [0, 360).

    Formula from
    http://www.movable-type.co.uk/scripts/latlong.html
    '''
    # make sure everything is in radians
    start_lat = math.radians(start_lat)
    start_lon = math.radians(start_lon)
    end_lat = math.radians(end_lat)
    end_lon = math.radians(end_lon)

    dLong = end_lon - start_lon

    # wrap the longitude difference into [-pi, pi]
    if abs(dLong) > math.pi:
        if dLong > 0.0:
            dLong = -(2.0 * math.pi - dLong)
        else:
            dLong = (2.0 * math.pi + dLong)

    y = math.sin(dLong)*math.cos(end_lat)
    x = math.cos(start_lat)*math.sin(end_lat) - math.sin(start_lat)*math.cos(end_lat)*math.cos(dLong)
    bearing = (math.degrees(math.atan2(y, x)) + 360.0) % 360.0

    return bearing


def diff_bearing(b1, b2):
    '''
    Compute difference between two bearings (at most 180)
    '''
    d = abs(b2-b1)
    d = 360-d if d>180 else d
    return d


def offset_bearing(bearing, offset):
    '''
    Add offset to bearing
    '''
    bearing = (bearing + offset) % 360
    return bearing


def normalize_bearing(bearing, check_hex=False):
    '''
    Normalize bearing into [0, 360).

    With check_hex set, a value above 360 is first treated as a negative
    number that was wrongly parsed as unsigned.
    '''
    if bearing > 360 and check_hex:
        # fix negative value wrongly parsed in exifread
        # -360 degree -> 4294966935 when converting from hex
        bits = bin(int(bearing))[2:]
        flipped = ''.join([str(int(int(a)==0)) for a in bits])
        bearing = -float(int(flipped, 2))
    bearing %= 360
    return bearing


def interpolate_lat_lon(points, t, max_dt=1):
    '''
    Return interpolated lat, lon, compass bearing and elevation for time t.

    Points is a list of tuples (time, lat, lon, elevation) sorted by time,
    t a datetime object. Raises ValueError when t lies more than max_dt
    seconds outside the time range of points.
    '''
    # find the enclosing points in sorted list
    if (t<=points[0][0]) or (t>=points[-1][0]):
        if t<=points[0][0]:
            dt = abs((points[0][0]-t).total_seconds())
        else:
            dt = (t-points[-1][0]).total_seconds()
        if dt>max_dt:
            raise ValueError("Time t not in scope of gpx file.")
        else:
            print ("Warning: Time t not in scope of gpx file by {} seconds, extrapolating...".format(dt))

        if t < points[0][0]:
            before = points[0]
            after = points[1]
        else:
            before = points[-2]
            after = points[-1]
        bearing = compute_bearing(before[1], before[2], after[1], after[2])

        if t==points[0][0]:
            x = points[0]
            return (x[1], x[2], bearing, x[3])

        if t==points[-1][0]:
            x = points[-1]
            return (x[1], x[2], bearing, x[3])
    else:
        for i,point in enumerate(points):
            if t<point[0]:
                if i>0:
                    before = points[i-1]
                else:
                    before = points[i]
                after = points[i]
                break

    # time diff
    dt_before = (t-before[0]).total_seconds()
    dt_after = (after[0]-t).total_seconds()

    # simple linear interpolation
    lat = (before[1]*dt_after + after[1]*dt_before) / (dt_before + dt_after)
    lon = (before[2]*dt_after + after[2]*dt_before) / (dt_before + dt_after)

    bearing = compute_bearing(before[1], before[2], after[1], after[2])

    if before[3] is not None and after[3] is not None:
        ele = (before[3]*dt_after + after[3]*dt_before) / (dt_before + dt_after)
    else:
        ele = None

    return lat, lon, bearing, ele
def get_lat_lon_time_from_nmea(nmea_file, local_time=True):
    '''
    Read location and time stamps from a track in a NMEA file.

    Returns a list of tuples (time, lat, lon, altitude), sorted.

    The date comes from the RMC sentences and the time of day and position
    from the GGA sentences. GGA sentences that appear before the first RMC
    sentence use the date of that first RMC sentence.

    NOTE(review): local_time is accepted but not used; timestamps are
    returned exactly as parsed from the file.

    Raises ValueError when the file has GGA sentences but no RMC sentence
    to take the date from.
    '''

    gga_Talker_id = ("$GNGGA", "$GPGGA", "$GLGGA", "$GBGGA", "$GAGGA")
    rmc_Talker_id = ("$GNRMC", "$GPRMC", "$GLRMC", "$GBRMC", "$GARMC")

    with open(nmea_file, "r") as f:
        lines = [l.rstrip("\n\r") for l in f]

    def is_sentence(line, talker_ids):
        # True when the line contains any of the given sentence identifiers.
        return any(talker in line for talker in talker_ids)

    # Get initial date
    date = None
    for l in lines:
        if is_sentence(l, rmc_Talker_id):
            date = pynmea2.parse(l, check=False).datetime.date()
            break

    # Parse GPS trace
    points = []
    for l in lines:
        if is_sentence(l, rmc_Talker_id):
            data = pynmea2.parse(l, check=False)
            date = data.datetime.date()

        if is_sentence(l, gga_Talker_id):
            if date is None:
                # Without any RMC sentence there is no date to attach to
                # the GGA time of day.
                raise ValueError(
                    "No RMC sentence with a date found in {0}".format(nmea_file))
            data = pynmea2.parse(l, check=False)
            timestamp = datetime.datetime.combine(date, data.timestamp)
            lat, lon, alt = data.latitude, data.longitude, data.altitude
            points.append((timestamp, lat, lon, alt))

    points.sort()
    return points
import os
import errno
import sys


def mkdir_p(path):
    '''
    Make a directory including parent directories.

    An already existing directory is not an error; any other failure
    (including an existing non-directory at path) is re-raised.
    '''
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise


def progress(count, total, suffix=''):
    '''
    Display progress bar
    sources: https://gist.github.com/vladignatyev/06860ec2040cb497f0f3

    Writes a 60 character bar, the percentage and suffix to stdout, ending
    with a carriage return so the next call overwrites the line.
    '''
    bar_len = 60
    if total:
        filled_len = int(round(bar_len * count / float(total)))
        percents = round(100.0 * count / float(total), 1)
    else:
        # Nothing to do: show an empty bar instead of dividing by zero.
        filled_len = 0
        percents = 0.0
    # Keep the bar at its fixed width even if count runs outside [0, total].
    filled_len = min(bar_len, max(0, filled_len))
    bar = '=' * filled_len + '-' * (bar_len - filled_len)
    sys.stdout.write('[%s] %s%s %s\r' % (bar, percents, '%', suffix))
    sys.stdout.flush()
In particular it should look something like: +[ SOI | | SOS | image data | EOI ] +So, the library expects a Start-of-Image marker, followed +by an arbitrary number of segment (assuming that a segment +has the format: +[ <0xFF> ] +and that there are no gaps between segments. +The last segment must be the Start-of-Scan header, and the library +assumes that following Start-of-Scan comes the image data, finally +followed by the End-of-Image marker. +This is probably not sufficient to handle arbitrary files conforming +to the JPEG specs, but it should handle files that conform to +JFIF or EXIF, as well as files that conform to neither but +have both JFIF and EXIF application segment (which is the majority +of files in existence!). +When writing out files all segment will be written out in the order +in which they were read. Any 'unknown' segment will be written out +as is. Note: This may or may not corrupt the data. If the segment +format relies on absolute references then this library may still +corrupt that segment! +Can have a JpegFile in two modes: Read Only and Read Write. +Read Only mode: trying to access missing elements will result in +an AttributeError. +Read Write mode: trying to access missing elements will automatically +create them. +E.g: +img.exif.primary. + .geo + .interop + .exif. + .exif.makernote. + .thumbnail +img.flashpix.<...> +img.jfif. +img.xmp +E.g: +try: + print img.exif.tiff.exif.FocalLength +except AttributeError: + print "No Focal Length data" +Source: https://github.com/bennoleslie/pexif +""" + +import StringIO +import sys +from struct import unpack, pack + +MAX_HEADER_SIZE = 64 * 1024 +DELIM = 0xff +EOI = 0xd9 +SOI_MARKER = chr(DELIM) + '\xd8' +EOI_MARKER = chr(DELIM) + '\xd9' + +EXIF_OFFSET = 0x8769 +GPSIFD = 0x8825 + +TIFF_OFFSET = 6 +TIFF_TAG = 0x2a + +DEBUG = 0 + +# By default, if we find a makernote with an unknown format, we +# simply skip over it. In some cases, it makes sense to raise a +# real error. 
+# +# Set to `unknown_make_note_as_error` to True, if errors should +# be raised. +unknown_maker_note_as_error = False + +def debug(*debug_string): + """Used for print style debugging. Enable by setting the global + DEBUG to 1.""" + if DEBUG: + for each in debug_string: + print each, + print + +class DefaultSegment: + """DefaultSegment represents a particluar segment of a JPEG file. + This class is instantiated by JpegFile when parsing Jpeg files + and is not intended to be used directly by the programmer. This + base class is used as a default which doesn't know about the internal + structure of the segment. Other classes subclass this to provide + extra information about a particular segment. + """ + + def __init__(self, marker, fd, data, mode): + """The constructor for DefaultSegment takes the marker which + identifies the segments, a file object which is currently positioned + at the end of the segment. This allows any subclasses to potentially + extract extra data from the stream. Data contains the contents of the + segment.""" + self.marker = marker + self.data = data + self.mode = mode + self.fd = fd + assert mode in ["rw", "ro"] + if not self.data is None: + self.parse_data(data) + + class InvalidSegment(Exception): + """This exception may be raised by sub-classes in cases when they + can't correctly identify the segment.""" + pass + + def write(self, fd): + """This method is called by JpegFile when writing out the file. It + must write out any data in the segment. This shouldn't in general be + overloaded by subclasses, they should instead override the get_data() + method.""" + fd.write('\xff') + fd.write(pack('B', self.marker)) + data = self.get_data() + fd.write(pack('>H', len(data) + 2)) + fd.write(data) + + def get_data(self): + """This method is called by write to generate the data for this segment. 
+ It should be overloaded by subclasses.""" + return self.data + + def parse_data(self, data): + """This method is called be init to parse any data for the segment. It + should be overloaded by subclasses rather than overloading __init__""" + pass + + def dump(self, fd): + """This is called by JpegFile.dump() to output a human readable + representation of the segment. Subclasses should overload this to provide + extra information.""" + print >> fd, " Section: [%5s] Size: %6d" % \ + (jpeg_markers[self.marker][0], len(self.data)) + +class StartOfScanSegment(DefaultSegment): + """The StartOfScan segment needs to be treated specially as the actual + image data directly follows this segment, and that data is not included + in the size as reported in the segment header. This instances of this class + are created by JpegFile and it should not be subclassed. + """ + def __init__(self, marker, fd, data, mode): + DefaultSegment.__init__(self, marker, fd, data, mode) + # For SOS we also pull out the actual data + img_data = fd.read() + + # Usually the EOI marker will be at the end of the file, + # optimise for this case + if img_data[-2:] == EOI_MARKER: + remaining = 2 + else: + # We need to search + for i in range(len(img_data) - 2): + if img_data[i:i + 2] == EOI_MARKER: + break + else: + raise JpegFile.InvalidFile("Unable to find EOI marker.") + remaining = len(img_data) - i + + self.img_data = img_data[:-remaining] + fd.seek(-remaining, 1) + + def write(self, fd): + """Write segment data to a given file object""" + DefaultSegment.write(self, fd) + fd.write(self.img_data) + + def dump(self, fd): + """Dump as ascii readable data to a given file object""" + print >> fd, " Section: [ SOS] Size: %6d Image data size: %6d" % \ + (len(self.data), len(self.img_data)) + +class ExifType: + """The ExifType class encapsulates the data types used + in the Exif spec. These should really be called TIFF types + probably. 
class Rational:
    """Minimal fraction holder: a numerator and a denominator.

    The values are stored exactly as given; nothing is reduced or checked.
    """

    def __init__(self, num, den):
        """Remember the numerator num and the denominator den."""
        self.num, self.den = num, den

    def __repr__(self):
        """Render the fraction as "<num> / <den>"."""
        return " / ".join((str(self.num), str(self.den)))

    def as_tuple(self):
        """Return the pair (numerator, denominator)."""
        return self.num, self.den
def __getattr__(self, name):
    """Resolve an attribute by tag name or by embedded-IFD name.

    First the entries of self.tags are searched for one whose second item
    equals name; its value is returned, or AttributeError is raised when
    the tag has no value. Then self.embedded_tags is searched for an entry
    whose first item equals name: an existing embedded IFD is returned, and
    in "rw" mode a missing one is created, stored and returned.

    Raises AttributeError when nothing matches or when a missing embedded
    IFD would have to be created in a non-"rw" mode.
    """
    for key, entry in self.tags.items():
        if entry[1] == name:
            value = self[key]
            if value is None:
                raise AttributeError(name)
            return value
    for key, entry in self.embedded_tags.items():
        if entry[0] == name:
            if self.has_key(key):
                return self[key]
            if self.mode != "rw":
                raise AttributeError(name)
            # The IFD constructor takes (e, offset, exif_file, mode); the
            # previous call passed mode and exif_file the wrong way round.
            new = entry[1](self.e, 0, self.exif_file, "rw")
            self[key] = new
            return new
    # Call form of raise: valid on both Python 2 and Python 3.
    raise AttributeError("%s not found.. %s" % (name, self.embedded_tags))
+ self.entries.append((key, self.tags[key][2], value)) + return + + def __init__(self, e, offset, exif_file, mode, data = None): + self.exif_file = exif_file + self.mode = mode + self.e = e + self.entries = [] + if data is None: + return + num_entries = unpack(e + 'H', data[offset:offset+2])[0] + next = unpack(e + "I", data[offset+2+12*num_entries: + offset+2+12*num_entries+4])[0] + debug("OFFSET %s - %s" % (offset, next)) + + for i in range(num_entries): + start = (i * 12) + 2 + offset + debug("START: ", start) + entry = unpack(e + "HHII", data[start:start+12]) + tag, exif_type, components, the_data = entry + + debug("%s %s %s %s %s" % (hex(tag), exif_type, + exif_type_size(exif_type), components, + the_data)) + byte_size = exif_type_size(exif_type) * components + + + if tag in self.embedded_tags: + try: + actual_data = self.embedded_tags[tag][1](e, + the_data, exif_file, self.mode, data) + except JpegFile.SkipTag as exc: + # If the tag couldn't be parsed, and raised 'SkipTag' + # then we just continue. 
+ continue + else: + if byte_size > 4: + debug(" ...offset %s" % the_data) + the_data = data[the_data:the_data+byte_size] + else: + the_data = data[start+8:start+8+byte_size] + + if exif_type == BYTE or exif_type == UNDEFINED: + actual_data = list(the_data) + elif exif_type == ASCII: + if the_data[-1] != '\0': + actual_data = the_data + '\0' + #raise JpegFile.InvalidFile("ASCII tag '%s' not + # NULL-terminated: %s [%s]" % (self.tags.get(tag, + # (hex(tag), 0))[0], the_data, map(ord, the_data))) + #print "ASCII tag '%s' not NULL-terminated: + # %s [%s]" % (self.tags.get(tag, (hex(tag), 0))[0], + # the_data, map(ord, the_data)) + actual_data = the_data + elif exif_type == SHORT: + actual_data = list(unpack(e + ("H" * components), the_data)) + elif exif_type == LONG: + actual_data = list(unpack(e + ("I" * components), the_data)) + elif exif_type == SLONG: + actual_data = list(unpack(e + ("i" * components), the_data)) + elif exif_type == RATIONAL or exif_type == SRATIONAL: + if exif_type == RATIONAL: t = "II" + else: t = "ii" + actual_data = [] + for i in range(components): + actual_data.append(Rational(*unpack(e + t, + the_data[i*8: + i*8+8]))) + else: + raise "Can't handle this" + + if (byte_size > 4): + debug("%s" % actual_data) + + self.special_handler(tag, actual_data) + entry = (tag, exif_type, actual_data) + self.entries.append(entry) + + debug("%-40s %-10s %6d %s" % (self.tags.get(tag, (hex(tag), 0))[0], + ExifType.lookup[exif_type], + components, actual_data)) + self.ifd_handler(data) + + def isifd(self, other): + """Return true if other is an IFD""" + return issubclass(other.__class__, IfdData) + + def getdata(self, e, offset, last = 0): + data_offset = offset+2+len(self.entries)*12+4 + output_data = "" + + out_entries = [] + + # Add any specifc data for the particular type + extra_data = self.extra_ifd_data(data_offset) + data_offset += len(extra_data) + output_data += extra_data + + for tag, exif_type, the_data in self.entries: + magic_type = exif_type + if 
def dump(self, f, indent = ""):
    """Write a readable listing of this IFD to the file object f.

    f      -- object with a write() method that accepts text
    indent -- prefix put in front of every line; nested IFDs are dumped
              with a longer prefix
    """
    # f.write replaces the Python 2-only "print >> f" form; the newline
    # is the one print used to add.
    f.write(indent + "<--- %s start --->" % self.name + "\n")
    for tag, exif_type, data in self.entries:
        # ASCII values carry NUL padding; skip the strip for a missing
        # value, the same guard __getitem__ applies.
        if exif_type == ASCII and data is not None:
            data = data.strip('\0')
        if self.isifd(data):
            data.dump(f, indent + " ")
        else:
            # Show a one-element value as the element itself.
            if data and len(data) == 1:
                data = data[0]
            f.write(indent + " %-40s %s" %
                    (self.tags.get(tag, (hex(tag), 0))[0], data) + "\n")
    f.write(indent + "<--- %s end --->" % self.name + "\n")
def get_primary(self, create=False):
    """Return the first IFD of this segment.

    When the segment holds no IFD, None is returned unless create is
    true; in that case a new IfdTIFF is built, inserted at the front of
    self.ifds and returned (the segment must be in "rw" mode).
    """
    if len(self.ifds) > 0:
        return self.ifds[0]
    if not create:
        return None
    assert self.mode == "rw"
    primary_ifd = IfdTIFF(self.e, None, self, "rw")
    self.ifds.insert(0, primary_ifd)
    return primary_ifd
To get an ASCII dump of the data in a file + use the dump method.""" + + def fromFile(filename, mode="rw"): + """Return a new JpegFile object from a given filename.""" + with open(filename, "rb") as f: + return JpegFile(f, filename=filename, mode=mode) + fromFile = staticmethod(fromFile) + + def fromString(str, mode="rw"): + """Return a new JpegFile object taking data from a string.""" + return JpegFile(StringIO.StringIO(str), "from buffer", mode=mode) + fromString = staticmethod(fromString) + + def fromFd(fd, mode="rw"): + """Return a new JpegFile object taking data from a file object.""" + return JpegFile(fd, "fd <%d>" % fd.fileno(), mode=mode) + fromFd = staticmethod(fromFd) + + class SkipTag(Exception): + """This exception is raised if a give tag should be skipped.""" + pass + + class InvalidFile(Exception): + """This exception is raised if a given file is not able to be parsed.""" + pass + + class NoSection(Exception): + """This exception is raised if a section is unable to be found.""" + pass + + def __init__(self, input, filename=None, mode="rw"): + """JpegFile Constructor. input is a file object, and filename + is a string used to name the file. (filename is used only for + display functions). You shouldn't use this function directly, + but rather call one of the static methods fromFile, fromString + or fromFd.""" + self.filename = filename + self.mode = mode + # input is the file descriptor + soi_marker = input.read(len(SOI_MARKER)) + + # The very first thing should be a start of image marker + if (soi_marker != SOI_MARKER): + raise self.InvalidFile("Error reading soi_marker. Got <%s> "\ + "should be <%s>" % (soi_marker, SOI_MARKER)) + + # Now go through and find all the blocks of data + segments = [] + while 1: + head = input.read(2) + delim, mark = unpack(">BB", head) + if (delim != DELIM): + raise self.InvalidFile("Error, expecting delimiter. "\ + "Got <%s> should be <%s>" % + (delim, DELIM)) + if mark == EOI: + # Hit end of image marker, game-over! 
def writeFile(self, filename):
    """Write the JpegFile out to a file named filename.

    The file is opened in binary mode and closed again before returning,
    including when writeFd() raises.
    """
    # "with" closes the handle; it was previously left open.
    with open(filename, "wb") as output:
        self.writeFd(output)
def get_geo(self):
    """Return a tuple of (latitude, longitude).

    Each coordinate is read from the GPS IFD as three fractions (degrees,
    minutes, seconds) and combined into one float. The latitude is negated
    when GPSLatitudeRef is "S" and the longitude when GPSLongitudeRef is "W".

    Raises self.NoSection when the primary IFD has no GPS section.
    """
    def convert(x):
        # x holds three objects with .num / .den attributes.
        (deg, minutes, sec) = x
        return (float(deg.num) / deg.den) + \
               (1/60.0 * float(minutes.num) / minutes.den) + \
               (1/3600.0 * float(sec.num) / sec.den)

    if not self.exif.primary.has_key(GPSIFD):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise self.NoSection("File %s doesn't have a GPS section." %
                             self.filename)

    gps = self.exif.primary.GPS
    lat = convert(gps.GPSLatitude)
    lng = convert(gps.GPSLongitude)
    if gps.GPSLatitudeRef == "S":
        lat = -lat
    if gps.GPSLongitudeRef == "W":
        lng = -lng

    return lat, lng
100644 index 0000000..018a8cb --- /dev/null +++ b/lib/sequence.py @@ -0,0 +1,317 @@ +import os +import sys +import lib.io +import lib.geo +from lib.exif import EXIF, verify_exif +from collections import OrderedDict +import datetime + +''' +Sequence class for organizing/cleaning up photos in a folder + - split to sequences based on time intervals + - split to sequences based on gps distances + - remove duplicate images (e.g. waiting for red light, in traffic etc) @simonmikkelsen +''' + +MAXIMUM_SEQUENCE_LENGTH = 1000 + +class Sequence(object): + + def __init__(self, filepath, skip_folders=[], skip_subfolders=False, check_exif=True): + self.filepath = filepath + self._skip_folders = skip_folders + self._skip_subfolders = skip_subfolders + self.file_list = self.get_file_list(filepath, check_exif) + self.num_images = len(self.file_list) + + def _is_skip(self, filepath): + ''' + Skip photos in specified folders + - filepath/duplicates: it stores potential duplicate photos + detected by method 'remove_duplicates' + - filepath/success: it stores photos that have been successfully + ''' + _is_skip = False + for folder in self._skip_folders: + if folder in filepath: + _is_skip = True + if self._skip_subfolders and filepath != self.filepath: + _is_skip = True + return _is_skip + + def _read_capture_time(self, filename): + ''' + Use EXIF class to parse capture time from EXIF. + ''' + exif = EXIF(filename) + return exif.extract_capture_time() + + def _read_lat_lon(self, filename): + ''' + Use EXIF class to parse latitude and longitude from EXIF. + ''' + exif = EXIF(filename) + lon, lat = exif.extract_lon_lat() + return lat, lon + + def _read_direction(self, filename): + ''' + Use EXIF class to parse compass direction from EXIF. 
def sort_file_list(self, file_list):
    '''
    Read capture times and sort files in time order.

    @params file_list: paths of the images to sort
    @return: ([], []) for an empty list, otherwise a two-item list holding
             the tuple of sorted capture times and the tuple of file paths
             in the same order (files with equal times are ordered by path)
    '''
    if len(file_list) == 0:
        return [], []
    capture_times = [self._read_capture_time(filepath) for filepath in file_list]
    # sorted() works on any iterable; zip(...).sort() only exists where
    # zip returns a list.
    sorted_times_files = sorted(zip(capture_times, file_list))
    # list() keeps the result indexable where zip returns an iterator.
    return list(zip(*sorted_times_files))
def split(self, cutoff_distance=500., cutoff_time=None, max_sequence_length=MAXIMUM_SEQUENCE_LENGTH, move_files=True, verbose=False, skip_cutoff=False):
    '''
    Split photos into sequences in case of large distance gap or large time interval
    @params cutoff_distance: maximum distance gap in meters
    @params cutoff_time: maximum time interval in seconds (if None, use 1.5 x median time interval in the sequence)
    @params max_sequence_length: a group is cut once it holds more than this many photos
    @params move_files: if True, pass the groups to move_groups
    @params verbose: if True, print the reason for every cut
    @params skip_cutoff: not used by this method
    @return: list of groups, each a list of file paths in capture-time order
    '''

    file_list = self.file_list
    groups = []

    if len(file_list) >= 1:
        # sort based on EXIF capture time
        capture_times, file_list = self.sort_file_list(file_list)

        # diff in capture time
        capture_deltas = [t2-t1 for t1,t2 in zip(capture_times, capture_times[1:])]

        # read gps for ordered files
        latlons = [self._read_lat_lon(filepath) for filepath in file_list]

        # distance between consecutive images
        distances = [lib.geo.gps_distance(ll1, ll2) for ll1, ll2 in zip(latlons, latlons[1:])]

        # if cutoff time is given use that, else assume cutoff is 1.5x median time delta
        if cutoff_time is None:
            if verbose:
                print("Cut-off time is None")
            # A single image has no deltas: there is no median to take
            # and the loop below never reads cutoff_time.
            if capture_deltas:
                median = sorted(capture_deltas)[len(capture_deltas)//2]
                if not isinstance(median, int):
                    median = median.total_seconds()
                cutoff_time = 1.5*median

        # extract groups by cutting using cutoff time
        group = [file_list[0]]
        cut = 0
        for i,filepath in enumerate(file_list[1:]):
            cut_time = capture_deltas[i].total_seconds() > cutoff_time
            cut_distance = distances[i] > cutoff_distance
            cut_sequence_length = len(group) > max_sequence_length
            if cut_time or cut_distance or cut_sequence_length:
                cut += 1
                # delta too big, save current group, start new
                groups.append(group)
                group = [filepath]
                if verbose:
                    if cut_distance:
                        print('Cut {}: Delta in distance {} meters is too bigger than cutoff_distance {} meters at {}'.format(cut,distances[i], cutoff_distance, file_list[i+1]))
                    elif cut_time:
                        print('Cut {}: Delta in time {} seconds is bigger then cutoff_time {} seconds at {}'.format(cut, capture_deltas[i].total_seconds(), cutoff_time, file_list[i+1]))
                    elif cut_sequence_length:
                        print('Cut {}: Maximum sequence length {} reached at {}'.format(cut, max_sequence_length, file_list[i+1]))
            else:
                group.append(filepath)

        groups.append(group)

        # move groups to subfolders
        if move_files:
            self.move_groups(groups)

    print("Done split photos in {} into {} sequences".format(self.filepath, len(groups)))
    return groups
def remove_duplicates(self, min_distance=1e-5, min_angle=5):
    '''
    Detect duplicate photos in a folder and move them to 'duplicates'
    @source: a less general version of @simonmikkelsen's duplicate remover
    @params min_distance: a photo closer than this to the last kept photo
                          (as measured by lib.geo.gps_distance) may be a duplicate
    @params min_angle: ... and its bearing must differ by less than this
    @return: list of groups of duplicate file paths (no empty groups)
    '''
    # ordered list by time
    capture_times, file_list = self.sort_file_list(self.file_list)

    groups = []
    # With no files there is nothing to compare; indexing below would fail.
    if len(file_list) > 0:
        # read gps for ordered files
        latlons = [self._read_lat_lon(filepath) for filepath in file_list]

        # read bearing for ordered files
        bearings = [self._read_direction(filepath) for filepath in file_list]

        # interpolated bearings
        interpolated_bearings = [lib.geo.compute_bearing(ll1[0], ll1[1], ll2[0], ll2[1])
                                 for ll1, ll2 in zip(latlons, latlons[1:])]
        interpolated_bearings.append(bearings[-1])

        # use interpolated bearings if bearing not available in EXIF
        bearings = [b if b is not None else interpolated
                    for b, interpolated in zip(bearings, interpolated_bearings)]

        prev_latlon = latlons[0]
        prev_bearing = bearings[0]
        group = []
        for k in range(1, len(file_list)):
            distance = lib.geo.gps_distance(latlons[k], prev_latlon)
            if bearings[k] is not None and prev_bearing is not None:
                bearing_diff = lib.geo.diff_bearing(bearings[k], prev_bearing)
            else:
                # Not use bearing difference if no bearings are available
                bearing_diff = 360

            if distance < min_distance and bearing_diff < min_angle:
                # duplicate of the last kept photo
                group.append(file_list[k])
            else:
                # new reference photo; close the running group, if any
                prev_latlon = latlons[k]
                prev_bearing = bearings[k]
                if group:
                    groups.append(group)
                group = []

        # Keep the last group only when it holds photos, so that no empty
        # folder is created and the reported count is right.
        if group:
            groups.append(group)

    # move to filepath/duplicates/group_id (TODO: uploader should skip the duplicate folder)
    self.move_groups(groups, 'duplicates')
    print("Done remove duplicate photos in {} into {} groups".format(self.filepath, len(groups)))

    return groups
def encode_multipart(fields, files, boundary=None):
    """
    Build a multipart/form-data request body.

    fields maps form field names to values; files maps field names to
    dicts with the required keys 'filename' and 'content' and an optional
    'mimetype' (guessed from the file name when absent, falling back to
    'application/octet-stream'). When boundary is None a random one of 30
    characters is drawn from BOUNDARY_CHARS.

    Returns the tuple (body_string, headers_dict).

    From MIT licensed recipe at
    http://code.activestate.com/recipes/578668-encode-multipart-form-data-for-uploading-files-via/
    """
    def quoted(text):
        # Backslash-escape double quotes so the text fits inside "..."
        return text.replace('"', '\\"')

    if boundary is None:
        boundary = ''.join(random.choice(BOUNDARY_CHARS) for _ in range(30))

    parts = []

    # plain form fields
    for field_name, field_value in fields.items():
        parts.append('--{0}'.format(boundary))
        parts.append('Content-Disposition: form-data; name="{0}"'.format(quoted(field_name)))
        parts.append('')
        parts.append(str(field_value))

    # file fields
    for field_name, spec in files.items():
        filename = spec['filename']
        if 'mimetype' in spec:
            mimetype = spec['mimetype']
        else:
            guessed = mimetypes.guess_type(filename)[0]
            mimetype = guessed or 'application/octet-stream'
        parts += [
            '--{0}'.format(boundary),
            'Content-Disposition: form-data; name="{0}"; filename="{1}"'.format(
                quoted(field_name), quoted(filename)),
            'Content-Type: {0}'.format(mimetype),
            '',
            spec['content'],
        ]

    # closing delimiter, followed by a final line break
    parts += ['--{0}--'.format(boundary), '']
    body = '\r\n'.join(parts)

    headers = {
        'Content-Type': 'multipart/form-data; boundary={0}'.format(boundary),
        'Content-Length': str(len(body)),
    }
    return (body, headers)
Try again.') + +def get_upload_token(mail, pwd): + ''' + Get upload token + ''' + params = urllib.urlencode({"email": mail, "password": pwd}) + response = urllib.urlopen(LOGIN_URL, params) + resp = json.loads(response.read()) + return resp['token'] + + +def get_authentication_info(): + ''' + Get authentication information from env + ''' + try: + MAPILLARY_USERNAME = os.environ['MAPILLARY_USERNAME'] + MAPILLARY_EMAIL = os.environ['MAPILLARY_EMAIL'] + MAPILLARY_PASSWORD = os.environ['MAPILLARY_PASSWORD'] + except KeyError: + return None + return MAPILLARY_USERNAME, MAPILLARY_EMAIL, MAPILLARY_PASSWORD + + +def get_full_authentication_info(user=None, email=None): + # Fetch full authetication info + try: + MAPILLARY_EMAIL = email if email is not None else os.environ['MAPILLARY_EMAIL'] + MAPILLARY_SECRET_HASH = os.environ.get('MAPILLARY_SECRET_HASH', None) + MAPILLARY_UPLOAD_TOKEN = None + + if MAPILLARY_SECRET_HASH is None: + MAPILLARY_PASSWORD = os.environ['MAPILLARY_PASSWORD'] + MAPILLARY_PERMISSION_HASH = os.environ['MAPILLARY_PERMISSION_HASH'] + MAPILLARY_SIGNATURE_HASH = os.environ['MAPILLARY_SIGNATURE_HASH'] + MAPILLARY_UPLOAD_TOKEN = get_upload_token(MAPILLARY_EMAIL, MAPILLARY_PASSWORD) + UPLOAD_URL = MAPILLARY_UPLOAD_URL + else: + secret_hash = MAPILLARY_SECRET_HASH + MAPILLARY_PERMISSION_HASH = PERMISSION_HASH + MAPILLARY_SIGNATURE_HASH = SIGNATURE_HASH + UPLOAD_URL = MAPILLARY_DIRECT_UPLOAD_URL + return MAPILLARY_EMAIL, MAPILLARY_UPLOAD_TOKEN, MAPILLARY_SECRET_HASH, UPLOAD_URL + except KeyError: + print("You are missing one of the environment variables MAPILLARY_USERNAME, MAPILLARY_EMAIL, MAPILLARY_PASSWORD, MAPILLARY_PERMISSION_HASH or MAPILLARY_SIGNATURE_HASH. 
These are required.") + sys.exit() + + +def get_project_key(project_name, project_key=None): + ''' + Get project key given project name + ''' + if project_name is not None or project_key is not None: + + # Get the JWT token + MAPILLARY_USERNAME, MAPILLARY_EMAIL, MAPILLARY_PASSWORD = get_authentication_info() + params = urllib.urlencode( {"email": MAPILLARY_EMAIL, "password": MAPILLARY_PASSWORD }) + resp = json.loads(urllib.urlopen(LOGIN_URL, params).read()) + token = resp['token'] + + # Get the user key + req = urllib2.Request(ME_URL) + req.add_header('Authorization', 'Bearer {}'.format(token)) + resp = json.loads(urllib2.urlopen(req).read()) + userkey = resp['key'] + + # Get the user key + req = urllib2.Request(PROJECTS_URL.format(userkey, CLIENT_ID)) + req.add_header('Authorization', 'Bearer {}'.format(token)) + resp = json.loads(urllib2.urlopen(req).read()) + projects = resp + + # check projects + found = False + print "Your projects: " + for project in projects: + print(project["name"]) + project_name_matched = project['name'].encode('utf-8').decode('utf-8') == project_name + project_key_matched = project["key"] == project_key + if project_name_matched or project_key_matched: + found = True + return project['key'] + + if not found: + print "Project {} not found.".format(project_name) + + return "" + + +def upload_done_file(params): + print("Upload a DONE file {} to indicate the sequence is all uploaded and ready to submit.".format(params['key'])) + if not os.path.exists("DONE"): + open("DONE", 'a').close() + #upload + upload_file("DONE", **params) + #remove + if os.path.exists("DONE"): + os.remove("DONE") + + +def upload_file(filepath, url, permission, signature, key=None, move_files=True, keep_file_names=True): + ''' + Upload file at filepath. + + Move to subfolders 'success'/'failed' on completion if move_files is True. 
+ ''' + filename = os.path.basename(filepath) + + if keep_file_names: + s3_filename = filename + else: + try: + s3_filename = EXIF(filepath).exif_name() + except: + s3_filename = filename + + # add S3 'path' if given + if key is None: + s3_key = s3_filename + else: + s3_key = key+s3_filename + + parameters = {"key": s3_key, "AWSAccessKeyId": "AKIAI2X3BJAT2W75HILA", "acl": "private", + "policy": permission, "signature": signature, "Content-Type":"image/jpeg" } + + with open(filepath, "rb") as f: + encoded_string = f.read() + + data, headers = encode_multipart(parameters, {'file': {'filename': filename, 'content': encoded_string}}) + + root_path = os.path.dirname(filepath) + success_path = os.path.join(root_path, 'success') + failed_path = os.path.join(root_path, 'failed') + lib.io.mkdir_p(success_path) + lib.io.mkdir_p(failed_path) + + for attempt in range(MAX_ATTEMPTS): + + # Initialize response before each attempt + response = None + + try: + request = urllib2.Request(url, data=data, headers=headers) + response = urllib2.urlopen(request) + + if response.getcode()==204: + if move_files: + os.rename(filepath, os.path.join(success_path, filename)) + # print("Success: {0}".format(filename)) + else: + if move_files: + os.rename(filepath, os.path.join(failed_path, filename)) + print("Failed: {0}".format(filename)) + break # attempts + + except urllib2.HTTPError as e: + print("HTTP error: {0} on {1}".format(e, filename)) + time.sleep(5) + except urllib2.URLError as e: + print("URL error: {0} on {1}".format(e, filename)) + time.sleep(5) + except httplib.HTTPException as e: + print("HTTP exception: {0} on {1}".format(e, filename)) + time.sleep(5) + except OSError as e: + print("OS error: {0} on {1}".format(e, filename)) + time.sleep(5) + except socket.timeout as e: + # Specific timeout handling for Python 2.7 + print("Timeout error: {0} (retrying)".format(filename)) + finally: + if response is not None: + response.close() + + +def upload_file_list(file_list, 
params=UPLOAD_PARAMS): + # create upload queue with all files + q = Queue() + for filepath in file_list: + q.put(filepath) + + # create uploader threads + uploaders = [UploadThread(q, params) for i in range(NUMBER_THREADS)] + + # start uploaders as daemon threads that can be stopped (ctrl-c) + try: + print("Uploading with {} threads".format(NUMBER_THREADS)) + for uploader in uploaders: + uploader.daemon = True + uploader.start() + + for uploader in uploaders: + uploaders[i].join(1) + + while q.unfinished_tasks: + time.sleep(1) + q.join() + except (KeyboardInterrupt, SystemExit): + print("\nBREAK: Stopping upload.") + sys.exit() + + +def upload_summary(file_list, total_uploads, split_groups, duplicate_groups, missing_groups): + total_success = len([f for f in file_list if 'success' in f]) + total_failed = len([f for f in file_list if 'failed' in f]) + lines = [] + if duplicate_groups: + lines.append('Duplicates (skipping):') + lines.append(' groups: {}'.format(len(duplicate_groups))) + lines.append(' total: {}'.format(sum([len(g) for g in duplicate_groups]))) + if missing_groups: + lines.append('Missing Required EXIF (skipping):') + lines.append(' total: {}'.format(sum([len(g) for g in missing_groups]))) + + lines.append('Sequences:') + lines.append(' groups: {}'.format(len(split_groups))) + lines.append(' total: {}'.format(sum([len(g) for g in split_groups]))) + lines.append('Uploads:') + lines.append(' total uploads this run: {}'.format(total_uploads)) + lines.append(' total: {}'.format(total_success+total_failed)) + lines.append(' success: {}'.format(total_success)) + lines.append(' failed: {}'.format(total_failed)) + lines = '\n'.join(lines) + return lines