mapillary_download/download.py

154 lines
6.7 KiB
Python
Raw Normal View History

2023-01-20 04:19:37 +01:00
import requests
2023-09-14 20:57:41 +02:00
from requests.adapters import HTTPAdapter
from requests.adapters import Retry
2023-01-20 04:19:37 +01:00
import json
import os
import asyncio
2023-09-14 23:27:03 +02:00
import concurrent.futures
2023-01-20 04:19:37 +01:00
import argparse
2023-09-10 20:04:26 +02:00
from datetime import datetime
import writer
2023-09-12 14:31:10 +02:00
from model import PictureType
2023-09-14 00:05:12 +02:00
import sys
2023-01-20 04:19:37 +01:00
2023-09-14 20:57:41 +02:00
# Retry policy for the shared session: at most 5 attempts, backoff factor 1,
# retrying when the server answers 429, 502, 503 or 504.
retries_strategies = Retry(
    status_forcelist=[429, 502, 503, 504],
    backoff_factor=1,
    total=5,
)

# One module-wide session; the retry policy is only mounted for https:// URLs.
session = requests.Session()
session.mount('https://', HTTPAdapter(max_retries=retries_strategies))
2023-01-20 04:19:37 +01:00
def _str_to_bool(value):
    '''
    Convert a command-line string into a bool.

    argparse's ``type=bool`` calls ``bool()`` on the raw text, so every
    non-empty string (including "False") becomes True. This converter reads
    the text instead. Raises argparse.ArgumentTypeError for unknown values.
    '''
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    # The empty string stays False, as bool('') was with the old type=bool.
    if normalized in ('0', 'false', 'f', 'no', 'n', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(value)
    )


def parse_args(argv=None):
    '''
    Parse the command-line options.

    argv: list of argument strings; None makes argparse read sys.argv.

    The parsed namespace is stored in the module-level ``args`` (which the
    script section reads) and is also returned.
    '''
    global args
    parser = argparse.ArgumentParser()
    parser.add_argument('--sequence_ids', type=str, nargs='+', help='The mapillary sequence id(s) to download')
    parser.add_argument('--access_token', type=str, help='Your mapillary access token')
    parser.add_argument('--image_limit', type=int, default=None, help='How many images you want to download')
    # nargs='?' + const=True accepts both a bare "--overwrite" and the
    # historical "--overwrite True" / "--overwrite False" forms.
    parser.add_argument('--overwrite', type=_str_to_bool, nargs='?', const=True, default=False, help='overwrite existing images')
    args = parser.parse_args(argv)
    return args
def background(f):
    '''
    Decorator that runs ``f`` in the event loop's default executor.

    Calling the decorated function returns the future produced by
    ``run_in_executor`` instead of running ``f`` in the calling thread.
    '''
    import functools  # local import: functools is not imported at file level

    @functools.wraps(f)
    def wrapped(*args, **kwargs):
        # run_in_executor() only forwards positional arguments, so keyword
        # arguments are bound up front with functools.partial.
        call = functools.partial(f, *args, **kwargs)
        return asyncio.get_event_loop().run_in_executor(None, call)
    return wrapped
2023-09-14 23:27:03 +02:00
#@background
def download(url, filepath, metadata=None):
    '''
    Download one image, pass it through write_exif() and save it to disk.

    url: address of the image to fetch with the shared session.
    filepath: destination; converted with str() before being opened.
    metadata: forwarded unchanged to write_exif().

    Raises requests.HTTPError when the server answers with an error status,
    so an error body is never run through the EXIF writer or saved as an
    image.
    '''
    # `with` hands the streamed connection back to the pool even when the
    # status check raises before the body has been read.
    with session.get(url, stream=True, timeout=6) as r:
        r.raise_for_status()
        image = write_exif(r.content, metadata)
    with open(str(filepath), "wb") as f:
        f.write(image)
    print("{} downloaded".format(filepath))
2023-01-20 04:19:37 +01:00
2023-09-14 00:05:12 +02:00
def get_single_image_data(image_id, mly_header):
    '''
    Request the fields of one image from graph.mapillary.com.

    image_id: id inserted into the request URL.
    mly_header: dict sent as the request headers.

    Returns the decoded JSON body of the response.
    '''
    endpoint = 'https://graph.mapillary.com/{}?fields=thumb_original_url,altitude,camera_type,captured_at,compass_angle,geometry,exif_orientation'
    response = session.get(endpoint.format(image_id), headers=mly_header)
    return response.json()
def get_image_data_from_sequences(sequences_id, mly_header):
    '''
    Yield the data of every image in the given sequences, one request at a time.

    sequences_id: sequence ids to walk through, in order.
    mly_header: dict sent as the request headers of every API call.

    Each yielded item is the dict returned by get_single_image_data() with
    an extra 'sequence_id' key holding the sequence it was listed under.
    '''
    for i, sequence_id in enumerate(sequences_id):
        url = 'https://graph.mapillary.com/image_ids?sequence_id={}'.format(sequence_id)
        # Use the caller's header: the global `header` only exists when the
        # file runs as a script. The shared session applies the retry policy.
        r = session.get(url, headers=mly_header)
        data = r.json()
        image_ids = data['data']
        total_image = len(image_ids)
        print("{} images in sequence {} of {} - id : {}".format(total_image, i+1, len(sequences_id), sequence_id))
        print('getting images data')
        for image in image_ids:
            image_data = get_single_image_data(image['id'], mly_header)
            image_data['sequence_id'] = sequence_id
            yield image_data
2023-09-14 23:27:03 +02:00
def get_image_data_from_sequences__future(sequences_id, mly_header):
    '''
    Yield the data of every image in the given sequences, fetching the
    per-image data with a pool of 10 worker threads.

    sequences_id: sequence ids to walk through, in order.
    mly_header: dict sent as the request headers of every API call.

    Each yielded item is the dict returned by get_single_image_data() with
    an extra 'sequence_id' key. Within a sequence, items are yielded in the
    order their requests complete, not in the order the API listed them.
    An exception raised by a worker propagates out of this generator.
    '''
    for i, sequence_id in enumerate(sequences_id):
        url = 'https://graph.mapillary.com/image_ids?sequence_id={}'.format(sequence_id)
        # Use the caller's header: the global `header` only exists when the
        # file runs as a script. The shared session applies the retry policy.
        r = session.get(url, headers=mly_header)
        data = r.json()
        image_ids = data['data']
        total_image = len(image_ids)
        print("{} images in sequence {} of {} - id : {}".format(total_image, i+1, len(sequences_id), sequence_id))
        print('getting images data')
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
            futures = [
                executor.submit(get_single_image_data, image['id'], mly_header)
                for image in image_ids
            ]
            for future in concurrent.futures.as_completed(futures):
                image_data = future.result()
                image_data['sequence_id'] = sequence_id
                yield image_data
2023-09-12 14:31:10 +02:00
def write_exif(picture, img_metadata):
    '''
    Write exif metadata

    Passes `picture` through writer.writePictureMetadata, writer.add_altitude
    and writer.add_direction, in that order. Each step receives the previous
    step's result together with `img_metadata`; the last result is returned.
    '''
    exif_steps = (
        writer.writePictureMetadata,
        writer.add_altitude,
        writer.add_direction,
    )
    for apply_step in exif_steps:
        picture = apply_step(picture, img_metadata)
    return picture
2023-01-20 04:19:37 +01:00
if __name__ == '__main__':
    # Script entry point: collect the image data of the requested sequences,
    # then download every image into data/<sequence_id>/<capture time>.jpg.
    parse_args()
    # Missing required options are usage errors: keep the message on stdout
    # but exit with a non-zero status.
    if args.sequence_ids is None:
        print('please provide the sequence_id')
        sys.exit(1)
    if args.access_token is None:
        print('please provide the access_token')
        sys.exit(1)

    sequence_ids = args.sequence_ids
    access_token = args.access_token
    header = {'Authorization': 'OAuth {}'.format(access_token)}

    # create the data folder (exist_ok avoids the exists()/makedirs() race)
    os.makedirs('data', exist_ok=True)

    # Gather the image data first, stopping once --image_limit is reached.
    images_data = []
    for i, image_data in enumerate(get_image_data_from_sequences__future(sequence_ids, header)):
        if args.image_limit is not None and i >= args.image_limit:
            break
        images_data.append(image_data)

    print('downloading.. this process will take a while. please wait')
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        # Maps each submitted download to its target path so failures can be
        # reported; an unchecked future would swallow its exception silently.
        pending = {}
        for image_data in images_data:
            # create a folder for each unique sequence ID to group images by sequence
            sequence_dir = 'data/{}'.format(image_data['sequence_id'])
            os.makedirs(sequence_dir, exist_ok=True)

            # 'captured_at' is divided by 1000 before conversion; [:-3] trims
            # the microseconds produced by %f down to milliseconds.
            # NOTE(review): utcfromtimestamp() is deprecated since Python 3.12;
            # kept because writer.PictureMetadata may expect a naive datetime - confirm.
            capture_time = datetime.utcfromtimestamp(int(image_data['captured_at'])/1000)
            date_time_image_filename = capture_time.strftime('%Y-%m-%d_%HH%Mmn%Ss%f')[:-3]
            path = '{}/{}.jpg'.format(sequence_dir, date_time_image_filename)

            # Decide whether to skip before building metadata that would go unused.
            if not args.overwrite and os.path.exists(path):
                print("{} already exists. Skipping ".format(path))
                continue

            img_metadata = writer.PictureMetadata(
                capture_time = capture_time,
                longitude = image_data['geometry']['coordinates'][0],
                latitude = image_data['geometry']['coordinates'][1],
                picture_type = PictureType("equirectangular") if image_data['camera_type'] == 'spherical' else None,
                direction = image_data['compass_angle'],
                altitude = image_data['altitude'],
            )
            future = executor.submit(download, url=image_data['thumb_original_url'], filepath=path, metadata=img_metadata)
            pending[future] = path

        # Report every failed download instead of dropping it silently.
        for future in concurrent.futures.as_completed(pending):
            error = future.exception()
            if error is not None:
                print("{} failed: {}".format(pending[future], error))