add retry on http connections

This commit is contained in:
Stefal 2023-09-14 20:57:41 +02:00
parent dc4f2f9883
commit e9af1c34f2

View File

@ -1,4 +1,6 @@
import requests import requests
from requests.adapters import HTTPAdapter
from requests.adapters import Retry
import json import json
import os import os
import asyncio import asyncio
@ -8,6 +10,14 @@ import writer
from model import PictureType from model import PictureType
import sys import sys
# Shared HTTP session: every request made through it reuses the retry policy
# mounted below.
session = requests.Session()

# Retry policy: up to 5 attempts in total, with a backoff factor of 1 between
# them; responses with the listed status codes (rate limiting and gateway /
# availability errors) are retried as well.
retries_strategies = Retry(
    total=5,
    backoff_factor=1,
    status_forcelist=[429, 502, 503, 504],
)

# Only the https:// prefix is given the retrying adapter; other prefixes keep
# the session's default adapter.
_retrying_adapter = HTTPAdapter(max_retries=retries_strategies)
session.mount('https://', _retrying_adapter)
def parse_args(argv =None): def parse_args(argv =None):
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--sequence_ids', type=str, nargs='+', help='The mapillary sequence id(s) to download') parser.add_argument('--sequence_ids', type=str, nargs='+', help='The mapillary sequence id(s) to download')
@ -23,16 +33,16 @@ def background(f):
return wrapped return wrapped
#TODO add try/except and retry (see https://www.zenrows.com/blog/python-requests-retry#avoid-getting-blocked) #TODO add try/except and retry (see https://www.zenrows.com/blog/python-requests-retry#avoid-getting-blocked)
@background
def download(url, fn, metadata=None):
    """Fetch the image at *url*, embed EXIF metadata and write it to *fn*.

    Args:
        url: Address of the image to download.
        fn: Destination path; converted with ``str()`` before opening.
        metadata: Passed through unchanged to ``write_exif``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status that
            the session's retry policy did not resolve. Without this check an
            error page would be handed to ``write_exif`` and saved as if it
            were an image.
    """
    # The full body is needed in memory for write_exif, so the response is not
    # streamed; the context manager guarantees the connection is released.
    with session.get(url, timeout=6) as r:
        r.raise_for_status()
        payload = r.content

    image = write_exif(payload, metadata)
    with open(str(fn), "wb") as f:
        f.write(image)
def get_single_image_data(image_id, mly_header):
    """Query the Mapillary Graph API for one image and return the decoded JSON.

    Args:
        image_id: Identifier substituted into the request URL.
        mly_header: Headers sent with the request.

    Returns:
        The response body parsed as JSON. The HTTP status is not checked here,
        so whatever JSON the server sends back is returned as-is.
    """
    req_url = 'https://graph.mapillary.com/{}?fields=thumb_original_url,altitude,camera_type,captured_at,compass_angle,geometry,exif_orientation'.format(image_id)
    # Same timeout as download(): without one, a stalled connection would
    # block this call indefinitely.
    r = session.get(req_url, headers=mly_header, timeout=6)
    data = r.json()
    return data