Merge pull request 'remove JFIF data in case EXIF extraction fails' (#7) from matthias/mapillary_download:workaround/faulty-exif into tykayn/batch-get-mapillary-sequences

Reviewed-on: tykayn/mapillary_download#7
This commit is contained in:
tykayn 2024-10-27 09:04:42 +01:00
commit efe40c10d2
2 changed files with 37 additions and 4 deletions

View File

@@ -1,7 +1,8 @@
import requests import requests
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from requests.adapters import Retry from requests.adapters import Retry
import json from PIL import Image
import io
import os import os
import concurrent.futures import concurrent.futures
import argparse import argparse
@@ -64,10 +65,41 @@ def download(url, filepath, metadata=None):
with open(str(filepath), "wb") as f: with open(str(filepath), "wb") as f:
r = session.get(url, stream=True, timeout=6) r = session.get(url, stream=True, timeout=6)
try: try:
image = write_exif(r.content, metadata) img = write_exif(r.content, metadata)
except Exception as e: except Exception as e:
print(f"FAILED to write exif data for {filepath}. Error: {e}") print(
f.write(image) f"{filepath} FAILED to write exif data. Error: {e} Retrying with reduced EXIF.".replace(
"\n", " | "
)
)
# write_exif(img_byte_arr, metadata) crashes when JFIF fields present
# so here is a workaround to remove those fields with pillow
# definitely not the most elegant solution...
try:
r = session.get(url, stream=True, timeout=6)
im = Image.open(r.raw)
exif_fields = list(im.info.keys())
# print(f"{filepath} detected exif fields : {exif_fields}")
fields_to_keep = ("exif", "dpi")
for k in exif_fields:
if k not in fields_to_keep:
del im.info[k]
# print(f"{filepath} deleted exif field: {k}")
# done cleaning, now converting pillow image back to bytearray
img_byte_arr = io.BytesIO()
im.save(img_byte_arr, format="JPEG")
img_byte_arr = img_byte_arr.getvalue()
img = write_exif(img_byte_arr, metadata)
except Exception as e:
print(
f"{filepath} FAILED WORKAROUND. Error: {e} Saving image without EXIF data.".replace(
"\n", " | "
)
)
img = r.content
f.write(img)
print("{} downloaded {}".format(filepath, r)) print("{} downloaded {}".format(filepath, r))

View File

@@ -3,3 +3,4 @@ pytz >= 2023.3
timezonefinder >=6.2.0 timezonefinder >=6.2.0
pyexiv2 >= 2.8.2 pyexiv2 >= 2.8.2
panoramax_cli >= 1.1.1 panoramax_cli >= 1.1.1
pillow >= 11.0.0