Merge pull request 'remove JFIF data in case EXIF extraction fails' (#7) from matthias/mapillary_download:workaround/faulty-exif into tykayn/batch-get-mapillary-sequences

Reviewed-on: #7
This commit is contained in:
tykayn 2024-10-27 09:04:42 +01:00
commit efe40c10d2
2 changed files with 37 additions and 4 deletions

View File

@ -1,7 +1,8 @@
import requests
from requests.adapters import HTTPAdapter
from requests.adapters import Retry
import json
from PIL import Image
import io
import os
import concurrent.futures
import argparse
@ -64,10 +65,41 @@ def download(url, filepath, metadata=None):
with open(str(filepath), "wb") as f:
r = session.get(url, stream=True, timeout=6)
try:
image = write_exif(r.content, metadata)
img = write_exif(r.content, metadata)
except Exception as e:
print(f"FAILED to write exif data for {filepath}. Error: {e}")
f.write(image)
print(
f"{filepath} FAILED to write exif data. Error: {e} Retrying with reduced EXIF.".replace(
"\n", " | "
)
)
# write_exif(img_byte_arr, metadata) crashes when JFIF fields are present,
# so as a workaround we remove those fields with Pillow.
# Definitely not the most elegant solution...
try:
r = session.get(url, stream=True, timeout=6)
im = Image.open(r.raw)
exif_fields = list(im.info.keys())
# print(f"{filepath} detected exif fields : {exif_fields}")
fields_to_keep = ("exif", "dpi")
for k in exif_fields:
if k not in fields_to_keep:
del im.info[k]
# print(f"{filepath} deleted exif field: {k}")
# done cleaning, now converting pillow image back to bytearray
img_byte_arr = io.BytesIO()
im.save(img_byte_arr, format="JPEG")
img_byte_arr = img_byte_arr.getvalue()
img = write_exif(img_byte_arr, metadata)
except Exception as e:
print(
f"{filepath} FAILED WORKAROUND. Error: {e} Saving image without EXIF data.".replace(
"\n", " | "
)
)
img = r.content
f.write(img)
print("{} downloaded {}".format(filepath, r))

View File

@ -3,3 +3,4 @@ pytz >= 2023.3
timezonefinder >=6.2.0
pyexiv2 >= 2.8.2
panoramax_cli >= 1.1.1
pillow >= 11.0.0