From 0f3a3a8d6e73e19df10e2b1beb11e50efc01ae7d Mon Sep 17 00:00:00 2001
From: Matthias
Date: Sun, 27 Oct 2024 00:45:15 +0200
Subject: [PATCH] remove JFIF data in case EXIF extraction fails

Signed-off-by: Matthias
---
 mapillary_download.py | 40 ++++++++++++++++++++++++++++++++++++----
 requirements.txt      |  1 +
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/mapillary_download.py b/mapillary_download.py
index 47badc0..6b16554 100644
--- a/mapillary_download.py
+++ b/mapillary_download.py
@@ -1,7 +1,8 @@
 import requests
 from requests.adapters import HTTPAdapter
 from requests.adapters import Retry
-import json
+from PIL import Image
+import io
 import os
 import concurrent.futures
 import argparse
@@ -64,10 +65,41 @@ def download(url, filepath, metadata=None):
     with open(str(filepath), "wb") as f:
         r = session.get(url, stream=True, timeout=6)
         try:
-            image = write_exif(r.content, metadata)
+            img = write_exif(r.content, metadata)
         except Exception as e:
-            print(f"FAILED to write exif data for {filepath}. Error: {e}")
-        f.write(image)
+            print(
+                f"{filepath} FAILED to write exif data. Error: {e} Retrying with reduced EXIF.".replace(
+                    "\n", " | "
+                )
+            )
+            # write_exif(img_byte_arr, metadata) crashes when JFIF fields present
+            # so here is a workaround to remove those fields with pillow
+            # definitely not the most elegant solution...
+            try:
+                r = session.get(url, stream=True, timeout=6)
+                im = Image.open(r.raw)
+                exif_fields = list(im.info.keys())
+                # print(f"{filepath} detected exif fields : {exif_fields}")
+                fields_to_keep = ("exif", "dpi")
+                for k in exif_fields:
+                    if k not in fields_to_keep:
+                        del im.info[k]
+                        # print(f"{filepath} deleted exif field: {k}")
+                # done cleaning, now converting pillow image back to bytearray
+                img_byte_arr = io.BytesIO()
+                im.save(img_byte_arr, format="JPEG")
+                img_byte_arr = img_byte_arr.getvalue()
+
+                img = write_exif(img_byte_arr, metadata)
+            except Exception as e:
+                print(
+                    f"{filepath} FAILED WORKAROUND. Error: {e} Saving image without EXIF data.".replace(
+                        "\n", " | "
+                    )
+                )
+                img = r.content
+
+        f.write(img)
     print("{} downloaded {}".format(filepath, r))
 
 
diff --git a/requirements.txt b/requirements.txt
index 4db3c20..c1d6b51 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ pytz >= 2023.3
 timezonefinder >=6.2.0
 pyexiv2 >= 2.8.2
 panoramax_cli >= 1.1.1
+pillow >= 11.0.0
\ No newline at end of file
-- 
2.39.5