remove JFIF data in case EXIF extraction fails #7

Merged
2 changed files with 37 additions and 4 deletions

View File

@ -1,7 +1,8 @@
import requests import requests
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from requests.adapters import Retry from requests.adapters import Retry
import json from PIL import Image
import io
import os import os
import concurrent.futures import concurrent.futures
import argparse import argparse
@ -64,10 +65,41 @@ def download(url, filepath, metadata=None):
with open(str(filepath), "wb") as f: with open(str(filepath), "wb") as f:
r = session.get(url, stream=True, timeout=6) r = session.get(url, stream=True, timeout=6)
try: try:
image = write_exif(r.content, metadata) img = write_exif(r.content, metadata)
except Exception as e: except Exception as e:
print(f"FAILED to write exif data for {filepath}. Error: {e}") print(
f.write(image) f"{filepath} FAILED to write exif data. Error: {e} Retrying with reduced EXIF.".replace(
"\n", " | "
)
)
# write_exif(img_byte_arr, metadata) crashes when JFIF fields are present,
# so here is a workaround that removes those fields with Pillow.
# Definitely not the most elegant solution...
try:
r = session.get(url, stream=True, timeout=6)
im = Image.open(r.raw)
exif_fields = list(im.info.keys())
# print(f"{filepath} detected exif fields : {exif_fields}")
fields_to_keep = ("exif", "dpi")
for k in exif_fields:
if k not in fields_to_keep:
del im.info[k]
# print(f"{filepath} deleted exif field: {k}")
# done cleaning, now converting pillow image back to bytearray
img_byte_arr = io.BytesIO()
im.save(img_byte_arr, format="JPEG")
img_byte_arr = img_byte_arr.getvalue()
img = write_exif(img_byte_arr, metadata)
except Exception as e:
print(
f"{filepath} FAILED WORKAROUND. Error: {e} Saving image without EXIF data.".replace(
"\n", " | "
)
)
img = r.content
f.write(img)
print("{} downloaded {}".format(filepath, r)) print("{} downloaded {}".format(filepath, r))

View File

@ -3,3 +3,4 @@ pytz >= 2023.3
timezonefinder >=6.2.0 timezonefinder >=6.2.0
pyexiv2 >= 2.8.2 pyexiv2 >= 2.8.2
panoramax_cli >= 1.1.1 panoramax_cli >= 1.1.1
pillow >= 11.0.0