diff --git a/.gitignore b/.gitignore index f6a166c..ccd110d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ data/* __pycache__/ build/ dist/ -*.spec \ No newline at end of file +*.spec +secrets_variables.sh \ No newline at end of file diff --git a/README.md b/README.md index 2ecba98..d6a455c 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,44 @@ Simple code to download images in one or several mapillary sequences. The images will be geotagged and oriented. ## How to use -change the access token with your access token and the sequence ids with the ids of the sequences you want to download +### Setup env variables +Copy the example secrets file and edit the copy to set your Mapillary developer token and your Panoramax OpenStreetMap token. +```Shell +cp secrets_variables_example.sh secrets_variables.sh +editor secrets_variables.sh +``` + +### Get ALL the sequences of SEVERAL users +To avoid downloading everything at once, this is a two-step process: +- 1/ We set a list of users for which we want to get the list of sequences to download, and get a bash script for each user. + Change the list of users in the batch file `batch_get_username.sh`. + change the access token with your access token and the sequence ids with the ids of the sequences you want to download + ```bash + usernames=( "someone_having_nice_pictures" "someone_else" "oh_look_a_these_usernames" ) # use real accounts, it might work better + ``` + This will give you a few files that the other scripts will use to generate a script to download all sequences of each user. One user, one bash script generated. + + +- 2/ We can launch the download of all the sequences of a user for every user specified in the batch script. 
+ ```bash + bash script_bash_get_sequences_for_user_SOMEONE.sh + ``` + **Or all of the generated batch scripts at once.** + + ```bash + for script in script_bash_get_sequences_for_user_*; do bash "$script"; done + ``` + +Don't worry if you rerun the same user script `script_bash_get_sequences_for_user_someone.sh` twice: the pictures will not be downloaded twice. Every run of mapillary_download.py checks whether a picture already exists before downloading it. + +### Get only one sequence with its ID +To find a sequence id you can use the batch script, or click on a sequence on the Mapillary website, click on a picture, click on advanced data, and copy the sequence ID. + ```Shell python mapillary_download.py "MLY|xxxx|xxxxxxx" --sequence_ids xxxxxxxxxxx xxxxxxxxxxx ``` -## Available arguments +### Available arguments ```Shell python mapillary_download.py -h usage: mapillary_download.py [-h] [--sequence_ids [SEQUENCE_IDS ...]] [--image_ids [IMAGE_IDS ...]] [--destination DESTINATION] @@ -30,8 +62,18 @@ optional arguments: --overwrite overwrite existing images -v, --version show program's version number and exit ``` +## Sort pictures depending on a bounding box in their Exif GPS Data in a separate folder -## How to get my access token +This is used to restrict uploads to the zone covered by a national Panoramax instance, so that the sequences of multinational users can be reused. + +Edit the bounding boxes in `find_france_photos_and_move.py` and change the destination folder. +Running the script will check every user sequence in the /data folder and move the files located inside the bounding boxes to the `destination` folder path of the script. + +For this script to read the exif of pictures, you will need the python package `exifread` +It was tested with pictures taken with the App Open Camera. 
+ + +## How to get my Mapillary access token - Go to https://www.mapillary.com/dashboard/developers - Click on "Registrer Application", enter the needed informations, enable the application to "Read" data, then click on register : @@ -62,4 +104,18 @@ python -m pip install -r requirements.txt Then you can run `python mapillary_download "MLY|xxxx|xxxxxxx" --sequence_ids xxxxxxxxxxx` When you're done with the script, simply run `deactivate` to exit the virtual environnement. -On windows you can create a virtual environnement too, or use the prebuilt `mapillary_download.exe` available on the release page. \ No newline at end of file +On windows you can create a virtual environnement too, or use the prebuilt `mapillary_download.exe` available on the release page. + + +## How to batch the upload to panoramax + +After having downloaded the sequences of a user you will find them in the folder `data`. +go in your `data` folder, and to the user subfolder, and assuming you have already sent pictures with geovisio_cli, you can run this command to send all the sequences of the current folder you're in: + + +```Shell +cd data/some_user + +for dir in */; do dir=${dir%?} ; geovisio upload --api-url https://panoramax.openstreetmap.fr "$dir" --token=BLAH_BLAH ; done +``` +Have fun! diff --git a/batch_get_username.sh b/batch_get_username.sh new file mode 100644 index 0000000..da8cab1 --- /dev/null +++ b/batch_get_username.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# récupérer les séquences pour un tas d'utilisateurs + + +# Liste des usernames +# example: +# usernames=( "riri" "fifi" "loulou") +usernames=( "someone_having_nice_pictures" "someone_else" "oh_look_a_these_usernames" ) + +# check env variables are valid +if [ -f "secrets_variables.sh" ]; then + source "secrets_variables.sh" + if [ "$MAPILLARY_DEV_TOKEN" = "MLY|blahblah_replace_it" ]; then + echo "Erreur : La variable MAPILLARY_DEV_TOKEN doit être modifiée pour que le script fonctionne." 
#!/bin/bash
# Fetch the sequence lists of several Mapillary users and generate one
# download script per user.
#
# For every username, each step is skipped when its output file already exists:
#   1. find_user_id.sh                  (presumably writes out_<username>.json;
#                                        that script is not part of this change)
#   2. get_sequences_of_username.py     -> sequences_<username>.txt
#   3. text_array_to_download_script.py -> script_bash_get_sequences_for_user_<username>.sh

# List of usernames, for example:
# usernames=( "riri" "fifi" "loulou" )
usernames=( "someone_having_nice_pictures" "someone_else" "oh_look_a_these_usernames" )

# Check that the secrets file exists and that the token was actually customised.
if [ -f "secrets_variables.sh" ]; then
    source "secrets_variables.sh"
    # An unset/empty token is as unusable as the placeholder value.
    if [ -z "$MAPILLARY_DEV_TOKEN" ] || [ "$MAPILLARY_DEV_TOKEN" = "MLY|blahblah_replace_it" ]; then
        echo "Erreur : La variable MAPILLARY_DEV_TOKEN doit être modifiée pour que le script fonctionne."
        echo "Veuillez remplacer la valeur par défaut \"MLY|blahblah_replace_it\" par votre propre token de développement Mapillary."
        exit 1
    fi
else
    echo "Erreur : Le fichier secrets_variables.sh n'a pas été trouvé."
    exit 1
fi

# Loop over the usernames
for username in "${usernames[@]}"; do
    echo "---------- utilisateur: $username"

    # Quote "$username" everywhere so a name containing spaces or glob
    # characters is passed as a single argument.
    if [ ! -f "out_$username.json" ]; then
        bash find_user_id.sh "$username"
    fi

    # Only query the API when the sequence list has not been fetched yet
    if [ ! -f "sequences_$username.txt" ]; then
        python3 get_sequences_of_username.py --username="$username" --dev_token="$MAPILLARY_DEV_TOKEN" --max_sequence=9999
    else
        echo "le fichier sequences txt existe pour $username"
    fi

    # Only generate the download script when it does not exist yet
    if [ ! -f "script_bash_get_sequences_for_user_$username.sh" ]; then
        python3 text_array_to_download_script.py --username="$username" --dev_token="$MAPILLARY_DEV_TOKEN"
    fi
done
echo "---------- finished getting users ------------"
# Bounding boxes of the French territories used to filter photos.
# Every box is (lat_min, lon_min, lat_max, lon_max) in decimal degrees.
# The metropolitan box was previously defined twice; it is now defined once.

# Metropolitan France plus a small margin around it
france_bbox: tuple[float, float, float, float] = (42.0, -5.0, 51.0, 10.0)

# Guadeloupe
guadeloupe_bbox: tuple[float, float, float, float] = (15.8, -61.8, 17.3, -59.3)

# Martinique
martinique_bbox: tuple[float, float, float, float] = (14.3, -61.3, 15.1, -59.3)

# French Guiana
guyane_bbox: tuple[float, float, float, float] = (2.0, -54.5, 6.5, -51.5)

# La Réunion
reunion_bbox: tuple[float, float, float, float] = (-21.3, 55.2, -20.8, 55.8)

# Mayotte
mayotte_bbox: tuple[float, float, float, float] = (-13.0, 45.0, -12.5, 45.5)

# Saint-Pierre-et-Miquelon
spm_bbox: tuple[float, float, float, float] = (46.7, -56.2, 47.1, -55.6)

# Saint-Martin and Saint-Barthélemy
stm_sbh_bbox: tuple[float, float, float, float] = (18.0, -64.5, 18.5, -62.5)

# Wallis-et-Futuna
wf_bbox: tuple[float, float, float, float] = (-13.3, -176.2, -13.1, -175.8)

# New Caledonia
nc_bbox: tuple[float, float, float, float] = (-22.5, 165.5, -18.5, 169.5)

# French Polynesia
# NOTE(review): Tahiti lies near 149.5°W, i.e. west of lon_min = -140.0 —
# confirm this box covers the intended islands before relying on it.
pf_bbox: tuple[float, float, float, float] = (-27.5, -140.0, -7.5, -134.0)
# Bounding box around the French Southern and Antarctic Lands
# (lat_min, lon_min, lat_max, lon_max)
taaf_bbox: tuple[float, float, float, float] = (-49.5, 68.5, -37.5, 77.5)

# Default source directory
source_dir: str = '/home/cipherbliss/Téléchargements/FIBRELAND/TEST_IN_FR/'

# Default destination directory
destination_dir: str = '/home/cipherbliss/Téléchargements/FIBRELAND/IN_FRANCE/'
sequence_folder: str = 'principale_sequence'
count_files_all: int = 0
count_files_moved: int = 0
# The destination tree is created on demand by move_file_in_destination().
# Creating the hard-coded default at import time made a stray directory (or
# raised PermissionError) on machines where that path is not the real target.


def move_file_if_in_france(filepath, sequence_folder):
    """Move one image to the destination tree when its GPS position is in France.

    Increments ``count_files_all`` for every file examined.

    :param filepath: path of the image to examine
    :param sequence_folder: sub-folder of the destination directory to move into
    """
    global count_files_all
    count_files_all += 1

    # Read the position from the EXIF data of the image
    latitude, longitude = get_gps_info(filepath)

    # Compare against None: 0.0 is a valid coordinate (the Greenwich meridian
    # crosses metropolitan France) and must not be reported as "missing".
    if latitude is None or longitude is None:
        print('Informations GPS non trouvées')
        return

    print(f'Latitude: {latitude}, Longitude: {longitude}')
    if are_lat_lon_in_france(latitude, longitude):
        move_file_in_destination(filepath, sequence_folder)


def move_file_in_destination(filepath, sequence_folder, destination=None):
    """Move ``filepath`` into the destination tree, keeping its parent folder name.

    The file ends up in
    ``<destination>/<sequence_folder>/<name of the file's parent directory>/``
    under its own base name, and ``count_files_moved`` is incremented.

    :param filepath: path of the file to move
    :param sequence_folder: sub-folder of the destination directory
    :param destination: root directory to move into; defaults to the
        module-level ``destination_dir``
    :return: True once the file has been moved
    """
    global count_files_moved
    base_dir = destination_dir if destination is None else destination
    dest_subdir = os.path.join(
        base_dir,
        sequence_folder,
        os.path.basename(os.path.dirname(filepath)),
    )
    os.makedirs(dest_subdir, exist_ok=True)
    # Join with the base name only: joining with the full source path makes
    # os.path.join discard dest_subdir when the source path is absolute, so the
    # file was "moved" onto itself.
    shutil.move(filepath, os.path.join(dest_subdir, os.path.basename(filepath)))
    count_files_moved += 1
    print(f"Moved {filepath} to {dest_subdir}")
    return True
def are_lat_lon_in_france(gps_lat, gps_lon, zones=None):
    """Return the name of the French territory containing a coordinate.

    The zones are tested in order and the first bounding box containing the
    point wins; bounds are inclusive.

    :param gps_lat: latitude in decimal degrees, or None
    :param gps_lon: longitude in decimal degrees, or None
    :param zones: optional iterable of ``(name, (lat_min, lon_min, lat_max,
        lon_max))`` pairs; defaults to the module-level ``*_bbox`` constants
    :return: the zone name, or None when the point is outside every zone or a
        coordinate is missing
    """
    if zones is None:
        # Built at call time so edits to the module-level boxes are picked up.
        # Trailing comments give the land area of each territory.
        zones = (
            ("France métropolitaine", france_bbox),  # 551 695 km²
            ("Terres australes et antarctiques françaises", taaf_bbox),  # 432 000 km²
            ("Guyane française", guyane_bbox),  # 83 534 km²
            ("La Réunion", reunion_bbox),  # 2 512 km²
            ("Wallis-et-Futuna", wf_bbox),  # 142 km²
            ("Saint-Martin et Saint-Barthélemy", stm_sbh_bbox),  # 53 km²
            ("Saint-Pierre-et-Miquelon", spm_bbox),  # 242 km²
            ("Mayotte", mayotte_bbox),  # 374 km²
            ("Martinique", martinique_bbox),  # 1 128 km²
            ("Guadeloupe", guadeloupe_bbox),  # 1 628 km²
            ("Polynésie française", pf_bbox),  # 4 167 km²
            ("Nouvelle-Calédonie", nc_bbox),  # 18 575 km²
        )

    print("lat lon :", gps_lat, gps_lon)

    # A missing coordinate cannot be inside any zone; comparing None with a
    # float would raise TypeError.
    if gps_lat is None or gps_lon is None:
        return None

    for name, (lat_min, lon_min, lat_max, lon_max) in zones:
        if lat_min <= gps_lat <= lat_max and lon_min <= gps_lon <= lon_max:
            return name
    return None  # outside France
def get_gps_info(filepath):
    """Read the GPS position stored in the EXIF data of an image.

    :param filepath: path of the image file
    :return: ``(latitude, longitude)`` in signed decimal degrees (south and
        west are negative), or ``(None, None)`` when the image carries no
        usable GPS position
    """
    with open(filepath, 'rb') as f:
        tags = exifread.process_file(f)

    gps_latitude = _signed_coordinate(
        tags.get('GPS GPSLatitude'), tags.get('GPS GPSLatitudeRef'), 'S')
    gps_longitude = _signed_coordinate(
        tags.get('GPS GPSLongitude'), tags.get('GPS GPSLongitudeRef'), 'W')

    # Compare against None: 0.0 is a valid latitude/longitude.
    if gps_latitude is None or gps_longitude is None:
        return None, None
    return gps_latitude, gps_longitude


def _signed_coordinate(coordinate_tag, ref_tag, negative_ref):
    """Convert a coordinate tag to decimal degrees, negated for ``negative_ref``."""
    value = convert_rational_to_float(coordinate_tag)
    if value is None:
        return None
    # assumes the reference tag exposes 'N'/'S'/'E'/'W' through ``.values``
    # (string or one-element sequence) — TODO confirm against exifread
    reference = getattr(ref_tag, 'values', '')
    if isinstance(reference, (list, tuple)):
        reference = reference[0] if reference else ''
    if str(reference or '').strip().upper().startswith(negative_ref):
        return -abs(value)
    return value


def convert_rational_to_float(rational):
    """Convert an EXIF coordinate tag to decimal degrees.

    ``rational.values`` is read as up to three ratios (degrees, minutes,
    seconds), each exposing ``num`` and ``den``. Only the first ratio was used
    before, which truncated every position to whole degrees.

    :param rational: tag object with a ``values`` sequence, or None
    :return: the unsigned value in decimal degrees, or None when the tag is
        missing, empty, or its degrees ratio has a zero denominator
    """
    if rational is None:
        return None
    components = getattr(rational, 'values', None)
    if not components:
        return None

    decimal = 0.0
    for index, (component, divisor) in enumerate(zip(components, (1.0, 60.0, 3600.0))):
        if not component.den:
            if index == 0:
                return None  # no usable degrees value
            continue  # a malformed minutes/seconds ratio contributes nothing
        decimal += float(component.num) / float(component.den) / divisor
    return decimal


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--source_dir', default='/home/cipherbliss/Téléchargements/FIBRELAND/TEST_IN_FR/', help='Chemin du répertoire source')
    parser.add_argument('--destination_dir', default='/home/cipherbliss/Téléchargements/FIBRELAND/IN_FRANCE/', help='Chemin du répertoire destination')
    parser.add_argument('--sequence_folder', default='principale_sequence', help='Nom du dossier de séquence')
    args = parser.parse_args()

    # Honour --destination_dir: move_file_in_destination() reads this
    # module-level name, which was previously left at its hard-coded default.
    destination_dir = args.destination_dir

    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tif')

    # Walk every file of the source directory and its sub-directories
    for root, dirs, files in os.walk(args.source_dir):
        for filename in files:
            # Only image files are examined
            if filename.lower().endswith(image_extensions):
                filepath = os.path.join(root, filename)
                # Use the parsed --sequence_folder, not the module default
                move_file_if_in_france(filepath, args.sequence_folder)

    print('fichiers se situant en france déplacés: ', count_files_moved, ' / ', count_files_all)
def parse_args(argv=None):
    """Parse the command line and publish the result as the module-level ``args``.

    :param argv: argument list to parse; None means ``sys.argv[1:]``
    :return: the parsed namespace (also stored in the global ``args``)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--username', type=str, required=True, help='Username to get the sequences id of')
    parser.add_argument('--dev_token', type=str, required=True, help='Your mapillary developer token')
    parser.add_argument('--max_sequence', type=str, help='Expected number of sequences, shown in the progress output')

    global args
    args = parser.parse_args(argv)
    # Do not print the whole namespace: it contains the developer token.
    print(f"username={args.username} max_sequence={args.max_sequence}")
    return args


# API answers collected so far, one {"id", "sequence"} dict per resolved image
responses = []
# Sequence ids found so far, without duplicates, in discovery order
sequences = []


def extract_image_ids(input_data):
    """Return the non-empty ``image_id`` of every node of a user feed document.

    :param input_data: parsed content of ``out_<username>.json``
    :return: list of image ids, in feed order
    """
    nodes = input_data["data"]["fetch__User"]["feed"]["nodes"]
    return [node["image_id"] for node in nodes if node.get("image_id")]


def get_image_data_from_sequences():
    """Resolve the sequence id of every image listed in ``out_<username>.json``.

    Reads the feed file of ``args.username``, asks the Mapillary Graph API for
    the ``sequence`` field of each image id and fills the module-level
    ``responses`` and ``sequences`` lists. Failed requests are reported and
    skipped.
    """
    input_file = "out_" + args.username + ".json"

    # Load the input JSON file
    with open(input_file, "r", encoding="utf-8") as file:
        input_data = json.load(file)

    image_ids = extract_image_ids(input_data)
    print('séquences : ', len(image_ids))
    print(image_ids)

    dev_token = args.dev_token
    # --max_sequence is only a display total; fall back to the real count so
    # the progress line cannot fail when the option is absent.
    total = args.max_sequence or len(image_ids)

    for ii, image_id in enumerate(image_ids, start=1):
        try:
            # ``params`` lets requests URL-encode the token (it contains '|');
            # the timeout stops a stalled connection from hanging the batch.
            response = requests.get(
                "https://graph.mapillary.com/" + str(image_id),
                params={"access_token": dev_token, "fields": "id,sequence"},
                timeout=30,
            )
        except requests.RequestException as error:
            print(f"requête échouée pour {image_id}: {error}")
            continue

        if response.status_code != 200:
            print(response)
            continue

        raw_response = response.json()
        sequence_id = raw_response.get("sequence")
        if not sequence_id:
            print(response)
            continue

        responses.append({"id": raw_response.get("id"), "sequence": sequence_id})
        if sequence_id not in sequences:
            sequences.append(sequence_id)
        print(f"séquence trouvée: {ii}/{total} : {sequence_id}")


def persist_files(output_dir="."):
    """Write the collected data for ``args.username`` as JSON files.

    ``sequences_<username>.json`` receives the raw ``responses`` list and
    ``sequences_<username>.txt`` the list of sequence ids, also JSON encoded.

    :param output_dir: directory to write into; defaults to the current one
    """
    output_file = os.path.join(output_dir, "sequences_" + args.username + ".json")
    with open(output_file, "w", encoding="utf-8") as file:
        json.dump(responses, file)

    sequence_filename = os.path.join(output_dir, "sequences_" + args.username + ".txt")
    with open(sequence_filename, "w", encoding="utf-8") as file:
        json.dump(sequences, file)
    print('fichier sauvegardé: ' + sequence_filename)


# Guarded so that importing the module neither parses sys.argv nor calls the API.
if __name__ == '__main__':
    import os

    parse_args()
    get_image_data_from_sequences()
    persist_files()

# TODO: when a request returns fewer nodes than the maximum, a follow-up
# request should be issued to fetch the remaining ones.
def parse_args(argv=None):
    """Parse the command line and publish the result as the module-level ``args``.

    :param argv: argument list to parse; None means ``sys.argv[1:]``
    :return: the parsed namespace (also stored in the global ``args``)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--access_token', type=str, help='Your mapillary access token')
    parser.add_argument('--username', type=str, help='Username to get the sequences id of')
    parser.add_argument('--pictures', type=str, help='Limit of pictures to fetch')

    global args
    args = parser.parse_args(argv)
    return args


def unique_sequence_ids(payload):
    """Return the distinct ``sequence`` values of an images answer.

    :param payload: parsed JSON answer holding a ``data`` list of image objects
    :return: sequence ids without duplicates, in first-seen order
    """
    images = payload.get('data') or []
    # dict.fromkeys de-duplicates while keeping a deterministic order
    return list(dict.fromkeys(obj['sequence'] for obj in images if 'sequence' in obj))


if __name__ == '__main__':
    parse_args()

    if args.access_token is None:
        print('please provide the access_token')
        raise SystemExit(1)

    # Let requests build the query string so the token and the username are
    # URL-encoded; ``limit`` is only sent when --pictures was given.
    params = {
        'access_token': args.access_token,
        'creator_username': args.username,
        'fields': 'id,sequence',
    }
    if args.pictures is not None:
        params['limit'] = args.pictures

    response = requests.get('https://graph.mapillary.com/images', params=params, timeout=30)

    if response.status_code == 200:
        # Named ``payload`` so the imported ``json`` module is not shadowed
        payload = response.json()
        print(unique_sequence_ids(payload))
    else:
        print(response)
def parse_args(argv=None):
    """Parse the command line and publish the result as the module-level ``args``.

    :param argv: argument list to parse; None means ``sys.argv[1:]``
    :return: the parsed namespace (also stored in the global ``args``)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dev_token', type=str, help='Your mapillary access token')
    parser.add_argument('--username', type=str, help='Username to get the sequences id of')

    global args
    args = parser.parse_args(argv)
    return args


def load_sequence_ids(path):
    """Read the JSON list of sequence ids stored in ``path``.

    The file is parsed as JSON (get_sequences_of_username.py writes it with
    ``json.dump``) rather than passed to ``eval()``, which would execute
    whatever the file contains.

    :param path: path of a ``sequences_<username>.txt`` file
    :return: list of sequence ids as strings
    :raises ValueError: when the file is not a JSON list
    """
    import json

    with open(path, "r", encoding="utf-8") as input_handle:
        content = json.load(input_handle)
    if not isinstance(content, list):
        raise ValueError(f"'{path}' does not contain a JSON list of sequence ids")
    return [str(seq) for seq in content]


def build_download_commands(sequences, dev_token, username):
    """Build one ``mapillary_download.py`` command line per sequence id.

    Every value is shell-quoted, so a token containing ``|`` or a username
    containing spaces stays a single argument in the generated script.

    :param sequences: iterable of sequence ids
    :param dev_token: Mapillary developer token
    :param username: Mapillary username the sequences belong to
    :return: list of command lines, each ending with a newline
    """
    import shlex

    # NOTE(review): the README calls mapillary_download.py with a positional
    # token and --sequence_ids; the option names below are kept from the
    # original generator — confirm they match the downloader in use.
    token_arg = shlex.quote(dev_token)
    user_arg = shlex.quote(username)
    return [
        f"/usr/bin/python3 mapillary_download.py --access_token={token_arg}"
        f" --sequence_id={shlex.quote(str(seq))} --username={user_arg}\n"
        for seq in sequences
    ]


if __name__ == '__main__':
    print("Construction du script bash de récupération des images de chaque séquences pour Mapillary_download (https://github.com/Stefal/mapillary_download.git)")

    parse_args()

    username = args.username
    input_file = f"sequences_{username}.txt"

    if not args.dev_token:
        print(f"Erreur : Le token de développeur de mapillary manque, vérifiez le fichier de variables secretes. Arrêt du script.")
        raise SystemExit(1)

    if not os.path.isfile(input_file):
        print(f"Erreur : Le fichier '{input_file}' n'a pas été trouvé. Arrêt du script.")
        raise SystemExit(1)
    print(f"Fichier '{input_file}' trouvé.")

    output_file = f"script_bash_get_sequences_for_user_{username}.sh"

    # Read and validate the input before opening the output: a parse failure
    # must not leave an empty script behind, because the batch script skips
    # users whose download script already exists.
    sequences = load_sequence_ids(input_file)
    with open(output_file, "w", encoding="utf-8") as output:
        output.writelines(build_download_commands(sequences, args.dev_token, username))

    print(output_file)

    print(f"\n Script Bash généré avec succès.")
    print(f"Lancez le pour récupérer les photos de l'utilisateur {username}: \n bash {output_file}")