# scripts/irve_bornes_recharge/reduce.py

import geopandas as gpd
from geopandas import GeoDataFrame
import pandas as pd
import json


def convert_string_dict(value):
    """Convert the string form of a Python literal (e.g. a dict) to an object.

    Args:
        value: Any object. Only ``str`` inputs are parsed; everything else is
            passed through untouched.

    Returns:
        The parsed literal (dict, list, tuple, number, string, ...) when
        ``value`` is a string containing a valid Python literal; otherwise
        ``value`` itself, unchanged. A message is printed when a string
        cannot be parsed.
    """
    # Function-scope import so the helper does not depend on the file's
    # top-level import block.
    import ast

    if not isinstance(value, str):
        return value

    try:
        # SECURITY: the strings come from an external data file.  eval() would
        # execute any expression embedded in them (and resolve bare names such
        # as "type" to builtins); literal_eval() only accepts literals.
        return ast.literal_eval(value)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Best effort: keep the raw string when it is not a literal.
        print("Impossible d'évaluer", repr(value))
        return value


# Load the GeoJSON file.  JSON interchange text is UTF-8, so the encoding is
# given explicitly instead of relying on the platform default.
with open('irve.geojson', encoding='utf-8') as file:
    data = json.load(file)

# A standard GeoJSON document is a FeatureCollection object whose features
# live under the "features" key; iterating over that dict directly would only
# yield its key names.  Any other top-level object is treated as one feature.
if isinstance(data, dict):
    data = data.get('features', [data])

# Sort the entries by type so that strings can get special treatment
strings = []
dictionaries = []
others = []

for element in data:
    if isinstance(element, str):
        strings.append(element)
    elif isinstance(element, list):
        # Nested lists are flattened by one level
        others.extend(element)
    elif isinstance(element, dict):
        dictionaries.append(element)
    else:
        others.append(element)

# Apply the conversion function to the string entries
strings = [convert_string_dict(item) for item in strings]

# Recombine the identified elements (dicts first, then converted strings,
# then everything else)
data = dictionaries + strings + others

# Check that every entry has the expected schema.  An explicit raise is used
# because assert statements are stripped under `python -O`, and the
# isinstance guard keeps non-dict entries from failing with an unrelated
# TypeError.
if not all(
    isinstance(entry, dict) and isinstance(entry.get('geometry'), dict)
    for entry in data
):
    raise ValueError(
        "Le format du fichier JSON doit comporter des géometries valides (dict)."
    )

# Build a GeoDataFrame from the features.  from_features() expands each
# feature's "properties" mapping into one column per property next to the
# "geometry" column, so there is no 'properties' column to index afterwards:
# the developer name is read from its own 'nom_amenageur' column.
gdf = gpd.GeoDataFrame.from_features(data)

# Maximum number of rows kept for each developer ("aménageur")
sample_size = 3

# Draw the sample for every developer in a single grouping pass.
# - min(...) avoids the ValueError that sample() raises when a developer has
#   fewer rows than the requested sample size.
# - dropna=False keeps rows whose developer name is missing as their own group.
# - groupby yields the groups in sorted key order, so the output row order is
#   reproducible from one run to the next.
samples = [
    group.sample(n=min(sample_size, len(group)), random_state=42)
    for _, group in gdf.groupby('nom_amenageur', dropna=False)
]

# Concatenate the per-developer samples; pd.concat() rejects an empty list,
# so fall back to an empty slice of the source frame in that case.
if samples:
    gdf_small = pd.concat(samples, ignore_index=True)
else:
    gdf_small = gdf.iloc[0:0]

# Save the result to a new GeoJSON file
gdf_small.to_file('irve_small.geojson', driver='GeoJSON')