# scripts/irve_bornes_recharge/reduce.py

import geopandas as gpd
from geopandas import GeoDataFrame
import pandas as pd
import json


def convert_string_dict(value):
    """Parse a string holding a Python literal (typically a dict).

    Non-string inputs are returned untouched. Strings are parsed with
    ``ast.literal_eval`` instead of ``eval``: the strings originate from an
    external data file, and ``eval`` would execute any expression embedded
    in them.

    Args:
        value: Any object; only ``str`` instances are parsed.

    Returns:
        The parsed literal when ``value`` is a string containing a valid
        Python literal, otherwise ``value`` itself.
    """
    import ast  # local import keeps this function self-contained

    if not isinstance(value, str):
        return value
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Best effort: report the unparsable string and pass it through.
        print("Impossible d'évaluer", repr(value))
        return value


# Load the GeoJSON file. GeoJSON is UTF-8 by specification (RFC 7946), so
# decode explicitly instead of relying on the platform default encoding.
with open('irve.geojson', encoding='utf-8') as file:
    data = json.load(file)

# A standard GeoJSON document is a single object (usually a
# FeatureCollection). Iterating over that dict would only yield its keys,
# so unwrap the feature list, or wrap a lone object in a list.
if isinstance(data, dict):
    data = data['features'] if 'features' in data else [data]

# Sort the entries by type: strings need to be parsed, nested lists are
# flattened one level, dicts are kept as they are.
strings = []
dictionaries = []
others = []

for element in data:
    if isinstance(element, str):
        strings.append(element)
    elif isinstance(element, list):
        others.extend(element)
    elif isinstance(element, dict):
        dictionaries.append(element)
    else:
        others.append(element)

# Convert the string entries into Python objects where possible.
strings = [convert_string_dict(item) for item in strings]

# Recombine the entries: dicts first, then parsed strings, then the rest.
data = dictionaries + strings + others

# Check that every entry is a mapping carrying a geometry mapping.
# An explicit exception is raised because `assert` statements are stripped
# under `python -O`; the isinstance/.get guards keep malformed entries from
# failing with an unrelated TypeError or KeyError.
if not all(
    isinstance(entry, dict) and isinstance(entry.get('geometry'), dict)
    for entry in data
):
    raise ValueError(
        "Le format du fichier JSON doit comporter des géometries valides (dict)."
    )

# Build the GeoDataFrame from the validated features.
gdf = gpd.GeoDataFrame.from_features(data)

# Maximum number of rows kept for each operator ("aménageur").
samples_per_operator = 3

# GeoDataFrame.from_features() flattens each feature's properties into
# columns, so the operator name is read from the 'nom_amenageur' column.
# Grouping once avoids recomputing a boolean mask per name; sort=False keeps
# groups in order of first appearance, which makes the output order stable.
# Rows without an operator name are left out by groupby's default behaviour.
samples = [
    # Cap n at the group size: sample() raises when asked for more rows
    # than the group contains.
    group.sample(n=min(samples_per_operator, len(group)), random_state=42)
    for _, group in gdf.groupby('nom_amenageur', sort=False)
]

# Concatenate once at the end; fall back to an empty frame with the same
# columns when there is nothing to sample (pd.concat rejects an empty list).
if samples:
    gdf_small = pd.concat(samples, ignore_index=True)
else:
    gdf_small = gdf.iloc[:0]

# Write the reduced dataset to a new GeoJSON file.
gdf_small.to_file('irve_small.geojson', driver='GeoJSON')
up results bad data 2024-10-08 14:32:51 +02:00			`import geopandas as gpd`
			`from geopandas import GeoDataFrame`
			`import pandas as pd`
			`import json`


			`def convert_string_dict(value):`
			`if isinstance(value, str):`
			`try:`
			`return eval(value)`
			`except Exception:`
			`print("Impossible d'évaluer", repr(value))`
			`return value`
			`return value`


			`# Chargement du fichier GeoJSON`
			`with open('irve.geojson') as file:`
			`data = json.load(file)`
			`# Traitement spécial pour les chaines de caractères`
			`strings = []`
			`dictionaries = []`
			`others = []`

			`for element in data:`
			`if isinstance(element, str):`
			`strings.append(element)`
			`elif isinstance(element, list):`
			`others.extend(element)`
			`elif isinstance(element, dict):`
			`dictionaries.append(element)`
			`else:`
			`others.append(element)`

			`# Application de la fonction de conversion aux chaînes de caractères`
			`strings = [convert_string_dict(item) for item in strings]`

			`# Combinaison des éléments identifiés`
			`data = dictionaries + strings + others`

			`# Vérification que les entrées ont bien un schéma correct`
			`assert all(isinstance(entry['geometry'], dict) for entry in data), \`
			`"Le format du fichier JSON doit comporter des géometries valides (dict)."`

			`# Transformation en DataFrame GeoPandas`
			`gdf = gpd.GeoDataFrame.from_features(data)`

			`# Création d'un nouveau DataFrame vide`
			`gdf_small = GeoDataFrame(columns=['geometry', 'properties'])`

			`# Itération sur chaque nom d'aménageur unique`
			`for name in set(gdf['properties'].apply(lambda x: x.get('nom_amenageur'))):`
			`# Extraction des lignes correspondantes au nom d'aménageur actuel`
			`sub_gdf = gdf[gdf['properties'].apply(lambda x: x.get('nom_amenageur')) == name].sample(n=3, random_state=42)`

			`# Concaténation du sous-DataFrame courant avec les précédents`
			`if len(gdf_small) > 0:`
			`gdf_small = pd.concat([gdf_small, sub_gdf], ignore_index=True)`
			`else:`
			`gdf_small = sub_gdf`

			`# Enregistrement du résultat dans un nouveau fichier GeoJSON`
			`gdf_small.to_file('irve_small.geojson', driver='GeoJSON')`