mapping-geojson-osm/update_scripts/make_variance_from_geojson.ts

89 lines
2.5 KiB
TypeScript
Raw Normal View History

2023-08-11 18:41:32 +02:00
/**
prendre un CSV,
examiner toutes les colonnes et leurs valeurs,
garder en mémoire les valeurs uniques de chaque colonne
faire un nouveau csv qui ne montre que les valeurs uniques pour chacune des colonnes
et qui compte le nombre de valeurs
**/
2024-10-27 10:14:32 +01:00
import utils from '../mappings/utils'
2023-08-11 18:41:32 +02:00
import {parse} from 'csv'
const fs = require('fs')
const minimist = require('minimist')
/**
 * Dictionary keyed by column name; each entry is the list of string values
 * recorded for that column (per the file header, the unique values).
 */
interface VarianceType {
    [column: string]: string[]
}
// Module-level CSV buffer. writeCSVVariance() declares its own local
// `csv_content` that shadows this one, and nothing else in the visible code
// reads this binding.
let csv_content = 'variance de dataset\n';
// Cell delimiter that writeCSVVariance() puts in front of every cell.
let separator = ';';
// let columns_headings = [];
// Column name -> values recorded for that column. writeCSVVariance() reads it;
// nothing in the visible code fills it yet.
let data_variance: VarianceType = {};
// File read at startup.
// NOTE(review): this is a .geojson file, yet its content is handed to the CSV
// parser further down — confirm that this is intended.
const inputPath = './etalab_data/etalab_data/éoliennes/éoliennes_en_france_export.geojson'
// const inputPath = './etalab_data/small.csv'
// Labels written as the heading row by writeCSVVariance(); never filled in the
// visible code.
let columns_headings: Array<string> = [];
// Counter that is only logged in the visible code; it is never incremented here.
let lines_count = 0;
// Number of value rows writeCSVVariance() emits; never updated in the visible
// code, so it stays at 0.
let longest_variance_count = 0;
console.log('open file ', inputPath)
/**
 * Read the input file, then pass its raw content to the CSV parser and log
 * the parsed rows together with the current counters.
 *
 * Both callbacks throw on failure; since they run asynchronously, such a
 * throw surfaces as an uncaught exception and stops the script.
 */
fs.readFile(inputPath, function (readErr: Error | null, fileData: Buffer) {
    if (readErr) {
        // Rethrow the original error object: `new Error(err)` would stringify
        // it and lose its stack trace and error code.
        throw readErr
    }

    parse(fileData, {columns: false, trim: true}, function (parseErr: Error | null | undefined, rows: unknown) {
        if (parseErr) {
            throw parseErr
        }

        console.log('line ', lines_count)
        console.log('rows', rows)
        console.log('longest_variance_count', longest_variance_count)

        // Logged here, inside the parser callback, because the callback fires
        // after readFile's own callback has returned; logging it outside
        // announced completion before any row had been parsed.
        console.log('parsing done')
    })
})
/**
 * Build the CSV text describing the variance of the dataset held in the
 * module-level `data_variance` map.
 *
 * Layout of the returned string (every cell is preceded by `separator`):
 *  1. a title line: `;variance de <inputPath>;<current date>`
 *  2. one line with the labels from `columns_headings`
 *  3. one line with the number of values stored for each `data_variance` column
 *  4. two empty lines, then `longest_variance_count` rows where row `i` holds
 *     the i-th value of each column, or an empty cell once a column runs out.
 *
 * The heading line comes from `columns_headings` while the other lines follow
 * the key order of `data_variance`.
 *
 * NOTE(review): values are written verbatim — a value containing the separator
 * or a line break would shift the cells of its row.
 *
 * @returns the CSV content as a single string; nothing is written to disk here.
 */
function writeCSVVariance() {
    const titleLine = ';variance de ' + inputPath + ';' + new Date() + '\n'
    const columnNames = Object.keys(data_variance)

    // heading row
    const headingCells = columns_headings
        .map((heading: string) => separator + heading)
        .join('')

    // number of recorded values per column
    const countCells = columnNames
        .map((name: string) => separator + data_variance[name].length)
        .join('')

    // one row per value rank, each row starting on a fresh line
    const valueRows: string[] = []
    for (let rank = 0; rank < longest_variance_count; rank++) {
        const cells = columnNames.map((name: string) => {
            const values = data_variance[name]
            return rank < values.length ? separator + values[rank] : separator
        })
        valueRows.push('\n' + cells.join(''))
    }

    return titleLine + headingCells + '\n' + countCells + '\n\n' + valueRows.join('')
}