89 lines
2.5 KiB
TypeScript
89 lines
2.5 KiB
TypeScript
/**
 * Take a CSV file,
 * examine every column and its values,
 * keep the unique values of each column in memory,
 * then build a new CSV that shows only the unique values of each column
 * and counts how many values there are.
 */
|
|
import utils from '../mappings/utils'
|
|
import {parse} from 'csv'
|
|
|
|
const fs = require('fs')
|
|
const minimist = require('minimist')
|
|
|
|
/**
 * Values collected per column of the analysed dataset.
 * Each key maps to the list of values stored for it; the keys are
 * presumably column names — confirm against the code that fills it.
 */
interface VarianceType {
    [column: string]: string[];
}
|
|
|
|
// Module-level CSV text.
// NOTE(review): no code visible in this file reads this binding — confirm before removing it.
let csv_content = 'variance de dataset\n';

// Cell delimiter used when building the variance CSV.
let separator = ';';

// Values stored per column; its keys drive the columns of the variance CSV.
let data_variance: VarianceType = {};

// File that is read and handed to the CSV parser.
// NOTE(review): the path ends in ".geojson" although the content is parsed as CSV — confirm.
const inputPath = './etalab_data/etalab_data/éoliennes/éoliennes_en_france_export.geojson'
// const inputPath = './etalab_data/small.csv'

// Headings written as the heading row of the variance CSV.
let columns_headings: string[] = [];

// Line counter that is logged once parsing has finished; nothing visible here increments it.
let lines_count = 0;

// Number of value rows written to the variance CSV (expected to be the size of the longest value list).
let longest_variance_count = 0;
|
|
|
|
// Announce which file is about to be read.
console.log('open file ', inputPath);

/**
 * Read the input file, parse its content as CSV and log the parsed rows
 * together with the current counters.
 *
 * Read and parse failures are rethrown unchanged, so the original error
 * (message, stack and error code) is what surfaces.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- the file content is forwarded untouched to the parser
fs.readFile(inputPath, (readError: unknown, fileData: any) => {
    if (readError) {
        // Wrapping in `new Error(...)` would stringify the error and drop its stack.
        throw readError;
    }

    // columns: false keeps every row as a plain array of cells; trim strips whitespace around each cell.
    parse(fileData, {columns: false, trim: true}, (parseError: unknown, rows: unknown) => {
        if (parseError) {
            throw parseError;
        }

        console.log('line ', lines_count);
        console.log('rows', rows);
        console.log('longest_variance_count', longest_variance_count);

        // Logged here, inside the parser callback, so that it appears after the rows
        // have been reported rather than right after the parser was started.
        console.log('parsing done');
    });
});
|
|
|
|
/**
 * Build the CSV text describing the variance of the dataset read from `inputPath`.
 *
 * Every row starts with the separator, so the first column is always empty.
 * Layout of the result:
 *  1. a title row: `variance de <inputPath>` followed by the current date;
 *  2. one cell per entry of `columns_headings`;
 *  3. one cell per key of `data_variance`, holding how many values are stored for it;
 *  4. two empty lines;
 *  5. `longest_variance_count` rows, where row i holds the i-th stored value of each
 *     column, or an empty cell when that column has fewer values.
 *
 * Headings and values that contain the separator, a double quote or a line break are
 * wrapped in double quotes (inner quotes doubled) so they cannot break the row layout.
 * Cells without such characters are written unchanged.
 *
 * @returns the CSV content as one string; nothing is written to disk by this function.
 */
function writeCSVVariance(): string {
    // Quote a cell only when it would otherwise be split or truncated by a CSV reader.
    const quoteIfNeeded = (cell: unknown): string => {
        const text = String(cell);
        const hasSeparator = separator !== '' && text.includes(separator);
        if (hasSeparator || /["\r\n]/.test(text)) {
            return '"' + text.replace(/"/g, '""') + '"';
        }
        return text;
    };

    // Serialise one row: each cell is preceded by the separator.
    const toRow = (cells: ReadonlyArray<string | number>): string =>
        cells.map((cell) => separator + cell).join('');

    const columns = Object.keys(data_variance);

    let csv_content = ';variance de ' + inputPath + ';' + new Date() + '\n';

    // heading row
    csv_content += toRow(columns_headings.map(quoteIfNeeded)) + '\n';

    // number of stored values for each column
    csv_content += toRow(columns.map((column) => data_variance[column].length)) + '\n\n';

    // value rows; columns with fewer values are padded with empty cells
    for (let rowIndex = 0; rowIndex < longest_variance_count; rowIndex++) {
        const cells = columns.map((column) => {
            const values = data_variance[column];
            return rowIndex < values.length ? quoteIfNeeded(values[rowIndex]) : '';
        });
        csv_content += '\n' + toRow(cells);
    }

    return csv_content;
}
|