/**
 * Convert an Org-mode (.org) file into structured JSON data.
 *
 * The script reads the source file line by line, builds one task object per
 * headline (level, state keyword, tags, dates, body text), gathers statistics
 * (tags, words, dates per year / week / month / day) and writes everything
 * to a JSON file in the output folder.
 */
import fs from 'node-fs';
import moment from 'moment';

/**********************
 * initialize configs
 **********************/

const sourceFileName = 'all_tasks.org';
const sourceFilePath = './sources/' + sourceFileName;
const outputAbsolutePath = '/home/tykayn/Nextcloud/ressources/social sorting/output/';
const outputFileNameJson = 'export_' + sourceFileName + '_parsed.json';

const headers = [];
const tasksObjectsForJsonExport = [];
const headersByKind = {};
// Set to false to parse without writing the JSON export.
const writeJsonAfterParse = true;

moment.locale('FR');

/**************************************************************
 * fetch the source orgmode file to read its contents
 *************************************************************/

console.log('---------- parse some org file', sourceFilePath);

// sourceFilePath always starts with './sources/', so it is the file name
// that has to be checked to detect a missing configuration.
if (!sourceFileName) {
  console.error('pas de fichier à ouvrir');
}

fs.stat(sourceFilePath, function (err) {
  if (err == null) {
    console.log(`File ${sourceFilePath} exists`);
  } else if (err.code === 'ENOENT') {
    // file does not exist
    console.error(`le fichier ${sourceFilePath} est introuvable. Impossible d en extraire des infos.`, err);
  } else {
    console.log('Some other error: ', err.code);
  }
});

/**********************
 * search elements
 *********************/

const stateKeywordList = ['SOMEDAY', 'NEXT', 'TODO', 'CANCELLED', 'DONE', 'WAITING'];
const dateKeywordList = ['CREATED', 'SCHEDULED', 'DEADLINE', 'CLOSED', 'Refiled'];
const sectionKeywordList = ['PROPERTIES', 'LOGBOOK', 'END'];
// A body line containing any of these keywords is metadata, not task text.
const nonCorpusKeywordList = [...dateKeywordList, ...stateKeywordList, ...sectionKeywordList];

const propertiesSection = {}; // TODO properties listing
const logBookSection = {}; // TODO logbook listing

const statistics = {
  tags: {},
  words: {},
  dates: {
    havingDate: 0,
    havingNoDate: 0,
    oldEst: 0,
    mostRecent: 0,
    years: {},
    weeks: {},
    months: {},
    days: {},
  },
};

// Matches a state keyword at the very start of a headline title
// (once the leading stars have been removed).
const headerKeywordSearch = new RegExp('^(?:' + stateKeywordList.join('|') + ')\\b\\s*');

/**
 * Build a fresh, empty task object.
 *
 * Every task gets its own arrays and objects: nothing is shared between tasks
 * and every field is an own property, hence present in the JSON export.
 *
 * @returns {{header: string, level: (string|number), corpus: string, state: string, tags: string[], children: Array, tagsInherited: string[], dates: Object, logbook: Object, properties: Object}}
 */
function createEmptyTask() {
  return {
    header: '',
    level: '',
    corpus: '',
    state: '',
    tags: [],
    children: [], // TODO list children tasks with a reference to the parent when level is superior to previous task
    tagsInherited: [], // TODO inherit tags
    dates: {},
    logbook: {},
    properties: {},
  };
}

let isHeader = false;
let isProperty = false; // TODO reserved for the properties drawer parsing
let isLogbook = false; // TODO reserved for the logbook drawer parsing
let isFirst = true;

// task currently being filled by the parsing loop
let currentTask = createEmptyTask();

/**
 * Add the current task to the export list and start a new empty one.
 */
function addAndRefreshCurrentTask() {
  tasksObjectsForJsonExport.push(currentTask);
  currentTask = createEmptyTask();
}

/**
 * Increment a counter stored in a plain object, creating it when missing.
 *
 * An own-property check is used so that keys such as "constructor" or
 * "toString" are not mistaken for existing counters.
 *
 * @param {Object} counters object holding the counters
 * @param {string} key name of the counter to increment
 */
function incrementCounter(counters, key) {
  if (!Object.prototype.hasOwnProperty.call(counters, key)) {
    counters[key] = 0;
  }
  counters[key]++;
}

/**
 * Count every word of a sentence in statistics.words.
 *
 * @param {string} sentence text to split on whitespace
 */
function makeWordsStatistics(sentence) {
  sentence
    .split(/\s+/)
    .filter(Boolean) // leading / trailing whitespace would yield empty words
    .forEach((word) => incrementCounter(statistics.words, word));
}

/**
 * Count one date in a period bucket (year, week, month or day).
 * The bucket is created for any keyword, but only CREATED and CLOSED are counted.
 *
 * @param {Object} periodCounters one of statistics.dates.years / weeks / months / days
 * @param {string} periodKey key of the period, e.g. "2022" or "2022-11-01"
 * @param {string} keyword date keyword found on the line
 */
function countDateInPeriod(periodCounters, periodKey, keyword) {
  if (!periodCounters[periodKey]) {
    periodCounters[periodKey] = {
      created: 0,
      closed: 0,
    };
  }
  if (keyword === 'CLOSED') {
    periodCounters[periodKey].closed++;
  }
  if (keyword === 'CREATED') {
    periodCounters[periodKey].created++;
  }
}

/**
 * For each period of time, count the created and closed tasks.
 *
 * @param {string} keyword date keyword found on the line
 * @param {*} dateFoundElement anything moment() can read (moment object, Date, ISO string)
 */
function statisticDateFill(keyword, dateFoundElement) {
  const date = moment(dateFoundElement);
  if (!date.isValid()) {
    return;
  }
  // moment formats are used rather than Date getters: getMonth() is 0-based and
  // getDay() is the day of the week, not the day of the month.
  countDateInPeriod(statistics.dates.years, date.format('YYYY'), keyword);
  // "gggg" is the week-based year matching "ww", so that the first days of
  // January belonging to the last week of the previous year are keyed correctly.
  countDateInPeriod(statistics.dates.weeks, date.format('gggg-ww'), keyword);
  countDateInPeriod(statistics.dates.months, date.format('YYYY-MM'), keyword);
  countDateInPeriod(statistics.dates.days, date.format('YYYY-MM-DD'), keyword);
}

/**
 * Remember the oldest date seen so far in statistics.dates.oldEst.
 *
 * @param {*} currentDate date to compare with the oldest known one
 */
function findOldestDate(currentDate) {
  const oldest = statistics.dates.oldEst;
  if (!oldest || !moment(oldest).isBefore(moment(currentDate))) {
    statistics.dates.oldEst = currentDate;
  }
}

/**
 * Convert the text of an Org timestamp to a moment object.
 * The day name is ignored, so parsing does not depend on the locale.
 *
 * @param {string} timestampText e.g. "2022-12-21 mer." or "2022-11-01 mar. 00:44"
 * @returns {(Object|null)} a valid moment object, or null when the text is not a real date
 */
function parseOrgTimestamp(timestampText) {
  const parts = timestampText.match(/(\d{4}-\d{2}-\d{2})(?:.*?(\d{2}:\d{2}(?::\d{2})?))?/);
  if (!parts) {
    return null;
  }
  const day = parts[1];
  const time = parts[2];
  let parsed;
  if (!time) {
    parsed = moment(day, 'YYYY-MM-DD', true);
  } else if (time.length > 5) {
    parsed = moment(`${day} ${time}`, 'YYYY-MM-DD HH:mm:ss', true);
  } else {
    parsed = moment(`${day} ${time}`, 'YYYY-MM-DD HH:mm', true);
  }
  return parsed.isValid() ? parsed : null;
}

/**
 * Extract the tags written at the end of a headline, e.g. ":work:urgent:".
 *
 * @param {string} line headline
 * @returns {string[]} list of tags, empty when there is none
 */
function extractTags(line) {
  const tagsMatch = line.match(/(?:^|\s):((?:[\w@#%]+:)+)\s*$/);
  if (!tagsMatch) {
    return [];
  }
  return tagsMatch[1].split(':').filter(Boolean);
}

/**
 * Start a new task from a headline: level, cleaned title, state and tags.
 *
 * @param {string} line headline
 * @param {number} level number of leading stars
 */
function parseHeaderLine(line, level) {
  if (isFirst) {
    // lines found before the first headline do not belong to any task
    isFirst = false;
    currentTask = createEmptyTask();
  } else {
    // add last task to export list
    addAndRefreshCurrentTask();
  }

  currentTask.level = level;

  const cleanedHeader = cleanHeader(line);
  headers.push(cleanedHeader);
  currentTask.header = cleanedHeader;

  stateKeywordList.forEach((keyword) => {
    if (lineHasKeyword(line, keyword)) {
      currentTask.state = keyword;
    }
  });

  extractTags(line).forEach((tag) => {
    incrementCounter(statistics.tags, tag);
    currentTask.tags.push(tag);
  });
}

/**
 * Store, for every date keyword present on the line, the date that follows it.
 *
 * @param {string} line line in which a date was found
 * @param {Array} firstDateFound match of the first date of the line, used when
 *        no date follows the keyword
 */
function collectKeywordDates(line, firstDateFound) {
  dateKeywordList.forEach((keyword) => {
    if (!lineHasSubstring(line, keyword)) {
      return;
    }
    // A planning line can hold several keywords ("CLOSED: [...] SCHEDULED: <...>"):
    // each keyword must get its own date, not the first date of the line.
    const dateAfterKeyword = searchDate(line.slice(line.indexOf(keyword))) || firstDateFound;
    const parsedDate = parseOrgTimestamp(dateAfterKeyword[0]);
    if (!parsedDate) {
      return;
    }
    statisticDateFill(keyword, parsedDate);
    findOldestDate(parsedDate);
    currentTask.dates[keyword] = parsedDate.format();
  });
}

/**
 * Append a body line to the text of the current task and count its words.
 * Empty lines and lines holding date, state or section keywords are skipped.
 *
 * @param {string} line body line (neither a headline nor a dated line)
 */
function appendToCorpus(line) {
  if (!line.trim().length) {
    return;
  }
  if (nonCorpusKeywordList.some((keyword) => line.includes(keyword))) {
    return;
  }
  const cleanedLine = line.replace(/\s{2,}/g, ' ').trim();
  currentTask.corpus += `${cleanedLine} `;
  makeWordsStatistics(cleanedLine);
}

/**
 * Parse one line of the org file and update the current task and the statistics.
 *
 * @param {string} line
 */
function parseLine(line) {
  // headlines are the lines starting with one or more stars followed by a space;
  // only the leading stars give the level
  const headerMatch = line.match(/^(\*+) /);
  isHeader = Boolean(headerMatch);
  if (headerMatch) {
    parseHeaderLine(line, headerMatch[1].length);
  }

  // sort out creation, closing and logbook dates
  const dateFound = searchDate(line);
  if (dateFound) {
    statistics.dates.havingDate += 1;
    collectKeywordDates(line, dateFound);
  } else {
    statistics.dates.havingNoDate += 1;
    // add the body of the section following the headline
    if (!isHeader) {
      appendToCorpus(line);
    }
  }
}

/**********************
 * loop to parse all
 *********************/
fs.readFile(sourceFilePath, 'utf8', function (err, data) {
  if (err) {
    return console.log(err);
  }
  console.log(' parsing...');

  // go through every line of the org file
  const everyline = data.split(/\r?\n/);
  everyline.forEach((line) => parseLine(line));

  // add the last parsed task, if any headline was found
  if (!isFirst) {
    addAndRefreshCurrentTask();
  }
  console.log(' parsing fini');

  console.log('write file ', outputAbsolutePath, outputFileNameJson);

  // sort periods by key, tags and words by decreasing count
  statistics.dates.years = sortByKey(statistics.dates.years);
  statistics.dates.weeks = sortByKey(statistics.dates.weeks);
  statistics.dates.months = sortByKey(statistics.dates.months);
  statistics.dates.days = sortByKey(statistics.dates.days);

  statistics.tags = sortByValue(statistics.tags);
  statistics.words = sortByValue(statistics.words);

  const sorted_stats = sortByKey(statistics);

  const jsonContent = {
    statistics: {
      lines_count: everyline.length,
      headers_count: headers.length,
      statistics: sorted_stats,
    },
    meta_data: {
      author: '@tykayn@mastodon.Cipherbliss.com',
      generated_at: new Date(),
      // sourceFilePath already ends with the file name
      generated_from_file: sourceFilePath,
      sources: 'https://forge.chapril.org/tykayn/org-report-stats.git',
    },
    tasks_list: tasksObjectsForJsonExport,
  };

  if (writeJsonAfterParse) {
    writeFileInOuputFolderFromJsonObject(outputFileNameJson, jsonContent);
  }
});

/**
 * Sort a literal object by decreasing value of its pairs.
 *
 * @param {Object} literalobject object whose values are numbers
 * @returns {Object} new object with the same pairs, biggest values first
 */
function sortByValue(literalobject) {
  return Object.fromEntries(
    Object.entries(literalobject).sort(([, a], [, b]) => b - a)
  );
}

/**
 * Tell whether a headline carries the given state keyword, and when it does,
 * file the line in headersByKind and count the keyword in the statistics.
 *
 * @param {string} line headline
 * @param {string} keyword state keyword to look for
 * @returns {boolean}
 */
function lineHasKeyword(line, keyword = 'TODO') {
  const isFound = line.includes('* ' + keyword);
  if (isFound) {
    createNewHeaderKind(keyword);
    headersByKind[keyword].push(line);
    incrementCounter(statistics, keyword);
  }
  return isFound;
}

/**
 * Tell whether a line contains the given keyword, counting the keyword in the
 * statistics only when it is really present.
 *
 * @param {string} line
 * @param {string} keyword
 * @returns {boolean}
 */
function lineHasSubstring(line, keyword) {
  const isFound = line.includes(keyword);
  if (isFound) {
    incrementCounter(statistics, keyword);
  }
  return isFound;
}

/**
 * Make sure headersByKind has a list for the given keyword.
 *
 * @param {string} keyword
 */
function createNewHeaderKind(keyword) {
  if (!headersByKind[keyword]) {
    headersByKind[keyword] = [];
  }
}

/**
 * Search the first date of a line, written as
 * YYYY-MM-DD, optionally followed by a day name and by HH:MM or HH:MM:SS.
 *
 * @param {string} line
 * @returns {(Array|undefined)} the regex match (index 0 is the matched text),
 *          or undefined when the line holds no date
 */
function searchDate(line) {
  const dateMatch = line.match(/\d{4}-\d{2}-\d{2}(?: [^\s\d\]>]+)?(?: \d{2}:\d{2}(?::\d{2})?)?/);
  return dateMatch || undefined;
}

/**
 * Get the cleaned content of a headline: no leading stars, no state keyword,
 * no trailing tags and no bracketed parts (priority, counters, inactive dates).
 *
 * @param {string} line headline
 * @returns {string} title of the headline
 */
function cleanHeader(line) {
  return String(line)
    .replace(/^\*+\s+/, '')
    .replace(headerKeywordSearch, '')
    .replace(/(?:^|\s+):(?:[\w@#%]+:)+\s*$/, '')
    .replace(/\[[^\]]*\]/g, '')
    .replace(/\s{2,}/g, ' ')
    .trim();
}

/**
 * Sort a literal object by alphabetical order of its keys.
 *
 * @param {Object} objectStuff
 * @returns {Object} new object with the same pairs, keys sorted
 */
function sortByKey(objectStuff) {
  return Object.keys(objectStuff).sort().reduce(
    (obj, key) => {
      obj[key] = objectStuff[key];
      return obj;
    },
    {}
  );
}

/**
 * Write an object as JSON in the output folder.
 *
 * Errors are logged, not thrown: the returned promise always resolves, once
 * the write has finished or failed.
 *
 * @param {string} fileName name of the file to create in the output folder
 * @param {*} jsonObjectThing data to serialize
 * @returns {Promise<void>}
 */
export async function writeFileInOuputFolderFromJsonObject(fileName, jsonObjectThing) {
  console.log('statistics.words', statistics.words);

  // fs.writeFile takes a callback and returns nothing to await, so it is
  // wrapped in a promise that settles when the callback is called.
  return new Promise((resolve) => {
    fs.writeFile(
      `${outputAbsolutePath}${fileName}`,
      JSON.stringify(jsonObjectThing),
      'utf8',
      (err) => {
        if (err) {
          console.log(`Error writing file: ${err}`);
        } else {
          console.log(`File ${fileName} is written successfully!`);
        }
        resolve();
      }
    );
  });
}