/**
 * Convert an Org-mode (.org) file into structured JSON data.
 *
 * The script reads the source file, splits it into tasks (one per headline),
 * collects state keywords, tags, dates and body text for each task, gathers a
 * few statistics and writes everything to a JSON file in ./output/.
 */
import fs from 'node-fs';
import moment from 'moment';

/**********************
 * initialize configs
 **********************/
const sourceFileName = 'all_tasks.org';
const sourceFilePath = `./sources/${sourceFileName}`;

const headers = [];
const tasksObjectsForJsonExport = [];
const headersByKind = {};
// Set to false to parse and log without writing the JSON export.
const writeJsonAfterParse = true;

/**************************************************************
 * fetch the source orgmode file to read its contents
 *************************************************************/
console.log('parse some org file', sourceFilePath);

if (!sourceFilePath) {
  console.error('pas de fichier à ouvrir');
}

// Purely informative check: the actual read below reports its own errors.
fs.stat(sourceFilePath, function (err) {
  if (err == null) {
    console.log(`File ${sourceFilePath} exists`);
  } else if (err.code === 'ENOENT') {
    // file does not exist
    console.error(`le fichier ${sourceFilePath} est introuvable. Impossible d en extraire des infos.`, err);
  } else {
    console.log('Some other error: ', err.code);
  }
});

/**********************
 * search elements
 *********************/
const stateKeywordList = ['SOMEDAY', 'NEXT', 'TODO', 'CANCELLED', 'DONE', 'WAITING'];
const dateKeywordList = ['CREATED', 'SCHEDULED', 'DEADLINE', 'CLOSED', 'Refiled'];
const sectionKeywordList = ['PROPERTIES', 'LOGBOOK', 'END'];
// A body line containing any of these is treated as metadata, not as free text.
const reservedKeywordList = [...dateKeywordList, ...stateKeywordList, ...sectionKeywordList];

const propertiesSection = {}; // TODO properties listing
const logBookSection = {}; // TODO logbook listing

// Null-prototype dictionaries: words or tags such as "constructor" or
// "__proto__" must be counted like any other key.
const statistics = {
  tags: Object.create(null),
  words: Object.create(null),
};

// A headline is one or more leading stars followed by a space.
const HEADER_PATTERN = /^\*+ /;

// Date (YYYY-MM-DD), optional day-name abbreviation, optional HH:MM[:SS].
// Capture groups: 1 year, 2 month, 3 day, 4 hour, 5 minute, 6 second.
const ORG_DATE_PATTERN = /(\d{4})-(\d{2})-(\d{2})(?: \p{L}{2,4}\.?)?(?: (\d{1,2}):(\d{2})(?::(\d{2}))?)?/u;

/**
 * Build a fresh, empty task object.
 *
 * A factory is used instead of spreading a template object because a shallow
 * copy would share the same `tags` array and `dates` object between tasks.
 *
 * @returns {{header: string, level: (string|number), corpus: string, state: string, tags: string[], tagsInherited: string[], dates: Object, logbook: Object, properties: Object}}
 */
function createEmptyTask() {
  return {
    header: '',
    level: '',
    corpus: '',
    state: '',
    tags: [],
    tagsInherited: [],
    dates: {},
    logbook: {},
    properties: {},
  };
}

let isFirst = true;

// init first task object as an empty task
let currentTask = createEmptyTask();

/**
 * Add one to the counter stored under `key`, creating it when missing.
 *
 * @param {Object} counters dictionary of counters
 * @param {string} key counter name
 */
function incrementCounter(counters, key) {
  counters[key] = (counters[key] ?? 0) + 1;
}

/**
 * Add the current task to the export list and start a new empty one.
 */
function addAndRefreshCurrentTask() {
  tasksObjectsForJsonExport.push(currentTask);
  currentTask = createEmptyTask();
}

/**
 * Count every whitespace-separated word of a sentence in `statistics.words`.
 *
 * @param {string} sentence text to split into words
 */
function makeWordsStatistics(sentence) {
  sentence
    .split(/\s+/)
    .filter(Boolean) // consecutive separators produce empty tokens
    .forEach((word) => incrementCounter(statistics.words, word));
}

/**
 * Fill the current task from a headline: level, cleaned title, state and tags.
 *
 * @param {string} line a line matching HEADER_PATTERN
 */
function parseHeaderLine(line) {
  // Only the leading stars define the level; stars used as markup in the
  // title must not be counted.
  currentTask.level = line.match(/^\*+/)[0].length;

  const headerText = cleanHeader(line);
  headers.push(headerText);
  currentTask.header = headerText;
  makeWordsStatistics(headerText);

  stateKeywordList.forEach((keyword) => {
    if (lineHasKeyword(line, keyword)) {
      currentTask.state = keyword;
    }
  });

  // find the tags
  const tagsFound = line.match(/\:(.*)\:/g);
  if (tagsFound) {
    tagsFound[0].split(':').forEach((tag) => {
      if (tag.length > 1) {
        incrementCounter(statistics.tags, tag);
        currentTask.tags.push(tag);
      }
    });
  }
}

/**
 * Convert a match returned by searchDate() into a Date in local time.
 *
 * The Date is built from the numeric components because parsing the raw text
 * (which may contain a localized day name) is implementation-defined.
 *
 * @param {RegExpMatchArray} dateMatch match produced by ORG_DATE_PATTERN
 * @returns {Date}
 */
function orgDateToDate(dateMatch) {
  const [, year, month, day, hour = 0, minute = 0, second = 0] = dateMatch;
  return new Date(
    Number(year),
    Number(month) - 1, // Date months are 0-indexed
    Number(day),
    Number(hour),
    Number(minute),
    Number(second),
  );
}

/**
 * Store, for each date keyword present on the line, the date that follows it.
 *
 * @param {string} line the line being parsed
 * @param {RegExpMatchArray} firstDateFound first date of the line, used when
 *   no date follows a keyword
 */
function collectDates(line, firstDateFound) {
  dateKeywordList.forEach((keyword) => {
    if (!lineHasSubstring(line, keyword)) {
      return;
    }
    // A single line can hold several keywords (e.g. CLOSED and SCHEDULED),
    // each with its own timestamp.
    const dateAfterKeyword = searchDate(line.slice(line.indexOf(keyword))) ?? firstDateFound;
    currentTask.dates[keyword] = orgDateToDate(dateAfterKeyword);
  });
}

/**********************
 * loop to parse all
 *********************/
fs.readFile(sourceFilePath, 'utf8', function (err, data) {
  if (err) {
    return console.log(err);
  }
  console.log(" parsing...");

  // walk through every line of the org file
  const everyline = data.split(/\r?\n/);

  everyline.forEach((line) => {
    // headlines start a new task
    const isHeader = HEADER_PATTERN.test(line);
    if (isHeader) {
      // add the previous task to the export list
      if (isFirst) {
        isFirst = false;
      } else {
        addAndRefreshCurrentTask();
      }
      parseHeaderLine(line);
    }

    // classify creation, closing and logbook dates
    const dateFound = searchDate(line);
    if (dateFound) {
      collectDates(line, dateFound);
      return;
    }

    // Free text of the task body: non-empty, non-headline lines that carry
    // none of the reserved keywords.
    const isPlainText = reservedKeywordList.every((keyword) => !line.includes(keyword));
    if (isPlainText && line.length && !isHeader) {
      makeWordsStatistics(line);
      const cleanedLine = line.replace(/\s{2,}/g, ' ');
      currentTask.corpus += `${cleanedLine} `;
    }
  });

  // add the last parsed task
  addAndRefreshCurrentTask();
  console.log(" parsing fini");

  // Entries of `statistics` that are not plain counters (tags, words) are
  // ranked as 0 so that the comparator always returns a number.
  const countOf = (key) => (typeof statistics[key] === 'number' ? statistics[key] : 0);

  const jsonContent = {
    statistics: {
      lines_count: everyline.length,
      headers_count: headers.length,
      // names of the collected statistics, sorted by ascending count
      statistics: Object.keys(statistics).sort((a, b) => countOf(a) - countOf(b)),
    },
    meta_data: {
      author: '@tykayn@mastodon.Cipherbliss.com',
      generated_at: new Date(),
      generated_from_file: sourceFilePath,
      sources: 'https://forge.chapril.org/tykayn/org-report-stats.git',
    },
    tasks_list: tasksObjectsForJsonExport,
  };

  console.log('statistics', statistics);

  if (writeJsonAfterParse) {
    writeJsonFile('export_' + sourceFileName + '_parsed.json', JSON.stringify(jsonContent));
  }
});

/**
 * Tell whether a headline carries the given state keyword.
 *
 * When it does, the line is also recorded in `headersByKind` and the keyword
 * counter in `statistics` is incremented.
 *
 * @param {string} line the headline
 * @param {string} keyword state keyword to look for
 * @returns {boolean} true when '* ' followed by the keyword is in the line
 */
function lineHasKeyword(line, keyword = 'TODO') {
  const isFound = line.includes('* ' + keyword);
  if (isFound) {
    createNewHeaderKind(keyword);
    headersByKind[keyword].push(line);
    incrementCounter(statistics, keyword);
  }
  return isFound;
}

/**
 * Tell whether the line contains the keyword, counting it in `statistics`
 * only when it is actually present.
 *
 * @param {string} line the line to inspect
 * @param {string} keyword substring to look for
 * @returns {boolean}
 */
function lineHasSubstring(line, keyword) {
  const isFound = line.includes(keyword);
  if (isFound) {
    incrementCounter(statistics, keyword);
  }
  return isFound;
}

/**
 * Make sure `headersByKind` has a list for the given keyword.
 *
 * @param {string} keyword state keyword
 */
function createNewHeaderKind(keyword) {
  if (!headersByKind[keyword]) {
    headersByKind[keyword] = [];
  }
}

/**
 * Search a line for a date, optionally followed by a day-name abbreviation
 * and a time, e.g. "2022-03-01", "2022-03-01 mar. 10:30" or
 * "2022-03-01 Tue 10:30:15".
 *
 * @param {string} line
 * @returns {RegExpMatchArray|undefined} the match (index 0 is the matched
 *   text, groups 1-6 are year, month, day, hour, minute, second) or undefined
 *   when the line holds no date
 */
function searchDate(line) {
  return line.match(ORG_DATE_PATTERN) ?? undefined;
}

/**
 * Compare two dates and keep the oldest one, in order to find the first date
 * related to a task among those mentioned.
 *
 * @param date1 any value accepted by moment()
 * @param date2 any value accepted by moment()
 * @returns {moment.Moment} the older of the two dates (date2 when equal)
 */
function compareDatesAndKeepOldest(date1, date2) {
  const first = moment(date1);
  const second = moment(date2);
  return first.isBefore(second) ? first : second;
}

/**
 * Get the cleaned content of a headline: without stars, state keyword,
 * bracketed cookies and tags.
 *
 * @param {string} line the raw headline
 * @returns {string} the title text
 */
function cleanHeader(line) {
  let cleaned = String(line);
  stateKeywordList.forEach((keyword) => {
    cleaned = cleaned.replace(keyword, '');
  });
  return cleaned
    .replace(/\** /, '')
    .replace(/\[.*\]/g, '')
    .replace(/\:.*\:/g, '')
    // Collapse the gaps left by the removals; removing a single space here
    // would glue together the first two words of a keyword-less headline.
    .replace(/\s{2,}/g, ' ')
    .trim();
}

/**
 * Write the given content to ./output/<fileName> and log the outcome.
 *
 * @param {string} fileName name of the file to create in ./output/
 * @param {string} fileContent text to write
 */
function writeJsonFile(fileName, fileContent) {
  console.log('write file ', fileName);

  return fs.writeFile(
    `./output/${fileName}`,
    fileContent,
    "utf8",
    (err) => {
      if (err) {
        console.log(`Error writing file: ${err}`);
      } else {
        console.log(`File ${fileName} is written successfully!`);
      }
    }
  );
}