diff --git a/parse_orgmode_to_json.mjs b/parse_orgmode_to_json.mjs index 1329881..8ad80c5 100644 --- a/parse_orgmode_to_json.mjs +++ b/parse_orgmode_to_json.mjs @@ -17,6 +17,7 @@ let headers = [] let tasksObjectsForJsonExport = [] let headersByKind = {} let writeJsonAfterParse = false; +writeJsonAfterParse = true; /************************************************************** * fetch the source orgmode file to read its contents @@ -42,10 +43,16 @@ fs.stat(sourceFilePath, function (err, stat) { * search elements *********************/ let stateKeywordList = ['SOMEDAY', 'NEXT', 'TODO', 'CANCELLED', 'DONE', 'WAITING']; -let dateKeywordList = ['CREATED', 'SCHEDULED', 'DEADLINE', 'CLOSED','Refiled']; +let dateKeywordList = ['CREATED', 'SCHEDULED', 'DEADLINE', 'CLOSED', 'Refiled']; let sectionKeywordList = ['PROPERTIES', 'LOGBOOK', 'END']; -let propertiesSection = {} -let logBookSection = {} + +let propertiesSection = {} // TODO properties listing +let logBookSection = {} // TODO logbook listing + +let statistics = { + tags: {}, + words: {} +} let headerKeywordSearch = '[' + stateKeywordList.join('|') + ']' /** @@ -55,7 +62,7 @@ let headerKeywordSearch = '[' + stateKeywordList.join('|') + ']' let task = { header: "", level: "", - content: "", + corpus: "", state: "", tags: [], tagsInherited: [], @@ -63,17 +70,39 @@ let task = { logbook: {}, properties: {}, } -// init first task object as empty clone -let currentTask = {...task}; + let isHeader = false; let isProperty = false; let isLogbook = false; let isFirst = true; +// init first task object as empty clone +let currentTask = {...task}; + +/** + * add to tasks to export and refresh current task + */ +function addAndRefreshCurrentTask() { + tasksObjectsForJsonExport.push(currentTask) + currentTask = {...task}; + currentTask.dates = {}; +}; + +function makeWordsStatistics(sentence) { + sentence.split(' ')?.forEach(word => { + if (!statistics.words[word]) { + statistics.words[word] = 0 + } + statistics.words[word]++ + }) +} + /********************** * loop to parse all *********************/ fs.readFile(sourceFilePath, 'utf8', function (err, data) { + + if (err) { return console.log(err); } @@ -90,10 +119,8 @@ fs.readFile(sourceFilePath, 'utf8', function (err, data) { if (line.match(/^\*+? /)) { // add last task to export list if (!isFirst) { - tasksObjectsForJsonExport.push(currentTask) - console.log('currentTask.dates', currentTask.dates) - currentTask = {...task}; + addAndRefreshCurrentTask(); } else { isFirst = false; } @@ -105,11 +132,9 @@ fs.readFile(sourceFilePath, 'utf8', function (err, data) { // create a new task - line = line.replace('*', '') - line = line.replace(stateKeywordList, [].fill('', 0, stateKeywordList.length)) - - headers.push(line) - currentTask.header = line; + headers.push(cleanHeader(line)) + currentTask.header = cleanHeader(line); + makeWordsStatistics(cleanHeader(line)); stateKeywordList.forEach(keyword => { let keywordIsFound = lineHasKeyword(line, keyword) @@ -123,57 +148,72 @@ fs.readFile(sourceFilePath, 'utf8', function (err, data) { let tagsFound = line.match(/\:(.*)\:/g) if (tagsFound) { tagsFound = tagsFound[0]; - console.log('tagsFound', tagsFound) - tagsFound = tagsFound.split(':').filter(item => item.length) - currentTask.tags = tagsFound; + let tagList = tagsFound.split(':'); + tagList?.forEach(tag => { + if (tag.length > 1) { + + if (!statistics.tags[tag]) { + statistics.tags[tag] = 0 + } + statistics.tags[tag]++ + + currentTask.tags.push(tag) + } + }) } - // fin des recherches dans la ligne de Header + // ------------- fin des recherches dans la ligne de Header ------------- } else { isHeader = false; } // examen des lignes de corps de tâche, ou de corps de section suite au header. - // classer les dates de création, cloture, et de logbook let dateFound = searchDate(line) - if(dateFound){ + if (dateFound) { - dateKeywordList.forEach(keyword => { - if (lineHasSubstring(line, keyword)) { - if (!currentTask.dates[keyword]) { - currentTask.dates[keyword] = ''; + dateKeywordList.forEach(keyword => { + if (lineHasSubstring(line, keyword)) { + if (!currentTask.dates[keyword]) { + currentTask.dates[keyword] = ''; + } + currentTask.dates[keyword] = new Date(dateFound[0]); + } else { + // console.log('keyword', keyword) + } + }) + } else { + + if (line.indexOf(dateKeywordList) !== -1 && line.indexOf(stateKeywordList) !== -1 && line.indexOf(sectionKeywordList) !== -1) { + + makeWordsStatistics(line) + // ajouter le corps complet de la section après le header + if (line.length && !isHeader) { + + let cleanedLine = line.replace(/\s\s/g, ' '); + cleanedLine = line.replace(/ {2,}/g, ' ') + + currentTask.corpus += `${cleanedLine} +` } - currentTask.dates[keyword] = new Date(dateFound[0]); - } else { - // console.log('keyword', keyword) } - }) } - - // ajouter le corps complet de la section après le header - if (line.length && !isHeader) { - - let cleanedLine = line.replace(/\s\s/g, ' ') - cleanedLine = line.replace(/ {2,}/g, ' ') - console.log('line', cleanedLine) - currentTask.corpus += ` - ` + cleanedLine; - } - }) // ajouter la dernière tâche parsée - tasksObjectsForJsonExport.push(currentTask) + addAndRefreshCurrentTask(); - console.log('headers', headers) console.log(" parsing fini") - stateKeywordList.forEach(keyword => console.log('nombre de headers', keyword, headersByKind[keyword]?.length)) + // stateKeywordList.forEach(keyword => console.log('nombre de headers', keyword, headersByKind[keyword]?.length)) const jsonContent = { statistics: { lines_count: everyline.length, headers_count: headers.length, + statistics: Object.keys(statistics).sort(function (a, b) { + return statistics[a] - statistics[b] + }) + }, meta_data: { author: '@tykayn@mastodon.Cipherbliss.com', @@ -183,13 +223,14 @@ fs.readFile(sourceFilePath, 'utf8', function (err, data) { }, tasks_list: tasksObjectsForJsonExport } + + console.log('statistics', statistics) // console.log('tasksObjectsForJsonExport', jsonContent) if (writeJsonAfterParse) { - writeJsonFile('export_' + sourceFileName + '.json', JSON.stringify(jsonContent)); + writeJsonFile('export_' + sourceFileName + '_parsed.json', JSON.stringify(jsonContent)); } - return; }) function lineHasKeyword(line, keyword = 'TODO') { @@ -198,13 +239,22 @@ function lineHasKeyword(line, keyword = 'TODO') { if (isFound) { createNewHeaderKind(keyword) headersByKind[keyword].push(line); + if (!statistics[keyword]) { + statistics[keyword] = 0 + } + statistics[keyword]++ } return isFound; } function lineHasSubstring(line, keyword) { + let isFound = (line.indexOf(keyword) !== -1) + if (!statistics[keyword]) { + statistics[keyword] = 0 + } + statistics[keyword]++ - return (line.indexOf(keyword) !== -1) + return isFound } function createNewHeaderKind(keyword) { @@ -226,14 +276,14 @@ function searchDate(line) { let simpleDayHour = line.match(/\d{4}\-\d{2}\-\d{2} \w{3}?\.? \d{2}\:\d{2}/) let simpleDayHourSec = line.match(/\d{4}\-\d{2}\-\d{2} \w{3}?\.? \d{2}\:\d{2}\:\d{2}/) - if(simpleDayHourSec){ + if (simpleDayHourSec) { return simpleDayHourSec; } - if(simpleDayHour){ + if (simpleDayHour) { return simpleDayHour; } - if(simpleDay){ + if (simpleDay) { return simpleDay; } @@ -249,6 +299,23 @@ function compareDatesAndKeepOldest(date1, date2) { date2 = moment(date2) } +/** + * get the cleaned content of the header + * @param line + */ +function cleanHeader(line) { + + line = '' + line; + stateKeywordList.forEach(keyword => { + line = line.replace(keyword, '') + }) + line = line.replace(/\** /, ''); + line = line.replace(/\[.*\]/g, ''); + line = line.replace(/\:.*\:/g, ''); + line = line.replace(' ', ''); + return line.trim(); +} + function writeJsonFile(fileName, fileContent) { console.log('write file ', fileName);