org-report-stats/parse_orgmode_to_json.mjs

524 lines
13 KiB
JavaScript
Raw Permalink Normal View History

2023-03-04 23:52:46 +01:00
/**
 * Convert an Org-mode (.org) file into structured data exported as JSON.
 */
2023-09-22 10:16:46 +02:00
import fs from 'node-fs'
import moment from 'moment'
import * as emoji from 'node-emoji'
2023-03-05 23:13:16 +01:00
/**********************
 * initialize configs
 **********************/
// Org file to parse, looked up in the ./sources/ folder.
const sourceFileName = 'all_tasks.org'
const sourceFilePath = './sources/' + sourceFileName
// Node does not expand "~" in paths (only shells do), so the home directory is
// resolved explicitly; the literal "~" is kept as a last resort.
const homeDirectory = process.env.HOME || process.env.USERPROFILE || '~'
const outputAbsolutePath = homeDirectory + '/Nextcloud/ressources/social sorting/output/'
const outputFileNameJson = 'export_' + sourceFileName + '_parsed.json'

// Accumulators filled while the file is parsed.
let headers = []
let tasksObjectsForJsonExport = []
let headersByKind = {}
// When true, the parsed result is written to the output folder as JSON.
let writeJsonAfterParse = true

moment.locale('FR')

// Emojis used in the console messages.
const tada = emoji.get('tada')
const gift = emoji.get('gift')
2023-07-22 23:03:29 +02:00
2023-03-05 23:13:16 +01:00
/**************************************************************
 * fetch the source orgmode file to read its contents
 *************************************************************/
console.log('---------- parse some org file', sourceFilePath)
if (!sourceFilePath) {
  console.error('pas de fichier à ouvrir')
}
// Diagnostic only: reports whether the source file is reachable. It does not
// stop the script; the read performed later reports its own error.
fs.stat(sourceFilePath, function (err) {
  if (err === null) {
    console.log(`File ${sourceFilePath} exists`)
    return
  }
  if (err.code === 'ENOENT') {
    // file does not exist
    console.error(`le fichier ${sourceFilePath} est introuvable. Impossible d en extraire des infos.`, err)
    return
  }
  // any other failure is still an error, so it goes to stderr
  console.error('Some other error: ', err.code)
})
2023-03-04 23:52:46 +01:00
2023-03-05 23:13:16 +01:00
/**********************
 * search elements
 *********************/
// Keywords looked up at the start of a headline to find the task state.
let stateKeywordList = ['SOMEDAY', 'NEXT', 'TODO', 'CANCELLED', 'DONE', 'WAITING']
// Keywords that label a date inside a task.
let dateKeywordList = ['CREATED', 'SCHEDULED', 'DEADLINE', 'CLOSED', 'Refiled']
// Drawer markers.
let sectionKeywordList = ['PROPERTIES', 'LOGBOOK', 'END']
let propertiesSection = {} // TODO properties listing
let logBookSection = {} // TODO logbook listing
// Global counters exported in the JSON report.
let statistics = {
  tags: {},
  words: {},
  dates: {
    havingDate: 0,
    havingNoDate: 0,
    oldEst: 0,
    mostRecent: 0,
    years: {},
    weeks: {},
    months: {},
    days: {}
  }
}

// Regex source fragment matching any ONE state keyword. A non-capturing group
// is required: square brackets would build a character class that matches a
// single letter instead of a whole keyword.
let headerKeywordSearch = '(?:' + stateKeywordList.join('|') + ')'
/**
 * Template giving the shape of one parsed task.
 * `dates` is keyed by date keyword (CREATED, CLOSED, ...) as tasks get parsed.
 * @type {{header: string, level: (string|number), corpus: string, state: string, tags: string[], children: Array, tagsInherited: string[], dates: Object, logbook: Object, properties: Object}}
 */
let task = {
  header: '',
  level: '',
  corpus: '',
  state: '',
  tags: [],
  children: [], // TODO list children tasks with a reference to the parent when level is superior to previous task
  tagsInherited: [], // TODO inherit tags
  dates: {},
  logbook: {},
  properties: {},
}
// Parser state flags.
let isHeader = false
let isProperty = false
let isLogbook = false
let isFirst = true
// Init the first task as an independent deep copy of the template.
// Object.create(task) would only set the template as prototype: pushes to
// tags/dates would then modify the template itself, and the inherited fields
// would be skipped by JSON.stringify.
let currentTask = structuredClone(task)
/**
 * Feed the word statistics with the current task, add the task to the export
 * list, then replace it with a fresh empty task.
 */
function addAndRefreshCurrentTask () {
  makeWordsStatistics(currentTask.header.trim())
  makeWordsStatistics(currentTask.corpus.trim())

  tasksObjectsForJsonExport.push(currentTask)
  // Reset with a deep copy of the template: every field becomes an own
  // property (so it is serialized) and no array or object is shared.
  currentTask = structuredClone(task)
  currentTask.dates = {}
  currentTask.tags = []
}
2023-09-22 10:16:46 +02:00
/**
 * Count every word of a sentence into statistics.words.
 * Words are separated by any run of whitespace (spaces, tabs, newlines) and
 * empty strings are ignored.
 * @param {string} sentence
 */
function makeWordsStatistics (sentence) {
  const words = String(sentence ?? '').split(/\s+/).filter(Boolean)
  for (const word of words) {
    // Own-property check: a word such as "constructor" would otherwise read an
    // inherited member and turn the counter into NaN.
    const alreadyCounted = Object.hasOwn(statistics.words, word) ? statistics.words[word] : 0
    statistics.words[word] = alreadyCounted + 1
  }
}
2023-04-18 13:24:20 +02:00
/**
 * Template of the counters kept for one period (year, week, month or day).
 * One counter per date keyword, plus the headers of the tasks seen in the period.
 */
const dateStats = {
  created: 0,
  refiled: 0,
  closed: 0,
  cancelled: 0,
  scheduled: 0,
  deadline: 0,
  tasks_done: []
}
2023-09-22 10:16:46 +02:00
/**
 * Increment, in a period record, the counter matching a date keyword.
 * Unknown keywords are ignored. A counter missing from the record starts at 0.
 * @param {Object} periodStat record holding the counters of one period
 * @param {string} keyword one of CLOSED, CREATED, Refiled, CANCELLED, SCHEDULED, DEADLINE
 */
function fillPeriodTime (periodStat, keyword) {
  const counterByKeyword = {
    CLOSED: 'closed',
    CREATED: 'created',
    Refiled: 'refiled',
    CANCELLED: 'cancelled',
    SCHEDULED: 'scheduled',
    DEADLINE: 'deadline'
  }
  if (!Object.hasOwn(counterByKeyword, keyword)) {
    return
  }
  const counterName = counterByKeyword[keyword]
  periodStat[counterName] = (periodStat[counterName] ?? 0) + 1
}
/**
 * For each time period (year, week, month, day) containing the given date,
 * increment the counter of the keyword; week and day records also collect the
 * task header.
 *
 * Period keys: "YYYY", "YYYY-WW" (locale week-year and week), "YYYY-MM",
 * "YYYY-MM-DD", all zero-padded with 1-based months.
 *
 * @param {string} keyword date keyword found on the line (CREATED, CLOSED, ...)
 * @param {*} dateFoundElement date as a moment object, a Date or a parseable string
 * @param {string} header header of the task the date belongs to
 */
function statisticDateFill (keyword, dateFoundElement, header) {
  const momentDate = moment(dateFoundElement)
  if (!momentDate.isValid()) {
    // an unparseable date would otherwise create "NaN-NaN" periods
    return
  }
  const convertedDate = momentDate.toDate()
  const pad = (value) => String(value).padStart(2, '0')

  const yearOfDate = convertedDate.getFullYear()
  // getMonth() is 0-based, hence the + 1
  const monthOfDate = `${yearOfDate}-${pad(convertedDate.getMonth() + 1)}`
  // getDate() is the day of the month (getDay() would be the weekday)
  const dayOfDate = `${monthOfDate}-${pad(convertedDate.getDate())}`
  // the week number is paired with the week-year so that the last days of
  // December falling in week 1 are filed under the next year
  const weekOfDate = `${momentDate.weekYear()}-${pad(momentDate.week())}`

  // Fetch the record of a period, creating it with own counters and its own
  // tasks_done list (nothing shared with the dateStats template).
  const periodRecord = (periods, key) => {
    if (!periods[key]) {
      periods[key] = { ...dateStats, tasks_done: [] }
    }
    return periods[key]
  }

  // count per year
  fillPeriodTime(periodRecord(statistics.dates.years, yearOfDate), keyword)
  // per year-week
  const weekRecord = periodRecord(statistics.dates.weeks, weekOfDate)
  fillPeriodTime(weekRecord, keyword)
  weekRecord.tasks_done.push(header)
  // count per month
  fillPeriodTime(periodRecord(statistics.dates.months, monthOfDate), keyword)
  // count per day
  const dayRecord = periodRecord(statistics.dates.days, dayOfDate)
  fillPeriodTime(dayRecord, keyword)
  dayRecord.tasks_done.push(header)
}
2023-09-22 10:16:46 +02:00
/**
 * Keep in statistics.dates.oldEst the oldest date seen so far.
 * Invalid dates are ignored.
 * @param {*} currentDate candidate date (moment object or parseable string)
 */
function findOldestDate (currentDate) {
  const candidate = moment(currentDate)
  if (!candidate.isValid()) {
    return
  }
  if (!statistics.dates.oldEst) {
    // first date seen
    statistics.dates.oldEst = currentDate
    return
  }
  const knownOldest = moment(statistics.dates.oldEst)
  if (!knownOldest.isBefore(candidate)) {
    statistics.dates.oldEst = currentDate
  }
}
2023-03-05 23:13:16 +01:00
/**********************
 * loop to parse all
 *********************/
// Read the whole org file, build one task object per headline, fill the
// statistics, then export everything as JSON.
fs.readFile(sourceFilePath, 'utf8', function (err, data) {
  if (err) {
    return console.error(err)
  }
  console.log(' parsing...')

  // true when the line mentions at least one keyword of the list
  const mentionsAny = (line, keywords) => keywords.some(keyword => line.includes(keyword))
  // moment parse format, selected by the length of the matched date text
  const dateFormatByLength = {
    15: 'YYYY-MM-DD ddd', // without time: "2022-12-21 mer."
    19: 'YYYY-MM-DD HH:mm:ss', // with seconds: "2022-11-01 00:44:12"
    21: 'YYYY-MM-DD ddd HH:mm' // with time: "2022-11-01 mar. 00:44"
  }

  // go through every line of the org file (LF or CRLF line endings)
  const everyline = data.split(/\r?\n/)
  everyline.forEach((line) => {
    // headlines are the lines starting with one or more stars and a space
    const leadingStars = line.match(/^(\*+) /)
    if (leadingStars) {
      // a new headline closes the previous task
      if (!isFirst) {
        addAndRefreshCurrentTask()
      } else {
        isFirst = false
      }
      isHeader = true
      // the level is the number of LEADING stars only: stars used later in the
      // title (bold markup) must not count
      currentTask.level = leadingStars[1].length

      const cleanedHeader = cleanHeader(line)
      headers.push(cleanedHeader)
      currentTask.header = cleanedHeader

      stateKeywordList.forEach(keyword => {
        if (lineHasKeyword(line, keyword)) {
          currentTask.state = keyword
        }
      })

      // tags form one group at the end of the headline, e.g. ":work:urgent:"
      const tagGroup = line.match(/(?:^|\s):((?:[\w@#%]+:)+)\s*$/)
      if (tagGroup) {
        tagGroup[1].split(':').filter(Boolean).forEach(tag => {
          const alreadyCounted = Object.hasOwn(statistics.tags, tag) ? statistics.tags[tag] : 0
          statistics.tags[tag] = alreadyCounted + 1
          currentTask.tags.push(tag)
        })
      }
      // ------------- end of the headline analysis -------------
    } else {
      isHeader = false
    }

    // body lines: classify creation, closing and logbook dates
    const dateFound = searchDate(line)
    if (dateFound) {
      statistics.dates.havingDate += 1
      const dateText = dateFound[0]
      const dateFormat = dateFormatByLength[dateText.length]
      // unknown shapes fall back to the leading "YYYY-MM-DD" part
      const convertedDate = dateFormat ? moment(dateText, dateFormat) : dateText.substring(0, 10)
      dateKeywordList.forEach(keyword => {
        if (!lineHasSubstring(line, keyword)) {
          return
        }
        statisticDateFill(keyword, convertedDate, currentTask.header)
        findOldestDate(convertedDate)
        currentTask.dates[keyword] = moment(convertedDate).format()
      })
    } else {
      statistics.dates.havingNoDate += 1
      // A line belongs to the task corpus when it is not a headline, is not
      // empty and mentions none of the date / state / section keywords.
      // NOTE(review): the original test compared indexOf(<array>) with -1 and
      // could never pass, so the corpus stayed empty; "mentions no keyword" is
      // the presumed intent, confirm.
      const isCorpusLine = !isHeader &&
        line.length > 0 &&
        !mentionsAny(line, dateKeywordList) &&
        !mentionsAny(line, stateKeywordList) &&
        !mentionsAny(line, sectionKeywordList)
      if (isCorpusLine) {
        const cleanedLine = line.replace(/ {2,}/g, ' ').trim()
        currentTask.corpus += `${cleanedLine}\n`
      }
    }
  })

  // add the last parsed task
  addAndRefreshCurrentTask()
  console.log('tasks : ', tasksObjectsForJsonExport.length)
  console.log(tada + ' parsing fini ' + tada)

  // sort the statistics by key before export
  console.log('write file ', outputAbsolutePath, outputFileNameJson)
  statistics.dates.years = sortByKey(statistics.dates.years)
  statistics.dates.weeks = sortByKey(statistics.dates.weeks)
  statistics.dates.months = sortByKey(statistics.dates.months)
  statistics.dates.days = sortByKey(statistics.dates.days)
  statistics = sortByKey(statistics)

  const jsonContent = {
    statistics: {
      lines_count: everyline.length,
      headers_count: headers.length,
      statistics
    },
    meta_data: {
      author: '@tykayn@mastodon.Cipherbliss.com',
      generated_at: new Date(),
      // sourceFilePath already ends with the file name
      generated_from_file: sourceFilePath,
      sources: 'https://forge.chapril.org/tykayn/org-report-stats.git'
    },
    tasks_list: tasksObjectsForJsonExport
  }
  if (writeJsonAfterParse) {
    writeFileInOuputFolderFromJsonObject(outputFileNameJson, jsonContent)
  }
})
2023-09-22 10:16:46 +02:00
/**
 * Tell whether a headline is in the given state, i.e. whether the first word
 * after the leading stars is exactly the keyword.
 * When it is, the line is stored in headersByKind[keyword] and
 * statistics[keyword] is incremented.
 * @param {string} line
 * @param {string} [keyword='TODO']
 * @returns {boolean}
 */
function lineHasKeyword (line, keyword = 'TODO') {
  // whole-word comparison: "* TODOLIST" must not count as a TODO headline
  const firstWord = String(line).match(/^\*+\s+(\S+)/)
  const isFound = firstWord !== null && firstWord[1] === keyword
  if (isFound) {
    createNewHeaderKind(keyword)
    headersByKind[keyword].push(line)
    statistics[keyword] = (statistics[keyword] || 0) + 1
  }
  return isFound
}
2023-09-22 10:16:46 +02:00
/**
 * Tell whether a line contains the keyword; statistics[keyword] is
 * incremented only when it does.
 * @param {string} line
 * @param {string} keyword
 * @returns {boolean}
 */
function lineHasSubstring (line, keyword) {
  const isFound = line.includes(keyword)
  if (isFound) {
    statistics[keyword] = (statistics[keyword] || 0) + 1
  }
  return isFound
}
2023-09-22 10:16:46 +02:00
/**
 * Make sure headersByKind holds a list for the keyword, keeping an existing one.
 * @param {string} keyword
 * @returns {Array} the list stored for the keyword
 */
function createNewHeaderKind (keyword) {
  if (!headersByKind[keyword]) {
    headersByKind[keyword] = []
  }
  return headersByKind[keyword]
}
2023-03-05 23:13:16 +01:00
/**
 * Search a line for a date, with optional day name and optional time:
 *   YYYY-MM-DD
 *   YYYY-MM-DD ddd        (day name of 2 to 4 letters, optional trailing dot)
 *   YYYY-MM-DD ddd HH:mm
 *   YYYY-MM-DD HH:mm:ss
 * The longest form present is matched.
 *
 * @param {string} line
 * @returns {(RegExpMatchArray|undefined)} the match (text in [0]), or undefined when the line has no date
 */
function searchDate (line) {
  // the day name must be followed by a space, a closing bracket or the end of
  // the line, so that it is not cut in the middle of a longer word
  const datePattern = /\d{4}-\d{2}-\d{2}(?: \p{L}{2,4}\.?(?=[\s>\]]|$))?(?: \d{2}:\d{2}(?::\d{2})?)?/u
  return String(line).match(datePattern) ?? undefined
}
/**
 * Get the title of a headline: the line without its leading stars, state
 * keyword, bracketed parts (links, priorities, timestamps, progress cookies)
 * and trailing tags.
 * @param {string} line
 * @returns {string} the cleaned title, whitespace collapsed and trimmed
 */
function cleanHeader (line) {
  // leading stars
  let title = String(line).replace(/^\*+\s+/, '')
  // state keyword, only as the first whole word: a keyword used later in the
  // title is part of the text
  const firstWord = title.split(/\s+/, 1)[0]
  if (stateKeywordList.includes(firstWord)) {
    title = title.slice(firstWord.length)
  }
  // links first ([[target]] or [[target][label]]), then every other bracket
  // pair; non-greedy so that text between two brackets is kept
  title = title.replace(/\[\[[^\]]*\](?:\[[^\]]*\])?\]/g, '')
  title = title.replace(/\[[^[\]]*\]/g, '')
  // tag group at the very end of the line only, so "10:30" in a title survives
  title = title.replace(/(^|\s):(?:[\w@#%]+:)+\s*$/, '')
  return title.replace(/\s+/g, ' ').trim()
}
2023-04-18 12:23:52 +02:00
/**
 * Build a copy of an object literal with its keys inserted in sorted order
 * (default string sort). The input object is not modified.
 * @param {Object} objectStuff
 * @returns {Object}
 */
function sortByKey (objectStuff) {
  const sorted = {}
  for (const key of Object.keys(objectStuff).sort()) {
    sorted[key] = objectStuff[key]
  }
  return sorted
}
2023-04-18 13:24:20 +02:00
/**
 * List the pairs of an object literal by decreasing numeric value.
 * @param {Object} literalobject object whose values are numbers
 * @returns {Array} array of [key, value] pairs, highest value first
 */
function sortByValue (literalobject) {
  // Object.entries yields real array elements; assigning named properties on
  // an array would leave it empty and make sort() a no-op
  return Object.entries(literalobject).sort(function (a, b) {
    return b[1] - a[1]
  })
}
2023-04-18 12:23:52 +02:00
2023-09-22 10:16:46 +02:00
/**
 * Write an object as pretty-printed JSON (2-space indent) into the output folder.
 * The returned promise settles once the write has finished. A write failure is
 * logged and reported through the resolved value, not through a rejection.
 * @param {string} fileName name of the file, appended to outputAbsolutePath
 * @param {*} jsonObjectThing data to serialize
 * @returns {Promise<boolean>} true when the file was written, false otherwise
 */
export async function writeFileInOuputFolderFromJsonObject (fileName, jsonObjectThing) {
  const serialized = JSON.stringify(jsonObjectThing, null, 2)
  // fs.writeFile is callback based: awaiting its return value would not wait
  // for the write, so the callback is wrapped in a promise
  return new Promise((resolve) => {
    fs.writeFile(`${outputAbsolutePath}${fileName}`, serialized, 'utf8', (err) => {
      if (err) {
        console.error(`Error writing file: ${err}`)
        resolve(false)
        return
      }
      console.log(`\n ${gift} File ${fileName} is written successfully!`)
      resolve(true)
    })
  })
}