page fr osm wiki

This commit is contained in:
Tykayn 2025-01-12 12:28:59 +01:00 committed by tykayn
parent 400e1b2b42
commit 6b8e111d77
4 changed files with 667 additions and 589 deletions

View File

@ -6,18 +6,19 @@ import https from 'https'
import moment from 'moment' import moment from 'moment'
import Parser from 'rss-parser' import Parser from 'rss-parser'
import { load } from 'cheerio' import {load} from 'cheerio'
let local_node_env_conf = dotenv.config() let local_node_env_conf = dotenv.config()
const myArgs = process.argv.slice(2) const myArgs = process.argv.slice(2)
export const reallySendPost = hasCliArgument('--force') export const reallySendPost = hasCliArgument('--force')
console.log('reallySendPost', reallySendPost)
export const folderBlogPostsPreview = process.cwd() + '/assets/blog_posts_medias/' export const folderBlogPostsPreview = process.cwd() + '/assets/blog_posts_medias/'
/**
 * Pick a random integer between two bounds, both included.
 *
 * @param {number} min - lowest value that may be returned
 * @param {number} max - highest value that may be returned
 * @returns {number} an integer n such that min <= n <= max
 */
export function randomIntFromInterval(min, max) {
    // The previous version ended with `| 1`, a bitwise OR that switched the
    // lowest bit on: every result was odd, and an even `max` could be
    // exceeded (for instance (2, 2) produced 3).
    return Math.floor(Math.random() * (max - min + 1) + min)
}
/**
 * Return one element of the list, picked at a random index.
 *
 * @param {Array} listItems - list to pick from
 * @returns {*} the picked element, or undefined when the list is empty
 */
export function getRandomElementOfArray(listItems) {
    const randomIndex = Math.floor(Math.random() * listItems.length)
    return listItems[randomIndex]
}
@ -41,7 +42,7 @@ export let defaultConfigMasto = {
postObject: {}, postObject: {},
} }
/**
 * Look up the access token configured for an author.
 * The token is read from the environment variable named
 * `TOKEN_` followed by the author name in upper case.
 *
 * @param {string} author - account nickname
 * @returns {string|undefined} the token value, which callers can use as a
 * truthy "is present" flag; undefined when no author or no token is set
 */
export function tokenForAuthorIsPresentInDotEnv(author) {
    // Without an author there is no variable to look up; avoid the
    // TypeError that calling toUpperCase() on undefined would raise.
    if (!author) {
        return undefined
    }
    return process.env['TOKEN_' + author.toUpperCase()]
}
@ -50,7 +51,7 @@ export function tokenForAuthorIsPresentInDotEnv (author) {
* @param userNickName * @param userNickName
* @returns {Mastodon} * @returns {Mastodon}
*/ */
export function createMastoFetcherWithAuthorLogin (userNickName) { export function createMastoFetcherWithAuthorLogin(userNickName) {
let accessToken = process.env['TOKEN_' + userNickName.toUpperCase()] let accessToken = process.env['TOKEN_' + userNickName.toUpperCase()]
const masto = new Masto({ const masto = new Masto({
access_token: accessToken, access_token: accessToken,
@ -64,7 +65,7 @@ export function createMastoFetcherWithAuthorLogin (userNickName) {
* @param config * @param config
* @returns {*} * @returns {*}
*/ */
export default function sendPostMastodon (config) { export default function sendPostMastodon(config) {
// console.log('send post', config.postObject.post_guid , config.postObject.guid ) // console.log('send post', config.postObject.post_guid , config.postObject.guid )
// override defaults with input argument // override defaults with input argument
@ -136,7 +137,7 @@ export default function sendPostMastodon (config) {
var id var id
console.log('envoi du média', config.image) console.log('envoi du média', config.image)
// upload new media // upload new media
return masto.post('media', { file: fs.createReadStream(config.image) }) return masto.post('media', {file: fs.createReadStream(config.image)})
.then(resp => { .then(resp => {
id = resp.data.id id = resp.data.id
params.media_ids = [id] params.media_ids = [id]
@ -162,7 +163,7 @@ export default function sendPostMastodon (config) {
} }
// Slugify a string // Slugify a string
export function slugify (str) { export function slugify(str) {
str = str.replace(/^\s+|\s+$/g, '') str = str.replace(/^\s+|\s+$/g, '')
// Make the string lowercase // Make the string lowercase
@ -190,7 +191,7 @@ export function slugify (str) {
* lister les noms de fichier que l'on peut publier dans un dossier. * lister les noms de fichier que l'on peut publier dans un dossier.
* retourne un tableau * retourne un tableau
*/ */
export function listFilesOfFolder (folderPath) { export function listFilesOfFolder(folderPath) {
let filesNames = [] let filesNames = []
fs.readdirSync(folderPath).map(fileName => { fs.readdirSync(folderPath).map(fileName => {
return filesNames.push(fileName) return filesNames.push(fileName)
@ -204,7 +205,7 @@ export function listFilesOfFolder (folderPath) {
* crée un dossier d'assets, avec ses sous dossiers not_published et published si ils manquent. * crée un dossier d'assets, avec ses sous dossiers not_published et published si ils manquent.
* une fois que l'on prendra une image dans le dossier non publié, on la déplacera dans le dossier des images publées. * une fois que l'on prendra une image dans le dossier non publié, on la déplacera dans le dossier des images publées.
*/ */
export function initializeFolderForPictures (folderName) { export function initializeFolderForPictures(folderName) {
try { try {
if (!fs.existsSync(folderName)) { if (!fs.existsSync(folderName)) {
fs.mkdirSync(folderName) fs.mkdirSync(folderName)
@ -219,7 +220,7 @@ export function initializeFolderForPictures (folderName) {
* @param htmlContent * @param htmlContent
* @returns {string} * @returns {string}
*/ */
export function findFirstImageInContent (htmlContent = '') { export function findFirstImageInContent(htmlContent = '') {
let result = '' let result = ''
let foundPictures = htmlContent.match(/<img\s[^>]*?src\s*=\s*['\"]([^'\"]*?)['\"][^>]*?>/) let foundPictures = htmlContent.match(/<img\s[^>]*?src\s*=\s*['\"]([^'\"]*?)['\"][^>]*?>/)
let first = '' let first = ''
@ -242,7 +243,7 @@ export function findFirstImageInContent (htmlContent = '') {
return result return result
} }
function clearLink (linkString) { function clearLink(linkString) {
linkString = linkString.replace('http:', 'https:') linkString = linkString.replace('http:', 'https:')
linkString = linkString.replace('https://www.ailesse.info/~tykayn/bazar/kotlife', 'https://www.tykayn.fr/wp-content/uploads/i/kotlife') linkString = linkString.replace('https://www.ailesse.info/~tykayn/bazar/kotlife', 'https://www.tykayn.fr/wp-content/uploads/i/kotlife')
linkString = linkString.replace('https://blog.artlemoine.com/public/i', 'https://www.tykayn.fr/wp-content/uploads/i') linkString = linkString.replace('https://blog.artlemoine.com/public/i', 'https://www.tykayn.fr/wp-content/uploads/i')
@ -260,10 +261,10 @@ function clearLink (linkString) {
* @param filepath * @param filepath
* @returns {Promise<unknown>} * @returns {Promise<unknown>}
*/ */
export function downloadImage (url, filepath) { export function downloadImage(url, filepath) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const options = { const options = {
headers: { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.52' } headers: {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.52'}
} }
https.get(url, options, (res) => { https.get(url, options, (res) => {
@ -289,11 +290,11 @@ export function downloadImage (url, filepath) {
* @returns {Promise<Object>} * @returns {Promise<Object>}
* @constructor * @constructor
*/ */
export function CropPicture (pictureName, width = 500, height = 300) { export function CropPicture(pictureName, width = 500, height = 300) {
return sharp(pictureName) return sharp(pictureName)
.extract({ left: 0, top: 0, width, height }) .extract({left: 0, top: 0, width, height})
.toFile('thumb_' + pictureName, function (err) { .toFile('thumb_' + pictureName, function (err) {
if (err) console.log(err) if (err) console.log(err)
}) })
@ -303,7 +304,7 @@ export function CropPicture (pictureName, width = 500, height = 300) {
* prendre un post parmi tous ceux du blog, dans ceux qui ont été publiés * prendre un post parmi tous ceux du blog, dans ceux qui ont été publiés
* @returns {*} * @returns {*}
*/ */
export function getRandomLinkGeneral (tkpostsjson) { export function getRandomLinkGeneral(tkpostsjson) {
let filteredLinks = [] let filteredLinks = []
if (tkpostsjson[0].post_status) { if (tkpostsjson[0].post_status) {
@ -319,7 +320,7 @@ export function getRandomLinkGeneral (tkpostsjson) {
* @param postContent * @param postContent
* @param configPost * @param configPost
*/ */
export function findPictureAndSendPost (postContent, configPost) { export function findPictureAndSendPost(postContent, configPost) {
let firstPictureSource = findFirstImageInContent(postContent) let firstPictureSource = findFirstImageInContent(postContent)
@ -364,14 +365,14 @@ export function findPictureAndSendPost (postContent, configPost) {
* @param argument * @param argument
* @returns {boolean} * @returns {boolean}
*/ */
export function hasCliArgument(argument) {
    // myArgs is the module-level copy of process.argv without the node
    // binary and script path; report whether the flag is among them.
    return myArgs.includes(argument)
}
let parser = new Parser() let parser = new Parser()
export function diffDaysBetweenTwoDates (date1, date2) { export function diffDaysBetweenTwoDates(date1, date2) {
const a = moment(date1) const a = moment(date1)
const b = moment(date2) const b = moment(date2)
@ -379,7 +380,7 @@ export function diffDaysBetweenTwoDates (date1, date2) {
} }
export function filterRegionAgendaDuLibreEvents (events_list, filter_critera) { export function filterRegionAgendaDuLibreEvents(events_list, filter_critera) {
let selection = [] let selection = []
events_list.forEach(item => { events_list.forEach(item => {
if (item.region_id == filter_critera) { if (item.region_id == filter_critera) {
@ -391,7 +392,7 @@ export function filterRegionAgendaDuLibreEvents (events_list, filter_critera) {
moment.locale('fr') moment.locale('fr')
export function groupEventsByDay (events_list) { export function groupEventsByDay(events_list) {
let selection = {} let selection = {}
events_list.forEach(item => { events_list.forEach(item => {
@ -405,7 +406,7 @@ export function groupEventsByDay (events_list) {
return selection return selection
} }
export function convertHTMLtoMD (htmlContent) { export function convertHTMLtoMD(htmlContent) {
const $ = load(htmlContent) const $ = load(htmlContent)
translateNode($) translateNode($)
@ -413,20 +414,20 @@ export function convertHTMLtoMD (htmlContent) {
return $.html() return $.html()
} }
export function translateNode ($) { export function translateNode($) {
const elementsToTranslate = [ const elementsToTranslate = [
{ tag: 'h1', mdTag: '##' }, {tag: 'h1', mdTag: '##'},
{ tag: 'h2', mdTag: '###' }, {tag: 'h2', mdTag: '###'},
{ tag: 'h3', mdTag: '####' }, {tag: 'h3', mdTag: '####'},
{ tag: 'h4', mdTag: '#####' }, {tag: 'h4', mdTag: '#####'},
{ tag: 'h5', mdTag: '######' }, {tag: 'h5', mdTag: '######'},
{ tag: 'p', mdTag: '\n\n' }, {tag: 'p', mdTag: '\n\n'},
{ tag: 'ul', preserveChildren: true, transformChild: ($child) => '\n- ' + $.text($child) }, {tag: 'ul', preserveChildren: true, transformChild: ($child) => '\n- ' + $.text($child)},
{ tag: 'ol', preserveChildren: true, transformChild: ($child, idx) => `\n${idx + 1}. ${$.text($child)}` }, {tag: 'ol', preserveChildren: true, transformChild: ($child, idx) => `\n${idx + 1}. ${$.text($child)}`},
{ tag: 'a', parseAttr: ('href', link => `[${link}](${link})`) }, {tag: 'a', parseAttr: ('href', link => `[${link}](${link})`)},
{ tag: 'strong', mdFormat: '$&' }, {tag: 'strong', mdFormat: '$&'},
{ tag: 'em', mdFormat: '_$&_' }, {tag: 'em', mdFormat: '_$&_'},
{ tag: 'code', mdFormat: '`$&`' }, {tag: 'code', mdFormat: '`$&`'},
] ]
elementsToTranslate.forEach(element => { elementsToTranslate.forEach(element => {
@ -452,7 +453,7 @@ export function translateNode ($) {
}) })
} }
export function getRequestOptions (host, port, path) { export function getRequestOptions(host, port, path) {
const options = { const options = {
host: host, host: host,
port: port, port: port,
@ -467,7 +468,7 @@ export function getRequestOptions (host, port, path) {
return options return options
} }
export async function sendGetRequest (options) { export async function sendGetRequest(options) {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const req = http.request(options, (res) => { const req = http.request(options, (res) => {
let body = Buffer.alloc(0) let body = Buffer.alloc(0)
@ -514,7 +515,7 @@ const splitTextIntoChunks = (text, limit) => {
return chunks return chunks
} }
export function splitLongDescription (text, limit) { export function splitLongDescription(text, limit) {
let chunks = splitTextIntoChunks(text, limit) let chunks = splitTextIntoChunks(text, limit)
if (chunks) { if (chunks) {
if (chunks[0]) { if (chunks[0]) {

View File

@ -1,12 +1,8 @@
import fetch from "node-fetch"
import rp from "request-promise";
import $ from "cheerio"; import $ from "cheerio";
import fs from "fs";
import path from 'path';
let url = 'https://wiki.openstreetmap.org/wiki/FR:Key:building:material'; let url = 'https://wiki.openstreetmap.org/wiki/FR:Key:building:material';
let pictureDescrFinder = '.d_image img' let pictureDescrFinder = '.d_image img'
// rp(url).then(function (html) { // rp(url).then(function (html) {
// getSourceOfDescriptorPageContent(html) // getSourceOfDescriptorPageContent(html)
@ -30,7 +26,7 @@ export function getSourceOfDescriptorPageContent(HTMLcontent) {
} }
console.log(sourcesSet) console.log(sourcesSet)
console.log(selectedPicture) console.log(selectedPicture)
return selectedPicture; return `https://wiki.openstreetmap.org${selectedPicture.trim()}`;
} else { } else {
console.log("pas d'image de description dans le HTML") console.log("pas d'image de description dans le HTML")
} }

View File

@ -1,7 +1,15 @@
/**
* Envoi d'une page du wiki au hasard parmi les pages en Français répertoriées dans le json all_wiki_osm.json
* Cette liste d'environ 3000 pages du wiki contient toutes les pages francophones, pas seulement les tags.
* Utiliser l'argument --force pour réellement envoyer le post avec le compte Curator.
* nécessite d'avoir le fichier .env rempli.
*/
import fs from 'fs'; import fs from 'fs';
import path, {dirname} from 'path'; import path, {dirname} from 'path';
import axios from 'axios'; import axios from 'axios';
import {fileURLToPath} from 'url'; import {fileURLToPath} from 'url';
import {getSourceOfDescriptorPageContent} from "./osm_get_description_picture.mjs";
import sendPostMastodon, {downloadImage, randomIntFromInterval} from "./libs/utils.mjs";
const __dirname = dirname(fileURLToPath(import.meta.url)); const __dirname = dirname(fileURLToPath(import.meta.url));
@ -13,10 +21,22 @@ const randomIndex = Math.floor(Math.random() * wikiArticles['elements'].length);
console.log('wikiArticles[\'elements\']', wikiArticles['elements'].length) console.log('wikiArticles[\'elements\']', wikiArticles['elements'].length)
const selectedArticle = wikiArticles['elements'][randomIndex]; const selectedArticle = wikiArticles['elements'][randomIndex];
console.log('selectedArticle', selectedArticle)
// Récupérer le titre et la description de l'article // Récupérer le titre et la description de l'article
const title = selectedArticle.title; const title = selectedArticle.title;
const pageId = selectedArticle.pageid; const pageId = selectedArticle.pageid;
/**
 * Build the text of the "OSM tag of the day" post.
 * The message starts and ends with a line break and holds, one per line:
 * the title with key=value, the link, the trimmed description, the trimmed
 * long description (each empty when missing) and the hashtags.
 *
 * @param {Object} post_obj - object with key, value, link and optionally
 * description and long_desc
 * @returns {string} the message body
 */
function makePostMessageFromObj(post_obj) {
    const {key, value, link, description, long_desc} = post_obj
    const shortText = description ? description.trim() : ''
    const longText = long_desc ? long_desc.trim() : ''
    const lines = [
        '',
        `# Le tag OSM du jour : ${key}=${value} 🗺🏷`,
        // template literal on purpose: join() would turn undefined into ''
        `${link}`,
        shortText,
        longText,
        '#osm #openstreetmap #wiki #rtfw',
        '',
    ]
    return lines.join('\n')
}
// Récupérer le contenu de l'article via l'API de MediaWiki // Récupérer le contenu de l'article via l'API de MediaWiki
const wikiApiUrl = `https://wiki.openstreetmap.org/w/api.php`; const wikiApiUrl = `https://wiki.openstreetmap.org/w/api.php`;
const params = { const params = {
@ -26,10 +46,23 @@ const params = {
prop: 'text', prop: 'text',
section: 0 section: 0
}; };
// console.log('params', params)
let message = ''
let download_description_src = ''
axios.get(wikiApiUrl, {params}) axios.get(wikiApiUrl, {params})
.then(response => { .then(response => {
// console.log('wikiApiUrl', wikiApiUrl)
let keys = Object.keys(response.data)
// console.log('keys', keys)
const articleContent = response.data.parse.text['*']; const articleContent = response.data.parse.text['*'];
let resultpicture = getSourceOfDescriptorPageContent(articleContent)
// console.log('resultpicture', resultpicture)
if (resultpicture === "pas d'image de description dans le HTML") {
download_description_src = `${process.cwd()}/assets/blog_posts_medias/osm_wiki_description_page.jpg`
}
const firstParagraph = articleContent.split('<p>')[1].split('</p>')[0]; const firstParagraph = articleContent.split('<p>')[1].split('</p>')[0];
// Sanitizer le texte // Sanitizer le texte
@ -38,13 +71,67 @@ axios.get(wikiApiUrl, {params})
return String.fromCharCode(code); return String.fromCharCode(code);
}); });
console.log('image', download_description_src)
console.log('-----------------')
// Générer le message // Générer le message
const message = `Je vous recommande de lire l'article "${title}" sur le wiki d'OpenStreetMap : message = `Le page #OSM du jour : ${title}.
https://wiki.openstreetmap.org/wiki/${title}
${sanitizedTextWithoutEntities} ${sanitizedTextWithoutEntities}
Lire la suite sur : https://wiki.openstreetmap.org/wiki/${title}`;
#openstreetmap #wiki #RTFW
`;
console.log(message); console.log(message);
}) })
.catch(error => { .catch(error => {
console.error(error); console.error(error);
}); });
/**
 * Publish the wiki page of the day with the "curator" account.
 *
 * When a description picture URL is given, it is downloaded first and used
 * as the post image; if the download fails, the post is still sent with one
 * of the numbered default pictures.
 *
 * @param {string} message - text of the post
 * @param {string} [download_description_src] - URL of the description
 * picture to download; falsy to use a default picture
 * @returns {*} what sendPostMastodon returns when no download is needed,
 * otherwise a promise that settles once the post has been handed over
 */
export function sendMessageWikiOSMOfTheDay(message, download_description_src) {
    const configPost = {
        author: 'curator',
        // default illustration, replaced below when the download succeeds
        image: `${process.cwd()}/assets/blog_posts_medias/assets/osm_post_${randomIntFromInterval(1, 5)}.jpg`,
        message,
    }

    if (!download_description_src) {
        console.log('no image description')
        return sendPostMastodon(configPost)
    }

    const filePathImage = `${process.cwd()}/assets/blog_posts_medias/osm_wiki_description_page.jpg`
    console.log("firstPictureSource found", download_description_src)
    console.log('on récupère l image de description : ', filePathImage)

    return downloadImage(download_description_src, filePathImage)
        .then(
            () => {
                console.log('média téléchargé, on envoie le post')
                configPost.image = filePathImage
            },
            (err) => {
                // download failed: keep the default picture
                console.log('pas dimage trouvée pour l URL ', download_description_src, err)
            }
        )
        // Send exactly once, whatever the download outcome. The previous
        // chain called sendPostMastodon again from a trailing catch when
        // the first send threw, which could publish the post twice.
        .then(() => sendPostMastodon(configPost))
        .catch((err) => {
            console.log('erreur avec cette URL ', download_description_src, err)
        })
}
// NOTE(review): this call runs synchronously at module load, before the
// axios.get(...) promise above has resolved. At this point `message` and
// `download_description_src` still hold their initial '' values, because
// they are only assigned inside the `.then` callback.
// Presumably this call belongs at the end of that callback — confirm.
sendMessageWikiOSMOfTheDay(message, download_description_src)

View File

@ -1,6 +1,8 @@
// https://www.mediawiki.org/wiki/Manual:Random_page // https://www.mediawiki.org/wiki/Manual:Random_page
/** /**
* Post de page aléatoire du wiki osm avec le compte curator * Post de page aléatoire du wiki osm avec le compte curator
* utiliser l'argument --force pour réellement envoyer le post avec le compte Curator.
* nécessite d'avoir le fichier .env rempli.
*/ */
import fetch from "node-fetch" import fetch from "node-fetch"
import rp from "request-promise"; import rp from "request-promise";
@ -168,7 +170,7 @@ export default function getElementCartographique() {
// let imgSelector = ".description a.image img" // let imgSelector = ".description a.image img"
console.log("✅ cette page existe bien en Français sur le wiki OSM") console.log("✅ cette page existe bien en Français sur le wiki OSM")
foundExistingWikiPageInFrench = true; foundExistingWikiPageInFrench = true;
sendMessageWikiTagOfTheDay(makePostMessageFromObj(configPost), configPost.download_description_src) sendMessageWikiOSMOfTheDay(makePostMessageFromObj(configPost), configPost.download_description_src)
}, (err) => { }, (err) => {
@ -192,7 +194,7 @@ export default function getElementCartographique() {
console.log("result", configPost) console.log("result", configPost)
rp(configPost.link).then((result) => { rp(configPost.link).then((result) => {
console.log("oui cette page existe!", url) console.log("oui cette page existe!", url)
sendMessageWikiTagOfTheDay(makePostMessageFromObj(result)) sendMessageWikiOSMOfTheDay(makePostMessageFromObj(result))
}, (err) => { }, (err) => {
console.log("hé non. WTF ?") console.log("hé non. WTF ?")
// getElementCartographique() // getElementCartographique()
@ -277,15 +279,7 @@ function getQuery() {
} }
export function sendMessageWikiOSMOfTheDay(message, download_description_src) {
// run
const res = getElementCartographique()
// console.log("res", res)
function sendMessageWikiTagOfTheDay(message, download_description_src) {
let configPost = { let configPost = {