add compare function for titles ADL

This commit is contained in:
Tykayn 2022-01-13 11:18:44 +01:00 committed by tykayn
parent a297e20777
commit f3cb3673a9
5 changed files with 16747 additions and 18 deletions

View File

@ -27,7 +27,7 @@ npm i
ajouter un cronjob qui exécute régulièrement (une fois par jour)
```bash
ts-node index.ts
ts-node importers/adl.ts
```
# Exemple de requête
@ -131,7 +131,7 @@ Fait avec puppeteer, lancer la commande:
ts-node scrapers/ccpl.js
`
Pour l'agenda du libre:
Pour l'agenda du libre on récupère un fichier json:
`
ts-node scrapers/adl.js
@ -139,7 +139,13 @@ ts-node scrapers/adl.js
On peut aussi filtrer l'agenda par tag en ajoutant un argument à la commande :
`
ts-node scrapers/adl.js openstreetmap
`
`
# data import to mobilizon
Il faut que le serveur mobilizon soit fonctionnel (et lancé, avec `mix phx.server` ) et que le script node ait les accès à la base de données postgresql. Voir les paramètres dans [config.ts].
Ensuite on fait fonctionner un importateur, comme celui de l'agenda du libre:
---

64
importers/adl.ts Normal file
View File

@ -0,0 +1,64 @@
import * as fs from "fs";
import utils from "../utils";
// Shared helper instance; the importer uses it for the PostgreSQL client and
// for the Agenda du Libre helpers.
const utilsTools = new utils();
console.log('importation depuis le fichier local de l\' agenda du libre');

// Path of the JSON file consumed by the importer.
const filepath = './output/adl_json.json';
// Parsed content of that file; assigned once the file has been read.
let filecontent;

// NOTE(review): the three bindings below do not appear to be referenced
// anywhere else in this script — confirm before removing them.
let counterOfEventsToAdd = 0;
const addEventQuery = utilsTools.agendadulibre.addQuery;
const runCreationQuery = utilsTools.runCreationQuery;
/**
 * Import the scraped Agenda du Libre events.
 *
 * Steps:
 *  1. connect to PostgreSQL and index every stored Mobilizon event with
 *     `agendadulibre.uniqTitleBDD`;
 *  2. read and parse the scraped JSON file located at `filepath`;
 *  3. queue a creation query for every scraped event that
 *     `agendadulibre.doesEventExistsFromJsonScrap` does not report as stored.
 *
 * The returned promise settles only after the file has been processed and the
 * PostgreSQL connection has been closed. File and JSON errors are logged and
 * end the import; database errors reject the promise.
 */
async function runImportEvents(): Promise<void> {
    console.log('File exists');

    // build list of existing events in mobilizon database
    await utilsTools.setupClientPostgresql();
    await utilsTools.client.connect();

    try {
        console.log('✅ OK connecté à postgresql');
        console.log(' ');

        const res = await utilsTools.client.query('SELECT * from events');
        console.log('💾 évènements enregistrés dans mobilizon : ', res.rows.length);

        res.rows.forEach((bdd_event: any) => {
            console.log('eventObject', bdd_event);
            utilsTools.localMobilizonEventsByTitle.push(utilsTools.agendadulibre.uniqTitleBDD(bdd_event));
        });
        console.log('utilsTools.localMobilizonEventsByTitle', utilsTools.localMobilizonEventsByTitle);

        // get json file for ADL; awaited so that this function only resolves
        // once the events have actually been processed
        let data: string;
        try {
            data = await fs.promises.readFile(filepath, 'utf8');
        } catch (err) {
            console.log(err);
            return;
        }

        let parsed: unknown;
        try {
            parsed = JSON.parse(data);
        } catch (err) {
            console.log('Invalid JSON in ' + filepath + ': ', err);
            return;
        }
        if (!Array.isArray(parsed)) {
            console.log('Unexpected content in ' + filepath + ': a list of events was expected');
            return;
        }
        filecontent = parsed;

        // compare events, import only new events
        for (const event of parsed) {
            const alreadyStored = utilsTools.agendadulibre.doesEventExistsFromJsonScrap(event);
            if (!alreadyStored) {
                utilsTools.agendadulibre.addQueryFromJsonScrap(event);
            }
        }
    } finally {
        // Release the connection; an open client keeps the Node.js process alive.
        await utilsTools.client.end();
    }
}
// Entry point: start the import only when the scraped JSON file can be stat'ed.
fs.stat(filepath, (statError) => {
    if (statError == null) {
        runImportEvents();
        return;
    }

    if (statError.code === 'ENOENT') {
        // file does not exist
        console.log('Scrapped json file does not exist. Run a scraper like "ts-node scrapers/adl.ts" before using this importer: ', statError.code);
        return;
    }

    console.log('Some other error: ', statError.code);
});

View File

@ -0,0 +1,57 @@
# Event model from mobilizon
```json
{
id: '22',
title: 'Chambéry: Mapathon Missing Maps, Le lundi 13 juin 2022 de 18h00 à 20h00.',
description: '<p></p><p><img src="https://www.missingmaps.org/assets/graphics/meta/MM-White.svg" alt="..." width="94px"/>Un mapathon c&#39;est quoi? C&#39;est un atelier en ligne de cartographie solidaire et participative en soutien aux organisations humanitaires et/ou de développement.</p><p>CartONG organise ce mapathon dans le cadre du projet Missing Maps visant à cartographier toutes les zones encore invisibles sur les cartes, qui permettent par la suite aux communautés locales et acteur·rice·s de l&#39;humanitaire et du développement de pouvoir agir plus efficacement en cas de crise ou initier des projets de développement local. </p><p>Avec quel outil? La plateforme de cartographie libre et contributive<strong> </strong><a href="https://www.openstreetmap.org/"><strong>OpenStreetMap </strong></a>(OSM, <strong>le «Wikipédia des cartes»</strong>) où tout le monde peut participer à la cartographie de n&#39;importe quelle zone de la planète: il suffit d&#39;un ordinateur, d&#39;une souris et d&#39;une connexion internet! Aucune connaissance en cartographie ou en informatique nest requise.</p><p>Grâce à la couverture globale d&#39;images satellites disponibles aujourd&#39;hui, il est possible de tracer facilement routes, bâtiments ou cours d&#39;eau, autant d&#39;informations très utiles pour les organisations humanitaires et de développement sur le terrain.</p><p>Pas besoin d&#39;être un·e expert·e, c&#39;est conviviale et accessible à tout le monde!</p><p>Pour s&#39;inscrire : <a href="https://www.eventbrite.ca/e/billets-en-ligne-mapathons-missing-maps-2022-2023-133090064967" target="_blank" rel="noopener noreferrer ugc">https://www.eventbrite.ca/e/billets-en-ligne-mapathons-missing-maps-2022-2023-133090064967</a></p><p></p>',
organizer_actor_id: '3',
physical_address_id: null,
inserted_at: 2022-01-10T11:47:06.000Z,
updated_at: 2022-01-10T11:47:06.000Z,
url: 'http://localmob.lan/events/1a442e44-973d-46be-b730-2ac74cf8df92',
local: true,
uuid: '1a442e44-973d-46be-b730-2ac74cf8df92',
attributed_to_id: null,
online_address: 'https://www.agendadulibre.org/events/24707',
phone_address: null,
visibility: 'public',
status: 'confirmed',
join_options: 'free',
begins_on: 2022-06-13T16:00:00.000Z,
ends_on: 2022-06-13T16:00:00.000Z,
publish_at: null,
category: 'meeting',
slug: null,
picture_id: null,
options: {
offers: [],
program: null,
timezone: 'Europe/Paris',
attendees: [],
is_online: false,
show_end_time: true,
show_start_time: true,
comment_moderation: 'allow_all',
anonymous_participation: true,
participation_condition: [],
show_participation_price: false,
maximum_attendee_capacity: 200,
remaining_attendee_capacity: 0,
hide_organizer_when_group_event: false,
show_remaining_attendee_capacity: false
},
draft: false,
participant_stats: {
creator: 1,
rejected: 0,
moderator: 0,
participant: 0,
not_approved: 0,
administrator: 0,
not_confirmed: 0
},
metadata: null,
language: 'fr'
}
```

File diff suppressed because one or more lines are too long

102
utils.ts
View File

@ -8,19 +8,21 @@ const moment = require("moment");
const fs = require("fs");
let createEventQueryMobilizon = "mutation createEvent($organizerActorId: ID!, $attributedToId: ID, $title: String!, $description: String!, $beginsOn: DateTime!, $endsOn: DateTime, $status: EventStatus, $visibility: EventVisibility, $joinOptions: EventJoinOptions, $draft: Boolean, $tags: [String], $picture: MediaInput, $onlineAddress: String, $phoneAddress: String, $category: String, $physicalAddress: AddressInput, $options: EventOptionsInput, $contacts: [Contact]) {\n createEvent(\n organizerActorId: $organizerActorId\n attributedToId: $attributedToId\n title: $title\n description: $description\n beginsOn: $beginsOn\n endsOn: $endsOn\n status: $status\n visibility: $visibility\n joinOptions: $joinOptions\n draft: $draft\n tags: $tags\n picture: $picture\n onlineAddress: $onlineAddress\n phoneAddress: $phoneAddress\n category: $category\n physicalAddress: $physicalAddress\n options: $options\n contacts: $contacts\n ) {\n ...FullEvent\n __typename\n }\n}\n\nfragment FullEvent on Event {\n id\n uuid\n url\n local\n title\n description\n beginsOn\n endsOn\n status\n visibility\n joinOptions\n draft\n picture {\n id\n url\n name\n metadata {\n width\n height\n blurhash\n __typename\n }\n __typename\n }\n publishAt\n onlineAddress\n phoneAddress\n physicalAddress {\n ...AdressFragment\n __typename\n }\n organizerActor {\n avatar {\n id\n url\n __typename\n }\n preferredUsername\n domain\n name\n url\n id\n summary\n __typename\n }\n contacts {\n avatar {\n id\n url\n __typename\n }\n preferredUsername\n name\n summary\n domain\n url\n id\n __typename\n }\n attributedTo {\n avatar {\n id\n url\n __typename\n }\n preferredUsername\n name\n summary\n domain\n url\n id\n __typename\n }\n participantStats {\n going\n notApproved\n participant\n __typename\n }\n tags {\n ...TagFragment\n __typename\n }\n relatedEvents {\n id\n uuid\n title\n beginsOn\n picture {\n id\n url\n name\n metadata {\n width\n height\n blurhash\n __typename\n }\n __typename\n }\n physicalAddress {\n 
id\n description\n __typename\n }\n organizerActor {\n id\n avatar {\n id\n url\n __typename\n }\n preferredUsername\n domain\n name\n __typename\n }\n __typename\n }\n options {\n ...EventOptions\n __typename\n }\n metadata {\n key\n title\n value\n type\n __typename\n }\n __typename\n}\n\nfragment AdressFragment on Address {\n id\n description\n geom\n street\n locality\n postalCode\n region\n country\n type\n url\n originId\n __typename\n}\n\nfragment TagFragment on Tag {\n id\n slug\n title\n __typename\n}\n\nfragment EventOptions on EventOptions {\n maximumAttendeeCapacity\n remainingAttendeeCapacity\n showRemainingAttendeeCapacity\n anonymousParticipation\n showStartTime\n showEndTime\n offers {\n price\n priceCurrency\n url\n __typename\n }\n participationConditions {\n title\n content\n url\n __typename\n }\n attendees\n program\n commentModeration\n showParticipationPrice\n hideOrganizerWhenGroupEvent\n __typename\n}\n";
/**
* utilitaries to manipulate scraped object and prepare queries to import in mobilizon
*/
class utils {
/**
* postgres functions
*/
client: any;
createEventQueries: string;
makeQuery = () => {
this.createEventQueries = `INSERT INTO events(title, description, organizer_actor_id, inserted_at, updated_at,
uuid, url, status, category, options, participants_stats,
begins_on, ends_on)
VALUES
${this.agendadulibre.queryToAdd}
${this.osmcal.queryToAdd};`;
let createEventQueries: string = `INSERT INTO events(title, description, organizer_actor_id, inserted_at, updated_at, uuid, url, status, category, options, participants_stats, begins_on, ends_on) VALUES ${this.agendadulibre.queryToAdd} ${this.osmcal.queryToAdd};`;
this.createEventQueries = createEventQueries;
this.writeFile("event_creation_query.psql", this.createEventQueries, "psql");
}
runCreationQuery = async () => {
@ -42,7 +44,6 @@ class utils {
/**
* memorizing properties
*/
createEventQueries = "";
counterOfEventsToAdd = 0;
localMobilizonEventsByTitle: Array<string> = [];
@ -100,7 +101,7 @@ class utils {
/**
* file management
*/
static writeFile(fileName: string, data: any, formatData: any = 'json'){
writeFile(fileName: string, data: any, formatData: any = 'json') {
let dataToSave = data;
if (formatData == 'json') {
dataToSave = JSON.stringify(data, null, 4)
@ -124,7 +125,7 @@ class utils {
* importation sources
*/
osmcal = {
public osmcal = {
queryToAdd: "",
counterOfEventsToAdd: 0,
getTitle: (event: any) => {
@ -143,6 +144,7 @@ class utils {
return eventAlreadyExists;
},
addQuery: (event: any) => {
if (this.osmcal.queryToAdd) {
@ -184,17 +186,17 @@ class utils {
},
};
agendadulibre:any = {
public agendadulibre: any = {
queryToAdd: [],
queryToAddBDD: "",
counterOfEventsToAdd: 0,
doesEventExists: (event: any) => {
if (this.localMobilizonEventsByTitle.length) {
const eventAlreadyExists =
-1 !== this.localMobilizonEventsByTitle.indexOf(event.title);
-1 !== this.localMobilizonEventsByTitle.indexOf(this.agendadulibre.uniqTitle(event));
if (!eventAlreadyExists) {
if (parserConfig.debug) {
console.log('ajouter l event ', htmlEscape(event.title));
console.log('ajouter l event ', htmlEscape(this.agendadulibre.uniqTitle(event)));
}
this.agendadulibre.addQuery(event);
}
@ -204,17 +206,89 @@ class utils {
}
return false;
},
/**
 * Build the comparison key of an event coming from the Agenda du Libre
 * scraping: its `start_date`, one space, then its `title`.
 * @param event scraped event; only `start_date` and `title` are read
 * @returns the key looked up in the list of stored Mobilizon event keys
 */
uniqTitle(event: any): string {
    const { start_date, title } = event;
    return start_date + ' ' + title;
},
/**
 * Build the comparison key of an event row read from the database:
 * its `begins_on`, one space, then its `title`.
 * NOTE(review): `begins_on` is concatenated as-is, so the key depends on how
 * the database driver represents that column — confirm it yields the same
 * text as the scraped-side key.
 * @param event database row; only `begins_on` and `title` are read
 * @returns the key stored for later comparison with scraped events
 */
uniqTitleBDD(event: any): string {
    const { begins_on, title } = event;
    return begins_on + ' ' + title;
},
/**
 * Tell whether an event from the scraped JSON is already known, by looking up
 * its HTML-escaped "`start_date` `title`" key in `localMobilizonEventsByTitle`.
 * When the key is not found, the event is also queued through `addQuery`.
 *
 * NOTE(review): the key is escaped and built from `start_date`, while
 * `addQueryFromJsonScrap` reads `start_time` and `uniqTitleBDD` produces
 * unescaped keys — confirm both sides can actually match.
 * NOTE(review): `addQuery` reads `event.date` / `event.content`; confirm it is
 * the intended builder for JSON-scraped events.
 *
 * @param event event object taken from the scraped JSON
 * @returns true when the key is already in `localMobilizonEventsByTitle`
 */
doesEventExistsFromJsonScrap: (event: any): boolean => {
    const comparisonKey = htmlEscape(event.start_date + ' ' + event.title);

    if (this.localMobilizonEventsByTitle.indexOf(comparisonKey) !== -1) {
        return true;
    }

    if (parserConfig.debug) {
        console.log('ajouter l event ', comparisonKey);
    }
    this.agendadulibre.addQuery(event);
    return false;
},
/**
 * Queue one `createEvent` GraphQL operation for an event taken from the
 * scraped JSON. Reads `title`, `description`, `start_time`, `end_time` and
 * `url` from the event; every other variable is a fixed value.
 * @param event event object taken from the scraped JSON
 * @returns the `agendadulibre.queryToAdd` list, including the new entry
 */
addQueryFromJsonScrap: (event: any) => {
    console.log('event', event.title);

    // Fixed event options sent with every imported event.
    const options = {
        anonymousParticipation: true,
        attendees: [],
        commentModeration: "ALLOW_ALL",
        hideOrganizerWhenGroupEvent: false,
        maximumAttendeeCapacity: 200,
        offers: [],
        participationConditions: [],
        program: "",
        remainingAttendeeCapacity: 0,
        showEndTime: true,
        showParticipationPrice: false,
        showRemainingAttendeeCapacity: false,
        showStartTime: true
    };

    // Mutation variables; key order is kept as-is so the serialized payload is unchanged.
    const variables = {
        attributedToId: null,
        beginsOn: event.start_time,
        contacts: [],
        description: "<p>" + event.description + "</p>",
        draft: false,
        endsOn: event.end_time,
        joinOptions: "FREE",
        onlineAddress: event.url,
        options,
        organizerActorId: "3",
        phoneAddress: "",
        status: "CONFIRMED",
        tags: [
            "osm",
            "openstreetmap",
            "imported"
        ],
        title: event.title,
        visibility: "PUBLIC"
    };

    const pendingQueries = this.agendadulibre.queryToAdd;
    pendingQueries.push({
        operationName: "createEvent",
        query: createEventQueryMobilizon,
        variables
    });
    return this.agendadulibre.queryToAdd;
},
addQuery: (event: any) => {
console.log('event', event.title);
this.agendadulibre.queryToAdd.push(
{
operationName: "createEvent",
query: "mutation createEvent($organizerActorId: ID!, $attributedToId: ID, $title: String!, $description: String!, $beginsOn: DateTime!, $endsOn: DateTime, $status: EventStatus, $visibility: EventVisibility, $joinOptions: EventJoinOptions, $draft: Boolean, $tags: [String], $picture: MediaInput, $onlineAddress: String, $phoneAddress: String, $category: String, $physicalAddress: AddressInput, $options: EventOptionsInput, $contacts: [Contact]) {\n createEvent(\n organizerActorId: $organizerActorId\n attributedToId: $attributedToId\n title: $title\n description: $description\n beginsOn: $beginsOn\n endsOn: $endsOn\n status: $status\n visibility: $visibility\n joinOptions: $joinOptions\n draft: $draft\n tags: $tags\n picture: $picture\n onlineAddress: $onlineAddress\n phoneAddress: $phoneAddress\n category: $category\n physicalAddress: $physicalAddress\n options: $options\n contacts: $contacts\n ) {\n ...FullEvent\n __typename\n }\n}\n\nfragment FullEvent on Event {\n id\n uuid\n url\n local\n title\n description\n beginsOn\n endsOn\n status\n visibility\n joinOptions\n draft\n picture {\n id\n url\n name\n metadata {\n width\n height\n blurhash\n __typename\n }\n __typename\n }\n publishAt\n onlineAddress\n phoneAddress\n physicalAddress {\n ...AdressFragment\n __typename\n }\n organizerActor {\n avatar {\n id\n url\n __typename\n }\n preferredUsername\n domain\n name\n url\n id\n summary\n __typename\n }\n contacts {\n avatar {\n id\n url\n __typename\n }\n preferredUsername\n name\n summary\n domain\n url\n id\n __typename\n }\n attributedTo {\n avatar {\n id\n url\n __typename\n }\n preferredUsername\n name\n summary\n domain\n url\n id\n __typename\n }\n participantStats {\n going\n notApproved\n participant\n __typename\n }\n tags {\n ...TagFragment\n __typename\n }\n relatedEvents {\n id\n uuid\n title\n beginsOn\n picture {\n id\n url\n name\n metadata {\n width\n height\n blurhash\n __typename\n }\n __typename\n }\n physicalAddress {\n id\n description\n 
__typename\n }\n organizerActor {\n id\n avatar {\n id\n url\n __typename\n }\n preferredUsername\n domain\n name\n __typename\n }\n __typename\n }\n options {\n ...EventOptions\n __typename\n }\n metadata {\n key\n title\n value\n type\n __typename\n }\n __typename\n}\n\nfragment AdressFragment on Address {\n id\n description\n geom\n street\n locality\n postalCode\n region\n country\n type\n url\n originId\n __typename\n}\n\nfragment TagFragment on Tag {\n id\n slug\n title\n __typename\n}\n\nfragment EventOptions on EventOptions {\n maximumAttendeeCapacity\n remainingAttendeeCapacity\n showRemainingAttendeeCapacity\n anonymousParticipation\n showStartTime\n showEndTime\n offers {\n price\n priceCurrency\n url\n __typename\n }\n participationConditions {\n title\n content\n url\n __typename\n }\n attendees\n program\n commentModeration\n showParticipationPrice\n hideOrganizerWhenGroupEvent\n __typename\n}\n",
query: createEventQueryMobilizon,
variables: {
attributedToId: null,
beginsOn: event.date,
contacts: [],
description: "<p>"+event.content+"</p>",
description: "<p>" + event.content + "</p>",
draft: false,
endsOn: event.date,
joinOptions: "FREE",