stats departements
This commit is contained in:
parent
ccaf76f8be
commit
15c83d8636
71
chatons.js
71
chatons.js
@ -7,18 +7,17 @@ const cheerio = require('cheerio');
|
|||||||
const pretty = require('pretty');
|
const pretty = require('pretty');
|
||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
|
|
||||||
|
const limitPagesToFetch = 200;
|
||||||
const limitPagesToFetch = 2
|
|
||||||
|
|
||||||
const pageLinkList = [];
|
const pageLinkList = [];
|
||||||
const pagesChatons = [];
|
const pagesChatons = [];
|
||||||
const departments = [];
|
const departements = {};
|
||||||
const softwaresGeneral = [];
|
const softwaresGeneral = {};
|
||||||
const chatonsCatalog = {
|
const chatonsCatalog = {
|
||||||
linkList: [],
|
linkList: [],
|
||||||
pages : [],
|
pages : [],
|
||||||
departments,
|
departements,
|
||||||
softwaresGeneral
|
softwaresGeneral,
|
||||||
};
|
};
|
||||||
|
|
||||||
// récupérer les liens de chaque chaton, exemple
|
// récupérer les liens de chaque chaton, exemple
|
||||||
@ -56,7 +55,7 @@ async function scrapeDataPages() {
|
|||||||
let indexPage = 0;
|
let indexPage = 0;
|
||||||
|
|
||||||
pageLinkList.forEach((urlPage) => {
|
pageLinkList.forEach((urlPage) => {
|
||||||
if (indexPage < limitPagesToFetch) {
|
if (indexPage <= limitPagesToFetch) {
|
||||||
let shouldWeSave = (indexPage === limitPagesToFetch || indexPage === pageLinkList.length - 1);
|
let shouldWeSave = (indexPage === limitPagesToFetch || indexPage === pageLinkList.length - 1);
|
||||||
|
|
||||||
scrapeDataCatalogcontent(urlPage, shouldWeSave);
|
scrapeDataCatalogcontent(urlPage, shouldWeSave);
|
||||||
@ -74,6 +73,7 @@ async function scrapeDataPages() {
|
|||||||
|
|
||||||
function persistCatalog() {
|
function persistCatalog() {
|
||||||
|
|
||||||
|
console.log('saving catalog...');
|
||||||
chatonsCatalog.pages = pagesChatons;
|
chatonsCatalog.pages = pagesChatons;
|
||||||
// console.log('pagesChatons', pagesChatons)
|
// console.log('pagesChatons', pagesChatons)
|
||||||
fs.writeFile('output/chatons_links.json', JSON.stringify(chatonsCatalog, null, 2), (err) => {
|
fs.writeFile('output/chatons_links.json', JSON.stringify(chatonsCatalog, null, 2), (err) => {
|
||||||
@ -97,29 +97,50 @@ async function scrapeDataCatalogcontent(url_page, shouldWeSave) {
|
|||||||
// logiciels du chatons
|
// logiciels du chatons
|
||||||
|
|
||||||
let softwares = [];
|
let softwares = [];
|
||||||
console.log('$(\'.view-kitten-software ul li\').length', $('.view-kitten-software ul li .field-content a').length);
|
|
||||||
|
let chaton_name = $('h2.chatons-public-subtitle').eq(0).text().trim();
|
||||||
|
|
||||||
|
|
||||||
|
// stats départements
|
||||||
|
let dep = $('.field--name-field-zip-code .field__item')?.text().substring(0, 2);
|
||||||
|
|
||||||
|
if (dep != null && chaton_name && chaton_name.length) {
|
||||||
|
if (!departements[dep]) {
|
||||||
|
console.log('ajout département ',dep)
|
||||||
|
departements[dep] = [];
|
||||||
|
}
|
||||||
|
console.log('zip', dep, chaton_name);
|
||||||
|
departements[dep].push(chaton_name);
|
||||||
|
console.log('departements', departements)
|
||||||
|
}
|
||||||
|
|
||||||
|
// propriétés de la page chaton
|
||||||
$('.view-kitten-software ul li .field-content a').each((idx, el) => {
|
$('.view-kitten-software ul li .field-content a').each((idx, el) => {
|
||||||
// console.log('idx', idx)
|
|
||||||
// console.log('el', el)
|
let soft_name = el ? $(el).text().split(' - ')[1] : '';
|
||||||
let soft = {
|
let soft = {
|
||||||
name: el ? $(el).text() : '',
|
name : el ? $(el).text() : '',
|
||||||
software_name: el ? $(el).text().split(' - ')[1] : '',
|
software_name: soft_name,
|
||||||
link: el ? 'https://www.chatons.org' + el?.attribs['href']:"",
|
link : el ? 'https://www.chatons.org' + el?.attribs['href'] : '',
|
||||||
};
|
};
|
||||||
|
|
||||||
if(!softwaresGeneral[$(el).text().split(' - ')[1]]){
|
|
||||||
softwaresGeneral[$(el).text().split(' - ')[1]].push({
|
|
||||||
name : $('h2.chatons-public-subtitle').eq(0).text().trim(),
|
|
||||||
url : $('.field--name-field-website-url .field__item')?.text().trim(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log('soft', soft);
|
|
||||||
softwares.push(soft);
|
softwares.push(soft);
|
||||||
|
if (soft_name) {
|
||||||
|
|
||||||
|
console.log('soft_name', soft_name)
|
||||||
|
if (!softwaresGeneral[soft_name]) {
|
||||||
|
softwaresGeneral[soft_name] = [];
|
||||||
|
|
||||||
|
}
|
||||||
|
softwaresGeneral[soft_name].push({
|
||||||
|
name: chaton_name,
|
||||||
|
url : $('.field--name-field-website-url .field__item')?.text().trim(),
|
||||||
|
});
|
||||||
|
}
|
||||||
});
|
});
|
||||||
// console.log('org', $('.field--name-field-structure-organization')?.text());
|
|
||||||
pagesChatons.push({
|
pagesChatons.push({
|
||||||
name : $('h2.chatons-public-subtitle').eq(0).text().trim(),
|
chatons_url : url_page,
|
||||||
|
name : chaton_name,
|
||||||
url : $('.field--name-field-website-url .field__item')?.text().trim(),
|
url : $('.field--name-field-website-url .field__item')?.text().trim(),
|
||||||
rss : $('.field--name-field-rss-feed .field__item')?.text().trim(),
|
rss : $('.field--name-field-rss-feed .field__item')?.text().trim(),
|
||||||
organization: $('.field--name-field-structure-organization .field__item')?.text(),
|
organization: $('.field--name-field-structure-organization .field__item')?.text(),
|
||||||
@ -132,10 +153,6 @@ async function scrapeDataCatalogcontent(url_page, shouldWeSave) {
|
|||||||
softwares,
|
softwares,
|
||||||
});
|
});
|
||||||
|
|
||||||
if(!departements[$('.field--name-field-zip-code .field__item')?.text()]){
|
|
||||||
departements[$('.field--name-field-zip-code .field__item')?.text()] = []
|
|
||||||
}
|
|
||||||
departements[$('.field--name-field-zip-code .field__item')?.text()].push($('h2.chatons-public-subtitle').eq(0).text().trim())
|
|
||||||
|
|
||||||
if (shouldWeSave) {
|
if (shouldWeSave) {
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user