add software name

This commit is contained in:
Tykayn 2022-07-11 12:50:36 +02:00 committed by caligulanorris
parent d6a41df40c
commit f14cf2c4f2
1 changed file with 29 additions and 22 deletions

View File

@ -9,9 +9,13 @@ const fs = require('fs');
const pageLinkList = []; const pageLinkList = [];
const pagesChatons = []; const pagesChatons = [];
const departments = [];
const softwares = [];
const chatonsCatalog = { const chatonsCatalog = {
linkList: [], linkList: [],
pages : [], pages : [],
departments,
softwares
}; };
// récupérer les liens de chaque chaton, exemple // récupérer les liens de chaque chaton, exemple
@ -49,13 +53,12 @@ async function scrapeDataPages() {
let indexPage = 0; let indexPage = 0;
pageLinkList.forEach((urlPage) => { pageLinkList.forEach((urlPage) => {
if (indexPage < 2) { if (indexPage < 2) {
let shouldWeSave = indexPage === pageLinkList.length - 1 let shouldWeSave = indexPage === pageLinkList.length - 1;
scrapeDataCatalogcontent(urlPage, shouldWeSave); scrapeDataCatalogcontent(urlPage, shouldWeSave);
} }
// console.log('indexPage', indexPage, pageLinkList.length) // console.log('indexPage', indexPage, pageLinkList.length)
indexPage++; indexPage++;
}); });
@ -87,36 +90,40 @@ async function scrapeDataCatalogcontent(url_page, shouldWeSave) {
// Load HTML we fetched in the previous line // Load HTML we fetched in the previous line
const $ = cheerio.load(data); const $ = cheerio.load(data);
// logiciels du chatons // logiciels du chatons
let softwares =[] let softwares = [];
$('.view-kitten-software ul li a').each((idx, el) => { console.log('$(\'.view-kitten-software ul li\').length', $('.view-kitten-software ul li .field-content a').length);
$('.view-kitten-software ul li .field-content a').each((idx, el) => {
// console.log('idx', idx) // console.log('idx', idx)
// console.log('el', el) // console.log('el', el)
softwares.push({ let soft = {
name: el.valueOf(), name: el ? $(el).text() : '',
link: el.attribs['href'] software_name: el ? $(el).text().split(' - ')[1] : '',
}) link: el ? 'https://www.chatons.org' + el?.attribs['href']:"",
}) };
console.log('soft', soft);
softwares.push(soft);
});
// console.log('org', $('.field--name-field-structure-organization')?.text()); // console.log('org', $('.field--name-field-structure-organization')?.text());
pagesChatons.push({ pagesChatons.push({
name: $('h2.chatons-public-subtitle').eq(0).text().trim(), name : $('h2.chatons-public-subtitle').eq(0).text().trim(),
url : $('.field--name-field-website-url .field__item')?.text().trim(), url : $('.field--name-field-website-url .field__item')?.text().trim(),
rss : $('.field--name-field-rss-feed .field__item')?.text().trim(), rss : $('.field--name-field-rss-feed .field__item')?.text().trim(),
organization : $('.field--name-field-structure-organization .field__item')?.text(), organization: $('.field--name-field-structure-organization .field__item')?.text(),
structure : $('.field--name-field-structure-type .field__item')?.text(), structure : $('.field--name-field-structure-type .field__item')?.text(),
geo_area : $('.field--name-field-geo-area .field__item')?.text(), geo_area : $('.field--name-field-geo-area .field__item')?.text(),
creation : $('.field--name-field-structure-creation .field__item')?.text(), creation : $('.field--name-field-structure-creation .field__item')?.text(),
since : $('.field--name-field-member-since .field__item')?.text(), since : $('.field--name-field-member-since .field__item')?.text(),
zip_code : $('.field--name-field-zip-code .field__item')?.text(), zip_code : $('.field--name-field-zip-code .field__item')?.text(),
city : $('.field--name-field-city .field__item')?.text(), city : $('.field--name-field-city .field__item')?.text(),
softwares, softwares,
}); });
if(shouldWeSave){ if (shouldWeSave) {
setTimeout(persistCatalog,2000) setTimeout(persistCatalog, 2000);
} }
} catch (e) { } catch (e) {
console.error('e', e); console.error('e', e);