add software name
This commit is contained in:
parent
d6a41df40c
commit
f14cf2c4f2
29
chatons.js
29
chatons.js
|
@ -9,9 +9,13 @@ const fs = require('fs');
|
||||||
|
|
||||||
const pageLinkList = [];
|
const pageLinkList = [];
|
||||||
const pagesChatons = [];
|
const pagesChatons = [];
|
||||||
|
const departments = [];
|
||||||
|
const softwares = [];
|
||||||
const chatonsCatalog = {
|
const chatonsCatalog = {
|
||||||
linkList: [],
|
linkList: [],
|
||||||
pages : [],
|
pages : [],
|
||||||
|
departments,
|
||||||
|
softwares
|
||||||
};
|
};
|
||||||
|
|
||||||
// récupérer les liens de chaque chaton, exemple
|
// récupérer les liens de chaque chaton, exemple
|
||||||
|
@ -49,13 +53,12 @@ async function scrapeDataPages() {
|
||||||
let indexPage = 0;
|
let indexPage = 0;
|
||||||
pageLinkList.forEach((urlPage) => {
|
pageLinkList.forEach((urlPage) => {
|
||||||
if (indexPage < 2) {
|
if (indexPage < 2) {
|
||||||
let shouldWeSave = indexPage === pageLinkList.length - 1
|
let shouldWeSave = indexPage === pageLinkList.length - 1;
|
||||||
|
|
||||||
scrapeDataCatalogcontent(urlPage, shouldWeSave);
|
scrapeDataCatalogcontent(urlPage, shouldWeSave);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// console.log('indexPage', indexPage, pageLinkList.length)
|
// console.log('indexPage', indexPage, pageLinkList.length)
|
||||||
indexPage++;
|
indexPage++;
|
||||||
});
|
});
|
||||||
|
@ -87,18 +90,22 @@ async function scrapeDataCatalogcontent(url_page, shouldWeSave) {
|
||||||
// Load HTML we fetched in the previous line
|
// Load HTML we fetched in the previous line
|
||||||
const $ = cheerio.load(data);
|
const $ = cheerio.load(data);
|
||||||
|
|
||||||
|
|
||||||
// logiciels du chatons
|
// logiciels du chatons
|
||||||
|
|
||||||
let softwares =[]
|
let softwares = [];
|
||||||
$('.view-kitten-software ul li a').each((idx, el) => {
|
console.log('$(\'.view-kitten-software ul li\').length', $('.view-kitten-software ul li .field-content a').length);
|
||||||
|
$('.view-kitten-software ul li .field-content a').each((idx, el) => {
|
||||||
// console.log('idx', idx)
|
// console.log('idx', idx)
|
||||||
// console.log('el', el)
|
// console.log('el', el)
|
||||||
softwares.push({
|
let soft = {
|
||||||
name: el.valueOf(),
|
name: el ? $(el).text() : '',
|
||||||
link: el.attribs['href']
|
software_name: el ? $(el).text().split(' - ')[1] : '',
|
||||||
})
|
link: el ? 'https://www.chatons.org' + el?.attribs['href']:"",
|
||||||
})
|
};
|
||||||
|
|
||||||
|
console.log('soft', soft);
|
||||||
|
softwares.push(soft);
|
||||||
|
});
|
||||||
// console.log('org', $('.field--name-field-structure-organization')?.text());
|
// console.log('org', $('.field--name-field-structure-organization')?.text());
|
||||||
pagesChatons.push({
|
pagesChatons.push({
|
||||||
name : $('h2.chatons-public-subtitle').eq(0).text().trim(),
|
name : $('h2.chatons-public-subtitle').eq(0).text().trim(),
|
||||||
|
@ -116,7 +123,7 @@ async function scrapeDataCatalogcontent(url_page, shouldWeSave) {
|
||||||
|
|
||||||
if (shouldWeSave) {
|
if (shouldWeSave) {
|
||||||
|
|
||||||
setTimeout(persistCatalog,2000)
|
setTimeout(persistCatalog, 2000);
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error('e', e);
|
console.error('e', e);
|
||||||
|
|
Loading…
Reference in New Issue