From f14cf2c4f28d1c6077f8f9da7d269b25cd52057d Mon Sep 17 00:00:00 2001 From: Tykayn Date: Mon, 11 Jul 2022 12:50:36 +0200 Subject: [PATCH] add software name --- chatons.js | 51 +++++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/chatons.js b/chatons.js index 6e65287..4a09a43 100644 --- a/chatons.js +++ b/chatons.js @@ -9,9 +9,13 @@ const fs = require('fs'); const pageLinkList = []; const pagesChatons = []; +const departments = []; +const softwares = []; const chatonsCatalog = { linkList: [], pages : [], + departments, + softwares }; // récupérer les liens de chaque chaton, exemple @@ -49,13 +53,12 @@ async function scrapeDataPages() { let indexPage = 0; pageLinkList.forEach((urlPage) => { if (indexPage < 2) { - let shouldWeSave = indexPage === pageLinkList.length - 1 + let shouldWeSave = indexPage === pageLinkList.length - 1; scrapeDataCatalogcontent(urlPage, shouldWeSave); } - // console.log('indexPage', indexPage, pageLinkList.length) indexPage++; }); @@ -87,36 +90,40 @@ async function scrapeDataCatalogcontent(url_page, shouldWeSave) { // Load HTML we fetched in the previous line const $ = cheerio.load(data); - // logiciels du chatons - let softwares =[] - $('.view-kitten-software ul li a').each((idx, el) => { + let softwares = []; + console.log('$(\'.view-kitten-software ul li\').length', $('.view-kitten-software ul li .field-content a').length); + $('.view-kitten-software ul li .field-content a').each((idx, el) => { // console.log('idx', idx) // console.log('el', el) - softwares.push({ - name: el.valueOf(), - link: el.attribs['href'] - }) - }) + let soft = { + name: el ? $(el).text() : '', + software_name: el ? $(el).text().split(' - ')[1] : '', + link: el ? 'https://www.chatons.org' + el?.attribs['href']:"", + }; + + console.log('soft', soft); + softwares.push(soft); + }); // console.log('org', $('.field--name-field-structure-organization')?.text()); pagesChatons.push({ - name: $('h2.chatons-public-subtitle').eq(0).text().trim(), - url : $('.field--name-field-website-url .field__item')?.text().trim(), - rss : $('.field--name-field-rss-feed .field__item')?.text().trim(), - organization : $('.field--name-field-structure-organization .field__item')?.text(), - structure : $('.field--name-field-structure-type .field__item')?.text(), - geo_area : $('.field--name-field-geo-area .field__item')?.text(), - creation : $('.field--name-field-structure-creation .field__item')?.text(), - since : $('.field--name-field-member-since .field__item')?.text(), - zip_code : $('.field--name-field-zip-code .field__item')?.text(), - city : $('.field--name-field-city .field__item')?.text(), + name : $('h2.chatons-public-subtitle').eq(0).text().trim(), + url : $('.field--name-field-website-url .field__item')?.text().trim(), + rss : $('.field--name-field-rss-feed .field__item')?.text().trim(), + organization: $('.field--name-field-structure-organization .field__item')?.text(), + structure : $('.field--name-field-structure-type .field__item')?.text(), + geo_area : $('.field--name-field-geo-area .field__item')?.text(), + creation : $('.field--name-field-structure-creation .field__item')?.text(), + since : $('.field--name-field-member-since .field__item')?.text(), + zip_code : $('.field--name-field-zip-code .field__item')?.text(), + city : $('.field--name-field-city .field__item')?.text(), softwares, }); - if(shouldWeSave){ + if (shouldWeSave) { - setTimeout(persistCatalog,2000) + setTimeout(persistCatalog, 2000); } } catch (e) { console.error('e', e);