From ccaf76f8be9739596f08029917a5cd2d0a11aeed Mon Sep 17 00:00:00 2001 From: Tykayn Date: Mon, 11 Jul 2022 12:57:18 +0200 Subject: [PATCH] limit page --- chatons.js | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/chatons.js b/chatons.js index 4a09a43..f5de9ce 100644 --- a/chatons.js +++ b/chatons.js @@ -7,15 +7,18 @@ const cheerio = require('cheerio'); const pretty = require('pretty'); const fs = require('fs'); + +const limitPagesToFetch = 2 + const pageLinkList = []; const pagesChatons = []; const departments = []; -const softwares = []; +const softwaresGeneral = []; const chatonsCatalog = { linkList: [], pages : [], departments, - softwares + softwaresGeneral }; // récupérer les liens de chaque chaton, exemple @@ -51,9 +54,10 @@ async function scrapeDataPages() { console.log('pageLinkList.length', pageLinkList.length); let indexPage = 0; + pageLinkList.forEach((urlPage) => { - if (indexPage < 2) { - let shouldWeSave = indexPage === pageLinkList.length - 1; + if (indexPage < limitPagesToFetch) { + let shouldWeSave = (indexPage === limitPagesToFetch || indexPage === pageLinkList.length - 1); scrapeDataCatalogcontent(urlPage, shouldWeSave); @@ -103,6 +107,13 @@ async function scrapeDataCatalogcontent(url_page, shouldWeSave) { link: el ? 'https://www.chatons.org' + el?.attribs['href']:"", }; + if(!softwaresGeneral[$(el).text().split(' - ')[1]]){ + softwaresGeneral[$(el).text().split(' - ')[1]].push({ + name : $('h2.chatons-public-subtitle').eq(0).text().trim(), + url : $('.field--name-field-website-url .field__item')?.text().trim(), + }) + } + console.log('soft', soft); softwares.push(soft); }); @@ -121,6 +132,11 @@ async function scrapeDataCatalogcontent(url_page, shouldWeSave) { softwares, }); + if(!departements[$('.field--name-field-zip-code .field__item')?.text()]){ + departements[$('.field--name-field-zip-code .field__item')?.text()] = [] + } + departements[$('.field--name-field-zip-code .field__item')?.text()].push($('h2.chatons-public-subtitle').eq(0).text().trim()) + if (shouldWeSave) { setTimeout(persistCatalog, 2000);