limit page

This commit is contained in:
Tykayn 2022-07-11 12:57:18 +02:00 committed by caligulanorris
parent f14cf2c4f2
commit ccaf76f8be
1 changed files with 20 additions and 4 deletions

View File

@ -7,15 +7,18 @@ const cheerio = require('cheerio');
const pretty = require('pretty'); const pretty = require('pretty');
const fs = require('fs'); const fs = require('fs');
const limitPagesToFetch = 2
const pageLinkList = []; const pageLinkList = [];
const pagesChatons = []; const pagesChatons = [];
const departments = []; const departments = [];
const softwares = []; const softwaresGeneral = [];
const chatonsCatalog = { const chatonsCatalog = {
linkList: [], linkList: [],
pages : [], pages : [],
departments, departments,
softwares softwaresGeneral
}; };
// récupérer les liens de chaque chaton, exemple // récupérer les liens de chaque chaton, exemple
@ -51,9 +54,10 @@ async function scrapeDataPages() {
console.log('pageLinkList.length', pageLinkList.length); console.log('pageLinkList.length', pageLinkList.length);
let indexPage = 0; let indexPage = 0;
pageLinkList.forEach((urlPage) => { pageLinkList.forEach((urlPage) => {
if (indexPage < 2) { if (indexPage < limitPagesToFetch) {
let shouldWeSave = indexPage === pageLinkList.length - 1; let shouldWeSave = (indexPage === limitPagesToFetch || indexPage === pageLinkList.length - 1);
scrapeDataCatalogcontent(urlPage, shouldWeSave); scrapeDataCatalogcontent(urlPage, shouldWeSave);
@ -103,6 +107,13 @@ async function scrapeDataCatalogcontent(url_page, shouldWeSave) {
link: el ? 'https://www.chatons.org' + el?.attribs['href']:"", link: el ? 'https://www.chatons.org' + el?.attribs['href']:"",
}; };
// Index the current chaton under the software name, taken as the second
// ' - '-separated segment of the link text.
// Fix: the previous guard called .push() on the entry it had just found to be
// missing (TypeError on every new key) and never appended for existing keys.
// Now the bucket is created on first sight and the chaton is always appended.
// NOTE(review): when the link text has no ' - ' separator the key is
// undefined (stored under "undefined"), same as before — confirm if such
// entries should be skipped instead.
const generalSoftwareKey = $(el).text().split(' - ')[1];
if (!softwaresGeneral[generalSoftwareKey]) {
softwaresGeneral[generalSoftwareKey] = [];
}
softwaresGeneral[generalSoftwareKey].push({
name : $('h2.chatons-public-subtitle').eq(0).text().trim(),
url : $('.field--name-field-website-url .field__item')?.text().trim(),
});
console.log('soft', soft); console.log('soft', soft);
softwares.push(soft); softwares.push(soft);
}); });
@ -121,6 +132,11 @@ async function scrapeDataCatalogcontent(url_page, shouldWeSave) {
softwares, softwares,
}); });
// Group the chaton's name under the text of its zip-code field.
// Fix: the collection declared at the top of the file (and exported in
// chatonsCatalog) is `departments`; the previous spelling `departements`
// referenced a name that is not declared in the visible source.
// The selector results are read once instead of three times.
const departmentKey = $('.field--name-field-zip-code .field__item')?.text();
const departmentChatonName = $('h2.chatons-public-subtitle').eq(0).text().trim();
if (!departments[departmentKey]) {
departments[departmentKey] = [];
}
departments[departmentKey].push(departmentChatonName);
if (shouldWeSave) { if (shouldWeSave) {
setTimeout(persistCatalog, 2000); setTimeout(persistCatalog, 2000);