limit page
This commit is contained in:
parent
f14cf2c4f2
commit
ccaf76f8be
24
chatons.js
24
chatons.js
@ -7,15 +7,18 @@ const cheerio = require('cheerio');
|
|||||||
const pretty = require('pretty');
|
const pretty = require('pretty');
|
||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
|
|
||||||
|
|
||||||
|
const limitPagesToFetch = 2
|
||||||
|
|
||||||
const pageLinkList = [];
|
const pageLinkList = [];
|
||||||
const pagesChatons = [];
|
const pagesChatons = [];
|
||||||
const departments = [];
|
const departments = [];
|
||||||
const softwares = [];
|
const softwaresGeneral = [];
|
||||||
const chatonsCatalog = {
|
const chatonsCatalog = {
|
||||||
linkList: [],
|
linkList: [],
|
||||||
pages : [],
|
pages : [],
|
||||||
departments,
|
departments,
|
||||||
softwares
|
softwaresGeneral
|
||||||
};
|
};
|
||||||
|
|
||||||
// récupérer les liens de chaque chaton, exemple
|
// récupérer les liens de chaque chaton, exemple
|
||||||
@ -51,9 +54,10 @@ async function scrapeDataPages() {
|
|||||||
|
|
||||||
console.log('pageLinkList.length', pageLinkList.length);
|
console.log('pageLinkList.length', pageLinkList.length);
|
||||||
let indexPage = 0;
|
let indexPage = 0;
|
||||||
|
|
||||||
pageLinkList.forEach((urlPage) => {
|
pageLinkList.forEach((urlPage) => {
|
||||||
if (indexPage < 2) {
|
if (indexPage < limitPagesToFetch) {
|
||||||
let shouldWeSave = indexPage === pageLinkList.length - 1;
|
let shouldWeSave = (indexPage === limitPagesToFetch || indexPage === pageLinkList.length - 1);
|
||||||
|
|
||||||
scrapeDataCatalogcontent(urlPage, shouldWeSave);
|
scrapeDataCatalogcontent(urlPage, shouldWeSave);
|
||||||
|
|
||||||
@ -103,6 +107,13 @@ async function scrapeDataCatalogcontent(url_page, shouldWeSave) {
|
|||||||
link: el ? 'https://www.chatons.org' + el?.attribs['href']:"",
|
link: el ? 'https://www.chatons.org' + el?.attribs['href']:"",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if(!softwaresGeneral[$(el).text().split(' - ')[1]]){
|
||||||
|
softwaresGeneral[$(el).text().split(' - ')[1]].push({
|
||||||
|
name : $('h2.chatons-public-subtitle').eq(0).text().trim(),
|
||||||
|
url : $('.field--name-field-website-url .field__item')?.text().trim(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
console.log('soft', soft);
|
console.log('soft', soft);
|
||||||
softwares.push(soft);
|
softwares.push(soft);
|
||||||
});
|
});
|
||||||
@ -121,6 +132,11 @@ async function scrapeDataCatalogcontent(url_page, shouldWeSave) {
|
|||||||
softwares,
|
softwares,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if(!departements[$('.field--name-field-zip-code .field__item')?.text()]){
|
||||||
|
departements[$('.field--name-field-zip-code .field__item')?.text()] = []
|
||||||
|
}
|
||||||
|
departements[$('.field--name-field-zip-code .field__item')?.text()].push($('h2.chatons-public-subtitle').eq(0).text().trim())
|
||||||
|
|
||||||
if (shouldWeSave) {
|
if (shouldWeSave) {
|
||||||
|
|
||||||
setTimeout(persistCatalog, 2000);
|
setTimeout(persistCatalog, 2000);
|
||||||
|
Loading…
Reference in New Issue
Block a user