// Collect information about every "chaton" listed on chatons.org.
// Index page: https://www.chatons.org/chatons/all
const url = 'https://www.chatons.org/chatons/all';
const axios = require('axios');
const cheerio = require('cheerio');
const pretty = require('pretty');
const fs = require('fs');

// Absolute URLs of the chaton detail pages found on the index page.
const pageLinkList = [];

// Everything that gets written to disk by persistCatalog().
const chatonsCatalog = {
  linkList: [],
  pages: [],
};

// Each chaton has its own detail page, for example
// https://www.chatons.org/chatons/bastet-parinux
//
// On a detail page the data is read from the div:
//   .chatons-public-column
//   .chatons-public-subtitle   -> name

/**
 * Scrape the index page for chaton detail links, scrape the first
 * `maxPages` detail pages, then write the catalog to disk.
 *
 * Any error is logged and swallowed, so the returned promise never rejects.
 *
 * @param {number} [maxPages=1] - How many detail pages to fetch. The default
 *   keeps the script's existing "first page only" limit; pass `Infinity` to
 *   scrape every link found.
 * @returns {Promise<void>}
 */
async function scrapeDataPages(maxPages = 1) {
  try {
    // Fetch and parse the index page.
    const { data } = await axios.get(url);
    const $ = cheerio.load(data);

    $('.view-content a').each((index, element) => {
      const href = element.attribs?.href;
      // Only anchors carrying an `hreflang` attribute are kept as chaton links.
      if (!element.attribs?.hreflang || !href) {
        return;
      }
      // Resolve against the index URL so relative and absolute hrefs both work.
      const pageUrl = new URL(href, url).href;
      console.log('url', pageUrl);
      pageLinkList.push(pageUrl);
    });

    chatonsCatalog.linkList = pageLinkList;
    console.log('pageLinkList.length', pageLinkList.length);

    if (pageLinkList.length === 0) {
      console.error('no chaton links found on', url);
      return;
    }

    // Sequential on purpose: every page must be recorded in the catalog
    // before it is persisted, and the remote site is not hammered.
    const pagesToScrape = pageLinkList.slice(0, maxPages);
    for (const [indexPage, urlPage] of pagesToScrape.entries()) {
      console.log('indexPage', indexPage, pageLinkList.length);
      await scrapeDataCatalogcontent(urlPage);
    }

    await persistCatalog();
  } catch (e) {
    console.error('e', e);
  }
}

/**
 * Write `chatonsCatalog` as pretty-printed JSON to
 * `output/chatons_links.json`, creating the `output` directory if needed.
 *
 * Write errors are logged, not thrown.
 *
 * @returns {Promise<void>}
 */
async function persistCatalog() {
  try {
    await fs.promises.mkdir('output', { recursive: true });
    await fs.promises.writeFile(
      'output/chatons_links.json',
      JSON.stringify(chatonsCatalog, null, 2),
    );
    console.log('Successfully written data to file chatons_links.json');
  } catch (err) {
    console.error(err);
  }
}

/**
 * Fetch one chaton detail page and append its name, website URL and RSS feed
 * (as trimmed text, empty string when the element is missing) to
 * `chatonsCatalog.pages`.
 *
 * Fetch/parse errors are logged and the page is skipped.
 *
 * @param {string} url_page - Absolute URL of the chaton detail page.
 * @returns {Promise<void>}
 */
async function scrapeDataCatalogcontent(url_page) {
  try {
    console.log('fetching url_page', url_page);
    const { data } = await axios.get(url_page);
    const $ = cheerio.load(data);

    // Cheerio selections expose text through .text(); the DOM-only
    // `innerText` property does not exist on them.
    const firstText = (selector) => $(selector).first().text().trim();

    chatonsCatalog.pages.push({
      name: firstText('.chatons-public-subtitle'),
      url: firstText('.field--name-field-website-url'),
      rss: firstText('.field--name-field-rss-feed'),
    });
  } catch (e) {
    console.error('e', e);
  }
}

// Run the scrape; errors are handled inside scrapeDataPages.
scrapeDataPages();