diff --git a/index.ts b/index.ts index 46471c0..82a75ae 100644 --- a/index.ts +++ b/index.ts @@ -71,7 +71,7 @@ async function init() { console.log( '⌛ trouver les évènements qui ne font pas partie de la BDD mobilizon postgresql', ); - feed.items.forEach((eventFound) => { + feed.items.forEach((eventFound:any) => { utilsTools.agendadulibre.doesEventExists(eventFound); }); } diff --git a/scrapers/ccpl.ts b/scrapers/ccpl.ts index f6ea8f5..4d44da1 100644 --- a/scrapers/ccpl.ts +++ b/scrapers/ccpl.ts @@ -1,25 +1,10 @@ const puppeteer = require('puppeteer'); import * as fs from 'fs'; +import parserConfig from "../config"; // configure database // get data from webpage -function saveScrappeddata(data: any) { - let fileName = 'ccpl_scrapped.json'; - - fs.writeFile( - `./sources_examples/${fileName}`, - JSON.stringify(data, null, 4), - "utf8", - (err: any) => { - if (err) { - console.log(`Error writing file: ${err}`); - } else { - console.log(`File ${fileName} is written successfully!`); - } - } - ); -} let options: any = {}; let scrappedData: any = { @@ -71,7 +56,7 @@ async function run() { const scrapped: any = { selector: sel, titleList, - hrefsDetails, + hrefsDetails, listOfElementsLinks }; @@ -80,24 +65,42 @@ async function run() { }); - let ii=0; - dataRun.listOfElementsLinks.forEach((url: string) => { - // add delay - console.log('url', url); - // if(ii<3){ - // getEventPageInfo(url) - // - // } - ii++; - }) + let ii = 0; + + + dataRun.listOfElementsLinks.forEach((url: string) => { + // add delay + console.log('url', url); + if (ii < 3) { + let eventInfo = getEventPageInfo(url) + scrappedData.pages.push(eventInfo); + } + ii++; + }) + - // browser.close(); console.log('DONE'); - saveScrappeddata({scrappedData, ...dataRun}); + let data = {scrappedData, ...dataRun}; + let fileName = 'ccpl_scrapped.json'; + + fs.writeFile( + `./sources_examples/${fileName}`, + JSON.stringify(data, null, 4), + "utf8", + (err: any) => { + if (err) { + console.log(`Error writing file: ${err}`); + } else { + console.log(`File ${fileName} is written successfully!`); + } + } + ); + + browser.close(); return dataRun; } -async function getEventPageInfo(url: string) { +async function getEventPageInfo(url: string):Promise { const browser = await puppeteer.launch(options); const page = await browser.newPage(); await page.goto(url);