Tykayn 4 months ago committed by tykayn
parent
commit
7c5e018a3c
  1. 2
      index.ts
  2. 63
      scrapers/ccpl.ts

2
index.ts

@ -71,7 +71,7 @@ async function init() {
console.log(
'⌛ trouver les évènements qui ne font pas partie de la BDD mobilizon postgresql',
);
feed.items.forEach((eventFound) => {
feed.items.forEach((eventFound:any) => {
utilsTools.agendadulibre.doesEventExists(eventFound);
});
}

63
scrapers/ccpl.ts

@ -1,25 +1,10 @@
const puppeteer = require('puppeteer');
import * as fs from 'fs';
import parserConfig from "../config";
// configure database
// get data from webpage
/**
 * Persists scraped data as pretty-printed JSON.
 *
 * The payload is serialised with a 4-space indent and written
 * asynchronously, UTF-8 encoded, to `./sources_examples/ccpl_scrapped.json`.
 * The result of the write is only reported on the console: one message on
 * failure, another on success. Nothing is returned to the caller.
 *
 * @param data - value to serialise with `JSON.stringify`.
 */
function saveScrappeddata(data: any) {
  const fileName = 'ccpl_scrapped.json';
  const targetPath = `./sources_examples/${fileName}`;
  const serialised = JSON.stringify(data, null, 4);

  // Completion callback: pick the message to log based on whether an error was reported.
  const onWritten = (err: any) => {
    const message = err
      ? `Error writing file: ${err}`
      : `File ${fileName} is written successfully!`;
    console.log(message);
  };

  fs.writeFile(targetPath, serialised, "utf8", onWritten);
}
let options: any = {};
let scrappedData: any = {
@ -71,7 +56,7 @@ async function run() {
const scrapped: any = {
selector: sel,
titleList,
hrefsDetails,
hrefsDetails,
listOfElementsLinks
};
@ -80,24 +65,42 @@ async function run() {
});
let ii=0;
dataRun.listOfElementsLinks.forEach((url: string) => {
// add delay
console.log('url', url);
// if(ii<3){
// getEventPageInfo(url)
//
// }
ii++;
})
let ii = 0;
dataRun.listOfElementsLinks.forEach((url: string) => {
// add delay
console.log('url', url);
if (ii < 3) {
let eventInfo = getEventPageInfo(url)
scrappedData.pages.push(eventInfo);
}
ii++;
})
// browser.close();
console.log('DONE');
saveScrappeddata({scrappedData, ...dataRun});
let data = {scrappedData, ...dataRun};
let fileName = 'ccpl_scrapped.json';
fs.writeFile(
`./sources_examples/${fileName}`,
JSON.stringify(data, null, 4),
"utf8",
(err: any) => {
if (err) {
console.log(`Error writing file: ${err}`);
} else {
console.log(`File ${fileName} is written successfully!`);
}
}
);
browser.close();
return dataRun;
}
async function getEventPageInfo(url: string) {
async function getEventPageInfo(url: string):Promise<any> {
const browser = await puppeteer.launch(options);
const page = await browser.newPage();
await page.goto(url);

Loading…
Cancel
Save