// Source: rss-feeder-mobilizon/scrapers/ccpl.ts (file-viewer header: 83 lines, 2.3 KiB, TypeScript)

const puppeteer = require('puppeteer');
import * as fs from 'fs';
// configure database
const mongoose = require('mongoose');
const EventMobilizon = require('./../models/Event');
// get data from webpage
/**
 * Scrape the event titles shown on the CC Pays de Limours agenda page and
 * dump them, together with the selector used, into
 * `./sources_examples/ccpl_scrapped.json`.
 *
 * The DOM query runs inside the browser page (via `page.evaluate`); only the
 * plain list of title strings is sent back to Node, where the file is written.
 * The previous version called `fs.writeFile` inside the evaluated callback,
 * but that callback executes in the page, where Node's `fs` module does not
 * exist, so the file could never be written.
 *
 * A failure to write the file is logged and does not reject the returned
 * promise; failures to launch the browser or load the page do reject it.
 *
 * @returns a promise that resolves once the scrape and the file write attempt
 *          have finished.
 */
async function run() {
  // Headful browser with devtools open, as in the original debugging setup.
  const options = { headless: false, devtools: true };
  const browser = await puppeteer.launch(options);
  const page = await browser.newPage();
  await page.goto('https://www.cc-paysdelimours.fr/agenda');

  // Each event link is a .widgit_result
  // title: .widgit_result .widgit_title
  // title: #widgit_event_details .widgit_title
  const sel = '#widgit_results_agenda .widgit_result .title';

  // Everything inside this callback runs in the page, not in Node: it may
  // only use browser globals and the arguments passed in explicitly.
  const titleList: string[] = await page.evaluate((selector: string) => {
    const titles: string[] = [];
    document.querySelectorAll(selector).forEach((elem) => {
      console.log('title', elem.innerHTML);
      titles.push(elem.innerHTML);
    });
    return titles;
  }, sel);
  console.log('titleList', titleList);

  const scrapped = { selector: sel, titleList };

  // Save the scraped contents to a JSON file (Node side).
  const fileName = 'ccpl_scrapped.json';
  try {
    await fs.promises.writeFile(
      `./sources_examples/${fileName}`,
      JSON.stringify(scrapped, null, 4),
      'utf8',
    );
    console.log(`File ${fileName} is written successfully!`);
  } catch (err) {
    // Best-effort write, as before: report the problem but do not abort.
    console.log(`Error writing file: ${err}`);
  }

  // NOTE(review): the browser is left open, presumably on purpose so the page
  // can be inspected with devtools; re-enable the line below for unattended
  // runs, otherwise the Node process keeps the browser alive.
  // browser.close();
  console.log('DONE');
}
/**
 * Insert or update an event document through the `EventMobilizon` model.
 *
 * The document is looked up by its `title`: the call is made with
 * `upsert: true`, so an entry with the same title is updated instead of a
 * second one being inserted.
 *
 * The function does not wait for the database operation; the outcome is
 * handled in the callback, which logs and rethrows any error it receives.
 *
 * @param eventObject event data to store; its `title` is used as lookup key.
 */
function upsertEventMobilizon(eventObject:any) {
  const DB_URL = 'mongodb://localhost/thal';

  // Connect lazily: readyState 0 is "disconnected" per Mongoose's
  // Connection.readyState documentation.
  const notConnected = mongoose.connection.readyState == 0;
  if (notConnected) {
    mongoose.connect(DB_URL);
  }

  // Match on title; update the existing entry rather than inserting a new one.
  const filter = { title: eventObject.title };
  const upsertSettings = { upsert: true, new: true, setDefaultsOnInsert: true };

  const onFinished = (err:any, result:any) => {
    if (!err) {
      return;
    }
    console.log('result,err', result,err);
    throw err;
  };

  EventMobilizon.findOneAndUpdate(filter, eventObject, upsertSettings, onFinished);
}
// Script entry point: start the scrape. The returned promise is deliberately
// not awaited or chained here (`void` marks that as intentional).
void run();