This commit is contained in:
Tykayn 2022-01-13 09:35:57 +01:00 committed by tykayn
parent 4faa6ef364
commit 7c5e018a3c
2 changed files with 34 additions and 31 deletions

View File

@ -71,7 +71,7 @@ async function init() {
console.log(
'⌛ trouver les évènements qui ne font pas partie de la BDD mobilizon postgresql',
);
feed.items.forEach((eventFound) => {
feed.items.forEach((eventFound:any) => {
utilsTools.agendadulibre.doesEventExists(eventFound);
});
}

View File

@ -1,25 +1,10 @@
const puppeteer = require('puppeteer');
import * as fs from 'fs';
import parserConfig from "../config";
// configure database
// get data from webpage
/**
 * Serializes the given data as pretty-printed JSON (4-space indent) and writes
 * it to `./sources_examples/ccpl_scrapped.json`, encoded as UTF-8.
 *
 * The write is asynchronous: this function returns right after scheduling it,
 * and the outcome (success or error) is reported on the console from the
 * completion callback.
 *
 * @param data - Value passed to `JSON.stringify` to produce the file content.
 */
function saveScrappeddata(data: any) {
    const fileName = 'ccpl_scrapped.json';
    const targetPath = `./sources_examples/${fileName}`;
    const serialized = JSON.stringify(data, null, 4);

    // Completion handler: logs either the failure or the success message.
    const reportOutcome = (err: unknown) => {
        if (!err) {
            console.log(`File ${fileName} is written successfully!`);
            return;
        }
        console.log(`Error writing file: ${err}`);
    };

    fs.writeFile(targetPath, serialized, "utf8", reportOutcome);
}
let options: any = {};
let scrappedData: any = {
@ -71,7 +56,7 @@ async function run() {
const scrapped: any = {
selector: sel,
titleList,
hrefsDetails,
hrefsDetails,
listOfElementsLinks
};
@ -80,24 +65,42 @@ async function run() {
});
let ii=0;
dataRun.listOfElementsLinks.forEach((url: string) => {
// add delay
console.log('url', url);
// if(ii<3){
// getEventPageInfo(url)
//
// }
ii++;
})
let ii = 0;
dataRun.listOfElementsLinks.forEach((url: string) => {
// add delay
console.log('url', url);
if (ii < 3) {
let eventInfo = getEventPageInfo(url)
scrappedData.pages.push(eventInfo);
}
ii++;
})
// browser.close();
console.log('DONE');
saveScrappeddata({scrappedData, ...dataRun});
let data = {scrappedData, ...dataRun};
let fileName = 'ccpl_scrapped.json';
fs.writeFile(
`./sources_examples/${fileName}`,
JSON.stringify(data, null, 4),
"utf8",
(err: any) => {
if (err) {
console.log(`Error writing file: ${err}`);
} else {
console.log(`File ${fileName} is written successfully!`);
}
}
);
browser.close();
return dataRun;
}
async function getEventPageInfo(url: string) {
async function getEventPageInfo(url: string):Promise<any> {
const browser = await puppeteer.launch(options);
const page = await browser.newPage();
await page.goto(url);