This commit is contained in:
Tykayn 2022-01-13 09:35:57 +01:00 committed by tykayn
parent 4faa6ef364
commit 7c5e018a3c
2 changed files with 34 additions and 31 deletions

View File

@ -71,7 +71,7 @@ async function init() {
console.log( console.log(
'⌛ trouver les évènements qui ne font pas partie de la BDD mobilizon postgresql', '⌛ trouver les évènements qui ne font pas partie de la BDD mobilizon postgresql',
); );
feed.items.forEach((eventFound) => { feed.items.forEach((eventFound:any) => {
utilsTools.agendadulibre.doesEventExists(eventFound); utilsTools.agendadulibre.doesEventExists(eventFound);
}); });
} }

View File

@ -1,25 +1,10 @@
const puppeteer = require('puppeteer'); const puppeteer = require('puppeteer');
import * as fs from 'fs'; import * as fs from 'fs';
import parserConfig from "../config";
// configure database // configure database
// get data from webpage // get data from webpage
/**
 * Serialises the scraped payload as JSON (4-space indentation) and writes it
 * to `./sources_examples/ccpl_scrapped.json` using UTF-8 encoding.
 *
 * The write uses the callback form of `fs.writeFile`, so this function
 * returns before the file is actually on disk. The outcome is only reported
 * on the console; no error is propagated to the caller.
 *
 * @param data - Value to serialise; it is passed straight to `JSON.stringify`.
 */
function saveScrappeddata(data: any) {
  const fileName = 'ccpl_scrapped.json';
  const targetPath = `./sources_examples/${fileName}`;
  const serialized = JSON.stringify(data, null, 4);

  // Completion handler: log success, or the error the write reported.
  const onWritten = (err: unknown) => {
    if (!err) {
      console.log(`File ${fileName} is written successfully!`);
      return;
    }
    console.log(`Error writing file: ${err}`);
  };

  fs.writeFile(targetPath, serialized, "utf8", onWritten);
}
let options: any = {}; let options: any = {};
let scrappedData: any = { let scrappedData: any = {
@ -71,7 +56,7 @@ async function run() {
const scrapped: any = { const scrapped: any = {
selector: sel, selector: sel,
titleList, titleList,
hrefsDetails, hrefsDetails,
listOfElementsLinks listOfElementsLinks
}; };
@ -80,24 +65,42 @@ async function run() {
}); });
let ii=0; let ii = 0;
dataRun.listOfElementsLinks.forEach((url: string) => {
// add delay
console.log('url', url); dataRun.listOfElementsLinks.forEach((url: string) => {
// if(ii<3){ // add delay
// getEventPageInfo(url) console.log('url', url);
// if (ii < 3) {
// } let eventInfo = getEventPageInfo(url)
ii++; scrappedData.pages.push(eventInfo);
}) }
ii++;
})
// browser.close();
console.log('DONE'); console.log('DONE');
saveScrappeddata({scrappedData, ...dataRun}); let data = {scrappedData, ...dataRun};
let fileName = 'ccpl_scrapped.json';
fs.writeFile(
`./sources_examples/${fileName}`,
JSON.stringify(data, null, 4),
"utf8",
(err: any) => {
if (err) {
console.log(`Error writing file: ${err}`);
} else {
console.log(`File ${fileName} is written successfully!`);
}
}
);
browser.close();
return dataRun; return dataRun;
} }
async function getEventPageInfo(url: string) { async function getEventPageInfo(url: string):Promise<any> {
const browser = await puppeteer.launch(options); const browser = await puppeteer.launch(options);
const page = await browser.newPage(); const page = await browser.newPage();
await page.goto(url); await page.goto(url);