gather links

This commit is contained in:
Tykayn 2022-01-12 00:29:01 +01:00 committed by tykayn
parent 04f561a9d3
commit 4faa6ef364
2 changed files with 73 additions and 16 deletions

View File

@ -29,7 +29,11 @@ let scrappedData: any = {
};
options = {headless: false, devtools: true}
/**
* fetch the CCPL agenda
* find all links to event details
* scrape data from each event page
*/
async function run() {
const browser = await puppeteer.launch(options);
@ -40,26 +44,35 @@ async function run() {
// titre: #widgit_event_details .widgit_title
let scrappedData = await page.evaluate(() => {
let dataRun = await page.evaluate(() => {
let sel = '#widgit_results_agenda .widgit_result .title';
let listOfElements: any = document.querySelectorAll(sel);
let linkTitleEvent: any = [];
let listOfElements: any = document.querySelectorAll('#widgit_results_agenda .widgit_result .title');
let listOfElementsLinks: any = document.querySelectorAll('#widgit_results_agenda a');
let hrefsDetails: any = [];
let titleList: any = [];
if (listOfElementsLinks.length) {
listOfElementsLinks.forEach((elem: any) => {
hrefsDetails.push(elem.getAttribute('data-w-href'));
})
}
if (listOfElements.length) {
listOfElements.forEach((elem: any) => {
console.log('title', elem.innerHTML);
titleList.push(elem.innerHTML);
linkTitleEvent.push(elem.attribute['href']);
})
}
const scrapped: any = {
selector: sel,
titleList,
linkTitleEvent
hrefsDetails,
listOfElementsLinks
};
console.log('titleList', titleList);
@ -67,13 +80,21 @@ async function run() {
});
// scrappedData.linkTitleEvent.forEach((url: string) => {
// // add delay
// getEventPageInfo(url)
// })
let ii=0;
dataRun.listOfElementsLinks.forEach((url: string) => {
// add delay
console.log('url', url);
// if(ii<3){
// getEventPageInfo(url)
//
// }
ii++;
})
browser.close();
// browser.close();
console.log('DONE');
saveScrappeddata({scrappedData, ...dataRun});
return dataRun;
}
async function getEventPageInfo(url: string) {
@ -117,7 +138,7 @@ async function getEventPageInfo(url: string) {
}
run();
scrappedData = run();
async function getOnePage() {
@ -130,4 +151,4 @@ async function getOnePage() {
}
// getOnePage()
saveScrappeddata(scrappedData);
// saveScrappeddata(scrappedData);

View File

@ -1,5 +1,41 @@
{
"pages": [],
"titleList": null,
"linkTitleEvent": null
"scrappedData": {},
"selector": "#widgit_results_agenda .widgit_result .title",
"titleList": [
"Soirée jeux",
"Conférence Patrimoine : les cloches de l'église des Molières",
"Contes en famille",
"Les Hivernales, spectacle Hip Hop",
"Théâtre - À cause des garçons",
"Soirée jeux",
"Soirée Jazz Blues Latino",
"Soirée jeux",
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
"Soirée jeux",
"Fête de la création et des métiers d'art",
"Fête de la création et des métiers d'art",
"Fête de la création et des métiers d'art",
"Soirée jeux"
],
"hrefsDetails": [
null,
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997492",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6002033",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5995489",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6002454",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6001129",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388"
]
}