gather links
This commit is contained in:
parent
04f561a9d3
commit
4faa6ef364
@ -29,7 +29,11 @@ let scrappedData: any = {
|
|||||||
};
|
};
|
||||||
options = {headless: false, devtools: true}
|
options = {headless: false, devtools: true}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* fetch CCPL agenda
|
||||||
|
* find all links to events details
|
||||||
|
* scrap data on each event page
|
||||||
|
*/
|
||||||
async function run() {
|
async function run() {
|
||||||
|
|
||||||
const browser = await puppeteer.launch(options);
|
const browser = await puppeteer.launch(options);
|
||||||
@ -40,26 +44,35 @@ async function run() {
|
|||||||
// titre: #widgit_event_details .widgit_title
|
// titre: #widgit_event_details .widgit_title
|
||||||
|
|
||||||
|
|
||||||
let scrappedData = await page.evaluate(() => {
|
let dataRun = await page.evaluate(() => {
|
||||||
|
|
||||||
|
|
||||||
let sel = '#widgit_results_agenda .widgit_result .title';
|
let sel = '#widgit_results_agenda .widgit_result .title';
|
||||||
let listOfElements: any = document.querySelectorAll(sel);
|
let listOfElements: any = document.querySelectorAll('#widgit_results_agenda .widgit_result .title');
|
||||||
let linkTitleEvent: any = [];
|
let listOfElementsLinks: any = document.querySelectorAll('#widgit_results_agenda a');
|
||||||
|
|
||||||
|
let hrefsDetails: any = [];
|
||||||
let titleList: any = [];
|
let titleList: any = [];
|
||||||
|
|
||||||
|
|
||||||
|
if (listOfElementsLinks.length) {
|
||||||
|
listOfElementsLinks.forEach((elem: any) => {
|
||||||
|
hrefsDetails.push(elem.getAttribute('data-w-href'));
|
||||||
|
})
|
||||||
|
}
|
||||||
if (listOfElements.length) {
|
if (listOfElements.length) {
|
||||||
listOfElements.forEach((elem: any) => {
|
listOfElements.forEach((elem: any) => {
|
||||||
console.log('title', elem.innerHTML);
|
console.log('title', elem.innerHTML);
|
||||||
titleList.push(elem.innerHTML);
|
titleList.push(elem.innerHTML);
|
||||||
linkTitleEvent.push(elem.attribute['href']);
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const scrapped: any = {
|
const scrapped: any = {
|
||||||
selector: sel,
|
selector: sel,
|
||||||
titleList,
|
titleList,
|
||||||
linkTitleEvent
|
hrefsDetails,
|
||||||
|
listOfElementsLinks
|
||||||
};
|
};
|
||||||
|
|
||||||
console.log('titleList', titleList);
|
console.log('titleList', titleList);
|
||||||
@ -67,13 +80,21 @@ async function run() {
|
|||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
// scrappedData.linkTitleEvent.forEach((url: string) => {
|
let ii=0;
|
||||||
// // add delay
|
dataRun.listOfElementsLinks.forEach((url: string) => {
|
||||||
|
// add delay
|
||||||
|
console.log('url', url);
|
||||||
|
// if(ii<3){
|
||||||
// getEventPageInfo(url)
|
// getEventPageInfo(url)
|
||||||
// })
|
//
|
||||||
|
// }
|
||||||
|
ii++;
|
||||||
|
})
|
||||||
|
|
||||||
browser.close();
|
// browser.close();
|
||||||
console.log('DONE');
|
console.log('DONE');
|
||||||
|
saveScrappeddata({scrappedData, ...dataRun});
|
||||||
|
return dataRun;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getEventPageInfo(url: string) {
|
async function getEventPageInfo(url: string) {
|
||||||
@ -117,7 +138,7 @@ async function getEventPageInfo(url: string) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
run();
|
scrappedData = run();
|
||||||
|
|
||||||
async function getOnePage() {
|
async function getOnePage() {
|
||||||
|
|
||||||
@ -130,4 +151,4 @@ async function getOnePage() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// getOnePage()
|
// getOnePage()
|
||||||
saveScrappeddata(scrappedData);
|
// saveScrappeddata(scrappedData);
|
@ -1,5 +1,41 @@
|
|||||||
{
|
{
|
||||||
"pages": [],
|
"scrappedData": {},
|
||||||
"titleList": null,
|
"selector": "#widgit_results_agenda .widgit_result .title",
|
||||||
"linkTitleEvent": null
|
"titleList": [
|
||||||
|
"Soirée jeux",
|
||||||
|
"Conférence Patrimoine : les cloches de l'église des Molières",
|
||||||
|
"Contes en famille",
|
||||||
|
"Les Hivernales, spectacle Hip Hop",
|
||||||
|
"Théâtre - À cause des garçons",
|
||||||
|
"Soirée jeux",
|
||||||
|
"Soirée Jazz Blues Latino",
|
||||||
|
"Soirée jeux",
|
||||||
|
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
|
||||||
|
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
|
||||||
|
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
|
||||||
|
"Soirée jeux",
|
||||||
|
"Fête de la création et des métiers d'art",
|
||||||
|
"Fête de la création et des métiers d'art",
|
||||||
|
"Fête de la création et des métiers d'art",
|
||||||
|
"Soirée jeux"
|
||||||
|
],
|
||||||
|
"hrefsDetails": [
|
||||||
|
null,
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997492",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6002033",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5995489",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6002454",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6001129",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
|
||||||
|
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388"
|
||||||
|
]
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user