gather links
This commit is contained in:
parent
04f561a9d3
commit
4faa6ef364
@ -29,7 +29,11 @@ let scrappedData: any = {
|
||||
};
|
||||
options = {headless: false, devtools: true}
|
||||
|
||||
|
||||
/**
|
||||
* fetch CCPL agenda
|
||||
* find all links to events details
|
||||
* scrap data on each event page
|
||||
*/
|
||||
async function run() {
|
||||
|
||||
const browser = await puppeteer.launch(options);
|
||||
@ -40,26 +44,35 @@ async function run() {
|
||||
// titre: #widgit_event_details .widgit_title
|
||||
|
||||
|
||||
let scrappedData = await page.evaluate(() => {
|
||||
let dataRun = await page.evaluate(() => {
|
||||
|
||||
|
||||
let sel = '#widgit_results_agenda .widgit_result .title';
|
||||
let listOfElements: any = document.querySelectorAll(sel);
|
||||
let linkTitleEvent: any = [];
|
||||
let listOfElements: any = document.querySelectorAll('#widgit_results_agenda .widgit_result .title');
|
||||
let listOfElementsLinks: any = document.querySelectorAll('#widgit_results_agenda a');
|
||||
|
||||
let hrefsDetails: any = [];
|
||||
let titleList: any = [];
|
||||
|
||||
|
||||
if (listOfElementsLinks.length) {
|
||||
listOfElementsLinks.forEach((elem: any) => {
|
||||
hrefsDetails.push(elem.getAttribute('data-w-href'));
|
||||
})
|
||||
}
|
||||
if (listOfElements.length) {
|
||||
listOfElements.forEach((elem: any) => {
|
||||
console.log('title', elem.innerHTML);
|
||||
titleList.push(elem.innerHTML);
|
||||
linkTitleEvent.push(elem.attribute['href']);
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
const scrapped: any = {
|
||||
selector: sel,
|
||||
titleList,
|
||||
linkTitleEvent
|
||||
hrefsDetails,
|
||||
listOfElementsLinks
|
||||
};
|
||||
|
||||
console.log('titleList', titleList);
|
||||
@ -67,13 +80,21 @@ async function run() {
|
||||
});
|
||||
|
||||
|
||||
// scrappedData.linkTitleEvent.forEach((url: string) => {
|
||||
// // add delay
|
||||
let ii=0;
|
||||
dataRun.listOfElementsLinks.forEach((url: string) => {
|
||||
// add delay
|
||||
console.log('url', url);
|
||||
// if(ii<3){
|
||||
// getEventPageInfo(url)
|
||||
// })
|
||||
//
|
||||
// }
|
||||
ii++;
|
||||
})
|
||||
|
||||
browser.close();
|
||||
// browser.close();
|
||||
console.log('DONE');
|
||||
saveScrappeddata({scrappedData, ...dataRun});
|
||||
return dataRun;
|
||||
}
|
||||
|
||||
async function getEventPageInfo(url: string) {
|
||||
@ -117,7 +138,7 @@ async function getEventPageInfo(url: string) {
|
||||
|
||||
}
|
||||
|
||||
run();
|
||||
scrappedData = run();
|
||||
|
||||
async function getOnePage() {
|
||||
|
||||
@ -130,4 +151,4 @@ async function getOnePage() {
|
||||
}
|
||||
|
||||
// getOnePage()
|
||||
saveScrappeddata(scrappedData);
|
||||
// saveScrappeddata(scrappedData);
|
@ -1,5 +1,41 @@
|
||||
{
|
||||
"pages": [],
|
||||
"titleList": null,
|
||||
"linkTitleEvent": null
|
||||
"scrappedData": {},
|
||||
"selector": "#widgit_results_agenda .widgit_result .title",
|
||||
"titleList": [
|
||||
"Soirée jeux",
|
||||
"Conférence Patrimoine : les cloches de l'église des Molières",
|
||||
"Contes en famille",
|
||||
"Les Hivernales, spectacle Hip Hop",
|
||||
"Théâtre - À cause des garçons",
|
||||
"Soirée jeux",
|
||||
"Soirée Jazz Blues Latino",
|
||||
"Soirée jeux",
|
||||
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
|
||||
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
|
||||
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
|
||||
"Soirée jeux",
|
||||
"Fête de la création et des métiers d'art",
|
||||
"Fête de la création et des métiers d'art",
|
||||
"Fête de la création et des métiers d'art",
|
||||
"Soirée jeux"
|
||||
],
|
||||
"hrefsDetails": [
|
||||
null,
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997492",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6002033",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5995489",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6002454",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6001129",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
|
||||
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388"
|
||||
]
|
||||
}
|
Loading…
Reference in New Issue
Block a user