gather links

This commit is contained in:
Tykayn 2022-01-12 00:29:01 +01:00 committed by tykayn
parent 04f561a9d3
commit 4faa6ef364
2 changed files with 73 additions and 16 deletions

View File

@ -29,7 +29,11 @@ let scrappedData: any = {
}; };
options = {headless: false, devtools: true} options = {headless: false, devtools: true}
/**
* Fetch the CCPL agenda,
* find all links to the event detail pages,
* then scrape the data from each event page.
*/
async function run() { async function run() {
const browser = await puppeteer.launch(options); const browser = await puppeteer.launch(options);
@ -40,26 +44,35 @@ async function run() {
// titre: #widgit_event_details .widgit_title // titre: #widgit_event_details .widgit_title
let scrappedData = await page.evaluate(() => { let dataRun = await page.evaluate(() => {
let sel = '#widgit_results_agenda .widgit_result .title'; let sel = '#widgit_results_agenda .widgit_result .title';
let listOfElements: any = document.querySelectorAll(sel); let listOfElements: any = document.querySelectorAll('#widgit_results_agenda .widgit_result .title');
let linkTitleEvent: any = []; let listOfElementsLinks: any = document.querySelectorAll('#widgit_results_agenda a');
let hrefsDetails: any = [];
let titleList: any = []; let titleList: any = [];
if (listOfElementsLinks.length) {
listOfElementsLinks.forEach((elem: any) => {
hrefsDetails.push(elem.getAttribute('data-w-href'));
})
}
if (listOfElements.length) { if (listOfElements.length) {
listOfElements.forEach((elem: any) => { listOfElements.forEach((elem: any) => {
console.log('title', elem.innerHTML); console.log('title', elem.innerHTML);
titleList.push(elem.innerHTML); titleList.push(elem.innerHTML);
linkTitleEvent.push(elem.attribute['href']);
}) })
} }
const scrapped: any = { const scrapped: any = {
selector: sel, selector: sel,
titleList, titleList,
linkTitleEvent hrefsDetails,
listOfElementsLinks
}; };
console.log('titleList', titleList); console.log('titleList', titleList);
@ -67,13 +80,21 @@ async function run() {
}); });
// scrappedData.linkTitleEvent.forEach((url: string) => { let ii=0;
// // add delay dataRun.listOfElementsLinks.forEach((url: string) => {
// getEventPageInfo(url) // add delay
// }) console.log('url', url);
// if(ii<3){
// getEventPageInfo(url)
//
// }
ii++;
})
browser.close(); // browser.close();
console.log('DONE'); console.log('DONE');
saveScrappeddata({scrappedData, ...dataRun});
return dataRun;
} }
async function getEventPageInfo(url: string) { async function getEventPageInfo(url: string) {
@ -117,7 +138,7 @@ async function getEventPageInfo(url: string) {
} }
run(); scrappedData = run();
async function getOnePage() { async function getOnePage() {
@ -130,4 +151,4 @@ async function getOnePage() {
} }
// getOnePage() // getOnePage()
saveScrappeddata(scrappedData); // saveScrappeddata(scrappedData);

View File

@ -1,5 +1,41 @@
{ {
"pages": [], "scrappedData": {},
"titleList": null, "selector": "#widgit_results_agenda .widgit_result .title",
"linkTitleEvent": null "titleList": [
"Soirée jeux",
"Conférence Patrimoine : les cloches de l'église des Molières",
"Contes en famille",
"Les Hivernales, spectacle Hip Hop",
"Théâtre - À cause des garçons",
"Soirée jeux",
"Soirée Jazz Blues Latino",
"Soirée jeux",
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
"Soirée jeux",
"Fête de la création et des métiers d'art",
"Fête de la création et des métiers d'art",
"Fête de la création et des métiers d'art",
"Soirée jeux"
],
"hrefsDetails": [
null,
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997492",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6002033",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5995489",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6002454",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6001129",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388"
]
} }