Browse Source

gather links

master
Tykayn 5 months ago committed by tykayn
parent
commit
4faa6ef364
  1. 47
      scrapers/ccpl.ts
  2. 42
      sources_examples/ccpl_scrapped.json

47
scrapers/ccpl.ts

@ -29,7 +29,11 @@ let scrappedData: any = {
};
options = {headless: false, devtools: true}
/**
* Fetch the CCPL agenda,
* find all links to event detail pages,
* then scrape data from each event page.
*/
async function run() {
const browser = await puppeteer.launch(options);
@ -40,26 +44,35 @@ async function run() {
// titre: #widgit_event_details .widgit_title
let scrappedData = await page.evaluate(() => {
let dataRun = await page.evaluate(() => {
let sel = '#widgit_results_agenda .widgit_result .title';
let listOfElements: any = document.querySelectorAll(sel);
let linkTitleEvent: any = [];
let listOfElements: any = document.querySelectorAll('#widgit_results_agenda .widgit_result .title');
let listOfElementsLinks: any = document.querySelectorAll('#widgit_results_agenda a');
let hrefsDetails: any = [];
let titleList: any = [];
if (listOfElementsLinks.length) {
listOfElementsLinks.forEach((elem: any) => {
hrefsDetails.push(elem.getAttribute('data-w-href'));
})
}
if (listOfElements.length) {
listOfElements.forEach((elem: any) => {
console.log('title', elem.innerHTML);
titleList.push(elem.innerHTML);
linkTitleEvent.push(elem.attribute['href']);
})
}
const scrapped: any = {
selector: sel,
titleList,
linkTitleEvent
hrefsDetails,
listOfElementsLinks
};
console.log('titleList', titleList);
@ -67,13 +80,21 @@ async function run() {
});
// scrappedData.linkTitleEvent.forEach((url: string) => {
// // add delay
// getEventPageInfo(url)
// })
let ii=0;
dataRun.listOfElementsLinks.forEach((url: string) => {
// add delay
console.log('url', url);
// if(ii<3){
// getEventPageInfo(url)
//
// }
ii++;
})
browser.close();
// browser.close();
console.log('DONE');
saveScrappeddata({scrappedData, ...dataRun});
return dataRun;
}
async function getEventPageInfo(url: string) {
@ -117,7 +138,7 @@ async function getEventPageInfo(url: string) {
}
run();
scrappedData = run();
async function getOnePage() {
@ -130,4 +151,4 @@ async function getOnePage() {
}
// getOnePage()
saveScrappeddata(scrappedData);
// saveScrappeddata(scrappedData);

42
sources_examples/ccpl_scrapped.json

@ -1,5 +1,41 @@
{
"pages": [],
"titleList": null,
"linkTitleEvent": null
"scrappedData": {},
"selector": "#widgit_results_agenda .widgit_result .title",
"titleList": [
"Soirée jeux",
"Conférence Patrimoine : les cloches de l'église des Molières",
"Contes en famille",
"Les Hivernales, spectacle Hip Hop",
"Théâtre - À cause des garçons",
"Soirée jeux",
"Soirée Jazz Blues Latino",
"Soirée jeux",
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
"Fête des Plantes de Printemps du Domaine de Saint-Jean de Beauregard",
"Soirée jeux",
"Fête de la création et des métiers d'art",
"Fête de la création et des métiers d'art",
"Fête de la création et des métiers d'art",
"Soirée jeux"
],
"hrefsDetails": [
null,
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997492",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6002033",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5995489",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6002454",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=6001129",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4718535",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=4747897",
"https://widgets.apidae-tourisme.com/details/247.js?apidae_id=5997388"
]
}
Loading…
Cancel
Save