|
|
|
@ -1,25 +1,10 @@
|
|
|
|
|
const puppeteer = require('puppeteer');
|
|
|
|
|
import * as fs from 'fs';
|
|
|
|
|
import parserConfig from "../config";
|
|
|
|
|
|
|
|
|
|
// configure database
|
|
|
|
|
|
|
|
|
|
// get data from webpage
|
|
|
|
|
/**
 * Serialises the scraped payload as pretty-printed JSON (4-space indent) and
 * writes it asynchronously to `./sources_examples/ccpl_scrapped.json`.
 *
 * The write is fire-and-forget: the function returns right after scheduling
 * it, and the outcome is only reported on the console.
 *
 * @param data - Value to serialise with `JSON.stringify`.
 */
function saveScrappeddata(data: any) {
    const fileName = 'ccpl_scrapped.json';
    const targetPath = `./sources_examples/${fileName}`;
    const serialized = JSON.stringify(data, null, 4);

    // Invoked by fs.writeFile once the write has finished or failed.
    const reportResult = (err: any) => {
        if (!err) {
            console.log(`File ${fileName} is written successfully!`);
            return;
        }
        console.log(`Error writing file: ${err}`);
    };

    fs.writeFile(targetPath, serialized, "utf8", reportResult);
}
|
|
|
|
|
|
|
|
|
|
let options: any = {};
|
|
|
|
|
let scrappedData: any = {
|
|
|
|
@ -71,7 +56,7 @@ async function run() {
|
|
|
|
|
const scrapped: any = {
|
|
|
|
|
selector: sel,
|
|
|
|
|
titleList,
|
|
|
|
|
hrefsDetails,
|
|
|
|
|
hrefsDetails,
|
|
|
|
|
listOfElementsLinks
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
@ -80,24 +65,42 @@ async function run() {
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
let ii=0;
|
|
|
|
|
dataRun.listOfElementsLinks.forEach((url: string) => {
|
|
|
|
|
// add delay
|
|
|
|
|
console.log('url', url);
|
|
|
|
|
// if(ii<3){
|
|
|
|
|
// getEventPageInfo(url)
|
|
|
|
|
//
|
|
|
|
|
// }
|
|
|
|
|
ii++;
|
|
|
|
|
})
|
|
|
|
|
let ii = 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dataRun.listOfElementsLinks.forEach((url: string) => {
|
|
|
|
|
// add delay
|
|
|
|
|
console.log('url', url);
|
|
|
|
|
if (ii < 3) {
|
|
|
|
|
let eventInfo = getEventPageInfo(url)
|
|
|
|
|
scrappedData.pages.push(eventInfo);
|
|
|
|
|
}
|
|
|
|
|
ii++;
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// browser.close();
|
|
|
|
|
console.log('DONE');
|
|
|
|
|
saveScrappeddata({scrappedData, ...dataRun});
|
|
|
|
|
let data = {scrappedData, ...dataRun};
|
|
|
|
|
let fileName = 'ccpl_scrapped.json';
|
|
|
|
|
|
|
|
|
|
fs.writeFile(
|
|
|
|
|
`./sources_examples/${fileName}`,
|
|
|
|
|
JSON.stringify(data, null, 4),
|
|
|
|
|
"utf8",
|
|
|
|
|
(err: any) => {
|
|
|
|
|
if (err) {
|
|
|
|
|
console.log(`Error writing file: ${err}`);
|
|
|
|
|
} else {
|
|
|
|
|
console.log(`File ${fileName} is written successfully!`);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
browser.close();
|
|
|
|
|
return dataRun;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async function getEventPageInfo(url: string) {
|
|
|
|
|
async function getEventPageInfo(url: string):Promise<any> {
|
|
|
|
|
const browser = await puppeteer.launch(options);
|
|
|
|
|
const page = await browser.newPage();
|
|
|
|
|
await page.goto(url);
|
|
|
|
|