|
|
|
@ -1,25 +1,10 @@
|
|
|
|
|
const puppeteer = require('puppeteer'); |
|
|
|
|
import * as fs from 'fs'; |
|
|
|
|
import parserConfig from "../config"; |
|
|
|
|
|
|
|
|
|
// configure database
|
|
|
|
|
|
|
|
|
|
// get data from webpage
|
|
|
|
|
/**
 * Serialises the scraped data as pretty-printed JSON (4-space indent) and
 * writes it asynchronously to `./sources_examples/ccpl_scrapped.json`.
 *
 * The function is fire-and-forget: it returns immediately and reports the
 * outcome of the write on the console. It never throws; serialisation
 * failures are reported through the same "Error writing file" message as
 * I/O failures.
 *
 * @param data - Value to persist. It must be JSON-serialisable; circular
 *   structures, BigInt values, or a top-level `undefined` are reported as an
 *   error and nothing is written.
 */
function saveScrappeddata(data: unknown): void {
    const fileName = 'ccpl_scrapped.json';

    // Serialise up front: JSON.stringify throws on circular/BigInt input and
    // returns undefined for a top-level undefined/function/symbol. Either case
    // would otherwise escape as a synchronous exception from this function.
    let serialized: string;
    try {
        const json: string | undefined = JSON.stringify(data, null, 4);
        if (json === undefined) {
            throw new TypeError('data is not JSON-serializable');
        }
        serialized = json;
    } catch (err: unknown) {
        console.log(`Error writing file: ${err}`);
        return;
    }

    fs.writeFile(
        `./sources_examples/${fileName}`,
        serialized,
        "utf8",
        (err: unknown) => {
            if (err) {
                console.log(`Error writing file: ${err}`);
            } else {
                console.log(`File ${fileName} is written successfully!`);
            }
        }
    );
}
|
|
|
|
|
|
|
|
|
let options: any = {}; |
|
|
|
|
let scrappedData: any = { |
|
|
|
@ -71,7 +56,7 @@ async function run() {
|
|
|
|
|
const scrapped: any = { |
|
|
|
|
selector: sel, |
|
|
|
|
titleList, |
|
|
|
|
hrefsDetails, |
|
|
|
|
hrefsDetails, |
|
|
|
|
listOfElementsLinks |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
@ -80,24 +65,42 @@ async function run() {
|
|
|
|
|
}); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
let ii=0; |
|
|
|
|
dataRun.listOfElementsLinks.forEach((url: string) => { |
|
|
|
|
// add delay
|
|
|
|
|
console.log('url', url); |
|
|
|
|
// if(ii<3){
|
|
|
|
|
// getEventPageInfo(url)
|
|
|
|
|
//
|
|
|
|
|
// }
|
|
|
|
|
ii++; |
|
|
|
|
}) |
|
|
|
|
let ii = 0; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dataRun.listOfElementsLinks.forEach((url: string) => { |
|
|
|
|
// add delay
|
|
|
|
|
console.log('url', url); |
|
|
|
|
if (ii < 3) { |
|
|
|
|
let eventInfo = getEventPageInfo(url) |
|
|
|
|
scrappedData.pages.push(eventInfo); |
|
|
|
|
} |
|
|
|
|
ii++; |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// browser.close();
|
|
|
|
|
console.log('DONE'); |
|
|
|
|
saveScrappeddata({scrappedData, ...dataRun}); |
|
|
|
|
let data = {scrappedData, ...dataRun}; |
|
|
|
|
let fileName = 'ccpl_scrapped.json'; |
|
|
|
|
|
|
|
|
|
fs.writeFile( |
|
|
|
|
`./sources_examples/${fileName}`, |
|
|
|
|
JSON.stringify(data, null, 4), |
|
|
|
|
"utf8", |
|
|
|
|
(err: any) => { |
|
|
|
|
if (err) { |
|
|
|
|
console.log(`Error writing file: ${err}`); |
|
|
|
|
} else { |
|
|
|
|
console.log(`File ${fileName} is written successfully!`); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
browser.close(); |
|
|
|
|
return dataRun; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
async function getEventPageInfo(url: string) { |
|
|
|
|
async function getEventPageInfo(url: string):Promise<any> { |
|
|
|
|
const browser = await puppeteer.launch(options); |
|
|
|
|
const page = await browser.newPage(); |
|
|
|
|
await page.goto(url); |
|
|
|
|