get json from agenda du libre

This commit is contained in:
Tykayn 2022-01-13 10:04:14 +01:00 committed by tykayn
parent 7c5e018a3c
commit a297e20777
8 changed files with 123 additions and 15 deletions

View File

@ -128,7 +128,17 @@ entêtes:
# data scraping # data scraping
Fait avec puppeteer, lancer la commande: Fait avec puppeteer, lancer la commande:
` `
node scrapers/ccpl.js ts-node scrapers/ccpl.js
`
Pour l'agenda du libre:
`
ts-node scrapers/adl.ts
`
On peut aussi filtrer l'agenda par tag en ajoutant un argument à la commande :
`
ts-node scrapers/adl.ts openstreetmap
` `
--- ---

1
output/adl_json.json Normal file

File diff suppressed because one or more lines are too long

57
package-lock.json generated
View File

@ -14,7 +14,7 @@
"esm": "^3.2.25", "esm": "^3.2.25",
"moment": "^2.29.1", "moment": "^2.29.1",
"mongoose": "^6.1.6", "mongoose": "^6.1.6",
"node-fetch": "^2.6.1", "node-fetch": "^2.6.6",
"rss-parser": "^3.10.0", "rss-parser": "^3.10.0",
"ts-node": "^10.4.0", "ts-node": "^10.4.0",
"typescript": "^4.5.4", "typescript": "^4.5.4",
@ -22,6 +22,7 @@
}, },
"devDependencies": { "devDependencies": {
"@types/node": "^17.0.8", "@types/node": "^17.0.8",
"@types/node-fetch": "^3.0.3",
"@types/pg": "^8.6.3", "@types/pg": "^8.6.3",
"@types/puppeteer": "^5.4.4", "@types/puppeteer": "^5.4.4",
"@types/uuid": "^8.3.4", "@types/uuid": "^8.3.4",
@ -74,6 +75,16 @@
"resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz",
"integrity": "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg==" "integrity": "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
}, },
"node_modules/@types/node-fetch": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-3.0.3.tgz",
"integrity": "sha512-HhggYPH5N+AQe/OmN6fmhKmRRt2XuNJow+R3pQwJxOOF9GuwM7O2mheyGeIrs5MOIeNjDEdgdoyHBOrFeJBR3g==",
"deprecated": "This is a stub types definition. node-fetch provides its own type definitions, so you do not need this installed.",
"dev": true,
"dependencies": {
"node-fetch": "*"
}
},
"node_modules/@types/pg": { "node_modules/@types/pg": {
"version": "8.6.3", "version": "8.6.3",
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.6.3.tgz", "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.6.3.tgz",
@ -666,9 +677,9 @@
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
}, },
"node_modules/node-fetch": { "node_modules/node-fetch": {
"version": "2.6.5", "version": "2.6.6",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.6.tgz",
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==", "integrity": "sha512-Z8/6vRlTUChSdIgMa51jxQ4lrw/Jy5SOW10ObaA47/RElsAN2c5Pn8bTgFGWn/ibwzXTE8qwr1Yzx28vsecXEA==",
"dependencies": { "dependencies": {
"whatwg-url": "^5.0.0" "whatwg-url": "^5.0.0"
}, },
@ -940,6 +951,18 @@
"node": ">=10.18.1" "node": ">=10.18.1"
} }
}, },
"node_modules/puppeteer/node_modules/node-fetch": {
"version": "2.6.5",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz",
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==",
"dev": true,
"dependencies": {
"whatwg-url": "^5.0.0"
},
"engines": {
"node": "4.x || >=6.0.0"
}
},
"node_modules/readable-stream": { "node_modules/readable-stream": {
"version": "3.6.0", "version": "3.6.0",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz",
@ -1300,6 +1323,15 @@
"resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz",
"integrity": "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg==" "integrity": "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
}, },
"@types/node-fetch": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-3.0.3.tgz",
"integrity": "sha512-HhggYPH5N+AQe/OmN6fmhKmRRt2XuNJow+R3pQwJxOOF9GuwM7O2mheyGeIrs5MOIeNjDEdgdoyHBOrFeJBR3g==",
"dev": true,
"requires": {
"node-fetch": "*"
}
},
"@types/pg": { "@types/pg": {
"version": "8.6.3", "version": "8.6.3",
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.6.3.tgz", "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.6.3.tgz",
@ -1742,9 +1774,9 @@
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
}, },
"node-fetch": { "node-fetch": {
"version": "2.6.5", "version": "2.6.6",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.6.tgz",
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==", "integrity": "sha512-Z8/6vRlTUChSdIgMa51jxQ4lrw/Jy5SOW10ObaA47/RElsAN2c5Pn8bTgFGWn/ibwzXTE8qwr1Yzx28vsecXEA==",
"requires": { "requires": {
"whatwg-url": "^5.0.0" "whatwg-url": "^5.0.0"
} }
@ -1949,6 +1981,17 @@
"tar-fs": "2.1.1", "tar-fs": "2.1.1",
"unbzip2-stream": "1.4.3", "unbzip2-stream": "1.4.3",
"ws": "8.2.3" "ws": "8.2.3"
},
"dependencies": {
"node-fetch": {
"version": "2.6.5",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz",
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==",
"dev": true,
"requires": {
"whatwg-url": "^5.0.0"
}
}
} }
}, },
"readable-stream": { "readable-stream": {

View File

@ -17,7 +17,7 @@
"esm": "^3.2.25", "esm": "^3.2.25",
"moment": "^2.29.1", "moment": "^2.29.1",
"mongoose": "^6.1.6", "mongoose": "^6.1.6",
"node-fetch": "^2.6.1", "node-fetch": "^2.6.6",
"rss-parser": "^3.10.0", "rss-parser": "^3.10.0",
"ts-node": "^10.4.0", "ts-node": "^10.4.0",
"typescript": "^4.5.4", "typescript": "^4.5.4",
@ -25,6 +25,7 @@
}, },
"devDependencies": { "devDependencies": {
"@types/node": "^17.0.8", "@types/node": "^17.0.8",
"@types/node-fetch": "^3.0.3",
"@types/pg": "^8.6.3", "@types/pg": "^8.6.3",
"@types/puppeteer": "^5.4.4", "@types/puppeteer": "^5.4.4",
"@types/uuid": "^8.3.4", "@types/uuid": "^8.3.4",

42
scrapers/adl.ts Normal file
View File

@ -0,0 +1,42 @@
import * as fs from "fs";
// @ts-ignore
import fetch from 'node-fetch';
const puppeteer = require('puppeteer');
// Module-level scraper state. Neither value is read by `run` in this file.
// NOTE(review): `options` looks like a puppeteer launch configuration
// (visible browser window with devtools open) — confirm before reusing it.
let options: any = { headless: false, devtools: true };

// Container for scraped results: starts with an empty page list and no
// title / link information.
let scrappedData: any = {
  linkTitleEvent: null,
  titleList: null,
  pages: [],
};
/**
 * Downloads the Agenda du Libre event list as JSON and saves it to
 * `./output/adl_json.json`.
 *
 * The first command-line argument (`process.argv[2]`), when present, is sent
 * as the `tag` query parameter; without it the `tag` parameter is left empty.
 *
 * @returns A promise that settles once the file write has completed (or its
 *          failure has been logged).
 * @throws Error when the server answers with a non-2xx HTTP status. Network
 *         and JSON-parsing errors raised by `fetch` / `response.json()`
 *         propagate unchanged.
 */
async function run(): Promise<void> {
  const tag = process.argv[2] ?? '';
  console.log('argument de tag', process.argv[2]);

  // Encode the tag so characters such as spaces, '&' or '#' cannot corrupt
  // the query string.
  const url =
    'https://www.agendadulibre.org/events.json?city=&near%5Blocation%5D=&near%5Bdistance%5D=&region=&tag=' +
    encodeURIComponent(tag);

  const response = await fetch(url);
  // Fail loudly on HTTP errors instead of parsing and saving an error page.
  if (!response.ok) {
    throw new Error(
      `Agenda du Libre request failed: ${response.status} ${response.statusText}`
    );
  }

  const data = await response.json();
  console.log('évènements comptés: ', data.length);

  const fileName = 'adl_json.json';
  try {
    // Await the write so the promise returned by run() only resolves once the
    // file is actually on disk.
    await fs.promises.writeFile(
      `./output/${fileName}`,
      JSON.stringify(data),
      'utf8'
    );
    console.log(`File ${fileName} is written successfully!`);
  } catch (err) {
    // Best effort, as before: a write failure is reported but not rethrown.
    console.log(`Error writing file: ${err}`);
  }
}
// Script entry point: start the scraper and report any error it raises,
// setting a non-zero exit code instead of leaving the promise rejection
// unhandled.
run().catch((err: unknown) => {
  console.error('Agenda du Libre scraping failed:', err);
  process.exitCode = 1;
});

View File

@ -1,9 +1,6 @@
const puppeteer = require('puppeteer'); const puppeteer = require('puppeteer');
import * as fs from 'fs'; import * as fs from 'fs';
import parserConfig from "../config"; import parserConfig from "../config";
// configure database
// get data from webpage // get data from webpage
let options: any = {}; let options: any = {};

View File

@ -100,14 +100,14 @@ class utils {
/** /**
* file management * file management
*/ */
writeFile(fileName: string, data: any, formatData: any){ static writeFile(fileName: string, data: any, formatData: any = 'json'){
let dataToSave = data; let dataToSave = data;
if (formatData == 'json') { if (formatData == 'json') {
dataToSave = JSON.stringify(data, null, 4) dataToSave = JSON.stringify(data, null, 4)
} }
// write file to disk // write file to disk
fs.writeFile( fs.writeFile(
`./sources_examples/${fileName}`, `./output/${fileName}`,
dataToSave, dataToSave,
"utf8", "utf8",
(err: any) => { (err: any) => {

View File

@ -34,6 +34,13 @@
"resolved" "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.2.tgz" "resolved" "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.2.tgz"
"version" "1.0.2" "version" "1.0.2"
"@types/node-fetch@^3.0.3":
"integrity" "sha512-HhggYPH5N+AQe/OmN6fmhKmRRt2XuNJow+R3pQwJxOOF9GuwM7O2mheyGeIrs5MOIeNjDEdgdoyHBOrFeJBR3g=="
"resolved" "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-3.0.3.tgz"
"version" "3.0.3"
dependencies:
"node-fetch" "*"
"@types/node@*", "@types/node@^17.0.8": "@types/node@*", "@types/node@^17.0.8":
"integrity" "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg==" "integrity" "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
"resolved" "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz" "resolved" "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz"
@ -402,7 +409,14 @@
"resolved" "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz" "resolved" "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz"
"version" "2.1.2" "version" "2.1.2"
"node-fetch@^2.6.1", "node-fetch@2.6.5": "node-fetch@*", "node-fetch@^2.6.6":
"integrity" "sha512-Z8/6vRlTUChSdIgMa51jxQ4lrw/Jy5SOW10ObaA47/RElsAN2c5Pn8bTgFGWn/ibwzXTE8qwr1Yzx28vsecXEA=="
"resolved" "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.6.tgz"
"version" "2.6.6"
dependencies:
"whatwg-url" "^5.0.0"
"node-fetch@2.6.5":
"integrity" "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==" "integrity" "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ=="
"resolved" "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz" "resolved" "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz"
"version" "2.6.5" "version" "2.6.5"