get json from agenda du libre
This commit is contained in:
parent
7c5e018a3c
commit
a297e20777
12
README.md
12
README.md
@ -128,7 +128,17 @@ entêtes:
|
||||
# data scraping
|
||||
Fait avec puppeteer, lancer la commande:
|
||||
`
|
||||
node scrapers/ccpl.js
|
||||
ts-node scrapers/ccpl.js
|
||||
`
|
||||
|
||||
Pour l'agenda du libre:
|
||||
|
||||
`
|
||||
ts-node scrapers/adl.ts
|
||||
`
|
||||
on peut aussi filtrer l'agenda par un certain tag en mettant un argument en plus à la commande
|
||||
`
|
||||
ts-node scrapers/adl.ts openstreetmap
|
||||
`
|
||||
|
||||
---
|
||||
|
1
output/adl_json.json
Normal file
1
output/adl_json.json
Normal file
File diff suppressed because one or more lines are too long
57
package-lock.json
generated
57
package-lock.json
generated
@ -14,7 +14,7 @@
|
||||
"esm": "^3.2.25",
|
||||
"moment": "^2.29.1",
|
||||
"mongoose": "^6.1.6",
|
||||
"node-fetch": "^2.6.1",
|
||||
"node-fetch": "^2.6.6",
|
||||
"rss-parser": "^3.10.0",
|
||||
"ts-node": "^10.4.0",
|
||||
"typescript": "^4.5.4",
|
||||
@ -22,6 +22,7 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^17.0.8",
|
||||
"@types/node-fetch": "^3.0.3",
|
||||
"@types/pg": "^8.6.3",
|
||||
"@types/puppeteer": "^5.4.4",
|
||||
"@types/uuid": "^8.3.4",
|
||||
@ -74,6 +75,16 @@
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz",
|
||||
"integrity": "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
|
||||
},
|
||||
"node_modules/@types/node-fetch": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-3.0.3.tgz",
|
||||
"integrity": "sha512-HhggYPH5N+AQe/OmN6fmhKmRRt2XuNJow+R3pQwJxOOF9GuwM7O2mheyGeIrs5MOIeNjDEdgdoyHBOrFeJBR3g==",
|
||||
"deprecated": "This is a stub types definition. node-fetch provides its own type definitions, so you do not need this installed.",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"node-fetch": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/pg": {
|
||||
"version": "8.6.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.6.3.tgz",
|
||||
@ -666,9 +677,9 @@
|
||||
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
|
||||
},
|
||||
"node_modules/node-fetch": {
|
||||
"version": "2.6.5",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz",
|
||||
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==",
|
||||
"version": "2.6.6",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.6.tgz",
|
||||
"integrity": "sha512-Z8/6vRlTUChSdIgMa51jxQ4lrw/Jy5SOW10ObaA47/RElsAN2c5Pn8bTgFGWn/ibwzXTE8qwr1Yzx28vsecXEA==",
|
||||
"dependencies": {
|
||||
"whatwg-url": "^5.0.0"
|
||||
},
|
||||
@ -940,6 +951,18 @@
|
||||
"node": ">=10.18.1"
|
||||
}
|
||||
},
|
||||
"node_modules/puppeteer/node_modules/node-fetch": {
|
||||
"version": "2.6.5",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz",
|
||||
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"whatwg-url": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": "4.x || >=6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/readable-stream": {
|
||||
"version": "3.6.0",
|
||||
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz",
|
||||
@ -1300,6 +1323,15 @@
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz",
|
||||
"integrity": "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
|
||||
},
|
||||
"@types/node-fetch": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-3.0.3.tgz",
|
||||
"integrity": "sha512-HhggYPH5N+AQe/OmN6fmhKmRRt2XuNJow+R3pQwJxOOF9GuwM7O2mheyGeIrs5MOIeNjDEdgdoyHBOrFeJBR3g==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"node-fetch": "*"
|
||||
}
|
||||
},
|
||||
"@types/pg": {
|
||||
"version": "8.6.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.6.3.tgz",
|
||||
@ -1742,9 +1774,9 @@
|
||||
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
|
||||
},
|
||||
"node-fetch": {
|
||||
"version": "2.6.5",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz",
|
||||
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==",
|
||||
"version": "2.6.6",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.6.tgz",
|
||||
"integrity": "sha512-Z8/6vRlTUChSdIgMa51jxQ4lrw/Jy5SOW10ObaA47/RElsAN2c5Pn8bTgFGWn/ibwzXTE8qwr1Yzx28vsecXEA==",
|
||||
"requires": {
|
||||
"whatwg-url": "^5.0.0"
|
||||
}
|
||||
@ -1949,6 +1981,17 @@
|
||||
"tar-fs": "2.1.1",
|
||||
"unbzip2-stream": "1.4.3",
|
||||
"ws": "8.2.3"
|
||||
},
|
||||
"dependencies": {
|
||||
"node-fetch": {
|
||||
"version": "2.6.5",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz",
|
||||
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"whatwg-url": "^5.0.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"readable-stream": {
|
||||
|
@ -17,7 +17,7 @@
|
||||
"esm": "^3.2.25",
|
||||
"moment": "^2.29.1",
|
||||
"mongoose": "^6.1.6",
|
||||
"node-fetch": "^2.6.1",
|
||||
"node-fetch": "^2.6.6",
|
||||
"rss-parser": "^3.10.0",
|
||||
"ts-node": "^10.4.0",
|
||||
"typescript": "^4.5.4",
|
||||
@ -25,6 +25,7 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^17.0.8",
|
||||
"@types/node-fetch": "^3.0.3",
|
||||
"@types/pg": "^8.6.3",
|
||||
"@types/puppeteer": "^5.4.4",
|
||||
"@types/uuid": "^8.3.4",
|
||||
|
42
scrapers/adl.ts
Normal file
42
scrapers/adl.ts
Normal file
@ -0,0 +1,42 @@
|
||||
import * as fs from "fs";
|
||||
// @ts-ignore
|
||||
import fetch from 'node-fetch';
|
||||
|
||||
const puppeteer = require('puppeteer');
|
||||
// get data from webpage
|
||||
|
||||
// Module-level options object; starts empty and is reassigned further down.
// NOTE(review): presumably puppeteer launch options — confirm; run() as shown
// in this file never reads it.
let options: any = {};
|
||||
// Container for scraped results (page list, title list, event link title).
// NOTE(review): not referenced by run() in the visible part of this script.
let scrappedData: any = {
|
||||
pages: [],
|
||||
titleList: null,
|
||||
linkTitleEvent: null
|
||||
};
|
||||
// Overwrites the empty object above with headless disabled and devtools enabled.
options = {headless: false, devtools: true}
|
||||
|
||||
/**
 * Download the event list from agendadulibre.org as JSON and save it to
 * `./output/adl_json.json`.
 *
 * An optional tag can be given as the first command-line argument
 * (`process.argv[2]`); it is sent as the `tag` query parameter. Without an
 * argument the `tag` parameter is sent empty.
 *
 * A failure while writing the file is logged, not thrown.
 *
 * @returns A promise that settles once the output file has been written
 *          (or the write error has been logged).
 * @throws Error when the server answers with a non-2xx HTTP status.
 */
async function run(): Promise<void> {
  const tag = process.argv[2] ? process.argv[2] : '';

  console.log('argument de tag', process.argv[2]);

  // Build the query with URLSearchParams so that the bracketed keys and the
  // user-supplied tag are percent-encoded instead of being concatenated raw.
  const query = new URLSearchParams({
    city: '',
    'near[location]': '',
    'near[distance]': '',
    region: '',
    tag,
  });
  const response = await fetch('https://www.agendadulibre.org/events.json?' + query.toString());

  // Do not parse or persist an error page as if it were the event list.
  if (!response.ok) {
    throw new Error(`Unexpected HTTP status ${response.status} from agendadulibre.org`);
  }

  const data = await response.json();
  console.log('évènements comptés: ', data.length);

  const fileName = 'adl_json.json';

  // Await the write so that run() only resolves once the file is on disk.
  try {
    await fs.promises.writeFile(`./output/${fileName}`, JSON.stringify(data), 'utf8');
    console.log(`File ${fileName} is written successfully!`);
  } catch (err) {
    console.log(`Error writing file: ${err}`);
  }
}
|
||||
|
||||
run();
|
@ -1,9 +1,6 @@
|
||||
const puppeteer = require('puppeteer');
|
||||
import * as fs from 'fs';
|
||||
import parserConfig from "../config";
|
||||
|
||||
// configure database
|
||||
|
||||
// get data from webpage
|
||||
|
||||
let options: any = {};
|
||||
|
4
utils.ts
4
utils.ts
@ -100,14 +100,14 @@ class utils {
|
||||
/**
|
||||
* file management
|
||||
*/
|
||||
writeFile(fileName: string, data: any, formatData: any){
|
||||
static writeFile(fileName: string, data: any, formatData: any = 'json'){
|
||||
let dataToSave = data;
|
||||
if (formatData == 'json') {
|
||||
dataToSave = JSON.stringify(data, null, 4)
|
||||
}
|
||||
// write file to disk
|
||||
fs.writeFile(
|
||||
`./sources_examples/${fileName}`,
|
||||
`./output/${fileName}`,
|
||||
dataToSave,
|
||||
"utf8",
|
||||
(err: any) => {
|
||||
|
16
yarn.lock
16
yarn.lock
@ -34,6 +34,13 @@
|
||||
"resolved" "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.2.tgz"
|
||||
"version" "1.0.2"
|
||||
|
||||
"@types/node-fetch@^3.0.3":
|
||||
"integrity" "sha512-HhggYPH5N+AQe/OmN6fmhKmRRt2XuNJow+R3pQwJxOOF9GuwM7O2mheyGeIrs5MOIeNjDEdgdoyHBOrFeJBR3g=="
|
||||
"resolved" "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-3.0.3.tgz"
|
||||
"version" "3.0.3"
|
||||
dependencies:
|
||||
"node-fetch" "*"
|
||||
|
||||
"@types/node@*", "@types/node@^17.0.8":
|
||||
"integrity" "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
|
||||
"resolved" "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz"
|
||||
@ -402,7 +409,14 @@
|
||||
"resolved" "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz"
|
||||
"version" "2.1.2"
|
||||
|
||||
"node-fetch@^2.6.1", "node-fetch@2.6.5":
|
||||
"node-fetch@*", "node-fetch@^2.6.6":
|
||||
"integrity" "sha512-Z8/6vRlTUChSdIgMa51jxQ4lrw/Jy5SOW10ObaA47/RElsAN2c5Pn8bTgFGWn/ibwzXTE8qwr1Yzx28vsecXEA=="
|
||||
"resolved" "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.6.tgz"
|
||||
"version" "2.6.6"
|
||||
dependencies:
|
||||
"whatwg-url" "^5.0.0"
|
||||
|
||||
"node-fetch@2.6.5":
|
||||
"integrity" "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ=="
|
||||
"resolved" "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz"
|
||||
"version" "2.6.5"
|
||||
|
Loading…
Reference in New Issue
Block a user