get json from agenda du libre
This commit is contained in:
parent
7c5e018a3c
commit
a297e20777
12
README.md
12
README.md
@ -128,7 +128,17 @@ entêtes:
|
|||||||
# data scraping
|
# data scraping
|
||||||
Fait avec puppeteer, lancer la commande:
|
Fait avec puppeteer, lancer la commande:
|
||||||
`
|
`
|
||||||
node scrapers/ccpl.js
|
ts-node scrapers/ccpl.js
|
||||||
|
`
|
||||||
|
|
||||||
|
Pour l'agenda du libre:
|
||||||
|
|
||||||
|
`
|
||||||
|
ts-node scrapers/adl.ts
|
||||||
|
`
|
||||||
|
On peut aussi filtrer l'agenda par tag en ajoutant un argument à la commande :
|
||||||
|
`
|
||||||
|
ts-node scrapers/adl.ts openstreetmap
|
||||||
`
|
`
|
||||||
|
|
||||||
---
|
---
|
||||||
|
1
output/adl_json.json
Normal file
1
output/adl_json.json
Normal file
File diff suppressed because one or more lines are too long
57
package-lock.json
generated
57
package-lock.json
generated
@ -14,7 +14,7 @@
|
|||||||
"esm": "^3.2.25",
|
"esm": "^3.2.25",
|
||||||
"moment": "^2.29.1",
|
"moment": "^2.29.1",
|
||||||
"mongoose": "^6.1.6",
|
"mongoose": "^6.1.6",
|
||||||
"node-fetch": "^2.6.1",
|
"node-fetch": "^2.6.6",
|
||||||
"rss-parser": "^3.10.0",
|
"rss-parser": "^3.10.0",
|
||||||
"ts-node": "^10.4.0",
|
"ts-node": "^10.4.0",
|
||||||
"typescript": "^4.5.4",
|
"typescript": "^4.5.4",
|
||||||
@ -22,6 +22,7 @@
|
|||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/node": "^17.0.8",
|
"@types/node": "^17.0.8",
|
||||||
|
"@types/node-fetch": "^3.0.3",
|
||||||
"@types/pg": "^8.6.3",
|
"@types/pg": "^8.6.3",
|
||||||
"@types/puppeteer": "^5.4.4",
|
"@types/puppeteer": "^5.4.4",
|
||||||
"@types/uuid": "^8.3.4",
|
"@types/uuid": "^8.3.4",
|
||||||
@ -74,6 +75,16 @@
|
|||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz",
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz",
|
||||||
"integrity": "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
|
"integrity": "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/node-fetch": {
|
||||||
|
"version": "3.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-3.0.3.tgz",
|
||||||
|
"integrity": "sha512-HhggYPH5N+AQe/OmN6fmhKmRRt2XuNJow+R3pQwJxOOF9GuwM7O2mheyGeIrs5MOIeNjDEdgdoyHBOrFeJBR3g==",
|
||||||
|
"deprecated": "This is a stub types definition. node-fetch provides its own type definitions, so you do not need this installed.",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"node-fetch": "*"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@types/pg": {
|
"node_modules/@types/pg": {
|
||||||
"version": "8.6.3",
|
"version": "8.6.3",
|
||||||
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.6.3.tgz",
|
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.6.3.tgz",
|
||||||
@ -666,9 +677,9 @@
|
|||||||
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
|
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
|
||||||
},
|
},
|
||||||
"node_modules/node-fetch": {
|
"node_modules/node-fetch": {
|
||||||
"version": "2.6.5",
|
"version": "2.6.6",
|
||||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz",
|
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.6.tgz",
|
||||||
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==",
|
"integrity": "sha512-Z8/6vRlTUChSdIgMa51jxQ4lrw/Jy5SOW10ObaA47/RElsAN2c5Pn8bTgFGWn/ibwzXTE8qwr1Yzx28vsecXEA==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"whatwg-url": "^5.0.0"
|
"whatwg-url": "^5.0.0"
|
||||||
},
|
},
|
||||||
@ -940,6 +951,18 @@
|
|||||||
"node": ">=10.18.1"
|
"node": ">=10.18.1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/puppeteer/node_modules/node-fetch": {
|
||||||
|
"version": "2.6.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz",
|
||||||
|
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"whatwg-url": "^5.0.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": "4.x || >=6.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/readable-stream": {
|
"node_modules/readable-stream": {
|
||||||
"version": "3.6.0",
|
"version": "3.6.0",
|
||||||
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz",
|
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz",
|
||||||
@ -1300,6 +1323,15 @@
|
|||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz",
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz",
|
||||||
"integrity": "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
|
"integrity": "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
|
||||||
},
|
},
|
||||||
|
"@types/node-fetch": {
|
||||||
|
"version": "3.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-3.0.3.tgz",
|
||||||
|
"integrity": "sha512-HhggYPH5N+AQe/OmN6fmhKmRRt2XuNJow+R3pQwJxOOF9GuwM7O2mheyGeIrs5MOIeNjDEdgdoyHBOrFeJBR3g==",
|
||||||
|
"dev": true,
|
||||||
|
"requires": {
|
||||||
|
"node-fetch": "*"
|
||||||
|
}
|
||||||
|
},
|
||||||
"@types/pg": {
|
"@types/pg": {
|
||||||
"version": "8.6.3",
|
"version": "8.6.3",
|
||||||
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.6.3.tgz",
|
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.6.3.tgz",
|
||||||
@ -1742,9 +1774,9 @@
|
|||||||
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
|
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
|
||||||
},
|
},
|
||||||
"node-fetch": {
|
"node-fetch": {
|
||||||
"version": "2.6.5",
|
"version": "2.6.6",
|
||||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz",
|
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.6.tgz",
|
||||||
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==",
|
"integrity": "sha512-Z8/6vRlTUChSdIgMa51jxQ4lrw/Jy5SOW10ObaA47/RElsAN2c5Pn8bTgFGWn/ibwzXTE8qwr1Yzx28vsecXEA==",
|
||||||
"requires": {
|
"requires": {
|
||||||
"whatwg-url": "^5.0.0"
|
"whatwg-url": "^5.0.0"
|
||||||
}
|
}
|
||||||
@ -1949,6 +1981,17 @@
|
|||||||
"tar-fs": "2.1.1",
|
"tar-fs": "2.1.1",
|
||||||
"unbzip2-stream": "1.4.3",
|
"unbzip2-stream": "1.4.3",
|
||||||
"ws": "8.2.3"
|
"ws": "8.2.3"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"node-fetch": {
|
||||||
|
"version": "2.6.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz",
|
||||||
|
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==",
|
||||||
|
"dev": true,
|
||||||
|
"requires": {
|
||||||
|
"whatwg-url": "^5.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"readable-stream": {
|
"readable-stream": {
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
"esm": "^3.2.25",
|
"esm": "^3.2.25",
|
||||||
"moment": "^2.29.1",
|
"moment": "^2.29.1",
|
||||||
"mongoose": "^6.1.6",
|
"mongoose": "^6.1.6",
|
||||||
"node-fetch": "^2.6.1",
|
"node-fetch": "^2.6.6",
|
||||||
"rss-parser": "^3.10.0",
|
"rss-parser": "^3.10.0",
|
||||||
"ts-node": "^10.4.0",
|
"ts-node": "^10.4.0",
|
||||||
"typescript": "^4.5.4",
|
"typescript": "^4.5.4",
|
||||||
@ -25,6 +25,7 @@
|
|||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/node": "^17.0.8",
|
"@types/node": "^17.0.8",
|
||||||
|
"@types/node-fetch": "^3.0.3",
|
||||||
"@types/pg": "^8.6.3",
|
"@types/pg": "^8.6.3",
|
||||||
"@types/puppeteer": "^5.4.4",
|
"@types/puppeteer": "^5.4.4",
|
||||||
"@types/uuid": "^8.3.4",
|
"@types/uuid": "^8.3.4",
|
||||||
|
42
scrapers/adl.ts
Normal file
42
scrapers/adl.ts
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
import * as fs from "fs";
|
||||||
|
// @ts-ignore
|
||||||
|
import fetch from 'node-fetch';
|
||||||
|
|
||||||
|
const puppeteer = require('puppeteer');
|
||||||
|
// get data from webpage
|
||||||
|
|
||||||
|
let options: any = {};
|
||||||
|
let scrappedData: any = {
|
||||||
|
pages: [],
|
||||||
|
titleList: null,
|
||||||
|
linkTitleEvent: null
|
||||||
|
};
|
||||||
|
options = {headless: false, devtools: true}
|
||||||
|
|
||||||
|
/**
 * Download the Agenda du Libre events list as JSON and save it to
 * `./output/adl_json.json`.
 *
 * The first command-line argument (`process.argv[2]`), when present, is sent
 * as the `tag` query parameter to filter the events; otherwise the tag is
 * left empty.
 *
 * On a non-2xx HTTP response nothing is written: the status is logged and
 * `process.exitCode` is set to 1.
 */
async function run() {
    const tag = process.argv[2] ? process.argv[2] : '';

    console.log('argument de tag', process.argv[2]);

    // The tag is percent-encoded so that characters such as '&', '#' or spaces
    // cannot break or extend the query string.
    const url =
        'https://www.agendadulibre.org/events.json?city=&near%5Blocation%5D=&near%5Bdistance%5D=&region=&tag=' +
        encodeURIComponent(tag);

    const response = await fetch(url);
    if (!response.ok) {
        // Do not overwrite the output file with an error payload.
        console.error(`Error fetching events: HTTP ${response.status} ${response.statusText}`);
        process.exitCode = 1;
        return;
    }

    const data = await response.json();
    console.log('évènements comptés: ', data.length);

    const fileName = 'adl_json.json';

    fs.writeFile(
        `./output/${fileName}`,
        JSON.stringify(data),
        "utf8",
        (err: any) => {
            if (err) {
                console.log(`Error writing file: ${err}`);
            } else {
                console.log(`File ${fileName} is written successfully!`);
            }
        }
    );
}
|
||||||
|
|
||||||
|
run();
|
@ -1,9 +1,6 @@
|
|||||||
const puppeteer = require('puppeteer');
|
const puppeteer = require('puppeteer');
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import parserConfig from "../config";
|
import parserConfig from "../config";
|
||||||
|
|
||||||
// configure database
|
|
||||||
|
|
||||||
// get data from webpage
|
// get data from webpage
|
||||||
|
|
||||||
let options: any = {};
|
let options: any = {};
|
||||||
|
4
utils.ts
4
utils.ts
@ -100,14 +100,14 @@ class utils {
|
|||||||
/**
|
/**
|
||||||
* file management
|
* file management
|
||||||
*/
|
*/
|
||||||
writeFile(fileName: string, data: any, formatData: any){
|
static writeFile(fileName: string, data: any, formatData: any = 'json'){
|
||||||
let dataToSave = data;
|
let dataToSave = data;
|
||||||
if (formatData == 'json') {
|
if (formatData == 'json') {
|
||||||
dataToSave = JSON.stringify(data, null, 4)
|
dataToSave = JSON.stringify(data, null, 4)
|
||||||
}
|
}
|
||||||
// write file to disk
|
// write file to disk
|
||||||
fs.writeFile(
|
fs.writeFile(
|
||||||
`./sources_examples/${fileName}`,
|
`./output/${fileName}`,
|
||||||
dataToSave,
|
dataToSave,
|
||||||
"utf8",
|
"utf8",
|
||||||
(err: any) => {
|
(err: any) => {
|
||||||
|
16
yarn.lock
16
yarn.lock
@ -34,6 +34,13 @@
|
|||||||
"resolved" "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.2.tgz"
|
"resolved" "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.2.tgz"
|
||||||
"version" "1.0.2"
|
"version" "1.0.2"
|
||||||
|
|
||||||
|
"@types/node-fetch@^3.0.3":
|
||||||
|
"integrity" "sha512-HhggYPH5N+AQe/OmN6fmhKmRRt2XuNJow+R3pQwJxOOF9GuwM7O2mheyGeIrs5MOIeNjDEdgdoyHBOrFeJBR3g=="
|
||||||
|
"resolved" "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-3.0.3.tgz"
|
||||||
|
"version" "3.0.3"
|
||||||
|
dependencies:
|
||||||
|
"node-fetch" "*"
|
||||||
|
|
||||||
"@types/node@*", "@types/node@^17.0.8":
|
"@types/node@*", "@types/node@^17.0.8":
|
||||||
"integrity" "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
|
"integrity" "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
|
||||||
"resolved" "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz"
|
"resolved" "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz"
|
||||||
@ -402,7 +409,14 @@
|
|||||||
"resolved" "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz"
|
"resolved" "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz"
|
||||||
"version" "2.1.2"
|
"version" "2.1.2"
|
||||||
|
|
||||||
"node-fetch@^2.6.1", "node-fetch@2.6.5":
|
"node-fetch@*", "node-fetch@^2.6.6":
|
||||||
|
"integrity" "sha512-Z8/6vRlTUChSdIgMa51jxQ4lrw/Jy5SOW10ObaA47/RElsAN2c5Pn8bTgFGWn/ibwzXTE8qwr1Yzx28vsecXEA=="
|
||||||
|
"resolved" "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.6.tgz"
|
||||||
|
"version" "2.6.6"
|
||||||
|
dependencies:
|
||||||
|
"whatwg-url" "^5.0.0"
|
||||||
|
|
||||||
|
"node-fetch@2.6.5":
|
||||||
"integrity" "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ=="
|
"integrity" "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ=="
|
||||||
"resolved" "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz"
|
"resolved" "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz"
|
||||||
"version" "2.6.5"
|
"version" "2.6.5"
|
||||||
|
Loading…
Reference in New Issue
Block a user