Browse Source

get json from agenda du libre

master
Tykayn 4 months ago committed by tykayn
parent
commit
a297e20777
  1. 12
      README.md
  2. 1
      output/adl_json.json
  3. 57
      package-lock.json
  4. 3
      package.json
  5. 42
      scrapers/adl.ts
  6. 3
      scrapers/ccpl.ts
  7. 4
      utils.ts
  8. 16
      yarn.lock

12
README.md

@ -128,7 +128,17 @@ entêtes:
# data scraping
Fait avec puppeteer, lancer la commande:
`
node scrapers/ccpl.js
ts-node scrapers/ccpl.js
`
Pour l'agenda du libre :
`
ts-node scrapers/adl.ts
`
On peut aussi filtrer l'agenda par tag en ajoutant un argument à la commande :
`
ts-node scrapers/adl.ts openstreetmap
`
---

1
output/adl_json.json

File diff suppressed because one or more lines are too long

57
package-lock.json generated

@ -14,7 +14,7 @@
"esm": "^3.2.25",
"moment": "^2.29.1",
"mongoose": "^6.1.6",
"node-fetch": "^2.6.1",
"node-fetch": "^2.6.6",
"rss-parser": "^3.10.0",
"ts-node": "^10.4.0",
"typescript": "^4.5.4",
@ -22,6 +22,7 @@
},
"devDependencies": {
"@types/node": "^17.0.8",
"@types/node-fetch": "^3.0.3",
"@types/pg": "^8.6.3",
"@types/puppeteer": "^5.4.4",
"@types/uuid": "^8.3.4",
@ -74,6 +75,16 @@
"resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz",
"integrity": "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
},
"node_modules/@types/node-fetch": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-3.0.3.tgz",
"integrity": "sha512-HhggYPH5N+AQe/OmN6fmhKmRRt2XuNJow+R3pQwJxOOF9GuwM7O2mheyGeIrs5MOIeNjDEdgdoyHBOrFeJBR3g==",
"deprecated": "This is a stub types definition. node-fetch provides its own type definitions, so you do not need this installed.",
"dev": true,
"dependencies": {
"node-fetch": "*"
}
},
"node_modules/@types/pg": {
"version": "8.6.3",
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.6.3.tgz",
@ -666,9 +677,9 @@
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
},
"node_modules/node-fetch": {
"version": "2.6.5",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz",
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==",
"version": "2.6.6",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.6.tgz",
"integrity": "sha512-Z8/6vRlTUChSdIgMa51jxQ4lrw/Jy5SOW10ObaA47/RElsAN2c5Pn8bTgFGWn/ibwzXTE8qwr1Yzx28vsecXEA==",
"dependencies": {
"whatwg-url": "^5.0.0"
},
@ -940,6 +951,18 @@
"node": ">=10.18.1"
}
},
"node_modules/puppeteer/node_modules/node-fetch": {
"version": "2.6.5",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz",
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==",
"dev": true,
"dependencies": {
"whatwg-url": "^5.0.0"
},
"engines": {
"node": "4.x || >=6.0.0"
}
},
"node_modules/readable-stream": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz",
@ -1300,6 +1323,15 @@
"resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz",
"integrity": "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
},
"@types/node-fetch": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-3.0.3.tgz",
"integrity": "sha512-HhggYPH5N+AQe/OmN6fmhKmRRt2XuNJow+R3pQwJxOOF9GuwM7O2mheyGeIrs5MOIeNjDEdgdoyHBOrFeJBR3g==",
"dev": true,
"requires": {
"node-fetch": "*"
}
},
"@types/pg": {
"version": "8.6.3",
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.6.3.tgz",
@ -1742,9 +1774,9 @@
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
},
"node-fetch": {
"version": "2.6.5",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz",
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==",
"version": "2.6.6",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.6.tgz",
"integrity": "sha512-Z8/6vRlTUChSdIgMa51jxQ4lrw/Jy5SOW10ObaA47/RElsAN2c5Pn8bTgFGWn/ibwzXTE8qwr1Yzx28vsecXEA==",
"requires": {
"whatwg-url": "^5.0.0"
}
@ -1949,6 +1981,17 @@
"tar-fs": "2.1.1",
"unbzip2-stream": "1.4.3",
"ws": "8.2.3"
},
"dependencies": {
"node-fetch": {
"version": "2.6.5",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz",
"integrity": "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ==",
"dev": true,
"requires": {
"whatwg-url": "^5.0.0"
}
}
}
},
"readable-stream": {

3
package.json

@ -17,7 +17,7 @@
"esm": "^3.2.25",
"moment": "^2.29.1",
"mongoose": "^6.1.6",
"node-fetch": "^2.6.1",
"node-fetch": "^2.6.6",
"rss-parser": "^3.10.0",
"ts-node": "^10.4.0",
"typescript": "^4.5.4",
@ -25,6 +25,7 @@
},
"devDependencies": {
"@types/node": "^17.0.8",
"@types/node-fetch": "^3.0.3",
"@types/pg": "^8.6.3",
"@types/puppeteer": "^5.4.4",
"@types/uuid": "^8.3.4",

42
scrapers/adl.ts

@ -0,0 +1,42 @@
import * as fs from "fs";
// @ts-ignore
import fetch from 'node-fetch';
const puppeteer = require('puppeteer');
// Module-level scraper state.
// NOTE(review): neither value is read by run() in this file; they look like
// leftovers from a Puppeteer-based scraper — confirm before removing.
let scrappedData: any = { pages: [], titleList: null, linkTitleEvent: null };
// Presumably Puppeteer launch settings (visible browser, devtools open) — TODO confirm.
let options: any = { headless: false, devtools: true };
/**
 * Downloads the Agenda du Libre event list as JSON and saves it to
 * `./output/adl_json.json`.
 *
 * The first command-line argument (`process.argv[2]`), when present, is sent
 * as the `tag` query parameter to filter events; otherwise the tag is empty.
 *
 * @returns A promise that settles once the write attempt has completed.
 * @throws Error when the server answers with a non-2xx status. Network and
 *   JSON-parsing rejections from `fetch` / `response.json()` propagate as-is.
 */
async function run(): Promise<void> {
  const tag = process.argv[2] ?? '';
  console.log('argument de tag', process.argv[2]);

  // Encode the tag so spaces, '&', '#', accents, etc. cannot break or alter the query string.
  const url =
    'https://www.agendadulibre.org/events.json?city=&near%5Blocation%5D=&near%5Bdistance%5D=&region=&tag=' +
    encodeURIComponent(tag);

  const response = await fetch(url);
  if (!response.ok) {
    // Fail early instead of trying to parse an HTML error page as JSON.
    throw new Error(`Request to ${url} failed: ${response.status} ${response.statusText}`);
  }

  const data: unknown = await response.json();
  // The payload is only countable when it is an array; report 0 otherwise.
  const eventCount = Array.isArray(data) ? data.length : 0;
  console.log('évènements comptés: ', eventCount);

  const fileName = 'adl_json.json';
  try {
    // Await the write so run() only resolves after the file is on disk.
    await fs.promises.writeFile(`./output/${fileName}`, JSON.stringify(data), 'utf8');
    console.log(`File ${fileName} is written successfully!`);
  } catch (err: unknown) {
    // Write failures are logged rather than thrown, as before.
    console.log(`Error writing file: ${err}`);
  }
}
// Script entry point: report any rejection from run() and exit with a
// non-zero status instead of leaving the promise unhandled.
run().catch((err: unknown) => {
  console.error(`Scraping failed: ${err}`);
  process.exitCode = 1;
});

3
scrapers/ccpl.ts

@ -1,9 +1,6 @@
const puppeteer = require('puppeteer');
import * as fs from 'fs';
import parserConfig from "../config";
// configure database
// get data from webpage
let options: any = {};

4
utils.ts

@ -100,14 +100,14 @@ class utils {
/**
* file management
*/
writeFile(fileName: string, data: any, formatData: any){
static writeFile(fileName: string, data: any, formatData: any = 'json'){
let dataToSave = data;
if (formatData == 'json') {
dataToSave = JSON.stringify(data, null, 4)
}
// write file to disk
fs.writeFile(
`./sources_examples/${fileName}`,
`./output/${fileName}`,
dataToSave,
"utf8",
(err: any) => {

16
yarn.lock

@ -34,6 +34,13 @@
"resolved" "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.2.tgz"
"version" "1.0.2"
"@types/node-fetch@^3.0.3":
"integrity" "sha512-HhggYPH5N+AQe/OmN6fmhKmRRt2XuNJow+R3pQwJxOOF9GuwM7O2mheyGeIrs5MOIeNjDEdgdoyHBOrFeJBR3g=="
"resolved" "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-3.0.3.tgz"
"version" "3.0.3"
dependencies:
"node-fetch" "*"
"@types/node@*", "@types/node@^17.0.8":
"integrity" "sha512-YofkM6fGv4gDJq78g4j0mMuGMkZVxZDgtU0JRdx6FgiJDG+0fY0GKVolOV8WqVmEhLCXkQRjwDdKyPxJp/uucg=="
"resolved" "https://registry.npmjs.org/@types/node/-/node-17.0.8.tgz"
@ -402,7 +409,14 @@
"resolved" "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz"
"version" "2.1.2"
"node-fetch@^2.6.1", "node-fetch@2.6.5":
"node-fetch@*", "node-fetch@^2.6.6":
"integrity" "sha512-Z8/6vRlTUChSdIgMa51jxQ4lrw/Jy5SOW10ObaA47/RElsAN2c5Pn8bTgFGWn/ibwzXTE8qwr1Yzx28vsecXEA=="
"resolved" "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.6.tgz"
"version" "2.6.6"
dependencies:
"whatwg-url" "^5.0.0"
"node-fetch@2.6.5":
"integrity" "sha512-mmlIVHJEu5rnIxgEgez6b9GgWXbkZj5YZ7fx+2r94a2E+Uirsp6HsPTPlomfdHtpt/B0cdKviwkoaM6pyvUOpQ=="
"resolved" "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.5.tgz"
"version" "2.6.5"

Loading…
Cancel
Save