From 31d8fd53c8e5b2263b706ad2fbe422b0ca4ff812 Mon Sep 17 00:00:00 2001 From: Tykayn Date: Thu, 17 Aug 2023 12:40:49 +0200 Subject: [PATCH] scrap one list of books --- scraping/main.ts | 101 +++++++++++++++++++++++++++++++++++++ scraping/output/books.json | 62 +++++++++++++++++++++++ scraping/utils.ts | 15 ++++++ 3 files changed, 178 insertions(+) create mode 100644 scraping/main.ts create mode 100644 scraping/output/books.json create mode 100644 scraping/utils.ts diff --git a/scraping/main.ts b/scraping/main.ts new file mode 100644 index 00000000..4955eb3c --- /dev/null +++ b/scraping/main.ts @@ -0,0 +1,101 @@ +/** + scrapping des livres de la médiathèque de briis + **/ +// @ts-ignore +import https from 'https'; +import WriteFile from "./utils"; + +const axios = require('axios'); +const cheerio = require('cheerio'); +const url: string = "www.mediatheque-de-briis-sous-forges.net"; +const fetching_path: string = "/mediatheque-de-briis-sous-forges.net/opac/recherche/catalogue?node=0&value=0&page=2"; + +interface Book { + author: string + title: string + description: string + img: string +} +const books: Book[] = []; +// autres pages: +// http://www.mediatheque-de-briis-sous-forges.net/mediatheque-de-briis-sous-forges.net/opac/recherche/catalogue?node=0&value=0&page=2 +const page_max = 1927 + +const getTables = (html: string): any => { + const $ = cheerio.load(html); + const tableElements = $( + "table.notice" + ); + return tableElements; +}; + +const getHtml = async (hostname: string, path: string): Promise => + new Promise((resolve, reject) => { + https + .get( + { + hostname, + path, + // port:80, + method: "GET", + }, + (res) => { + let html = ""; + res.on("data", function (chunk) { + html += chunk; + }); + res.on("end", function () { + resolve(html); + }); + } + ) + .on("error", (error) => { + console.error(error); + reject(error); + }); + }); + +// fetchData(url).then((res: any) => { +// const html = res.data; +// const $ = cheerio.load(html); +// const statsTable :any = $('table.notice'); +// console.log('statsTable', statsTable) +// statsTable.each(function(){ +// let elem:any = this; +// let author = $(elem).find('td').eq(2).text(); +// // let img = $(this).find('img').attr('src'); +// // console.log(elem); +// console.log(author); +// }); +// }) + + +function writeBookScrapping() { + WriteFile('books.json', JSON.stringify(books, null , 2)) +} + +getHtml(url, fetching_path) + .then(getTables) + .then( + (tables: any) => tables.each( + (_: any, table: any) => { + const $ = cheerio.load(table); + // console.log('une table') + // let author = $().find('a.notice').text(); + let text_description = $(table).find('td').eq(1).text(); + let boom = text_description.split('\n'); + let splitting = boom[1].split('/') + let img_src = $(table).find('td img').attr('src'); + console.log(img_src); + books.push({ + author: boom[0], + title: splitting[0], + description: splitting[1], + img: img_src + }) + // console.log(cheerio.load(table).html()) + } + ) + ) + .then(writeBookScrapping) + .catch((error) => console.log(error)); diff --git a/scraping/output/books.json b/scraping/output/books.json new file mode 100644 index 00000000..a68cc6b3 --- /dev/null +++ b/scraping/output/books.json @@ -0,0 +1,62 @@ +[ + { + "author": "ALLENDE, Isabel", + "title": "Portrait sépia ", + "description": " Isabel Allende ; Trad. de l'espagnol par Claude de Frayssinet. - Paris : Grasset et Fasquelle, 2001. - 1 vol. , 391 p. : couv. ill. ; 24 x 15 cm.", + "img": "http://images-eu.amazon.com/images/P/2246617715.08.MZZZZZZZ.jpg" + }, + { + "author": "AMETTE, Jacques-Pierre", + "title": "La Maîtresse de Brecht : roman ", + "description": " Jacques-Pierre Amette. - Paris : Albin Michel, 2003. - 300 p. : jaquette ill. ; 20 cm.", + "img": "http://images-eu.amazon.com/images/P/2226141634.08.MZZZZZZZ.jpg" + }, + { + "author": "ANDRIÂC, Ivo", + "title": "Mara la courtisane : et autres nouvelles ", + "description": " Ivo Andriâc ; Trad. du serbo-croate par Pascale Delpech. - Paris : Belfond, 1999. - 234 p. : couv. ill. en coul. ; 23 cm. - (Littérature étrangère).", + "img": "http://images-eu.amazon.com/images/P/2714435572.08.MZZZZZZZ.jpg" + }, + { + "author": "ANGLADE, Jean", + "title": "Un Lit d'aubépine : roman ", + "description": " Jean Anglade. - Paris : Presses de la Cité, 1995. - 325 p. : couv. ill. en coul. ; 23 cm. - (Production Jeannine Balland).", + "img": "http://images-eu.amazon.com/images/P/2258039568.08.MZZZZZZZ.jpg" + }, + { + "author": "ARNOTHY, Christine", + "title": "J'ai quinze ans et je ne veux pas mourir ; (suivi de) Il n'est pas si facile de vivre ", + "description": " Christine Arnothy. - Paris : France loisirs, 1981. - 330 p ; 23 cm.", + "img": "http://images-eu.amazon.com/images/P/2724211065.08.MZZZZZZZ.jpg" + }, + { + "author": "CHAUVIN, Rémy", + "title": "Le Monde animal et ses comportements complexes ", + "description": " Rémy Chauvin, Bernadette Chauvin. - Paris : Plon, 1977. - 282 p : ill ; 21 cm.", + "img": "http://images-eu.amazon.com/images/P/2259002331.08.MZZZZZZZ.jpg" + }, + { + "author": "D'ARZO, Silvio", + "title": "Maison des autres , (Contient) Un moment comme ça ", + "description": " texte de Silvio D'Arzo ; Trad. de l'italien par Bernard Simeone, Philippe Renard ; Préf. Attilio Bertolucci. - Lagrasse : Verdier, 1997. - 1 vol. , 86 p. : - ; 22 x 14 cm. - (Terra d'altri, ISSN 0989-4160).", + "img": "http://images-eu.amazon.com/images/P/2864322838.08.MZZZZZZZ.jpg" + }, + { + "author": "ASSOULINE, Pierre", + "title": "La Cliente : roman ", + "description": " Pierre Assouline. - Paris : Gallimard, 1998. - 191 p. ; 21 cm.", + "img": "http://images-eu.amazon.com/images/P/207075278X.08.MZZZZZZZ.jpg" + }, + { + "author": "ATKINSON, Kate", + "title": "Dans les replis du temps ", + "description": " texte de Kate Atkinson ; Trad. de l'anglais par Jean Bourdier. - Paris : Librairie générale française, 1999. - 1 vol. , 403 p. : ill., couv. ill. en coul. ; 18 x 11 cm. - (Le livre de poche ; 14687).", + "img": "http://images-eu.amazon.com/images/P/2253146870.08.MZZZZZZZ.jpg" + }, + { + "author": "ATKINSON, Kate", + "title": "Dans les coulisses du musée : roman ", + "description": " Kate Atkinson ; Trad. de l'anglais par Jean Bourdier. - Paris : Bernard de Fallois, 1996. - 348 p. : couv. ill. en coul. ; 23 cm.", + "img": "http://images-eu.amazon.com/images/P/2877062775.08.MZZZZZZZ.jpg" + } +] \ No newline at end of file diff --git a/scraping/utils.ts b/scraping/utils.ts new file mode 100644 index 00000000..763977e9 --- /dev/null +++ b/scraping/utils.ts @@ -0,0 +1,15 @@ +import * as fs from "node:fs"; + +export default function WriteFile(fileName: string, fileContent: any) { + console.log('write file', fileName) + return fs.writeFile( + `./output/${fileName}`, + fileContent, + 'utf8', + (err) => { + if (err) { + console.log(`Error writing file: ${err}`) + } + } + ) +} \ No newline at end of file