From 265f50291d3e75a55fbcaaafd03535e5b8f7efdb Mon Sep 17 00:00:00 2001 From: Tykayn Date: Thu, 17 Aug 2023 12:53:32 +0200 Subject: [PATCH] scrap multiple pages --- scraping/main.ts | 82 ++++++++++----- scraping/output/books.json | 199 +++++++++++++++++++++++++++++++++++++ 2 files changed, 254 insertions(+), 27 deletions(-) diff --git a/scraping/main.ts b/scraping/main.ts index 4955eb3c..bf582141 100644 --- a/scraping/main.ts +++ b/scraping/main.ts @@ -8,18 +8,21 @@ import WriteFile from "./utils"; const axios = require('axios'); const cheerio = require('cheerio'); const url: string = "www.mediatheque-de-briis-sous-forges.net"; -const fetching_path: string = "/mediatheque-de-briis-sous-forges.net/opac/recherche/catalogue?node=0&value=0&page=2"; +let fetching_path: string = "/mediatheque-de-briis-sous-forges.net/opac/recherche/catalogue?node=0&value=0&page="; interface Book { author: string title: string description: string + format: string img: string } + const books: Book[] = []; // autres pages: // http://www.mediatheque-de-briis-sous-forges.net/mediatheque-de-briis-sous-forges.net/opac/recherche/catalogue?node=0&value=0&page=2 -const page_max = 1927 +let page_max = 1927 +page_max = 4 const getTables = (html: string): any => { const $ = cheerio.load(html); @@ -71,31 +74,56 @@ const getHtml = async (hostname: string, path: string): Promise => function writeBookScrapping() { - WriteFile('books.json', JSON.stringify(books, null , 2)) + WriteFile('books.json', JSON.stringify(books, null, 2)) } -getHtml(url, fetching_path) - .then(getTables) - .then( - (tables: any) => tables.each( - (_: any, table: any) => { - const $ = cheerio.load(table); - // console.log('une table') - // let author = $().find('a.notice').text(); - let text_description = $(table).find('td').eq(1).text(); - let boom = text_description.split('\n'); - let splitting = boom[1].split('/') - let img_src = $(table).find('td img').attr('src'); - console.log(img_src); - books.push({ - author: boom[0], - title: splitting[0], - description: splitting[1], - img: img_src - }) - // console.log(cheerio.load(table).html()) - } - ) + +// loop on all pages + +const scrapOnePage = (tables: any) => { + tables.each( + (_: any, table: any) => { + const $ = cheerio.load(table); + // console.log('une table') + // let author = $().find('a.notice').text(); + let text_description = $(table).find('td').eq(1).text(); + let boom = text_description.split('\n'); + let splitting = boom[1].split('/') + let format = splitting[0].split(':') + let img_src = $(table).find('td img').attr('src'); + console.log(img_src); + books.push({ + author: boom[0].trim(), + title: splitting[0].trim(), + description: splitting[1].trim(), + format: format[1].trim(), + img: img_src + }) + // console.log(cheerio.load(table).html()) + } ) - .then(writeBookScrapping) - .catch((error) => console.log(error)); +} + +async function main() { + + for (let page_counter = 1; page_counter <= page_max; page_counter++) { + + fetching_path = "/mediatheque-de-briis-sous-forges.net/opac/recherche/catalogue?node=0&value=0&page=" + page_counter + await getHtml(url, fetching_path) + .then(getTables) + .then( + scrapOnePage + ) + // .then(() => { + // if (page_counter === page_max) { + // } + // }) + .catch((error) => console.log(error)); + + } + writeBookScrapping() +} + +main() + + diff --git a/scraping/output/books.json b/scraping/output/books.json index a68cc6b3..d741b5e1 100644 --- a/scraping/output/books.json +++ b/scraping/output/books.json @@ -1,4 +1,70 @@ [ + { + "author": "PONTI, Claude", + "title": "La Boîte ", + "description": " Claude Ponti. - Paris : L'Ecole des loisirs, 1995. - [16] p. : ill. en coul., couv. ill. en coul. ; 12 x 16 cm. - (Tromboline et Foulbazar).", + "img": "http://images-eu.amazon.com/images/P/2211031838.08.MZZZZZZZ.jpg" + }, + { + "author": "KADARÉ, Ismail", + "title": "L'Ombre : roman ", + "description": " Ismail Kadaré ; Trad. de l'albanais par Jusuf Vrioni. - Paris : Fayard, 1994. - 257 p. : couv. ill. en coul. ; 24 cm.", + "format": " roman ", + "img": "http://images-eu.amazon.com/images/P/2213027560.08.MZZZZZZZ.jpg" + }, + { + "author": "AARON, Soazig", + "title": "Le Non de Klara ", + "description": " Soazig Aaron. - Paris : Nadeau Maurice, 2001. - 186 p. : couv. ill. ; 21 cm.", + "img": "http://images-eu.amazon.com/images/P/2862311723.08.MZZZZZZZ.jpg" + }, + { + "author": "ABÉCASSIS, Eliette", + "title": "Mon père : roman ", + "description": " Eliette Abécassis. - Paris : Albin Michel, 2002. - 136 p. ; 20 cm.", + "format": " roman ", + "img": "http://images-eu.amazon.com/images/P/2226134484.08.MZZZZZZZ.jpg" + }, + { + "author": "ABÉCASSIS, Eliette", + "title": "Mon père : roman ", + "description": " Eliette Abécassis. - Paris : Albin Michel, 2002. - 136 p. ; 20 cm.", + "format": " roman ", + "img": "http://images-eu.amazon.com/images/P/2226134484.08.MZZZZZZZ.jpg" + }, + { + "author": "ABE, Kôbô", + "title": "L'homme-boîte ", + "description": " texte de Kôbô Abe ; Trad. du japonais par Suzanne Rosset. - Paris : Stock, 1986. - 1 vol. , 200 p. ; 18 x 11 cm. - (Bibliothèque cosmopolite, ISSN 0224-5833 ; 74).", + "img": "http://images-eu.amazon.com/images/P/2234019885.08.MZZZZZZZ.jpg" + }, + { + "author": "ABE, Kôbô", + "title": "La Femme des sables ", + "description": " Kôbô Abe, Georges Bonneau. - Paris : Stock, 1979. - 282 p. ; 18 cm. - (Bibliothèque cosmopolite, ISSN 0224-5833 ; 10).", + "img": "http://images-eu.amazon.com/images/P/2234011752.08.MZZZZZZZ.jpg" + }, + { + "author": "ABÉCASSIS, Eliette", + "title": "Le Trésor du Temple : roman ", + "description": " Eliette Abécassis. - Paris : Albin Michel, 2001. - 344 p. : couv. ill. en coul. ; 24 cm.", + "format": " roman ", + "img": "http://images-eu.amazon.com/images/P/2226125728.08.MZZZZZZZ.jpg" + }, + { + "author": "ABÉCASSIS, Eliette", + "title": "Clandestin : roman ", + "description": " Eliette Abécassis. - Paris : Albin Michel, 2003. - 142 p. : jaquette ill. ; 20 cm.", + "format": " roman ", + "img": "http://images-eu.amazon.com/images/P/2226141626.08.MZZZZZZZ.jpg" + }, + { + "author": "ABÉCASSIS, Eliette", + "title": "La Répudiée : roman ", + "description": " Eliette Abécassis. - Paris : Albin Michel, 2000. - 129 p. ; 20 cm.", + "format": " roman ", + "img": "http://images-eu.amazon.com/images/P/2226110577.08.MZZZZZZZ.jpg" + }, { "author": "ALLENDE, Isabel", "title": "Portrait sépia ", @@ -9,18 +75,21 @@ "author": "AMETTE, Jacques-Pierre", "title": "La Maîtresse de Brecht : roman ", "description": " Jacques-Pierre Amette. - Paris : Albin Michel, 2003. - 300 p. : jaquette ill. ; 20 cm.", + "format": " roman ", "img": "http://images-eu.amazon.com/images/P/2226141634.08.MZZZZZZZ.jpg" }, { "author": "ANDRIÂC, Ivo", "title": "Mara la courtisane : et autres nouvelles ", "description": " Ivo Andriâc ; Trad. du serbo-croate par Pascale Delpech. - Paris : Belfond, 1999. - 234 p. : couv. ill. en coul. ; 23 cm. - (Littérature étrangère).", + "format": " et autres nouvelles ", "img": "http://images-eu.amazon.com/images/P/2714435572.08.MZZZZZZZ.jpg" }, { "author": "ANGLADE, Jean", "title": "Un Lit d'aubépine : roman ", "description": " Jean Anglade. - Paris : Presses de la Cité, 1995. - 325 p. : couv. ill. en coul. ; 23 cm. - (Production Jeannine Balland).", + "format": " roman ", "img": "http://images-eu.amazon.com/images/P/2258039568.08.MZZZZZZZ.jpg" }, { @@ -45,6 +114,7 @@ "author": "ASSOULINE, Pierre", "title": "La Cliente : roman ", "description": " Pierre Assouline. - Paris : Gallimard, 1998. - 191 p. ; 21 cm.", + "format": " roman ", "img": "http://images-eu.amazon.com/images/P/207075278X.08.MZZZZZZZ.jpg" }, { @@ -57,6 +127,135 @@ "author": "ATKINSON, Kate", "title": "Dans les coulisses du musée : roman ", "description": " Kate Atkinson ; Trad. de l'anglais par Jean Bourdier. - Paris : Bernard de Fallois, 1996. - 348 p. : couv. ill. en coul. ; 23 cm.", + "format": " roman ", "img": "http://images-eu.amazon.com/images/P/2877062775.08.MZZZZZZZ.jpg" + }, + { + "author": "ATTALI, Jacques", + "title": "La Femme du menteur : roman ", + "description": " Jacques Attali. - Paris : Fayard, 1999. - 282 p. : jaquette ill. en coul. ; 22 cm.", + "format": " roman ", + "img": "http://images-eu.amazon.com/images/P/2213603391.08.MZZZZZZZ.jpg" + }, + { + "author": "AUBRAC, Lucie", + "title": "Ils Partiront Dans L'Ivresse ", + "description": " Lucie Aubrac. - Paris : Ed. du Seuil, 1997.", + "img": "http://images-eu.amazon.com/images/P/2020316544.08.MZZZZZZZ.jpg" + }, + { + "author": "AUDOUARD, Antoine", + "title": "Le Messager des sables : roman ", + "description": " Antoine Audouard, Léonard Anthony. - Paris : Laffont, Robert, 2003. - 385 p. : couv. ill. ; 24 cm.", + "format": " roman ", + "img": "http://images-eu.amazon.com/images/P/2221100131.08.MZZZZZZZ.jpg" + }, + { + "author": "AUEL, Jean Marie", + "title": "Les enfants de la terre ", + "description": " Jean Marie Auel. - Paris : Presses de la Cité. - 5 vol.", + "img": "http://images-eu.amazon.com/images/P/2258059321.08.MZZZZZZZ.jpg" + }, + { + "author": "AUEL, Jean Marie", + "title": "Les enfants de la terre ", + "description": " Jean Marie Auel. - Paris : Presses de la Cité. - 5 vol.", + "img": "http://images-eu.amazon.com/images/P/2258059313.08.MZZZZZZZ.jpg" + }, + { + "author": "AUEL, Jean Marie", + "title": "Les enfants de la terre ", + "description": " Jean Marie Auel. - Paris : Presses de la Cité. - 5 vol.", + "img": "http://images-eu.amazon.com/images/P/2258059305.08.MZZZZZZZ.jpg" + }, + { + "author": "AUEL, Jean Marie", + "title": "Les enfants de la terre ", + "description": " Jean Marie Auel. - Paris : Presses de la Cité. - 5 vol.", + "img": "http://images-eu.amazon.com/images/P/2258059461.08.MZZZZZZZ.jpg" + }, + { + "author": "AUEL, Jean Marie", + "title": "Les enfants de la terre ", + "description": " Jean Marie Auel. - Paris : Presses de la Cité. - 5 vol.", + "img": "http://images-eu.amazon.com/images/P/2258058376.08.MZZZZZZZ.jpg" + }, + { + "author": "AUSTER, Paul", + "title": "Je pensais que mon père était Dieu et autres récits de la réalité américaine : 172 histoires racontées pour le National Story Project et l'émission de radio intitulée Weekend All Things Considered ", + "description": " réunis par Paul Auster, Nelly Reifler ; Trad. de l'américain par Christine Le Boeuf. - Arles : Actes sud, 2001. - 1 vol. , 460 p. : couv. ill. ; 22x 12 cm.", + "format": " 172 histoires racontées pour le National Story Project et l'émission de radio intitulée Weekend All Things Considered ", + "img": "http://images-eu.amazon.com/images/P/2742733213.08.MZZZZZZZ.jpg" + }, + { + "author": "AVRIL, Nicole", + "title": "Moi, Dora Maar : roman ", + "description": " Nicole Avril. - Paris : Plon, 2001. - 224 p. : couv. ill. en coul. ; 23 cm.", + "format": " roman ", + "img": "http://images-eu.amazon.com/images/P/2259190642.08.MZZZZZZZ.jpg" + }, + { + "author": "AYMÉ, Marcel", + "title": "La Vouivre ", + "description": " Marcel Aymé. - Paris : Gallimard, 1972. - 251 p. : couv. ill. en coul. ; 18 cm. - (Folio ; 167).", + "img": "http://images-eu.amazon.com/images/P/2070361675.08.MZZZZZZZ.jpg" + }, + { + "author": "BACHARAN, Nicole", + "title": "Némo en Amérique ", + "description": " texte de Nicole Bacharan, Dominique Simonnet. - Paris : Ed. du Seuil, 2001. - 1 vol. , 214 p. : carte, couv. ill. en coul. ; 24 x 16 cm.", + "img": "http://images-eu.amazon.com/images/P/2020354209.08.MZZZZZZZ.jpg" + }, + { + "author": "BALZAC, Honoré de", + "title": "Eugénie Grandet ", + "description": " texte de Honoré de Balzac ; Préf. Maurice Bardèche. - Paris : Librairie générale française, 1983. - 1 vol. , 338 p. : couv. ill. en coul ; 17 x 11 cm. - (Livre de poche (Le) ; 1414).", + "img": "http://images-eu.amazon.com/images/P/2253003867.08.MZZZZZZZ.jpg" + }, + { + "author": "BALZAC, Honoré de", + "title": "Le Père Goriot ", + "description": " texte de Honoré de Balzac ; Présenté par Philippe Berthier. - Paris : Flammarion, 1995. - 1 vol. , 374 p. : couv. ill. en coul ; 18 x 11 cm.", + "img": "http://images-eu.amazon.com/images/P/2253004278.08.MZZZZZZZ.jpg" + }, + { + "author": "BANKS, Russell", + "title": "Sous le règne de Bone : roman ", + "description": " Russell Banks ; Trad. de l'américain par Pierre Furlan. - Arles : Actes sud, 1995. - 409 p. : couv. ill. en coul. ; 22 cm. - (Lettres anglo-américaines . 0) (Lettres anglo-américaines).", + "format": " roman ", + "img": "http://images-eu.amazon.com/images/P/2742706038.08.MZZZZZZZ.jpg" + }, + { + "author": "BARNES, Julian", + "title": "Love, etc. ", + "description": " Julian Barnes ; Trad. de l'anglais par Raymond Las Vergnas. - Paris : Denoël, 1992. - 314 p. ; 21 cm. - (Empreinte).", + "img": "http://images-eu.amazon.com/images/P/2207239926.08.MZZZZZZZ.jpg" + }, + { + "author": "BARICCO, Alessandro", + "title": "Océan mer : roman ", + "description": " Alessandro Baricco ; Trad. de l'italien par Françoise Brun. - Paris : Albin Michel, 1997. - 274 p. : jaquette ill. en coul. ; 20 cm. - (Les grandes traductions . 0) (Les grandes traductions).", + "format": " roman ", + "img": "http://images-eu.amazon.com/images/P/2226095705.08.MZZZZZZZ.jpg" + }, + { + "author": "BARICCO, Alessandro", + "title": "Sans sang ", + "description": " Alessandro Baricco ; Trad. de l'italien par Françoise Brun. - Paris : Albin Michel, 2002. - 112 p. ; 19 cm.", + "img": "http://images-eu.amazon.com/images/P/222613610X.08.MZZZZZZZ.jpg" + }, + { + "author": "BARJAVEL, René", + "title": "L'Enchanteur : roman ", + "description": " René Barjavel. - Paris : Le grand livre du mois, 1984. - 349 p ; 24 cm. - (Le Grand livre du mois).", + "format": " roman ", + "img": "http://images-eu.amazon.com/images/P/2207229742.08.MZZZZZZZ.jpg" + }, + { + "author": "BARRY, Mariama", + "title": "La Petite Peule : roman ", + "description": " Mariama Barry. - Paris : Mazarine, 2000. - 268 p. : couv. ill. ; 22 cm.", + "format": " roman ", + "img": "http://images-eu.amazon.com/images/P/2863743228.08.MZZZZZZZ.jpg" } ] \ No newline at end of file